]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/uloc.cpp
ICU-59117.0.1.tar.gz
[apple/icu.git] / icuSources / common / uloc.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File ULOC.CPP
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 04/01/97 aliu Creation.
15 * 08/21/98 stephen JDK 1.2 sync
16 * 12/08/98 rtg New Locale implementation and C API
17 * 03/15/99 damiba overhaul.
18 * 04/06/99 stephen changed setDefault() to realloc and copy
19 * 06/14/99 stephen Changed calls to ures_open for new params
20 * 07/21/99 stephen Modified setDefault() to propagate to C++
21 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
22 * brought canonicalization code into line with spec
23 *****************************************************************************/
24
25 /*
26 POSIX's locale format, from putil.c: [no spaces]
27
28 ll [ _CC ] [ . MM ] [ @ VV]
29
30 l = lang, C = ctry, M = charmap, V = variant
31 */
32
33 #include "unicode/utypes.h"
34 #include "unicode/ustring.h"
35 #include "unicode/uloc.h"
36
37 #include "putilimp.h"
38 #include "ustr_imp.h"
39 #include "ulocimp.h"
40 #include "umutex.h"
41 #include "cstring.h"
42 #include "cmemory.h"
43 #include "locmap.h"
44 #include "uarrsort.h"
45 #include "uenumimp.h"
46 #include "uassert.h"
47 #include "charstr.h"
48
49 #include <stdio.h> /* for sprintf */
50
51 U_NAMESPACE_USE
52
53 /* ### Declarations **************************************************/
54
55 /* Locale stuff from locid.cpp */
56 U_CFUNC void locale_set_default(const char *id);
57 U_CFUNC const char *locale_get_default(void);
58 U_CFUNC int32_t
59 locale_getKeywords(const char *localeID,
60 char prev,
61 char *keywords, int32_t keywordCapacity,
62 char *values, int32_t valuesCapacity, int32_t *valLen,
63 UBool valuesToo,
64 UErrorCode *status);
65
66 /* ### Data tables **************************************************/
67
68 /**
69 * Table of language codes, both 2- and 3-letter, with preference
70 * given to 2-letter codes where possible. Includes 3-letter codes
71 * that lack a 2-letter equivalent.
72 *
73 * This list must be in sorted order. This list is returned directly
74 * to the user by some API.
75 *
76 * This list must be kept in sync with LANGUAGES_3, with corresponding
77 * entries matched.
78 *
79 * This table should be terminated with a NULL entry, followed by a
80 * second list, and another NULL entry. The first list is visible to
81 * user code when this array is returned by API. The second list
82 * contains codes we support, but do not expose through user API.
83 *
84 * Notes
85 *
86 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
87 * include the revisions up to 2001/7/27 *CWB*
88 *
89 * The 3 character codes are the terminology codes like RFC 3066. This
90 * is compatible with prior ICU codes
91 *
92 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
93 * table but now at the end of the table because 3 character codes are
94 * duplicates. This avoids bad searches going from 3 to 2 character
95 * codes.
96 *
97 * The range qaa-qtz is reserved for local use
98 */
99 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
100 /* ISO639 table version is 20150505 */
static const char * const LANGUAGES[] = {
    /* First list: codes exposed to callers. */
    "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
    "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
    "aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
    "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
    "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
    "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
    "ca", "cad", "car", "cay", "cch", "ce", "ceb", "cgg",
    "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
    "cs", "csb", "cu", "cv", "cy",
    "da", "dak", "dar", "dav", "de", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz", "dzg",
    "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
    "en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
    "ext",
    "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
    "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
    "gur", "guz", "gv", "gwi",
    "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
    "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
    "hup", "hy", "hz",
    "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
    "ilo", "inh", "io", "is", "it", "iu", "izh",
    "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
    "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
    "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
    "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
    "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
    "kv", "kw", "ky",
    "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
    "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
    "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
    "lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
    "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
    "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj",
    "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my", "mye", "myv", "mzn",
    "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
    "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
    "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
    "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
    "oc", "oj", "om", "or", "os", "osa", "ota",
    "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
    "pon", "prg", "pro", "ps", "pt",
    "qu", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
    "rof", "rom", "rtm", "ru", "rue", "rug", "rup",
    "rw", "rwk",
    "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
    "se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
    "sgs", "shi", "shn", "shu", "si", "sid", "sk",
    "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
    "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
    "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
    "sv", "sw", "swb", "swc", "syc", "syr", "szl",
    "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
    "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
    "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
    "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
    "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
    "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
    "xal", "xh", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
    "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
    "zun", "zxx", "zza",
    NULL, /* end of the first list */
    /* Second list: obsolete language codes. */
    "in", "iw", "ji", "jw", "sh",
    NULL  /* end of the second list */
};
190
/* Withdrawn language codes; laid out in parallel with REPLACEMENT_LANGUAGES. */
static const char * const DEPRECATED_LANGUAGES[] = {
    "in", "iw", "ji", "jw",
    NULL, NULL
};

/* Codes listed at the same positions as the entries of DEPRECATED_LANGUAGES. */
static const char * const REPLACEMENT_LANGUAGES[] = {
    "id", "he", "yi", "jv",
    NULL, NULL
};
197
198 /**
199 * Table of 3-letter language codes.
200 *
201 * This is a lookup table used to convert 3-letter language codes to
202 * their 2-letter equivalent, where possible. It must be kept in sync
203 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
204 * same language as LANGUAGES_3[i]. The commented-out lines are
205 * copied from LANGUAGES to make eyeballing this baby easier.
206 *
207 * Where a 3-letter language code has no 2-letter equivalent, the
208 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
209 *
210 * This table should be terminated with a NULL entry, followed by a
211 * second list, and another NULL entry. The two lists correspond to
212 * the two lists in LANGUAGES.
213 */
214 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
215 /* ISO639 table version is 20150505 */
static const char * const LANGUAGES_3[] = {
    /* First list: same positions as the first list of LANGUAGES. */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
    NULL, /* end of the first list */
    /* Second list, matching LANGUAGES: "in", "iw", "ji", "jw", "sh" */
    "ind", "heb", "yid", "jaw", "srp",
    NULL  /* end of the second list */
};
306
307 /**
308 * Table of 2-letter country codes.
309 *
310 * This list must be in sorted order. This list is returned directly
311 * to the user by some API.
312 *
313 * This list must be kept in sync with COUNTRIES_3, with corresponding
314 * entries matched.
315 *
316 * This table should be terminated with a NULL entry, followed by a
317 * second list, and another NULL entry. The first list is visible to
318 * user code when this array is returned by API. The second list
319 * contains codes we support, but do not expose through user API.
320 *
321 * Notes:
322 *
323 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
324 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
325 * new codes keeping the old ones for compatibility updated to include
326 * 1999/12/03 revisions *CWB*
327 *
328 * RO(ROM) is now RO(ROU) according to
329 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
330 */
static const char * const COUNTRIES[] = {
    /* First list: codes exposed to callers. */
    "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM",
    "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
    "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
    "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
    "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
    "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR",
    "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
    "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
    "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
    "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
    "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
    "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
    "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
    "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
    "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
    "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
    "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
    "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
    "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
    "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
    "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
    "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
    "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
    "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
    "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
    "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ",
    "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
    "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
    "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
    "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
    NULL, /* end of the first list */
    /* Second list: obsolete country codes. */
    "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",
    NULL  /* end of the second list */
};
366
/* Deprecated country codes; laid out in parallel with REPLACEMENT_COUNTRIES. */
static const char * const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH",
    "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR",
    NULL, NULL
};

/* Replacement country codes, listed at the positions of the deprecated codes shown above each row. */
static const char * const REPLACEMENT_COUNTRIES[] = {
    /* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU",
    /* "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD",
    NULL, NULL
};
374
375 /**
376 * Table of 3-letter country codes.
377 *
378 * This is a lookup table used to convert 3-letter country codes to
379 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
380 * For all valid i, COUNTRIES[i] must refer to the same country as
381 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
382 * to make eyeballing this baby easier.
383 *
384 * This table should be terminated with a NULL entry, followed by a
385 * second list, and another NULL entry. The two lists correspond to
386 * the two lists in COUNTRIES.
387 */
static const char * const COUNTRIES_3[] = {
    /* Each data row is preceded by the matching row of COUNTRIES. */
    /* "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
    "ASC", "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
    /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
    /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
    /* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
    /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
    /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR", */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CPT", "CRI",
    /* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
    /* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
    /* (no valid 3-letter code for EA, hence the padded "EA ") */
    "DMA", "DOM", "DZA", "EA ", "ECU", "EST", "EGY", "ESH", "ERI",
    /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
    /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
    /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
    /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
    /* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", */
    /* (no valid 3-letter code for IC, hence the padded "IC ") */
    "IC ", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
    /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
    /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
    /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
    /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
    /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
    /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
    /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
    /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
    /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
    /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
    /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
    /* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
    /* "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ", */
    "SXM", "SYR", "SWZ", "TAA", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
    /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
    /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
    /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
    /* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
    "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
    NULL, /* end of the first list */
    /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
    NULL  /* end of the second list */
};
454
/* Row type of CANONICALIZE_MAP. */
typedef struct CanonicalizationMap {
    const char *id;          /* locale ID as supplied (input) */
    const char *canonicalID; /* canonical ID that replaces it (output) */
    const char *keyword;     /* keyword paired with the row, or NULL when there is none */
    const char *value;       /* value for keyword; NULL whenever keyword is NULL */
} CanonicalizationMap;

/**
 * Table used to canonicalize locale IDs. The rows cover several unrelated
 * kinds of transformation (POSIX and .NET names, old ICU variants, registered
 * names, pre-euro currency variants). Each row gives an input ID, its
 * canonical ID and, for some rows, a keyword/value pair.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    /* id                    canonicalID     keyword      value */
    { "",                    "en_US_POSIX",  NULL,        NULL },          /* .NET name */
    { "c",                   "en_US_POSIX",  NULL,        NULL },          /* POSIX name */
    { "posix",               "en_US_POSIX",  NULL,        NULL },          /* POSIX name (alias of C) */
    { "art_LOJBAN",          "jbo",          NULL,        NULL },          /* registered name */
    { "az_AZ_CYRL",          "az_Cyrl_AZ",   NULL,        NULL },          /* .NET name */
    { "az_AZ_LATN",          "az_Latn_AZ",   NULL,        NULL },          /* .NET name */
    { "ca_ES_PREEURO",       "ca_ES",        "currency",  "ESP" },
    { "de__PHONEBOOK",       "de",           "collation", "phonebook" },   /* Old ICU name */
    { "de_AT_PREEURO",       "de_AT",        "currency",  "ATS" },
    { "de_DE_PREEURO",       "de_DE",        "currency",  "DEM" },
    { "de_LU_PREEURO",       "de_LU",        "currency",  "LUF" },
    { "el_GR_PREEURO",       "el_GR",        "currency",  "GRD" },
    { "en_BE_PREEURO",       "en_BE",        "currency",  "BEF" },
    { "en_IE_PREEURO",       "en_IE",        "currency",  "IEP" },
    { "es__TRADITIONAL",     "es",           "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO",       "es_ES",        "currency",  "ESP" },
    { "eu_ES_PREEURO",       "eu_ES",        "currency",  "ESP" },
    { "fi_FI_PREEURO",       "fi_FI",        "currency",  "FIM" },
    { "fr_BE_PREEURO",       "fr_BE",        "currency",  "BEF" },
    { "fr_FR_PREEURO",       "fr_FR",        "currency",  "FRF" },
    { "fr_LU_PREEURO",       "fr_LU",        "currency",  "LUF" },
    { "ga_IE_PREEURO",       "ga_IE",        "currency",  "IEP" },
    { "gl_ES_PREEURO",       "gl_ES",        "currency",  "ESP" },
    { "hi__DIRECT",          "hi",           "collation", "direct" },      /* Old ICU name */
    { "it_IT_PREEURO",       "it_IT",        "currency",  "ITL" },
    { "ja_JP_TRADITIONAL",   "ja_JP",        "calendar",  "japanese" },    /* Old ICU name */
    { "nb_NO_NY",            "nn_NO",        NULL,        NULL },          /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO",       "nl_BE",        "currency",  "BEF" },
    { "nl_NL_PREEURO",       "nl_NL",        "currency",  "NLG" },
    { "pt_PT_PREEURO",       "pt_PT",        "currency",  "PTE" },
    { "sr_SP_CYRL",          "sr_Cyrl_RS",   NULL,        NULL },          /* .NET name */
    { "sr_SP_LATN",          "sr_Latn_RS",   NULL,        NULL },          /* .NET name */
    { "sr_YU_CYRILLIC",      "sr_Cyrl_RS",   NULL,        NULL },          /* Linux name */
    { "th_TH_TRADITIONAL",   "th_TH",        "calendar",  "buddhist" },    /* Old ICU name */
    { "uz_UZ_CYRILLIC",      "uz_Cyrl_UZ",   NULL,        NULL },          /* Linux name */
    { "uz_UZ_CYRL",          "uz_Cyrl_UZ",   NULL,        NULL },          /* .NET name */
    { "uz_UZ_LATN",          "uz_Latn_UZ",   NULL,        NULL },          /* .NET name */
    { "zh_CHS",              "zh_Hans",      NULL,        NULL },          /* .NET name */
    { "zh_CHT",              "zh_Hant",      NULL,        NULL },          /* .NET name */
    { "zh_GAN",              "gan",          NULL,        NULL },          /* registered name */
    { "zh_GUOYU",            "zh",           NULL,        NULL },          /* registered name */
    { "zh_HAKKA",            "hak",          NULL,        NULL },          /* registered name */
    { "zh_MIN_NAN",          "nan",          NULL,        NULL },          /* registered name */
    { "zh_WUU",              "wuu",          NULL,        NULL },          /* registered name */
    { "zh_XIANG",            "hsn",          NULL,        NULL },          /* registered name */
    { "zh_YUE",              "yue",          NULL,        NULL },          /* registered name */
};
514
/* Row type of VARIANT_MAP. */
typedef struct VariantMap {
    const char *variant; /* variant name as supplied (input) */
    const char *keyword; /* keyword paired with the variant, or NULL when there is none */
    const char *value;   /* value for keyword; NULL whenever keyword is NULL */
} VariantMap;

/* Variant names paired with a keyword and value. */
static const VariantMap VARIANT_MAP[] = {
    /* variant   keyword      value */
    { "EURO",    "currency",  "EUR" },
    { "PINYIN",  "collation", "pinyin" }, /* Solaris variant */
    { "STROKE",  "collation", "stroke" }  /* Solaris variant */
};
526
527 /* ### BCP47 Conversion *******************************************/
/*
 * Tests whether a locale ID should be treated as a BCP 47 language tag:
 * the pointer is non-NULL, the string contains no '@', and
 * getShortestSubtagLength() reports a one-character subtag.
 *
 * The check is applied to the macro argument itself; it does not rely on the
 * caller having a variable named localeID. The argument is evaluated more
 * than once, so pass an expression without side effects.
 */
#define _hasBCP47Extension(id) \
    ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP 47 tag `id` to a Unicode locale ID in `buffer` (capacity
 * `length`) via uloc_forLanguageTag() and sets `finalID` to the string to use:
 *   - `buffer` when the conversion produced a non-empty result without error;
 *   - the untouched `id` otherwise.
 *
 * U_STRING_NOT_TERMINATED_WARNING is not a failure code, but it means the
 * result filled `buffer` with no room for the terminating NUL. Such a buffer
 * must not be read as a C string, so this case also falls back to `id` and
 * `*err` is raised to U_BUFFER_OVERFLOW_ERROR.
 *
 * NOTE: this expands to a bare if/else statement, not an expression. Use it
 * only as a complete statement inside braces.
 */
#define _ConvertBCP47(finalID, id, buffer, length, err) \
    if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 || \
            U_FAILURE(*(err)) || *(err) == U_STRING_NOT_TERMINATED_WARNING) { \
        (finalID) = (id); \
        if (*(err) == U_STRING_NOT_TERMINATED_WARNING) { \
            *(err) = U_BUFFER_OVERFLOW_ERROR; \
        } \
    } else { \
        (finalID) = (buffer); \
    }
537 /* Gets the size of the shortest subtag in the given localeID. */
538 static int32_t getShortestSubtagLength(const char *localeID) {
539 int32_t localeIDLength = uprv_strlen(localeID);
540 int32_t length = localeIDLength;
541 int32_t tmpLength = 0;
542 int32_t i;
543 UBool reset = TRUE;
544
545 for (i = 0; i < localeIDLength; i++) {
546 if (localeID[i] != '_' && localeID[i] != '-') {
547 if (reset) {
548 tmpLength = 0;
549 reset = FALSE;
550 }
551 tmpLength++;
552 } else {
553 if (tmpLength != 0 && tmpLength < length) {
554 length = tmpLength;
555 }
556 reset = TRUE;
557 }
558 }
559
560 return length;
561 }
562
563 /* ### Keywords **************************************************/
/* Non-zero when c is one of the ASCII digits '0'..'9'. Evaluates c twice. */
#define UPRV_ISDIGIT(c) ((c) >= '0' && (c) <= '9')

/* Non-zero when c is an ASCII letter or digit. Evaluates c more than once. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c))

/* Punctuation/symbols allowed in legacy key values: '_', '-', '+' and '/'. */
#define UPRV_OK_VALUE_PUNCTUATION(c) \
    ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size in bytes of the buffer holding one keyword name, including its NUL. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of keywords collected from a single locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
571
572 U_CAPI const char * U_EXPORT2
573 locale_getKeywordsStart(const char *localeID) {
574 const char *result = NULL;
575 if((result = uprv_strchr(localeID, '@')) != NULL) {
576 return result;
577 }
578 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
579 else {
580 /* We do this because the @ sign is variant, and the @ sign used on one
581 EBCDIC machine won't be compiled the same way on other EBCDIC based
582 machines. */
583 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
584 const uint8_t *charToFind = ebcdicSigns;
585 while(*charToFind) {
586 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
587 return result;
588 }
589 charToFind++;
590 }
591 }
592 #endif
593 return NULL;
594 }
595
596 /**
597 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
598 * @param keywordName incoming name to be canonicalized
599 * @param status return status (keyword too long)
600 * @return length of the keyword name
601 */
602 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
603 {
604 int32_t keywordNameLen = 0;
605
606 for (; *keywordName != 0; keywordName++) {
607 if (!UPRV_ISALPHANUM(*keywordName)) {
608 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
609 return 0;
610 }
611 if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
612 buf[keywordNameLen++] = uprv_tolower(*keywordName);
613 } else {
614 /* keyword name too long for internal buffer */
615 *status = U_INTERNAL_PROGRAM_ERROR;
616 return 0;
617 }
618 }
619 if (keywordNameLen == 0) {
620 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
621 return 0;
622 }
623 buf[keywordNameLen] = 0; /* terminate */
624
625 return keywordNameLen;
626 }
627
628 typedef struct {
629 char keyword[ULOC_KEYWORD_BUFFER_LEN];
630 int32_t keywordLen;
631 const char *valueStart;
632 int32_t valueLen;
633 } KeywordStruct;
634
635 static int32_t U_CALLCONV
636 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
637 const char* leftString = ((const KeywordStruct *)left)->keyword;
638 const char* rightString = ((const KeywordStruct *)right)->keyword;
639 return uprv_strcmp(leftString, rightString);
640 }
641
642 /**
643 * Both addKeyword and addValue must already be in canonical form.
644 * Either both addKeyword and addValue are NULL, or neither is NULL.
645 * If they are not NULL they must be zero terminated.
646 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
647 */
648 static int32_t
649 _getKeywords(const char *localeID,
650 char prev,
651 char *keywords, int32_t keywordCapacity,
652 char *values, int32_t valuesCapacity, int32_t *valLen,
653 UBool valuesToo,
654 const char* addKeyword,
655 const char* addValue,
656 UErrorCode *status)
657 {
658 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
659
660 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
661 int32_t numKeywords = 0;
662 const char* pos = localeID;
663 const char* equalSign = NULL;
664 const char* semicolon = NULL;
665 int32_t i = 0, j, n;
666 int32_t keywordsLen = 0;
667 int32_t valuesLen = 0;
668
669 if(prev == '@') { /* start of keyword definition */
670 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
671 do {
672 UBool duplicate = FALSE;
673 /* skip leading spaces */
674 while(*pos == ' ') {
675 pos++;
676 }
677 if (!*pos) { /* handle trailing "; " */
678 break;
679 }
680 if(numKeywords == maxKeywords) {
681 *status = U_INTERNAL_PROGRAM_ERROR;
682 return 0;
683 }
684 equalSign = uprv_strchr(pos, '=');
685 semicolon = uprv_strchr(pos, ';');
686 /* lack of '=' [foo@currency] is illegal */
687 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
688 if(!equalSign || (semicolon && semicolon<equalSign)) {
689 *status = U_INVALID_FORMAT_ERROR;
690 return 0;
691 }
692 /* need to normalize both keyword and keyword name */
693 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
694 /* keyword name too long for internal buffer */
695 *status = U_INTERNAL_PROGRAM_ERROR;
696 return 0;
697 }
698 for(i = 0, n = 0; i < equalSign - pos; ++i) {
699 if (pos[i] != ' ') {
700 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
701 }
702 }
703
704 /* zero-length keyword is an error. */
705 if (n == 0) {
706 *status = U_INVALID_FORMAT_ERROR;
707 return 0;
708 }
709
710 keywordList[numKeywords].keyword[n] = 0;
711 keywordList[numKeywords].keywordLen = n;
712 /* now grab the value part. First we skip the '=' */
713 equalSign++;
714 /* then we leading spaces */
715 while(*equalSign == ' ') {
716 equalSign++;
717 }
718
719 /* Premature end or zero-length value */
720 if (!*equalSign || equalSign == semicolon) {
721 *status = U_INVALID_FORMAT_ERROR;
722 return 0;
723 }
724
725 keywordList[numKeywords].valueStart = equalSign;
726
727 pos = semicolon;
728 i = 0;
729 if(pos) {
730 while(*(pos - i - 1) == ' ') {
731 i++;
732 }
733 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
734 pos++;
735 } else {
736 i = (int32_t)uprv_strlen(equalSign);
737 while(i && equalSign[i-1] == ' ') {
738 i--;
739 }
740 keywordList[numKeywords].valueLen = i;
741 }
742 /* If this is a duplicate keyword, then ignore it */
743 for (j=0; j<numKeywords; ++j) {
744 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
745 duplicate = TRUE;
746 break;
747 }
748 }
749 if (!duplicate) {
750 ++numKeywords;
751 }
752 } while(pos);
753
754 /* Handle addKeyword/addValue. */
755 if (addKeyword != NULL) {
756 UBool duplicate = FALSE;
757 U_ASSERT(addValue != NULL);
758 /* Search for duplicate; if found, do nothing. Explicit keyword
759 overrides addKeyword. */
760 for (j=0; j<numKeywords; ++j) {
761 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
762 duplicate = TRUE;
763 break;
764 }
765 }
766 if (!duplicate) {
767 if (numKeywords == maxKeywords) {
768 *status = U_INTERNAL_PROGRAM_ERROR;
769 return 0;
770 }
771 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
772 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
773 keywordList[numKeywords].valueStart = addValue;
774 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
775 ++numKeywords;
776 }
777 } else {
778 U_ASSERT(addValue == NULL);
779 }
780
781 /* now we have a list of keywords */
782 /* we need to sort it */
783 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
784
785 /* Now construct the keyword part */
786 for(i = 0; i < numKeywords; i++) {
787 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
788 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
789 if(valuesToo) {
790 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
791 } else {
792 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
793 }
794 }
795 keywordsLen += keywordList[i].keywordLen + 1;
796 if(valuesToo) {
797 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
798 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
799 }
800 keywordsLen += keywordList[i].valueLen;
801
802 if(i < numKeywords - 1) {
803 if(keywordsLen < keywordCapacity) {
804 keywords[keywordsLen] = ';';
805 }
806 keywordsLen++;
807 }
808 }
809 if(values) {
810 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
811 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
812 values[valuesLen + keywordList[i].valueLen] = 0;
813 }
814 valuesLen += keywordList[i].valueLen + 1;
815 }
816 }
817 if(values) {
818 values[valuesLen] = 0;
819 if(valLen) {
820 *valLen = valuesLen;
821 }
822 }
823 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
824 } else {
825 return 0;
826 }
827 }
828
829 U_CFUNC int32_t
830 locale_getKeywords(const char *localeID,
831 char prev,
832 char *keywords, int32_t keywordCapacity,
833 char *values, int32_t valuesCapacity, int32_t *valLen,
834 UBool valuesToo,
835 UErrorCode *status) {
836 return _getKeywords(localeID, prev, keywords, keywordCapacity,
837 values, valuesCapacity, valLen, valuesToo,
838 NULL, NULL, status);
839 }
840
/**
 * Look up the value of one keyword in a locale ID and copy it to 'buffer'.
 *
 * @param localeID       locale ID to search; if NULL the function returns 0.
 * @param keywordName    keyword to look for; NULL or "" sets
 *                       U_ILLEGAL_ARGUMENT_ERROR. It is passed through
 *                       locale_canonKeywordName() before comparison.
 * @param buffer         receives the value exactly as written in the locale
 *                       (only surrounding spaces are stripped).
 * @param bufferCapacity number of chars that may be written to 'buffer'.
 * @param status         in/out error code; if NULL or already a failure the
 *                       function returns 0 without doing anything.
 * @return the result of u_terminateChars() for the full value length (the
 *         length keeps counting past bufferCapacity), or 0 when the keyword is
 *         absent, the locale has no keywords, or an error was set.
 *
 * Errors set here: U_ILLEGAL_ARGUMENT_ERROR for a missing '=', an empty or
 * non-alphanumeric keyword name, or an empty/malformed value of the matching
 * keyword; U_INTERNAL_PROGRAM_ERROR when a keyword name in the locale does
 * not fit in ULOC_KEYWORD_BUFFER_LEN.
 */
841 U_CAPI int32_t U_EXPORT2
842 uloc_getKeywordValue(const char* localeID,
843 const char* keywordName,
844 char* buffer, int32_t bufferCapacity,
845 UErrorCode* status)
846 {
847 const char* startSearchHere = NULL;
848 const char* nextSeparator = NULL;
849 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
850 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
851 int32_t result = 0;
852
853 if(status && U_SUCCESS(*status) && localeID) {
854 char tempBuffer[ULOC_FULLNAME_CAPACITY];
855 const char* tmpLocaleID;
856
857 if (keywordName == NULL || keywordName[0] == 0) {
858 *status = U_ILLEGAL_ARGUMENT_ERROR;
859 return 0;
860 }
861
/* Canonical form of the requested name; compared with uprv_strcmp() below. */
862 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
863 if(U_FAILURE(*status)) {
864 return 0;
865 }
866
/* NOTE(review): _hasBCP47Extension/_ConvertBCP47 are macros defined outside this
 * view; presumably tmpLocaleID ends up pointing at either localeID or tempBuffer - confirm. */
867 if (_hasBCP47Extension(localeID)) {
868 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
869 } else {
870 tmpLocaleID=localeID;
871 }
872
873 startSearchHere = locale_getKeywordsStart(tmpLocaleID);
874 if(startSearchHere == NULL) {
875 /* no keywords, return at once */
876 return 0;
877 }
878
879 /* find the first keyword */
/* Each pass handles one "name=value" entry; startSearchHere points at the
 * separator that precedes the entry and becomes NULL after the last one. */
880 while(startSearchHere) {
881 const char* keyValueTail;
882 int32_t keyValueLen;
883
884 startSearchHere++; /* skip @ or ; */
885 nextSeparator = uprv_strchr(startSearchHere, '=');
886 if(!nextSeparator) {
887 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
888 return 0;
889 }
890 /* strip leading & trailing spaces (TC decided to tolerate these) */
891 while(*startSearchHere == ' ') {
892 startSearchHere++;
893 }
894 keyValueTail = nextSeparator;
895 while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
896 keyValueTail--;
897 }
898 /* now keyValueTail points to first char after the keyName */
899 /* copy & normalize keyName from locale */
900 if (startSearchHere == keyValueTail) {
901 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
902 return 0;
903 }
904 keyValueLen = 0;
905 while (startSearchHere < keyValueTail) {
906 if (!UPRV_ISALPHANUM(*startSearchHere)) {
907 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
908 return 0;
909 }
910 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
911 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
912 } else {
913 /* keyword name too long for internal buffer */
914 *status = U_INTERNAL_PROGRAM_ERROR;
915 return 0;
916 }
917 }
918 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
919
/* Locate the end of this entry now: it is both the value's end (on a match)
 * and the starting point of the next loop pass (on a mismatch). */
920 startSearchHere = uprv_strchr(nextSeparator, ';');
921
922 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
923 /* current entry matches the keyword. */
924 nextSeparator++; /* skip '=' */
925 /* First strip leading & trailing spaces (TC decided to tolerate these) */
926 while(*nextSeparator == ' ') {
927 nextSeparator++;
928 }
929 keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
930 while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
931 keyValueTail--;
932 }
933 /* Now copy the value, but check well-formedness */
934 if (nextSeparator == keyValueTail) {
935 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
936 return 0;
937 }
938 keyValueLen = 0;
939 while (nextSeparator < keyValueTail) {
940 if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
941 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
942 return 0;
943 }
944 if (keyValueLen < bufferCapacity) {
945 /* Should we lowercase value to return here? Tests expect as-is. */
946 buffer[keyValueLen++] = *nextSeparator++;
947 } else { /* keep advancing so we return correct length in case of overflow */
948 keyValueLen++;
949 nextSeparator++;
950 }
951 }
952 result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
953 return result;
954 }
955 }
956 }
/* Reached when the preconditions failed or no entry matched the keyword. */
957 return 0;
958 }
959
/**
 * Add, replace or remove one keyword=value pair in the NUL-terminated locale
 * ID held in 'buffer'. The buffer is rewritten in place.
 *
 * @param keywordName    keyword to set; NULL or "" sets U_ILLEGAL_ARGUMENT_ERROR.
 *                       It is passed through locale_canonKeywordName().
 * @param keywordValue   new value; NULL or "" means "remove the keyword".
 *                       Every char must satisfy UPRV_ISALPHANUM or
 *                       UPRV_OK_VALUE_PUNCTUATION, and at most
 *                       ULOC_KEYWORDS_CAPACITY chars are accepted.
 * @param buffer         locale ID to modify.
 * @param bufferCapacity capacity of 'buffer'; must be > 1 and not smaller
 *                       than strlen(buffer).
 * @param status         in/out error code (dereferenced unconditionally).
 * @return -1 if *status was already a failure on entry; 0 after an argument or
 *         format error; the required length together with
 *         U_BUFFER_OVERFLOW_ERROR when the result does not fit (buffer left
 *         unchanged); the original length when nothing had to change or an
 *         internal append failed; otherwise the new length of the locale ID.
 *
 * Existing entries are re-emitted with lowercased names and with spaces around
 * names and values removed. The new entry is inserted before the first existing
 * name that compares greater under uprv_strcmp(), or appended at the end.
 */
960 U_CAPI int32_t U_EXPORT2
961 uloc_setKeywordValue(const char* keywordName,
962 const char* keywordValue,
963 char* buffer, int32_t bufferCapacity,
964 UErrorCode* status)
965 {
966 /* TODO: sorting. removal. */
967 int32_t keywordNameLen;
968 int32_t keywordValueLen;
969 int32_t bufLen;
970 int32_t needLen = 0;
971 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
972 char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
973 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
974 int32_t rc;
975 char* nextSeparator = NULL;
976 char* nextEqualsign = NULL;
977 char* startSearchHere = NULL;
978 char* keywordStart = NULL;
979 CharString updatedKeysAndValues;
980 int32_t updatedKeysAndValuesLen;
981 UBool handledInputKeyAndValue = FALSE;
982 char keyValuePrefix = '@';
983
984 if(U_FAILURE(*status)) {
985 return -1;
986 }
987 if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
988 *status = U_ILLEGAL_ARGUMENT_ERROR;
989 return 0;
990 }
991 bufLen = (int32_t)uprv_strlen(buffer);
992 if(bufferCapacity<bufLen) {
993 /* The capacity is less than the length?! Is this NULL terminated? */
994 *status = U_ILLEGAL_ARGUMENT_ERROR;
995 return 0;
996 }
997 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
998 if(U_FAILURE(*status)) {
999 return 0;
1000 }
1001
/* Validate the new value and copy it into keywordValueBuffer. A NULL
 * keywordValue leaves keywordValueLen at 0, which selects removal below. */
1002 keywordValueLen = 0;
1003 if(keywordValue) {
1004 while (*keywordValue != 0) {
1005 if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
1006 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
1007 return 0;
1008 }
1009 if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
1010 /* Should we force lowercase in value to set? */
1011 keywordValueBuffer[keywordValueLen++] = *keywordValue++;
1012 } else {
1013 /* keywordValue too long for internal buffer */
1014 *status = U_INTERNAL_PROGRAM_ERROR;
1015 return 0;
1016 }
1017 }
1018 }
1019 keywordValueBuffer[keywordValueLen] = 0; /* terminate */
1020
/* Shortcut: the locale has no keyword section, or only a bare trailing
 * separator with nothing after it. */
1021 startSearchHere = (char*)locale_getKeywordsStart(buffer);
1022 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
1023 if(keywordValueLen == 0) { /* no keywords = nothing to remove */
1024 return bufLen;
1025 }
1026
/* existing text + '@' + name + '=' + value */
1027 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1028 if(startSearchHere) { /* had a single @ */
1029 needLen--; /* already had the @ */
1030 /* startSearchHere points at the @ */
1031 } else {
1032 startSearchHere=buffer+bufLen;
1033 }
1034 if(needLen >= bufferCapacity) {
1035 *status = U_BUFFER_OVERFLOW_ERROR;
1036 return needLen; /* no change */
1037 }
1038 *startSearchHere++ = '@';
1039 uprv_strcpy(startSearchHere, keywordNameBuffer);
1040 startSearchHere += keywordNameLen;
1041 *startSearchHere++ = '=';
1042 uprv_strcpy(startSearchHere, keywordValueBuffer);
1043 return needLen;
1044 } /* end shortcut - no @ */
1045
/* General case: rebuild the whole keyword section in updatedKeysAndValues,
 * one existing entry per loop pass. buffer is not touched until the end. */
1046 keywordStart = startSearchHere;
1047 /* search for keyword */
1048 while(keywordStart) {
1049 const char* keyValueTail;
1050 int32_t keyValueLen;
1051
1052 keywordStart++; /* skip @ or ; */
1053 nextEqualsign = uprv_strchr(keywordStart, '=');
1054 if (!nextEqualsign) {
1055 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
1056 return 0;
1057 }
1058 /* strip leading & trailing spaces (TC decided to tolerate these) */
1059 while(*keywordStart == ' ') {
1060 keywordStart++;
1061 }
1062 keyValueTail = nextEqualsign;
1063 while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
1064 keyValueTail--;
1065 }
1066 /* now keyValueTail points to first char after the keyName */
1067 /* copy & normalize keyName from locale */
1068 if (keywordStart == keyValueTail) {
1069 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
1070 return 0;
1071 }
1072 keyValueLen = 0;
1073 while (keywordStart < keyValueTail) {
1074 if (!UPRV_ISALPHANUM(*keywordStart)) {
1075 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
1076 return 0;
1077 }
1078 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
1079 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
1080 } else {
1081 /* keyword name too long for internal buffer */
1082 *status = U_INTERNAL_PROGRAM_ERROR;
1083 return 0;
1084 }
1085 }
1086 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
1087
1088 nextSeparator = uprv_strchr(nextEqualsign, ';');
1089
1090 /* start processing the value part */
1091 nextEqualsign++; /* skip '=' */
1092 /* First strip leading & trailing spaces (TC decided to tolerate these) */
1093 while(*nextEqualsign == ' ') {
1094 nextEqualsign++;
1095 }
1096 keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
1097 while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
1098 keyValueTail--;
1099 }
1100 if (nextEqualsign == keyValueTail) {
1101 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1102 return 0;
1103 }
1104
/* rc == 0: same keyword; rc < 0: the input keyword orders before this entry. */
1105 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1106 if(rc == 0) {
1107 /* Current entry matches the input keyword. Update the entry */
1108 if(keywordValueLen > 0) { /* updating a value */
1109 updatedKeysAndValues.append(keyValuePrefix, *status);
1110 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1111 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1112 updatedKeysAndValues.append('=', *status);
1113 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1114 } /* else removing this entry, don't emit anything */
1115 handledInputKeyAndValue = TRUE;
1116 } else {
1117 /* input keyword sorts earlier than current entry, add before current entry */
1118 if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1119 /* insert new entry at this location */
1120 updatedKeysAndValues.append(keyValuePrefix, *status);
1121 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1122 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1123 updatedKeysAndValues.append('=', *status);
1124 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1125 handledInputKeyAndValue = TRUE;
1126 }
1127 /* copy the current entry */
1128 updatedKeysAndValues.append(keyValuePrefix, *status);
1129 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1130 updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1131 updatedKeysAndValues.append('=', *status);
1132 updatedKeysAndValues.append(nextEqualsign, keyValueTail-nextEqualsign, *status);
1133 }
1134 if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1135 /* append new entry at the end, it sorts later than existing entries */
1136 updatedKeysAndValues.append(keyValuePrefix, *status);
1137 /* skip keyValuePrefix update, no subsequent key-value pair */
1138 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1139 updatedKeysAndValues.append('=', *status);
1140 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1141 handledInputKeyAndValue = TRUE;
1142 }
1143 keywordStart = nextSeparator;
1144 } /* end loop searching */
1145
1146 /* Any error from updatedKeysAndValues.append above would be internal and not due to
1147 * problems with the passed-in locale. So if we did encounter problems with the
1148 * passed-in locale above, those errors took precedence and overrode any error
1149 * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1150 * are errors here they are from updatedKeysAndValues.append; they do cause an
1151 * error return but the passed-in locale is unmodified and the original bufLen is
1152 * returned.
1153 */
1154 if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1155 /* if input key/value specified removal of a keyword not present in locale, or
1156 * there was an error in CharString.append, leave original locale alone. */
1157 return bufLen;
1158 }
1159
1160 updatedKeysAndValuesLen = updatedKeysAndValues.length();
1161 /* needLen = length of the part before '@' + length of updated key-value part including '@' */
1162 needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
1163 if(needLen >= bufferCapacity) {
1164 *status = U_BUFFER_OVERFLOW_ERROR;
1165 return needLen; /* no change */
1166 }
/* Overwrite the old keyword section in place. When the only keyword was
 * removed the rebuilt section is empty and the terminator written below cuts
 * the ID at startSearchHere. */
1167 if (updatedKeysAndValuesLen > 0) {
1168 uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
1169 }
1170 buffer[needLen]=0;
1171 return needLen;
1172 }
1173
1174 /* ### ID parsing implementation **************************************************/
1175
/* True if 'a' is one of the special prefix letters 'x', 'X', 'i' or 'I'.
 * The argument is fully parenthesized so expression arguments expand with
 * the intended precedence; it is still evaluated more than once, so do not
 * pass an expression with side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
 'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant.
 * True for NUL, '.' and '@'. The argument is parenthesized but evaluated up
 * to three times.
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1186
1187 static char* _strnchr(const char* str, int32_t len, char c) {
1188 U_ASSERT(str != 0 && len >= 0);
1189 while (len-- != 0) {
1190 char d = *str;
1191 if (d == c) {
1192 return (char*) str;
1193 } else if (d == 0) {
1194 break;
1195 }
1196 ++str;
1197 }
1198 return NULL;
1199 }
1200
1201 /**
1202 * Lookup 'key' in the array 'list'. The array 'list' should contain
1203 * a NULL entry, followed by more entries, and a second NULL entry.
1204 *
1205 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1206 * COUNTRIES_3.
1207 */
1208 static int16_t _findIndex(const char* const* list, const char* key)
1209 {
1210 const char* const* anchor = list;
1211 int32_t pass = 0;
1212
1213 /* Make two passes through two NULL-terminated arrays at 'list' */
1214 while (pass++ < 2) {
1215 while (*list) {
1216 if (uprv_strcmp(key, *list) == 0) {
1217 return (int16_t)(list - anchor);
1218 }
1219 list++;
1220 }
1221 ++list; /* skip final NULL *CWB*/
1222 }
1223 return -1;
1224 }
1225
1226 /* count the length of src while copying it to dest; return strlen(src) */
1227 static inline int32_t
1228 _copyCount(char *dest, int32_t destCapacity, const char *src) {
1229 const char *anchor;
1230 char c;
1231
1232 anchor=src;
1233 for(;;) {
1234 if((c=*src)==0) {
1235 return (int32_t)(src-anchor);
1236 }
1237 if(destCapacity<=0) {
1238 return (int32_t)((src-anchor)+uprv_strlen(src));
1239 }
1240 ++src;
1241 *dest++=c;
1242 --destCapacity;
1243 }
1244 }
1245
1246 U_CFUNC const char*
1247 uloc_getCurrentCountryID(const char* oldID){
1248 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1249 if (offset >= 0) {
1250 return REPLACEMENT_COUNTRIES[offset];
1251 }
1252 return oldID;
1253 }
1254 U_CFUNC const char*
1255 uloc_getCurrentLanguageID(const char* oldID){
1256 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1257 if (offset >= 0) {
1258 return REPLACEMENT_LANGUAGES[offset];
1259 }
1260 return oldID;
1261 }
1262 /*
1263 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1264 * avoid duplicating code to handle the earlier locale ID pieces
1265 * in the functions for the later ones by
1266 * setting the *pEnd pointer to where they stopped parsing
1267 *
1268 * TODO try to use this in Locale
1269 */
1270 U_CFUNC int32_t
1271 ulocimp_getLanguage(const char *localeID,
1272 char *language, int32_t languageCapacity,
1273 const char **pEnd) {
1274 int32_t i=0;
1275 int32_t offset;
1276 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1277
1278 /* if it starts with i- or x- then copy that prefix */
1279 if(_isIDPrefix(localeID)) {
1280 if(i<languageCapacity) {
1281 language[i]=(char)uprv_tolower(*localeID);
1282 }
1283 if(i<languageCapacity) {
1284 language[i+1]='-';
1285 }
1286 i+=2;
1287 localeID+=2;
1288 }
1289
1290 /* copy the language as far as possible and count its length */
1291 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1292 if(i<languageCapacity) {
1293 language[i]=(char)uprv_tolower(*localeID);
1294 }
1295 if(i<3) {
1296 U_ASSERT(i>=0);
1297 lang[i]=(char)uprv_tolower(*localeID);
1298 }
1299 i++;
1300 localeID++;
1301 }
1302
1303 if(i==3) {
1304 /* convert 3 character code to 2 character code if possible *CWB*/
1305 offset=_findIndex(LANGUAGES_3, lang);
1306 if(offset>=0) {
1307 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1308 }
1309 }
1310
1311 if(pEnd!=NULL) {
1312 *pEnd=localeID;
1313 }
1314 return i;
1315 }
1316
1317 U_CFUNC int32_t
1318 ulocimp_getScript(const char *localeID,
1319 char *script, int32_t scriptCapacity,
1320 const char **pEnd)
1321 {
1322 int32_t idLen = 0;
1323
1324 if (pEnd != NULL) {
1325 *pEnd = localeID;
1326 }
1327
1328 /* copy the second item as far as possible and count its length */
1329 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1330 && uprv_isASCIILetter(localeID[idLen])) {
1331 idLen++;
1332 }
1333
1334 /* If it's exactly 4 characters long, then it's a script and not a country. */
1335 if (idLen == 4) {
1336 int32_t i;
1337 if (pEnd != NULL) {
1338 *pEnd = localeID+idLen;
1339 }
1340 if(idLen > scriptCapacity) {
1341 idLen = scriptCapacity;
1342 }
1343 if (idLen >= 1) {
1344 script[0]=(char)uprv_toupper(*(localeID++));
1345 }
1346 for (i = 1; i < idLen; i++) {
1347 script[i]=(char)uprv_tolower(*(localeID++));
1348 }
1349 }
1350 else {
1351 idLen = 0;
1352 }
1353 return idLen;
1354 }
1355
1356 U_CFUNC int32_t
1357 ulocimp_getCountry(const char *localeID,
1358 char *country, int32_t countryCapacity,
1359 const char **pEnd)
1360 {
1361 int32_t idLen=0;
1362 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1363 int32_t offset;
1364
1365 /* copy the country as far as possible and count its length */
1366 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1367 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1368 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1369 }
1370 idLen++;
1371 }
1372
1373 /* the country should be either length 2 or 3 */
1374 if (idLen == 2 || idLen == 3) {
1375 UBool gotCountry = FALSE;
1376 /* convert 3 character code to 2 character code if possible *CWB*/
1377 if(idLen==3) {
1378 offset=_findIndex(COUNTRIES_3, cnty);
1379 if(offset>=0) {
1380 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1381 gotCountry = TRUE;
1382 }
1383 }
1384 if (!gotCountry) {
1385 int32_t i = 0;
1386 for (i = 0; i < idLen; i++) {
1387 if (i < countryCapacity) {
1388 country[i]=(char)uprv_toupper(localeID[i]);
1389 }
1390 }
1391 }
1392 localeID+=idLen;
1393 } else {
1394 idLen = 0;
1395 }
1396
1397 if(pEnd!=NULL) {
1398 *pEnd=localeID;
1399 }
1400
1401 return idLen;
1402 }
1403
1404 /**
1405 * @param needSeparator if true, then add leading '_' if any variants
1406 * are added to 'variant'
1407 */
1408 static int32_t
1409 _getVariantEx(const char *localeID,
1410 char prev,
1411 char *variant, int32_t variantCapacity,
1412 UBool needSeparator) {
1413 int32_t i=0;
1414
1415 /* get one or more variant tags and separate them with '_' */
1416 if(_isIDSeparator(prev)) {
1417 /* get a variant string after a '-' or '_' */
1418 while(!_isTerminator(*localeID)) {
1419 if (needSeparator) {
1420 if (i<variantCapacity) {
1421 variant[i] = '_';
1422 }
1423 ++i;
1424 needSeparator = FALSE;
1425 }
1426 if(i<variantCapacity) {
1427 variant[i]=(char)uprv_toupper(*localeID);
1428 if(variant[i]=='-') {
1429 variant[i]='_';
1430 }
1431 }
1432 i++;
1433 localeID++;
1434 }
1435 }
1436
1437 /* if there is no variant tag after a '-' or '_' then look for '@' */
1438 if(i==0) {
1439 if(prev=='@') {
1440 /* keep localeID */
1441 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1442 ++localeID; /* point after the '@' */
1443 } else {
1444 return 0;
1445 }
1446 while(!_isTerminator(*localeID)) {
1447 if (needSeparator) {
1448 if (i<variantCapacity) {
1449 variant[i] = '_';
1450 }
1451 ++i;
1452 needSeparator = FALSE;
1453 }
1454 if(i<variantCapacity) {
1455 variant[i]=(char)uprv_toupper(*localeID);
1456 if(variant[i]=='-' || variant[i]==',') {
1457 variant[i]='_';
1458 }
1459 }
1460 i++;
1461 localeID++;
1462 }
1463 }
1464
1465 return i;
1466 }
1467
1468 static int32_t
1469 _getVariant(const char *localeID,
1470 char prev,
1471 char *variant, int32_t variantCapacity) {
1472 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1473 }
1474
1475 /**
1476 * Delete ALL instances of a variant from the given list of one or
1477 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1478 * @param variants the source string of one or more variants,
1479 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1480 * terminated; if it is, trailing zero will NOT be maintained.
1481 * @param variantsLen length of variants
1482 * @param toDelete variant to delete, without separators, e.g. "EURO"
1483 * or "PREEURO"; not zero terminated
1484 * @param toDeleteLen length of toDelete
1485 * @return number of characters deleted from variants
1486 */
1487 static int32_t
1488 _deleteVariant(char* variants, int32_t variantsLen,
1489 const char* toDelete, int32_t toDeleteLen)
1490 {
1491 int32_t delta = 0; /* number of chars deleted */
1492 for (;;) {
1493 UBool flag = FALSE;
1494 if (variantsLen < toDeleteLen) {
1495 return delta;
1496 }
1497 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1498 (variantsLen == toDeleteLen ||
1499 (flag=(variants[toDeleteLen] == '_'))))
1500 {
1501 int32_t d = toDeleteLen + (flag?1:0);
1502 variantsLen -= d;
1503 delta += d;
1504 if (variantsLen > 0) {
1505 uprv_memmove(variants, variants+d, variantsLen);
1506 }
1507 } else {
1508 char* p = _strnchr(variants, variantsLen, '_');
1509 if (p == NULL) {
1510 return delta;
1511 }
1512 ++p;
1513 variantsLen -= (int32_t)(p - variants);
1514 variants = p;
1515 }
1516 }
1517 }
1518
1519 /* Keyword enumeration */
1520
/* State attached to a keyword UEnumeration through its 'context' member. */
1521 typedef struct UKeywordsContext {
/* Start of the keyword list: consecutive NUL-terminated strings, ended by an
 * empty string. Released with uprv_free() in uloc_kw_closeKeywords(). */
1522 char* keywords;
/* Read cursor into 'keywords': advanced by uloc_kw_nextKeyword() and set
 * back to 'keywords' by uloc_kw_resetKeywords(). */
1523 char* current;
1524 } UKeywordsContext;
1525
1526 U_CDECL_BEGIN
1527
1528 static void U_CALLCONV
1529 uloc_kw_closeKeywords(UEnumeration *enumerator) {
1530 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1531 uprv_free(enumerator->context);
1532 uprv_free(enumerator);
1533 }
1534
1535 static int32_t U_CALLCONV
1536 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1537 char *kw = ((UKeywordsContext *)en->context)->keywords;
1538 int32_t result = 0;
1539 while(*kw) {
1540 result++;
1541 kw += uprv_strlen(kw)+1;
1542 }
1543 return result;
1544 }
1545
1546 static const char * U_CALLCONV
1547 uloc_kw_nextKeyword(UEnumeration* en,
1548 int32_t* resultLength,
1549 UErrorCode* /*status*/) {
1550 const char* result = ((UKeywordsContext *)en->context)->current;
1551 int32_t len = 0;
1552 if(*result) {
1553 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1554 ((UKeywordsContext *)en->context)->current += len+1;
1555 } else {
1556 result = NULL;
1557 }
1558 if (resultLength) {
1559 *resultLength = len;
1560 }
1561 return result;
1562 }
1563
1564 static void U_CALLCONV
1565 uloc_kw_resetKeywords(UEnumeration* en,
1566 UErrorCode* /*status*/) {
1567 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1568 }
1569
1570 U_CDECL_END
1571
1572
/*
 * Template for keyword enumerations. uloc_openKeywordList() copies this
 * object with uprv_memcpy() into each newly allocated UEnumeration and then
 * sets the copy's 'context' member to its own UKeywordsContext.
 * NOTE(review): the initializer is positional and the UEnumeration layout is
 * declared outside this file; the two leading NULLs presumably are the
 * context slots and the rest the close/count/unext/next/reset callbacks -
 * confirm against the struct declaration.
 */
1573 static const UEnumeration gKeywordsEnum = {
1574 NULL,
1575 NULL,
1576 uloc_kw_closeKeywords,
1577 uloc_kw_countKeywords,
1578 uenum_unextDefault,
1579 uloc_kw_nextKeyword,
1580 uloc_kw_resetKeywords
1581 };
1582
1583 U_CAPI UEnumeration* U_EXPORT2
1584 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1585 {
1586 UKeywordsContext *myContext = NULL;
1587 UEnumeration *result = NULL;
1588
1589 if(U_FAILURE(*status)) {
1590 return NULL;
1591 }
1592 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1593 /* Null pointer test */
1594 if (result == NULL) {
1595 *status = U_MEMORY_ALLOCATION_ERROR;
1596 return NULL;
1597 }
1598 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1599 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
1600 if (myContext == NULL) {
1601 *status = U_MEMORY_ALLOCATION_ERROR;
1602 uprv_free(result);
1603 return NULL;
1604 }
1605 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1606 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1607 myContext->keywords[keywordListSize] = 0;
1608 myContext->current = myContext->keywords;
1609 result->context = myContext;
1610 return result;
1611 }
1612
1613 U_CAPI UEnumeration* U_EXPORT2
1614 uloc_openKeywords(const char* localeID,
1615 UErrorCode* status)
1616 {
1617 int32_t i=0;
1618 char keywords[256];
1619 int32_t keywordsCapacity = 256;
1620 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1621 const char* tmpLocaleID;
1622
1623 if(status==NULL || U_FAILURE(*status)) {
1624 return 0;
1625 }
1626
1627 if (_hasBCP47Extension(localeID)) {
1628 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1629 } else {
1630 if (localeID==NULL) {
1631 localeID=uloc_getDefault();
1632 }
1633 tmpLocaleID=localeID;
1634 }
1635
1636 /* Skip the language */
1637 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1638 if(_isIDSeparator(*tmpLocaleID)) {
1639 const char *scriptID;
1640 /* Skip the script if available */
1641 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1642 if(scriptID != tmpLocaleID+1) {
1643 /* Found optional script */
1644 tmpLocaleID = scriptID;
1645 }
1646 /* Skip the Country */
1647 if (_isIDSeparator(*tmpLocaleID)) {
1648 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1649 if(_isIDSeparator(*tmpLocaleID)) {
1650 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1651 }
1652 }
1653 }
1654
1655 /* keywords are located after '@' */
1656 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1657 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1658 }
1659
1660 if(i) {
1661 return uloc_openKeywordList(keywords, i, status);
1662 } else {
1663 return NULL;
1664 }
1665 }
1666
1667
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE 0x1

/* True if any bit of 'mask' is set in 'options'. Both arguments are
 * parenthesized so that compound expressions such as (a | b) are tested as a
 * whole instead of being split by the precedence of '&'. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The text "i-default" as a plain char array: the initializer lists exactly
 * nine chars and no terminating NUL, so use it only with length-bounded
 * comparisons together with I_DEFAULT_LENGTH. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1676
1677 /**
1678 * Canonicalize the given localeID, to level 1 or to level 2,
1679 * depending on the options. To specify level 1, pass in options=0.
1680 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1681 *
1682 * This is the code underlying uloc_getName and uloc_canonicalize.
1683 */
1684 static int32_t
1685 _canonicalize(const char* localeID,
1686 char* result,
1687 int32_t resultCapacity,
1688 uint32_t options,
1689 UErrorCode* err) {
1690 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1691 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1692 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1693 const char* origLocaleID;
1694 const char* tmpLocaleID;
1695 const char* keywordAssign = NULL;
1696 const char* separatorIndicator = NULL;
1697 const char* addKeyword = NULL;
1698 const char* addValue = NULL;
1699 char* name;
1700 char* variant = NULL; /* pointer into name, or NULL */
1701
1702 if (U_FAILURE(*err)) {
1703 return 0;
1704 }
1705
1706 if (_hasBCP47Extension(localeID)) {
1707 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1708 } else {
1709 if (localeID==NULL) {
1710 localeID=uloc_getDefault();
1711 }
1712 tmpLocaleID=localeID;
1713 }
1714
1715 origLocaleID=tmpLocaleID;
1716
1717 /* if we are doing a full canonicalization, then put results in
1718 localeBuffer, if necessary; otherwise send them to result. */
1719 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1720 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
1721 name = localeBuffer;
1722 nameCapacity = (int32_t)sizeof(localeBuffer);
1723 } else {
1724 name = result;
1725 nameCapacity = resultCapacity;
1726 }
1727
1728 /* get all pieces, one after another, and separate with '_' */
1729 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1730
1731 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1732 const char *d = uloc_getDefault();
1733
1734 len = (int32_t)uprv_strlen(d);
1735
1736 if (name != NULL) {
1737 uprv_strncpy(name, d, len);
1738 }
1739 } else if(_isIDSeparator(*tmpLocaleID)) {
1740 const char *scriptID;
1741
1742 ++fieldCount;
1743 if(len<nameCapacity) {
1744 name[len]='_';
1745 }
1746 ++len;
1747
1748 scriptSize=ulocimp_getScript(tmpLocaleID+1,
1749 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1750 if(scriptSize > 0) {
1751 /* Found optional script */
1752 tmpLocaleID = scriptID;
1753 ++fieldCount;
1754 len+=scriptSize;
1755 if (_isIDSeparator(*tmpLocaleID)) {
1756 /* If there is something else, then we add the _ */
1757 if(len<nameCapacity) {
1758 name[len]='_';
1759 }
1760 ++len;
1761 }
1762 }
1763
1764 if (_isIDSeparator(*tmpLocaleID)) {
1765 const char *cntryID;
1766 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1767 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1768 if (cntrySize > 0) {
1769 /* Found optional country */
1770 tmpLocaleID = cntryID;
1771 len+=cntrySize;
1772 }
1773 if(_isIDSeparator(*tmpLocaleID)) {
1774 /* If there is something else, then we add the _ if we found country before. */
1775 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
1776 ++fieldCount;
1777 if(len<nameCapacity) {
1778 name[len]='_';
1779 }
1780 ++len;
1781 }
1782
1783 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1784 (len<nameCapacity ? name+len : NULL), nameCapacity-len);
1785 if (variantSize > 0) {
1786 variant = len<nameCapacity ? name+len : NULL;
1787 len += variantSize;
1788 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1789 }
1790 }
1791 }
1792 }
1793
1794 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1795 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1796 UBool done = FALSE;
1797 do {
1798 char c = *tmpLocaleID;
1799 switch (c) {
1800 case 0:
1801 case '@':
1802 done = TRUE;
1803 break;
1804 default:
1805 if (len<nameCapacity) {
1806 name[len] = c;
1807 }
1808 ++len;
1809 ++tmpLocaleID;
1810 break;
1811 }
1812 } while (!done);
1813 }
1814
1815 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1816 After this, tmpLocaleID either points to '@' or is NULL */
1817 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1818 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1819 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1820 }
1821
1822 /* Copy POSIX-style variant, if any [mr@FOO] */
1823 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1824 tmpLocaleID != NULL && keywordAssign == NULL) {
1825 for (;;) {
1826 char c = *tmpLocaleID;
1827 if (c == 0) {
1828 break;
1829 }
1830 if (len<nameCapacity) {
1831 name[len] = c;
1832 }
1833 ++len;
1834 ++tmpLocaleID;
1835 }
1836 }
1837
1838 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1839 /* Handle @FOO variant if @ is present and not followed by = */
1840 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1841 int32_t posixVariantSize;
1842 /* Add missing '_' if needed */
1843 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1844 do {
1845 if(len<nameCapacity) {
1846 name[len]='_';
1847 }
1848 ++len;
1849 ++fieldCount;
1850 } while(fieldCount<2);
1851 }
1852 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1853 (UBool)(variantSize > 0));
1854 if (posixVariantSize > 0) {
1855 if (variant == NULL) {
1856 variant = name+len;
1857 }
1858 len += posixVariantSize;
1859 variantSize += posixVariantSize;
1860 }
1861 }
1862
1863 /* Handle generic variants first */
1864 if (variant) {
1865 for (j=0; j<UPRV_LENGTHOF(VARIANT_MAP); j++) {
1866 const char* variantToCompare = VARIANT_MAP[j].variant;
1867 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1868 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1869 len -= variantLen;
1870 if (variantLen > 0) {
1871 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
1872 --len;
1873 }
1874 addKeyword = VARIANT_MAP[j].keyword;
1875 addValue = VARIANT_MAP[j].value;
1876 break;
1877 }
1878 }
1879 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
1880 --len;
1881 }
1882 }
1883
1884 /* Look up the ID in the canonicalization map */
1885 for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1886 const char* id = CANONICALIZE_MAP[j].id;
1887 int32_t n = (int32_t)uprv_strlen(id);
1888 if (len == n && uprv_strncmp(name, id, n) == 0) {
1889 if (n == 0 && tmpLocaleID != NULL) {
1890 break; /* Don't remap "" if keywords present */
1891 }
1892 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1893 if (CANONICALIZE_MAP[j].keyword) {
1894 addKeyword = CANONICALIZE_MAP[j].keyword;
1895 addValue = CANONICALIZE_MAP[j].value;
1896 }
1897 break;
1898 }
1899 }
1900 }
1901
1902 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1903 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1904 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1905 if(len<nameCapacity) {
1906 name[len]='@';
1907 }
1908 ++len;
1909 ++fieldCount;
1910 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1911 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
1912 } else if (addKeyword != NULL) {
1913 U_ASSERT(addValue != NULL && len < nameCapacity);
1914 /* inelegant but works -- later make _getKeywords do this? */
1915 len += _copyCount(name+len, nameCapacity-len, "@");
1916 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1917 len += _copyCount(name+len, nameCapacity-len, "=");
1918 len += _copyCount(name+len, nameCapacity-len, addValue);
1919 }
1920 }
1921
1922 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1923 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1924 }
1925
1926 return u_terminateChars(result, resultCapacity, len, err);
1927 }
1928
1929 /* ### ID parsing API **************************************************/
1930
1931 U_CAPI int32_t U_EXPORT2
1932 uloc_getParent(const char* localeID,
1933 char* parent,
1934 int32_t parentCapacity,
1935 UErrorCode* err)
1936 {
1937 const char *lastUnderscore;
1938 int32_t i;
1939
1940 if (U_FAILURE(*err))
1941 return 0;
1942
1943 if (localeID == NULL)
1944 localeID = uloc_getDefault();
1945
1946 lastUnderscore=uprv_strrchr(localeID, '_');
1947 if(lastUnderscore!=NULL) {
1948 i=(int32_t)(lastUnderscore-localeID);
1949 } else {
1950 i=0;
1951 }
1952
1953 if(i>0 && parent != localeID) {
1954 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1955 }
1956 return u_terminateChars(parent, parentCapacity, i, err);
1957 }
1958
1959 U_CAPI int32_t U_EXPORT2
1960 uloc_getLanguage(const char* localeID,
1961 char* language,
1962 int32_t languageCapacity,
1963 UErrorCode* err)
1964 {
1965 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1966 int32_t i=0;
1967
1968 if (err==NULL || U_FAILURE(*err)) {
1969 return 0;
1970 }
1971
1972 if(localeID==NULL) {
1973 localeID=uloc_getDefault();
1974 }
1975
1976 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1977 return u_terminateChars(language, languageCapacity, i, err);
1978 }
1979
1980 U_CAPI int32_t U_EXPORT2
1981 uloc_getScript(const char* localeID,
1982 char* script,
1983 int32_t scriptCapacity,
1984 UErrorCode* err)
1985 {
1986 int32_t i=0;
1987
1988 if(err==NULL || U_FAILURE(*err)) {
1989 return 0;
1990 }
1991
1992 if(localeID==NULL) {
1993 localeID=uloc_getDefault();
1994 }
1995
1996 /* skip the language */
1997 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1998 if(_isIDSeparator(*localeID)) {
1999 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
2000 }
2001 return u_terminateChars(script, scriptCapacity, i, err);
2002 }
2003
2004 U_CAPI int32_t U_EXPORT2
2005 uloc_getCountry(const char* localeID,
2006 char* country,
2007 int32_t countryCapacity,
2008 UErrorCode* err)
2009 {
2010 int32_t i=0;
2011
2012 if(err==NULL || U_FAILURE(*err)) {
2013 return 0;
2014 }
2015
2016 if(localeID==NULL) {
2017 localeID=uloc_getDefault();
2018 }
2019
2020 /* Skip the language */
2021 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
2022 if(_isIDSeparator(*localeID)) {
2023 const char *scriptID;
2024 /* Skip the script if available */
2025 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
2026 if(scriptID != localeID+1) {
2027 /* Found optional script */
2028 localeID = scriptID;
2029 }
2030 if(_isIDSeparator(*localeID)) {
2031 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
2032 }
2033 }
2034 return u_terminateChars(country, countryCapacity, i, err);
2035 }
2036
2037 U_CAPI int32_t U_EXPORT2
2038 uloc_getVariant(const char* localeID,
2039 char* variant,
2040 int32_t variantCapacity,
2041 UErrorCode* err)
2042 {
2043 char tempBuffer[ULOC_FULLNAME_CAPACITY];
2044 const char* tmpLocaleID;
2045 int32_t i=0;
2046
2047 if(err==NULL || U_FAILURE(*err)) {
2048 return 0;
2049 }
2050
2051 if (_hasBCP47Extension(localeID)) {
2052 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
2053 } else {
2054 if (localeID==NULL) {
2055 localeID=uloc_getDefault();
2056 }
2057 tmpLocaleID=localeID;
2058 }
2059
2060 /* Skip the language */
2061 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
2062 if(_isIDSeparator(*tmpLocaleID)) {
2063 const char *scriptID;
2064 /* Skip the script if available */
2065 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
2066 if(scriptID != tmpLocaleID+1) {
2067 /* Found optional script */
2068 tmpLocaleID = scriptID;
2069 }
2070 /* Skip the Country */
2071 if (_isIDSeparator(*tmpLocaleID)) {
2072 const char *cntryID;
2073 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
2074 if (cntryID != tmpLocaleID+1) {
2075 /* Found optional country */
2076 tmpLocaleID = cntryID;
2077 }
2078 if(_isIDSeparator(*tmpLocaleID)) {
2079 /* If there was no country ID, skip a possible extra IDSeparator */
2080 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2081 tmpLocaleID++;
2082 }
2083 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
2084 }
2085 }
2086 }
2087
2088 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2089 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2090 /*
2091 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2092 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2093 }
2094 */
2095 return u_terminateChars(variant, variantCapacity, i, err);
2096 }
2097
2098 U_CAPI int32_t U_EXPORT2
2099 uloc_getName(const char* localeID,
2100 char* name,
2101 int32_t nameCapacity,
2102 UErrorCode* err)
2103 {
2104 return _canonicalize(localeID, name, nameCapacity, 0, err);
2105 }
2106
2107 U_CAPI int32_t U_EXPORT2
2108 uloc_getBaseName(const char* localeID,
2109 char* name,
2110 int32_t nameCapacity,
2111 UErrorCode* err)
2112 {
2113 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2114 }
2115
2116 U_CAPI int32_t U_EXPORT2
2117 uloc_canonicalize(const char* localeID,
2118 char* name,
2119 int32_t nameCapacity,
2120 UErrorCode* err)
2121 {
2122 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2123 }
2124
2125 U_CAPI const char* U_EXPORT2
2126 uloc_getISO3Language(const char* localeID)
2127 {
2128 int16_t offset;
2129 char lang[ULOC_LANG_CAPACITY];
2130 UErrorCode err = U_ZERO_ERROR;
2131
2132 if (localeID == NULL)
2133 {
2134 localeID = uloc_getDefault();
2135 }
2136 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2137 if (U_FAILURE(err))
2138 return "";
2139 offset = _findIndex(LANGUAGES, lang);
2140 if (offset < 0)
2141 return "";
2142 return LANGUAGES_3[offset];
2143 }
2144
2145 U_CAPI const char* U_EXPORT2
2146 uloc_getISO3Country(const char* localeID)
2147 {
2148 int16_t offset;
2149 char cntry[ULOC_LANG_CAPACITY];
2150 UErrorCode err = U_ZERO_ERROR;
2151
2152 if (localeID == NULL)
2153 {
2154 localeID = uloc_getDefault();
2155 }
2156 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2157 if (U_FAILURE(err))
2158 return "";
2159 offset = _findIndex(COUNTRIES, cntry);
2160 if (offset < 0)
2161 return "";
2162
2163 return COUNTRIES_3[offset];
2164 }
2165
2166 U_CAPI uint32_t U_EXPORT2
2167 uloc_getLCID(const char* localeID)
2168 {
2169 UErrorCode status = U_ZERO_ERROR;
2170 char langID[ULOC_FULLNAME_CAPACITY];
2171 uint32_t lcid = 0;
2172
2173 /* Check for incomplete id. */
2174 if (!localeID || uprv_strlen(localeID) < 2) {
2175 return 0;
2176 }
2177
2178 // Attempt platform lookup if available
2179 lcid = uprv_convertToLCIDPlatform(localeID);
2180 if (lcid > 0)
2181 {
2182 // Windows found an LCID, return that
2183 return lcid;
2184 }
2185
2186 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2187 if (U_FAILURE(status)) {
2188 return 0;
2189 }
2190
2191 if (uprv_strchr(localeID, '@')) {
2192 // uprv_convertToLCID does not support keywords other than collation.
2193 // Remove all keywords except collation.
2194 int32_t len;
2195 char collVal[ULOC_KEYWORDS_CAPACITY];
2196 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2197
2198 len = uloc_getKeywordValue(localeID, "collation", collVal,
2199 UPRV_LENGTHOF(collVal) - 1, &status);
2200
2201 if (U_SUCCESS(status) && len > 0) {
2202 collVal[len] = 0;
2203
2204 len = uloc_getBaseName(localeID, tmpLocaleID,
2205 UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
2206
2207 if (U_SUCCESS(status) && len > 0) {
2208 tmpLocaleID[len] = 0;
2209
2210 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2211 UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
2212
2213 if (U_SUCCESS(status) && len > 0) {
2214 tmpLocaleID[len] = 0;
2215 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2216 }
2217 }
2218 }
2219
2220 // fall through - all keywords are simply ignored
2221 status = U_ZERO_ERROR;
2222 }
2223
2224 return uprv_convertToLCID(langID, localeID, &status);
2225 }
2226
2227 U_CAPI int32_t U_EXPORT2
2228 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2229 UErrorCode *status)
2230 {
2231 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2232 }
2233
2234 /* ### Default locale **************************************************/
2235
2236 U_CAPI const char* U_EXPORT2
2237 uloc_getDefault()
2238 {
2239 return locale_get_default();
2240 }
2241
2242 U_CAPI void U_EXPORT2
2243 uloc_setDefault(const char* newDefaultLocale,
2244 UErrorCode* err)
2245 {
2246 if (U_FAILURE(*err))
2247 return;
2248 /* the error code isn't currently used for anything by this function*/
2249
2250 /* propagate change to C++ */
2251 locale_set_default(newDefaultLocale);
2252 }
2253
2254 /**
2255 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2256 * to an array of pointers to arrays of char. All of these pointers are owned
2257 * by ICU-- do not delete them, and do not write through them. The array is
2258 * terminated with a null pointer.
2259 */
2260 U_CAPI const char* const* U_EXPORT2
2261 uloc_getISOLanguages()
2262 {
2263 return LANGUAGES;
2264 }
2265
2266 /**
2267 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2268 * pointer to an array of pointers to arrays of char. All of these pointers are
2269 * owned by ICU-- do not delete them, and do not write through them. The array is
2270 * terminated with a null pointer.
2271 */
2272 U_CAPI const char* const* U_EXPORT2
2273 uloc_getISOCountries()
2274 {
2275 return COUNTRIES;
2276 }
2277
2278
2279 /* this function to be moved into cstring.c later */
2280 static char gDecimal = 0;
2281
2282 static /* U_CAPI */
2283 double
2284 /* U_EXPORT2 */
2285 _uloc_strtod(const char *start, char **end) {
2286 char *decimal;
2287 char *myEnd;
2288 char buf[30];
2289 double rv;
2290 if (!gDecimal) {
2291 char rep[5];
2292 /* For machines that decide to change the decimal on you,
2293 and try to be too smart with localization.
2294 This normally should be just a '.'. */
2295 sprintf(rep, "%+1.1f", 1.0);
2296 gDecimal = rep[2];
2297 }
2298
2299 if(gDecimal == '.') {
2300 return uprv_strtod(start, end); /* fall through to OS */
2301 } else {
2302 uprv_strncpy(buf, start, 29);
2303 buf[29]=0;
2304 decimal = uprv_strchr(buf, '.');
2305 if(decimal) {
2306 *decimal = gDecimal;
2307 } else {
2308 return uprv_strtod(start, end); /* no decimal point */
2309 }
2310 rv = uprv_strtod(buf, &myEnd);
2311 if(end) {
2312 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2313 }
2314 return rv;
2315 }
2316 }
2317
2318 typedef struct {
2319 float q;
2320 int32_t dummy; /* to avoid uninitialized memory copy from qsort */
2321 char locale[ULOC_FULLNAME_CAPACITY+1];
2322 } _acceptLangItem;
2323
2324 static int32_t U_CALLCONV
2325 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
2326 {
2327 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2328 const _acceptLangItem *bb = (const _acceptLangItem*)b;
2329
2330 int32_t rc = 0;
2331 if(bb->q < aa->q) {
2332 rc = -1; /* A > B */
2333 } else if(bb->q > aa->q) {
2334 rc = 1; /* A < B */
2335 } else {
2336 rc = 0; /* A = B */
2337 }
2338
2339 if(rc==0) {
2340 rc = uprv_stricmp(aa->locale, bb->locale);
2341 }
2342
2343 #if defined(ULOC_DEBUG)
2344 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2345 aa->locale, aa->q,
2346 bb->locale, bb->q,
2347 rc);*/
2348 #endif
2349
2350 return rc;
2351 }
2352
2353 /*
2354 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2355 */
2356
2357 U_CAPI int32_t U_EXPORT2
2358 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2359 const char *httpAcceptLanguage,
2360 UEnumeration* availableLocales,
2361 UErrorCode *status)
2362 {
2363 MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
2364 char tmp[ULOC_FULLNAME_CAPACITY +1];
2365 int32_t n = 0;
2366 const char *itemEnd;
2367 const char *paramEnd;
2368 const char *s;
2369 const char *t;
2370 int32_t res;
2371 int32_t i;
2372 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2373
2374 if(U_FAILURE(*status)) {
2375 return -1;
2376 }
2377
2378 for(s=httpAcceptLanguage;s&&*s;) {
2379 while(isspace(*s)) /* eat space at the beginning */
2380 s++;
2381 itemEnd=uprv_strchr(s,',');
2382 paramEnd=uprv_strchr(s,';');
2383 if(!itemEnd) {
2384 itemEnd = httpAcceptLanguage+l; /* end of string */
2385 }
2386 if(paramEnd && paramEnd<itemEnd) {
2387 /* semicolon (;) is closer than end (,) */
2388 t = paramEnd+1;
2389 if(*t=='q') {
2390 t++;
2391 }
2392 while(isspace(*t)) {
2393 t++;
2394 }
2395 if(*t=='=') {
2396 t++;
2397 }
2398 while(isspace(*t)) {
2399 t++;
2400 }
2401 items[n].q = (float)_uloc_strtod(t,NULL);
2402 } else {
2403 /* no semicolon - it's 1.0 */
2404 items[n].q = 1.0f;
2405 paramEnd = itemEnd;
2406 }
2407 items[n].dummy=0;
2408 /* eat spaces prior to semi */
2409 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2410 ;
2411 int32_t slen = ((t+1)-s);
2412 if(slen > ULOC_FULLNAME_CAPACITY) {
2413 *status = U_BUFFER_OVERFLOW_ERROR;
2414 return -1; // too big
2415 }
2416 uprv_strncpy(items[n].locale, s, slen);
2417 items[n].locale[slen]=0; // terminate
2418 int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
2419 if(U_FAILURE(*status)) return -1;
2420 if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {
2421 // canonicalization had an effect- copy back
2422 uprv_strncpy(items[n].locale, tmp, clen);
2423 items[n].locale[clen] = 0; // terminate
2424 }
2425 #if defined(ULOC_DEBUG)
2426 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2427 #endif
2428 n++;
2429 s = itemEnd;
2430 while(*s==',') { /* eat duplicate commas */
2431 s++;
2432 }
2433 if(n>=items.getCapacity()) { // If we need more items
2434 if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
2435 *status = U_MEMORY_ALLOCATION_ERROR;
2436 return -1;
2437 }
2438 #if defined(ULOC_DEBUG)
2439 fprintf(stderr,"malloced at size %d\n", items.getCapacity());
2440 #endif
2441 }
2442 }
2443 uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2444 if (U_FAILURE(*status)) {
2445 return -1;
2446 }
2447 LocalMemory<const char*> strs(NULL);
2448 if (strs.allocateInsteadAndReset(n) == NULL) {
2449 *status = U_MEMORY_ALLOCATION_ERROR;
2450 return -1;
2451 }
2452 for(i=0;i<n;i++) {
2453 #if defined(ULOC_DEBUG)
2454 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2455 #endif
2456 strs[i]=items[i].locale;
2457 }
2458 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2459 strs.getAlias(), n, availableLocales, status);
2460 return res;
2461 }
2462
2463
2464 U_CAPI int32_t U_EXPORT2
2465 uloc_acceptLanguage(char *result, int32_t resultAvailable,
2466 UAcceptResult *outResult, const char **acceptList,
2467 int32_t acceptListCount,
2468 UEnumeration* availableLocales,
2469 UErrorCode *status)
2470 {
2471 int32_t i,j;
2472 int32_t len;
2473 int32_t maxLen=0;
2474 char tmp[ULOC_FULLNAME_CAPACITY+1];
2475 const char *l;
2476 char **fallbackList;
2477 if(U_FAILURE(*status)) {
2478 return -1;
2479 }
2480 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
2481 if(fallbackList==NULL) {
2482 *status = U_MEMORY_ALLOCATION_ERROR;
2483 return -1;
2484 }
2485 for(i=0;i<acceptListCount;i++) {
2486 #if defined(ULOC_DEBUG)
2487 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2488 #endif
2489 while((l=uenum_next(availableLocales, NULL, status))) {
2490 #if defined(ULOC_DEBUG)
2491 fprintf(stderr," %s\n", l);
2492 #endif
2493 len = (int32_t)uprv_strlen(l);
2494 if(!uprv_strcmp(acceptList[i], l)) {
2495 if(outResult) {
2496 *outResult = ULOC_ACCEPT_VALID;
2497 }
2498 #if defined(ULOC_DEBUG)
2499 fprintf(stderr, "MATCH! %s\n", l);
2500 #endif
2501 if(len>0) {
2502 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2503 }
2504 for(j=0;j<i;j++) {
2505 uprv_free(fallbackList[j]);
2506 }
2507 uprv_free(fallbackList);
2508 return u_terminateChars(result, resultAvailable, len, status);
2509 }
2510 if(len>maxLen) {
2511 maxLen = len;
2512 }
2513 }
2514 uenum_reset(availableLocales, status);
2515 /* save off parent info */
2516 if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
2517 fallbackList[i] = uprv_strdup(tmp);
2518 } else {
2519 fallbackList[i]=0;
2520 }
2521 }
2522
2523 for(maxLen--;maxLen>0;maxLen--) {
2524 for(i=0;i<acceptListCount;i++) {
2525 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2526 #if defined(ULOC_DEBUG)
2527 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2528 #endif
2529 while((l=uenum_next(availableLocales, NULL, status))) {
2530 #if defined(ULOC_DEBUG)
2531 fprintf(stderr," %s\n", l);
2532 #endif
2533 len = (int32_t)uprv_strlen(l);
2534 if(!uprv_strcmp(fallbackList[i], l)) {
2535 if(outResult) {
2536 *outResult = ULOC_ACCEPT_FALLBACK;
2537 }
2538 #if defined(ULOC_DEBUG)
2539 fprintf(stderr, "fallback MATCH! %s\n", l);
2540 #endif
2541 if(len>0) {
2542 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2543 }
2544 for(j=0;j<acceptListCount;j++) {
2545 uprv_free(fallbackList[j]);
2546 }
2547 uprv_free(fallbackList);
2548 return u_terminateChars(result, resultAvailable, len, status);
2549 }
2550 }
2551 uenum_reset(availableLocales, status);
2552
2553 if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
2554 uprv_free(fallbackList[i]);
2555 fallbackList[i] = uprv_strdup(tmp);
2556 } else {
2557 uprv_free(fallbackList[i]);
2558 fallbackList[i]=0;
2559 }
2560 }
2561 }
2562 if(outResult) {
2563 *outResult = ULOC_ACCEPT_FAILED;
2564 }
2565 }
2566 for(i=0;i<acceptListCount;i++) {
2567 uprv_free(fallbackList[i]);
2568 }
2569 uprv_free(fallbackList);
2570 return -1;
2571 }
2572
2573 U_CAPI const char* U_EXPORT2
2574 uloc_toUnicodeLocaleKey(const char* keyword)
2575 {
2576 const char* bcpKey = ulocimp_toBcpKey(keyword);
2577 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2578 // unknown keyword, but syntax is fine..
2579 return keyword;
2580 }
2581 return bcpKey;
2582 }
2583
2584 U_CAPI const char* U_EXPORT2
2585 uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2586 {
2587 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2588 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2589 // unknown keyword, but syntax is fine..
2590 return value;
2591 }
2592 return bcpType;
2593 }
2594
2595 static UBool
2596 isWellFormedLegacyKey(const char* legacyKey)
2597 {
2598 const char* p = legacyKey;
2599 while (*p) {
2600 if (!UPRV_ISALPHANUM(*p)) {
2601 return FALSE;
2602 }
2603 p++;
2604 }
2605 return TRUE;
2606 }
2607
2608 static UBool
2609 isWellFormedLegacyType(const char* legacyType)
2610 {
2611 const char* p = legacyType;
2612 int32_t alphaNumLen = 0;
2613 while (*p) {
2614 if (*p == '_' || *p == '/' || *p == '-') {
2615 if (alphaNumLen == 0) {
2616 return FALSE;
2617 }
2618 alphaNumLen = 0;
2619 } else if (UPRV_ISALPHANUM(*p)) {
2620 alphaNumLen++;
2621 } else {
2622 return FALSE;
2623 }
2624 p++;
2625 }
2626 return (alphaNumLen != 0);
2627 }
2628
2629 U_CAPI const char* U_EXPORT2
2630 uloc_toLegacyKey(const char* keyword)
2631 {
2632 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2633 if (legacyKey == NULL) {
2634 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2635 //
2636 // Note:
2637 // LDML/CLDR provides some definition of keyword syntax in
2638 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2639 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2640 // Keys can only consist of [0-9a-zA-Z].
2641 if (isWellFormedLegacyKey(keyword)) {
2642 return keyword;
2643 }
2644 }
2645 return legacyKey;
2646 }
2647
2648 U_CAPI const char* U_EXPORT2
2649 uloc_toLegacyType(const char* keyword, const char* value)
2650 {
2651 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2652 if (legacyType == NULL) {
2653 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2654 //
2655 // Note:
2656 // LDML/CLDR provides some definition of keyword syntax in
2657 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2658 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2659 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2660 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
2661 if (isWellFormedLegacyType(value)) {
2662 return value;
2663 }
2664 }
2665 return legacyType;
2666 }
2667
2668 /*eof*/