2 **********************************************************************
3 * Copyright (C) 1997-2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
9 * Modification History:
11 * Date Name Description
12 * 04/01/97 aliu Creation.
13 * 08/21/98 stephen JDK 1.2 sync
14 * 12/08/98 rtg New Locale implementation and C API
15 * 03/15/99 damiba overhaul.
16 * 04/06/99 stephen changed setDefault() to realloc and copy
17 * 06/14/99 stephen Changed calls to ures_open for new params
18 * 07/21/99 stephen Modified setDefault() to propagate to C++
19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20 * brought canonicalization code into line with spec
21 *****************************************************************************/
24 POSIX's locale format, from putil.c: [no spaces]
26 ll [ _CC ] [ . MM ] [ @ VV]
28 l = lang, C = ctry, M = charmap, V = variant
31 #include "unicode/utypes.h"
32 #include "unicode/ustring.h"
33 #include "unicode/uloc.h"
47 #include <stdio.h> /* for sprintf */
49 /* ### Declarations **************************************************/
51 /* Locale stuff from locid.cpp */
/* Declaration only: accepts a locale ID string. The definition is not in the
   visible part of this file (presumably locid.cpp, per the note above -- confirm). */
52 U_CFUNC
void locale_set_default(const char *id
);
/* Declaration only: takes no arguments and returns a const string, presumably
   the current default locale ID -- confirm against the definition. */
53 U_CFUNC
const char *locale_get_default(void);
/* NOTE(review): forward declaration of locale_getKeywords. Its return type and
   the parameters after valLen are not visible here, so the declaration is
   incomplete in this view. The visible parameters are the locale ID, an output
   buffer plus capacity for keywords, an output buffer plus capacity for values,
   and an int32_t out-parameter valLen. */
55 locale_getKeywords(const char *localeID
,
57 char *keywords
, int32_t keywordCapacity
,
58 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
62 /* ### Data tables **************************************************/
65 * Table of language codes, both 2- and 3-letter, with preference
66 * given to 2-letter codes where possible. Includes 3-letter codes
67 * that lack a 2-letter equivalent.
69 * This list must be in sorted order. This list is returned directly
70 * to the user by some API.
72 * This list must be kept in sync with LANGUAGES_3, with corresponding entries matched.
75 * This table should be terminated with a NULL entry, followed by a
76 * second list, and another NULL entry. The first list is visible to
77 * user code when this array is returned by API. The second list
78 * contains codes we support, but do not expose through user API.
82 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
83 * include the revisions up to 2001/7/27 *CWB*
85 * The 3 character codes are the terminology codes like RFC 3066. This
86 * is compatible with prior ICU codes
88 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
89 * table but now at the end of the table because 3 character codes are
90 * duplicates. This avoids bad searches going from 3 to 2 character codes.
93 * The range qaa-qtz is reserved for local use
/**
 * Language codes, 2-letter where one exists and 3-letter otherwise.
 *
 * Layout: a sorted primary list, a NULL sentinel, a secondary list of
 * obsolete codes, and a final NULL sentinel.
 *
 * LANGUAGES[i] and LANGUAGES_3[i] must describe the same language for every
 * valid i. The "ang", "anp", "apa" row is required for that index
 * correspondence (LANGUAGES_3 carries those entries at the same position).
 */
static const char * const LANGUAGES[] = {
    "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa",
    "afh", "ain", "ak", "akk", "ale", "alg", "alt", "am", "an",
    "ang", "anp", "apa",
    "ar", "arc", "arn", "arp", "art", "arw", "as", "ast",
    "ath", "aus", "av", "awa", "ay", "az", "ba", "bad",
    "bai", "bal", "ban", "bas", "bat", "be", "bej",
    "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin",
    "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs",
    "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau",
    "cch", "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop",
    "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus",
    "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den",
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu",
    "dz", "ee", "efi", "egy", "eka", "el", "elx", "en",
    "enm", "eo", "es", "et", "eu", "ewo", "fa",
    "fan", "fat", "ff", "fi", "fil", "fiu", "fj", "fo", "fon",
    "fr", "frm", "fro", "frr", "frs", "fur", "fy",
    "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil",
    "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu", "gv", "gwi",
    "ha", "hai", "haw", "he", "hi", "hil", "him",
    "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz",
    "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik",
    "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it",
    "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab",
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi",
    "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn",
    "ko", "kok", "kos", "kpe", "kr", "krc", "krl", "kro", "kru", "ks",
    "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad",
    "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol",
    "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus",
    "lv", "mad", "mag", "mai", "mak", "man", "map", "mas",
    "mdf", "mdr", "men", "mfe", "mg", "mga", "mh", "mi", "mic", "min",
    "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno",
    "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun",
    "mus", "mwl", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap",
    "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic",
    "niu", "nl", "nn", "no", "nog", "non", "nqo", "nr", "nso", "nub",
    "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj",
    "om", "or", "os", "osa", "ota", "oto", "pa", "paa",
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu",
    "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom",
    "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam",
    "sas", "sat", "sc", "scn", "sco", "sd", "se", "sel", "sem",
    "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit",
    "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
    "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
    "srn", "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux",
    "sv", "sw", "syc", "syr", "ta", "tai", "te", "tem", "ter",
    "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl",
    "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv",
    "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
    "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur",
    "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak",
    "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap",
    "yi", "yo", "ypk", "za", "zap", "zbl", "zen", "zh", "znd",
    "zu", "zun", "zxx", "zza",
    NULL, /* end of the primary list */
    "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
    NULL  /* end of the secondary list */
};
/*
 * Deprecated 2-letter language codes. The entries line up by index with
 * REPLACEMENT_LANGUAGES. The list ends with two NULL entries.
 */
static const char * const DEPRECATED_LANGUAGES[] = {
    "in", "iw", "ji", "jw", NULL, NULL
};
/*
 * Current codes that replace the entries of DEPRECATED_LANGUAGES, index for
 * index ("in"->"id", "iw"->"he", "ji"->"yi", "jw"->"jv"). The list ends with
 * two NULL entries.
 */
static const char * const REPLACEMENT_LANGUAGES[] = {
    "id", "he", "yi", "jv", NULL, NULL
};
167 * Table of 3-letter language codes.
169 * This is a lookup table used to convert 3-letter language codes to
170 * their 2-letter equivalent, where possible. It must be kept in sync
171 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
172 * same language as LANGUAGES_3[i]. The commented-out lines are
173 * copied from LANGUAGES to make eyeballing this baby easier.
175 * Where a 3-letter language code has no 2-letter equivalent, the
176 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
178 * This table should be terminated with a NULL entry, followed by a
179 * second list, and another NULL entry. The two lists correspond to
180 * the two lists in LANGUAGES.
/**
 * 3-letter language codes, index-aligned with LANGUAGES: for every valid i,
 * LANGUAGES_3[i] names the same language as LANGUAGES[i]. Where no 2-letter
 * code exists, the same 3-letter code appears in both tables.
 *
 * Layout mirrors LANGUAGES: primary list, NULL sentinel, secondary list of
 * obsolete codes, final NULL sentinel. The commented rows are the matching
 * LANGUAGES entries, kept to make visual comparison easier.
 *
 * Every entry in this table is a 3-letter code; the slot for "kg" (Kongo)
 * holds "kon".
 */
static const char * const LANGUAGES_3[] = {
/*  "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa", */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
/*  "afh", "ain", "ak", "akk", "ale", "alg", "alt", "am", "an", "ang", "anp", "apa", */
    "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
/*  "ar", "arc", "arn", "arp", "art", "arw", "as", "ast", */
    "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
/*  "ath", "aus", "av", "awa", "ay", "az", "ba", "bad", */
    "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
/*  "bai", "bal", "ban", "bas", "bat", "be", "bej", */
    "bai", "bal", "ban", "bas", "bat", "bel", "bej",
/*  "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin", */
    "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
/*  "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs", */
    "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
/*  "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau", */
    "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
/*  "cch", "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm", */
    "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
/*  "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop", */
    "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
/*  "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus", */
    "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
/*  "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den", */
    "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
/*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu", */
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
/*  "dz", "ee", "efi", "egy", "eka", "el", "elx", "en", */
    "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
/*  "enm", "eo", "es", "et", "eu", "ewo", "fa", */
    "enm", "epo", "spa", "est", "eus", "ewo", "fas",
/*  "fan", "fat", "ff", "fi", "fil", "fiu", "fj", "fo", "fon", */
    "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
/*  "fr", "frm", "fro", "frr", "frs", "fur", "fy", "ga", "gaa", "gay", */
    "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
/*  "gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn", */
    "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
/*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu", "gv", */
    "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
/*  "gwi", "ha", "hai", "haw", "he", "hi", "hil", "him", */
    "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
/*  "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz", */
    "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
/*  "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik", */
    "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
/*  "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it", */
    "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
/*  "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab", */
    "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
/*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi", */
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kon", "kha", "khi",
/*  "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn", */
    "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
/*  "ko", "kok", "kos", "kpe", "kr", "krc", "krl", "kro", "kru", "ks", */
    "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
/*  "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", */
    "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
/*  "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol", */
    "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
/*  "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", */
    "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
/*  "lv", "mad", "mag", "mai", "mak", "man", "map", "mas", */
    "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
/*  "mdf", "mdr", "men", "mfe", "mg", "mga", "mh", "mi", "mic", "min", */
    "mdf", "mdr", "men", "mfe", "mlg", "mga", "mah", "mri", "mic", "min",
/*  "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno", */
    "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
/*  "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun", */
    "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
/*  "mus", "mwl", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap", */
    "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
/*  "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic", */
    "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
/*  "niu", "nl", "nn", "no", "nog", "non", "nqo", "nr", "nso", "nub", */
    "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub",
/*  "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj", */
    "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
/*  "om", "or", "os", "osa", "ota", "oto", "pa", "paa", */
    "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
/*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", */
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
/*  "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu", */
    "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
/*  "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom", */
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
/*  "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam", */
    "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
/*  "sas", "sat", "sc", "scn", "sco", "sd", "se", "sel", "sem", */
    "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
/*  "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit", */
    "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
/*  "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", */
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
/*  "sms", "sn", "snk", "so", "sog", "son", "sq", "sr", */
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
/*  "srn", "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux", */
    "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
/*  "sv", "sw", "syc", "syr", "ta", "tai", "te", "tem", "ter", */
    "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter",
/*  "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", */
    "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
/*  "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv", */
    "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
/*  "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", */
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
/*  "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur", */
    "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
/*  "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak", */
    "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
/*  "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap", */
    "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
/*  "yi", "yo", "ypk", "za", "zap", "zbl", "zen", "zh", "znd", */
    "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd",
/*  "zu", "zun", "zxx", "zza", */
    "zul", "zun", "zxx", "zza",
    NULL, /* end of the primary list */
/*  "in", "iw", "ji", "jw", "sh", */
    "ind", "heb", "yid", "jaw", "srp",
    NULL  /* end of the secondary list */
};
304 * Table of 2-letter country codes.
306 * This list must be in sorted order. This list is returned directly
307 * to the user by some API.
309 * This list must be kept in sync with COUNTRIES_3, with corresponding entries matched.
312 * This table should be terminated with a NULL entry, followed by a
313 * second list, and another NULL entry. The first list is visible to
314 * user code when this array is returned by API. The second list
315 * contains codes we support, but do not expose through user API.
319 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
320 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
321 * new codes keeping the old ones for compatibility updated to include
322 * 1999/12/03 revisions *CWB*
324 * RO(ROM) is now RO(ROU) according to
325 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
/**
 * 2-letter country codes.
 *
 * Layout: a sorted primary list, a NULL sentinel, a secondary list of
 * obsolete codes, and a final NULL sentinel.
 *
 * COUNTRIES[i] and COUNTRIES_3[i] must describe the same country for every
 * valid i.
 */
static const char * const COUNTRIES[] = {
    "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN",
    "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
    "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
    "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV",
    "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
    "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
    "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK",
    "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
    "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
    "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
    "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
    "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
    "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
    "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
    "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
    "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
    "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
    "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
    "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
    "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
    "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
    "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
    "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
    "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
    "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV",
    "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
    "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
    "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
    "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
    "WS", "YE", "YT", "ZA", "ZM", "ZW",
    NULL, /* end of the primary list */
    "FX", "CS", "RO", "TP", "YU", "ZR", /* obsolete country codes */
    NULL  /* end of the secondary list */
};
/*
 * Deprecated 2-letter country codes. The entries line up by index with
 * REPLACEMENT_COUNTRIES. The list ends with two NULL entries.
 */
static const char * const DEPRECATED_COUNTRIES[] = {
    "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
};
/*
 * Current codes that replace the entries of DEPRECATED_COUNTRIES, index for
 * index. The commented row repeats the deprecated codes for comparison. The
 * list ends with two NULL entries.
 */
static const char * const REPLACEMENT_COUNTRIES[] = {
/*  "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
    "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL /* replacement country codes */
};
372 * Table of 3-letter country codes.
374 * This is a lookup table used to convert 3-letter country codes to
375 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
376 * For all valid i, COUNTRIES[i] must refer to the same country as
377 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
378 * to make eyeballing this baby easier.
380 * This table should be terminated with a NULL entry, followed by a
381 * second list, and another NULL entry. The two lists correspond to
382 * the two lists in COUNTRIES.
/**
 * 3-letter country codes, index-aligned with COUNTRIES: for every valid i,
 * COUNTRIES_3[i] names the same country as COUNTRIES[i].
 *
 * Layout mirrors COUNTRIES: primary list, NULL sentinel, secondary list of
 * obsolete codes, final NULL sentinel. The commented rows are the matching
 * COUNTRIES entries, kept to make visual comparison easier.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
/*  "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV", */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
/*  "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", */
    "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
/*  "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
    "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS", "YE", "YT", "ZA", "ZM", "ZW", */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
    NULL, /* end of the primary list */
/*  "FX", "CS", "RO", "TP", "YU", "ZR", */
    "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
    NULL  /* end of the secondary list */
};
/* One row of the locale-ID canonicalization table. */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} CanonicalizationMap;

/**
 * A map to canonicalize locale IDs. This handles a variety of
 * different semantic kinds of transformations.
 *
 * Each row maps an input ID to its canonical form and, for some rows,
 * a keyword/value pair. keyword and value are either both set or both NULL.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "",                  "en_US_POSIX", NULL, NULL }, /* .NET name */
    { "c",                 "en_US_POSIX", NULL, NULL }, /* POSIX name */
    { "posix",             "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    { "art_LOJBAN",        "jbo",         NULL, NULL }, /* registered name */
    { "az_AZ_CYRL",        "az_Cyrl_AZ",  NULL, NULL }, /* .NET name */
    { "az_AZ_LATN",        "az_Latn_AZ",  NULL, NULL }, /* .NET name */
    { "ca_ES_PREEURO",     "ca_ES",       "currency", "ESP" },
    { "cel_GAULISH",       "cel__GAULISH", NULL, NULL }, /* registered name */
    { "de_1901",           "de__1901",    NULL, NULL }, /* registered name */
    { "de_1906",           "de__1906",    NULL, NULL }, /* registered name */
    { "de__PHONEBOOK",     "de",          "collation", "phonebook" }, /* Old ICU name */
    { "de_AT_PREEURO",     "de_AT",       "currency", "ATS" },
    { "de_DE_PREEURO",     "de_DE",       "currency", "DEM" },
    { "de_LU_PREEURO",     "de_LU",       "currency", "LUF" },
    { "el_GR_PREEURO",     "el_GR",       "currency", "GRD" },
    { "en_BOONT",          "en__BOONT",   NULL, NULL }, /* registered name */
    { "en_SCOUSE",         "en__SCOUSE",  NULL, NULL }, /* registered name */
    { "en_BE_PREEURO",     "en_BE",       "currency", "BEF" },
    { "en_IE_PREEURO",     "en_IE",       "currency", "IEP" },
    { "es__TRADITIONAL",   "es",          "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO",     "es_ES",       "currency", "ESP" },
    { "eu_ES_PREEURO",     "eu_ES",       "currency", "ESP" },
    { "fi_FI_PREEURO",     "fi_FI",       "currency", "FIM" },
    { "fr_BE_PREEURO",     "fr_BE",       "currency", "BEF" },
    { "fr_FR_PREEURO",     "fr_FR",       "currency", "FRF" },
    { "fr_LU_PREEURO",     "fr_LU",       "currency", "LUF" },
    { "ga_IE_PREEURO",     "ga_IE",       "currency", "IEP" },
    { "gl_ES_PREEURO",     "gl_ES",       "currency", "ESP" },
    { "hi__DIRECT",        "hi",          "collation", "direct" }, /* Old ICU name */
    { "it_IT_PREEURO",     "it_IT",       "currency", "ITL" },
    { "ja_JP_TRADITIONAL", "ja_JP",       "calendar", "japanese" }, /* Old ICU name */
    { "nb_NO_NY",          "nn_NO",       NULL, NULL }, /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO",     "nl_BE",       "currency", "BEF" },
    { "nl_NL_PREEURO",     "nl_NL",       "currency", "NLG" },
    { "pt_PT_PREEURO",     "pt_PT",       "currency", "PTE" },
    { "sl_ROZAJ",          "sl__ROZAJ",   NULL, NULL }, /* registered name */
    { "sr_SP_CYRL",        "sr_Cyrl_RS",  NULL, NULL }, /* .NET name */
    { "sr_SP_LATN",        "sr_Latn_RS",  NULL, NULL }, /* .NET name */
    { "sr_YU_CYRILLIC",    "sr_Cyrl_RS",  NULL, NULL }, /* Linux name */
    { "th_TH_TRADITIONAL", "th_TH",       "calendar", "buddhist" }, /* Old ICU name */
    { "uz_UZ_CYRILLIC",    "uz_Cyrl_UZ",  NULL, NULL }, /* Linux name */
    { "uz_UZ_CYRL",        "uz_Cyrl_UZ",  NULL, NULL }, /* .NET name */
    { "uz_UZ_LATN",        "uz_Latn_UZ",  NULL, NULL }, /* .NET name */
    { "zh_CHS",            "zh_Hans",     NULL, NULL }, /* .NET name */
    { "zh_CHT",            "zh_Hant",     NULL, NULL }, /* .NET name */
    { "zh_GAN",            "zh__GAN",     NULL, NULL }, /* registered name */
    { "zh_GUOYU",          "zh",          NULL, NULL }, /* registered name */
    { "zh_HAKKA",          "zh__HAKKA",   NULL, NULL }, /* registered name */
    { "zh_MIN",            "zh__MIN",     NULL, NULL }, /* registered name */
    { "zh_MIN_NAN",        "zh__MINNAN",  NULL, NULL }, /* registered name */
    { "zh_WUU",            "zh__WUU",     NULL, NULL }, /* registered name */
    { "zh_XIANG",          "zh__XIANG",   NULL, NULL }, /* registered name */
    { "zh_YUE",            "zh__YUE",     NULL, NULL }, /* registered name */
};
/* One row of the variant-to-keyword table. */
typedef struct VariantMap {
    const char *variant; /* input ID */
    const char *keyword; /* keyword, or NULL if none */
    const char *value;   /* keyword value, or NULL if kw==NULL */
} VariantMap;

/* Variants that are expressed as a keyword/value pair. */
static const VariantMap VARIANT_MAP[] = {
    { "EURO",   "currency",  "EUR" },
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    { "STROKE", "collation", "stroke" }  /* Solaris variant */
};
530 /* ### BCP47 Conversion *******************************************/
/*
 * Test whether a locale ID looks like a BCP47 tag with an extension: the ID
 * is non-NULL, contains no '@', and its shortest subtag is exactly one
 * character long.
 *
 * The argument is expanded more than once, so it must be free of side effects.
 * The expansion uses only the macro argument; it does not depend on the
 * caller having a variable named localeID.
 */
#define _hasBCP47Extension(id) ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
533 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
/* NOTE(review): only the first two lines of this multi-line macro are visible.
   They call uloc_forLanguageTag(id, buffer, length, NULL, err) and treat a
   result <= 0 or a failing *err as the failure case. The statements that
   assign finalID in each branch are not visible here. */
534 #define _ConvertBCP47(finalID, id, buffer, length,err) \
535 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
540 /* Gets the size of the shortest subtag in the given localeID. */
/* Subtags are the runs of characters between '_' or '-' separators.
   NOTE(review): several lines of this body (the declaration of i, the
   per-character counting, and the return) are not visible in this view. */
541 static int32_t getShortestSubtagLength(const char *localeID
) {
542 int32_t localeIDLength
= uprv_strlen(localeID
);
/* Start from the full ID length as the shortest length seen so far. */
543 int32_t length
= localeIDLength
;
544 int32_t tmpLength
= 0;
/* Walk every character of the ID. */
548 for (i
= 0; i
< localeIDLength
; i
++) {
/* Anything other than '_' or '-' belongs to the current subtag. */
549 if (localeID
[i
] != '_' && localeID
[i
] != '-') {
/* A non-empty subtag shorter than the current minimum is the case tested here. */
556 if (tmpLength
!= 0 && tmpLength
< length
) {
566 /* ### Keywords **************************************************/
/* Size of the internal keyword-name buffer. locale_canonKeywordName rejects
   names whose length is >= this value. */
568 #define ULOC_KEYWORD_BUFFER_LEN 25
/* NOTE(review): presumably the maximum number of keywords handled per locale
   ID; no use of this constant is visible here -- confirm. */
569 #define ULOC_MAX_NO_KEYWORDS 25
/* Looks for the start of the keyword section of a locale ID by searching for
   '@' with uprv_strchr. On EBCDIC builds it also consults a table of
   alternative byte values for '@'.
   NOTE(review): the return statements and the loop over the table are not
   visible in this view, so the exact return value cannot be confirmed here. */
571 U_CAPI
const char * U_EXPORT2
572 locale_getKeywordsStart(const char *localeID
) {
573 const char *result
= NULL
;
/* First try the '@' character as compiled on this platform. */
574 if((result
= uprv_strchr(localeID
, '@')) != NULL
) {
/* The EBCDIC-only fallback follows. */
577 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
579 /* We do this because the @ sign is variant, and the @ sign used on one
580 EBCDIC machine won't be compiled the same way on other EBCDIC based
582 static const uint8_t ebcdicSigns
[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
583 const uint8_t *charToFind
= ebcdicSigns
;
585 if((result
= uprv_strchr(localeID
, *charToFind
)) != NULL
) {
596 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
597 * @param keywordName incoming name to be canonicalized
598 * @param status return status (keyword too long)
599 * @return length of the keyword name
601 static int32_t locale_canonKeywordName(char *buf
, const char *keywordName
, UErrorCode
*status
)
604 int32_t keywordNameLen
= (int32_t)uprv_strlen(keywordName
);
606 if(keywordNameLen
>= ULOC_KEYWORD_BUFFER_LEN
) {
607 /* keyword name too long for internal buffer */
608 *status
= U_INTERNAL_PROGRAM_ERROR
;
612 /* normalize the keyword name */
613 for(i
= 0; i
< keywordNameLen
; i
++) {
614 buf
[i
] = uprv_tolower(keywordName
[i
]);
618 return keywordNameLen
;
622 char keyword
[ULOC_KEYWORD_BUFFER_LEN
];
624 const char *valueStart
;
628 static int32_t U_CALLCONV
629 compareKeywordStructs(const void *context
, const void *left
, const void *right
) {
630 const char* leftString
= ((const KeywordStruct
*)left
)->keyword
;
631 const char* rightString
= ((const KeywordStruct
*)right
)->keyword
;
632 return uprv_strcmp(leftString
, rightString
);
636 * Both addKeyword and addValue must already be in canonical form.
637 * Either both addKeyword and addValue are NULL, or neither is NULL.
638 * If they are not NULL they must be zero terminated.
639 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
642 _getKeywords(const char *localeID
,
644 char *keywords
, int32_t keywordCapacity
,
645 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
647 const char* addKeyword
,
648 const char* addValue
,
651 KeywordStruct keywordList
[ULOC_MAX_NO_KEYWORDS
];
653 int32_t maxKeywords
= ULOC_MAX_NO_KEYWORDS
;
654 int32_t numKeywords
= 0;
655 const char* pos
= localeID
;
656 const char* equalSign
= NULL
;
657 const char* semicolon
= NULL
;
659 int32_t keywordsLen
= 0;
660 int32_t valuesLen
= 0;
662 if(prev
== '@') { /* start of keyword definition */
663 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
665 UBool duplicate
= FALSE
;
666 /* skip leading spaces */
670 if (!*pos
) { /* handle trailing "; " */
673 if(numKeywords
== maxKeywords
) {
674 *status
= U_INTERNAL_PROGRAM_ERROR
;
677 equalSign
= uprv_strchr(pos
, '=');
678 semicolon
= uprv_strchr(pos
, ';');
679 /* lack of '=' [foo@currency] is illegal */
680 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
681 if(!equalSign
|| (semicolon
&& semicolon
<equalSign
)) {
682 *status
= U_INVALID_FORMAT_ERROR
;
685 /* need to normalize both keyword and keyword name */
686 if(equalSign
- pos
>= ULOC_KEYWORD_BUFFER_LEN
) {
687 /* keyword name too long for internal buffer */
688 *status
= U_INTERNAL_PROGRAM_ERROR
;
691 for(i
= 0, n
= 0; i
< equalSign
- pos
; ++i
) {
693 keywordList
[numKeywords
].keyword
[n
++] = uprv_tolower(pos
[i
]);
696 keywordList
[numKeywords
].keyword
[n
] = 0;
697 keywordList
[numKeywords
].keywordLen
= n
;
698 /* now grab the value part. First we skip the '=' */
700 /* then we leading spaces */
701 while(*equalSign
== ' ') {
704 keywordList
[numKeywords
].valueStart
= equalSign
;
709 while(*(pos
- i
- 1) == ' ') {
712 keywordList
[numKeywords
].valueLen
= (int32_t)(pos
- equalSign
- i
);
715 i
= (int32_t)uprv_strlen(equalSign
);
716 while(equalSign
[i
-1] == ' ') {
719 keywordList
[numKeywords
].valueLen
= i
;
721 /* If this is a duplicate keyword, then ignore it */
722 for (j
=0; j
<numKeywords
; ++j
) {
723 if (uprv_strcmp(keywordList
[j
].keyword
, keywordList
[numKeywords
].keyword
) == 0) {
733 /* Handle addKeyword/addValue. */
734 if (addKeyword
!= NULL
) {
735 UBool duplicate
= FALSE
;
736 U_ASSERT(addValue
!= NULL
);
737 /* Search for duplicate; if found, do nothing. Explicit keyword
738 overrides addKeyword. */
739 for (j
=0; j
<numKeywords
; ++j
) {
740 if (uprv_strcmp(keywordList
[j
].keyword
, addKeyword
) == 0) {
746 if (numKeywords
== maxKeywords
) {
747 *status
= U_INTERNAL_PROGRAM_ERROR
;
750 uprv_strcpy(keywordList
[numKeywords
].keyword
, addKeyword
);
751 keywordList
[numKeywords
].keywordLen
= (int32_t)uprv_strlen(addKeyword
);
752 keywordList
[numKeywords
].valueStart
= addValue
;
753 keywordList
[numKeywords
].valueLen
= (int32_t)uprv_strlen(addValue
);
757 U_ASSERT(addValue
== NULL
);
760 /* now we have a list of keywords */
761 /* we need to sort it */
762 uprv_sortArray(keywordList
, numKeywords
, sizeof(KeywordStruct
), compareKeywordStructs
, NULL
, FALSE
, status
);
764 /* Now construct the keyword part */
765 for(i
= 0; i
< numKeywords
; i
++) {
766 if(keywordsLen
+ keywordList
[i
].keywordLen
+ 1< keywordCapacity
) {
767 uprv_strcpy(keywords
+keywordsLen
, keywordList
[i
].keyword
);
769 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = '=';
771 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = 0;
774 keywordsLen
+= keywordList
[i
].keywordLen
+ 1;
776 if(keywordsLen
+ keywordList
[i
].valueLen
< keywordCapacity
) {
777 uprv_strncpy(keywords
+keywordsLen
, keywordList
[i
].valueStart
, keywordList
[i
].valueLen
);
779 keywordsLen
+= keywordList
[i
].valueLen
;
781 if(i
< numKeywords
- 1) {
782 if(keywordsLen
< keywordCapacity
) {
783 keywords
[keywordsLen
] = ';';
789 if(valuesLen
+ keywordList
[i
].valueLen
+ 1< valuesCapacity
) {
790 uprv_strcpy(values
+valuesLen
, keywordList
[i
].valueStart
);
791 values
[valuesLen
+ keywordList
[i
].valueLen
] = 0;
793 valuesLen
+= keywordList
[i
].valueLen
+ 1;
797 values
[valuesLen
] = 0;
802 return u_terminateChars(keywords
, keywordCapacity
, keywordsLen
, status
);
809 locale_getKeywords(const char *localeID
,
811 char *keywords
, int32_t keywordCapacity
,
812 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
814 UErrorCode
*status
) {
815 return _getKeywords(localeID
, prev
, keywords
, keywordCapacity
,
816 values
, valuesCapacity
, valLen
, valuesToo
,
820 U_CAPI
int32_t U_EXPORT2
821 uloc_getKeywordValue(const char* localeID
,
822 const char* keywordName
,
823 char* buffer
, int32_t bufferCapacity
,
826 const char* startSearchHere
= NULL
;
827 const char* nextSeparator
= NULL
;
828 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
829 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
833 if(status
&& U_SUCCESS(*status
) && localeID
) {
834 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
835 const char* tmpLocaleID
;
837 if (_hasBCP47Extension(localeID
)) {
838 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
840 tmpLocaleID
=localeID
;
843 startSearchHere
= uprv_strchr(tmpLocaleID
, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
844 if(startSearchHere
== NULL
) {
845 /* no keywords, return at once */
849 locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
850 if(U_FAILURE(*status
)) {
854 /* find the first keyword */
855 while(startSearchHere
) {
857 /* skip leading spaces (allowed?) */
858 while(*startSearchHere
== ' ') {
861 nextSeparator
= uprv_strchr(startSearchHere
, '=');
862 /* need to normalize both keyword and keyword name */
866 if(nextSeparator
- startSearchHere
>= ULOC_KEYWORD_BUFFER_LEN
) {
867 /* keyword name too long for internal buffer */
868 *status
= U_INTERNAL_PROGRAM_ERROR
;
871 for(i
= 0; i
< nextSeparator
- startSearchHere
; i
++) {
872 localeKeywordNameBuffer
[i
] = uprv_tolower(startSearchHere
[i
]);
874 /* trim trailing spaces */
875 while(startSearchHere
[i
-1] == ' ') {
878 localeKeywordNameBuffer
[i
] = 0;
880 startSearchHere
= uprv_strchr(nextSeparator
, ';');
882 if(uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
) == 0) {
884 while(*nextSeparator
== ' ') {
887 /* we actually found the keyword. Copy the value */
888 if(startSearchHere
&& startSearchHere
- nextSeparator
< bufferCapacity
) {
889 while(*(startSearchHere
-1) == ' ') {
892 uprv_strncpy(buffer
, nextSeparator
, startSearchHere
- nextSeparator
);
893 result
= u_terminateChars(buffer
, bufferCapacity
, (int32_t)(startSearchHere
- nextSeparator
), status
);
894 } else if(!startSearchHere
&& (int32_t)uprv_strlen(nextSeparator
) < bufferCapacity
) { /* last item in string */
895 i
= (int32_t)uprv_strlen(nextSeparator
);
896 while(nextSeparator
[i
- 1] == ' ') {
899 uprv_strncpy(buffer
, nextSeparator
, i
);
900 result
= u_terminateChars(buffer
, bufferCapacity
, i
, status
);
902 /* give a bigger buffer, please */
903 *status
= U_BUFFER_OVERFLOW_ERROR
;
904 if(startSearchHere
) {
905 result
= (int32_t)(startSearchHere
- nextSeparator
);
907 result
= (int32_t)uprv_strlen(nextSeparator
);
917 U_CAPI
int32_t U_EXPORT2
918 uloc_setKeywordValue(const char* keywordName
,
919 const char* keywordValue
,
920 char* buffer
, int32_t bufferCapacity
,
923 /* TODO: sorting. removal. */
924 int32_t keywordNameLen
;
925 int32_t keywordValueLen
;
928 int32_t foundValueLen
;
929 int32_t keywordAtEnd
= 0; /* is the keyword at the end of the string? */
930 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
931 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
934 char* nextSeparator
= NULL
;
935 char* nextEqualsign
= NULL
;
936 char* startSearchHere
= NULL
;
937 char* keywordStart
= NULL
;
938 char *insertHere
= NULL
;
939 if(U_FAILURE(*status
)) {
942 if(bufferCapacity
>1) {
943 bufLen
= (int32_t)uprv_strlen(buffer
);
945 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
948 if(bufferCapacity
<bufLen
) {
949 /* The capacity is less than the length?! Is this NULL terminated? */
950 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
953 if(keywordValue
&& !*keywordValue
) {
957 keywordValueLen
= (int32_t)uprv_strlen(keywordValue
);
961 keywordNameLen
= locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
962 if(U_FAILURE(*status
)) {
965 startSearchHere
= (char*)locale_getKeywordsStart(buffer
);
966 if(startSearchHere
== NULL
|| (startSearchHere
[1]==0)) {
967 if(!keywordValue
) { /* no keywords = nothing to remove */
971 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
972 if(startSearchHere
) { /* had a single @ */
973 needLen
--; /* already had the @ */
974 /* startSearchHere points at the @ */
976 startSearchHere
=buffer
+bufLen
;
978 if(needLen
>= bufferCapacity
) {
979 *status
= U_BUFFER_OVERFLOW_ERROR
;
980 return needLen
; /* no change */
982 *startSearchHere
= '@';
984 uprv_strcpy(startSearchHere
, keywordNameBuffer
);
985 startSearchHere
+= keywordNameLen
;
986 *startSearchHere
= '=';
988 uprv_strcpy(startSearchHere
, keywordValue
);
989 startSearchHere
+=keywordValueLen
;
991 } /* end shortcut - no @ */
993 keywordStart
= startSearchHere
;
994 /* search for keyword */
995 while(keywordStart
) {
997 /* skip leading spaces (allowed?) */
998 while(*keywordStart
== ' ') {
1001 nextEqualsign
= uprv_strchr(keywordStart
, '=');
1002 /* need to normalize both keyword and keyword name */
1003 if(!nextEqualsign
) {
1006 if(nextEqualsign
- keywordStart
>= ULOC_KEYWORD_BUFFER_LEN
) {
1007 /* keyword name too long for internal buffer */
1008 *status
= U_INTERNAL_PROGRAM_ERROR
;
1011 for(i
= 0; i
< nextEqualsign
- keywordStart
; i
++) {
1012 localeKeywordNameBuffer
[i
] = uprv_tolower(keywordStart
[i
]);
1014 /* trim trailing spaces */
1015 while(keywordStart
[i
-1] == ' ') {
1018 localeKeywordNameBuffer
[i
] = 0;
1020 nextSeparator
= uprv_strchr(nextEqualsign
, ';');
1021 rc
= uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
);
1024 while(*nextEqualsign
== ' ') {
1027 /* we actually found the keyword. Change the value */
1028 if (nextSeparator
) {
1030 foundValueLen
= (int32_t)(nextSeparator
- nextEqualsign
);
1033 foundValueLen
= (int32_t)uprv_strlen(nextEqualsign
);
1035 if(keywordValue
) { /* adding a value - not removing */
1036 if(foundValueLen
== keywordValueLen
) {
1037 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1038 return bufLen
; /* no change in size */
1039 } else if(foundValueLen
> keywordValueLen
) {
1040 int32_t delta
= foundValueLen
- keywordValueLen
;
1041 if(nextSeparator
) { /* RH side */
1042 uprv_memmove(nextSeparator
- delta
, nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1044 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1048 } else { /* FVL < KVL */
1049 int32_t delta
= keywordValueLen
- foundValueLen
;
1050 if((bufLen
+delta
) >= bufferCapacity
) {
1051 *status
= U_BUFFER_OVERFLOW_ERROR
;
1052 return bufLen
+delta
;
1054 if(nextSeparator
) { /* RH side */
1055 uprv_memmove(nextSeparator
+delta
,nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1057 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1062 } else { /* removing a keyword */
1064 /* zero out the ';' or '@' just before startSearchhere */
1065 keywordStart
[-1] = 0;
1066 return (int32_t)((keywordStart
-buffer
)-1); /* (string length without keyword) minus separator */
1068 uprv_memmove(keywordStart
, nextSeparator
+1, bufLen
-((nextSeparator
+1)-buffer
));
1069 keywordStart
[bufLen
-((nextSeparator
+1)-buffer
)]=0;
1070 return (int32_t)(bufLen
-((nextSeparator
+1)-keywordStart
));
1073 } else if(rc
<0){ /* end match keyword */
1074 /* could insert at this location. */
1075 insertHere
= keywordStart
;
1077 keywordStart
= nextSeparator
;
1078 } /* end loop searching */
1081 return bufLen
; /* removal of non-extant keyword - no change */
1084 /* we know there is at least one keyword. */
1085 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
1086 if(needLen
>= bufferCapacity
) {
1087 *status
= U_BUFFER_OVERFLOW_ERROR
;
1088 return needLen
; /* no change */
1092 uprv_memmove(insertHere
+(1+keywordNameLen
+1+keywordValueLen
), insertHere
, bufLen
-(insertHere
-buffer
));
1093 keywordStart
= insertHere
;
1095 keywordStart
= buffer
+bufLen
;
1096 *keywordStart
= ';';
1099 uprv_strncpy(keywordStart
, keywordNameBuffer
, keywordNameLen
);
1100 keywordStart
+= keywordNameLen
;
1101 *keywordStart
= '=';
1103 uprv_strncpy(keywordStart
, keywordValue
, keywordValueLen
); /* terminates. */
1104 keywordStart
+=keywordValueLen
;
1106 *keywordStart
= ';';
1113 /* ### ID parsing implementation **************************************************/
/* TRUE if a is one of the letters that may start a special prefix:
 * 'x', 'X', 'i' or 'I'. The argument is parenthesized so that expression
 * arguments bind correctly; it is evaluated more than once. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))
/* returns TRUE if one of the special prefixes is here (s=string):
 * a prefix letter (see _isPrefixLetter) immediately followed by an ID
 * separator (see _isIDSeparator). s is parenthesized before indexing so
 * pointer expressions such as p+1 can be passed; s is evaluated more than once. */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))
/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant.
 * TRUE if a ends the current locale ID field: NUL, '.' or '@'.
 * The argument is parenthesized so that expression arguments bind correctly;
 * it is evaluated more than once.
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
/*
 * Looks for character c within the first len characters of str, stopping
 * early at a NUL. The match test runs before the NUL test, so searching for
 * c == 0 finds the terminator when it lies within range.
 * Returns a pointer to the first match, or NULL if there is none.
 */
static char* _strnchr(const char* str, int32_t len, char c) {
    int32_t remaining;

    U_ASSERT(str != 0 && len >= 0);
    for (remaining = len; remaining != 0; --remaining, ++str) {
        const char here = *str;
        if (here == c) {
            return (char*) str;
        }
        if (here == 0) {
            break;
        }
    }
    return NULL;
}
/**
 * Lookup 'key' in the array 'list'.  The array 'list' should contain
 * a NULL entry, followed by more entries, and a second NULL entry.
 *
 * The 'list' param should be one of the two-section tables of this file,
 * e.g. LANGUAGES, LANGUAGES_3, COUNTRIES or COUNTRIES_3.
 *
 * @return the offset of the matching entry from the start of 'list'
 *         (the first NULL occupies a slot, so offsets in the second section
 *         are counted past it), or -1 if 'key' is in neither section
 */
static int16_t _findIndex(const char* const* list, const char* key)
{
    const char* const* cursor = list;
    int32_t section;

    /* Make two passes through two NULL-terminated arrays at 'list' */
    for (section = 0; section < 2; ++section) {
        for (; *cursor != NULL; ++cursor) {
            if (uprv_strcmp(key, *cursor) == 0) {
                return (int16_t)(cursor - list);
            }
        }
        ++cursor;     /* step over the NULL that ends this section */
    }
    return -1;
}
1165 /* count the length of src while copying it to dest; return strlen(src) */
1166 static U_INLINE
int32_t
1167 _copyCount(char *dest
, int32_t destCapacity
, const char *src
) {
1174 return (int32_t)(src
-anchor
);
1176 if(destCapacity
<=0) {
1177 return (int32_t)((src
-anchor
)+uprv_strlen(src
));
1186 uloc_getCurrentCountryID(const char* oldID
){
1187 int32_t offset
= _findIndex(DEPRECATED_COUNTRIES
, oldID
);
1189 return REPLACEMENT_COUNTRIES
[offset
];
1194 uloc_getCurrentLanguageID(const char* oldID
){
1195 int32_t offset
= _findIndex(DEPRECATED_LANGUAGES
, oldID
);
1197 return REPLACEMENT_LANGUAGES
[offset
];
1202 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1203 * avoid duplicating code to handle the earlier locale ID pieces
1204 * in the functions for the later ones by
1205 * setting the *pEnd pointer to where they stopped parsing
1207 * TODO try to use this in Locale
1210 ulocimp_getLanguage(const char *localeID
,
1211 char *language
, int32_t languageCapacity
,
1212 const char **pEnd
) {
1215 char lang
[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1217 /* if it starts with i- or x- then copy that prefix */
1218 if(_isIDPrefix(localeID
)) {
1219 if(i
<languageCapacity
) {
1220 language
[i
]=(char)uprv_tolower(*localeID
);
1222 if(i
<languageCapacity
) {
1229 /* copy the language as far as possible and count its length */
1230 while(!_isTerminator(*localeID
) && !_isIDSeparator(*localeID
)) {
1231 if(i
<languageCapacity
) {
1232 language
[i
]=(char)uprv_tolower(*localeID
);
1235 lang
[i
]=(char)uprv_tolower(*localeID
);
1242 /* convert 3 character code to 2 character code if possible *CWB*/
1243 offset
=_findIndex(LANGUAGES_3
, lang
);
1245 i
=_copyCount(language
, languageCapacity
, LANGUAGES
[offset
]);
1256 ulocimp_getScript(const char *localeID
,
1257 char *script
, int32_t scriptCapacity
,
1266 /* copy the second item as far as possible and count its length */
1267 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])) {
1271 /* If it's exactly 4 characters long, then it's a script and not a country. */
1275 *pEnd
= localeID
+idLen
;
1277 if(idLen
> scriptCapacity
) {
1278 idLen
= scriptCapacity
;
1281 script
[0]=(char)uprv_toupper(*(localeID
++));
1283 for (i
= 1; i
< idLen
; i
++) {
1284 script
[i
]=(char)uprv_tolower(*(localeID
++));
1294 ulocimp_getCountry(const char *localeID
,
1295 char *country
, int32_t countryCapacity
,
1299 char cnty
[ULOC_COUNTRY_CAPACITY
]={ 0, 0, 0, 0 };
1302 /* copy the country as far as possible and count its length */
1303 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])) {
1304 if(idLen
<(ULOC_COUNTRY_CAPACITY
-1)) { /*CWB*/
1305 cnty
[idLen
]=(char)uprv_toupper(localeID
[idLen
]);
1310 /* the country should be either length 2 or 3 */
1311 if (idLen
== 2 || idLen
== 3) {
1312 UBool gotCountry
= FALSE
;
1313 /* convert 3 character code to 2 character code if possible *CWB*/
1315 offset
=_findIndex(COUNTRIES_3
, cnty
);
1317 idLen
=_copyCount(country
, countryCapacity
, COUNTRIES
[offset
]);
1323 for (i
= 0; i
< idLen
; i
++) {
1324 if (i
< countryCapacity
) {
1325 country
[i
]=(char)uprv_toupper(localeID
[i
]);
1342 * @param needSeparator if true, then add leading '_' if any variants
1343 * are added to 'variant'
1346 _getVariantEx(const char *localeID
,
1348 char *variant
, int32_t variantCapacity
,
1349 UBool needSeparator
) {
1352 /* get one or more variant tags and separate them with '_' */
1353 if(_isIDSeparator(prev
)) {
1354 /* get a variant string after a '-' or '_' */
1355 while(!_isTerminator(*localeID
)) {
1356 if (needSeparator
) {
1357 if (i
<variantCapacity
) {
1361 needSeparator
= FALSE
;
1363 if(i
<variantCapacity
) {
1364 variant
[i
]=(char)uprv_toupper(*localeID
);
1365 if(variant
[i
]=='-') {
1374 /* if there is no variant tag after a '-' or '_' then look for '@' */
1378 } else if((localeID
=locale_getKeywordsStart(localeID
))!=NULL
) {
1379 ++localeID
; /* point after the '@' */
1383 while(!_isTerminator(*localeID
)) {
1384 if (needSeparator
) {
1385 if (i
<variantCapacity
) {
1389 needSeparator
= FALSE
;
1391 if(i
<variantCapacity
) {
1392 variant
[i
]=(char)uprv_toupper(*localeID
);
1393 if(variant
[i
]=='-' || variant
[i
]==',') {
1406 _getVariant(const char *localeID
,
1408 char *variant
, int32_t variantCapacity
) {
1409 return _getVariantEx(localeID
, prev
, variant
, variantCapacity
, FALSE
);
1413 * Delete ALL instances of a variant from the given list of one or
1414 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1415 * @param variants the source string of one or more variants,
1416 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1417 * terminated; if it is, trailing zero will NOT be maintained.
1418 * @param variantsLen length of variants
1419 * @param toDelete variant to delete, without separators, e.g. "EURO"
1420 * or "PREEURO"; not zero terminated
1421 * @param toDeleteLen length of toDelete
1422 * @return number of characters deleted from variants
1425 _deleteVariant(char* variants
, int32_t variantsLen
,
1426 const char* toDelete
, int32_t toDeleteLen
)
1428 int32_t delta
= 0; /* number of chars deleted */
1431 if (variantsLen
< toDeleteLen
) {
1434 if (uprv_strncmp(variants
, toDelete
, toDeleteLen
) == 0 &&
1435 (variantsLen
== toDeleteLen
||
1436 (flag
=(variants
[toDeleteLen
] == '_'))))
1438 int32_t d
= toDeleteLen
+ (flag
?1:0);
1441 if (variantsLen
> 0) {
1442 uprv_memmove(variants
, variants
+d
, variantsLen
);
1445 char* p
= _strnchr(variants
, variantsLen
, '_');
1450 variantsLen
-= (int32_t)(p
- variants
);
1456 /* Keyword enumeration */
1458 typedef struct UKeywordsContext
{
1463 static void U_CALLCONV
1464 uloc_kw_closeKeywords(UEnumeration
*enumerator
) {
1465 uprv_free(((UKeywordsContext
*)enumerator
->context
)->keywords
);
1466 uprv_free(enumerator
->context
);
1467 uprv_free(enumerator
);
1470 static int32_t U_CALLCONV
1471 uloc_kw_countKeywords(UEnumeration
*en
, UErrorCode
*status
) {
1472 char *kw
= ((UKeywordsContext
*)en
->context
)->keywords
;
1476 kw
+= uprv_strlen(kw
)+1;
1481 static const char* U_CALLCONV
1482 uloc_kw_nextKeyword(UEnumeration
* en
,
1483 int32_t* resultLength
,
1484 UErrorCode
* status
) {
1485 const char* result
= ((UKeywordsContext
*)en
->context
)->current
;
1488 len
= (int32_t)uprv_strlen(((UKeywordsContext
*)en
->context
)->current
);
1489 ((UKeywordsContext
*)en
->context
)->current
+= len
+1;
1494 *resultLength
= len
;
1499 static void U_CALLCONV
1500 uloc_kw_resetKeywords(UEnumeration
* en
,
1501 UErrorCode
* status
) {
1502 ((UKeywordsContext
*)en
->context
)->current
= ((UKeywordsContext
*)en
->context
)->keywords
;
1505 static const UEnumeration gKeywordsEnum
= {
1508 uloc_kw_closeKeywords
,
1509 uloc_kw_countKeywords
,
1511 uloc_kw_nextKeyword
,
1512 uloc_kw_resetKeywords
1515 U_CAPI UEnumeration
* U_EXPORT2
1516 uloc_openKeywordList(const char *keywordList
, int32_t keywordListSize
, UErrorCode
* status
)
1518 UKeywordsContext
*myContext
= NULL
;
1519 UEnumeration
*result
= NULL
;
1521 if(U_FAILURE(*status
)) {
1524 result
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
1525 /* Null pointer test */
1526 if (result
== NULL
) {
1527 *status
= U_MEMORY_ALLOCATION_ERROR
;
1530 uprv_memcpy(result
, &gKeywordsEnum
, sizeof(UEnumeration
));
1531 myContext
= uprv_malloc(sizeof(UKeywordsContext
));
1532 if (myContext
== NULL
) {
1533 *status
= U_MEMORY_ALLOCATION_ERROR
;
1537 myContext
->keywords
= (char *)uprv_malloc(keywordListSize
+1);
1538 uprv_memcpy(myContext
->keywords
, keywordList
, keywordListSize
);
1539 myContext
->keywords
[keywordListSize
] = 0;
1540 myContext
->current
= myContext
->keywords
;
1541 result
->context
= myContext
;
1545 U_CAPI UEnumeration
* U_EXPORT2
1546 uloc_openKeywords(const char* localeID
,
1551 int32_t keywordsCapacity
= 256;
1552 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1553 const char* tmpLocaleID
;
1555 if(status
==NULL
|| U_FAILURE(*status
)) {
1559 if (_hasBCP47Extension(localeID
)) {
1560 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
1562 if (localeID
==NULL
) {
1563 localeID
=uloc_getDefault();
1565 tmpLocaleID
=localeID
;
1568 /* Skip the language */
1569 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
1570 if(_isIDSeparator(*tmpLocaleID
)) {
1571 const char *scriptID
;
1572 /* Skip the script if available */
1573 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
1574 if(scriptID
!= tmpLocaleID
+1) {
1575 /* Found optional script */
1576 tmpLocaleID
= scriptID
;
1578 /* Skip the Country */
1579 if (_isIDSeparator(*tmpLocaleID
)) {
1580 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &tmpLocaleID
);
1581 if(_isIDSeparator(*tmpLocaleID
)) {
1582 _getVariant(tmpLocaleID
+1, *tmpLocaleID
, NULL
, 0);
1587 /* keywords are located after '@' */
1588 if((tmpLocaleID
= locale_getKeywordsStart(tmpLocaleID
)) != NULL
) {
1589 i
=locale_getKeywords(tmpLocaleID
+1, '@', keywords
, keywordsCapacity
, NULL
, 0, NULL
, FALSE
, status
);
1593 return uloc_openKeywordList(keywords
, i
, status
);
1600 /* bit-flags for 'options' parameter of _canonicalize */
1601 #define _ULOC_STRIP_KEYWORDS 0x2
1602 #define _ULOC_CANONICALIZE 0x1
/* TRUE if any bit of mask is set in options. Both arguments are
 * parenthesized so that compound expressions (e.g. A|B) bind correctly. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)
1606 static const char i_default
[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
1607 #define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1610 * Canonicalize the given localeID, to level 1 or to level 2,
1611 * depending on the options. To specify level 1, pass in options=0.
1612 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1614 * This is the code underlying uloc_getName and uloc_canonicalize.
1617 _canonicalize(const char* localeID
,
1619 int32_t resultCapacity
,
1622 int32_t j
, len
, fieldCount
=0, scriptSize
=0, variantSize
=0, nameCapacity
;
1623 char localeBuffer
[ULOC_FULLNAME_CAPACITY
];
1624 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1625 const char* origLocaleID
;
1626 const char* tmpLocaleID
;
1627 const char* keywordAssign
= NULL
;
1628 const char* separatorIndicator
= NULL
;
1629 const char* addKeyword
= NULL
;
1630 const char* addValue
= NULL
;
1632 char* variant
= NULL
; /* pointer into name, or NULL */
1634 if (U_FAILURE(*err
)) {
1638 if (_hasBCP47Extension(localeID
)) {
1639 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
1641 if (localeID
==NULL
) {
1642 localeID
=uloc_getDefault();
1644 tmpLocaleID
=localeID
;
1647 origLocaleID
=tmpLocaleID
;
1649 /* if we are doing a full canonicalization, then put results in
1650 localeBuffer, if necessary; otherwise send them to result. */
1651 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1652 (result
== NULL
|| resultCapacity
< sizeof(localeBuffer
))) {
1653 name
= localeBuffer
;
1654 nameCapacity
= sizeof(localeBuffer
);
1657 nameCapacity
= resultCapacity
;
1660 /* get all pieces, one after another, and separate with '_' */
1661 len
=ulocimp_getLanguage(tmpLocaleID
, name
, nameCapacity
, &tmpLocaleID
);
1663 if(len
== I_DEFAULT_LENGTH
&& uprv_strncmp(origLocaleID
, i_default
, len
) == 0) {
1664 const char *d
= uloc_getDefault();
1666 len
= (int32_t)uprv_strlen(d
);
1669 uprv_strncpy(name
, d
, len
);
1671 } else if(_isIDSeparator(*tmpLocaleID
)) {
1672 const char *scriptID
;
1675 if(len
<nameCapacity
) {
1680 scriptSize
=ulocimp_getScript(tmpLocaleID
+1, name
+len
, nameCapacity
-len
, &scriptID
);
1681 if(scriptSize
> 0) {
1682 /* Found optional script */
1683 tmpLocaleID
= scriptID
;
1686 if (_isIDSeparator(*tmpLocaleID
)) {
1687 /* If there is something else, then we add the _ */
1688 if(len
<nameCapacity
) {
1695 if (_isIDSeparator(*tmpLocaleID
)) {
1696 const char *cntryID
;
1697 int32_t cntrySize
= ulocimp_getCountry(tmpLocaleID
+1, name
+len
, nameCapacity
-len
, &cntryID
);
1698 if (cntrySize
> 0) {
1699 /* Found optional country */
1700 tmpLocaleID
= cntryID
;
1703 if(_isIDSeparator(*tmpLocaleID
)) {
1704 /* If there is something else, then we add the _ if we found country before.*/
1705 if (cntrySize
> 0) {
1707 if(len
<nameCapacity
) {
1713 variantSize
= _getVariant(tmpLocaleID
+1, *tmpLocaleID
, name
+len
, nameCapacity
-len
);
1714 if (variantSize
> 0) {
1717 tmpLocaleID
+= variantSize
+ 1; /* skip '_' and variant */
1723 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1724 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) && *tmpLocaleID
== '.') {
1727 char c
= *tmpLocaleID
;
1734 if (len
<nameCapacity
) {
1744 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1745 After this, tmpLocaleID either points to '@' or is NULL */
1746 if ((tmpLocaleID
=locale_getKeywordsStart(tmpLocaleID
))!=NULL
) {
1747 keywordAssign
= uprv_strchr(tmpLocaleID
, '=');
1748 separatorIndicator
= uprv_strchr(tmpLocaleID
, ';');
1751 /* Copy POSIX-style variant, if any [mr@FOO] */
1752 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) &&
1753 tmpLocaleID
!= NULL
&& keywordAssign
== NULL
) {
1755 char c
= *tmpLocaleID
;
1759 if (len
<nameCapacity
) {
1767 if (OPTION_SET(options
, _ULOC_CANONICALIZE
)) {
1768 /* Handle @FOO variant if @ is present and not followed by = */
1769 if (tmpLocaleID
!=NULL
&& keywordAssign
==NULL
) {
1770 int32_t posixVariantSize
;
1771 /* Add missing '_' if needed */
1772 if (fieldCount
< 2 || (fieldCount
< 3 && scriptSize
> 0)) {
1774 if(len
<nameCapacity
) {
1779 } while(fieldCount
<2);
1781 posixVariantSize
= _getVariantEx(tmpLocaleID
+1, '@', name
+len
, nameCapacity
-len
,
1782 (UBool
)(variantSize
> 0));
1783 if (posixVariantSize
> 0) {
1784 if (variant
== NULL
) {
1787 len
+= posixVariantSize
;
1788 variantSize
+= posixVariantSize
;
1792 /* Handle generic variants first */
1794 for (j
=0; j
<(int32_t)(sizeof(VARIANT_MAP
)/sizeof(VARIANT_MAP
[0])); j
++) {
1795 const char* variantToCompare
= VARIANT_MAP
[j
].variant
;
1796 int32_t n
= (int32_t)uprv_strlen(variantToCompare
);
1797 int32_t variantLen
= _deleteVariant(variant
, uprv_min(variantSize
, (nameCapacity
-len
)), variantToCompare
, n
);
1799 if (variantLen
> 0) {
1800 if (name
[len
-1] == '_') { /* delete trailing '_' */
1803 addKeyword
= VARIANT_MAP
[j
].keyword
;
1804 addValue
= VARIANT_MAP
[j
].value
;
1808 if (name
[len
-1] == '_') { /* delete trailing '_' */
1813 /* Look up the ID in the canonicalization map */
1814 for (j
=0; j
<(int32_t)(sizeof(CANONICALIZE_MAP
)/sizeof(CANONICALIZE_MAP
[0])); j
++) {
1815 const char* id
= CANONICALIZE_MAP
[j
].id
;
1816 int32_t n
= (int32_t)uprv_strlen(id
);
1817 if (len
== n
&& uprv_strncmp(name
, id
, n
) == 0) {
1818 if (n
== 0 && tmpLocaleID
!= NULL
) {
1819 break; /* Don't remap "" if keywords present */
1821 len
= _copyCount(name
, nameCapacity
, CANONICALIZE_MAP
[j
].canonicalID
);
1822 if (CANONICALIZE_MAP
[j
].keyword
) {
1823 addKeyword
= CANONICALIZE_MAP
[j
].keyword
;
1824 addValue
= CANONICALIZE_MAP
[j
].value
;
1831 if (!OPTION_SET(options
, _ULOC_STRIP_KEYWORDS
)) {
1832 if (tmpLocaleID
!=NULL
&& keywordAssign
!=NULL
&&
1833 (!separatorIndicator
|| separatorIndicator
> keywordAssign
)) {
1834 if(len
<nameCapacity
) {
1839 len
+= _getKeywords(tmpLocaleID
+1, '@', name
+len
, nameCapacity
-len
, NULL
, 0, NULL
, TRUE
,
1840 addKeyword
, addValue
, err
);
1841 } else if (addKeyword
!= NULL
) {
1842 U_ASSERT(addValue
!= NULL
);
1843 /* inelegant but works -- later make _getKeywords do this? */
1844 len
+= _copyCount(name
+len
, nameCapacity
-len
, "@");
1845 len
+= _copyCount(name
+len
, nameCapacity
-len
, addKeyword
);
1846 len
+= _copyCount(name
+len
, nameCapacity
-len
, "=");
1847 len
+= _copyCount(name
+len
, nameCapacity
-len
, addValue
);
1851 if (U_SUCCESS(*err
) && result
!= NULL
&& name
== localeBuffer
) {
1852 uprv_strncpy(result
, localeBuffer
, (len
> resultCapacity
) ? resultCapacity
: len
);
1855 return u_terminateChars(result
, resultCapacity
, len
, err
);
1858 /* ### ID parsing API **************************************************/
1860 U_CAPI
int32_t U_EXPORT2
1861 uloc_getParent(const char* localeID
,
1863 int32_t parentCapacity
,
1866 const char *lastUnderscore
;
1869 if (U_FAILURE(*err
))
1872 if (localeID
== NULL
)
1873 localeID
= uloc_getDefault();
1875 lastUnderscore
=uprv_strrchr(localeID
, '_');
1876 if(lastUnderscore
!=NULL
) {
1877 i
=(int32_t)(lastUnderscore
-localeID
);
1882 if(i
>0 && parent
!= localeID
) {
1883 uprv_memcpy(parent
, localeID
, uprv_min(i
, parentCapacity
));
1885 return u_terminateChars(parent
, parentCapacity
, i
, err
);
1888 U_CAPI
int32_t U_EXPORT2
1889 uloc_getLanguage(const char* localeID
,
1891 int32_t languageCapacity
,
1894 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1897 if (err
==NULL
|| U_FAILURE(*err
)) {
1901 if(localeID
==NULL
) {
1902 localeID
=uloc_getDefault();
1905 i
=ulocimp_getLanguage(localeID
, language
, languageCapacity
, NULL
);
1906 return u_terminateChars(language
, languageCapacity
, i
, err
);
1909 U_CAPI
int32_t U_EXPORT2
1910 uloc_getScript(const char* localeID
,
1912 int32_t scriptCapacity
,
1917 if(err
==NULL
|| U_FAILURE(*err
)) {
1921 if(localeID
==NULL
) {
1922 localeID
=uloc_getDefault();
1925 /* skip the language */
1926 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
1927 if(_isIDSeparator(*localeID
)) {
1928 i
=ulocimp_getScript(localeID
+1, script
, scriptCapacity
, NULL
);
1930 return u_terminateChars(script
, scriptCapacity
, i
, err
);
1933 U_CAPI
int32_t U_EXPORT2
1934 uloc_getCountry(const char* localeID
,
1936 int32_t countryCapacity
,
1941 if(err
==NULL
|| U_FAILURE(*err
)) {
1945 if(localeID
==NULL
) {
1946 localeID
=uloc_getDefault();
1949 /* Skip the language */
1950 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
1951 if(_isIDSeparator(*localeID
)) {
1952 const char *scriptID
;
1953 /* Skip the script if available */
1954 ulocimp_getScript(localeID
+1, NULL
, 0, &scriptID
);
1955 if(scriptID
!= localeID
+1) {
1956 /* Found optional script */
1957 localeID
= scriptID
;
1959 if(_isIDSeparator(*localeID
)) {
1960 i
=ulocimp_getCountry(localeID
+1, country
, countryCapacity
, NULL
);
1963 return u_terminateChars(country
, countryCapacity
, i
, err
);
1966 U_CAPI
int32_t U_EXPORT2
1967 uloc_getVariant(const char* localeID
,
1969 int32_t variantCapacity
,
1972 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1973 const char* tmpLocaleID
;
1976 if(err
==NULL
|| U_FAILURE(*err
)) {
1980 if (_hasBCP47Extension(localeID
)) {
1981 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
1983 if (localeID
==NULL
) {
1984 localeID
=uloc_getDefault();
1986 tmpLocaleID
=localeID
;
1989 /* Skip the language */
1990 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
1991 if(_isIDSeparator(*tmpLocaleID
)) {
1992 const char *scriptID
;
1993 /* Skip the script if available */
1994 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
1995 if(scriptID
!= tmpLocaleID
+1) {
1996 /* Found optional script */
1997 tmpLocaleID
= scriptID
;
1999 /* Skip the Country */
2000 if (_isIDSeparator(*tmpLocaleID
)) {
2001 const char *cntryID
;
2002 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &cntryID
);
2003 if (cntryID
!= tmpLocaleID
+1) {
2004 /* Found optional country */
2005 tmpLocaleID
= cntryID
;
2007 if(_isIDSeparator(*tmpLocaleID
)) {
2008 /* If there was no country ID, skip a possible extra IDSeparator */
2009 if (tmpLocaleID
!= cntryID
&& _isIDSeparator(tmpLocaleID
[1])) {
2012 i
=_getVariant(tmpLocaleID
+1, *tmpLocaleID
, variant
, variantCapacity
);
2017 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2018 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2020 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2021 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2024 return u_terminateChars(variant
, variantCapacity
, i
, err
);
2027 U_CAPI
int32_t U_EXPORT2
2028 uloc_getName(const char* localeID
,
2030 int32_t nameCapacity
,
2033 return _canonicalize(localeID
, name
, nameCapacity
, 0, err
);
2036 U_CAPI
int32_t U_EXPORT2
2037 uloc_getBaseName(const char* localeID
,
2039 int32_t nameCapacity
,
2042 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_STRIP_KEYWORDS
, err
);
2045 U_CAPI
int32_t U_EXPORT2
2046 uloc_canonicalize(const char* localeID
,
2048 int32_t nameCapacity
,
2051 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_CANONICALIZE
, err
);
2054 U_CAPI
const char* U_EXPORT2
2055 uloc_getISO3Language(const char* localeID
)
2058 char lang
[ULOC_LANG_CAPACITY
];
2059 UErrorCode err
= U_ZERO_ERROR
;
2061 if (localeID
== NULL
)
2063 localeID
= uloc_getDefault();
2065 uloc_getLanguage(localeID
, lang
, ULOC_LANG_CAPACITY
, &err
);
2068 offset
= _findIndex(LANGUAGES
, lang
);
2071 return LANGUAGES_3
[offset
];
2074 U_CAPI
const char* U_EXPORT2
2075 uloc_getISO3Country(const char* localeID
)
2078 char cntry
[ULOC_LANG_CAPACITY
];
2079 UErrorCode err
= U_ZERO_ERROR
;
2081 if (localeID
== NULL
)
2083 localeID
= uloc_getDefault();
2085 uloc_getCountry(localeID
, cntry
, ULOC_LANG_CAPACITY
, &err
);
2088 offset
= _findIndex(COUNTRIES
, cntry
);
2092 return COUNTRIES_3
[offset
];
2095 U_CAPI
uint32_t U_EXPORT2
2096 uloc_getLCID(const char* localeID
)
2098 UErrorCode status
= U_ZERO_ERROR
;
2099 char langID
[ULOC_FULLNAME_CAPACITY
];
2101 uloc_getLanguage(localeID
, langID
, sizeof(langID
), &status
);
2102 if (U_FAILURE(status
)) {
2106 return uprv_convertToLCID(langID
, localeID
, &status
);
2109 U_CAPI
int32_t U_EXPORT2
2110 uloc_getLocaleForLCID(uint32_t hostid
, char *locale
, int32_t localeCapacity
,
2114 const char *posix
= uprv_convertToPosix(hostid
, status
);
2115 if (U_FAILURE(*status
) || posix
== NULL
) {
2118 length
= (int32_t)uprv_strlen(posix
);
2119 if (length
+1 > localeCapacity
) {
2120 *status
= U_BUFFER_OVERFLOW_ERROR
;
2123 uprv_strcpy(locale
, posix
);
2128 /* ### Default locale **************************************************/
2130 U_CAPI
const char* U_EXPORT2
2133 return locale_get_default();
2136 U_CAPI
void U_EXPORT2
2137 uloc_setDefault(const char* newDefaultLocale
,
2140 if (U_FAILURE(*err
))
2142 /* the error code isn't currently used for anything by this function*/
2144 /* propagate change to C++ */
2145 locale_set_default(newDefaultLocale
);
2149 * Returns a list of all language codes defined in ISO 639. This is a pointer
2150 * to an array of pointers to arrays of char. All of these pointers are owned
2151 * by ICU-- do not delete them, and do not write through them. The array is
2152 * terminated with a null pointer.
2154 U_CAPI
const char* const* U_EXPORT2
2155 uloc_getISOLanguages()
2161 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2162 * pointer to an array of pointers to arrays of char. All of these pointers are
2163 * owned by ICU-- do not delete them, and do not write through them. The array is
2164 * terminated with a null pointer.
2166 U_CAPI
const char* const* U_EXPORT2
2167 uloc_getISOCountries()
/* this function to be moved into cstring.c later */

/* Decimal separator used by the C runtime; 0 until first probed. */
static char gDecimal = 0;

/*
 * strtod() wrapper that always accepts '.' as the decimal separator, even
 * when the C runtime has been switched to a different one.
 *
 * start  text to parse.
 * end    if not NULL, receives the position in start following the number.
 *
 * When the runtime separator is not '.', at most the first 29 characters of
 * start are examined.
 */
static /* U_CAPI */
double
/* U_EXPORT2 */
_uloc_strtod(const char *start, char **end) {
    char *decimal;
    char *myEnd;
    char buf[30];
    double rv;
    if (!gDecimal) {
        /*
         * "+1.0" needs exactly five bytes only while the separator is a
         * single byte; the buffer is sized generously so that a multi-byte
         * separator cannot overflow it.
         */
        char rep[32];
        /* For machines that decide to change the decimal on you,
        and try to be too smart with localization.
        This normally should be just a '.'. */
        sprintf(rep, "%+1.1f", 1.0);
        /* rep is "+1<sep>0"; only the first byte of the separator is kept. */
        gDecimal = rep[2];
    }

    if(gDecimal == '.') {
        return uprv_strtod(start, end); /* fall through to OS */
    } else {
        uprv_strncpy(buf, start, 29);
        buf[29]=0;
        decimal = uprv_strchr(buf, '.');
        if(decimal) {
            /* give the runtime the separator it expects */
            *decimal = gDecimal;
        } else {
            return uprv_strtod(start, end); /* no decimal point */
        }
        rv = uprv_strtod(buf, &myEnd);
        if(end) {
            /* translate the end position in the copy back into start */
            *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
        }
        return rv;
    }
}
2214 int32_t dummy
; /* to avoid uninitialized memory copy from qsort */
2218 static int32_t U_CALLCONV
2219 uloc_acceptLanguageCompare(const void *context
, const void *a
, const void *b
)
2221 const _acceptLangItem
*aa
= (const _acceptLangItem
*)a
;
2222 const _acceptLangItem
*bb
= (const _acceptLangItem
*)b
;
2226 rc
= -1; /* A > B */
2227 } else if(bb
->q
> aa
->q
) {
2234 rc
= uprv_stricmp(aa
->locale
, bb
->locale
);
2237 #if defined(ULOC_DEBUG)
2238 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2248 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2251 U_CAPI
int32_t U_EXPORT2
2252 uloc_acceptLanguageFromHTTP(char *result
, int32_t resultAvailable
, UAcceptResult
*outResult
,
2253 const char *httpAcceptLanguage
,
2254 UEnumeration
* availableLocales
,
2258 _acceptLangItem smallBuffer
[30];
2260 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2262 const char *itemEnd
;
2263 const char *paramEnd
;
2268 int32_t l
= (int32_t)uprv_strlen(httpAcceptLanguage
);
2270 char *tempstr
; /* Use for null pointer check */
2273 jSize
= sizeof(smallBuffer
)/sizeof(smallBuffer
[0]);
2274 if(U_FAILURE(*status
)) {
2278 for(s
=httpAcceptLanguage
;s
&&*s
;) {
2279 while(isspace(*s
)) /* eat space at the beginning */
2281 itemEnd
=uprv_strchr(s
,',');
2282 paramEnd
=uprv_strchr(s
,';');
2284 itemEnd
= httpAcceptLanguage
+l
; /* end of string */
2286 if(paramEnd
&& paramEnd
<itemEnd
) {
2287 /* semicolon (;) is closer than end (,) */
2292 while(isspace(*t
)) {
2298 while(isspace(*t
)) {
2301 j
[n
].q
= (float)_uloc_strtod(t
,NULL
);
2303 /* no semicolon - it's 1.0 */
2308 /* eat spaces prior to semi */
2309 for(t
=(paramEnd
-1);(paramEnd
>s
)&&isspace(*t
);t
--)
2311 /* Check for null pointer from uprv_strndup */
2312 tempstr
= uprv_strndup(s
,(int32_t)((t
+1)-s
));
2313 if (tempstr
== NULL
) {
2314 *status
= U_MEMORY_ALLOCATION_ERROR
;
2317 j
[n
].locale
= tempstr
;
2318 uloc_canonicalize(j
[n
].locale
,tmp
,sizeof(tmp
)/sizeof(tmp
[0]),status
);
2319 if(strcmp(j
[n
].locale
,tmp
)) {
2320 uprv_free(j
[n
].locale
);
2321 j
[n
].locale
=uprv_strdup(tmp
);
2323 #if defined(ULOC_DEBUG)
2324 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2328 while(*s
==',') { /* eat duplicate commas */
2332 if(j
==smallBuffer
) { /* overflowed the small buffer. */
2333 j
= uprv_malloc(sizeof(j
[0])*(jSize
*2));
2335 uprv_memcpy(j
,smallBuffer
,sizeof(j
[0])*jSize
);
2337 #if defined(ULOC_DEBUG)
2338 fprintf(stderr
,"malloced at size %d\n", jSize
);
2341 j
= uprv_realloc(j
, sizeof(j
[0])*jSize
*2);
2342 #if defined(ULOC_DEBUG)
2343 fprintf(stderr
,"re-alloced at size %d\n", jSize
);
2348 *status
= U_MEMORY_ALLOCATION_ERROR
;
2353 uprv_sortArray(j
, n
, sizeof(j
[0]), uloc_acceptLanguageCompare
, NULL
, TRUE
, status
);
2354 if(U_FAILURE(*status
)) {
2355 if(j
!= smallBuffer
) {
2356 #if defined(ULOC_DEBUG)
2357 fprintf(stderr
,"freeing j %p\n", j
);
2363 strs
= uprv_malloc((size_t)(sizeof(strs
[0])*n
));
2364 /* Check for null pointer */
2366 uprv_free(j
); /* Free to avoid memory leak */
2367 *status
= U_MEMORY_ALLOCATION_ERROR
;
2371 #if defined(ULOC_DEBUG)
2372 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2374 strs
[i
]=j
[i
].locale
;
2376 res
= uloc_acceptLanguage(result
, resultAvailable
, outResult
,
2377 (const char**)strs
, n
, availableLocales
, status
);
2382 if(j
!= smallBuffer
) {
2383 #if defined(ULOC_DEBUG)
2384 fprintf(stderr
,"freeing j %p\n", j
);
2392 U_CAPI
int32_t U_EXPORT2
2393 uloc_acceptLanguage(char *result
, int32_t resultAvailable
,
2394 UAcceptResult
*outResult
, const char **acceptList
,
2395 int32_t acceptListCount
,
2396 UEnumeration
* availableLocales
,
2402 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2404 char **fallbackList
;
2405 if(U_FAILURE(*status
)) {
2408 fallbackList
= uprv_malloc((size_t)(sizeof(fallbackList
[0])*acceptListCount
));
2409 if(fallbackList
==NULL
) {
2410 *status
= U_MEMORY_ALLOCATION_ERROR
;
2413 for(i
=0;i
<acceptListCount
;i
++) {
2414 #if defined(ULOC_DEBUG)
2415 fprintf(stderr
,"%02d: %s\n", i
, acceptList
[i
]);
2417 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2418 #if defined(ULOC_DEBUG)
2419 fprintf(stderr
," %s\n", l
);
2421 len
= (int32_t)uprv_strlen(l
);
2422 if(!uprv_strcmp(acceptList
[i
], l
)) {
2424 *outResult
= ULOC_ACCEPT_VALID
;
2426 #if defined(ULOC_DEBUG)
2427 fprintf(stderr
, "MATCH! %s\n", l
);
2430 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
2433 uprv_free(fallbackList
[j
]);
2435 uprv_free(fallbackList
);
2436 return u_terminateChars(result
, resultAvailable
, len
, status
);
2442 uenum_reset(availableLocales
, status
);
2443 /* save off parent info */
2444 if(uloc_getParent(acceptList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
2445 fallbackList
[i
] = uprv_strdup(tmp
);
2451 for(maxLen
--;maxLen
>0;maxLen
--) {
2452 for(i
=0;i
<acceptListCount
;i
++) {
2453 if(fallbackList
[i
] && ((int32_t)uprv_strlen(fallbackList
[i
])==maxLen
)) {
2454 #if defined(ULOC_DEBUG)
2455 fprintf(stderr
,"Try: [%s]", fallbackList
[i
]);
2457 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2458 #if defined(ULOC_DEBUG)
2459 fprintf(stderr
," %s\n", l
);
2461 len
= (int32_t)uprv_strlen(l
);
2462 if(!uprv_strcmp(fallbackList
[i
], l
)) {
2464 *outResult
= ULOC_ACCEPT_FALLBACK
;
2466 #if defined(ULOC_DEBUG)
2467 fprintf(stderr
, "fallback MATCH! %s\n", l
);
2470 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
2472 for(j
=0;j
<acceptListCount
;j
++) {
2473 uprv_free(fallbackList
[j
]);
2475 uprv_free(fallbackList
);
2476 return u_terminateChars(result
, resultAvailable
, len
, status
);
2479 uenum_reset(availableLocales
, status
);
2481 if(uloc_getParent(fallbackList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
2482 uprv_free(fallbackList
[i
]);
2483 fallbackList
[i
] = uprv_strdup(tmp
);
2485 uprv_free(fallbackList
[i
]);
2491 *outResult
= ULOC_ACCEPT_FAILED
;
2494 for(i
=0;i
<acceptListCount
;i
++) {
2495 uprv_free(fallbackList
[i
]);
2497 uprv_free(fallbackList
);