2 **********************************************************************
3 * Copyright (C) 1997-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
9 * Modification History:
11 * Date Name Description
12 * 04/01/97 aliu Creation.
13 * 08/21/98 stephen JDK 1.2 sync
14 * 12/08/98 rtg New Locale implementation and C API
15 * 03/15/99 damiba overhaul.
16 * 04/06/99 stephen changed setDefault() to realloc and copy
17 * 06/14/99 stephen Changed calls to ures_open for new params
18 * 07/21/99 stephen Modified setDefault() to propagate to C++
19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20 * brought canonicalization code into line with spec
21 *****************************************************************************/
24 POSIX's locale format, from putil.c: [no spaces]
26 ll [ _CC ] [ . MM ] [ @ VV]
28 l = lang, C = ctry, M = charmap, V = variant
31 #include "unicode/utypes.h"
32 #include "unicode/ustring.h"
33 #include "unicode/uloc.h"
47 #include <stdio.h> /* for sprintf */
49 /* ### Declarations **************************************************/
51 /* Locale stuff from locid.cpp */
/*
 * Declarations only: neither function is defined in this file. The note
 * preceding them attributes both to locid.cpp.
 * NOTE(review): presumably these set and fetch the process-wide default
 * locale ID; confirm the exact contract (ownership of the returned
 * string, handling of a NULL id) against locid.cpp.
 */
52 U_CFUNC
void locale_set_default(const char *id
);
53 U_CFUNC
const char *locale_get_default(void);
55 locale_getKeywords(const char *localeID
,
57 char *keywords
, int32_t keywordCapacity
,
58 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
62 /* ### Data tables **************************************************/
65 * Table of language codes, both 2- and 3-letter, with preference
66 * given to 2-letter codes where possible. Includes 3-letter codes
67 * that lack a 2-letter equivalent.
69 * This list must be in sorted order. This list is returned directly
70 * to the user by some API.
72 * This list must be kept in sync with LANGUAGES_3, with corresponding entries at the same index.
75 * This table should be terminated with a NULL entry, followed by a
76 * second list, and another NULL entry. The first list is visible to
77 * user code when this array is returned by API. The second list
78 * contains codes we support, but do not expose through user API.
82 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
83 * include the revisions up to 2001/7/27 *CWB*
85 * The 3 character codes are the terminology codes like RFC 3066. This
86 * is compatible with prior ICU codes
88 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
89 * table but now at the end of the table because 3 character codes are
90 * duplicates. This avoids bad searches going from 3 to 2 character codes.
93 * The range qaa-qtz is reserved for local use
95 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
96 /* ISO639 table version is 20130123 */
97 static const char * const LANGUAGES
[] = {
98 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af",
99 "afa", "afh", "agq", "ain", "ak", "akk", "ale", "alg",
100 "alt", "am", "an", "ang", "anp", "apa", "ar", "arc",
101 "arn", "arp", "art", "arw", "as", "asa", "ast", "ath",
102 "aus", "av", "awa", "ay", "az",
103 "ba", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
104 "bbj", "be", "bej", "bem", "ber", "bez", "bfd", "bg",
105 "bh", "bho", "bi", "bik", "bin", "bkm", "bla", "bm",
106 "bn", "bnt", "bo", "br", "bra", "brx", "bs", "bss",
107 "btk", "bua", "bug", "bum", "byn", "byv",
108 "ca", "cad", "cai", "car", "cau", "cay", "cch", "ce",
109 "ceb", "cel", "cgg", "ch", "chb", "chg", "chk", "chm",
110 "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
111 "cop", "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs",
112 "csb", "cu", "cus", "cv", "cy",
113 "da", "dak", "dar", "dav", "day", "de", "del", "den",
114 "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
115 "dv", "dyo", "dyu", "dz", "dzg",
116 "ebu", "ee", "efi", "egy", "eka", "el", "elx", "en",
117 "enm", "eo", "es", "et", "eu", "ewo",
118 "fa", "fan", "fat", "ff", "fi", "fil", "fiu", "fj",
119 "fo", "fon", "fr", "frm", "fro", "frr", "frs", "fur",
121 "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil",
122 "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",
123 "grc", "gsw", "gu", "guz", "gv", "gwi",
124 "ha", "hai", "haw", "he", "hi", "hil", "him", "hit",
125 "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy",
127 "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ijo",
128 "ik", "ilo", "inc", "ine", "inh", "io", "ira", "iro",
130 "ja", "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
131 "ka", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
132 "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg", "kha",
133 "khi", "kho", "khq", "ki", "kj", "kk", "kkj", "kl",
134 "kln", "km", "kmb", "kn", "ko", "kok", "kos", "kpe",
135 "kr", "krc", "krl", "kro", "kru", "ks", "ksb", "ksf",
136 "ksh", "ku", "kum", "kut", "kv", "kw", "ky",
137 "la", "lad", "lag", "lah", "lam", "lb", "lez", "lg",
138 "li", "lkt", "ln", "lo", "lol", "loz", "lt", "lu",
139 "lua", "lui", "lun", "luo", "lus", "luy", "lv",
140 "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
141 "mde", "mdf", "mdr", "men", "mer", "mfe", "mg", "mga",
142 "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
143 "mkh", "ml", "mn", "mnc", "mni", "mno", "mo", "moh",
144 "mos", "mr", "ms", "mt", "mua", "mul", "mun", "mus",
145 "mwl", "mwr", "my", "mye", "myn", "myv",
146 "na", "nah", "nai", "nap", "naq", "nb", "nd", "nds",
147 "ne", "new", "ng", "nia", "nic", "niu", "nl", "nmg",
148 "nn", "nnh", "no", "nog", "non", "nqo", "nr", "nso",
149 "nub", "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo",
151 "oc", "oj", "om", "or", "os", "osa", "ota", "oto",
152 "pa", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
153 "phi", "phn", "pi", "pl", "pon", "pra", "pro", "ps",
156 "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rof",
157 "rom", "ru", "rup", "rw", "rwk",
158 "sa", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
159 "sat", "sba", "sbp", "sc", "scn", "sco", "sd", "se",
160 "see", "seh", "sel", "sem", "ses", "sg", "sga", "sgn",
161 "shi", "shn", "shu", "si", "sid", "sio", "sit",
162 "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
163 "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
164 "srn", "srr", "ss", "ssa", "ssy", "st", "su", "suk",
165 "sus", "sux", "sv", "sw", "swb", "swc", "syc", "syr",
166 "ta", "tai", "te", "tem", "teo", "ter", "tet", "tg",
167 "th", "ti", "tig", "tiv", "tk", "tkl", "tl", "tlh",
168 "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv",
169 "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
170 "twq", "ty", "tyv", "tzm",
171 "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
172 "vai", "ve", "vi", "vo", "vot", "vun",
173 "wa", "wae", "wak", "wal", "war", "was", "wen", "wo",
175 "yao", "yap", "yav", "ybb", "yi", "yo", "ypk", "yue",
176 "za", "zap", "zbl", "zen", "zh", "znd", "zu", "zun",
179 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
183 static const char* const DEPRECATED_LANGUAGES
[]={
184 "in", "iw", "ji", "jw", NULL
, NULL
186 static const char* const REPLACEMENT_LANGUAGES
[]={
187 "id", "he", "yi", "jv", NULL
, NULL
191 * Table of 3-letter language codes.
193 * This is a lookup table used to convert 3-letter language codes to
194 * their 2-letter equivalent, where possible. It must be kept in sync
195 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
196 * same language as LANGUAGES_3[i]. The commented-out lines are
197 * copied from LANGUAGES to make eyeballing this baby easier.
199 * Where a 3-letter language code has no 2-letter equivalent, the
200 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
202 * This table should be terminated with a NULL entry, followed by a
203 * second list, and another NULL entry. The two lists correspond to
204 * the two lists in LANGUAGES.
206 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
207 /* ISO639 table version is 20130123 */
208 static const char * const LANGUAGES_3
[] = {
209 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
210 "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
211 "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
212 "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
213 "aus", "ava", "awa", "aym", "aze",
214 "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
215 "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
216 "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
217 "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
218 "btk", "bua", "bug", "bum", "byn", "byv",
219 "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
220 "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
221 "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
222 "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
223 "csb", "chu", "cus", "chv", "cym",
224 "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
225 "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
226 "div", "dyo", "dyu", "dzo", "dzg",
227 "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
228 "enm", "epo", "spa", "est", "eus", "ewo",
229 "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
230 "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
232 "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
233 "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
234 "grc", "gsw", "guj", "guz", "glv", "gwi",
235 "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
236 "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
238 "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
239 "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
241 "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
242 "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
243 "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
244 "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
245 "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
246 "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
247 "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
248 "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
249 "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
250 "lua", "lui", "lun", "luo", "lus", "luy", "lav",
251 "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
252 "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
253 "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
254 "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
255 "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
256 "mwl", "mwr", "mya", "mye", "myn", "myv",
257 "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
258 "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
259 "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
260 "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
262 "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
263 "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
264 "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
267 "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
268 "rom", "rus", "rup", "kin", "rwk",
269 "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
270 "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
271 "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
272 "shi", "shn", "shu", "sin", "sid", "sio", "sit",
273 "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
274 "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
275 "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
276 "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
277 "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
278 "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
279 "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
280 "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
281 "twq", "tah", "tyv", "tzm",
282 "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
283 "vai", "ven", "vie", "vol", "vot", "vun",
284 "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
286 "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
287 "zha", "zap", "zbl", "zen", "zho", "znd", "zul", "zun",
290 /* "in", "iw", "ji", "jw", "sh", */
291 "ind", "heb", "yid", "jaw", "srp",
296 * Table of 2-letter country codes.
298 * This list must be in sorted order. This list is returned directly
299 * to the user by some API.
301 * This list must be kept in sync with COUNTRIES_3, with corresponding entries at the same index.
304 * This table should be terminated with a NULL entry, followed by a
305 * second list, and another NULL entry. The first list is visible to
306 * user code when this array is returned by API. The second list
307 * contains codes we support, but do not expose through user API.
311 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
312 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
313 * new codes keeping the old ones for compatibility updated to include
314 * 1999/12/03 revisions *CWB*
316 * RO(ROM) is now RO(ROU) according to
317 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
319 static const char * const COUNTRIES
[] = {
320 "AD", "AE", "AF", "AG", "AI", "AL", "AM",
321 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
322 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
323 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
324 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
325 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
326 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
327 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
328 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
329 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
330 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
331 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
332 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
333 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
334 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
335 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
336 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
337 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
338 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
339 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
340 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
341 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
342 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
343 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
344 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
345 "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
346 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
347 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
348 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
349 "WS", "YE", "YT", "ZA", "ZM", "ZW",
351 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
355 static const char* const DEPRECATED_COUNTRIES
[] = {
356 "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL
, NULL
/* deprecated country list */
358 static const char* const REPLACEMENT_COUNTRIES
[] = {
359 /* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
360 "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL
, NULL
/* replacement country codes */
364 * Table of 3-letter country codes.
366 * This is a lookup table used to convert 3-letter country codes to
367 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
368 * For all valid i, COUNTRIES[i] must refer to the same country as
369 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
370 * to make eyeballing this baby easier.
372 * This table should be terminated with a NULL entry, followed by a
373 * second list, and another NULL entry. The two lists correspond to
374 * the two lists in COUNTRIES.
376 static const char * const COUNTRIES_3
[] = {
377 /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
378 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
379 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
380 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
381 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
382 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
383 /* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
384 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
385 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
386 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
387 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
388 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
389 /* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
390 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
391 /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
392 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
393 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
394 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
395 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
396 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
397 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
398 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
399 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
400 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
401 /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
402 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
403 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
404 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
405 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
406 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
407 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
408 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
409 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
410 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
411 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
412 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
413 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
414 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
415 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
416 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
417 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
418 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
419 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
420 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
421 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
422 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
423 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
424 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
425 /* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
426 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
427 /* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
428 "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
429 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
430 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
431 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
432 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
433 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
434 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
435 /* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
436 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
438 /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
439 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
/*
 * One row of the locale-ID canonicalization table (CANONICALIZE_MAP).
 * Pairs an input ID with the canonical ID that replaces it and, optionally,
 * a keyword/value pair associated with the mapping. keyword and value are
 * either both set or both NULL.
 */
443 typedef struct CanonicalizationMap
{
444 const char *id
; /* input ID */
445 const char *canonicalID
; /* canonicalized output ID */
446 const char *keyword
; /* keyword, or NULL if none */
447 const char *value
; /* keyword value, or NULL if kw==NULL */
448 } CanonicalizationMap
;
451 * A map to canonicalize locale IDs. This handles a variety of
452 * different semantic kinds of transformations.
454 static const CanonicalizationMap CANONICALIZE_MAP
[] = {
455 { "", "en_US_POSIX", NULL
, NULL
}, /* .NET name */
456 { "c", "en_US_POSIX", NULL
, NULL
}, /* POSIX name */
457 { "posix", "en_US_POSIX", NULL
, NULL
}, /* POSIX name (alias of C) */
458 { "art_LOJBAN", "jbo", NULL
, NULL
}, /* registered name */
459 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL
, NULL
}, /* .NET name */
460 { "az_AZ_LATN", "az_Latn_AZ", NULL
, NULL
}, /* .NET name */
461 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
462 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
463 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
464 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
465 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
466 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
467 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
468 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
469 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
470 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
471 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
472 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
473 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
474 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
475 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
476 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
477 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
478 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
479 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
480 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
481 { "nb_NO_NY", "nn_NO", NULL
, NULL
}, /* "markus said this was ok" :-) */
482 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
483 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
484 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
485 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL
, NULL
}, /* .NET name */
486 { "sr_SP_LATN", "sr_Latn_RS", NULL
, NULL
}, /* .NET name */
487 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL
, NULL
}, /* Linux name */
488 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
489 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL
, NULL
}, /* Linux name */
490 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL
, NULL
}, /* .NET name */
491 { "uz_UZ_LATN", "uz_Latn_UZ", NULL
, NULL
}, /* .NET name */
492 { "zh_CHS", "zh_Hans", NULL
, NULL
}, /* .NET name */
493 { "zh_CHT", "zh_Hant", NULL
, NULL
}, /* .NET name */
494 { "zh_GAN", "gan", NULL
, NULL
}, /* registered name */
495 { "zh_GUOYU", "zh", NULL
, NULL
}, /* registered name */
496 { "zh_HAKKA", "hak", NULL
, NULL
}, /* registered name */
497 { "zh_MIN_NAN", "nan", NULL
, NULL
}, /* registered name */
498 { "zh_WUU", "wuu", NULL
, NULL
}, /* registered name */
499 { "zh_XIANG", "hsn", NULL
, NULL
}, /* registered name */
500 { "zh_YUE", "yue", NULL
, NULL
}, /* registered name */
503 typedef struct VariantMap
{
504 const char *variant
; /* input ID */
505 const char *keyword
; /* keyword, or NULL if none */
506 const char *value
; /* keyword value, or NULL if kw==NULL */
509 static const VariantMap VARIANT_MAP
[] = {
510 { "EURO", "currency", "EUR" },
511 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
512 { "STROKE", "collation", "stroke" } /* Solaris variant */
515 /* ### BCP47 Conversion *******************************************/
516 /* Test if the locale id has BCP47 u extension and does not have '@' */
/*
 * Evaluates to non-zero when `id` is non-NULL, contains no '@', and its
 * shortest subtag (as reported by getShortestSubtagLength) is exactly one
 * character long.
 *
 * The expansion now refers only to its own parameter. It previously passed
 * a variable literally named `localeID` to getShortestSubtagLength, so it
 * only worked when the caller happened to have such a variable in scope and
 * could silently test a different string than the one passed in. Arguments
 * are parenthesized so that expression arguments expand safely.
 *
 * Note: `id` is evaluated more than once; do not pass an expression with
 * side effects.
 */
517 #define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
518 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
519 #define _ConvertBCP47(finalID, id, buffer, length,err) \
520 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
525 /* Gets the size of the shortest subtag in the given localeID. */
526 static int32_t getShortestSubtagLength(const char *localeID
) {
527 int32_t localeIDLength
= uprv_strlen(localeID
);
528 int32_t length
= localeIDLength
;
529 int32_t tmpLength
= 0;
533 for (i
= 0; i
< localeIDLength
; i
++) {
534 if (localeID
[i
] != '_' && localeID
[i
] != '-') {
541 if (tmpLength
!= 0 && tmpLength
< length
) {
551 /* ### Keywords **************************************************/
/*
 * Size, in chars, of the fixed buffers that hold a single keyword name
 * (KeywordStruct.keyword, keywordNameBuffer, localeKeywordNameBuffer).
 * A name whose length is >= this value is rejected with
 * U_INTERNAL_PROGRAM_ERROR, which leaves room for the terminating NUL.
 */
553 #define ULOC_KEYWORD_BUFFER_LEN 25
/*
 * Capacity of the on-stack keywordList array in _getKeywords. Reaching
 * this many keywords while parsing sets U_INTERNAL_PROGRAM_ERROR.
 */
554 #define ULOC_MAX_NO_KEYWORDS 25
556 U_CAPI
const char * U_EXPORT2
557 locale_getKeywordsStart(const char *localeID
) {
558 const char *result
= NULL
;
559 if((result
= uprv_strchr(localeID
, '@')) != NULL
) {
562 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
564 /* We do this because the @ sign is variant, and the @ sign used on one
565 EBCDIC machine won't be compiled the same way on other EBCDIC based
567 static const uint8_t ebcdicSigns
[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
568 const uint8_t *charToFind
= ebcdicSigns
;
570 if((result
= uprv_strchr(localeID
, *charToFind
)) != NULL
) {
581 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
582 * @param keywordName incoming name to be canonicalized
583 * @param status return status (keyword too long)
584 * @return length of the keyword name
586 static int32_t locale_canonKeywordName(char *buf
, const char *keywordName
, UErrorCode
*status
)
589 int32_t keywordNameLen
= (int32_t)uprv_strlen(keywordName
);
591 if(keywordNameLen
>= ULOC_KEYWORD_BUFFER_LEN
) {
592 /* keyword name too long for internal buffer */
593 *status
= U_INTERNAL_PROGRAM_ERROR
;
597 /* normalize the keyword name */
598 for(i
= 0; i
< keywordNameLen
; i
++) {
599 buf
[i
] = uprv_tolower(keywordName
[i
]);
603 return keywordNameLen
;
607 char keyword
[ULOC_KEYWORD_BUFFER_LEN
];
609 const char *valueStart
;
613 static int32_t U_CALLCONV
614 compareKeywordStructs(const void * /*context*/, const void *left
, const void *right
) {
615 const char* leftString
= ((const KeywordStruct
*)left
)->keyword
;
616 const char* rightString
= ((const KeywordStruct
*)right
)->keyword
;
617 return uprv_strcmp(leftString
, rightString
);
621 * Both addKeyword and addValue must already be in canonical form.
622 * Either both addKeyword and addValue are NULL, or neither is NULL.
623 * If they are not NULL they must be zero terminated.
624 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
627 _getKeywords(const char *localeID
,
629 char *keywords
, int32_t keywordCapacity
,
630 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
632 const char* addKeyword
,
633 const char* addValue
,
636 KeywordStruct keywordList
[ULOC_MAX_NO_KEYWORDS
];
638 int32_t maxKeywords
= ULOC_MAX_NO_KEYWORDS
;
639 int32_t numKeywords
= 0;
640 const char* pos
= localeID
;
641 const char* equalSign
= NULL
;
642 const char* semicolon
= NULL
;
644 int32_t keywordsLen
= 0;
645 int32_t valuesLen
= 0;
647 if(prev
== '@') { /* start of keyword definition */
648 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
650 UBool duplicate
= FALSE
;
651 /* skip leading spaces */
655 if (!*pos
) { /* handle trailing "; " */
658 if(numKeywords
== maxKeywords
) {
659 *status
= U_INTERNAL_PROGRAM_ERROR
;
662 equalSign
= uprv_strchr(pos
, '=');
663 semicolon
= uprv_strchr(pos
, ';');
664 /* lack of '=' [foo@currency] is illegal */
665 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
666 if(!equalSign
|| (semicolon
&& semicolon
<equalSign
)) {
667 *status
= U_INVALID_FORMAT_ERROR
;
670 /* need to normalize both keyword and keyword name */
671 if(equalSign
- pos
>= ULOC_KEYWORD_BUFFER_LEN
) {
672 /* keyword name too long for internal buffer */
673 *status
= U_INTERNAL_PROGRAM_ERROR
;
676 for(i
= 0, n
= 0; i
< equalSign
- pos
; ++i
) {
678 keywordList
[numKeywords
].keyword
[n
++] = uprv_tolower(pos
[i
]);
681 keywordList
[numKeywords
].keyword
[n
] = 0;
682 keywordList
[numKeywords
].keywordLen
= n
;
683 /* now grab the value part. First we skip the '=' */
685 /* then we skip leading spaces */
686 while(*equalSign
== ' ') {
689 keywordList
[numKeywords
].valueStart
= equalSign
;
694 while(*(pos
- i
- 1) == ' ') {
697 keywordList
[numKeywords
].valueLen
= (int32_t)(pos
- equalSign
- i
);
700 i
= (int32_t)uprv_strlen(equalSign
);
701 while(i
&& equalSign
[i
-1] == ' ') {
704 keywordList
[numKeywords
].valueLen
= i
;
706 /* If this is a duplicate keyword, then ignore it */
707 for (j
=0; j
<numKeywords
; ++j
) {
708 if (uprv_strcmp(keywordList
[j
].keyword
, keywordList
[numKeywords
].keyword
) == 0) {
718 /* Handle addKeyword/addValue. */
719 if (addKeyword
!= NULL
) {
720 UBool duplicate
= FALSE
;
721 U_ASSERT(addValue
!= NULL
);
722 /* Search for duplicate; if found, do nothing. Explicit keyword
723 overrides addKeyword. */
724 for (j
=0; j
<numKeywords
; ++j
) {
725 if (uprv_strcmp(keywordList
[j
].keyword
, addKeyword
) == 0) {
731 if (numKeywords
== maxKeywords
) {
732 *status
= U_INTERNAL_PROGRAM_ERROR
;
735 uprv_strcpy(keywordList
[numKeywords
].keyword
, addKeyword
);
736 keywordList
[numKeywords
].keywordLen
= (int32_t)uprv_strlen(addKeyword
);
737 keywordList
[numKeywords
].valueStart
= addValue
;
738 keywordList
[numKeywords
].valueLen
= (int32_t)uprv_strlen(addValue
);
742 U_ASSERT(addValue
== NULL
);
745 /* now we have a list of keywords */
746 /* we need to sort it */
747 uprv_sortArray(keywordList
, numKeywords
, sizeof(KeywordStruct
), compareKeywordStructs
, NULL
, FALSE
, status
);
749 /* Now construct the keyword part */
750 for(i
= 0; i
< numKeywords
; i
++) {
751 if(keywordsLen
+ keywordList
[i
].keywordLen
+ 1< keywordCapacity
) {
752 uprv_strcpy(keywords
+keywordsLen
, keywordList
[i
].keyword
);
754 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = '=';
756 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = 0;
759 keywordsLen
+= keywordList
[i
].keywordLen
+ 1;
761 if(keywordsLen
+ keywordList
[i
].valueLen
< keywordCapacity
) {
762 uprv_strncpy(keywords
+keywordsLen
, keywordList
[i
].valueStart
, keywordList
[i
].valueLen
);
764 keywordsLen
+= keywordList
[i
].valueLen
;
766 if(i
< numKeywords
- 1) {
767 if(keywordsLen
< keywordCapacity
) {
768 keywords
[keywordsLen
] = ';';
774 if(valuesLen
+ keywordList
[i
].valueLen
+ 1< valuesCapacity
) {
775 uprv_strcpy(values
+valuesLen
, keywordList
[i
].valueStart
);
776 values
[valuesLen
+ keywordList
[i
].valueLen
] = 0;
778 valuesLen
+= keywordList
[i
].valueLen
+ 1;
782 values
[valuesLen
] = 0;
787 return u_terminateChars(keywords
, keywordCapacity
, keywordsLen
, status
);
794 locale_getKeywords(const char *localeID
,
796 char *keywords
, int32_t keywordCapacity
,
797 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
799 UErrorCode
*status
) {
800 return _getKeywords(localeID
, prev
, keywords
, keywordCapacity
,
801 values
, valuesCapacity
, valLen
, valuesToo
,
805 U_CAPI
int32_t U_EXPORT2
806 uloc_getKeywordValue(const char* localeID
,
807 const char* keywordName
,
808 char* buffer
, int32_t bufferCapacity
,
811 const char* startSearchHere
= NULL
;
812 const char* nextSeparator
= NULL
;
813 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
814 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
818 if(status
&& U_SUCCESS(*status
) && localeID
) {
819 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
820 const char* tmpLocaleID
;
822 if (_hasBCP47Extension(localeID
)) {
823 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
825 tmpLocaleID
=localeID
;
828 startSearchHere
= uprv_strchr(tmpLocaleID
, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
829 if(startSearchHere
== NULL
) {
830 /* no keywords, return at once */
834 locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
835 if(U_FAILURE(*status
)) {
839 /* find the first keyword */
840 while(startSearchHere
) {
842 /* skip leading spaces (allowed?) */
843 while(*startSearchHere
== ' ') {
846 nextSeparator
= uprv_strchr(startSearchHere
, '=');
847 /* need to normalize both keyword and keyword name */
851 if(nextSeparator
- startSearchHere
>= ULOC_KEYWORD_BUFFER_LEN
) {
852 /* keyword name too long for internal buffer */
853 *status
= U_INTERNAL_PROGRAM_ERROR
;
856 for(i
= 0; i
< nextSeparator
- startSearchHere
; i
++) {
857 localeKeywordNameBuffer
[i
] = uprv_tolower(startSearchHere
[i
]);
859 /* trim trailing spaces */
860 while(startSearchHere
[i
-1] == ' ') {
864 localeKeywordNameBuffer
[i
] = 0;
866 startSearchHere
= uprv_strchr(nextSeparator
, ';');
868 if(uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
) == 0) {
870 while(*nextSeparator
== ' ') {
873 /* we actually found the keyword. Copy the value */
874 if(startSearchHere
&& startSearchHere
- nextSeparator
< bufferCapacity
) {
875 while(*(startSearchHere
-1) == ' ') {
878 uprv_strncpy(buffer
, nextSeparator
, startSearchHere
- nextSeparator
);
879 result
= u_terminateChars(buffer
, bufferCapacity
, (int32_t)(startSearchHere
- nextSeparator
), status
);
880 } else if(!startSearchHere
&& (int32_t)uprv_strlen(nextSeparator
) < bufferCapacity
) { /* last item in string */
881 i
= (int32_t)uprv_strlen(nextSeparator
);
882 while(nextSeparator
[i
- 1] == ' ') {
885 uprv_strncpy(buffer
, nextSeparator
, i
);
886 result
= u_terminateChars(buffer
, bufferCapacity
, i
, status
);
888 /* give a bigger buffer, please */
889 *status
= U_BUFFER_OVERFLOW_ERROR
;
890 if(startSearchHere
) {
891 result
= (int32_t)(startSearchHere
- nextSeparator
);
893 result
= (int32_t)uprv_strlen(nextSeparator
);
903 U_CAPI
int32_t U_EXPORT2
904 uloc_setKeywordValue(const char* keywordName
,
905 const char* keywordValue
,
906 char* buffer
, int32_t bufferCapacity
,
909 /* TODO: sorting. removal. */
910 int32_t keywordNameLen
;
911 int32_t keywordValueLen
;
914 int32_t foundValueLen
;
915 int32_t keywordAtEnd
= 0; /* is the keyword at the end of the string? */
916 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
917 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
920 char* nextSeparator
= NULL
;
921 char* nextEqualsign
= NULL
;
922 char* startSearchHere
= NULL
;
923 char* keywordStart
= NULL
;
924 char *insertHere
= NULL
;
925 if(U_FAILURE(*status
)) {
928 if(bufferCapacity
>1) {
929 bufLen
= (int32_t)uprv_strlen(buffer
);
931 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
934 if(bufferCapacity
<bufLen
) {
935 /* The capacity is less than the length?! Is this NULL terminated? */
936 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
939 if(keywordValue
&& !*keywordValue
) {
943 keywordValueLen
= (int32_t)uprv_strlen(keywordValue
);
947 keywordNameLen
= locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
948 if(U_FAILURE(*status
)) {
951 startSearchHere
= (char*)locale_getKeywordsStart(buffer
);
952 if(startSearchHere
== NULL
|| (startSearchHere
[1]==0)) {
953 if(!keywordValue
) { /* no keywords = nothing to remove */
957 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
958 if(startSearchHere
) { /* had a single @ */
959 needLen
--; /* already had the @ */
960 /* startSearchHere points at the @ */
962 startSearchHere
=buffer
+bufLen
;
964 if(needLen
>= bufferCapacity
) {
965 *status
= U_BUFFER_OVERFLOW_ERROR
;
966 return needLen
; /* no change */
968 *startSearchHere
= '@';
970 uprv_strcpy(startSearchHere
, keywordNameBuffer
);
971 startSearchHere
+= keywordNameLen
;
972 *startSearchHere
= '=';
974 uprv_strcpy(startSearchHere
, keywordValue
);
975 startSearchHere
+=keywordValueLen
;
977 } /* end shortcut - no @ */
979 keywordStart
= startSearchHere
;
980 /* search for keyword */
981 while(keywordStart
) {
983 /* skip leading spaces (allowed?) */
984 while(*keywordStart
== ' ') {
987 nextEqualsign
= uprv_strchr(keywordStart
, '=');
988 /* need to normalize both keyword and keyword name */
992 if(nextEqualsign
- keywordStart
>= ULOC_KEYWORD_BUFFER_LEN
) {
993 /* keyword name too long for internal buffer */
994 *status
= U_INTERNAL_PROGRAM_ERROR
;
997 for(i
= 0; i
< nextEqualsign
- keywordStart
; i
++) {
998 localeKeywordNameBuffer
[i
] = uprv_tolower(keywordStart
[i
]);
1000 /* trim trailing spaces */
1001 while(keywordStart
[i
-1] == ' ') {
1004 U_ASSERT(i
>=0 && i
<ULOC_KEYWORD_BUFFER_LEN
);
1005 localeKeywordNameBuffer
[i
] = 0;
1007 nextSeparator
= uprv_strchr(nextEqualsign
, ';');
1008 rc
= uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
);
1011 while(*nextEqualsign
== ' ') {
1014 /* we actually found the keyword. Change the value */
1015 if (nextSeparator
) {
1017 foundValueLen
= (int32_t)(nextSeparator
- nextEqualsign
);
1020 foundValueLen
= (int32_t)uprv_strlen(nextEqualsign
);
1022 if(keywordValue
) { /* adding a value - not removing */
1023 if(foundValueLen
== keywordValueLen
) {
1024 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1025 return bufLen
; /* no change in size */
1026 } else if(foundValueLen
> keywordValueLen
) {
1027 int32_t delta
= foundValueLen
- keywordValueLen
;
1028 if(nextSeparator
) { /* RH side */
1029 uprv_memmove(nextSeparator
- delta
, nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1031 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1035 } else { /* FVL < KVL */
1036 int32_t delta
= keywordValueLen
- foundValueLen
;
1037 if((bufLen
+delta
) >= bufferCapacity
) {
1038 *status
= U_BUFFER_OVERFLOW_ERROR
;
1039 return bufLen
+delta
;
1041 if(nextSeparator
) { /* RH side */
1042 uprv_memmove(nextSeparator
+delta
,nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1044 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1049 } else { /* removing a keyword */
1051 /* zero out the ';' or '@' just before startSearchhere */
1052 keywordStart
[-1] = 0;
1053 return (int32_t)((keywordStart
-buffer
)-1); /* (string length without keyword) minus separator */
1055 uprv_memmove(keywordStart
, nextSeparator
+1, bufLen
-((nextSeparator
+1)-buffer
));
1056 keywordStart
[bufLen
-((nextSeparator
+1)-buffer
)]=0;
1057 return (int32_t)(bufLen
-((nextSeparator
+1)-keywordStart
));
1060 } else if(rc
<0){ /* end match keyword */
1061 /* could insert at this location. */
1062 insertHere
= keywordStart
;
1064 keywordStart
= nextSeparator
;
1065 } /* end loop searching */
1068 return bufLen
; /* removal of non-extant keyword - no change */
1071 /* we know there is at least one keyword. */
1072 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
1073 if(needLen
>= bufferCapacity
) {
1074 *status
= U_BUFFER_OVERFLOW_ERROR
;
1075 return needLen
; /* no change */
1079 uprv_memmove(insertHere
+(1+keywordNameLen
+1+keywordValueLen
), insertHere
, bufLen
-(insertHere
-buffer
));
1080 keywordStart
= insertHere
;
1082 keywordStart
= buffer
+bufLen
;
1083 *keywordStart
= ';';
1086 uprv_strncpy(keywordStart
, keywordNameBuffer
, keywordNameLen
);
1087 keywordStart
+= keywordNameLen
;
1088 *keywordStart
= '=';
1090 uprv_strncpy(keywordStart
, keywordValue
, keywordValueLen
); /* terminates. */
1091 keywordStart
+=keywordValueLen
;
1093 *keywordStart
= ';';
1100 /* ### ID parsing implementation **************************************************/
/* TRUE if 'a' is one of the letters that may start a special prefix. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant.
 * Arguments are parenthesized so that expression arguments expand safely.
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1113 static char* _strnchr(const char* str
, int32_t len
, char c
) {
1114 U_ASSERT(str
!= 0 && len
>= 0);
1115 while (len
-- != 0) {
1119 } else if (d
== 0) {
1128 * Lookup 'key' in the array 'list'. The array 'list' should contain
1129 * a NULL entry, followed by more entries, and a second NULL entry.
1131 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1134 static int16_t _findIndex(const char* const* list
, const char* key
)
1136 const char* const* anchor
= list
;
1139 /* Make two passes through two NULL-terminated arrays at 'list' */
1140 while (pass
++ < 2) {
1142 if (uprv_strcmp(key
, *list
) == 0) {
1143 return (int16_t)(list
- anchor
);
1147 ++list
; /* skip final NULL *CWB*/
1152 /* count the length of src while copying it to dest; return strlen(src) */
1153 static inline int32_t
1154 _copyCount(char *dest
, int32_t destCapacity
, const char *src
) {
1161 return (int32_t)(src
-anchor
);
1163 if(destCapacity
<=0) {
1164 return (int32_t)((src
-anchor
)+uprv_strlen(src
));
1173 uloc_getCurrentCountryID(const char* oldID
){
1174 int32_t offset
= _findIndex(DEPRECATED_COUNTRIES
, oldID
);
1176 return REPLACEMENT_COUNTRIES
[offset
];
1181 uloc_getCurrentLanguageID(const char* oldID
){
1182 int32_t offset
= _findIndex(DEPRECATED_LANGUAGES
, oldID
);
1184 return REPLACEMENT_LANGUAGES
[offset
];
1189 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1190 * avoid duplicating code to handle the earlier locale ID pieces
1191 * in the functions for the later ones by
1192 * setting the *pEnd pointer to where they stopped parsing
1194 * TODO try to use this in Locale
1197 ulocimp_getLanguage(const char *localeID
,
1198 char *language
, int32_t languageCapacity
,
1199 const char **pEnd
) {
1202 char lang
[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1204 /* if it starts with i- or x- then copy that prefix */
1205 if(_isIDPrefix(localeID
)) {
1206 if(i
<languageCapacity
) {
1207 language
[i
]=(char)uprv_tolower(*localeID
);
1209 if(i
<languageCapacity
) {
1216 /* copy the language as far as possible and count its length */
1217 while(!_isTerminator(*localeID
) && !_isIDSeparator(*localeID
)) {
1218 if(i
<languageCapacity
) {
1219 language
[i
]=(char)uprv_tolower(*localeID
);
1223 lang
[i
]=(char)uprv_tolower(*localeID
);
1230 /* convert 3 character code to 2 character code if possible *CWB*/
1231 offset
=_findIndex(LANGUAGES_3
, lang
);
1233 i
=_copyCount(language
, languageCapacity
, LANGUAGES
[offset
]);
1244 ulocimp_getScript(const char *localeID
,
1245 char *script
, int32_t scriptCapacity
,
1254 /* copy the second item as far as possible and count its length */
1255 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])
1256 && uprv_isASCIILetter(localeID
[idLen
])) {
1260 /* If it's exactly 4 characters long, then it's a script and not a country. */
1264 *pEnd
= localeID
+idLen
;
1266 if(idLen
> scriptCapacity
) {
1267 idLen
= scriptCapacity
;
1270 script
[0]=(char)uprv_toupper(*(localeID
++));
1272 for (i
= 1; i
< idLen
; i
++) {
1273 script
[i
]=(char)uprv_tolower(*(localeID
++));
1283 ulocimp_getCountry(const char *localeID
,
1284 char *country
, int32_t countryCapacity
,
1288 char cnty
[ULOC_COUNTRY_CAPACITY
]={ 0, 0, 0, 0 };
1291 /* copy the country as far as possible and count its length */
1292 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])) {
1293 if(idLen
<(ULOC_COUNTRY_CAPACITY
-1)) { /*CWB*/
1294 cnty
[idLen
]=(char)uprv_toupper(localeID
[idLen
]);
1299 /* the country should be either length 2 or 3 */
1300 if (idLen
== 2 || idLen
== 3) {
1301 UBool gotCountry
= FALSE
;
1302 /* convert 3 character code to 2 character code if possible *CWB*/
1304 offset
=_findIndex(COUNTRIES_3
, cnty
);
1306 idLen
=_copyCount(country
, countryCapacity
, COUNTRIES
[offset
]);
1312 for (i
= 0; i
< idLen
; i
++) {
1313 if (i
< countryCapacity
) {
1314 country
[i
]=(char)uprv_toupper(localeID
[i
]);
1331 * @param needSeparator if true, then add leading '_' if any variants
1332 * are added to 'variant'
1335 _getVariantEx(const char *localeID
,
1337 char *variant
, int32_t variantCapacity
,
1338 UBool needSeparator
) {
1341 /* get one or more variant tags and separate them with '_' */
1342 if(_isIDSeparator(prev
)) {
1343 /* get a variant string after a '-' or '_' */
1344 while(!_isTerminator(*localeID
)) {
1345 if (needSeparator
) {
1346 if (i
<variantCapacity
) {
1350 needSeparator
= FALSE
;
1352 if(i
<variantCapacity
) {
1353 variant
[i
]=(char)uprv_toupper(*localeID
);
1354 if(variant
[i
]=='-') {
1363 /* if there is no variant tag after a '-' or '_' then look for '@' */
1367 } else if((localeID
=locale_getKeywordsStart(localeID
))!=NULL
) {
1368 ++localeID
; /* point after the '@' */
1372 while(!_isTerminator(*localeID
)) {
1373 if (needSeparator
) {
1374 if (i
<variantCapacity
) {
1378 needSeparator
= FALSE
;
1380 if(i
<variantCapacity
) {
1381 variant
[i
]=(char)uprv_toupper(*localeID
);
1382 if(variant
[i
]=='-' || variant
[i
]==',') {
1395 _getVariant(const char *localeID
,
1397 char *variant
, int32_t variantCapacity
) {
1398 return _getVariantEx(localeID
, prev
, variant
, variantCapacity
, FALSE
);
1402 * Delete ALL instances of a variant from the given list of one or
1403 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1404 * @param variants the source string of one or more variants,
1405 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1406 * terminated; if it is, trailing zero will NOT be maintained.
1407 * @param variantsLen length of variants
1408 * @param toDelete variant to delete, without separators, e.g. "EURO"
1409 * or "PREEURO"; not zero terminated
1410 * @param toDeleteLen length of toDelete
1411 * @return number of characters deleted from variants
1414 _deleteVariant(char* variants
, int32_t variantsLen
,
1415 const char* toDelete
, int32_t toDeleteLen
)
1417 int32_t delta
= 0; /* number of chars deleted */
1420 if (variantsLen
< toDeleteLen
) {
1423 if (uprv_strncmp(variants
, toDelete
, toDeleteLen
) == 0 &&
1424 (variantsLen
== toDeleteLen
||
1425 (flag
=(variants
[toDeleteLen
] == '_'))))
1427 int32_t d
= toDeleteLen
+ (flag
?1:0);
1430 if (variantsLen
> 0) {
1431 uprv_memmove(variants
, variants
+d
, variantsLen
);
1434 char* p
= _strnchr(variants
, variantsLen
, '_');
1439 variantsLen
-= (int32_t)(p
- variants
);
1445 /* Keyword enumeration */
/* State behind a keyword UEnumeration. */
typedef struct UKeywordsContext {
    char* keywords; /* start of the keyword list; freed by the close callback */
    char* current;  /* next keyword the enumeration will return */
} UKeywordsContext;
1452 static void U_CALLCONV
1453 uloc_kw_closeKeywords(UEnumeration
*enumerator
) {
1454 uprv_free(((UKeywordsContext
*)enumerator
->context
)->keywords
);
1455 uprv_free(enumerator
->context
);
1456 uprv_free(enumerator
);
1459 static int32_t U_CALLCONV
1460 uloc_kw_countKeywords(UEnumeration
*en
, UErrorCode
* /*status*/) {
1461 char *kw
= ((UKeywordsContext
*)en
->context
)->keywords
;
1465 kw
+= uprv_strlen(kw
)+1;
1470 static const char* U_CALLCONV
1471 uloc_kw_nextKeyword(UEnumeration
* en
,
1472 int32_t* resultLength
,
1473 UErrorCode
* /*status*/) {
1474 const char* result
= ((UKeywordsContext
*)en
->context
)->current
;
1477 len
= (int32_t)uprv_strlen(((UKeywordsContext
*)en
->context
)->current
);
1478 ((UKeywordsContext
*)en
->context
)->current
+= len
+1;
1483 *resultLength
= len
;
1488 static void U_CALLCONV
1489 uloc_kw_resetKeywords(UEnumeration
* en
,
1490 UErrorCode
* /*status*/) {
1491 ((UKeywordsContext
*)en
->context
)->current
= ((UKeywordsContext
*)en
->context
)->keywords
;
1494 static const UEnumeration gKeywordsEnum
= {
1497 uloc_kw_closeKeywords
,
1498 uloc_kw_countKeywords
,
1500 uloc_kw_nextKeyword
,
1501 uloc_kw_resetKeywords
1504 U_CAPI UEnumeration
* U_EXPORT2
1505 uloc_openKeywordList(const char *keywordList
, int32_t keywordListSize
, UErrorCode
* status
)
1507 UKeywordsContext
*myContext
= NULL
;
1508 UEnumeration
*result
= NULL
;
1510 if(U_FAILURE(*status
)) {
1513 result
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
1514 /* Null pointer test */
1515 if (result
== NULL
) {
1516 *status
= U_MEMORY_ALLOCATION_ERROR
;
1519 uprv_memcpy(result
, &gKeywordsEnum
, sizeof(UEnumeration
));
1520 myContext
= static_cast<UKeywordsContext
*>(uprv_malloc(sizeof(UKeywordsContext
)));
1521 if (myContext
== NULL
) {
1522 *status
= U_MEMORY_ALLOCATION_ERROR
;
1526 myContext
->keywords
= (char *)uprv_malloc(keywordListSize
+1);
1527 uprv_memcpy(myContext
->keywords
, keywordList
, keywordListSize
);
1528 myContext
->keywords
[keywordListSize
] = 0;
1529 myContext
->current
= myContext
->keywords
;
1530 result
->context
= myContext
;
1534 U_CAPI UEnumeration
* U_EXPORT2
1535 uloc_openKeywords(const char* localeID
,
1540 int32_t keywordsCapacity
= 256;
1541 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1542 const char* tmpLocaleID
;
1544 if(status
==NULL
|| U_FAILURE(*status
)) {
1548 if (_hasBCP47Extension(localeID
)) {
1549 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
1551 if (localeID
==NULL
) {
1552 localeID
=uloc_getDefault();
1554 tmpLocaleID
=localeID
;
1557 /* Skip the language */
1558 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
1559 if(_isIDSeparator(*tmpLocaleID
)) {
1560 const char *scriptID
;
1561 /* Skip the script if available */
1562 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
1563 if(scriptID
!= tmpLocaleID
+1) {
1564 /* Found optional script */
1565 tmpLocaleID
= scriptID
;
1567 /* Skip the Country */
1568 if (_isIDSeparator(*tmpLocaleID
)) {
1569 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &tmpLocaleID
);
1570 if(_isIDSeparator(*tmpLocaleID
)) {
1571 _getVariant(tmpLocaleID
+1, *tmpLocaleID
, NULL
, 0);
1576 /* keywords are located after '@' */
1577 if((tmpLocaleID
= locale_getKeywordsStart(tmpLocaleID
)) != NULL
) {
1578 i
=locale_getKeywords(tmpLocaleID
+1, '@', keywords
, keywordsCapacity
, NULL
, 0, NULL
, FALSE
, status
);
1582 return uloc_openKeywordList(keywords
, i
, status
);
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'. Both arguments are
 * parenthesized so that a mask like (A | B) is tested as a whole. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* "i-default" without a terminating NUL, and its length */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1599 * Canonicalize the given localeID, to level 1 or to level 2,
1600 * depending on the options. To specify level 1, pass in options=0.
1601 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1603 * This is the code underlying uloc_getName and uloc_canonicalize.
1606 _canonicalize(const char* localeID
,
1608 int32_t resultCapacity
,
1611 int32_t j
, len
, fieldCount
=0, scriptSize
=0, variantSize
=0, nameCapacity
;
1612 char localeBuffer
[ULOC_FULLNAME_CAPACITY
];
1613 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1614 const char* origLocaleID
;
1615 const char* tmpLocaleID
;
1616 const char* keywordAssign
= NULL
;
1617 const char* separatorIndicator
= NULL
;
1618 const char* addKeyword
= NULL
;
1619 const char* addValue
= NULL
;
1621 char* variant
= NULL
; /* pointer into name, or NULL */
1623 if (U_FAILURE(*err
)) {
1627 if (_hasBCP47Extension(localeID
)) {
1628 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
1630 if (localeID
==NULL
) {
1631 localeID
=uloc_getDefault();
1633 tmpLocaleID
=localeID
;
1636 origLocaleID
=tmpLocaleID
;
1638 /* if we are doing a full canonicalization, then put results in
1639 localeBuffer, if necessary; otherwise send them to result. */
1640 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1641 (result
== NULL
|| resultCapacity
< (int32_t)sizeof(localeBuffer
))) {
1642 name
= localeBuffer
;
1643 nameCapacity
= (int32_t)sizeof(localeBuffer
);
1646 nameCapacity
= resultCapacity
;
1649 /* get all pieces, one after another, and separate with '_' */
1650 len
=ulocimp_getLanguage(tmpLocaleID
, name
, nameCapacity
, &tmpLocaleID
);
1652 if(len
== I_DEFAULT_LENGTH
&& uprv_strncmp(origLocaleID
, i_default
, len
) == 0) {
1653 const char *d
= uloc_getDefault();
1655 len
= (int32_t)uprv_strlen(d
);
1658 uprv_strncpy(name
, d
, len
);
1660 } else if(_isIDSeparator(*tmpLocaleID
)) {
1661 const char *scriptID
;
1664 if(len
<nameCapacity
) {
1669 scriptSize
=ulocimp_getScript(tmpLocaleID
+1,
1670 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
, &scriptID
);
1671 if(scriptSize
> 0) {
1672 /* Found optional script */
1673 tmpLocaleID
= scriptID
;
1676 if (_isIDSeparator(*tmpLocaleID
)) {
1677 /* If there is something else, then we add the _ */
1678 if(len
<nameCapacity
) {
1685 if (_isIDSeparator(*tmpLocaleID
)) {
1686 const char *cntryID
;
1687 int32_t cntrySize
= ulocimp_getCountry(tmpLocaleID
+1,
1688 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
, &cntryID
);
1689 if (cntrySize
> 0) {
1690 /* Found optional country */
1691 tmpLocaleID
= cntryID
;
1694 if(_isIDSeparator(*tmpLocaleID
)) {
1695 /* If there is something else, then we add the _ if we found country before. */
1696 if (cntrySize
>= 0 && ! _isIDSeparator(*(tmpLocaleID
+1)) ) {
1698 if(len
<nameCapacity
) {
1704 variantSize
= _getVariant(tmpLocaleID
+1, *tmpLocaleID
,
1705 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
);
1706 if (variantSize
> 0) {
1707 variant
= len
<nameCapacity
? name
+len
: NULL
;
1709 tmpLocaleID
+= variantSize
+ 1; /* skip '_' and variant */
1715 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1716 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) && *tmpLocaleID
== '.') {
1719 char c
= *tmpLocaleID
;
1726 if (len
<nameCapacity
) {
1736 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1737 After this, tmpLocaleID either points to '@' or is NULL */
1738 if ((tmpLocaleID
=locale_getKeywordsStart(tmpLocaleID
))!=NULL
) {
1739 keywordAssign
= uprv_strchr(tmpLocaleID
, '=');
1740 separatorIndicator
= uprv_strchr(tmpLocaleID
, ';');
1743 /* Copy POSIX-style variant, if any [mr@FOO] */
1744 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) &&
1745 tmpLocaleID
!= NULL
&& keywordAssign
== NULL
) {
1747 char c
= *tmpLocaleID
;
1751 if (len
<nameCapacity
) {
1759 if (OPTION_SET(options
, _ULOC_CANONICALIZE
)) {
1760 /* Handle @FOO variant if @ is present and not followed by = */
1761 if (tmpLocaleID
!=NULL
&& keywordAssign
==NULL
) {
1762 int32_t posixVariantSize
;
1763 /* Add missing '_' if needed */
1764 if (fieldCount
< 2 || (fieldCount
< 3 && scriptSize
> 0)) {
1766 if(len
<nameCapacity
) {
1771 } while(fieldCount
<2);
1773 posixVariantSize
= _getVariantEx(tmpLocaleID
+1, '@', name
+len
, nameCapacity
-len
,
1774 (UBool
)(variantSize
> 0));
1775 if (posixVariantSize
> 0) {
1776 if (variant
== NULL
) {
1779 len
+= posixVariantSize
;
1780 variantSize
+= posixVariantSize
;
1784 /* Handle generic variants first */
1786 for (j
=0; j
<(int32_t)(sizeof(VARIANT_MAP
)/sizeof(VARIANT_MAP
[0])); j
++) {
1787 const char* variantToCompare
= VARIANT_MAP
[j
].variant
;
1788 int32_t n
= (int32_t)uprv_strlen(variantToCompare
);
1789 int32_t variantLen
= _deleteVariant(variant
, uprv_min(variantSize
, (nameCapacity
-len
)), variantToCompare
, n
);
1791 if (variantLen
> 0) {
1792 if (len
> 0 && name
[len
-1] == '_') { /* delete trailing '_' */
1795 addKeyword
= VARIANT_MAP
[j
].keyword
;
1796 addValue
= VARIANT_MAP
[j
].value
;
1800 if (len
> 0 && len
<= nameCapacity
&& name
[len
-1] == '_') { /* delete trailing '_' */
1805 /* Look up the ID in the canonicalization map */
1806 for (j
=0; j
<(int32_t)(sizeof(CANONICALIZE_MAP
)/sizeof(CANONICALIZE_MAP
[0])); j
++) {
1807 const char* id
= CANONICALIZE_MAP
[j
].id
;
1808 int32_t n
= (int32_t)uprv_strlen(id
);
1809 if (len
== n
&& uprv_strncmp(name
, id
, n
) == 0) {
1810 if (n
== 0 && tmpLocaleID
!= NULL
) {
1811 break; /* Don't remap "" if keywords present */
1813 len
= _copyCount(name
, nameCapacity
, CANONICALIZE_MAP
[j
].canonicalID
);
1814 if (CANONICALIZE_MAP
[j
].keyword
) {
1815 addKeyword
= CANONICALIZE_MAP
[j
].keyword
;
1816 addValue
= CANONICALIZE_MAP
[j
].value
;
1823 if (!OPTION_SET(options
, _ULOC_STRIP_KEYWORDS
)) {
1824 if (tmpLocaleID
!=NULL
&& keywordAssign
!=NULL
&&
1825 (!separatorIndicator
|| separatorIndicator
> keywordAssign
)) {
1826 if(len
<nameCapacity
) {
1831 len
+= _getKeywords(tmpLocaleID
+1, '@', (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
,
1832 NULL
, 0, NULL
, TRUE
, addKeyword
, addValue
, err
);
1833 } else if (addKeyword
!= NULL
) {
1834 U_ASSERT(addValue
!= NULL
&& len
< nameCapacity
);
1835 /* inelegant but works -- later make _getKeywords do this? */
1836 len
+= _copyCount(name
+len
, nameCapacity
-len
, "@");
1837 len
+= _copyCount(name
+len
, nameCapacity
-len
, addKeyword
);
1838 len
+= _copyCount(name
+len
, nameCapacity
-len
, "=");
1839 len
+= _copyCount(name
+len
, nameCapacity
-len
, addValue
);
1843 if (U_SUCCESS(*err
) && result
!= NULL
&& name
== localeBuffer
) {
1844 uprv_strncpy(result
, localeBuffer
, (len
> resultCapacity
) ? resultCapacity
: len
);
1847 return u_terminateChars(result
, resultCapacity
, len
, err
);
1850 /* ### ID parsing API **************************************************/
1852 U_CAPI
int32_t U_EXPORT2
1853 uloc_getParent(const char* localeID
,
1855 int32_t parentCapacity
,
1858 const char *lastUnderscore
;
1861 if (U_FAILURE(*err
))
1864 if (localeID
== NULL
)
1865 localeID
= uloc_getDefault();
1867 lastUnderscore
=uprv_strrchr(localeID
, '_');
1868 if(lastUnderscore
!=NULL
) {
1869 i
=(int32_t)(lastUnderscore
-localeID
);
1874 if(i
>0 && parent
!= localeID
) {
1875 uprv_memcpy(parent
, localeID
, uprv_min(i
, parentCapacity
));
1877 return u_terminateChars(parent
, parentCapacity
, i
, err
);
1880 U_CAPI
int32_t U_EXPORT2
1881 uloc_getLanguage(const char* localeID
,
1883 int32_t languageCapacity
,
1886 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1889 if (err
==NULL
|| U_FAILURE(*err
)) {
1893 if(localeID
==NULL
) {
1894 localeID
=uloc_getDefault();
1897 i
=ulocimp_getLanguage(localeID
, language
, languageCapacity
, NULL
);
1898 return u_terminateChars(language
, languageCapacity
, i
, err
);
1901 U_CAPI
int32_t U_EXPORT2
1902 uloc_getScript(const char* localeID
,
1904 int32_t scriptCapacity
,
1909 if(err
==NULL
|| U_FAILURE(*err
)) {
1913 if(localeID
==NULL
) {
1914 localeID
=uloc_getDefault();
1917 /* skip the language */
1918 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
1919 if(_isIDSeparator(*localeID
)) {
1920 i
=ulocimp_getScript(localeID
+1, script
, scriptCapacity
, NULL
);
1922 return u_terminateChars(script
, scriptCapacity
, i
, err
);
1925 U_CAPI
int32_t U_EXPORT2
1926 uloc_getCountry(const char* localeID
,
1928 int32_t countryCapacity
,
1933 if(err
==NULL
|| U_FAILURE(*err
)) {
1937 if(localeID
==NULL
) {
1938 localeID
=uloc_getDefault();
1941 /* Skip the language */
1942 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
1943 if(_isIDSeparator(*localeID
)) {
1944 const char *scriptID
;
1945 /* Skip the script if available */
1946 ulocimp_getScript(localeID
+1, NULL
, 0, &scriptID
);
1947 if(scriptID
!= localeID
+1) {
1948 /* Found optional script */
1949 localeID
= scriptID
;
1951 if(_isIDSeparator(*localeID
)) {
1952 i
=ulocimp_getCountry(localeID
+1, country
, countryCapacity
, NULL
);
1955 return u_terminateChars(country
, countryCapacity
, i
, err
);
1958 U_CAPI
int32_t U_EXPORT2
1959 uloc_getVariant(const char* localeID
,
1961 int32_t variantCapacity
,
1964 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1965 const char* tmpLocaleID
;
1968 if(err
==NULL
|| U_FAILURE(*err
)) {
1972 if (_hasBCP47Extension(localeID
)) {
1973 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
1975 if (localeID
==NULL
) {
1976 localeID
=uloc_getDefault();
1978 tmpLocaleID
=localeID
;
1981 /* Skip the language */
1982 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
1983 if(_isIDSeparator(*tmpLocaleID
)) {
1984 const char *scriptID
;
1985 /* Skip the script if available */
1986 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
1987 if(scriptID
!= tmpLocaleID
+1) {
1988 /* Found optional script */
1989 tmpLocaleID
= scriptID
;
1991 /* Skip the Country */
1992 if (_isIDSeparator(*tmpLocaleID
)) {
1993 const char *cntryID
;
1994 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &cntryID
);
1995 if (cntryID
!= tmpLocaleID
+1) {
1996 /* Found optional country */
1997 tmpLocaleID
= cntryID
;
1999 if(_isIDSeparator(*tmpLocaleID
)) {
2000 /* If there was no country ID, skip a possible extra IDSeparator */
2001 if (tmpLocaleID
!= cntryID
&& _isIDSeparator(tmpLocaleID
[1])) {
2004 i
=_getVariant(tmpLocaleID
+1, *tmpLocaleID
, variant
, variantCapacity
);
2009 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2010 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2012 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2013 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2016 return u_terminateChars(variant
, variantCapacity
, i
, err
);
2019 U_CAPI
int32_t U_EXPORT2
2020 uloc_getName(const char* localeID
,
2022 int32_t nameCapacity
,
2025 return _canonicalize(localeID
, name
, nameCapacity
, 0, err
);
2028 U_CAPI
int32_t U_EXPORT2
2029 uloc_getBaseName(const char* localeID
,
2031 int32_t nameCapacity
,
2034 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_STRIP_KEYWORDS
, err
);
2037 U_CAPI
int32_t U_EXPORT2
2038 uloc_canonicalize(const char* localeID
,
2040 int32_t nameCapacity
,
2043 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_CANONICALIZE
, err
);
2046 U_CAPI
const char* U_EXPORT2
2047 uloc_getISO3Language(const char* localeID
)
2050 char lang
[ULOC_LANG_CAPACITY
];
2051 UErrorCode err
= U_ZERO_ERROR
;
2053 if (localeID
== NULL
)
2055 localeID
= uloc_getDefault();
2057 uloc_getLanguage(localeID
, lang
, ULOC_LANG_CAPACITY
, &err
);
2060 offset
= _findIndex(LANGUAGES
, lang
);
2063 return LANGUAGES_3
[offset
];
2066 U_CAPI
const char* U_EXPORT2
2067 uloc_getISO3Country(const char* localeID
)
2070 char cntry
[ULOC_LANG_CAPACITY
];
2071 UErrorCode err
= U_ZERO_ERROR
;
2073 if (localeID
== NULL
)
2075 localeID
= uloc_getDefault();
2077 uloc_getCountry(localeID
, cntry
, ULOC_LANG_CAPACITY
, &err
);
2080 offset
= _findIndex(COUNTRIES
, cntry
);
2084 return COUNTRIES_3
[offset
];
2087 U_CAPI
uint32_t U_EXPORT2
2088 uloc_getLCID(const char* localeID
)
2090 UErrorCode status
= U_ZERO_ERROR
;
2091 char langID
[ULOC_FULLNAME_CAPACITY
];
2093 uloc_getLanguage(localeID
, langID
, sizeof(langID
), &status
);
2094 if (U_FAILURE(status
)) {
2098 return uprv_convertToLCID(langID
, localeID
, &status
);
2101 U_CAPI
int32_t U_EXPORT2
2102 uloc_getLocaleForLCID(uint32_t hostid
, char *locale
, int32_t localeCapacity
,
2106 const char *posix
= uprv_convertToPosix(hostid
, status
);
2107 if (U_FAILURE(*status
) || posix
== NULL
) {
2110 length
= (int32_t)uprv_strlen(posix
);
2111 if (length
+1 > localeCapacity
) {
2112 *status
= U_BUFFER_OVERFLOW_ERROR
;
2115 uprv_strcpy(locale
, posix
);
2120 /* ### Default locale **************************************************/
2122 U_CAPI
const char* U_EXPORT2
2125 return locale_get_default();
2128 U_CAPI
void U_EXPORT2
2129 uloc_setDefault(const char* newDefaultLocale
,
2132 if (U_FAILURE(*err
))
2134 /* the error code isn't currently used for anything by this function*/
2136 /* propagate change to C++ */
2137 locale_set_default(newDefaultLocale
);
2141 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2142 * to an array of pointers to arrays of char. All of these pointers are owned
2143 * by ICU-- do not delete them, and do not write through them. The array is
2144 * terminated with a null pointer.
2146 U_CAPI
const char* const* U_EXPORT2
2147 uloc_getISOLanguages()
2153 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2154 * pointer to an array of pointers to arrays of char. All of these pointers are
2155 * owned by ICU-- do not delete them, and do not write through them. The array is
2156 * terminated with a null pointer.
/* Takes no parameters; the returned pointer has type const char* const*. */
2158 U_CAPI
const char* const* U_EXPORT2
2159 uloc_getISOCountries()
2165 /* this function to be moved into cstring.c later */
/*
 * Decimal separator character used by _uloc_strtod below; starts at 0.
 * Presumably filled in from the sprintf probe in _uloc_strtod -- TODO confirm.
 */
2166 static char gDecimal
= 0;
/**
 * _uloc_strtod
 *
 * Wrapper around uprv_strtod() for input that always uses '.' as the
 * decimal separator, for use on runtimes whose number formatting may use a
 * different separator.
 *
 * @param start  text to parse; never modified (it is const).
 * @param end    receives the position in start where parsing stopped,
 *               following the uprv_strtod() convention.
 * @return       the parsed value from uprv_strtod().
 *
 * When gDecimal is '.', start is handed straight to uprv_strtod().
 * Otherwise at most 29 chars of start are copied into buf, the first '.'
 * found in buf is overwritten with gDecimal, and buf is parsed instead.
 * If buf contains no '.', start itself is parsed.
 */
2171 _uloc_strtod(const char *start
, char **end
) {
2178 /* For machines that decide to change the decimal on you,
2179 and try to be too smart with localization.
2180 This normally should be just a '.'. */
2181 sprintf(rep
, "%+1.1f", 1.0);
2185 if(gDecimal
== '.') {
2186 return uprv_strtod(start
, end
); /* fall through to OS */
/* work on a bounded local copy so the caller's const text is left untouched */
2188 uprv_strncpy(buf
, start
, 29);
2190 decimal
= uprv_strchr(buf
, '.');
2192 *decimal
= gDecimal
;
2194 return uprv_strtod(start
, end
); /* no decimal point */
2196 rv
= uprv_strtod(buf
, &myEnd
);
/* myEnd points into buf; express the same offset relative to start */
2198 *end
= (char*)(start
+(myEnd
-buf
)); /* cast away const (to follow uprv_strtod API.) */
2206 int32_t dummy
; /* to avoid uninitialized memory copy from qsort */
/**
 * uloc_acceptLanguageCompare
 *
 * Comparison callback for _acceptLangItem elements; it is the comparator
 * handed to uprv_sortArray() in uloc_acceptLanguageFromHTTP().
 *
 * The first (context) parameter is unnamed and unused.
 *
 * @param a  pointer to the first _acceptLangItem.
 * @param b  pointer to the second _acceptLangItem.
 *
 * Ordering is by the q field: the branch commented "A > B" yields -1, so
 * an item with the larger q sorts first. uprv_stricmp() on the two locale
 * strings supplies the result for the name comparison -- presumably only
 * when the q values tie; TODO confirm.
 */
2210 static int32_t U_CALLCONV
2211 uloc_acceptLanguageCompare(const void * /*context*/, const void *a
, const void *b
)
2213 const _acceptLangItem
*aa
= (const _acceptLangItem
*)a
;
2214 const _acceptLangItem
*bb
= (const _acceptLangItem
*)b
;
2218 rc
= -1; /* A > B */
2219 } else if(bb
->q
> aa
->q
) {
/* case-insensitive comparison of the two locale names */
2226 rc
= uprv_stricmp(aa
->locale
, bb
->locale
);
2229 #if defined(ULOC_DEBUG)
2230 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2240 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
/**
 * uloc_acceptLanguageFromHTTP
 *
 * Splits an HTTP Accept-Language style string into items, sorts them with
 * uloc_acceptLanguageCompare, and delegates the actual matching to
 * uloc_acceptLanguage().
 *
 * @param result              output buffer, forwarded to uloc_acceptLanguage().
 * @param resultAvailable     capacity of result, forwarded likewise.
 * @param outResult           receives the match kind, forwarded likewise.
 * @param httpAcceptLanguage  text to parse: items separated by ',', each
 *                            optionally followed by ';' and a numeric weight
 *                            (see the sample string in the comment above).
 * @param availableLocales    enumeration forwarded to uloc_acceptLanguage().
 * @param status              error code; set to U_MEMORY_ALLOCATION_ERROR
 *                            when an allocation fails.
 *
 * Each item is stored in the array j as a heap copy of its name plus a q
 * value. Names are canonicalized with uloc_canonicalize(). The array j
 * begins in the stack array smallBuffer (30 items) and is moved to the
 * heap and doubled when it fills up.
 */
2243 U_CAPI
int32_t U_EXPORT2
2244 uloc_acceptLanguageFromHTTP(char *result
, int32_t resultAvailable
, UAcceptResult
*outResult
,
2245 const char *httpAcceptLanguage
,
2246 UEnumeration
* availableLocales
,
2250 _acceptLangItem smallBuffer
[30];
2252 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2254 const char *itemEnd
;
2255 const char *paramEnd
;
/* NOTE(review): uprv_strlen() runs on httpAcceptLanguage in this initializer,
   before the status check and before the loop's own "s &&" test -- a NULL
   header would reach uprv_strlen(); confirm callers never pass NULL. */
2260 int32_t l
= (int32_t)uprv_strlen(httpAcceptLanguage
);
2262 char *tempstr
; /* Use for null pointer check */
/* capacity of the item array, counted in items: the 30 slots of smallBuffer */
2265 jSize
= sizeof(smallBuffer
)/sizeof(smallBuffer
[0]);
2266 if(U_FAILURE(*status
)) {
/* one pass of this loop per comma-separated item */
2270 for(s
=httpAcceptLanguage
;s
&&*s
;) {
/* NOTE(review): isspace() receives a plain char here and below; a byte
   >= 0x80 is negative where char is signed -- consider an unsigned char cast. */
2271 while(isspace(*s
)) /* eat space at the beginning */
2273 itemEnd
=uprv_strchr(s
,',');
2274 paramEnd
=uprv_strchr(s
,';');
2276 itemEnd
= httpAcceptLanguage
+l
; /* end of string */
2278 if(paramEnd
&& paramEnd
<itemEnd
) {
2279 /* semicolon (;) is closer than end (,) */
2284 while(isspace(*t
)) {
2290 while(isspace(*t
)) {
/* parse the weight at t; the end position is not needed, hence NULL */
2293 j
[n
].q
= (float)_uloc_strtod(t
,NULL
);
2295 /* no semicolon - it's 1.0 */
2300 /* eat spaces prior to semi */
2301 for(t
=(paramEnd
-1);(paramEnd
>s
)&&isspace(*t
);t
--)
2303 /* Check for null pointer from uprv_strndup */
2304 tempstr
= uprv_strndup(s
,(int32_t)((t
+1)-s
));
2305 if (tempstr
== NULL
) {
2306 *status
= U_MEMORY_ALLOCATION_ERROR
;
2309 j
[n
].locale
= tempstr
;
/* If canonicalization changed the spelling, replace the stored copy with a
   duplicate of the canonical form held in tmp.
   NOTE(review): status is not examined between uloc_canonicalize() and the
   strcmp() on tmp, and the uprv_strdup() result is stored without the NULL
   check applied to uprv_strndup() above -- confirm this is intended. */
2310 uloc_canonicalize(j
[n
].locale
,tmp
,sizeof(tmp
)/sizeof(tmp
[0]),status
);
2311 if(strcmp(j
[n
].locale
,tmp
)) {
2312 uprv_free(j
[n
].locale
);
2313 j
[n
].locale
=uprv_strdup(tmp
);
2315 #if defined(ULOC_DEBUG)
2316 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2320 while(*s
==',') { /* eat duplicate commas */
/* Growth of the item array: the first growth allocates a heap block of
   jSize*2 items and copies the stack buffer into it; later growths
   reallocate the heap block to jSize*2 items. */
2324 if(j
==smallBuffer
) { /* overflowed the small buffer. */
2325 j
= static_cast<_acceptLangItem
*>(uprv_malloc(sizeof(j
[0])*(jSize
*2)));
2327 uprv_memcpy(j
,smallBuffer
,sizeof(j
[0])*jSize
);
2329 #if defined(ULOC_DEBUG)
2330 fprintf(stderr
,"malloced at size %d\n", jSize
);
/* NOTE(review): the uprv_realloc() result overwrites j directly; if it
   returns NULL the previous block (and the locale strings it references)
   can no longer be freed -- consider a temporary pointer. */
2333 j
= static_cast<_acceptLangItem
*>(uprv_realloc(j
, sizeof(j
[0])*jSize
*2));
2334 #if defined(ULOC_DEBUG)
2335 fprintf(stderr
,"re-alloced at size %d\n", jSize
);
2340 *status
= U_MEMORY_ALLOCATION_ERROR
;
/* order the n collected items using uloc_acceptLanguageCompare */
2345 uprv_sortArray(j
, n
, sizeof(j
[0]), uloc_acceptLanguageCompare
, NULL
, TRUE
, status
);
2346 if(U_FAILURE(*status
)) {
2347 if(j
!= smallBuffer
) {
2348 #if defined(ULOC_DEBUG)
2349 fprintf(stderr
,"freeing j %p\n", j
);
/* flat array of the n locale-name pointers, in sorted order */
2355 strs
= static_cast<char **>(uprv_malloc((size_t)(sizeof(strs
[0])*n
)));
2356 /* Check for null pointer */
/* NOTE(review): the other two cleanup sites test "j != smallBuffer" before
   freeing j; no such test is visible here. If j still points at the stack
   buffer this call frees non-heap memory -- confirm and guard. */
2358 uprv_free(j
); /* Free to avoid memory leak */
2359 *status
= U_MEMORY_ALLOCATION_ERROR
;
2363 #if defined(ULOC_DEBUG)
2364 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2366 strs
[i
]=j
[i
].locale
;
/* the matching itself is done by uloc_acceptLanguage() on the sorted names */
2368 res
= uloc_acceptLanguage(result
, resultAvailable
, outResult
,
2369 (const char**)strs
, n
, availableLocales
, status
);
2374 if(j
!= smallBuffer
) {
2375 #if defined(ULOC_DEBUG)
2376 fprintf(stderr
,"freeing j %p\n", j
);
/**
 * uloc_acceptLanguage
 *
 * Picks, from an enumeration of available locales, the first one that
 * matches an ordered list of acceptable locale names, falling back to
 * parent locales when there is no exact match.
 *
 * @param result            output buffer for the chosen locale name.
 * @param resultAvailable   capacity of result.
 * @param outResult         set to ULOC_ACCEPT_VALID for an exact match,
 *                          ULOC_ACCEPT_FALLBACK for a match on a parent
 *                          name, ULOC_ACCEPT_FAILED otherwise.
 * @param acceptList        acceptable locale names, indexed 0..acceptListCount-1.
 * @param acceptListCount   number of entries in acceptList.
 * @param availableLocales  enumeration scanned with uenum_next() and
 *                          rewound with uenum_reset().
 * @param status            error code; set to U_MEMORY_ALLOCATION_ERROR if
 *                          the fallback array cannot be allocated.
 * @return                  on a match, the value of u_terminateChars() for
 *                          the matched name's length.
 *
 * fallbackList holds one heap string (or NULL) per acceptList entry.
 * Every exit path shown frees entries of it and then the array itself.
 */
2384 U_CAPI
int32_t U_EXPORT2
2385 uloc_acceptLanguage(char *result
, int32_t resultAvailable
,
2386 UAcceptResult
*outResult
, const char **acceptList
,
2387 int32_t acceptListCount
,
2388 UEnumeration
* availableLocales
,
2394 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2396 char **fallbackList
;
2397 if(U_FAILURE(*status
)) {
/* one fallback slot per acceptList entry */
2400 fallbackList
= static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList
[0])*acceptListCount
)));
2401 if(fallbackList
==NULL
) {
2402 *status
= U_MEMORY_ALLOCATION_ERROR
;
/* pass 1: look for each requested name, in list order, among the available locales */
2405 for(i
=0;i
<acceptListCount
;i
++) {
2406 #if defined(ULOC_DEBUG)
2407 fprintf(stderr
,"%02d: %s\n", i
, acceptList
[i
]);
2409 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2410 #if defined(ULOC_DEBUG)
2411 fprintf(stderr
," %s\n", l
);
2413 len
= (int32_t)uprv_strlen(l
);
2414 if(!uprv_strcmp(acceptList
[i
], l
)) {
2416 *outResult
= ULOC_ACCEPT_VALID
;
2418 #if defined(ULOC_DEBUG)
2419 fprintf(stderr
, "MATCH! %s\n", l
);
/* copy at most resultAvailable chars; u_terminateChars() below supplies
   the termination handling and the return value */
2422 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
2425 uprv_free(fallbackList
[j
]);
2427 uprv_free(fallbackList
);
2428 return u_terminateChars(result
, resultAvailable
, len
, status
);
/* rewind the enumeration so the next scan starts from its beginning */
2434 uenum_reset(availableLocales
, status
);
2435 /* save off parent info */
2436 if(uloc_getParent(acceptList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
2437 fallbackList
[i
] = uprv_strdup(tmp
);
/* pass 2: try the saved parent names, longest first; maxLen counts down and
   only entries whose length equals the current maxLen are tried */
2443 for(maxLen
--;maxLen
>0;maxLen
--) {
2444 for(i
=0;i
<acceptListCount
;i
++) {
2445 if(fallbackList
[i
] && ((int32_t)uprv_strlen(fallbackList
[i
])==maxLen
)) {
2446 #if defined(ULOC_DEBUG)
2447 fprintf(stderr
,"Try: [%s]", fallbackList
[i
]);
2449 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2450 #if defined(ULOC_DEBUG)
2451 fprintf(stderr
," %s\n", l
);
2453 len
= (int32_t)uprv_strlen(l
);
2454 if(!uprv_strcmp(fallbackList
[i
], l
)) {
2456 *outResult
= ULOC_ACCEPT_FALLBACK
;
2458 #if defined(ULOC_DEBUG)
2459 fprintf(stderr
, "fallback MATCH! %s\n", l
);
2462 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
2464 for(j
=0;j
<acceptListCount
;j
++) {
2465 uprv_free(fallbackList
[j
]);
2467 uprv_free(fallbackList
);
2468 return u_terminateChars(result
, resultAvailable
, len
, status
);
2471 uenum_reset(availableLocales
, status
);
/* no match for this entry: compute its parent into tmp (the old string is
   still read by uloc_getParent before it is freed) and store a duplicate */
2473 if(uloc_getParent(fallbackList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
2474 uprv_free(fallbackList
[i
]);
2475 fallbackList
[i
] = uprv_strdup(tmp
);
2477 uprv_free(fallbackList
[i
]);
/* nothing matched in either pass */
2483 *outResult
= ULOC_ACCEPT_FAILED
;
2486 for(i
=0;i
<acceptListCount
;i
++) {
2487 uprv_free(fallbackList
[i
]);
2489 uprv_free(fallbackList
);