2 **********************************************************************
3 * Copyright (C) 1997-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
9 * Modification History:
11 * Date Name Description
12 * 04/01/97 aliu Creation.
13 * 08/21/98 stephen JDK 1.2 sync
14 * 12/08/98 rtg New Locale implementation and C API
15 * 03/15/99 damiba overhaul.
16 * 04/06/99 stephen changed setDefault() to realloc and copy
17 * 06/14/99 stephen Changed calls to ures_open for new params
18 * 07/21/99 stephen Modified setDefault() to propagate to C++
19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20 * brought canonicalization code into line with spec
21 *****************************************************************************/
24 POSIX's locale format, from putil.c: [no spaces]
26 ll [ _CC ] [ . MM ] [ @ VV]
28 l = lang, C = ctry, M = charmap, V = variant
31 #include "unicode/utypes.h"
32 #include "unicode/ustring.h"
33 #include "unicode/uloc.h"
47 #include <stdio.h> /* for sprintf */
49 /* ### Declarations **************************************************/
51 /* Locale stuff from locid.cpp */
/*
 * Forward declaration, with U_CFUNC linkage, of a function taking a locale
 * ID string. NOTE(review): presumably installs `id` as the default locale;
 * the definition is not in this chunk (the section comment attributes these
 * functions to locid.cpp) -- confirm semantics and ownership of `id` there.
 */
52 U_CFUNC
void locale_set_default(const char *id
);
/*
 * Forward declaration, with U_CFUNC linkage, of a function returning a
 * read-only string. NOTE(review): presumably the current default locale ID;
 * the lifetime of the returned pointer is not visible here -- confirm
 * against the definition.
 */
53 U_CFUNC
const char *locale_get_default(void);
55 locale_getKeywords(const char *localeID
,
57 char *keywords
, int32_t keywordCapacity
,
58 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
62 /* ### Data tables **************************************************/
65 * Table of language codes, both 2- and 3-letter, with preference
66 * given to 2-letter codes where possible. Includes 3-letter codes
67 * that lack a 2-letter equivalent.
69 * This list must be in sorted order. This list is returned directly
70 * to the user by some API.
72 * This list must be kept in sync with LANGUAGES_3, with corresponding
75 * This table should be terminated with a NULL entry, followed by a
76 * second list, and another NULL entry. The first list is visible to
77 * user code when this array is returned by API. The second list
78 * contains codes we support, but do not expose through user API.
82 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
83 * include the revisions up to 2001/7/27 *CWB*
85 * The 3 character codes are the terminology codes like RFC 3066. This
86 * is compatible with prior ICU codes
88 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
89 * table but now at the end of the table because 3 character codes are
90 * duplicates. This avoids bad searches going from 3 to 2 character
93 * The range qaa-qtz is reserved for local use
95 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
96 /* ISO639 table version is 20130531 */
97 static const char * const LANGUAGES
[] = {
98 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af",
99 "afa", "afh", "agq", "ain", "ak", "akk", "ale", "alg",
100 "alt", "am", "an", "ang", "anp", "apa", "ar", "arc",
101 "arn", "arp", "art", "arw", "as", "asa", "ast", "ath",
102 "aus", "av", "awa", "ay", "az",
103 "ba", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
104 "bbj", "be", "bej", "bem", "ber", "bez", "bfd", "bg",
105 "bh", "bho", "bi", "bik", "bin", "bkm", "bla", "bm",
106 "bn", "bnt", "bo", "br", "bra", "brx", "bs", "bss",
107 "btk", "bua", "bug", "bum", "byn", "byv",
108 "ca", "cad", "cai", "car", "cau", "cay", "cch", "ce",
109 "ceb", "cel", "cgg", "ch", "chb", "chg", "chk", "chm",
110 "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
111 "cop", "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs",
112 "csb", "cu", "cus", "cv", "cy",
113 "da", "dak", "dar", "dav", "day", "de", "del", "den",
114 "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
115 "dv", "dyo", "dyu", "dz", "dzg",
116 "ebu", "ee", "efi", "egy", "eka", "el", "elx", "en",
117 "enm", "eo", "es", "et", "eu", "ewo",
118 "fa", "fan", "fat", "ff", "fi", "fil", "fiu", "fj",
119 "fo", "fon", "fr", "frm", "fro", "frr", "frs", "fur",
121 "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil",
122 "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",
123 "grc", "gsw", "gu", "guz", "gv", "gwi",
124 "ha", "hai", "haw", "he", "hi", "hil", "him", "hit",
125 "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy",
127 "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ijo",
128 "ik", "ilo", "inc", "ine", "inh", "io", "ira", "iro",
130 "ja", "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
131 "ka", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
132 "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg", "kha",
133 "khi", "kho", "khq", "ki", "kj", "kk", "kkj", "kl",
134 "kln", "km", "kmb", "kn", "ko", "kok", "kos", "kpe",
135 "kr", "krc", "krl", "kro", "kru", "ks", "ksb", "ksf",
136 "ksh", "ku", "kum", "kut", "kv", "kw", "ky",
137 "la", "lad", "lag", "lah", "lam", "lb", "lez", "lg",
138 "li", "lkt", "ln", "lo", "lol", "loz", "lt", "lu",
139 "lua", "lui", "lun", "luo", "lus", "luy", "lv",
140 "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
141 "mde", "mdf", "mdr", "men", "mer", "mfe", "mg", "mga",
142 "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
143 "mkh", "ml", "mn", "mnc", "mni", "mno", "mo", "moh",
144 "mos", "mr", "ms", "mt", "mua", "mul", "mun", "mus",
145 "mwl", "mwr", "my", "mye", "myn", "myv",
146 "na", "nah", "nai", "nap", "naq", "nb", "nd", "nds",
147 "ne", "new", "ng", "nia", "nic", "niu", "nl", "nmg",
148 "nn", "nnh", "no", "nog", "non", "nqo", "nr", "nso",
149 "nub", "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo",
151 "oc", "oj", "om", "or", "os", "osa", "ota", "oto",
152 "pa", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
153 "phi", "phn", "pi", "pl", "pon", "pra", "pro", "ps",
156 "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rof",
157 "rom", "ru", "rup", "rw", "rwk",
158 "sa", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
159 "sat", "sba", "sbp", "sc", "scn", "sco", "sd", "se",
160 "see", "seh", "sel", "sem", "ses", "sg", "sga", "sgn",
161 "shi", "shn", "shu", "si", "sid", "sio", "sit",
162 "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
163 "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
164 "srn", "srr", "ss", "ssa", "ssy", "st", "su", "suk",
165 "sus", "sux", "sv", "sw", "swb", "swc", "syc", "syr",
166 "ta", "tai", "te", "tem", "teo", "ter", "tet", "tg",
167 "th", "ti", "tig", "tiv", "tk", "tkl", "tl", "tlh",
168 "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv",
169 "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
170 "twq", "ty", "tyv", "tzm",
171 "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
172 "vai", "ve", "vi", "vo", "vot", "vun",
173 "wa", "wae", "wak", "wal", "war", "was", "wen", "wo",
175 "yao", "yap", "yav", "ybb", "yi", "yo", "ypk", "yue",
176 "za", "zap", "zbl", "zen", "zgh", "zh", "znd", "zu",
179 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
183 static const char* const DEPRECATED_LANGUAGES
[]={
184 "in", "iw", "ji", "jw", NULL
, NULL
186 static const char* const REPLACEMENT_LANGUAGES
[]={
187 "id", "he", "yi", "jv", NULL
, NULL
191 * Table of 3-letter language codes.
193 * This is a lookup table used to convert 3-letter language codes to
194 * their 2-letter equivalent, where possible. It must be kept in sync
195 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
196 * same language as LANGUAGES_3[i]. The commented-out lines are
197 * copied from LANGUAGES to make eyeballing this baby easier.
199 * Where a 3-letter language code has no 2-letter equivalent, the
200 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
202 * This table should be terminated with a NULL entry, followed by a
203 * second list, and another NULL entry. The two lists correspond to
204 * the two lists in LANGUAGES.
206 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
207 /* ISO639 table version is 20130531 */
208 static const char * const LANGUAGES_3
[] = {
209 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
210 "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
211 "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
212 "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
213 "aus", "ava", "awa", "aym", "aze",
214 "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
215 "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
216 "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
217 "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
218 "btk", "bua", "bug", "bum", "byn", "byv",
219 "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
220 "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
221 "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
222 "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
223 "csb", "chu", "cus", "chv", "cym",
224 "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
225 "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
226 "div", "dyo", "dyu", "dzo", "dzg",
227 "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
228 "enm", "epo", "spa", "est", "eus", "ewo",
229 "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
230 "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
232 "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
233 "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
234 "grc", "gsw", "guj", "guz", "glv", "gwi",
235 "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
236 "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
238 "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
239 "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
241 "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
242 "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
243 "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
244 "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
245 "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
246 "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
247 "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
248 "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
249 "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
250 "lua", "lui", "lun", "luo", "lus", "luy", "lav",
251 "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
252 "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
253 "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
254 "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
255 "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
256 "mwl", "mwr", "mya", "mye", "myn", "myv",
257 "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
258 "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
259 "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
260 "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
262 "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
263 "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
264 "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
267 "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
268 "rom", "rus", "rup", "kin", "rwk",
269 "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
270 "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
271 "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
272 "shi", "shn", "shu", "sin", "sid", "sio", "sit",
273 "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
274 "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
275 "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
276 "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
277 "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
278 "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
279 "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
280 "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
281 "twq", "tah", "tyv", "tzm",
282 "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
283 "vai", "ven", "vie", "vol", "vot", "vun",
284 "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
286 "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
287 "zha", "zap", "zbl", "zen", "zgh", "zho", "znd", "zul",
290 /* "in", "iw", "ji", "jw", "sh", */
291 "ind", "heb", "yid", "jaw", "srp",
296 * Table of 2-letter country codes.
298 * This list must be in sorted order. This list is returned directly
299 * to the user by some API.
301 * This list must be kept in sync with COUNTRIES_3, with corresponding
304 * This table should be terminated with a NULL entry, followed by a
305 * second list, and another NULL entry. The first list is visible to
306 * user code when this array is returned by API. The second list
307 * contains codes we support, but do not expose through user API.
311 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
312 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
313 * new codes keeping the old ones for compatibility updated to include
314 * 1999/12/03 revisions *CWB*
316 * RO(ROM) is now RO(ROU) according to
317 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
319 static const char * const COUNTRIES
[] = {
320 "AD", "AE", "AF", "AG", "AI", "AL", "AM",
321 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
322 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
323 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
324 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
325 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
326 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
327 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
328 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
329 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
330 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
331 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
332 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
333 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
334 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
335 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
336 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
337 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
338 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
339 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
340 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
341 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
342 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
343 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
344 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
345 "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
346 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
347 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
348 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
349 "WS", "YE", "YT", "ZA", "ZM", "ZW",
351 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
355 static const char* const DEPRECATED_COUNTRIES
[] = {
356 "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL
, NULL
/* deprecated country list */
358 static const char* const REPLACEMENT_COUNTRIES
[] = {
359 /* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
360 "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL
, NULL
/* replacement country codes */
364 * Table of 3-letter country codes.
366 * This is a lookup table used to convert 3-letter country codes to
367 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
368 * For all valid i, COUNTRIES[i] must refer to the same country as
369 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
370 * to make eyeballing this baby easier.
372 * This table should be terminated with a NULL entry, followed by a
373 * second list, and another NULL entry. The two lists correspond to
374 * the two lists in COUNTRIES.
376 static const char * const COUNTRIES_3
[] = {
377 /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
378 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
379 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
380 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
381 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
382 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
383 /* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
384 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
385 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
386 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
387 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
388 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
389 /* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
390 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
391 /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
392 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
393 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
394 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
395 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
396 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
397 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
398 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
399 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
400 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
401 /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
402 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
403 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
404 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
405 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
406 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
407 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
408 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
409 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
410 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
411 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
412 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
413 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
414 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
415 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
416 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
417 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
418 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
419 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
420 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
421 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
422 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
423 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
424 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
425 /* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
426 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
427 /* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
428 "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
429 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
430 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
431 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
432 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
433 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
434 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
435 /* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
436 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
438 /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
439 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
/*
 * One row of a locale-ID canonicalization table. Each row pairs an input
 * ID with its canonicalized form and, optionally, a keyword/value pair
 * that accompanies the mapping, e.g.
 *   { "de__PHONEBOOK", "de", "collation", "phonebook" }.
 * Rows without a keyword carry NULL in both keyword and value.
 */
443 typedef struct CanonicalizationMap
{
444 const char *id
; /* input ID */
445 const char *canonicalID
; /* canonicalized output ID */
446 const char *keyword
; /* keyword, or NULL if none */
447 const char *value
; /* keyword value, or NULL if keyword is NULL */
448 } CanonicalizationMap
;
451 * A map to canonicalize locale IDs. This handles a variety of
452 * different semantic kinds of transformations.
454 static const CanonicalizationMap CANONICALIZE_MAP
[] = {
455 { "", "en_US_POSIX", NULL
, NULL
}, /* .NET name */
456 { "c", "en_US_POSIX", NULL
, NULL
}, /* POSIX name */
457 { "posix", "en_US_POSIX", NULL
, NULL
}, /* POSIX name (alias of C) */
458 { "art_LOJBAN", "jbo", NULL
, NULL
}, /* registered name */
459 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL
, NULL
}, /* .NET name */
460 { "az_AZ_LATN", "az_Latn_AZ", NULL
, NULL
}, /* .NET name */
461 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
462 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
463 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
464 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
465 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
466 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
467 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
468 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
469 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
470 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
471 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
472 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
473 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
474 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
475 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
476 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
477 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
478 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
479 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
480 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
481 { "nb_NO_NY", "nn_NO", NULL
, NULL
}, /* "markus said this was ok" :-) */
482 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
483 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
484 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
485 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL
, NULL
}, /* .NET name */
486 { "sr_SP_LATN", "sr_Latn_RS", NULL
, NULL
}, /* .NET name */
487 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL
, NULL
}, /* Linux name */
488 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
489 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL
, NULL
}, /* Linux name */
490 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL
, NULL
}, /* .NET name */
491 { "uz_UZ_LATN", "uz_Latn_UZ", NULL
, NULL
}, /* .NET name */
492 { "zh_CHS", "zh_Hans", NULL
, NULL
}, /* .NET name */
493 { "zh_CHT", "zh_Hant", NULL
, NULL
}, /* .NET name */
494 { "zh_GAN", "gan", NULL
, NULL
}, /* registered name */
495 { "zh_GUOYU", "zh", NULL
, NULL
}, /* registered name */
496 { "zh_HAKKA", "hak", NULL
, NULL
}, /* registered name */
497 { "zh_MIN_NAN", "nan", NULL
, NULL
}, /* registered name */
498 { "zh_WUU", "wuu", NULL
, NULL
}, /* registered name */
499 { "zh_XIANG", "hsn", NULL
, NULL
}, /* registered name */
500 { "zh_YUE", "yue", NULL
, NULL
}, /* registered name */
503 typedef struct VariantMap
{
504 const char *variant
; /* input ID */
505 const char *keyword
; /* keyword, or NULL if none */
506 const char *value
; /* keyword value, or NULL if kw==NULL */
509 static const VariantMap VARIANT_MAP
[] = {
510 { "EURO", "currency", "EUR" },
511 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
512 { "STROKE", "collation", "stroke" } /* Solaris variant */
515 /* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id has a BCP47 u extension and does not have '@'.
 *
 * Evaluates to nonzero when `id` is non-NULL, contains no '@' character,
 * and getShortestSubtagLength() reports that its shortest '_'/'-'
 * delimited subtag is exactly one character long.
 *
 * Every reference in the expansion is to the macro parameter, so the macro
 * does not depend on a variable named `localeID` being in scope at the
 * call site and always inspects the string that was actually passed in.
 * `id` is evaluated up to three times: do not pass an expression with
 * side effects.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
518 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
519 #define _ConvertBCP47(finalID, id, buffer, length,err) \
520 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
525 /* Gets the size of the shortest subtag in the given localeID. */
526 static int32_t getShortestSubtagLength(const char *localeID
) {
527 int32_t localeIDLength
= uprv_strlen(localeID
);
528 int32_t length
= localeIDLength
;
529 int32_t tmpLength
= 0;
533 for (i
= 0; i
< localeIDLength
; i
++) {
534 if (localeID
[i
] != '_' && localeID
[i
] != '-') {
541 if (tmpLength
!= 0 && tmpLength
< length
) {
551 /* ### Keywords **************************************************/
/*
 * Size, in chars, of the fixed buffers that hold a single keyword name
 * (including the terminating NUL). Keyword names whose length is greater
 * than or equal to this value are rejected with U_INTERNAL_PROGRAM_ERROR.
 */
553 #define ULOC_KEYWORD_BUFFER_LEN 25
/*
 * Capacity of the on-stack keyword list built while parsing a locale ID;
 * reaching this many keywords is reported as U_INTERNAL_PROGRAM_ERROR.
 */
554 #define ULOC_MAX_NO_KEYWORDS 25
556 U_CAPI
const char * U_EXPORT2
557 locale_getKeywordsStart(const char *localeID
) {
558 const char *result
= NULL
;
559 if((result
= uprv_strchr(localeID
, '@')) != NULL
) {
562 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
564 /* We do this because the @ sign is variant, and the @ sign used on one
565 EBCDIC machine won't be compiled the same way on other EBCDIC based
567 static const uint8_t ebcdicSigns
[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
568 const uint8_t *charToFind
= ebcdicSigns
;
570 if((result
= uprv_strchr(localeID
, *charToFind
)) != NULL
) {
581 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
582 * @param keywordName incoming name to be canonicalized
583 * @param status return status (keyword too long)
584 * @return length of the keyword name
586 static int32_t locale_canonKeywordName(char *buf
, const char *keywordName
, UErrorCode
*status
)
589 int32_t keywordNameLen
= (int32_t)uprv_strlen(keywordName
);
591 if(keywordNameLen
>= ULOC_KEYWORD_BUFFER_LEN
) {
592 /* keyword name too long for internal buffer */
593 *status
= U_INTERNAL_PROGRAM_ERROR
;
597 /* normalize the keyword name */
598 for(i
= 0; i
< keywordNameLen
; i
++) {
599 buf
[i
] = uprv_tolower(keywordName
[i
]);
603 return keywordNameLen
;
607 char keyword
[ULOC_KEYWORD_BUFFER_LEN
];
609 const char *valueStart
;
613 static int32_t U_CALLCONV
614 compareKeywordStructs(const void * /*context*/, const void *left
, const void *right
) {
615 const char* leftString
= ((const KeywordStruct
*)left
)->keyword
;
616 const char* rightString
= ((const KeywordStruct
*)right
)->keyword
;
617 return uprv_strcmp(leftString
, rightString
);
621 * Both addKeyword and addValue must already be in canonical form.
622 * Either both addKeyword and addValue are NULL, or neither is NULL.
623 * If they are not NULL they must be zero terminated.
624 * If addKeyword is not NULL, it must have a length small enough to fit in KeywordStruct.keyword.
627 _getKeywords(const char *localeID
,
629 char *keywords
, int32_t keywordCapacity
,
630 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
632 const char* addKeyword
,
633 const char* addValue
,
636 KeywordStruct keywordList
[ULOC_MAX_NO_KEYWORDS
];
638 int32_t maxKeywords
= ULOC_MAX_NO_KEYWORDS
;
639 int32_t numKeywords
= 0;
640 const char* pos
= localeID
;
641 const char* equalSign
= NULL
;
642 const char* semicolon
= NULL
;
644 int32_t keywordsLen
= 0;
645 int32_t valuesLen
= 0;
647 if(prev
== '@') { /* start of keyword definition */
648 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
650 UBool duplicate
= FALSE
;
651 /* skip leading spaces */
655 if (!*pos
) { /* handle trailing "; " */
658 if(numKeywords
== maxKeywords
) {
659 *status
= U_INTERNAL_PROGRAM_ERROR
;
662 equalSign
= uprv_strchr(pos
, '=');
663 semicolon
= uprv_strchr(pos
, ';');
664 /* lack of '=' [foo@currency] is illegal */
665 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
666 if(!equalSign
|| (semicolon
&& semicolon
<equalSign
)) {
667 *status
= U_INVALID_FORMAT_ERROR
;
670 /* need to normalize both keyword and keyword name */
671 if(equalSign
- pos
>= ULOC_KEYWORD_BUFFER_LEN
) {
672 /* keyword name too long for internal buffer */
673 *status
= U_INTERNAL_PROGRAM_ERROR
;
676 for(i
= 0, n
= 0; i
< equalSign
- pos
; ++i
) {
678 keywordList
[numKeywords
].keyword
[n
++] = uprv_tolower(pos
[i
]);
682 /* zero-length keyword is an error. */
684 *status
= U_INVALID_FORMAT_ERROR
;
688 keywordList
[numKeywords
].keyword
[n
] = 0;
689 keywordList
[numKeywords
].keywordLen
= n
;
690 /* now grab the value part. First we skip the '=' */
692 /* then we skip leading spaces */
693 while(*equalSign
== ' ') {
697 /* Premature end or zero-length value */
698 if (!equalSign
|| equalSign
== semicolon
) {
699 *status
= U_INVALID_FORMAT_ERROR
;
703 keywordList
[numKeywords
].valueStart
= equalSign
;
708 while(*(pos
- i
- 1) == ' ') {
711 keywordList
[numKeywords
].valueLen
= (int32_t)(pos
- equalSign
- i
);
714 i
= (int32_t)uprv_strlen(equalSign
);
715 while(i
&& equalSign
[i
-1] == ' ') {
718 keywordList
[numKeywords
].valueLen
= i
;
720 /* If this is a duplicate keyword, then ignore it */
721 for (j
=0; j
<numKeywords
; ++j
) {
722 if (uprv_strcmp(keywordList
[j
].keyword
, keywordList
[numKeywords
].keyword
) == 0) {
732 /* Handle addKeyword/addValue. */
733 if (addKeyword
!= NULL
) {
734 UBool duplicate
= FALSE
;
735 U_ASSERT(addValue
!= NULL
);
736 /* Search for duplicate; if found, do nothing. Explicit keyword
737 overrides addKeyword. */
738 for (j
=0; j
<numKeywords
; ++j
) {
739 if (uprv_strcmp(keywordList
[j
].keyword
, addKeyword
) == 0) {
745 if (numKeywords
== maxKeywords
) {
746 *status
= U_INTERNAL_PROGRAM_ERROR
;
749 uprv_strcpy(keywordList
[numKeywords
].keyword
, addKeyword
);
750 keywordList
[numKeywords
].keywordLen
= (int32_t)uprv_strlen(addKeyword
);
751 keywordList
[numKeywords
].valueStart
= addValue
;
752 keywordList
[numKeywords
].valueLen
= (int32_t)uprv_strlen(addValue
);
756 U_ASSERT(addValue
== NULL
);
759 /* now we have a list of keywords */
760 /* we need to sort it */
761 uprv_sortArray(keywordList
, numKeywords
, sizeof(KeywordStruct
), compareKeywordStructs
, NULL
, FALSE
, status
);
763 /* Now construct the keyword part */
764 for(i
= 0; i
< numKeywords
; i
++) {
765 if(keywordsLen
+ keywordList
[i
].keywordLen
+ 1< keywordCapacity
) {
766 uprv_strcpy(keywords
+keywordsLen
, keywordList
[i
].keyword
);
768 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = '=';
770 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = 0;
773 keywordsLen
+= keywordList
[i
].keywordLen
+ 1;
775 if(keywordsLen
+ keywordList
[i
].valueLen
< keywordCapacity
) {
776 uprv_strncpy(keywords
+keywordsLen
, keywordList
[i
].valueStart
, keywordList
[i
].valueLen
);
778 keywordsLen
+= keywordList
[i
].valueLen
;
780 if(i
< numKeywords
- 1) {
781 if(keywordsLen
< keywordCapacity
) {
782 keywords
[keywordsLen
] = ';';
788 if(valuesLen
+ keywordList
[i
].valueLen
+ 1< valuesCapacity
) {
789 uprv_strcpy(values
+valuesLen
, keywordList
[i
].valueStart
);
790 values
[valuesLen
+ keywordList
[i
].valueLen
] = 0;
792 valuesLen
+= keywordList
[i
].valueLen
+ 1;
796 values
[valuesLen
] = 0;
801 return u_terminateChars(keywords
, keywordCapacity
, keywordsLen
, status
);
808 locale_getKeywords(const char *localeID
,
810 char *keywords
, int32_t keywordCapacity
,
811 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
813 UErrorCode
*status
) {
814 return _getKeywords(localeID
, prev
, keywords
, keywordCapacity
,
815 values
, valuesCapacity
, valLen
, valuesToo
,
819 U_CAPI
int32_t U_EXPORT2
820 uloc_getKeywordValue(const char* localeID
,
821 const char* keywordName
,
822 char* buffer
, int32_t bufferCapacity
,
825 const char* startSearchHere
= NULL
;
826 const char* nextSeparator
= NULL
;
827 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
828 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
832 if(status
&& U_SUCCESS(*status
) && localeID
) {
833 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
834 const char* tmpLocaleID
;
836 if (_hasBCP47Extension(localeID
)) {
837 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
839 tmpLocaleID
=localeID
;
842 startSearchHere
= uprv_strchr(tmpLocaleID
, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
843 if(startSearchHere
== NULL
) {
844 /* no keywords, return at once */
848 locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
849 if(U_FAILURE(*status
)) {
853 /* find the first keyword */
854 while(startSearchHere
) {
856 /* skip leading spaces (allowed?) */
857 while(*startSearchHere
== ' ') {
860 nextSeparator
= uprv_strchr(startSearchHere
, '=');
861 /* need to normalize both keyword and keyword name */
865 if(nextSeparator
- startSearchHere
>= ULOC_KEYWORD_BUFFER_LEN
) {
866 /* keyword name too long for internal buffer */
867 *status
= U_INTERNAL_PROGRAM_ERROR
;
870 for(i
= 0; i
< nextSeparator
- startSearchHere
; i
++) {
871 localeKeywordNameBuffer
[i
] = uprv_tolower(startSearchHere
[i
]);
873 /* trim trailing spaces */
874 while(startSearchHere
[i
-1] == ' ') {
878 localeKeywordNameBuffer
[i
] = 0;
880 startSearchHere
= uprv_strchr(nextSeparator
, ';');
882 if(uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
) == 0) {
884 while(*nextSeparator
== ' ') {
887 /* we actually found the keyword. Copy the value */
888 if(startSearchHere
&& startSearchHere
- nextSeparator
< bufferCapacity
) {
889 while(*(startSearchHere
-1) == ' ') {
892 uprv_strncpy(buffer
, nextSeparator
, startSearchHere
- nextSeparator
);
893 result
= u_terminateChars(buffer
, bufferCapacity
, (int32_t)(startSearchHere
- nextSeparator
), status
);
894 } else if(!startSearchHere
&& (int32_t)uprv_strlen(nextSeparator
) < bufferCapacity
) { /* last item in string */
895 i
= (int32_t)uprv_strlen(nextSeparator
);
896 while(nextSeparator
[i
- 1] == ' ') {
899 uprv_strncpy(buffer
, nextSeparator
, i
);
900 result
= u_terminateChars(buffer
, bufferCapacity
, i
, status
);
902 /* give a bigger buffer, please */
903 *status
= U_BUFFER_OVERFLOW_ERROR
;
904 if(startSearchHere
) {
905 result
= (int32_t)(startSearchHere
- nextSeparator
);
907 result
= (int32_t)uprv_strlen(nextSeparator
);
917 U_CAPI
int32_t U_EXPORT2
918 uloc_setKeywordValue(const char* keywordName
,
919 const char* keywordValue
,
920 char* buffer
, int32_t bufferCapacity
,
923 /* TODO: sorting. removal. */
924 int32_t keywordNameLen
;
925 int32_t keywordValueLen
;
928 int32_t foundValueLen
;
929 int32_t keywordAtEnd
= 0; /* is the keyword at the end of the string? */
930 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
931 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
934 char* nextSeparator
= NULL
;
935 char* nextEqualsign
= NULL
;
936 char* startSearchHere
= NULL
;
937 char* keywordStart
= NULL
;
938 char *insertHere
= NULL
;
939 if(U_FAILURE(*status
)) {
942 if(bufferCapacity
>1) {
943 bufLen
= (int32_t)uprv_strlen(buffer
);
945 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
948 if(bufferCapacity
<bufLen
) {
949 /* The capacity is less than the length?! Is this NULL terminated? */
950 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
953 if(keywordValue
&& !*keywordValue
) {
957 keywordValueLen
= (int32_t)uprv_strlen(keywordValue
);
961 keywordNameLen
= locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
962 if(U_FAILURE(*status
)) {
965 startSearchHere
= (char*)locale_getKeywordsStart(buffer
);
966 if(startSearchHere
== NULL
|| (startSearchHere
[1]==0)) {
967 if(!keywordValue
) { /* no keywords = nothing to remove */
971 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
972 if(startSearchHere
) { /* had a single @ */
973 needLen
--; /* already had the @ */
974 /* startSearchHere points at the @ */
976 startSearchHere
=buffer
+bufLen
;
978 if(needLen
>= bufferCapacity
) {
979 *status
= U_BUFFER_OVERFLOW_ERROR
;
980 return needLen
; /* no change */
982 *startSearchHere
= '@';
984 uprv_strcpy(startSearchHere
, keywordNameBuffer
);
985 startSearchHere
+= keywordNameLen
;
986 *startSearchHere
= '=';
988 uprv_strcpy(startSearchHere
, keywordValue
);
989 startSearchHere
+=keywordValueLen
;
991 } /* end shortcut - no @ */
993 keywordStart
= startSearchHere
;
994 /* search for keyword */
995 while(keywordStart
) {
997 /* skip leading spaces (allowed?) */
998 while(*keywordStart
== ' ') {
1001 nextEqualsign
= uprv_strchr(keywordStart
, '=');
1002 /* need to normalize both keyword and keyword name */
1003 if(!nextEqualsign
) {
1006 if(nextEqualsign
- keywordStart
>= ULOC_KEYWORD_BUFFER_LEN
) {
1007 /* keyword name too long for internal buffer */
1008 *status
= U_INTERNAL_PROGRAM_ERROR
;
1011 for(i
= 0; i
< nextEqualsign
- keywordStart
; i
++) {
1012 localeKeywordNameBuffer
[i
] = uprv_tolower(keywordStart
[i
]);
1014 /* trim trailing spaces */
1015 while(keywordStart
[i
-1] == ' ') {
1018 U_ASSERT(i
>=0 && i
<ULOC_KEYWORD_BUFFER_LEN
);
1019 localeKeywordNameBuffer
[i
] = 0;
1021 nextSeparator
= uprv_strchr(nextEqualsign
, ';');
1022 rc
= uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
);
1025 while(*nextEqualsign
== ' ') {
1028 /* we actually found the keyword. Change the value */
1029 if (nextSeparator
) {
1031 foundValueLen
= (int32_t)(nextSeparator
- nextEqualsign
);
1034 foundValueLen
= (int32_t)uprv_strlen(nextEqualsign
);
1036 if(keywordValue
) { /* adding a value - not removing */
1037 if(foundValueLen
== keywordValueLen
) {
1038 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1039 return bufLen
; /* no change in size */
1040 } else if(foundValueLen
> keywordValueLen
) {
1041 int32_t delta
= foundValueLen
- keywordValueLen
;
1042 if(nextSeparator
) { /* RH side */
1043 uprv_memmove(nextSeparator
- delta
, nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1045 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1049 } else { /* FVL < KVL */
1050 int32_t delta
= keywordValueLen
- foundValueLen
;
1051 if((bufLen
+delta
) >= bufferCapacity
) {
1052 *status
= U_BUFFER_OVERFLOW_ERROR
;
1053 return bufLen
+delta
;
1055 if(nextSeparator
) { /* RH side */
1056 uprv_memmove(nextSeparator
+delta
,nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1058 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1063 } else { /* removing a keyword */
1065 /* zero out the ';' or '@' just before startSearchhere */
1066 keywordStart
[-1] = 0;
1067 return (int32_t)((keywordStart
-buffer
)-1); /* (string length without keyword) minus separator */
1069 uprv_memmove(keywordStart
, nextSeparator
+1, bufLen
-((nextSeparator
+1)-buffer
));
1070 keywordStart
[bufLen
-((nextSeparator
+1)-buffer
)]=0;
1071 return (int32_t)(bufLen
-((nextSeparator
+1)-keywordStart
));
1074 } else if(rc
<0){ /* end match keyword */
1075 /* could insert at this location. */
1076 insertHere
= keywordStart
;
1078 keywordStart
= nextSeparator
;
1079 } /* end loop searching */
1082 return bufLen
; /* removal of non-extant keyword - no change */
1085 /* we know there is at least one keyword. */
1086 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
1087 if(needLen
>= bufferCapacity
) {
1088 *status
= U_BUFFER_OVERFLOW_ERROR
;
1089 return needLen
; /* no change */
1093 uprv_memmove(insertHere
+(1+keywordNameLen
+1+keywordValueLen
), insertHere
, bufLen
-(insertHere
-buffer
));
1094 keywordStart
= insertHere
;
1096 keywordStart
= buffer
+bufLen
;
1097 *keywordStart
= ';';
1100 uprv_strncpy(keywordStart
, keywordNameBuffer
, keywordNameLen
);
1101 keywordStart
+= keywordNameLen
;
1102 *keywordStart
= '=';
1104 uprv_strncpy(keywordStart
, keywordValue
, keywordValueLen
); /* terminates. */
1105 keywordStart
+=keywordValueLen
;
1107 *keywordStart
= ';';
1114 /* ### ID parsing implementation **************************************************/
/* True if 'a' is one of the special prefix letters: x, X, i or I.
 * The argument is fully parenthesized so that compound expressions
 * (e.g. a conditional expression) are compared as a whole.
 * NOTE: 'a' is evaluated more than once; pass a side-effect-free expression. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))
/* Returns TRUE if one of the special prefixes is here (s=string):
 * a prefix letter (see _isPrefixLetter) at s[0] immediately followed by
 * an ID separator at s[1].
 * 's' is parenthesized so pointer expressions such as (p+1) index correctly.
 * NOTE: 's' is evaluated more than once; pass a side-effect-free expression. */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))
/* True if 'a' ends the current locale ID field: NUL, '.' or '@'.
 * Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant.
 * The argument is fully parenthesized so compound expressions compare as a whole.
 * NOTE: 'a' is evaluated more than once; pass a side-effect-free expression. */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1127 static char* _strnchr(const char* str
, int32_t len
, char c
) {
1128 U_ASSERT(str
!= 0 && len
>= 0);
1129 while (len
-- != 0) {
1133 } else if (d
== 0) {
1142 * Lookup 'key' in the array 'list'. The array 'list' should contain
1143 * a NULL entry, followed by more entries, and a second NULL entry.
1145 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1148 static int16_t _findIndex(const char* const* list
, const char* key
)
1150 const char* const* anchor
= list
;
1153 /* Make two passes through two NULL-terminated arrays at 'list' */
1154 while (pass
++ < 2) {
1156 if (uprv_strcmp(key
, *list
) == 0) {
1157 return (int16_t)(list
- anchor
);
1161 ++list
; /* skip final NULL *CWB*/
1166 /* count the length of src while copying it to dest; return strlen(src) */
1167 static inline int32_t
1168 _copyCount(char *dest
, int32_t destCapacity
, const char *src
) {
1175 return (int32_t)(src
-anchor
);
1177 if(destCapacity
<=0) {
1178 return (int32_t)((src
-anchor
)+uprv_strlen(src
));
1187 uloc_getCurrentCountryID(const char* oldID
){
1188 int32_t offset
= _findIndex(DEPRECATED_COUNTRIES
, oldID
);
1190 return REPLACEMENT_COUNTRIES
[offset
];
1195 uloc_getCurrentLanguageID(const char* oldID
){
1196 int32_t offset
= _findIndex(DEPRECATED_LANGUAGES
, oldID
);
1198 return REPLACEMENT_LANGUAGES
[offset
];
1203 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1204 * avoid duplicating code to handle the earlier locale ID pieces
1205 * in the functions for the later ones by
1206 * setting the *pEnd pointer to where they stopped parsing
1208 * TODO try to use this in Locale
1211 ulocimp_getLanguage(const char *localeID
,
1212 char *language
, int32_t languageCapacity
,
1213 const char **pEnd
) {
1216 char lang
[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1218 /* if it starts with i- or x- then copy that prefix */
1219 if(_isIDPrefix(localeID
)) {
1220 if(i
<languageCapacity
) {
1221 language
[i
]=(char)uprv_tolower(*localeID
);
1223 if(i
<languageCapacity
) {
1230 /* copy the language as far as possible and count its length */
1231 while(!_isTerminator(*localeID
) && !_isIDSeparator(*localeID
)) {
1232 if(i
<languageCapacity
) {
1233 language
[i
]=(char)uprv_tolower(*localeID
);
1237 lang
[i
]=(char)uprv_tolower(*localeID
);
1244 /* convert 3 character code to 2 character code if possible *CWB*/
1245 offset
=_findIndex(LANGUAGES_3
, lang
);
1247 i
=_copyCount(language
, languageCapacity
, LANGUAGES
[offset
]);
1258 ulocimp_getScript(const char *localeID
,
1259 char *script
, int32_t scriptCapacity
,
1268 /* copy the second item as far as possible and count its length */
1269 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])
1270 && uprv_isASCIILetter(localeID
[idLen
])) {
1274 /* If it's exactly 4 characters long, then it's a script and not a country. */
1278 *pEnd
= localeID
+idLen
;
1280 if(idLen
> scriptCapacity
) {
1281 idLen
= scriptCapacity
;
1284 script
[0]=(char)uprv_toupper(*(localeID
++));
1286 for (i
= 1; i
< idLen
; i
++) {
1287 script
[i
]=(char)uprv_tolower(*(localeID
++));
1297 ulocimp_getCountry(const char *localeID
,
1298 char *country
, int32_t countryCapacity
,
1302 char cnty
[ULOC_COUNTRY_CAPACITY
]={ 0, 0, 0, 0 };
1305 /* copy the country as far as possible and count its length */
1306 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])) {
1307 if(idLen
<(ULOC_COUNTRY_CAPACITY
-1)) { /*CWB*/
1308 cnty
[idLen
]=(char)uprv_toupper(localeID
[idLen
]);
1313 /* the country should be either length 2 or 3 */
1314 if (idLen
== 2 || idLen
== 3) {
1315 UBool gotCountry
= FALSE
;
1316 /* convert 3 character code to 2 character code if possible *CWB*/
1318 offset
=_findIndex(COUNTRIES_3
, cnty
);
1320 idLen
=_copyCount(country
, countryCapacity
, COUNTRIES
[offset
]);
1326 for (i
= 0; i
< idLen
; i
++) {
1327 if (i
< countryCapacity
) {
1328 country
[i
]=(char)uprv_toupper(localeID
[i
]);
1345 * @param needSeparator if true, then add leading '_' if any variants
1346 * are added to 'variant'
1349 _getVariantEx(const char *localeID
,
1351 char *variant
, int32_t variantCapacity
,
1352 UBool needSeparator
) {
1355 /* get one or more variant tags and separate them with '_' */
1356 if(_isIDSeparator(prev
)) {
1357 /* get a variant string after a '-' or '_' */
1358 while(!_isTerminator(*localeID
)) {
1359 if (needSeparator
) {
1360 if (i
<variantCapacity
) {
1364 needSeparator
= FALSE
;
1366 if(i
<variantCapacity
) {
1367 variant
[i
]=(char)uprv_toupper(*localeID
);
1368 if(variant
[i
]=='-') {
1377 /* if there is no variant tag after a '-' or '_' then look for '@' */
1381 } else if((localeID
=locale_getKeywordsStart(localeID
))!=NULL
) {
1382 ++localeID
; /* point after the '@' */
1386 while(!_isTerminator(*localeID
)) {
1387 if (needSeparator
) {
1388 if (i
<variantCapacity
) {
1392 needSeparator
= FALSE
;
1394 if(i
<variantCapacity
) {
1395 variant
[i
]=(char)uprv_toupper(*localeID
);
1396 if(variant
[i
]=='-' || variant
[i
]==',') {
1409 _getVariant(const char *localeID
,
1411 char *variant
, int32_t variantCapacity
) {
1412 return _getVariantEx(localeID
, prev
, variant
, variantCapacity
, FALSE
);
1416 * Delete ALL instances of a variant from the given list of one or
1417 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1418 * @param variants the source string of one or more variants,
1419 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1420 * terminated; if it is, trailing zero will NOT be maintained.
1421 * @param variantsLen length of variants
1422 * @param toDelete variant to delete, without separators, e.g. "EURO"
1423 * or "PREEURO"; not zero terminated
1424 * @param toDeleteLen length of toDelete
1425 * @return number of characters deleted from variants
1428 _deleteVariant(char* variants
, int32_t variantsLen
,
1429 const char* toDelete
, int32_t toDeleteLen
)
1431 int32_t delta
= 0; /* number of chars deleted */
1434 if (variantsLen
< toDeleteLen
) {
1437 if (uprv_strncmp(variants
, toDelete
, toDeleteLen
) == 0 &&
1438 (variantsLen
== toDeleteLen
||
1439 (flag
=(variants
[toDeleteLen
] == '_'))))
1441 int32_t d
= toDeleteLen
+ (flag
?1:0);
1444 if (variantsLen
> 0) {
1445 uprv_memmove(variants
, variants
+d
, variantsLen
);
1448 char* p
= _strnchr(variants
, variantsLen
, '_');
1453 variantsLen
-= (int32_t)(p
- variants
);
1459 /* Keyword enumeration */
1461 typedef struct UKeywordsContext
{
1466 static void U_CALLCONV
1467 uloc_kw_closeKeywords(UEnumeration
*enumerator
) {
1468 uprv_free(((UKeywordsContext
*)enumerator
->context
)->keywords
);
1469 uprv_free(enumerator
->context
);
1470 uprv_free(enumerator
);
1473 static int32_t U_CALLCONV
1474 uloc_kw_countKeywords(UEnumeration
*en
, UErrorCode
* /*status*/) {
1475 char *kw
= ((UKeywordsContext
*)en
->context
)->keywords
;
1479 kw
+= uprv_strlen(kw
)+1;
1484 static const char* U_CALLCONV
1485 uloc_kw_nextKeyword(UEnumeration
* en
,
1486 int32_t* resultLength
,
1487 UErrorCode
* /*status*/) {
1488 const char* result
= ((UKeywordsContext
*)en
->context
)->current
;
1491 len
= (int32_t)uprv_strlen(((UKeywordsContext
*)en
->context
)->current
);
1492 ((UKeywordsContext
*)en
->context
)->current
+= len
+1;
1497 *resultLength
= len
;
1502 static void U_CALLCONV
1503 uloc_kw_resetKeywords(UEnumeration
* en
,
1504 UErrorCode
* /*status*/) {
1505 ((UKeywordsContext
*)en
->context
)->current
= ((UKeywordsContext
*)en
->context
)->keywords
;
1508 static const UEnumeration gKeywordsEnum
= {
1511 uloc_kw_closeKeywords
,
1512 uloc_kw_countKeywords
,
1514 uloc_kw_nextKeyword
,
1515 uloc_kw_resetKeywords
1518 U_CAPI UEnumeration
* U_EXPORT2
1519 uloc_openKeywordList(const char *keywordList
, int32_t keywordListSize
, UErrorCode
* status
)
1521 UKeywordsContext
*myContext
= NULL
;
1522 UEnumeration
*result
= NULL
;
1524 if(U_FAILURE(*status
)) {
1527 result
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
1528 /* Null pointer test */
1529 if (result
== NULL
) {
1530 *status
= U_MEMORY_ALLOCATION_ERROR
;
1533 uprv_memcpy(result
, &gKeywordsEnum
, sizeof(UEnumeration
));
1534 myContext
= static_cast<UKeywordsContext
*>(uprv_malloc(sizeof(UKeywordsContext
)));
1535 if (myContext
== NULL
) {
1536 *status
= U_MEMORY_ALLOCATION_ERROR
;
1540 myContext
->keywords
= (char *)uprv_malloc(keywordListSize
+1);
1541 uprv_memcpy(myContext
->keywords
, keywordList
, keywordListSize
);
1542 myContext
->keywords
[keywordListSize
] = 0;
1543 myContext
->current
= myContext
->keywords
;
1544 result
->context
= myContext
;
1548 U_CAPI UEnumeration
* U_EXPORT2
1549 uloc_openKeywords(const char* localeID
,
1554 int32_t keywordsCapacity
= 256;
1555 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1556 const char* tmpLocaleID
;
1558 if(status
==NULL
|| U_FAILURE(*status
)) {
1562 if (_hasBCP47Extension(localeID
)) {
1563 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
1565 if (localeID
==NULL
) {
1566 localeID
=uloc_getDefault();
1568 tmpLocaleID
=localeID
;
1571 /* Skip the language */
1572 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
1573 if(_isIDSeparator(*tmpLocaleID
)) {
1574 const char *scriptID
;
1575 /* Skip the script if available */
1576 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
1577 if(scriptID
!= tmpLocaleID
+1) {
1578 /* Found optional script */
1579 tmpLocaleID
= scriptID
;
1581 /* Skip the Country */
1582 if (_isIDSeparator(*tmpLocaleID
)) {
1583 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &tmpLocaleID
);
1584 if(_isIDSeparator(*tmpLocaleID
)) {
1585 _getVariant(tmpLocaleID
+1, *tmpLocaleID
, NULL
, 0);
1590 /* keywords are located after '@' */
1591 if((tmpLocaleID
= locale_getKeywordsStart(tmpLocaleID
)) != NULL
) {
1592 i
=locale_getKeywords(tmpLocaleID
+1, '@', keywords
, keywordsCapacity
, NULL
, 0, NULL
, FALSE
, status
);
1596 return uloc_openKeywordList(keywords
, i
, status
);
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2  /* omit the keyword section from the result */
#define _ULOC_CANONICALIZE   0x1  /* request level 2 canonicalization */

/* True if any bit of 'mask' is set in 'options'.
 * Both arguments are parenthesized so that a combined mask such as (A|B)
 * is ANDed as a whole instead of being split by operator precedence. */
#define OPTION_SET(options, mask) ((((options)) & ((mask))) != 0)
/* The special language tag "i-default". Deliberately NOT NUL-terminated:
 * it is only ever compared with a length-bounded comparison. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};

/* Number of characters in i_default. Cast to int32_t because it is compared
 * against int32_t lengths; leaving it as size_t would silently convert the
 * signed operand to unsigned in those comparisons. */
#define I_DEFAULT_LENGTH ((int32_t)(sizeof i_default / sizeof i_default[0]))
1613 * Canonicalize the given localeID, to level 1 or to level 2,
1614 * depending on the options. To specify level 1, pass in options=0.
1615 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1617 * This is the code underlying uloc_getName and uloc_canonicalize.
1620 _canonicalize(const char* localeID
,
1622 int32_t resultCapacity
,
1625 int32_t j
, len
, fieldCount
=0, scriptSize
=0, variantSize
=0, nameCapacity
;
1626 char localeBuffer
[ULOC_FULLNAME_CAPACITY
];
1627 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1628 const char* origLocaleID
;
1629 const char* tmpLocaleID
;
1630 const char* keywordAssign
= NULL
;
1631 const char* separatorIndicator
= NULL
;
1632 const char* addKeyword
= NULL
;
1633 const char* addValue
= NULL
;
1635 char* variant
= NULL
; /* pointer into name, or NULL */
1637 if (U_FAILURE(*err
)) {
1641 if (_hasBCP47Extension(localeID
)) {
1642 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
1644 if (localeID
==NULL
) {
1645 localeID
=uloc_getDefault();
1647 tmpLocaleID
=localeID
;
1650 origLocaleID
=tmpLocaleID
;
1652 /* if we are doing a full canonicalization, then put results in
1653 localeBuffer, if necessary; otherwise send them to result. */
1654 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1655 (result
== NULL
|| resultCapacity
< (int32_t)sizeof(localeBuffer
))) {
1656 name
= localeBuffer
;
1657 nameCapacity
= (int32_t)sizeof(localeBuffer
);
1660 nameCapacity
= resultCapacity
;
1663 /* get all pieces, one after another, and separate with '_' */
1664 len
=ulocimp_getLanguage(tmpLocaleID
, name
, nameCapacity
, &tmpLocaleID
);
1666 if(len
== I_DEFAULT_LENGTH
&& uprv_strncmp(origLocaleID
, i_default
, len
) == 0) {
1667 const char *d
= uloc_getDefault();
1669 len
= (int32_t)uprv_strlen(d
);
1672 uprv_strncpy(name
, d
, len
);
1674 } else if(_isIDSeparator(*tmpLocaleID
)) {
1675 const char *scriptID
;
1678 if(len
<nameCapacity
) {
1683 scriptSize
=ulocimp_getScript(tmpLocaleID
+1,
1684 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
, &scriptID
);
1685 if(scriptSize
> 0) {
1686 /* Found optional script */
1687 tmpLocaleID
= scriptID
;
1690 if (_isIDSeparator(*tmpLocaleID
)) {
1691 /* If there is something else, then we add the _ */
1692 if(len
<nameCapacity
) {
1699 if (_isIDSeparator(*tmpLocaleID
)) {
1700 const char *cntryID
;
1701 int32_t cntrySize
= ulocimp_getCountry(tmpLocaleID
+1,
1702 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
, &cntryID
);
1703 if (cntrySize
> 0) {
1704 /* Found optional country */
1705 tmpLocaleID
= cntryID
;
1708 if(_isIDSeparator(*tmpLocaleID
)) {
1709 /* If there is something else, then we add the _ if we found country before. */
1710 if (cntrySize
>= 0 && ! _isIDSeparator(*(tmpLocaleID
+1)) ) {
1712 if(len
<nameCapacity
) {
1718 variantSize
= _getVariant(tmpLocaleID
+1, *tmpLocaleID
,
1719 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
);
1720 if (variantSize
> 0) {
1721 variant
= len
<nameCapacity
? name
+len
: NULL
;
1723 tmpLocaleID
+= variantSize
+ 1; /* skip '_' and variant */
1729 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1730 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) && *tmpLocaleID
== '.') {
1733 char c
= *tmpLocaleID
;
1740 if (len
<nameCapacity
) {
1750 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1751 After this, tmpLocaleID either points to '@' or is NULL */
1752 if ((tmpLocaleID
=locale_getKeywordsStart(tmpLocaleID
))!=NULL
) {
1753 keywordAssign
= uprv_strchr(tmpLocaleID
, '=');
1754 separatorIndicator
= uprv_strchr(tmpLocaleID
, ';');
1757 /* Copy POSIX-style variant, if any [mr@FOO] */
1758 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) &&
1759 tmpLocaleID
!= NULL
&& keywordAssign
== NULL
) {
1761 char c
= *tmpLocaleID
;
1765 if (len
<nameCapacity
) {
1773 if (OPTION_SET(options
, _ULOC_CANONICALIZE
)) {
1774 /* Handle @FOO variant if @ is present and not followed by = */
1775 if (tmpLocaleID
!=NULL
&& keywordAssign
==NULL
) {
1776 int32_t posixVariantSize
;
1777 /* Add missing '_' if needed */
1778 if (fieldCount
< 2 || (fieldCount
< 3 && scriptSize
> 0)) {
1780 if(len
<nameCapacity
) {
1785 } while(fieldCount
<2);
1787 posixVariantSize
= _getVariantEx(tmpLocaleID
+1, '@', name
+len
, nameCapacity
-len
,
1788 (UBool
)(variantSize
> 0));
1789 if (posixVariantSize
> 0) {
1790 if (variant
== NULL
) {
1793 len
+= posixVariantSize
;
1794 variantSize
+= posixVariantSize
;
1798 /* Handle generic variants first */
1800 for (j
=0; j
<(int32_t)(sizeof(VARIANT_MAP
)/sizeof(VARIANT_MAP
[0])); j
++) {
1801 const char* variantToCompare
= VARIANT_MAP
[j
].variant
;
1802 int32_t n
= (int32_t)uprv_strlen(variantToCompare
);
1803 int32_t variantLen
= _deleteVariant(variant
, uprv_min(variantSize
, (nameCapacity
-len
)), variantToCompare
, n
);
1805 if (variantLen
> 0) {
1806 if (len
> 0 && name
[len
-1] == '_') { /* delete trailing '_' */
1809 addKeyword
= VARIANT_MAP
[j
].keyword
;
1810 addValue
= VARIANT_MAP
[j
].value
;
1814 if (len
> 0 && len
<= nameCapacity
&& name
[len
-1] == '_') { /* delete trailing '_' */
1819 /* Look up the ID in the canonicalization map */
1820 for (j
=0; j
<(int32_t)(sizeof(CANONICALIZE_MAP
)/sizeof(CANONICALIZE_MAP
[0])); j
++) {
1821 const char* id
= CANONICALIZE_MAP
[j
].id
;
1822 int32_t n
= (int32_t)uprv_strlen(id
);
1823 if (len
== n
&& uprv_strncmp(name
, id
, n
) == 0) {
1824 if (n
== 0 && tmpLocaleID
!= NULL
) {
1825 break; /* Don't remap "" if keywords present */
1827 len
= _copyCount(name
, nameCapacity
, CANONICALIZE_MAP
[j
].canonicalID
);
1828 if (CANONICALIZE_MAP
[j
].keyword
) {
1829 addKeyword
= CANONICALIZE_MAP
[j
].keyword
;
1830 addValue
= CANONICALIZE_MAP
[j
].value
;
1837 if (!OPTION_SET(options
, _ULOC_STRIP_KEYWORDS
)) {
1838 if (tmpLocaleID
!=NULL
&& keywordAssign
!=NULL
&&
1839 (!separatorIndicator
|| separatorIndicator
> keywordAssign
)) {
1840 if(len
<nameCapacity
) {
1845 len
+= _getKeywords(tmpLocaleID
+1, '@', (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
,
1846 NULL
, 0, NULL
, TRUE
, addKeyword
, addValue
, err
);
1847 } else if (addKeyword
!= NULL
) {
1848 U_ASSERT(addValue
!= NULL
&& len
< nameCapacity
);
1849 /* inelegant but works -- later make _getKeywords do this? */
1850 len
+= _copyCount(name
+len
, nameCapacity
-len
, "@");
1851 len
+= _copyCount(name
+len
, nameCapacity
-len
, addKeyword
);
1852 len
+= _copyCount(name
+len
, nameCapacity
-len
, "=");
1853 len
+= _copyCount(name
+len
, nameCapacity
-len
, addValue
);
1857 if (U_SUCCESS(*err
) && result
!= NULL
&& name
== localeBuffer
) {
1858 uprv_strncpy(result
, localeBuffer
, (len
> resultCapacity
) ? resultCapacity
: len
);
1861 return u_terminateChars(result
, resultCapacity
, len
, err
);
1864 /* ### ID parsing API **************************************************/
1866 U_CAPI
int32_t U_EXPORT2
1867 uloc_getParent(const char* localeID
,
1869 int32_t parentCapacity
,
1872 const char *lastUnderscore
;
1875 if (U_FAILURE(*err
))
1878 if (localeID
== NULL
)
1879 localeID
= uloc_getDefault();
1881 lastUnderscore
=uprv_strrchr(localeID
, '_');
1882 if(lastUnderscore
!=NULL
) {
1883 i
=(int32_t)(lastUnderscore
-localeID
);
1888 if(i
>0 && parent
!= localeID
) {
1889 uprv_memcpy(parent
, localeID
, uprv_min(i
, parentCapacity
));
1891 return u_terminateChars(parent
, parentCapacity
, i
, err
);
1894 U_CAPI
int32_t U_EXPORT2
1895 uloc_getLanguage(const char* localeID
,
1897 int32_t languageCapacity
,
1900 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1903 if (err
==NULL
|| U_FAILURE(*err
)) {
1907 if(localeID
==NULL
) {
1908 localeID
=uloc_getDefault();
1911 i
=ulocimp_getLanguage(localeID
, language
, languageCapacity
, NULL
);
1912 return u_terminateChars(language
, languageCapacity
, i
, err
);
1915 U_CAPI
int32_t U_EXPORT2
1916 uloc_getScript(const char* localeID
,
1918 int32_t scriptCapacity
,
1923 if(err
==NULL
|| U_FAILURE(*err
)) {
1927 if(localeID
==NULL
) {
1928 localeID
=uloc_getDefault();
1931 /* skip the language */
1932 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
1933 if(_isIDSeparator(*localeID
)) {
1934 i
=ulocimp_getScript(localeID
+1, script
, scriptCapacity
, NULL
);
1936 return u_terminateChars(script
, scriptCapacity
, i
, err
);
1939 U_CAPI
int32_t U_EXPORT2
1940 uloc_getCountry(const char* localeID
,
1942 int32_t countryCapacity
,
1947 if(err
==NULL
|| U_FAILURE(*err
)) {
1951 if(localeID
==NULL
) {
1952 localeID
=uloc_getDefault();
1955 /* Skip the language */
1956 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
1957 if(_isIDSeparator(*localeID
)) {
1958 const char *scriptID
;
1959 /* Skip the script if available */
1960 ulocimp_getScript(localeID
+1, NULL
, 0, &scriptID
);
1961 if(scriptID
!= localeID
+1) {
1962 /* Found optional script */
1963 localeID
= scriptID
;
1965 if(_isIDSeparator(*localeID
)) {
1966 i
=ulocimp_getCountry(localeID
+1, country
, countryCapacity
, NULL
);
1969 return u_terminateChars(country
, countryCapacity
, i
, err
);
1972 U_CAPI
int32_t U_EXPORT2
1973 uloc_getVariant(const char* localeID
,
1975 int32_t variantCapacity
,
1978 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1979 const char* tmpLocaleID
;
1982 if(err
==NULL
|| U_FAILURE(*err
)) {
1986 if (_hasBCP47Extension(localeID
)) {
1987 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
1989 if (localeID
==NULL
) {
1990 localeID
=uloc_getDefault();
1992 tmpLocaleID
=localeID
;
1995 /* Skip the language */
1996 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
1997 if(_isIDSeparator(*tmpLocaleID
)) {
1998 const char *scriptID
;
1999 /* Skip the script if available */
2000 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
2001 if(scriptID
!= tmpLocaleID
+1) {
2002 /* Found optional script */
2003 tmpLocaleID
= scriptID
;
2005 /* Skip the Country */
2006 if (_isIDSeparator(*tmpLocaleID
)) {
2007 const char *cntryID
;
2008 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &cntryID
);
2009 if (cntryID
!= tmpLocaleID
+1) {
2010 /* Found optional country */
2011 tmpLocaleID
= cntryID
;
2013 if(_isIDSeparator(*tmpLocaleID
)) {
2014 /* If there was no country ID, skip a possible extra IDSeparator */
2015 if (tmpLocaleID
!= cntryID
&& _isIDSeparator(tmpLocaleID
[1])) {
2018 i
=_getVariant(tmpLocaleID
+1, *tmpLocaleID
, variant
, variantCapacity
);
2023 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2024 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2026 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2027 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2030 return u_terminateChars(variant
, variantCapacity
, i
, err
);
2033 U_CAPI
int32_t U_EXPORT2
2034 uloc_getName(const char* localeID
,
2036 int32_t nameCapacity
,
2039 return _canonicalize(localeID
, name
, nameCapacity
, 0, err
);
2042 U_CAPI
int32_t U_EXPORT2
2043 uloc_getBaseName(const char* localeID
,
2045 int32_t nameCapacity
,
2048 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_STRIP_KEYWORDS
, err
);
2051 U_CAPI
int32_t U_EXPORT2
2052 uloc_canonicalize(const char* localeID
,
2054 int32_t nameCapacity
,
2057 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_CANONICALIZE
, err
);
2060 U_CAPI
const char* U_EXPORT2
2061 uloc_getISO3Language(const char* localeID
)
2064 char lang
[ULOC_LANG_CAPACITY
];
2065 UErrorCode err
= U_ZERO_ERROR
;
2067 if (localeID
== NULL
)
2069 localeID
= uloc_getDefault();
2071 uloc_getLanguage(localeID
, lang
, ULOC_LANG_CAPACITY
, &err
);
2074 offset
= _findIndex(LANGUAGES
, lang
);
2077 return LANGUAGES_3
[offset
];
2080 U_CAPI
const char* U_EXPORT2
2081 uloc_getISO3Country(const char* localeID
)
2084 char cntry
[ULOC_LANG_CAPACITY
];
2085 UErrorCode err
= U_ZERO_ERROR
;
2087 if (localeID
== NULL
)
2089 localeID
= uloc_getDefault();
2091 uloc_getCountry(localeID
, cntry
, ULOC_LANG_CAPACITY
, &err
);
2094 offset
= _findIndex(COUNTRIES
, cntry
);
2098 return COUNTRIES_3
[offset
];
2101 U_CAPI
uint32_t U_EXPORT2
2102 uloc_getLCID(const char* localeID
)
2104 UErrorCode status
= U_ZERO_ERROR
;
2105 char langID
[ULOC_FULLNAME_CAPACITY
];
2107 uloc_getLanguage(localeID
, langID
, sizeof(langID
), &status
);
2108 if (U_FAILURE(status
)) {
2112 if (uprv_strchr(localeID
, '@')) {
2113 // uprv_convertToLCID does not support keywords other than collation.
2114 // Remove all keywords except collation.
2116 char collVal
[ULOC_KEYWORDS_CAPACITY
];
2117 char tmpLocaleID
[ULOC_FULLNAME_CAPACITY
];
2119 len
= uloc_getKeywordValue(localeID
, "collation", collVal
,
2120 sizeof(collVal
)/sizeof(collVal
[0]) - 1, &status
);
2122 if (U_SUCCESS(status
) && len
> 0) {
2125 len
= uloc_getBaseName(localeID
, tmpLocaleID
,
2126 sizeof(tmpLocaleID
)/sizeof(tmpLocaleID
[0]) - 1, &status
);
2128 if (U_SUCCESS(status
)) {
2129 tmpLocaleID
[len
] = 0;
2131 len
= uloc_setKeywordValue("collation", collVal
, tmpLocaleID
,
2132 sizeof(tmpLocaleID
)/sizeof(tmpLocaleID
[0]) - len
- 1, &status
);
2134 if (U_SUCCESS(status
)) {
2135 tmpLocaleID
[len
] = 0;
2136 return uprv_convertToLCID(langID
, tmpLocaleID
, &status
);
2141 // fall through - all keywords are simply ignored
2142 status
= U_ZERO_ERROR
;
2145 return uprv_convertToLCID(langID
, localeID
, &status
);
2148 U_CAPI
int32_t U_EXPORT2
2149 uloc_getLocaleForLCID(uint32_t hostid
, char *locale
, int32_t localeCapacity
,
2152 return uprv_convertToPosix(hostid
, locale
, localeCapacity
, status
);
2155 /* ### Default locale **************************************************/
2157 U_CAPI
const char* U_EXPORT2
2160 return locale_get_default();
2163 U_CAPI
void U_EXPORT2
2164 uloc_setDefault(const char* newDefaultLocale
,
2167 if (U_FAILURE(*err
))
2169 /* the error code isn't currently used for anything by this function*/
2171 /* propagate change to C++ */
2172 locale_set_default(newDefaultLocale
);
2176 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2177 * to an array of pointers to arrays of char. All of these pointers are owned
2178 * by ICU-- do not delete them, and do not write through them. The array is
2179 * terminated with a null pointer.
2181 U_CAPI
const char* const* U_EXPORT2
2182 uloc_getISOLanguages()
2188 * Returns a list of all 2-letter country codes defined in ISO 3166. This is a
2189 * pointer to an array of pointers to arrays of char. All of these pointers are
2190 * owned by ICU-- do not delete them, and do not write through them. The array is
2191 * terminated with a null pointer.
2193 U_CAPI
const char* const* U_EXPORT2
2194 uloc_getISOCountries()
2200 /* this function to be moved into cstring.c later */
2201 static char gDecimal
= 0;
2206 _uloc_strtod(const char *start
, char **end
) {
2213 /* For machines that decide to change the decimal on you,
2214 and try to be too smart with localization.
2215 This normally should be just a '.'. */
2216 sprintf(rep
, "%+1.1f", 1.0);
2220 if(gDecimal
== '.') {
2221 return uprv_strtod(start
, end
); /* fall through to OS */
2223 uprv_strncpy(buf
, start
, 29);
2225 decimal
= uprv_strchr(buf
, '.');
2227 *decimal
= gDecimal
;
2229 return uprv_strtod(start
, end
); /* no decimal point */
2231 rv
= uprv_strtod(buf
, &myEnd
);
2233 *end
= (char*)(start
+(myEnd
-buf
)); /* cast away const (to follow uprv_strtod API.) */
2241 int32_t dummy
; /* to avoid uninitialized memory copy from qsort */
2245 static int32_t U_CALLCONV
2246 uloc_acceptLanguageCompare(const void * /*context*/, const void *a
, const void *b
)
2248 const _acceptLangItem
*aa
= (const _acceptLangItem
*)a
;
2249 const _acceptLangItem
*bb
= (const _acceptLangItem
*)b
;
2253 rc
= -1; /* A > B */
2254 } else if(bb
->q
> aa
->q
) {
2261 rc
= uprv_stricmp(aa
->locale
, bb
->locale
);
2264 #if defined(ULOC_DEBUG)
2265 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2275 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2278 U_CAPI
int32_t U_EXPORT2
2279 uloc_acceptLanguageFromHTTP(char *result
, int32_t resultAvailable
, UAcceptResult
*outResult
,
2280 const char *httpAcceptLanguage
,
2281 UEnumeration
* availableLocales
,
2285 _acceptLangItem smallBuffer
[30];
2287 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2289 const char *itemEnd
;
2290 const char *paramEnd
;
2295 int32_t l
= (int32_t)uprv_strlen(httpAcceptLanguage
);
2297 char *tempstr
; /* Use for null pointer check */
2300 jSize
= sizeof(smallBuffer
)/sizeof(smallBuffer
[0]);
2301 if(U_FAILURE(*status
)) {
2305 for(s
=httpAcceptLanguage
;s
&&*s
;) {
2306 while(isspace(*s
)) /* eat space at the beginning */
2308 itemEnd
=uprv_strchr(s
,',');
2309 paramEnd
=uprv_strchr(s
,';');
2311 itemEnd
= httpAcceptLanguage
+l
; /* end of string */
2313 if(paramEnd
&& paramEnd
<itemEnd
) {
2314 /* semicolon (;) is closer than end (,) */
2319 while(isspace(*t
)) {
2325 while(isspace(*t
)) {
2328 j
[n
].q
= (float)_uloc_strtod(t
,NULL
);
2330 /* no semicolon - it's 1.0 */
2335 /* eat spaces prior to semi */
2336 for(t
=(paramEnd
-1);(paramEnd
>s
)&&isspace(*t
);t
--)
2338 /* Check for null pointer from uprv_strndup */
2339 tempstr
= uprv_strndup(s
,(int32_t)((t
+1)-s
));
2340 if (tempstr
== NULL
) {
2341 *status
= U_MEMORY_ALLOCATION_ERROR
;
2344 j
[n
].locale
= tempstr
;
2345 uloc_canonicalize(j
[n
].locale
,tmp
,sizeof(tmp
)/sizeof(tmp
[0]),status
);
2346 if(strcmp(j
[n
].locale
,tmp
)) {
2347 uprv_free(j
[n
].locale
);
2348 j
[n
].locale
=uprv_strdup(tmp
);
2350 #if defined(ULOC_DEBUG)
2351 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2355 while(*s
==',') { /* eat duplicate commas */
2359 if(j
==smallBuffer
) { /* overflowed the small buffer. */
2360 j
= static_cast<_acceptLangItem
*>(uprv_malloc(sizeof(j
[0])*(jSize
*2)));
2362 uprv_memcpy(j
,smallBuffer
,sizeof(j
[0])*jSize
);
2364 #if defined(ULOC_DEBUG)
2365 fprintf(stderr
,"malloced at size %d\n", jSize
);
2368 j
= static_cast<_acceptLangItem
*>(uprv_realloc(j
, sizeof(j
[0])*jSize
*2));
2369 #if defined(ULOC_DEBUG)
2370 fprintf(stderr
,"re-alloced at size %d\n", jSize
);
2375 *status
= U_MEMORY_ALLOCATION_ERROR
;
2380 uprv_sortArray(j
, n
, sizeof(j
[0]), uloc_acceptLanguageCompare
, NULL
, TRUE
, status
);
2381 if(U_FAILURE(*status
)) {
2382 if(j
!= smallBuffer
) {
2383 #if defined(ULOC_DEBUG)
2384 fprintf(stderr
,"freeing j %p\n", j
);
2390 strs
= static_cast<char **>(uprv_malloc((size_t)(sizeof(strs
[0])*n
)));
2391 /* Check for null pointer */
2393 uprv_free(j
); /* Free to avoid memory leak */
2394 *status
= U_MEMORY_ALLOCATION_ERROR
;
2398 #if defined(ULOC_DEBUG)
2399 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2401 strs
[i
]=j
[i
].locale
;
2403 res
= uloc_acceptLanguage(result
, resultAvailable
, outResult
,
2404 (const char**)strs
, n
, availableLocales
, status
);
2409 if(j
!= smallBuffer
) {
2410 #if defined(ULOC_DEBUG)
2411 fprintf(stderr
,"freeing j %p\n", j
);
2419 U_CAPI
int32_t U_EXPORT2
2420 uloc_acceptLanguage(char *result
, int32_t resultAvailable
,
2421 UAcceptResult
*outResult
, const char **acceptList
,
2422 int32_t acceptListCount
,
2423 UEnumeration
* availableLocales
,
2429 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2431 char **fallbackList
;
2432 if(U_FAILURE(*status
)) {
2435 fallbackList
= static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList
[0])*acceptListCount
)));
2436 if(fallbackList
==NULL
) {
2437 *status
= U_MEMORY_ALLOCATION_ERROR
;
2440 for(i
=0;i
<acceptListCount
;i
++) {
2441 #if defined(ULOC_DEBUG)
2442 fprintf(stderr
,"%02d: %s\n", i
, acceptList
[i
]);
2444 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2445 #if defined(ULOC_DEBUG)
2446 fprintf(stderr
," %s\n", l
);
2448 len
= (int32_t)uprv_strlen(l
);
2449 if(!uprv_strcmp(acceptList
[i
], l
)) {
2451 *outResult
= ULOC_ACCEPT_VALID
;
2453 #if defined(ULOC_DEBUG)
2454 fprintf(stderr
, "MATCH! %s\n", l
);
2457 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
2460 uprv_free(fallbackList
[j
]);
2462 uprv_free(fallbackList
);
2463 return u_terminateChars(result
, resultAvailable
, len
, status
);
2469 uenum_reset(availableLocales
, status
);
2470 /* save off parent info */
2471 if(uloc_getParent(acceptList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
2472 fallbackList
[i
] = uprv_strdup(tmp
);
2478 for(maxLen
--;maxLen
>0;maxLen
--) {
2479 for(i
=0;i
<acceptListCount
;i
++) {
2480 if(fallbackList
[i
] && ((int32_t)uprv_strlen(fallbackList
[i
])==maxLen
)) {
2481 #if defined(ULOC_DEBUG)
2482 fprintf(stderr
,"Try: [%s]", fallbackList
[i
]);
2484 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2485 #if defined(ULOC_DEBUG)
2486 fprintf(stderr
," %s\n", l
);
2488 len
= (int32_t)uprv_strlen(l
);
2489 if(!uprv_strcmp(fallbackList
[i
], l
)) {
2491 *outResult
= ULOC_ACCEPT_FALLBACK
;
2493 #if defined(ULOC_DEBUG)
2494 fprintf(stderr
, "fallback MATCH! %s\n", l
);
2497 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
2499 for(j
=0;j
<acceptListCount
;j
++) {
2500 uprv_free(fallbackList
[j
]);
2502 uprv_free(fallbackList
);
2503 return u_terminateChars(result
, resultAvailable
, len
, status
);
2506 uenum_reset(availableLocales
, status
);
2508 if(uloc_getParent(fallbackList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
2509 uprv_free(fallbackList
[i
]);
2510 fallbackList
[i
] = uprv_strdup(tmp
);
2512 uprv_free(fallbackList
[i
]);
2518 *outResult
= ULOC_ACCEPT_FAILED
;
2521 for(i
=0;i
<acceptListCount
;i
++) {
2522 uprv_free(fallbackList
[i
]);
2524 uprv_free(fallbackList
);