2 **********************************************************************
3 * Copyright (C) 1997-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
9 * Modification History:
11 * Date Name Description
12 * 04/01/97 aliu Creation.
13 * 08/21/98 stephen JDK 1.2 sync
14 * 12/08/98 rtg New Locale implementation and C API
15 * 03/15/99 damiba overhaul.
16 * 04/06/99 stephen changed setDefault() to realloc and copy
17 * 06/14/99 stephen Changed calls to ures_open for new params
18 * 07/21/99 stephen Modified setDefault() to propagate to C++
19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20 * brought canonicalization code into line with spec
21 *****************************************************************************/
24 POSIX's locale format, from putil.c: [no spaces]
26 ll [ _CC ] [ . MM ] [ @ VV]
28 l = lang, C = ctry, M = charmap, V = variant
31 #include "unicode/utypes.h"
32 #include "unicode/ustring.h"
33 #include "unicode/uloc.h"
46 #include <stdio.h> /* for sprintf */
48 /* ### Declarations **************************************************/
50 /* Locale stuff from locid.cpp */
/* Forward declaration only; per the section comment the definition lives in
   locid.cpp. Takes a locale ID string and returns nothing. Presumably installs
   `id` as the default locale -- confirm against locid.cpp. */
51 U_CFUNC
void locale_set_default(const char *id
);
/* Forward declaration only (definition in locid.cpp per the section comment).
   Takes no arguments and returns a const C string; presumably the current
   default locale ID -- confirm against locid.cpp. */
52 U_CFUNC
const char *locale_get_default(void);
54 locale_getKeywords(const char *localeID
,
56 char *keywords
, int32_t keywordCapacity
,
57 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
61 /* ### Data tables **************************************************/
64 * Table of language codes, both 2- and 3-letter, with preference
65 * given to 2-letter codes where possible. Includes 3-letter codes
66 * that lack a 2-letter equivalent.
68 * This list must be in sorted order. This list is returned directly
69 * to the user by some API.
71 * This list must be kept in sync with LANGUAGES_3, with corresponding
74 * This table should be terminated with a NULL entry, followed by a
75 * second list, and another NULL entry. The first list is visible to
76 * user code when this array is returned by API. The second list
77 * contains codes we support, but do not expose through user API.
81 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
82 * include the revisions up to 2001/7/27 *CWB*
84 * The 3 character codes are the terminology codes like RFC 3066. This
85 * is compatible with prior ICU codes
87 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
88 * table but now at the end of the table because 3 character codes are
89 * duplicates. This avoids bad searches going from 3 to 2 character
92 * The range qaa-qtz is reserved for local use
94 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
95 /* ISO639 table version is 20130531 */
96 static const char * const LANGUAGES
[] = {
97 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af",
98 "afa", "afh", "agq", "ain", "ak", "akk", "ale", "alg",
99 "alt", "am", "an", "ang", "anp", "apa", "ar", "arc",
100 "arn", "arp", "art", "arw", "as", "asa", "ast", "ath",
101 "aus", "av", "awa", "ay", "az",
102 "ba", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
103 "bbj", "be", "bej", "bem", "ber", "bez", "bfd", "bg",
104 "bh", "bho", "bi", "bik", "bin", "bkm", "bla", "bm",
105 "bn", "bnt", "bo", "br", "bra", "brx", "bs", "bss",
106 "btk", "bua", "bug", "bum", "byn", "byv",
107 "ca", "cad", "cai", "car", "cau", "cay", "cch", "ce",
108 "ceb", "cel", "cgg", "ch", "chb", "chg", "chk", "chm",
109 "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
110 "cop", "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs",
111 "csb", "cu", "cus", "cv", "cy",
112 "da", "dak", "dar", "dav", "day", "de", "del", "den",
113 "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
114 "dv", "dyo", "dyu", "dz", "dzg",
115 "ebu", "ee", "efi", "egy", "eka", "el", "elx", "en",
116 "enm", "eo", "es", "et", "eu", "ewo",
117 "fa", "fan", "fat", "ff", "fi", "fil", "fiu", "fj",
118 "fo", "fon", "fr", "frm", "fro", "frr", "frs", "fur",
120 "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil",
121 "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",
122 "grc", "gsw", "gu", "guz", "gv", "gwi",
123 "ha", "hai", "haw", "he", "hi", "hil", "him", "hit",
124 "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy",
126 "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ijo",
127 "ik", "ilo", "inc", "ine", "inh", "io", "ira", "iro",
129 "ja", "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
130 "ka", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
131 "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg", "kha",
132 "khi", "kho", "khq", "ki", "kj", "kk", "kkj", "kl",
133 "kln", "km", "kmb", "kn", "ko", "kok", "kos", "kpe",
134 "kr", "krc", "krl", "kro", "kru", "ks", "ksb", "ksf",
135 "ksh", "ku", "kum", "kut", "kv", "kw", "ky",
136 "la", "lad", "lag", "lah", "lam", "lb", "lez", "lg",
137 "li", "lkt", "ln", "lo", "lol", "loz", "lt", "lu",
138 "lua", "lui", "lun", "luo", "lus", "luy", "lv",
139 "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
140 "mde", "mdf", "mdr", "men", "mer", "mfe", "mg", "mga",
141 "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
142 "mkh", "ml", "mn", "mnc", "mni", "mno", "mo", "moh",
143 "mos", "mr", "ms", "mt", "mua", "mul", "mun", "mus",
144 "mwl", "mwr", "my", "mye", "myn", "myv",
145 "na", "nah", "nai", "nap", "naq", "nb", "nd", "nds",
146 "ne", "new", "ng", "nia", "nic", "niu", "nl", "nmg",
147 "nn", "nnh", "no", "nog", "non", "nqo", "nr", "nso",
148 "nub", "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo",
150 "oc", "oj", "om", "or", "os", "osa", "ota", "oto",
151 "pa", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
152 "phi", "phn", "pi", "pl", "pon", "pra", "pro", "ps",
155 "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rof",
156 "rom", "ru", "rup", "rw", "rwk",
157 "sa", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
158 "sat", "sba", "sbp", "sc", "scn", "sco", "sd", "se",
159 "see", "seh", "sel", "sem", "ses", "sg", "sga", "sgn",
160 "shi", "shn", "shu", "si", "sid", "sio", "sit",
161 "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
162 "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
163 "srn", "srr", "ss", "ssa", "ssy", "st", "su", "suk",
164 "sus", "sux", "sv", "sw", "swb", "swc", "syc", "syr",
165 "ta", "tai", "te", "tem", "teo", "ter", "tet", "tg",
166 "th", "ti", "tig", "tiv", "tk", "tkl", "tl", "tlh",
167 "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv",
168 "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
169 "twq", "ty", "tyv", "tzm",
170 "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
171 "vai", "ve", "vi", "vo", "vot", "vun",
172 "wa", "wae", "wak", "wal", "war", "was", "wen", "wo",
174 "yao", "yap", "yav", "ybb", "yi", "yo", "ypk", "yue",
175 "za", "zap", "zbl", "zen", "zgh", "zh", "znd", "zu",
178 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
/* Withdrawn two-letter language codes, followed by two NULL entries.
   Entry i lines up with REPLACEMENT_LANGUAGES[i]
   ("in"/"id", "iw"/"he", "ji"/"yi", "jw"/"jv"); the two arrays must be
   edited together. Presumably looked up by index -- confirm at the call sites. */
182 static const char* const DEPRECATED_LANGUAGES
[]={
183 "in", "iw", "ji", "jw", NULL
, NULL
/* Current codes for the entries of DEPRECATED_LANGUAGES, in the same order
   and with the same two trailing NULL entries. Keep both arrays the same
   length and in the same order. */
185 static const char* const REPLACEMENT_LANGUAGES
[]={
186 "id", "he", "yi", "jv", NULL
, NULL
190 * Table of 3-letter language codes.
192 * This is a lookup table used to convert 3-letter language codes to
193 * their 2-letter equivalent, where possible. It must be kept in sync
194 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
195 * same language as LANGUAGES_3[i]. The commented-out lines are
196 * copied from LANGUAGES to make eyeballing this baby easier.
198 * Where a 3-letter language code has no 2-letter equivalent, the
199 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
201 * This table should be terminated with a NULL entry, followed by a
202 * second list, and another NULL entry. The two lists correspond to
203 * the two lists in LANGUAGES.
205 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
206 /* ISO639 table version is 20130531 */
207 static const char * const LANGUAGES_3
[] = {
208 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
209 "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
210 "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
211 "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
212 "aus", "ava", "awa", "aym", "aze",
213 "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
214 "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
215 "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
216 "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
217 "btk", "bua", "bug", "bum", "byn", "byv",
218 "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
219 "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
220 "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
221 "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
222 "csb", "chu", "cus", "chv", "cym",
223 "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
224 "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
225 "div", "dyo", "dyu", "dzo", "dzg",
226 "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
227 "enm", "epo", "spa", "est", "eus", "ewo",
228 "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
229 "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
231 "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
232 "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
233 "grc", "gsw", "guj", "guz", "glv", "gwi",
234 "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
235 "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
237 "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
238 "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
240 "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
241 "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
242 "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
243 "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
244 "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
245 "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
246 "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
247 "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
248 "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
249 "lua", "lui", "lun", "luo", "lus", "luy", "lav",
250 "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
251 "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
252 "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
253 "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
254 "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
255 "mwl", "mwr", "mya", "mye", "myn", "myv",
256 "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
257 "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
258 "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
259 "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
261 "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
262 "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
263 "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
266 "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
267 "rom", "rus", "rup", "kin", "rwk",
268 "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
269 "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
270 "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
271 "shi", "shn", "shu", "sin", "sid", "sio", "sit",
272 "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
273 "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
274 "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
275 "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
276 "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
277 "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
278 "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
279 "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
280 "twq", "tah", "tyv", "tzm",
281 "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
282 "vai", "ven", "vie", "vol", "vot", "vun",
283 "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
285 "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
286 "zha", "zap", "zbl", "zen", "zgh", "zho", "znd", "zul",
289 /* "in", "iw", "ji", "jw", "sh", */
290 "ind", "heb", "yid", "jaw", "srp",
295 * Table of 2-letter country codes.
297 * This list must be in sorted order. This list is returned directly
298 * to the user by some API.
300 * This list must be kept in sync with COUNTRIES_3, with corresponding
303 * This table should be terminated with a NULL entry, followed by a
304 * second list, and another NULL entry. The first list is visible to
305 * user code when this array is returned by API. The second list
306 * contains codes we support, but do not expose through user API.
310 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
311 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
312 * new codes keeping the old ones for compatibility updated to include
313 * 1999/12/03 revisions *CWB*
315 * RO(ROM) is now RO(ROU) according to
316 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
318 static const char * const COUNTRIES
[] = {
319 "AD", "AE", "AF", "AG", "AI", "AL", "AM",
320 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
321 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
322 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
323 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
324 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
325 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
326 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
327 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
328 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
329 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
330 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
331 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
332 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
333 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
334 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
335 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
336 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
337 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
338 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
339 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
340 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
341 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
342 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
343 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
344 "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
345 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
346 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
347 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
348 "WS", "YE", "YT", "ZA", "ZM", "ZW",
350 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
/* Deprecated two-letter country codes, followed by two NULL entries.
   Entry i lines up with REPLACEMENT_COUNTRIES[i]; the two arrays must be
   kept the same length and in the same order. */
354 static const char* const DEPRECATED_COUNTRIES
[] = {
355 "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL
, NULL
/* deprecated country list */
/* Current country codes for the entries of DEPRECATED_COUNTRIES, in the same
   order (the commented row below repeats the deprecated codes column by
   column). Several deprecated codes share a replacement, e.g. both "CS" and
   "YU" map to "RS". Ends with two NULL entries. */
357 static const char* const REPLACEMENT_COUNTRIES
[] = {
358 /* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
359 "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL
, NULL
/* replacement country codes */
363 * Table of 3-letter country codes.
365 * This is a lookup table used to convert 3-letter country codes to
366 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
367 * For all valid i, COUNTRIES[i] must refer to the same country as
368 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
369 * to make eyeballing this baby easier.
371 * This table should be terminated with a NULL entry, followed by a
372 * second list, and another NULL entry. The two lists correspond to
373 * the two lists in COUNTRIES.
375 static const char * const COUNTRIES_3
[] = {
376 /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
377 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
378 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
379 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
380 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
381 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
382 /* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
383 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
384 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
385 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
386 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
387 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
388 /* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
389 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
390 /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
391 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
392 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
393 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
394 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
395 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
396 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
397 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
398 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
399 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
400 /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
401 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
402 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
403 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
404 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
405 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
406 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
407 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
408 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
409 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
410 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
411 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
412 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
413 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
414 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
415 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
416 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
417 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
418 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
419 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
420 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
421 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
422 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
423 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
424 /* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
425 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
426 /* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
427 "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
428 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
429 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
430 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
431 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
432 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
433 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
434 /* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
435 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
437 /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
438 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
/* One row of the CANONICALIZE_MAP table: a legacy or foreign locale ID, the
   ID it canonicalizes to, and an optional keyword=value pair that carries the
   information the old ID encoded in its variant
   (e.g. "de__PHONEBOOK" -> "de" plus collation=phonebook). */
442 typedef struct CanonicalizationMap
{
443 const char *id
; /* input ID, e.g. "de__PHONEBOOK" or "zh_CHS" */
444 const char *canonicalID
; /* canonicalized output ID, e.g. "de" or "zh_Hans" */
445 const char *keyword
; /* keyword to attach (e.g. "currency"), or NULL if none */
446 const char *value
; /* value for that keyword (e.g. "ESP"); NULL when keyword is NULL */
447 } CanonicalizationMap
;
450 * A map to canonicalize locale IDs. This handles a variety of
451 * different semantic kinds of transformations.
453 static const CanonicalizationMap CANONICALIZE_MAP
[] = {
454 { "", "en_US_POSIX", NULL
, NULL
}, /* .NET name */
455 { "c", "en_US_POSIX", NULL
, NULL
}, /* POSIX name */
456 { "posix", "en_US_POSIX", NULL
, NULL
}, /* POSIX name (alias of C) */
457 { "art_LOJBAN", "jbo", NULL
, NULL
}, /* registered name */
458 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL
, NULL
}, /* .NET name */
459 { "az_AZ_LATN", "az_Latn_AZ", NULL
, NULL
}, /* .NET name */
460 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
461 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
462 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
463 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
464 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
465 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
466 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
467 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
468 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
469 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
470 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
471 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
472 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
473 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
474 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
475 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
476 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
477 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
478 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
479 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
480 { "nb_NO_NY", "nn_NO", NULL
, NULL
}, /* "markus said this was ok" :-) */
481 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
482 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
483 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
484 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL
, NULL
}, /* .NET name */
485 { "sr_SP_LATN", "sr_Latn_RS", NULL
, NULL
}, /* .NET name */
486 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL
, NULL
}, /* Linux name */
487 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
488 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL
, NULL
}, /* Linux name */
489 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL
, NULL
}, /* .NET name */
490 { "uz_UZ_LATN", "uz_Latn_UZ", NULL
, NULL
}, /* .NET name */
491 { "zh_CHS", "zh_Hans", NULL
, NULL
}, /* .NET name */
492 { "zh_CHT", "zh_Hant", NULL
, NULL
}, /* .NET name */
493 { "zh_GAN", "gan", NULL
, NULL
}, /* registered name */
494 { "zh_GUOYU", "zh", NULL
, NULL
}, /* registered name */
495 { "zh_HAKKA", "hak", NULL
, NULL
}, /* registered name */
496 { "zh_MIN_NAN", "nan", NULL
, NULL
}, /* registered name */
497 { "zh_WUU", "wuu", NULL
, NULL
}, /* registered name */
498 { "zh_XIANG", "hsn", NULL
, NULL
}, /* registered name */
499 { "zh_YUE", "yue", NULL
, NULL
}, /* registered name */
/* One row of the VARIANT_MAP table: a variant name and the keyword=value pair
   that replaces it (e.g. "EURO" -> currency=EUR). */
502 typedef struct VariantMap
{
503 const char *variant
; /* input variant name, e.g. "EURO" */
504 const char *keyword
; /* keyword that stands in for the variant, e.g. "currency" */
505 const char *value
; /* value for that keyword, e.g. "EUR" */
/* Variant names that are expressed as a keyword=value pair instead.
   Every row in this table supplies both a keyword and a value. */
508 static const VariantMap VARIANT_MAP
[] = {
509 { "EURO", "currency", "EUR" },
510 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
511 { "STROKE", "collation", "stroke" } /* Solaris variant */
514 /* ### BCP47 Conversion *******************************************/
/*
 * Test whether a locale id looks like a BCP47 language tag with an extension:
 * it is non-NULL, contains no '@' keyword separator, and its shortest subtag
 * (as reported by getShortestSubtagLength) is exactly one character long,
 * i.e. a singleton such as the "u" extension marker.
 *
 * The macro uses only its own argument; it must not depend on a variable
 * named localeID being in scope at the expansion site. The argument is
 * evaluated more than once, so pass an expression without side effects.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
517 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails.
   The conversion is attempted with uloc_forLanguageTag(id, buffer, length, NULL, err);
   a return value <= 0 or a failing *err takes the branch opened below.
   NOTE(review): the expansion starts with a bare `if` (no do { } while (0)
   wrapper is visible) and the arguments are not parenthesized, so use it only
   as a complete statement inside braces with simple arguments. The rest of
   the macro body is not visible here -- confirm before relying on this. */
518 #define _ConvertBCP47(finalID, id, buffer, length,err) \
519 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
524 /* Gets the size of the shortest subtag in the given localeID.
 *
 * Subtags are the runs of characters between '_' or '-' separators.
 * localeID must be a non-NULL, NUL-terminated string (it is passed straight
 * to uprv_strlen).
 */
525 static int32_t getShortestSubtagLength(const char *localeID
) {
/* NOTE(review): the uprv_strlen result is stored in an int32_t without the
   (int32_t) cast used for the same call elsewhere in this file. */
526 int32_t localeIDLength
= uprv_strlen(localeID
);
/* Start from the whole string's length: no subtag can be longer than that. */
527 int32_t length
= localeIDLength
;
/* Length of the subtag currently being scanned. */
528 int32_t tmpLength
= 0;
532 for (i
= 0; i
< localeIDLength
; i
++) {
/* Anything other than '_' or '-' belongs to the current subtag. */
533 if (localeID
[i
] != '_' && localeID
[i
] != '-') {
/* Empty subtags (tmpLength == 0) never become the minimum. */
540 if (tmpLength
!= 0 && tmpLength
< length
) {
550 /* ### Keywords **************************************************/
/* Size in bytes of the fixed buffers that hold a canonicalized (lower-cased)
   keyword name, including its terminating NUL. Names whose length is >= this
   value are rejected with U_INTERNAL_PROGRAM_ERROR. */
552 #define ULOC_KEYWORD_BUFFER_LEN 25
/* Capacity of the keywordList array in _getKeywords; reaching it while more
   keywords remain is reported as U_INTERNAL_PROGRAM_ERROR. */
553 #define ULOC_MAX_NO_KEYWORDS 25
/* Locates the start of the keyword section of a locale ID by searching for
   the '@' separator with uprv_strchr. localeID must be non-NULL and
   NUL-terminated. The result of the search is presumably what the function
   returns (the return statements are not visible here), with NULL when there
   is no '@'. */
555 U_CAPI
const char * U_EXPORT2
556 locale_getKeywordsStart(const char *localeID
) {
557 const char *result
= NULL
;
558 if((result
= uprv_strchr(localeID
, '@')) != NULL
) {
/* EBCDIC-only fallback: also search for the byte values listed in
   ebcdicSigns (a 0x00-terminated list), starting from its first entry.
   Presumably every entry is tried in turn -- the loop itself is not visible. */
561 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
563 /* We do this because the @ sign is variant, and the @ sign used on one
564 EBCDIC machine won't be compiled the same way on other EBCDIC based
566 static const uint8_t ebcdicSigns
[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
567 const uint8_t *charToFind
= ebcdicSigns
;
569 if((result
= uprv_strchr(localeID
, *charToFind
)) != NULL
) {
580 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
581 * @param keywordName incoming name to be canonicalized
582 * @param status return status (keyword too long)
583 * @return length of the keyword name
/* Canonicalizes a keyword name by lower-casing it into buf with uprv_tolower.
   buf must have room for ULOC_KEYWORD_BUFFER_LEN bytes; a name of that length
   or longer sets *status to U_INTERNAL_PROGRAM_ERROR. On the normal path the
   length of keywordName is returned. keywordName and status must be non-NULL
   (both are used without a check in the visible code). */
585 static int32_t locale_canonKeywordName(char *buf
, const char *keywordName
, UErrorCode
*status
)
588 int32_t keywordNameLen
= (int32_t)uprv_strlen(keywordName
);
/* ">=" rather than ">" leaves one byte for the terminating NUL. */
590 if(keywordNameLen
>= ULOC_KEYWORD_BUFFER_LEN
) {
591 /* keyword name too long for internal buffer */
592 *status
= U_INTERNAL_PROGRAM_ERROR
;
596 /* normalize the keyword name */
597 for(i
= 0; i
< keywordNameLen
; i
++) {
598 buf
[i
] = uprv_tolower(keywordName
[i
]);
602 return keywordNameLen
;
/* Lower-cased keyword name, NUL-terminated by _getKeywords; holds at most
   ULOC_KEYWORD_BUFFER_LEN - 1 characters plus the terminator. */
606 char keyword
[ULOC_KEYWORD_BUFFER_LEN
];
/* Start of the value text. When it points into the locale ID the value is
   NOT terminated at its own end, so always pair this with valueLen. */
608 const char *valueStart
;
/* Comparator passed to uprv_sortArray by _getKeywords: orders two
   KeywordStruct entries by their keyword strings using uprv_strcmp.
   The first (context) parameter is unused and therefore left unnamed;
   left and right must point to KeywordStruct objects. */
612 static int32_t U_CALLCONV
613 compareKeywordStructs(const void * /*context*/, const void *left
, const void *right
) {
614 const char* leftString
= ((const KeywordStruct
*)left
)->keyword
;
615 const char* rightString
= ((const KeywordStruct
*)right
)->keyword
;
616 return uprv_strcmp(leftString
, rightString
);
620 * Both addKeyword and addValue must already be in canonical form.
621 * Either both addKeyword and addValue are NULL, or neither is NULL.
622 * If they are not NULL they must be zero terminated.
623 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
/*
 * Parses the "key=value;key=value" keyword section of a locale ID (entered
 * when prev is '@'), collects the pairs into keywordList, optionally appends
 * addKeyword/addValue, sorts the pairs by keyword and writes them out.
 *
 * What the visible code establishes:
 *  - at most ULOC_MAX_NO_KEYWORDS pairs are accepted; one more sets
 *    U_INTERNAL_PROGRAM_ERROR;
 *  - keyword names are lower-cased; a name of ULOC_KEYWORD_BUFFER_LEN or more
 *    characters sets U_INTERNAL_PROGRAM_ERROR;
 *  - a pair without '=', a ';' before the '=', or an empty keyword or value
 *    sets U_INVALID_FORMAT_ERROR;
 *  - a keyword that repeats an earlier one is ignored, and addKeyword is
 *    ignored when the locale ID already supplies that keyword;
 *  - keywords receives the sorted keyword names, each followed by '=' or
 *    by a NUL, and values are copied in after the '='; which form applies
 *    is decided by code not visible here (presumably the valuesToo flag).
 *    values, when written, receives the values each followed by a NUL;
 *  - the return value is u_terminateChars(keywords, keywordCapacity,
 *    keywordsLen, status); keywordsLen keeps growing even when the buffer is
 *    too small for the text.
 */
626 _getKeywords(const char *localeID
,
628 char *keywords
, int32_t keywordCapacity
,
629 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
631 const char* addKeyword
,
632 const char* addValue
,
635 KeywordStruct keywordList
[ULOC_MAX_NO_KEYWORDS
];
637 int32_t maxKeywords
= ULOC_MAX_NO_KEYWORDS
;
638 int32_t numKeywords
= 0;
639 const char* pos
= localeID
;
640 const char* equalSign
= NULL
;
641 const char* semicolon
= NULL
;
643 int32_t keywordsLen
= 0;
644 int32_t valuesLen
= 0;
646 if(prev
== '@') { /* start of keyword definition */
647 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
649 UBool duplicate
= FALSE
;
650 /* skip leading spaces */
654 if (!*pos
) { /* handle trailing "; " */
657 if(numKeywords
== maxKeywords
) {
658 *status
= U_INTERNAL_PROGRAM_ERROR
;
661 equalSign
= uprv_strchr(pos
, '=');
662 semicolon
= uprv_strchr(pos
, ';');
663 /* lack of '=' [foo@currency] is illegal */
664 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
665 if(!equalSign
|| (semicolon
&& semicolon
<equalSign
)) {
666 *status
= U_INVALID_FORMAT_ERROR
;
669 /* need to normalize both keyword and keyword name */
670 if(equalSign
- pos
>= ULOC_KEYWORD_BUFFER_LEN
) {
671 /* keyword name too long for internal buffer */
672 *status
= U_INTERNAL_PROGRAM_ERROR
;
/* i walks the raw name, n counts the characters actually stored. */
675 for(i
= 0, n
= 0; i
< equalSign
- pos
; ++i
) {
677 keywordList
[numKeywords
].keyword
[n
++] = uprv_tolower(pos
[i
]);
681 /* zero-length keyword is an error. */
683 *status
= U_INVALID_FORMAT_ERROR
;
687 keywordList
[numKeywords
].keyword
[n
] = 0;
688 keywordList
[numKeywords
].keywordLen
= n
;
689 /* now grab the value part. First we skip the '=' */
691 /* then we skip leading spaces */
692 while(*equalSign
== ' ') {
696 /* Premature end or zero-length value */
/* NOTE(review): equalSign has already been dereferenced by the loop above and
   was checked for NULL earlier, so `!equalSign` cannot be true here and does
   not detect a premature end of string; `!*equalSign` looks like the intended
   test -- confirm. */
697 if (!equalSign
|| equalSign
== semicolon
) {
698 *status
= U_INVALID_FORMAT_ERROR
;
/* From here on equalSign points at the first character of the value. */
702 keywordList
[numKeywords
].valueStart
= equalSign
;
707 while(*(pos
- i
- 1) == ' ') {
710 keywordList
[numKeywords
].valueLen
= (int32_t)(pos
- equalSign
- i
);
713 i
= (int32_t)uprv_strlen(equalSign
);
714 while(i
&& equalSign
[i
-1] == ' ') {
717 keywordList
[numKeywords
].valueLen
= i
;
719 /* If this is a duplicate keyword, then ignore it */
720 for (j
=0; j
<numKeywords
; ++j
) {
721 if (uprv_strcmp(keywordList
[j
].keyword
, keywordList
[numKeywords
].keyword
) == 0) {
731 /* Handle addKeyword/addValue. */
732 if (addKeyword
!= NULL
) {
733 UBool duplicate
= FALSE
;
734 U_ASSERT(addValue
!= NULL
);
735 /* Search for duplicate; if found, do nothing. Explicit keyword
736 overrides addKeyword. */
737 for (j
=0; j
<numKeywords
; ++j
) {
738 if (uprv_strcmp(keywordList
[j
].keyword
, addKeyword
) == 0) {
744 if (numKeywords
== maxKeywords
) {
745 *status
= U_INTERNAL_PROGRAM_ERROR
;
/* Unbounded copy: relies on the caller's guarantee that addKeyword fits in
   KeywordStruct.keyword. */
748 uprv_strcpy(keywordList
[numKeywords
].keyword
, addKeyword
);
749 keywordList
[numKeywords
].keywordLen
= (int32_t)uprv_strlen(addKeyword
);
750 keywordList
[numKeywords
].valueStart
= addValue
;
751 keywordList
[numKeywords
].valueLen
= (int32_t)uprv_strlen(addValue
);
755 U_ASSERT(addValue
== NULL
);
758 /* now we have a list of keywords */
759 /* we need to sort it */
760 uprv_sortArray(keywordList
, numKeywords
, sizeof(KeywordStruct
), compareKeywordStructs
, NULL
, FALSE
, status
);
762 /* Now construct the keyword part */
763 for(i
= 0; i
< numKeywords
; i
++) {
/* Only write when the name plus one separator byte fits; the length counter
   below advances either way. */
764 if(keywordsLen
+ keywordList
[i
].keywordLen
+ 1< keywordCapacity
) {
765 uprv_strcpy(keywords
+keywordsLen
, keywordList
[i
].keyword
);
767 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = '=';
769 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = 0;
772 keywordsLen
+= keywordList
[i
].keywordLen
+ 1;
774 if(keywordsLen
+ keywordList
[i
].valueLen
< keywordCapacity
) {
/* Bounded copy of exactly valueLen characters (the value is not terminated
   at its end when it points into the locale ID). */
775 uprv_strncpy(keywords
+keywordsLen
, keywordList
[i
].valueStart
, keywordList
[i
].valueLen
);
777 keywordsLen
+= keywordList
[i
].valueLen
;
/* ';' goes between pairs only, not after the last one. */
779 if(i
< numKeywords
- 1) {
780 if(keywordsLen
< keywordCapacity
) {
781 keywords
[keywordsLen
] = ';';
787 if(valuesLen
+ keywordList
[i
].valueLen
+ 1< valuesCapacity
) {
/* NOTE(review): assuming uprv_strcpy behaves like strcpy, this copies up to
   the NUL that ends valueStart, which can lie well beyond valueLen when
   valueStart points into the locale ID (more keywords or trailing spaces
   follow). The capacity check above only accounts for valueLen + 1, and the
   keywords buffer uses uprv_strncpy with valueLen for the same data. Confirm
   whether this can write past the end of `values`. */
788 uprv_strcpy(values
+valuesLen
, keywordList
[i
].valueStart
);
789 values
[valuesLen
+ keywordList
[i
].valueLen
] = 0;
791 valuesLen
+= keywordList
[i
].valueLen
+ 1;
795 values
[valuesLen
] = 0;
800 return u_terminateChars(keywords
, keywordCapacity
, keywordsLen
, status
);
/* Thin wrapper around _getKeywords: the visible arguments (localeID, prev,
   keywords, keywordCapacity, values, valuesCapacity, valLen, valuesToo) are
   forwarded unchanged and the callee's result is returned as is. The
   remaining arguments of the call are not visible here; presumably no extra
   addKeyword/addValue pair is supplied -- confirm. */
807 locale_getKeywords(const char *localeID
,
809 char *keywords
, int32_t keywordCapacity
,
810 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
812 UErrorCode
*status
) {
813 return _getKeywords(localeID
, prev
, keywords
, keywordCapacity
,
814 values
, valuesCapacity
, valLen
, valuesToo
,
/*
 * Looks up keywordName among the keywords that follow '@' in localeID and
 * copies the matching value into buffer.
 *
 * What the visible code establishes:
 *  - work is done only when status is non-NULL, *status is a success code and
 *    localeID is non-NULL;
 *  - a locale ID that _hasBCP47Extension accepts is first run through
 *    _ConvertBCP47 using a local ULOC_FULLNAME_CAPACITY buffer;
 *  - keywordName is canonicalized with locale_canonKeywordName, and each
 *    keyword in the locale ID is lower-cased and trimmed before the two are
 *    compared with uprv_strcmp;
 *  - a keyword of ULOC_KEYWORD_BUFFER_LEN or more characters sets
 *    U_INTERNAL_PROGRAM_ERROR;
 *  - when the value fits, it is copied with trailing spaces removed and
 *    terminated through u_terminateChars; otherwise *status becomes
 *    U_BUFFER_OVERFLOW_ERROR and result is set to the value's length.
 * The return statements are not visible here; presumably `result` is
 * returned -- confirm.
 */
818 U_CAPI
int32_t U_EXPORT2
819 uloc_getKeywordValue(const char* localeID
,
820 const char* keywordName
,
821 char* buffer
, int32_t bufferCapacity
,
824 const char* startSearchHere
= NULL
;
825 const char* nextSeparator
= NULL
;
/* Canonicalized copy of the caller's keywordName. */
826 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
/* Canonicalized copy of the keyword currently being examined in the locale ID. */
827 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
831 if(status
&& U_SUCCESS(*status
) && localeID
) {
832 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
833 const char* tmpLocaleID
;
835 if (_hasBCP47Extension(localeID
)) {
836 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
838 tmpLocaleID
=localeID
;
841 startSearchHere
= uprv_strchr(tmpLocaleID
, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
842 if(startSearchHere
== NULL
) {
843 /* no keywords, return at once */
847 locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
848 if(U_FAILURE(*status
)) {
852 /* find the first keyword */
853 while(startSearchHere
) {
855 /* skip leading spaces (allowed?) */
856 while(*startSearchHere
== ' ') {
859 nextSeparator
= uprv_strchr(startSearchHere
, '=');
860 /* need to normalize both keyword and keyword name */
864 if(nextSeparator
- startSearchHere
>= ULOC_KEYWORD_BUFFER_LEN
) {
865 /* keyword name too long for internal buffer */
866 *status
= U_INTERNAL_PROGRAM_ERROR
;
869 for(i
= 0; i
< nextSeparator
- startSearchHere
; i
++) {
870 localeKeywordNameBuffer
[i
] = uprv_tolower(startSearchHere
[i
]);
872 /* trim trailing spaces */
873 while(startSearchHere
[i
-1] == ' ') {
877 localeKeywordNameBuffer
[i
] = 0;
/* Advance to the next ';' now; NULL here means this was the last pair. */
879 startSearchHere
= uprv_strchr(nextSeparator
, ';');
881 if(uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
) == 0) {
883 while(*nextSeparator
== ' ') {
886 /* we actually found the keyword. Copy the value */
/* NOTE(review): this capacity test runs before the trailing spaces are
   trimmed, so a space-padded value can be treated as too long even though
   its trimmed form would fit -- confirm whether that is intended. */
887 if(startSearchHere
&& startSearchHere
- nextSeparator
< bufferCapacity
) {
888 while(*(startSearchHere
-1) == ' ') {
891 uprv_strncpy(buffer
, nextSeparator
, startSearchHere
- nextSeparator
);
892 result
= u_terminateChars(buffer
, bufferCapacity
, (int32_t)(startSearchHere
- nextSeparator
), status
);
893 } else if(!startSearchHere
&& (int32_t)uprv_strlen(nextSeparator
) < bufferCapacity
) { /* last item in string */
894 i
= (int32_t)uprv_strlen(nextSeparator
);
/* NOTE(review): unlike the matching loop in _getKeywords
   (`while(i && equalSign[i-1] == ' ')`), this one has no `i &&` guard, so an
   empty value (i == 0) reads nextSeparator[-1] -- confirm this is safe. */
895 while(nextSeparator
[i
- 1] == ' ') {
898 uprv_strncpy(buffer
, nextSeparator
, i
);
899 result
= u_terminateChars(buffer
, bufferCapacity
, i
, status
);
901 /* give a bigger buffer, please */
902 *status
= U_BUFFER_OVERFLOW_ERROR
;
/* Report the untrimmed length of the value as the size that was needed. */
903 if(startSearchHere
) {
904 result
= (int32_t)(startSearchHere
- nextSeparator
);
906 result
= (int32_t)uprv_strlen(nextSeparator
);
916 U_CAPI
int32_t U_EXPORT2
917 uloc_setKeywordValue(const char* keywordName
,
918 const char* keywordValue
,
919 char* buffer
, int32_t bufferCapacity
,
922 /* TODO: sorting. removal. */
923 int32_t keywordNameLen
;
924 int32_t keywordValueLen
;
927 int32_t foundValueLen
;
928 int32_t keywordAtEnd
= 0; /* is the keyword at the end of the string? */
929 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
930 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
933 char* nextSeparator
= NULL
;
934 char* nextEqualsign
= NULL
;
935 char* startSearchHere
= NULL
;
936 char* keywordStart
= NULL
;
937 char *insertHere
= NULL
;
938 if(U_FAILURE(*status
)) {
941 if(bufferCapacity
>1) {
942 bufLen
= (int32_t)uprv_strlen(buffer
);
944 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
947 if(bufferCapacity
<bufLen
) {
948 /* The capacity is less than the length?! Is this NULL terminated? */
949 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
952 if(keywordValue
&& !*keywordValue
) {
956 keywordValueLen
= (int32_t)uprv_strlen(keywordValue
);
960 keywordNameLen
= locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
961 if(U_FAILURE(*status
)) {
964 startSearchHere
= (char*)locale_getKeywordsStart(buffer
);
965 if(startSearchHere
== NULL
|| (startSearchHere
[1]==0)) {
966 if(!keywordValue
) { /* no keywords = nothing to remove */
970 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
971 if(startSearchHere
) { /* had a single @ */
972 needLen
--; /* already had the @ */
973 /* startSearchHere points at the @ */
975 startSearchHere
=buffer
+bufLen
;
977 if(needLen
>= bufferCapacity
) {
978 *status
= U_BUFFER_OVERFLOW_ERROR
;
979 return needLen
; /* no change */
981 *startSearchHere
= '@';
983 uprv_strcpy(startSearchHere
, keywordNameBuffer
);
984 startSearchHere
+= keywordNameLen
;
985 *startSearchHere
= '=';
987 uprv_strcpy(startSearchHere
, keywordValue
);
988 startSearchHere
+=keywordValueLen
;
990 } /* end shortcut - no @ */
992 keywordStart
= startSearchHere
;
993 /* search for keyword */
994 while(keywordStart
) {
996 /* skip leading spaces (allowed?) */
997 while(*keywordStart
== ' ') {
1000 nextEqualsign
= uprv_strchr(keywordStart
, '=');
1001 /* need to normalize both keyword and keyword name */
1002 if(!nextEqualsign
) {
1005 if(nextEqualsign
- keywordStart
>= ULOC_KEYWORD_BUFFER_LEN
) {
1006 /* keyword name too long for internal buffer */
1007 *status
= U_INTERNAL_PROGRAM_ERROR
;
1010 for(i
= 0; i
< nextEqualsign
- keywordStart
; i
++) {
1011 localeKeywordNameBuffer
[i
] = uprv_tolower(keywordStart
[i
]);
1013 /* trim trailing spaces */
1014 while(keywordStart
[i
-1] == ' ') {
1017 U_ASSERT(i
>=0 && i
<ULOC_KEYWORD_BUFFER_LEN
);
1018 localeKeywordNameBuffer
[i
] = 0;
1020 nextSeparator
= uprv_strchr(nextEqualsign
, ';');
1021 rc
= uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
);
1024 while(*nextEqualsign
== ' ') {
1027 /* we actually found the keyword. Change the value */
1028 if (nextSeparator
) {
1030 foundValueLen
= (int32_t)(nextSeparator
- nextEqualsign
);
1033 foundValueLen
= (int32_t)uprv_strlen(nextEqualsign
);
1035 if(keywordValue
) { /* adding a value - not removing */
1036 if(foundValueLen
== keywordValueLen
) {
1037 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1038 return bufLen
; /* no change in size */
1039 } else if(foundValueLen
> keywordValueLen
) {
1040 int32_t delta
= foundValueLen
- keywordValueLen
;
1041 if(nextSeparator
) { /* RH side */
1042 uprv_memmove(nextSeparator
- delta
, nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1044 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1048 } else { /* FVL < KVL */
1049 int32_t delta
= keywordValueLen
- foundValueLen
;
1050 if((bufLen
+delta
) >= bufferCapacity
) {
1051 *status
= U_BUFFER_OVERFLOW_ERROR
;
1052 return bufLen
+delta
;
1054 if(nextSeparator
) { /* RH side */
1055 uprv_memmove(nextSeparator
+delta
,nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1057 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1062 } else { /* removing a keyword */
1064 /* zero out the ';' or '@' just before keywordStart */
1065 keywordStart
[-1] = 0;
1066 return (int32_t)((keywordStart
-buffer
)-1); /* (string length without keyword) minus separator */
1068 uprv_memmove(keywordStart
, nextSeparator
+1, bufLen
-((nextSeparator
+1)-buffer
));
1069 keywordStart
[bufLen
-((nextSeparator
+1)-buffer
)]=0;
1070 return (int32_t)(bufLen
-((nextSeparator
+1)-keywordStart
));
1073 } else if(rc
<0){ /* end match keyword */
1074 /* could insert at this location. */
1075 insertHere
= keywordStart
;
1077 keywordStart
= nextSeparator
;
1078 } /* end loop searching */
1081 return bufLen
; /* removal of non-extant keyword - no change */
1084 /* we know there is at least one keyword. */
1085 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
1086 if(needLen
>= bufferCapacity
) {
1087 *status
= U_BUFFER_OVERFLOW_ERROR
;
1088 return needLen
; /* no change */
1092 uprv_memmove(insertHere
+(1+keywordNameLen
+1+keywordValueLen
), insertHere
, bufLen
-(insertHere
-buffer
));
1093 keywordStart
= insertHere
;
1095 keywordStart
= buffer
+bufLen
;
1096 *keywordStart
= ';';
1099 uprv_strncpy(keywordStart
, keywordNameBuffer
, keywordNameLen
);
1100 keywordStart
+= keywordNameLen
;
1101 *keywordStart
= '=';
1103 uprv_strncpy(keywordStart
, keywordValue
, keywordValueLen
); /* terminates. */
1104 keywordStart
+=keywordValueLen
;
1106 *keywordStart
= ';';
1113 /* ### ID parsing implementation **************************************************/
/* True if `a` equals one of the letters 'x', 'X', 'i' or 'I'.
 * The argument is parenthesized so that compound expressions (e.g. a ?: expression)
 * are compared as a unit. NOTE: `a` is expanded up to four times, so do not pass
 * an expression with side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))
1117 /*returns TRUE if one of the special prefixes is here (s=string)
/* True if s[0] satisfies _isPrefixLetter and s[1] satisfies _isIDSeparator.
 * `s` is parenthesized before indexing so that pointer expressions such as p + 1
 * are indexed as a whole. NOTE: `s` is expanded more than once; avoid side effects. */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))
1121 /* Dot terminates it because of POSIX form where dot precedes the codepage
1122 * except for variant
/* True if `a` is 0, '.' or '@'.
 * The argument is parenthesized so that compound expressions are compared as a unit.
 * NOTE: `a` is expanded up to three times, so do not pass an expression with side effects. */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1126 static char* _strnchr(const char* str
, int32_t len
, char c
) {
1127 U_ASSERT(str
!= 0 && len
>= 0);
1128 while (len
-- != 0) {
1132 } else if (d
== 0) {
1141 * Lookup 'key' in the array 'list'. The array 'list' should contain
1142 * a NULL entry, followed by more entries, and a second NULL entry.
1144 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1147 static int16_t _findIndex(const char* const* list
, const char* key
)
1149 const char* const* anchor
= list
;
1152 /* Make two passes through two NULL-terminated arrays at 'list' */
1153 while (pass
++ < 2) {
1155 if (uprv_strcmp(key
, *list
) == 0) {
1156 return (int16_t)(list
- anchor
);
1160 ++list
; /* skip final NULL *CWB*/
1165 /* count the length of src while copying it to dest; return strlen(src) */
1166 static inline int32_t
1167 _copyCount(char *dest
, int32_t destCapacity
, const char *src
) {
1174 return (int32_t)(src
-anchor
);
1176 if(destCapacity
<=0) {
1177 return (int32_t)((src
-anchor
)+uprv_strlen(src
));
1186 uloc_getCurrentCountryID(const char* oldID
){
1187 int32_t offset
= _findIndex(DEPRECATED_COUNTRIES
, oldID
);
1189 return REPLACEMENT_COUNTRIES
[offset
];
1194 uloc_getCurrentLanguageID(const char* oldID
){
1195 int32_t offset
= _findIndex(DEPRECATED_LANGUAGES
, oldID
);
1197 return REPLACEMENT_LANGUAGES
[offset
];
1202 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1203 * avoid duplicating code to handle the earlier locale ID pieces
1204 * in the functions for the later ones by
1205 * setting the *pEnd pointer to where they stopped parsing
1207 * TODO try to use this in Locale
1210 ulocimp_getLanguage(const char *localeID
,
1211 char *language
, int32_t languageCapacity
,
1212 const char **pEnd
) {
1215 char lang
[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1217 /* if it starts with i- or x- then copy that prefix */
1218 if(_isIDPrefix(localeID
)) {
1219 if(i
<languageCapacity
) {
1220 language
[i
]=(char)uprv_tolower(*localeID
);
1222 if(i
<languageCapacity
) {
1229 /* copy the language as far as possible and count its length */
1230 while(!_isTerminator(*localeID
) && !_isIDSeparator(*localeID
)) {
1231 if(i
<languageCapacity
) {
1232 language
[i
]=(char)uprv_tolower(*localeID
);
1236 lang
[i
]=(char)uprv_tolower(*localeID
);
1243 /* convert 3 character code to 2 character code if possible *CWB*/
1244 offset
=_findIndex(LANGUAGES_3
, lang
);
1246 i
=_copyCount(language
, languageCapacity
, LANGUAGES
[offset
]);
1257 ulocimp_getScript(const char *localeID
,
1258 char *script
, int32_t scriptCapacity
,
1267 /* copy the second item as far as possible and count its length */
1268 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])
1269 && uprv_isASCIILetter(localeID
[idLen
])) {
1273 /* If it's exactly 4 characters long, then it's a script and not a country. */
1277 *pEnd
= localeID
+idLen
;
1279 if(idLen
> scriptCapacity
) {
1280 idLen
= scriptCapacity
;
1283 script
[0]=(char)uprv_toupper(*(localeID
++));
1285 for (i
= 1; i
< idLen
; i
++) {
1286 script
[i
]=(char)uprv_tolower(*(localeID
++));
1296 ulocimp_getCountry(const char *localeID
,
1297 char *country
, int32_t countryCapacity
,
1301 char cnty
[ULOC_COUNTRY_CAPACITY
]={ 0, 0, 0, 0 };
1304 /* copy the country as far as possible and count its length */
1305 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])) {
1306 if(idLen
<(ULOC_COUNTRY_CAPACITY
-1)) { /*CWB*/
1307 cnty
[idLen
]=(char)uprv_toupper(localeID
[idLen
]);
1312 /* the country should be either length 2 or 3 */
1313 if (idLen
== 2 || idLen
== 3) {
1314 UBool gotCountry
= FALSE
;
1315 /* convert 3 character code to 2 character code if possible *CWB*/
1317 offset
=_findIndex(COUNTRIES_3
, cnty
);
1319 idLen
=_copyCount(country
, countryCapacity
, COUNTRIES
[offset
]);
1325 for (i
= 0; i
< idLen
; i
++) {
1326 if (i
< countryCapacity
) {
1327 country
[i
]=(char)uprv_toupper(localeID
[i
]);
1344 * @param needSeparator if true, then add leading '_' if any variants
1345 * are added to 'variant'
1348 _getVariantEx(const char *localeID
,
1350 char *variant
, int32_t variantCapacity
,
1351 UBool needSeparator
) {
1354 /* get one or more variant tags and separate them with '_' */
1355 if(_isIDSeparator(prev
)) {
1356 /* get a variant string after a '-' or '_' */
1357 while(!_isTerminator(*localeID
)) {
1358 if (needSeparator
) {
1359 if (i
<variantCapacity
) {
1363 needSeparator
= FALSE
;
1365 if(i
<variantCapacity
) {
1366 variant
[i
]=(char)uprv_toupper(*localeID
);
1367 if(variant
[i
]=='-') {
1376 /* if there is no variant tag after a '-' or '_' then look for '@' */
1380 } else if((localeID
=locale_getKeywordsStart(localeID
))!=NULL
) {
1381 ++localeID
; /* point after the '@' */
1385 while(!_isTerminator(*localeID
)) {
1386 if (needSeparator
) {
1387 if (i
<variantCapacity
) {
1391 needSeparator
= FALSE
;
1393 if(i
<variantCapacity
) {
1394 variant
[i
]=(char)uprv_toupper(*localeID
);
1395 if(variant
[i
]=='-' || variant
[i
]==',') {
1408 _getVariant(const char *localeID
,
1410 char *variant
, int32_t variantCapacity
) {
1411 return _getVariantEx(localeID
, prev
, variant
, variantCapacity
, FALSE
);
1415 * Delete ALL instances of a variant from the given list of one or
1416 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1417 * @param variants the source string of one or more variants,
1418 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1419 * terminated; if it is, trailing zero will NOT be maintained.
1420 * @param variantsLen length of variants
1421 * @param toDelete variant to delete, without separators, e.g. "EURO"
1422 * or "PREEURO"; not zero terminated
1423 * @param toDeleteLen length of toDelete
1424 * @return number of characters deleted from variants
1427 _deleteVariant(char* variants
, int32_t variantsLen
,
1428 const char* toDelete
, int32_t toDeleteLen
)
1430 int32_t delta
= 0; /* number of chars deleted */
1433 if (variantsLen
< toDeleteLen
) {
1436 if (uprv_strncmp(variants
, toDelete
, toDeleteLen
) == 0 &&
1437 (variantsLen
== toDeleteLen
||
1438 (flag
=(variants
[toDeleteLen
] == '_'))))
1440 int32_t d
= toDeleteLen
+ (flag
?1:0);
1443 if (variantsLen
> 0) {
1444 uprv_memmove(variants
, variants
+d
, variantsLen
);
1447 char* p
= _strnchr(variants
, variantsLen
, '_');
1452 variantsLen
-= (int32_t)(p
- variants
);
1458 /* Keyword enumeration */
1460 typedef struct UKeywordsContext
{
1465 static void U_CALLCONV
1466 uloc_kw_closeKeywords(UEnumeration
*enumerator
) {
1467 uprv_free(((UKeywordsContext
*)enumerator
->context
)->keywords
);
1468 uprv_free(enumerator
->context
);
1469 uprv_free(enumerator
);
1472 static int32_t U_CALLCONV
1473 uloc_kw_countKeywords(UEnumeration
*en
, UErrorCode
* /*status*/) {
1474 char *kw
= ((UKeywordsContext
*)en
->context
)->keywords
;
1478 kw
+= uprv_strlen(kw
)+1;
1483 static const char* U_CALLCONV
1484 uloc_kw_nextKeyword(UEnumeration
* en
,
1485 int32_t* resultLength
,
1486 UErrorCode
* /*status*/) {
1487 const char* result
= ((UKeywordsContext
*)en
->context
)->current
;
1490 len
= (int32_t)uprv_strlen(((UKeywordsContext
*)en
->context
)->current
);
1491 ((UKeywordsContext
*)en
->context
)->current
+= len
+1;
1496 *resultLength
= len
;
1501 static void U_CALLCONV
1502 uloc_kw_resetKeywords(UEnumeration
* en
,
1503 UErrorCode
* /*status*/) {
1504 ((UKeywordsContext
*)en
->context
)->current
= ((UKeywordsContext
*)en
->context
)->keywords
;
1507 static const UEnumeration gKeywordsEnum
= {
1510 uloc_kw_closeKeywords
,
1511 uloc_kw_countKeywords
,
1513 uloc_kw_nextKeyword
,
1514 uloc_kw_resetKeywords
1517 U_CAPI UEnumeration
* U_EXPORT2
1518 uloc_openKeywordList(const char *keywordList
, int32_t keywordListSize
, UErrorCode
* status
)
1520 UKeywordsContext
*myContext
= NULL
;
1521 UEnumeration
*result
= NULL
;
1523 if(U_FAILURE(*status
)) {
1526 result
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
1527 /* Null pointer test */
1528 if (result
== NULL
) {
1529 *status
= U_MEMORY_ALLOCATION_ERROR
;
1532 uprv_memcpy(result
, &gKeywordsEnum
, sizeof(UEnumeration
));
1533 myContext
= static_cast<UKeywordsContext
*>(uprv_malloc(sizeof(UKeywordsContext
)));
1534 if (myContext
== NULL
) {
1535 *status
= U_MEMORY_ALLOCATION_ERROR
;
1539 myContext
->keywords
= (char *)uprv_malloc(keywordListSize
+1);
1540 uprv_memcpy(myContext
->keywords
, keywordList
, keywordListSize
);
1541 myContext
->keywords
[keywordListSize
] = 0;
1542 myContext
->current
= myContext
->keywords
;
1543 result
->context
= myContext
;
1547 U_CAPI UEnumeration
* U_EXPORT2
1548 uloc_openKeywords(const char* localeID
,
1553 int32_t keywordsCapacity
= 256;
1554 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1555 const char* tmpLocaleID
;
1557 if(status
==NULL
|| U_FAILURE(*status
)) {
1561 if (_hasBCP47Extension(localeID
)) {
1562 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
1564 if (localeID
==NULL
) {
1565 localeID
=uloc_getDefault();
1567 tmpLocaleID
=localeID
;
1570 /* Skip the language */
1571 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
1572 if(_isIDSeparator(*tmpLocaleID
)) {
1573 const char *scriptID
;
1574 /* Skip the script if available */
1575 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
1576 if(scriptID
!= tmpLocaleID
+1) {
1577 /* Found optional script */
1578 tmpLocaleID
= scriptID
;
1580 /* Skip the Country */
1581 if (_isIDSeparator(*tmpLocaleID
)) {
1582 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &tmpLocaleID
);
1583 if(_isIDSeparator(*tmpLocaleID
)) {
1584 _getVariant(tmpLocaleID
+1, *tmpLocaleID
, NULL
, 0);
1589 /* keywords are located after '@' */
1590 if((tmpLocaleID
= locale_getKeywordsStart(tmpLocaleID
)) != NULL
) {
1591 i
=locale_getKeywords(tmpLocaleID
+1, '@', keywords
, keywordsCapacity
, NULL
, 0, NULL
, FALSE
, status
);
1595 return uloc_openKeywordList(keywords
, i
, status
);
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE 0x1

/* True if at least one bit of `mask` is set in `options`.
 * Both arguments are parenthesized so that a combined mask such as (A | B)
 * is AND-ed as a unit rather than being split by operator precedence. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)
/* The nine characters of the tag "i-default". The array is initialized from a
 * character list and therefore has no terminating NUL; compare against it only
 * with a length-bounded call (e.g. uprv_strncmp with I_DEFAULT_LENGTH). */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};

/* Number of characters in i_default.
 * Fully parenthesized, and cast to int32_t so that comparisons against the
 * int32_t lengths used in this file are signed rather than size_t comparisons. */
#define I_DEFAULT_LENGTH ((int32_t)(sizeof(i_default) / sizeof(i_default[0])))
1612 * Canonicalize the given localeID, to level 1 or to level 2,
1613 * depending on the options. To specify level 1, pass in options=0.
1614 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1616 * This is the code underlying uloc_getName and uloc_canonicalize.
1619 _canonicalize(const char* localeID
,
1621 int32_t resultCapacity
,
1624 int32_t j
, len
, fieldCount
=0, scriptSize
=0, variantSize
=0, nameCapacity
;
1625 char localeBuffer
[ULOC_FULLNAME_CAPACITY
];
1626 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1627 const char* origLocaleID
;
1628 const char* tmpLocaleID
;
1629 const char* keywordAssign
= NULL
;
1630 const char* separatorIndicator
= NULL
;
1631 const char* addKeyword
= NULL
;
1632 const char* addValue
= NULL
;
1634 char* variant
= NULL
; /* pointer into name, or NULL */
1636 if (U_FAILURE(*err
)) {
1640 if (_hasBCP47Extension(localeID
)) {
1641 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
1643 if (localeID
==NULL
) {
1644 localeID
=uloc_getDefault();
1646 tmpLocaleID
=localeID
;
1649 origLocaleID
=tmpLocaleID
;
1651 /* if we are doing a full canonicalization, then put results in
1652 localeBuffer, if necessary; otherwise send them to result. */
1653 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1654 (result
== NULL
|| resultCapacity
< (int32_t)sizeof(localeBuffer
))) {
1655 name
= localeBuffer
;
1656 nameCapacity
= (int32_t)sizeof(localeBuffer
);
1659 nameCapacity
= resultCapacity
;
1662 /* get all pieces, one after another, and separate with '_' */
1663 len
=ulocimp_getLanguage(tmpLocaleID
, name
, nameCapacity
, &tmpLocaleID
);
1665 if(len
== I_DEFAULT_LENGTH
&& uprv_strncmp(origLocaleID
, i_default
, len
) == 0) {
1666 const char *d
= uloc_getDefault();
1668 len
= (int32_t)uprv_strlen(d
);
1671 uprv_strncpy(name
, d
, len
);
1673 } else if(_isIDSeparator(*tmpLocaleID
)) {
1674 const char *scriptID
;
1677 if(len
<nameCapacity
) {
1682 scriptSize
=ulocimp_getScript(tmpLocaleID
+1,
1683 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
, &scriptID
);
1684 if(scriptSize
> 0) {
1685 /* Found optional script */
1686 tmpLocaleID
= scriptID
;
1689 if (_isIDSeparator(*tmpLocaleID
)) {
1690 /* If there is something else, then we add the _ */
1691 if(len
<nameCapacity
) {
1698 if (_isIDSeparator(*tmpLocaleID
)) {
1699 const char *cntryID
;
1700 int32_t cntrySize
= ulocimp_getCountry(tmpLocaleID
+1,
1701 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
, &cntryID
);
1702 if (cntrySize
> 0) {
1703 /* Found optional country */
1704 tmpLocaleID
= cntryID
;
1707 if(_isIDSeparator(*tmpLocaleID
)) {
1708 /* If there is something else, then we add the _ if we found country before. */
1709 if (cntrySize
>= 0 && ! _isIDSeparator(*(tmpLocaleID
+1)) ) {
1711 if(len
<nameCapacity
) {
1717 variantSize
= _getVariant(tmpLocaleID
+1, *tmpLocaleID
,
1718 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
);
1719 if (variantSize
> 0) {
1720 variant
= len
<nameCapacity
? name
+len
: NULL
;
1722 tmpLocaleID
+= variantSize
+ 1; /* skip '_' and variant */
1728 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1729 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) && *tmpLocaleID
== '.') {
1732 char c
= *tmpLocaleID
;
1739 if (len
<nameCapacity
) {
1749 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1750 After this, tmpLocaleID either points to '@' or is NULL */
1751 if ((tmpLocaleID
=locale_getKeywordsStart(tmpLocaleID
))!=NULL
) {
1752 keywordAssign
= uprv_strchr(tmpLocaleID
, '=');
1753 separatorIndicator
= uprv_strchr(tmpLocaleID
, ';');
1756 /* Copy POSIX-style variant, if any [mr@FOO] */
1757 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) &&
1758 tmpLocaleID
!= NULL
&& keywordAssign
== NULL
) {
1760 char c
= *tmpLocaleID
;
1764 if (len
<nameCapacity
) {
1772 if (OPTION_SET(options
, _ULOC_CANONICALIZE
)) {
1773 /* Handle @FOO variant if @ is present and not followed by = */
1774 if (tmpLocaleID
!=NULL
&& keywordAssign
==NULL
) {
1775 int32_t posixVariantSize
;
1776 /* Add missing '_' if needed */
1777 if (fieldCount
< 2 || (fieldCount
< 3 && scriptSize
> 0)) {
1779 if(len
<nameCapacity
) {
1784 } while(fieldCount
<2);
1786 posixVariantSize
= _getVariantEx(tmpLocaleID
+1, '@', name
+len
, nameCapacity
-len
,
1787 (UBool
)(variantSize
> 0));
1788 if (posixVariantSize
> 0) {
1789 if (variant
== NULL
) {
1792 len
+= posixVariantSize
;
1793 variantSize
+= posixVariantSize
;
1797 /* Handle generic variants first */
1799 for (j
=0; j
<(int32_t)(sizeof(VARIANT_MAP
)/sizeof(VARIANT_MAP
[0])); j
++) {
1800 const char* variantToCompare
= VARIANT_MAP
[j
].variant
;
1801 int32_t n
= (int32_t)uprv_strlen(variantToCompare
);
1802 int32_t variantLen
= _deleteVariant(variant
, uprv_min(variantSize
, (nameCapacity
-len
)), variantToCompare
, n
);
1804 if (variantLen
> 0) {
1805 if (len
> 0 && name
[len
-1] == '_') { /* delete trailing '_' */
1808 addKeyword
= VARIANT_MAP
[j
].keyword
;
1809 addValue
= VARIANT_MAP
[j
].value
;
1813 if (len
> 0 && len
<= nameCapacity
&& name
[len
-1] == '_') { /* delete trailing '_' */
1818 /* Look up the ID in the canonicalization map */
1819 for (j
=0; j
<(int32_t)(sizeof(CANONICALIZE_MAP
)/sizeof(CANONICALIZE_MAP
[0])); j
++) {
1820 const char* id
= CANONICALIZE_MAP
[j
].id
;
1821 int32_t n
= (int32_t)uprv_strlen(id
);
1822 if (len
== n
&& uprv_strncmp(name
, id
, n
) == 0) {
1823 if (n
== 0 && tmpLocaleID
!= NULL
) {
1824 break; /* Don't remap "" if keywords present */
1826 len
= _copyCount(name
, nameCapacity
, CANONICALIZE_MAP
[j
].canonicalID
);
1827 if (CANONICALIZE_MAP
[j
].keyword
) {
1828 addKeyword
= CANONICALIZE_MAP
[j
].keyword
;
1829 addValue
= CANONICALIZE_MAP
[j
].value
;
1836 if (!OPTION_SET(options
, _ULOC_STRIP_KEYWORDS
)) {
1837 if (tmpLocaleID
!=NULL
&& keywordAssign
!=NULL
&&
1838 (!separatorIndicator
|| separatorIndicator
> keywordAssign
)) {
1839 if(len
<nameCapacity
) {
1844 len
+= _getKeywords(tmpLocaleID
+1, '@', (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
,
1845 NULL
, 0, NULL
, TRUE
, addKeyword
, addValue
, err
);
1846 } else if (addKeyword
!= NULL
) {
1847 U_ASSERT(addValue
!= NULL
&& len
< nameCapacity
);
1848 /* inelegant but works -- later make _getKeywords do this? */
1849 len
+= _copyCount(name
+len
, nameCapacity
-len
, "@");
1850 len
+= _copyCount(name
+len
, nameCapacity
-len
, addKeyword
);
1851 len
+= _copyCount(name
+len
, nameCapacity
-len
, "=");
1852 len
+= _copyCount(name
+len
, nameCapacity
-len
, addValue
);
1856 if (U_SUCCESS(*err
) && result
!= NULL
&& name
== localeBuffer
) {
1857 uprv_strncpy(result
, localeBuffer
, (len
> resultCapacity
) ? resultCapacity
: len
);
1860 return u_terminateChars(result
, resultCapacity
, len
, err
);
1863 /* ### ID parsing API **************************************************/
1865 U_CAPI
int32_t U_EXPORT2
1866 uloc_getParent(const char* localeID
,
1868 int32_t parentCapacity
,
1871 const char *lastUnderscore
;
1874 if (U_FAILURE(*err
))
1877 if (localeID
== NULL
)
1878 localeID
= uloc_getDefault();
1880 lastUnderscore
=uprv_strrchr(localeID
, '_');
1881 if(lastUnderscore
!=NULL
) {
1882 i
=(int32_t)(lastUnderscore
-localeID
);
1887 if(i
>0 && parent
!= localeID
) {
1888 uprv_memcpy(parent
, localeID
, uprv_min(i
, parentCapacity
));
1890 return u_terminateChars(parent
, parentCapacity
, i
, err
);
1893 U_CAPI
int32_t U_EXPORT2
1894 uloc_getLanguage(const char* localeID
,
1896 int32_t languageCapacity
,
1899 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1902 if (err
==NULL
|| U_FAILURE(*err
)) {
1906 if(localeID
==NULL
) {
1907 localeID
=uloc_getDefault();
1910 i
=ulocimp_getLanguage(localeID
, language
, languageCapacity
, NULL
);
1911 return u_terminateChars(language
, languageCapacity
, i
, err
);
1914 U_CAPI
int32_t U_EXPORT2
1915 uloc_getScript(const char* localeID
,
1917 int32_t scriptCapacity
,
1922 if(err
==NULL
|| U_FAILURE(*err
)) {
1926 if(localeID
==NULL
) {
1927 localeID
=uloc_getDefault();
1930 /* skip the language */
1931 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
1932 if(_isIDSeparator(*localeID
)) {
1933 i
=ulocimp_getScript(localeID
+1, script
, scriptCapacity
, NULL
);
1935 return u_terminateChars(script
, scriptCapacity
, i
, err
);
1938 U_CAPI
int32_t U_EXPORT2
1939 uloc_getCountry(const char* localeID
,
1941 int32_t countryCapacity
,
1946 if(err
==NULL
|| U_FAILURE(*err
)) {
1950 if(localeID
==NULL
) {
1951 localeID
=uloc_getDefault();
1954 /* Skip the language */
1955 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
1956 if(_isIDSeparator(*localeID
)) {
1957 const char *scriptID
;
1958 /* Skip the script if available */
1959 ulocimp_getScript(localeID
+1, NULL
, 0, &scriptID
);
1960 if(scriptID
!= localeID
+1) {
1961 /* Found optional script */
1962 localeID
= scriptID
;
1964 if(_isIDSeparator(*localeID
)) {
1965 i
=ulocimp_getCountry(localeID
+1, country
, countryCapacity
, NULL
);
1968 return u_terminateChars(country
, countryCapacity
, i
, err
);
1971 U_CAPI
int32_t U_EXPORT2
1972 uloc_getVariant(const char* localeID
,
1974 int32_t variantCapacity
,
1977 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1978 const char* tmpLocaleID
;
1981 if(err
==NULL
|| U_FAILURE(*err
)) {
1985 if (_hasBCP47Extension(localeID
)) {
1986 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
1988 if (localeID
==NULL
) {
1989 localeID
=uloc_getDefault();
1991 tmpLocaleID
=localeID
;
1994 /* Skip the language */
1995 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
1996 if(_isIDSeparator(*tmpLocaleID
)) {
1997 const char *scriptID
;
1998 /* Skip the script if available */
1999 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
2000 if(scriptID
!= tmpLocaleID
+1) {
2001 /* Found optional script */
2002 tmpLocaleID
= scriptID
;
2004 /* Skip the Country */
2005 if (_isIDSeparator(*tmpLocaleID
)) {
2006 const char *cntryID
;
2007 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &cntryID
);
2008 if (cntryID
!= tmpLocaleID
+1) {
2009 /* Found optional country */
2010 tmpLocaleID
= cntryID
;
2012 if(_isIDSeparator(*tmpLocaleID
)) {
2013 /* If there was no country ID, skip a possible extra IDSeparator */
2014 if (tmpLocaleID
!= cntryID
&& _isIDSeparator(tmpLocaleID
[1])) {
2017 i
=_getVariant(tmpLocaleID
+1, *tmpLocaleID
, variant
, variantCapacity
);
2022 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2023 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2025 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2026 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2029 return u_terminateChars(variant
, variantCapacity
, i
, err
);
2032 U_CAPI
int32_t U_EXPORT2
2033 uloc_getName(const char* localeID
,
2035 int32_t nameCapacity
,
2038 return _canonicalize(localeID
, name
, nameCapacity
, 0, err
);
2041 U_CAPI
int32_t U_EXPORT2
2042 uloc_getBaseName(const char* localeID
,
2044 int32_t nameCapacity
,
2047 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_STRIP_KEYWORDS
, err
);
2050 U_CAPI
int32_t U_EXPORT2
2051 uloc_canonicalize(const char* localeID
,
2053 int32_t nameCapacity
,
2056 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_CANONICALIZE
, err
);
2059 U_CAPI
const char* U_EXPORT2
2060 uloc_getISO3Language(const char* localeID
)
2063 char lang
[ULOC_LANG_CAPACITY
];
2064 UErrorCode err
= U_ZERO_ERROR
;
2066 if (localeID
== NULL
)
2068 localeID
= uloc_getDefault();
2070 uloc_getLanguage(localeID
, lang
, ULOC_LANG_CAPACITY
, &err
);
2073 offset
= _findIndex(LANGUAGES
, lang
);
2076 return LANGUAGES_3
[offset
];
2079 U_CAPI
const char* U_EXPORT2
2080 uloc_getISO3Country(const char* localeID
)
2083 char cntry
[ULOC_LANG_CAPACITY
];
2084 UErrorCode err
= U_ZERO_ERROR
;
2086 if (localeID
== NULL
)
2088 localeID
= uloc_getDefault();
2090 uloc_getCountry(localeID
, cntry
, ULOC_LANG_CAPACITY
, &err
);
2093 offset
= _findIndex(COUNTRIES
, cntry
);
2097 return COUNTRIES_3
[offset
];
2100 U_CAPI
uint32_t U_EXPORT2
2101 uloc_getLCID(const char* localeID
)
2103 UErrorCode status
= U_ZERO_ERROR
;
2104 char langID
[ULOC_FULLNAME_CAPACITY
];
2106 uloc_getLanguage(localeID
, langID
, sizeof(langID
), &status
);
2107 if (U_FAILURE(status
)) {
2111 if (uprv_strchr(localeID
, '@')) {
2112 // uprv_convertToLCID does not support keywords other than collation.
2113 // Remove all keywords except collation.
2115 char collVal
[ULOC_KEYWORDS_CAPACITY
];
2116 char tmpLocaleID
[ULOC_FULLNAME_CAPACITY
];
2118 len
= uloc_getKeywordValue(localeID
, "collation", collVal
,
2119 sizeof(collVal
)/sizeof(collVal
[0]) - 1, &status
);
2121 if (U_SUCCESS(status
) && len
> 0) {
2124 len
= uloc_getBaseName(localeID
, tmpLocaleID
,
2125 sizeof(tmpLocaleID
)/sizeof(tmpLocaleID
[0]) - 1, &status
);
2127 if (U_SUCCESS(status
)) {
2128 tmpLocaleID
[len
] = 0;
2130 len
= uloc_setKeywordValue("collation", collVal
, tmpLocaleID
,
2131 sizeof(tmpLocaleID
)/sizeof(tmpLocaleID
[0]) - len
- 1, &status
);
2133 if (U_SUCCESS(status
)) {
2134 tmpLocaleID
[len
] = 0;
2135 return uprv_convertToLCID(langID
, tmpLocaleID
, &status
);
2140 // fall through - all keywords are simply ignored
2141 status
= U_ZERO_ERROR
;
2144 return uprv_convertToLCID(langID
, localeID
, &status
);
2147 U_CAPI
int32_t U_EXPORT2
2148 uloc_getLocaleForLCID(uint32_t hostid
, char *locale
, int32_t localeCapacity
,
2151 return uprv_convertToPosix(hostid
, locale
, localeCapacity
, status
);
2154 /* ### Default locale **************************************************/
2156 U_CAPI
const char* U_EXPORT2
2159 return locale_get_default();
2162 U_CAPI
void U_EXPORT2
2163 uloc_setDefault(const char* newDefaultLocale
,
2166 if (U_FAILURE(*err
))
2168 /* the error code isn't currently used for anything by this function*/
2170 /* propagate change to C++ */
2171 locale_set_default(newDefaultLocale
);
2175 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2176 * to an array of pointers to arrays of char. All of these pointers are owned
2177 * by ICU-- do not delete them, and do not write through them. The array is
2178 * terminated with a null pointer.
/*
 * uloc_getISOLanguages: takes no arguments and returns a pointer to an
 * array of constant C strings.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * returns the language-code table described at the top of the file, which
 * that description says is handed directly to the user - confirm.
 */
2180 U_CAPI
const char* const* U_EXPORT2
2181 uloc_getISOLanguages()
2187 * Returns a list of all 2-letter country codes defined in ISO 3166. This is a
2188 * pointer to an array of pointers to arrays of char. All of these pointers are
2189 * owned by ICU-- do not delete them, and do not write through them. The array is
2190 * terminated with a null pointer.
/*
 * uloc_getISOCountries: takes no arguments and returns a pointer to an
 * array of constant C strings.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * returns the country-code table of this file - confirm.
 */
2192 U_CAPI
const char* const* U_EXPORT2
2193 uloc_getISOCountries()
2199 /* this function to be moved into cstring.c later */
/*
 * gDecimal: file-scope character, initialised to 0, that _uloc_strtod
 * compares against the dot character and writes into its scratch buffer.
 * NOTE(review): the statement that assigns gDecimal is not visible in this
 * excerpt; presumably it is taken from the sprintf output below - confirm.
 */
2200 static char gDecimal
= 0;
/*
 * _uloc_strtod: parses a floating point number from start while coping with
 * C runtimes whose decimal separator is not a dot.
 *
 * start  text to parse.
 * end    receives the position in start where parsing stopped.
 *
 * When gDecimal is a dot, or when the copied text contains no dot, the call
 * is forwarded to uprv_strtod(start, end) unchanged. Otherwise the dot in a
 * local copy is replaced by gDecimal, the copy is parsed, and the end
 * position is translated back into start.
 *
 * NOTE(review): the return type, the local declarations (rep, buf, decimal,
 * myEnd, rv) and the final return are not visible in this excerpt.
 */
2205 _uloc_strtod(const char *start
, char **end
) {
2212 /* For machines that decide to change the decimal on you,
2213 and try to be too smart with localization.
2214 This normally should be just a '.'. */
/* Format 1.0 through the C runtime so the separator it emits can be inspected. */
2215 sprintf(rep
, "%+1.1f", 1.0);
2219 if(gDecimal
== '.') {
2220 return uprv_strtod(start
, end
); /* fall through to OS */
/*
 * Work on a copy of at most 29 characters so the separator can be patched.
 * NOTE(review): strncpy does not terminate the copy when start holds 29 or
 * more characters; the terminating store is not visible here - confirm.
 */
2222 uprv_strncpy(buf
, start
, 29);
/* Find the first dot in the copy and overwrite it with the runtime separator. */
2224 decimal
= uprv_strchr(buf
, '.');
2226 *decimal
= gDecimal
;
2228 return uprv_strtod(start
, end
); /* no decimal point */
2230 rv
= uprv_strtod(buf
, &myEnd
);
/*
 * Map the stop offset inside buf onto the same offset inside start.
 * NOTE(review): uloc_acceptLanguageFromHTTP passes NULL for end; the guard
 * that protects this store is not visible in this excerpt - confirm.
 */
2232 *end
= (char*)(start
+(myEnd
-buf
)); /* cast away const (to follow uprv_strtod API.) */
/*
 * NOTE(review): only this member line of the enclosing record is visible in
 * this excerpt. It presumably belongs to _acceptLangItem, the record whose
 * q and locale members are read by uloc_acceptLanguageCompare - confirm.
 */
2240 int32_t dummy
; /* to avoid uninitialized memory copy from qsort */
/*
 * uloc_acceptLanguageCompare: comparator passed to uprv_sortArray() by
 * uloc_acceptLanguageFromHTTP.
 *
 * The unnamed first parameter (context) is ignored. a and b are read as
 * _acceptLangItem records. The q members are compared first; the visible
 * branch that assigns -1 is annotated A > B, and a further branch handles
 * bb->q > aa->q. uprv_stricmp() on the locale members supplies the result
 * in the remaining case.
 *
 * NOTE(review): the declaration of rc, the condition of the first branch,
 * the value assigned in the second branch, the guard around the
 * uprv_stricmp call and the final return are not visible in this excerpt;
 * presumably a larger q sorts first and the locale text breaks ties -
 * confirm.
 */
2244 static int32_t U_CALLCONV
2245 uloc_acceptLanguageCompare(const void * /*context*/, const void *a
, const void *b
)
2247 const _acceptLangItem
*aa
= (const _acceptLangItem
*)a
;
2248 const _acceptLangItem
*bb
= (const _acceptLangItem
*)b
;
2252 rc
= -1; /* A > B */
2253 } else if(bb
->q
> aa
->q
) {
/* Case-insensitive comparison of the two locale strings. */
2260 rc
= uprv_stricmp(aa
->locale
, bb
->locale
);
2263 #if defined(ULOC_DEBUG)
2264 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2274 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
/*
 * uloc_acceptLanguageFromHTTP: splits an HTTP Accept-Language style string
 * (httpAcceptLanguage) into items that each carry a locale string and a q
 * weight, sorts the items with uloc_acceptLanguageCompare, and passes the
 * sorted locale strings to uloc_acceptLanguage() together with result,
 * resultAvailable, outResult, availableLocales and status.
 *
 * Items are collected in a 30-entry stack array (smallBuffer) first. Heap
 * storage is requested through uprv_malloc / uprv_realloc only in the
 * growth branch below.
 *
 * Allocation failures are reported by storing U_MEMORY_ALLOCATION_ERROR in
 * *status.
 *
 * NOTE(review): the status parameter, the declarations of j, n, s, t, strs,
 * res and jSize, and most closing braces and return statements are not
 * visible in this excerpt; the comments below describe only what is shown.
 */
2277 U_CAPI
int32_t U_EXPORT2
2278 uloc_acceptLanguageFromHTTP(char *result
, int32_t resultAvailable
, UAcceptResult
*outResult
,
2279 const char *httpAcceptLanguage
,
2280 UEnumeration
* availableLocales
,
2284 _acceptLangItem smallBuffer
[30];
2286 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2288 const char *itemEnd
;
2289 const char *paramEnd
;
2294 int32_t l
= (int32_t)uprv_strlen(httpAcceptLanguage
);
2296 char *tempstr
; /* Use for null pointer check */
/* jSize starts out as the element count of smallBuffer. */
2299 jSize
= sizeof(smallBuffer
)/sizeof(smallBuffer
[0]);
2300 if(U_FAILURE(*status
)) {
/* s is the read cursor; the scan stops at a NULL pointer or at the terminating NUL. */
2304 for(s
=httpAcceptLanguage
;s
&&*s
;) {
/*
 * NOTE(review): isspace() receives a plain char here and below; values
 * outside the unsigned char range are undefined for it - confirm that only
 * ASCII input reaches this function.
 */
2305 while(isspace(*s
)) /* eat space at the beginning */
/* Locate the next comma (item separator) and the next semicolon (parameter separator). */
2307 itemEnd
=uprv_strchr(s
,',');
2308 paramEnd
=uprv_strchr(s
,';');
2310 itemEnd
= httpAcceptLanguage
+l
; /* end of string */
2312 if(paramEnd
&& paramEnd
<itemEnd
) {
2313 /* semicolon (;) is closer than end (,) */
2318 while(isspace(*t
)) {
2324 while(isspace(*t
)) {
/* Parse the weight text at t; NULL means no end position is requested. */
2327 j
[n
].q
= (float)_uloc_strtod(t
,NULL
);
2329 /* no semicolon - it's 1.0 */
2334 /* eat spaces prior to semi */
2335 for(t
=(paramEnd
-1);(paramEnd
>s
)&&isspace(*t
);t
--)
2337 /* Check for null pointer from uprv_strndup */
/* Copy the characters from s up to and including t into a fresh heap string. */
2338 tempstr
= uprv_strndup(s
,(int32_t)((t
+1)-s
));
2339 if (tempstr
== NULL
) {
2340 *status
= U_MEMORY_ALLOCATION_ERROR
;
2343 j
[n
].locale
= tempstr
;
/* Canonicalize into tmp and keep the canonical spelling when it differs from the copy. */
2344 uloc_canonicalize(j
[n
].locale
,tmp
,sizeof(tmp
)/sizeof(tmp
[0]),status
);
2345 if(strcmp(j
[n
].locale
,tmp
)) {
2346 uprv_free(j
[n
].locale
);
/* NOTE(review): the uprv_strdup result is stored without a visible NULL check - confirm. */
2347 j
[n
].locale
=uprv_strdup(tmp
);
2349 #if defined(ULOC_DEBUG)
2350 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2354 while(*s
==',') { /* eat duplicate commas */
/*
 * Growth of the item array: while j still points at smallBuffer a heap
 * block of twice jSize entries is allocated and the stack entries are
 * copied into it; the other visible path resizes the heap block instead.
 */
2358 if(j
==smallBuffer
) { /* overflowed the small buffer. */
2359 j
= static_cast<_acceptLangItem
*>(uprv_malloc(sizeof(j
[0])*(jSize
*2)));
2361 uprv_memcpy(j
,smallBuffer
,sizeof(j
[0])*jSize
);
2363 #if defined(ULOC_DEBUG)
2364 fprintf(stderr
,"malloced at size %d\n", jSize
);
/*
 * NOTE(review): the realloc result is assigned straight back to j, so the
 * previous block and the locale strings it references are unreachable if
 * the call returns NULL - confirm how the failure path cleans up.
 */
2367 j
= static_cast<_acceptLangItem
*>(uprv_realloc(j
, sizeof(j
[0])*jSize
*2));
2368 #if defined(ULOC_DEBUG)
2369 fprintf(stderr
,"re-alloced at size %d\n", jSize
);
2374 *status
= U_MEMORY_ALLOCATION_ERROR
;
/* Order the n collected items with uloc_acceptLanguageCompare. */
2379 uprv_sortArray(j
, n
, sizeof(j
[0]), uloc_acceptLanguageCompare
, NULL
, TRUE
, status
);
2380 if(U_FAILURE(*status
)) {
2381 if(j
!= smallBuffer
) {
2382 #if defined(ULOC_DEBUG)
2383 fprintf(stderr
,"freeing j %p\n", j
);
/* Build a plain array of n locale pointers, in sorted order, for uloc_acceptLanguage(). */
2389 strs
= static_cast<char **>(uprv_malloc((size_t)(sizeof(strs
[0])*n
)));
2390 /* Check for null pointer */
/*
 * NOTE(review): no j != smallBuffer guard is visible around this
 * uprv_free(j), unlike the other two release sites; j may still point at
 * the stack array here - confirm.
 */
2392 uprv_free(j
); /* Free to avoid memory leak */
2393 *status
= U_MEMORY_ALLOCATION_ERROR
;
2397 #if defined(ULOC_DEBUG)
2398 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2400 strs
[i
]=j
[i
].locale
;
/* Delegate the actual matching to the list-based entry point. */
2402 res
= uloc_acceptLanguage(result
, resultAvailable
, outResult
,
2403 (const char**)strs
, n
, availableLocales
, status
);
/* The item array is released only when it no longer points at smallBuffer. */
2408 if(j
!= smallBuffer
) {
2409 #if defined(ULOC_DEBUG)
2410 fprintf(stderr
,"freeing j %p\n", j
);
/*
 * uloc_acceptLanguage: searches availableLocales for a match for one of the
 * acceptListCount locale IDs in acceptList and copies the match into result.
 *
 * Pass 1: every accept-list entry is compared (uprv_strcmp) with every
 *         string produced by uenum_next(). On equality *outResult becomes
 *         ULOC_ACCEPT_VALID. For entries without a match the parent locale
 *         from uloc_getParent() is stored in fallbackList.
 * Pass 2: the stored fallback entries are compared in rounds controlled by
 *         maxLen, which counts down; an entry is tried only in the round
 *         whose maxLen equals its string length. On equality *outResult
 *         becomes ULOC_ACCEPT_FALLBACK. Afterwards the entry is replaced by
 *         its own parent.
 * Otherwise *outResult becomes ULOC_ACCEPT_FAILED.
 *
 * Both match paths copy at most resultAvailable characters with
 * uprv_strncpy and return the value of
 * u_terminateChars(result, resultAvailable, len, status).
 *
 * NOTE(review): the status parameter, the declarations of i, j, l, len and
 * maxLen (including the starting value of maxLen), most closing braces and
 * the returns of the failure paths are not visible in this excerpt.
 */
2418 U_CAPI
int32_t U_EXPORT2
2419 uloc_acceptLanguage(char *result
, int32_t resultAvailable
,
2420 UAcceptResult
*outResult
, const char **acceptList
,
2421 int32_t acceptListCount
,
2422 UEnumeration
* availableLocales
,
2428 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2430 char **fallbackList
;
2431 if(U_FAILURE(*status
)) {
/* One slot per accept-list entry; slots receive uprv_strdup copies and are released with uprv_free. */
2434 fallbackList
= static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList
[0])*acceptListCount
)));
2435 if(fallbackList
==NULL
) {
2436 *status
= U_MEMORY_ALLOCATION_ERROR
;
2439 for(i
=0;i
<acceptListCount
;i
++) {
2440 #if defined(ULOC_DEBUG)
2441 fprintf(stderr
,"%02d: %s\n", i
, acceptList
[i
]);
/* Compare entry i with each available locale; a NULL from uenum_next ends the scan. */
2443 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2444 #if defined(ULOC_DEBUG)
2445 fprintf(stderr
," %s\n", l
);
2447 len
= (int32_t)uprv_strlen(l
);
2448 if(!uprv_strcmp(acceptList
[i
], l
)) {
2450 *outResult
= ULOC_ACCEPT_VALID
;
2452 #if defined(ULOC_DEBUG)
2453 fprintf(stderr
, "MATCH! %s\n", l
);
/* Copy no more than resultAvailable characters; termination is left to u_terminateChars below. */
2456 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
2459 uprv_free(fallbackList
[j
]);
2461 uprv_free(fallbackList
);
2462 return u_terminateChars(result
, resultAvailable
, len
, status
);
/* Rewind the enumeration so the next entry is compared from the first available locale again. */
2468 uenum_reset(availableLocales
, status
);
2469 /* save off parent info */
2470 if(uloc_getParent(acceptList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
/* NOTE(review): the uprv_strdup result is stored without a visible NULL check - confirm. */
2471 fallbackList
[i
] = uprv_strdup(tmp
);
/* Fallback rounds: maxLen is decremented once before the first round and once after each round. */
2477 for(maxLen
--;maxLen
>0;maxLen
--) {
2478 for(i
=0;i
<acceptListCount
;i
++) {
/* Only non-NULL entries whose current length equals maxLen are tried in this round. */
2479 if(fallbackList
[i
] && ((int32_t)uprv_strlen(fallbackList
[i
])==maxLen
)) {
2480 #if defined(ULOC_DEBUG)
2481 fprintf(stderr
,"Try: [%s]", fallbackList
[i
]);
2483 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2484 #if defined(ULOC_DEBUG)
2485 fprintf(stderr
," %s\n", l
);
2487 len
= (int32_t)uprv_strlen(l
);
2488 if(!uprv_strcmp(fallbackList
[i
], l
)) {
2490 *outResult
= ULOC_ACCEPT_FALLBACK
;
2492 #if defined(ULOC_DEBUG)
2493 fprintf(stderr
, "fallback MATCH! %s\n", l
);
2496 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
/* Release every fallback string and the list itself before returning the match. */
2498 for(j
=0;j
<acceptListCount
;j
++) {
2499 uprv_free(fallbackList
[j
]);
2501 uprv_free(fallbackList
);
2502 return u_terminateChars(result
, resultAvailable
, len
, status
);
2505 uenum_reset(availableLocales
, status
);
/*
 * Replace the entry with its own parent when uloc_getParent reports a
 * non-zero length. NOTE(review): the second uprv_free below presumably
 * sits in the else branch, which is not visible here - confirm.
 */
2507 if(uloc_getParent(fallbackList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
2508 uprv_free(fallbackList
[i
]);
2509 fallbackList
[i
] = uprv_strdup(tmp
);
2511 uprv_free(fallbackList
[i
]);
/* Neither pass produced a match. */
2517 *outResult
= ULOC_ACCEPT_FAILED
;
2520 for(i
=0;i
<acceptListCount
;i
++) {
2521 uprv_free(fallbackList
[i
]);
2523 uprv_free(fallbackList
);
/*
 * uloc_toUnicodeLocaleKey: looks keyword up with ulocimp_toBcpKey().
 * When the lookup yields NULL and ultag_isUnicodeLocaleKey(keyword, -1)
 * accepts the text, the branch for an unknown but syntactically valid key
 * is taken.
 * NOTE(review): the statement inside that branch and the final return are
 * not visible in this excerpt; presumably keyword itself is returned in
 * that case and bcpKey otherwise - confirm.
 */
2527 U_CAPI
const char* U_EXPORT2
2528 uloc_toUnicodeLocaleKey(const char* keyword
)
2530 const char* bcpKey
= ulocimp_toBcpKey(keyword
);
2531 if (bcpKey
== NULL
&& ultag_isUnicodeLocaleKey(keyword
, -1)) {
2532 // unknown keyword, but syntax is fine..
/*
 * uloc_toUnicodeLocaleType: looks the (keyword, value) pair up with
 * ulocimp_toBcpType(), passing NULL for both optional output arguments.
 * When the lookup yields NULL and ultag_isUnicodeLocaleType(value, -1)
 * accepts the text, the branch for an unknown but syntactically valid type
 * is taken.
 * NOTE(review): the statement inside that branch and the final return are
 * not visible in this excerpt; presumably value itself is returned in that
 * case and bcpType otherwise - confirm.
 */
2538 U_CAPI
const char* U_EXPORT2
2539 uloc_toUnicodeLocaleType(const char* keyword
, const char* value
)
2541 const char* bcpType
= ulocimp_toBcpType(keyword
, value
, NULL
, NULL
);
2542 if (bcpType
== NULL
&& ultag_isUnicodeLocaleType(value
, -1)) {
2543 // unknown type value, but syntax is fine..
/*
 * UPRV_ISDIGIT(c):    non-zero when c lies between the characters 0 and 9,
 *                     both inclusive.
 * UPRV_ISALPHANUM(c): non-zero when uprv_isASCIILetter(c) holds or c passes
 *                     UPRV_ISDIGIT.
 * Both macros expand their argument more than once, so the argument should
 * be free of side effects.
 */
2549 #define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
2550 #define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
/*
 * isWellFormedLegacyKey: inspects legacyKey through the cursor p and tests
 * characters with UPRV_ISALPHANUM.
 * NOTE(review): the return type, the loop around the test and the return
 * statements are not visible in this excerpt; presumably the first
 * character that is neither an ASCII letter nor a digit makes the key
 * ill-formed - confirm.
 */
2553 isWellFormedLegacyKey(const char* legacyKey
)
2555 const char* p
= legacyKey
;
2557 if (!UPRV_ISALPHANUM(*p
)) {
/*
 * isWellFormedLegacyType: inspects legacyType through the cursor p.
 * Underscore, slash and hyphen are handled by one branch, which has a
 * dedicated case for alphaNumLen being 0 at that point; characters accepted
 * by UPRV_ISALPHANUM are handled by another branch.
 * The visible return yields true only when alphaNumLen is non-zero.
 * NOTE(review): the return type, the loop, the updates of alphaNumLen and
 * the bodies of the branches are not visible in this excerpt; presumably
 * alphaNumLen counts the alphanumeric run since the last separator, so
 * empty runs (leading, doubled or trailing separators) are rejected -
 * confirm.
 */
2566 isWellFormedLegacyType(const char* legacyType
)
2568 const char* p
= legacyType
;
2569 int32_t alphaNumLen
= 0;
2571 if (*p
== '_' || *p
== '/' || *p
== '-') {
2572 if (alphaNumLen
== 0) {
2576 } else if (UPRV_ISALPHANUM(*p
)) {
2583 return (alphaNumLen
!= 0);
/*
 * uloc_toLegacyKey: looks keyword up with ulocimp_toLegacyKey().
 * When the lookup yields NULL, keyword is checked with
 * isWellFormedLegacyKey() instead.
 * NOTE(review): the statement guarded by that check and the final return
 * are not visible in this excerpt; presumably keyword itself is returned
 * when it is well-formed and legacyKey otherwise - confirm.
 */
2586 U_CAPI
const char* U_EXPORT2
2587 uloc_toLegacyKey(const char* keyword
)
2589 const char* legacyKey
= ulocimp_toLegacyKey(keyword
);
2590 if (legacyKey
== NULL
) {
2591 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2594 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
2595 // However, a key should not contain '=' obviously. For now, all existing
2596 // keys are using ASCII alphabetic letters only. We won't add any new key
2597 // that is not compatible with the BCP 47 syntax. Therefore, we assume
2598 // a valid key consists of [0-9a-zA-Z], no symbols.
2599 if (isWellFormedLegacyKey(keyword
)) {
2606 U_CAPI
const char* U_EXPORT2
2607 uloc_toLegacyType(const char* keyword
, const char* value
)
2609 const char* legacyType
= ulocimp_toLegacyType(keyword
, value
, NULL
, NULL
);
2610 if (legacyType
== NULL
) {
2611 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2614 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
2615 // However, a type should not contain '=' obviously. For now, all existing
2616 // types are using ASCII alphabetic letters with a few symbol letters. We won't
2617 // add any new type that is not compatible with the BCP 47 syntax except timezone
2618 // IDs. For now, we assume a valid type start with [0-9a-zA-Z], but may contain
2619 // '-' '_' '/' in the middle.
2620 if (isWellFormedLegacyType(value
)) {