2 **********************************************************************
3 * Copyright (C) 1997-2008, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
9 * Modification History:
11 * Date Name Description
12 * 04/01/97 aliu Creation.
13 * 08/21/98 stephen JDK 1.2 sync
14 * 12/08/98 rtg New Locale implementation and C API
15 * 03/15/99 damiba overhaul.
16 * 04/06/99 stephen changed setDefault() to realloc and copy
17 * 06/14/99 stephen Changed calls to ures_open for new params
18 * 07/21/99 stephen Modified setDefault() to propagate to C++
19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20 * brought canonicalization code into line with spec
21 *****************************************************************************/
24 POSIX's locale format, from putil.c: [no spaces]
26 ll [ _CC ] [ . MM ] [ @ VV]
28 l = lang, C = ctry, M = charmap, V = variant
31 #include "unicode/utypes.h"
32 #include "unicode/ustring.h"
33 #include "unicode/uloc.h"
34 #include "unicode/ures.h"
49 #include <stdio.h> /* for sprintf */
51 /* ### Declarations **************************************************/
53 /* Locale stuff from locid.cpp */
54 U_CFUNC
void locale_set_default(const char *id
);
55 U_CFUNC
const char *locale_get_default(void);
57 locale_getKeywords(const char *localeID
,
59 char *keywords
, int32_t keywordCapacity
,
60 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
64 /* ### Constants **************************************************/
66 /* These strings describe the resources we attempt to load from
67 the locale ResourceBundle data file.*/
/* Names of the resources looked up in the locale ResourceBundle data. */
static const char _kLanguages[]       = "Languages";
static const char _kScripts[]         = "Scripts";
static const char _kCountries[]       = "Countries";
static const char _kVariants[]        = "Variants";
static const char _kKeys[]            = "Keys";
static const char _kTypes[]           = "Types";
static const char _kIndexLocaleName[] = "res_index";
static const char _kRootName[]        = "root";
static const char _kIndexTag[]        = "InstalledLocales";
static const char _kCurrency[]        = "currency";
static const char _kCurrencies[]      = "Currencies";

/* Installed-locale list and its element count; both start out empty. */
static char **_installedLocales       = NULL;
static int32_t _installedLocalesCount = 0;
82 /* ### Data tables **************************************************/
/**
 * Table of language codes, both 2- and 3-letter, with preference
 * given to 2-letter codes where possible.  Includes 3-letter codes
 * that lack a 2-letter equivalent.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with LANGUAGES_3, with corresponding
 * entries matched: LANGUAGES[i] and LANGUAGES_3[i] name the same language.
 *
 * The table is terminated with a NULL entry, followed by a second
 * list, and another NULL entry.  The first list is visible to user
 * code when this array is returned by API.  The second list contains
 * codes we support, but do not expose through user API.
 *
 * Notes:
 *
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 * include the revisions up to 2001/7/27 *CWB*
 *
 * The 3 character codes are the terminology codes like RFC 3066.  This
 * is compatible with prior ICU codes.
 *
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
 * table, now at the end, because their 3 character codes are
 * duplicates.  This avoids bad searches going from 3 to 2 character
 * codes.
 *
 * The range qaa-qtz is reserved for local use.
 */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",
    "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",
    "ang", "anp", "apa",
    "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",
    "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",
    "bai", "bal", "ban", "bas", "bat", "be",  "bej",
    "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",
    "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",
    "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",
    "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",
    "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",
    "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",
    "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",
    "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",
    "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",
    "fr",  "frm", "fro", "frr", "frs", "fur", "fy",
    "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
    "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu",  "gv",  "gwi",
    "ha",  "hai", "haw", "he",  "hi",  "hil", "him",
    "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",
    "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",
    "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",
    "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
    "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",
    "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",
    "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",
    "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",
    "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",
    "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",
    "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",
    "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",
    "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",
    "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",
    "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",
    "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",
    "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",
    "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",
    "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",
    "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",
    "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",
    "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",
    "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
    "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
    "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",
    "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",
    "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",
    "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",
    "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
    "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",
    "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",
    "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",
    "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",
    "zu",  "zun", "zxx", "zza",
NULL,
    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
NULL
};
/*
 * Deprecated language codes and, at the same index, the code that replaces
 * each one.  Both arrays end with two NULL entries and must stay the same
 * length.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].  The commented-out lines are
 * copied from LANGUAGES to make comparing the two tables by eye easier.
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 *
 * The table is terminated with a NULL entry, followed by a second
 * list, and another NULL entry.  The two lists correspond to the two
 * lists in LANGUAGES.
 */
static const char * const LANGUAGES_3[] = {
/*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
/*  "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
    "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
/*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
    "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
/*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
    "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
/*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
    "bai", "bal", "ban", "bas", "bat", "bel", "bej",
/*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
    "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
/*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
    "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
/*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
    "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
/*  "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
    "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
/*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
    "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
/*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
    "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
/*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
    "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
/*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
/*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
    "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
/*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
    "enm", "epo", "spa", "est", "eus", "ewo", "fas",
/*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
    "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
/*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gay",    */
    "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
/*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
    "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
/*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",     */
    "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
/*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
    "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
/*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
    "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
/*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
    "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
/*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
    "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
/*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
    "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
/*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",*/
    /* "kg" (Kongo) maps to ISO 639-2 "kon"; every entry here is 3 letters. */
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kon", "kha", "khi",
/*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
    "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
/*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",     */
    "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
/*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
    "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
/*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
    "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
/*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
    "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
/*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
    "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
/*  "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
    "mdf", "mdr", "men", "mlg", "mga", "mah", "mri", "mic", "min",
/*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
    "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
/*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
    "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
/*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
    "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
/*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
    "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
/*  "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",    */
    "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub",
/*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
    "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
/*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
    "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
/*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
/*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
    "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
/*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
/*  "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
    "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
/*  "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",    */
    "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
/*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
    "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
/*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
/*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
/*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
    "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
/*  "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",    */
    "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter",
/*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
    "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
/*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",     */
    "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur",
/*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
/*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
    "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
/*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
    "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
/*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
    "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
/*  "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",    */
    "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd",
/*  "zu",  "zun", "zxx", "zza",                                         */
    "zul", "zun", "zxx", "zza",
NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL
};
/**
 * Table of 2-letter country codes.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with COUNTRIES_3, with corresponding
 * entries matched: COUNTRIES[i] and COUNTRIES_3[i] name the same country.
 *
 * The table is terminated with a NULL entry, followed by a second
 * list, and another NULL entry.  The first list is visible to user
 * code when this array is returned by API.  The second list contains
 * codes we support, but do not expose through user API.
 *
 * Notes:
 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 * new codes keeping the old ones for compatibility updated to include
 * 1999/12/03 revisions *CWB*
 *
 * RO(ROM) is now RO(ROU) according to
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 */
static const char * const COUNTRIES[] = {
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",
    "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL,
    "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   /* obsolete country codes */
NULL
};
/*
 * Deprecated country codes and, at the same index, the code that replaces
 * each one.  Both arrays end with two NULL entries and must stay the same
 * length.
 */
static const char* const DEPRECATED_COUNTRIES[] ={
    "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
    "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL  /* replacement country codes */
};
/**
 * Table of 3-letter country codes.
 *
 * This is a lookup table used to convert 3-letter country codes to
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 * For all valid i, COUNTRIES[i] must refer to the same country as
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 * to make comparing the two tables by eye easier.
 *
 * The table is terminated with a NULL entry, followed by a second
 * list, and another NULL entry.  The two lists correspond to the two
 * lists in COUNTRIES.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",     */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
/*  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL,
/*  "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   */
    "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
NULL
};
/* One row of the locale-ID canonicalization table. */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} CanonicalizationMap;

/**
 * A map to canonicalize locale IDs.  This handles a variety of
 * different semantic kinds of transformations.  In every row, keyword
 * and value are either both set or both NULL.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
    { "C",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
    { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
    { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
    { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
    { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
    { "cel_GAULISH",    "cel__GAULISH", NULL, NULL }, /* registered name */
    { "de_1901",        "de__1901", NULL, NULL }, /* registered name */
    { "de_1906",        "de__1906", NULL, NULL }, /* registered name */
    { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
    { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
    { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
    { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
    { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
    { "en_BOONT",       "en__BOONT", NULL, NULL }, /* registered name */
    { "en_SCOUSE",      "en__SCOUSE", NULL, NULL }, /* registered name */
    { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
    { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
    { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
    { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
    { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
    { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
    { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
    { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
    { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
    { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
    { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
    { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
    { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
    { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
    { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
    { "sl_ROZAJ",       "sl__ROZAJ", NULL, NULL }, /* registered name */
    { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
    { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
    { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
    { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
    { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
    { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
    { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
    { "zh_GAN",         "zh__GAN", NULL, NULL }, /* registered name */
    { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
    { "zh_HAKKA",       "zh__HAKKA", NULL, NULL }, /* registered name */
    { "zh_MIN",         "zh__MIN", NULL, NULL }, /* registered name */
    { "zh_MIN_NAN",     "zh__MINNAN", NULL, NULL }, /* registered name */
    { "zh_WUU",         "zh__WUU", NULL, NULL }, /* registered name */
    { "zh_XIANG",       "zh__XIANG", NULL, NULL }, /* registered name */
    { "zh_YUE",         "zh__YUE", NULL, NULL } /* registered name */
};
/* One row of the variant-to-keyword table. */
typedef struct VariantMap {
    const char *variant;     /* input ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} VariantMap;

/* Variants paired with the keyword/value that each row associates with them. */
static const VariantMap VARIANT_MAP[] = {
    { "EURO",   "currency", "EUR" },
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    { "STROKE", "collation", "stroke" }  /* Solaris variant */
};
550 /* ### Keywords **************************************************/
/* Size of the buffer that holds one canonicalized keyword name; names of
   this length or longer are rejected by the code below. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Capacity of the keyword list built while parsing one locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
/**
 * Finds where the keyword section of a locale ID begins.
 *
 * @param localeID NUL-terminated locale ID; NULL yields NULL
 * @return pointer to the '@' sign inside localeID, or NULL if there is none
 */
static const char *
locale_getKeywordsStart(const char *localeID) {
    const char *result = NULL;
    if(localeID == NULL) {
        return NULL;
    }
    if((result = uprv_strchr(localeID, '@')) != NULL) {
        return result;
    }
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    else {
        /* We do this because the @ sign is variant, and the @ sign used on one
        EBCDIC machine won't be compiled the same way on other EBCDIC based
        machines. */
        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
        const uint8_t *charToFind = ebcdicSigns;
        /* Try each candidate byte in turn; the list is 0-terminated. */
        while(*charToFind) {
            if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
                return result;
            }
            charToFind++;
        }
    }
#endif
    return NULL;
}
580 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
581 * @param keywordName incoming name to be canonicalized
582 * @param status return status (keyword too long)
583 * @return length of the keyword name
585 static int32_t locale_canonKeywordName(char *buf
, const char *keywordName
, UErrorCode
*status
)
588 int32_t keywordNameLen
= (int32_t)uprv_strlen(keywordName
);
590 if(keywordNameLen
>= ULOC_KEYWORD_BUFFER_LEN
) {
591 /* keyword name too long for internal buffer */
592 *status
= U_INTERNAL_PROGRAM_ERROR
;
596 /* normalize the keyword name */
597 for(i
= 0; i
< keywordNameLen
; i
++) {
598 buf
[i
] = uprv_tolower(keywordName
[i
]);
602 return keywordNameLen
;
606 char keyword
[ULOC_KEYWORD_BUFFER_LEN
];
608 const char *valueStart
;
612 static int32_t U_CALLCONV
613 compareKeywordStructs(const void *context
, const void *left
, const void *right
) {
614 const char* leftString
= ((const KeywordStruct
*)left
)->keyword
;
615 const char* rightString
= ((const KeywordStruct
*)right
)->keyword
;
616 return uprv_strcmp(leftString
, rightString
);
620 * Both addKeyword and addValue must already be in canonical form.
621 * Either both addKeyword and addValue are NULL, or neither is NULL.
622 * If they are not NULL they must be zero terminated.
623 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
626 _getKeywords(const char *localeID
,
628 char *keywords
, int32_t keywordCapacity
,
629 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
631 const char* addKeyword
,
632 const char* addValue
,
635 KeywordStruct keywordList
[ULOC_MAX_NO_KEYWORDS
];
637 int32_t maxKeywords
= ULOC_MAX_NO_KEYWORDS
;
638 int32_t numKeywords
= 0;
639 const char* pos
= localeID
;
640 const char* equalSign
= NULL
;
641 const char* semicolon
= NULL
;
643 int32_t keywordsLen
= 0;
644 int32_t valuesLen
= 0;
646 if(prev
== '@') { /* start of keyword definition */
647 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
649 UBool duplicate
= FALSE
;
650 /* skip leading spaces */
654 if (!*pos
) { /* handle trailing "; " */
657 if(numKeywords
== maxKeywords
) {
658 *status
= U_INTERNAL_PROGRAM_ERROR
;
661 equalSign
= uprv_strchr(pos
, '=');
662 semicolon
= uprv_strchr(pos
, ';');
663 /* lack of '=' [foo@currency] is illegal */
664 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
665 if(!equalSign
|| (semicolon
&& semicolon
<equalSign
)) {
666 *status
= U_INVALID_FORMAT_ERROR
;
669 /* need to normalize both keyword and keyword name */
670 if(equalSign
- pos
>= ULOC_KEYWORD_BUFFER_LEN
) {
671 /* keyword name too long for internal buffer */
672 *status
= U_INTERNAL_PROGRAM_ERROR
;
675 for(i
= 0, n
= 0; i
< equalSign
- pos
; ++i
) {
677 keywordList
[numKeywords
].keyword
[n
++] = uprv_tolower(pos
[i
]);
680 keywordList
[numKeywords
].keyword
[n
] = 0;
681 keywordList
[numKeywords
].keywordLen
= n
;
682 /* now grab the value part. First we skip the '=' */
684 /* then we leading spaces */
685 while(*equalSign
== ' ') {
688 keywordList
[numKeywords
].valueStart
= equalSign
;
693 while(*(pos
- i
- 1) == ' ') {
696 keywordList
[numKeywords
].valueLen
= (int32_t)(pos
- equalSign
- i
);
699 i
= (int32_t)uprv_strlen(equalSign
);
700 while(equalSign
[i
-1] == ' ') {
703 keywordList
[numKeywords
].valueLen
= i
;
705 /* If this is a duplicate keyword, then ignore it */
706 for (j
=0; j
<numKeywords
; ++j
) {
707 if (uprv_strcmp(keywordList
[j
].keyword
, keywordList
[numKeywords
].keyword
) == 0) {
717 /* Handle addKeyword/addValue. */
718 if (addKeyword
!= NULL
) {
719 UBool duplicate
= FALSE
;
720 U_ASSERT(addValue
!= NULL
);
721 /* Search for duplicate; if found, do nothing. Explicit keyword
722 overrides addKeyword. */
723 for (j
=0; j
<numKeywords
; ++j
) {
724 if (uprv_strcmp(keywordList
[j
].keyword
, addKeyword
) == 0) {
730 if (numKeywords
== maxKeywords
) {
731 *status
= U_INTERNAL_PROGRAM_ERROR
;
734 uprv_strcpy(keywordList
[numKeywords
].keyword
, addKeyword
);
735 keywordList
[numKeywords
].keywordLen
= (int32_t)uprv_strlen(addKeyword
);
736 keywordList
[numKeywords
].valueStart
= addValue
;
737 keywordList
[numKeywords
].valueLen
= (int32_t)uprv_strlen(addValue
);
741 U_ASSERT(addValue
== NULL
);
744 /* now we have a list of keywords */
745 /* we need to sort it */
746 uprv_sortArray(keywordList
, numKeywords
, sizeof(KeywordStruct
), compareKeywordStructs
, NULL
, FALSE
, status
);
748 /* Now construct the keyword part */
749 for(i
= 0; i
< numKeywords
; i
++) {
750 if(keywordsLen
+ keywordList
[i
].keywordLen
+ 1< keywordCapacity
) {
751 uprv_strcpy(keywords
+keywordsLen
, keywordList
[i
].keyword
);
753 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = '=';
755 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = 0;
758 keywordsLen
+= keywordList
[i
].keywordLen
+ 1;
760 if(keywordsLen
+ keywordList
[i
].valueLen
< keywordCapacity
) {
761 uprv_strncpy(keywords
+keywordsLen
, keywordList
[i
].valueStart
, keywordList
[i
].valueLen
);
763 keywordsLen
+= keywordList
[i
].valueLen
;
765 if(i
< numKeywords
- 1) {
766 if(keywordsLen
< keywordCapacity
) {
767 keywords
[keywordsLen
] = ';';
773 if(valuesLen
+ keywordList
[i
].valueLen
+ 1< valuesCapacity
) {
774 uprv_strcpy(values
+valuesLen
, keywordList
[i
].valueStart
);
775 values
[valuesLen
+ keywordList
[i
].valueLen
] = 0;
777 valuesLen
+= keywordList
[i
].valueLen
+ 1;
781 values
[valuesLen
] = 0;
786 return u_terminateChars(keywords
, keywordCapacity
, keywordsLen
, status
);
793 locale_getKeywords(const char *localeID
,
795 char *keywords
, int32_t keywordCapacity
,
796 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
798 UErrorCode
*status
) {
799 return _getKeywords(localeID
, prev
, keywords
, keywordCapacity
,
800 values
, valuesCapacity
, valLen
, valuesToo
,
804 U_CAPI
int32_t U_EXPORT2
805 uloc_getKeywordValue(const char* localeID
,
806 const char* keywordName
,
807 char* buffer
, int32_t bufferCapacity
,
810 const char* nextSeparator
= NULL
;
811 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
812 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
816 if(status
&& U_SUCCESS(*status
) && localeID
) {
818 const char* startSearchHere
= uprv_strchr(localeID
, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
819 if(startSearchHere
== NULL
) {
820 /* no keywords, return at once */
824 locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
825 if(U_FAILURE(*status
)) {
829 /* find the first keyword */
830 while(startSearchHere
) {
832 /* skip leading spaces (allowed?) */
833 while(*startSearchHere
== ' ') {
836 nextSeparator
= uprv_strchr(startSearchHere
, '=');
837 /* need to normalize both keyword and keyword name */
841 if(nextSeparator
- startSearchHere
>= ULOC_KEYWORD_BUFFER_LEN
) {
842 /* keyword name too long for internal buffer */
843 *status
= U_INTERNAL_PROGRAM_ERROR
;
846 for(i
= 0; i
< nextSeparator
- startSearchHere
; i
++) {
847 localeKeywordNameBuffer
[i
] = uprv_tolower(startSearchHere
[i
]);
849 /* trim trailing spaces */
850 while(startSearchHere
[i
-1] == ' ') {
853 localeKeywordNameBuffer
[i
] = 0;
855 startSearchHere
= uprv_strchr(nextSeparator
, ';');
857 if(uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
) == 0) {
859 while(*nextSeparator
== ' ') {
862 /* we actually found the keyword. Copy the value */
863 if(startSearchHere
&& startSearchHere
- nextSeparator
< bufferCapacity
) {
864 while(*(startSearchHere
-1) == ' ') {
867 uprv_strncpy(buffer
, nextSeparator
, startSearchHere
- nextSeparator
);
868 result
= u_terminateChars(buffer
, bufferCapacity
, (int32_t)(startSearchHere
- nextSeparator
), status
);
869 } else if(!startSearchHere
&& (int32_t)uprv_strlen(nextSeparator
) < bufferCapacity
) { /* last item in string */
870 i
= (int32_t)uprv_strlen(nextSeparator
);
871 while(nextSeparator
[i
- 1] == ' ') {
874 uprv_strncpy(buffer
, nextSeparator
, i
);
875 result
= u_terminateChars(buffer
, bufferCapacity
, i
, status
);
877 /* give a bigger buffer, please */
878 *status
= U_BUFFER_OVERFLOW_ERROR
;
879 if(startSearchHere
) {
880 result
= (int32_t)(startSearchHere
- nextSeparator
);
882 result
= (int32_t)uprv_strlen(nextSeparator
);
892 U_CAPI
int32_t U_EXPORT2
893 uloc_setKeywordValue(const char* keywordName
,
894 const char* keywordValue
,
895 char* buffer
, int32_t bufferCapacity
,
898 /* TODO: sorting. removal. */
899 int32_t keywordNameLen
;
900 int32_t keywordValueLen
;
903 int32_t foundValueLen
;
904 int32_t keywordAtEnd
= 0; /* is the keyword at the end of the string? */
905 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
906 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
909 char* nextSeparator
= NULL
;
910 char* nextEqualsign
= NULL
;
911 char* startSearchHere
= NULL
;
912 char* keywordStart
= NULL
;
913 char *insertHere
= NULL
;
914 if(U_FAILURE(*status
)) {
917 if(bufferCapacity
>1) {
918 bufLen
= (int32_t)uprv_strlen(buffer
);
920 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
923 if(bufferCapacity
<bufLen
) {
924 /* The capacity is less than the length?! Is this NULL terminated? */
925 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
928 if(keywordValue
&& !*keywordValue
) {
932 keywordValueLen
= (int32_t)uprv_strlen(keywordValue
);
936 keywordNameLen
= locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
937 if(U_FAILURE(*status
)) {
940 startSearchHere
= (char*)locale_getKeywordsStart(buffer
);
941 if(startSearchHere
== NULL
|| (startSearchHere
[1]==0)) {
942 if(!keywordValue
) { /* no keywords = nothing to remove */
946 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
947 if(startSearchHere
) { /* had a single @ */
948 needLen
--; /* already had the @ */
949 /* startSearchHere points at the @ */
951 startSearchHere
=buffer
+bufLen
;
953 if(needLen
>= bufferCapacity
) {
954 *status
= U_BUFFER_OVERFLOW_ERROR
;
955 return needLen
; /* no change */
957 *startSearchHere
= '@';
959 uprv_strcpy(startSearchHere
, keywordNameBuffer
);
960 startSearchHere
+= keywordNameLen
;
961 *startSearchHere
= '=';
963 uprv_strcpy(startSearchHere
, keywordValue
);
964 startSearchHere
+=keywordValueLen
;
966 } /* end shortcut - no @ */
968 keywordStart
= startSearchHere
;
969 /* search for keyword */
970 while(keywordStart
) {
972 /* skip leading spaces (allowed?) */
973 while(*keywordStart
== ' ') {
976 nextEqualsign
= uprv_strchr(keywordStart
, '=');
977 /* need to normalize both keyword and keyword name */
981 if(nextEqualsign
- keywordStart
>= ULOC_KEYWORD_BUFFER_LEN
) {
982 /* keyword name too long for internal buffer */
983 *status
= U_INTERNAL_PROGRAM_ERROR
;
986 for(i
= 0; i
< nextEqualsign
- keywordStart
; i
++) {
987 localeKeywordNameBuffer
[i
] = uprv_tolower(keywordStart
[i
]);
989 /* trim trailing spaces */
990 while(keywordStart
[i
-1] == ' ') {
993 localeKeywordNameBuffer
[i
] = 0;
995 nextSeparator
= uprv_strchr(nextEqualsign
, ';');
996 rc
= uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
);
999 while(*nextEqualsign
== ' ') {
1002 /* we actually found the keyword. Change the value */
1003 if (nextSeparator
) {
1005 foundValueLen
= (int32_t)(nextSeparator
- nextEqualsign
);
1008 foundValueLen
= (int32_t)uprv_strlen(nextEqualsign
);
1010 if(keywordValue
) { /* adding a value - not removing */
1011 if(foundValueLen
== keywordValueLen
) {
1012 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1013 return bufLen
; /* no change in size */
1014 } else if(foundValueLen
> keywordValueLen
) {
1015 int32_t delta
= foundValueLen
- keywordValueLen
;
1016 if(nextSeparator
) { /* RH side */
1017 uprv_memmove(nextSeparator
- delta
, nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1019 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1023 } else { /* FVL < KVL */
1024 int32_t delta
= keywordValueLen
- foundValueLen
;
1025 if((bufLen
+delta
) >= bufferCapacity
) {
1026 *status
= U_BUFFER_OVERFLOW_ERROR
;
1027 return bufLen
+delta
;
1029 if(nextSeparator
) { /* RH side */
1030 uprv_memmove(nextSeparator
+delta
,nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1032 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1037 } else { /* removing a keyword */
1039 /* zero out the ';' or '@' just before startSearchhere */
1040 keywordStart
[-1] = 0;
1041 return (int32_t)((keywordStart
-buffer
)-1); /* (string length without keyword) minus separator */
1043 uprv_memmove(keywordStart
, nextSeparator
+1, bufLen
-((nextSeparator
+1)-buffer
));
1044 keywordStart
[bufLen
-((nextSeparator
+1)-buffer
)]=0;
1045 return (int32_t)(bufLen
-((nextSeparator
+1)-keywordStart
));
1048 } else if(rc
<0){ /* end match keyword */
1049 /* could insert at this location. */
1050 insertHere
= keywordStart
;
1052 keywordStart
= nextSeparator
;
1053 } /* end loop searching */
1056 return bufLen
; /* removal of non-extant keyword - no change */
1059 /* we know there is at least one keyword. */
1060 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
1061 if(needLen
>= bufferCapacity
) {
1062 *status
= U_BUFFER_OVERFLOW_ERROR
;
1063 return needLen
; /* no change */
1067 uprv_memmove(insertHere
+(1+keywordNameLen
+1+keywordValueLen
), insertHere
, bufLen
-(insertHere
-buffer
));
1068 keywordStart
= insertHere
;
1070 keywordStart
= buffer
+bufLen
;
1071 *keywordStart
= ';';
1074 uprv_strncpy(keywordStart
, keywordNameBuffer
, keywordNameLen
);
1075 keywordStart
+= keywordNameLen
;
1076 *keywordStart
= '=';
1078 uprv_strncpy(keywordStart
, keywordValue
, keywordValueLen
); /* terminates. */
1079 keywordStart
+=keywordValueLen
;
1081 *keywordStart
= ';';
1088 /* ### ID parsing implementation **************************************************/
1090 /*returns TRUE if a is an ID separator FALSE otherwise*/
1091 #define _isIDSeparator(a) (a == '_' || a == '-')
1093 #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))
1095 /*returns TRUE if one of the special prefixes is here (s=string)
1097 #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
1099 /* Dot terminates it because of POSIX form where dot precedes the codepage
1100 * except for variant
1102 #define _isTerminator(a) ((a==0)||(a=='.')||(a=='@'))
1104 static char* _strnchr(const char* str
, int32_t len
, char c
) {
1105 U_ASSERT(str
!= 0 && len
>= 0);
1106 while (len
-- != 0) {
1110 } else if (d
== 0) {
1119 * Lookup 'key' in the array 'list'. The array 'list' should contain
1120 * a NULL entry, followed by more entries, and a second NULL entry.
1122 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1125 static int16_t _findIndex(const char* const* list
, const char* key
)
1127 const char* const* anchor
= list
;
1130 /* Make two passes through two NULL-terminated arrays at 'list' */
1131 while (pass
++ < 2) {
1133 if (uprv_strcmp(key
, *list
) == 0) {
1134 return (int16_t)(list
- anchor
);
1138 ++list
; /* skip final NULL *CWB*/
1143 /* count the length of src while copying it to dest; return strlen(src) */
1144 static U_INLINE
int32_t
1145 _copyCount(char *dest
, int32_t destCapacity
, const char *src
) {
1152 return (int32_t)(src
-anchor
);
1154 if(destCapacity
<=0) {
1155 return (int32_t)((src
-anchor
)+uprv_strlen(src
));
1164 uloc_getCurrentCountryID(const char* oldID
){
1165 int32_t offset
= _findIndex(DEPRECATED_COUNTRIES
, oldID
);
1167 return REPLACEMENT_COUNTRIES
[offset
];
1172 uloc_getCurrentLanguageID(const char* oldID
){
1173 int32_t offset
= _findIndex(DEPRECATED_LANGUAGES
, oldID
);
1175 return REPLACEMENT_LANGUAGES
[offset
];
1180 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1181 * avoid duplicating code to handle the earlier locale ID pieces
1182 * in the functions for the later ones by
1183 * setting the *pEnd pointer to where they stopped parsing
1185 * TODO try to use this in Locale
1188 _getLanguage(const char *localeID
,
1189 char *language
, int32_t languageCapacity
,
1190 const char **pEnd
) {
1193 char lang
[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1195 /* if it starts with i- or x- then copy that prefix */
1196 if(_isIDPrefix(localeID
)) {
1197 if(i
<languageCapacity
) {
1198 language
[i
]=(char)uprv_tolower(*localeID
);
1200 if(i
<languageCapacity
) {
1207 /* copy the language as far as possible and count its length */
1208 while(!_isTerminator(*localeID
) && !_isIDSeparator(*localeID
)) {
1209 if(i
<languageCapacity
) {
1210 language
[i
]=(char)uprv_tolower(*localeID
);
1213 lang
[i
]=(char)uprv_tolower(*localeID
);
1220 /* convert 3 character code to 2 character code if possible *CWB*/
1221 offset
=_findIndex(LANGUAGES_3
, lang
);
1223 i
=_copyCount(language
, languageCapacity
, LANGUAGES
[offset
]);
1234 _getScript(const char *localeID
,
1235 char *script
, int32_t scriptCapacity
,
1244 /* copy the second item as far as possible and count its length */
1245 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])) {
1249 /* If it's exactly 4 characters long, then it's a script and not a country. */
1253 *pEnd
= localeID
+idLen
;
1255 if(idLen
> scriptCapacity
) {
1256 idLen
= scriptCapacity
;
1259 script
[0]=(char)uprv_toupper(*(localeID
++));
1261 for (i
= 1; i
< idLen
; i
++) {
1262 script
[i
]=(char)uprv_tolower(*(localeID
++));
1272 _getCountry(const char *localeID
,
1273 char *country
, int32_t countryCapacity
,
1277 char cnty
[ULOC_COUNTRY_CAPACITY
]={ 0, 0, 0, 0 };
1280 /* copy the country as far as possible and count its length */
1281 while(!_isTerminator(*localeID
) && !_isIDSeparator(*localeID
)) {
1282 if(i
<countryCapacity
) {
1283 country
[i
]=(char)uprv_toupper(*localeID
);
1285 if(i
<(ULOC_COUNTRY_CAPACITY
-1)) { /*CWB*/
1286 cnty
[i
]=(char)uprv_toupper(*localeID
);
1292 /* convert 3 character code to 2 character code if possible *CWB*/
1294 offset
=_findIndex(COUNTRIES_3
, cnty
);
1296 i
=_copyCount(country
, countryCapacity
, COUNTRIES
[offset
]);
1307 * @param needSeparator if true, then add leading '_' if any variants
1308 * are added to 'variant'
1311 _getVariantEx(const char *localeID
,
1313 char *variant
, int32_t variantCapacity
,
1314 UBool needSeparator
) {
1317 /* get one or more variant tags and separate them with '_' */
1318 if(_isIDSeparator(prev
)) {
1319 /* get a variant string after a '-' or '_' */
1320 while(!_isTerminator(*localeID
)) {
1321 if (needSeparator
) {
1322 if (i
<variantCapacity
) {
1326 needSeparator
= FALSE
;
1328 if(i
<variantCapacity
) {
1329 variant
[i
]=(char)uprv_toupper(*localeID
);
1330 if(variant
[i
]=='-') {
1339 /* if there is no variant tag after a '-' or '_' then look for '@' */
1343 } else if((localeID
=locale_getKeywordsStart(localeID
))!=NULL
) {
1344 ++localeID
; /* point after the '@' */
1348 while(!_isTerminator(*localeID
)) {
1349 if (needSeparator
) {
1350 if (i
<variantCapacity
) {
1354 needSeparator
= FALSE
;
1356 if(i
<variantCapacity
) {
1357 variant
[i
]=(char)uprv_toupper(*localeID
);
1358 if(variant
[i
]=='-' || variant
[i
]==',') {
1371 _getVariant(const char *localeID
,
1373 char *variant
, int32_t variantCapacity
) {
1374 return _getVariantEx(localeID
, prev
, variant
, variantCapacity
, FALSE
);
1378 * Delete ALL instances of a variant from the given list of one or
1379 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1380 * @param variants the source string of one or more variants,
1381 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1382 * terminated; if it is, trailing zero will NOT be maintained.
1383 * @param variantsLen length of variants
1384 * @param toDelete variant to delete, without separators, e.g. "EURO"
1385 * or "PREEURO"; not zero terminated
1386 * @param toDeleteLen length of toDelete
1387 * @return number of characters deleted from variants
1390 _deleteVariant(char* variants
, int32_t variantsLen
,
1391 const char* toDelete
, int32_t toDeleteLen
)
1393 int32_t delta
= 0; /* number of chars deleted */
1396 if (variantsLen
< toDeleteLen
) {
1399 if (uprv_strncmp(variants
, toDelete
, toDeleteLen
) == 0 &&
1400 (variantsLen
== toDeleteLen
||
1401 (flag
=(variants
[toDeleteLen
] == '_'))))
1403 int32_t d
= toDeleteLen
+ (flag
?1:0);
1406 if (variantsLen
> 0) {
1407 uprv_memmove(variants
, variants
+d
, variantsLen
);
1410 char* p
= _strnchr(variants
, variantsLen
, '_');
1415 variantsLen
-= (int32_t)(p
- variants
);
1421 /* Keyword enumeration */
1423 typedef struct UKeywordsContext
{
1428 static void U_CALLCONV
1429 uloc_kw_closeKeywords(UEnumeration
*enumerator
) {
1430 uprv_free(((UKeywordsContext
*)enumerator
->context
)->keywords
);
1431 uprv_free(enumerator
->context
);
1432 uprv_free(enumerator
);
1435 static int32_t U_CALLCONV
1436 uloc_kw_countKeywords(UEnumeration
*en
, UErrorCode
*status
) {
1437 char *kw
= ((UKeywordsContext
*)en
->context
)->keywords
;
1441 kw
+= uprv_strlen(kw
)+1;
1446 static const char* U_CALLCONV
1447 uloc_kw_nextKeyword(UEnumeration
* en
,
1448 int32_t* resultLength
,
1449 UErrorCode
* status
) {
1450 const char* result
= ((UKeywordsContext
*)en
->context
)->current
;
1453 len
= (int32_t)uprv_strlen(((UKeywordsContext
*)en
->context
)->current
);
1454 ((UKeywordsContext
*)en
->context
)->current
+= len
+1;
1459 *resultLength
= len
;
1464 static void U_CALLCONV
1465 uloc_kw_resetKeywords(UEnumeration
* en
,
1466 UErrorCode
* status
) {
1467 ((UKeywordsContext
*)en
->context
)->current
= ((UKeywordsContext
*)en
->context
)->keywords
;
1470 static const UEnumeration gKeywordsEnum
= {
1473 uloc_kw_closeKeywords
,
1474 uloc_kw_countKeywords
,
1476 uloc_kw_nextKeyword
,
1477 uloc_kw_resetKeywords
1480 U_CAPI UEnumeration
* U_EXPORT2
1481 uloc_openKeywordList(const char *keywordList
, int32_t keywordListSize
, UErrorCode
* status
)
1483 UKeywordsContext
*myContext
= NULL
;
1484 UEnumeration
*result
= NULL
;
1486 if(U_FAILURE(*status
)) {
1489 result
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
1490 /* Null pointer test */
1491 if (result
== NULL
) {
1492 *status
= U_MEMORY_ALLOCATION_ERROR
;
1495 uprv_memcpy(result
, &gKeywordsEnum
, sizeof(UEnumeration
));
1496 myContext
= uprv_malloc(sizeof(UKeywordsContext
));
1497 if (myContext
== NULL
) {
1498 *status
= U_MEMORY_ALLOCATION_ERROR
;
1502 myContext
->keywords
= (char *)uprv_malloc(keywordListSize
+1);
1503 uprv_memcpy(myContext
->keywords
, keywordList
, keywordListSize
);
1504 myContext
->keywords
[keywordListSize
] = 0;
1505 myContext
->current
= myContext
->keywords
;
1506 result
->context
= myContext
;
1510 U_CAPI UEnumeration
* U_EXPORT2
1511 uloc_openKeywords(const char* localeID
,
1516 int32_t keywordsCapacity
= 256;
1517 if(status
==NULL
|| U_FAILURE(*status
)) {
1521 if(localeID
==NULL
) {
1522 localeID
=uloc_getDefault();
1525 /* Skip the language */
1526 _getLanguage(localeID
, NULL
, 0, &localeID
);
1527 if(_isIDSeparator(*localeID
)) {
1528 const char *scriptID
;
1529 /* Skip the script if available */
1530 _getScript(localeID
+1, NULL
, 0, &scriptID
);
1531 if(scriptID
!= localeID
+1) {
1532 /* Found optional script */
1533 localeID
= scriptID
;
1535 /* Skip the Country */
1536 if (_isIDSeparator(*localeID
)) {
1537 _getCountry(localeID
+1, NULL
, 0, &localeID
);
1538 if(_isIDSeparator(*localeID
)) {
1539 _getVariant(localeID
+1, *localeID
, NULL
, 0);
1544 /* keywords are located after '@' */
1545 if((localeID
= locale_getKeywordsStart(localeID
)) != NULL
) {
1546 i
=locale_getKeywords(localeID
+1, '@', keywords
, keywordsCapacity
, NULL
, 0, NULL
, FALSE
, status
);
1550 return uloc_openKeywordList(keywords
, i
, status
);
1557 /* bit-flags for 'options' parameter of _canonicalize */
1558 #define _ULOC_STRIP_KEYWORDS 0x2
1559 #define _ULOC_CANONICALIZE 0x1
1561 #define OPTION_SET(options, mask) ((options & mask) != 0)
1563 static const char i_default
[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
1564 #define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1567 * Canonicalize the given localeID, to level 1 or to level 2,
1568 * depending on the options. To specify level 1, pass in options=0.
1569 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1571 * This is the code underlying uloc_getName and uloc_canonicalize.
1574 _canonicalize(const char* localeID
,
1576 int32_t resultCapacity
,
1579 int32_t j
, len
, fieldCount
=0, scriptSize
=0, variantSize
=0, nameCapacity
;
1580 char localeBuffer
[ULOC_FULLNAME_CAPACITY
];
1581 const char* origLocaleID
;
1582 const char* keywordAssign
= NULL
;
1583 const char* separatorIndicator
= NULL
;
1584 const char* addKeyword
= NULL
;
1585 const char* addValue
= NULL
;
1587 char* variant
= NULL
; /* pointer into name, or NULL */
1589 if (U_FAILURE(*err
)) {
1593 if (localeID
==NULL
) {
1594 localeID
=uloc_getDefault();
1596 origLocaleID
=localeID
;
1598 /* if we are doing a full canonicalization, then put results in
1599 localeBuffer, if necessary; otherwise send them to result. */
1600 if (OPTION_SET(options
, _ULOC_CANONICALIZE
) &&
1601 (result
== NULL
|| resultCapacity
< sizeof(localeBuffer
))) {
1602 name
= localeBuffer
;
1603 nameCapacity
= sizeof(localeBuffer
);
1606 nameCapacity
= resultCapacity
;
1609 /* get all pieces, one after another, and separate with '_' */
1610 len
=_getLanguage(localeID
, name
, nameCapacity
, &localeID
);
1612 if(len
== I_DEFAULT_LENGTH
&& uprv_strncmp(origLocaleID
, i_default
, len
) == 0) {
1613 const char *d
= uloc_getDefault();
1615 len
= uprv_strlen(d
);
1618 uprv_strncpy(name
, d
, len
);
1620 } else if(_isIDSeparator(*localeID
)) {
1621 const char *scriptID
;
1624 if(len
<nameCapacity
) {
1629 scriptSize
=_getScript(localeID
+1, name
+len
, nameCapacity
-len
, &scriptID
);
1630 if(scriptSize
> 0) {
1631 /* Found optional script */
1632 localeID
= scriptID
;
1635 if (_isIDSeparator(*localeID
)) {
1636 /* If there is something else, then we add the _ */
1637 if(len
<nameCapacity
) {
1644 if (_isIDSeparator(*localeID
)) {
1645 len
+=_getCountry(localeID
+1, name
+len
, nameCapacity
-len
, &localeID
);
1646 if(_isIDSeparator(*localeID
)) {
1648 if(len
<nameCapacity
) {
1652 variantSize
= _getVariant(localeID
+1, *localeID
, name
+len
, nameCapacity
-len
);
1653 if (variantSize
> 0) {
1656 localeID
+= variantSize
+ 1; /* skip '_' and variant */
1662 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1663 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) && *localeID
== '.') {
1673 if (len
<nameCapacity
) {
1683 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1684 After this, localeID either points to '@' or is NULL */
1685 if ((localeID
=locale_getKeywordsStart(localeID
))!=NULL
) {
1686 keywordAssign
= uprv_strchr(localeID
, '=');
1687 separatorIndicator
= uprv_strchr(localeID
, ';');
1690 /* Copy POSIX-style variant, if any [mr@FOO] */
1691 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) &&
1692 localeID
!= NULL
&& keywordAssign
== NULL
) {
1698 if (len
<nameCapacity
) {
1706 if (OPTION_SET(options
, _ULOC_CANONICALIZE
)) {
1707 /* Handle @FOO variant if @ is present and not followed by = */
1708 if (localeID
!=NULL
&& keywordAssign
==NULL
) {
1709 int32_t posixVariantSize
;
1710 /* Add missing '_' if needed */
1711 if (fieldCount
< 2 || (fieldCount
< 3 && scriptSize
> 0)) {
1713 if(len
<nameCapacity
) {
1718 } while(fieldCount
<2);
1720 posixVariantSize
= _getVariantEx(localeID
+1, '@', name
+len
, nameCapacity
-len
,
1721 (UBool
)(variantSize
> 0));
1722 if (posixVariantSize
> 0) {
1723 if (variant
== NULL
) {
1726 len
+= posixVariantSize
;
1727 variantSize
+= posixVariantSize
;
1731 /* Handle generic variants first */
1733 for (j
=0; j
<(int32_t)(sizeof(VARIANT_MAP
)/sizeof(VARIANT_MAP
[0])); j
++) {
1734 const char* variantToCompare
= VARIANT_MAP
[j
].variant
;
1735 int32_t n
= (int32_t)uprv_strlen(variantToCompare
);
1736 int32_t variantLen
= _deleteVariant(variant
, uprv_min(variantSize
, (nameCapacity
-len
)), variantToCompare
, n
);
1738 if (variantLen
> 0) {
1739 if (name
[len
-1] == '_') { /* delete trailing '_' */
1742 addKeyword
= VARIANT_MAP
[j
].keyword
;
1743 addValue
= VARIANT_MAP
[j
].value
;
1747 if (name
[len
-1] == '_') { /* delete trailing '_' */
1752 /* Look up the ID in the canonicalization map */
1753 for (j
=0; j
<(int32_t)(sizeof(CANONICALIZE_MAP
)/sizeof(CANONICALIZE_MAP
[0])); j
++) {
1754 const char* id
= CANONICALIZE_MAP
[j
].id
;
1755 int32_t n
= (int32_t)uprv_strlen(id
);
1756 if (len
== n
&& uprv_strncmp(name
, id
, n
) == 0) {
1757 if (n
== 0 && localeID
!= NULL
) {
1758 break; /* Don't remap "" if keywords present */
1760 len
= _copyCount(name
, nameCapacity
, CANONICALIZE_MAP
[j
].canonicalID
);
1761 if (CANONICALIZE_MAP
[j
].keyword
) {
1762 addKeyword
= CANONICALIZE_MAP
[j
].keyword
;
1763 addValue
= CANONICALIZE_MAP
[j
].value
;
1770 if (!OPTION_SET(options
, _ULOC_STRIP_KEYWORDS
)) {
1771 if (localeID
!=NULL
&& keywordAssign
!=NULL
&&
1772 (!separatorIndicator
|| separatorIndicator
> keywordAssign
)) {
1773 if(len
<nameCapacity
) {
1778 len
+= _getKeywords(localeID
+1, '@', name
+len
, nameCapacity
-len
, NULL
, 0, NULL
, TRUE
,
1779 addKeyword
, addValue
, err
);
1780 } else if (addKeyword
!= NULL
) {
1781 U_ASSERT(addValue
!= NULL
);
1782 /* inelegant but works -- later make _getKeywords do this? */
1783 len
+= _copyCount(name
+len
, nameCapacity
-len
, "@");
1784 len
+= _copyCount(name
+len
, nameCapacity
-len
, addKeyword
);
1785 len
+= _copyCount(name
+len
, nameCapacity
-len
, "=");
1786 len
+= _copyCount(name
+len
, nameCapacity
-len
, addValue
);
1790 if (U_SUCCESS(*err
) && result
!= NULL
&& name
== localeBuffer
) {
1791 uprv_strncpy(result
, localeBuffer
, (len
> resultCapacity
) ? resultCapacity
: len
);
1794 return u_terminateChars(result
, resultCapacity
, len
, err
);
1797 /* ### ID parsing API **************************************************/
1799 U_CAPI
int32_t U_EXPORT2
1800 uloc_getParent(const char* localeID
,
1802 int32_t parentCapacity
,
1805 const char *lastUnderscore
;
1808 if (U_FAILURE(*err
))
1811 if (localeID
== NULL
)
1812 localeID
= uloc_getDefault();
1814 lastUnderscore
=uprv_strrchr(localeID
, '_');
1815 if(lastUnderscore
!=NULL
) {
1816 i
=(int32_t)(lastUnderscore
-localeID
);
1821 if(i
>0 && parent
!= localeID
) {
1822 uprv_memcpy(parent
, localeID
, uprv_min(i
, parentCapacity
));
1824 return u_terminateChars(parent
, parentCapacity
, i
, err
);
1827 U_CAPI
int32_t U_EXPORT2
1828 uloc_getLanguage(const char* localeID
,
1830 int32_t languageCapacity
,
1833 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1836 if (err
==NULL
|| U_FAILURE(*err
)) {
1840 if(localeID
==NULL
) {
1841 localeID
=uloc_getDefault();
1844 i
=_getLanguage(localeID
, language
, languageCapacity
, NULL
);
1845 return u_terminateChars(language
, languageCapacity
, i
, err
);
1848 U_CAPI
int32_t U_EXPORT2
1849 uloc_getScript(const char* localeID
,
1851 int32_t scriptCapacity
,
1856 if(err
==NULL
|| U_FAILURE(*err
)) {
1860 if(localeID
==NULL
) {
1861 localeID
=uloc_getDefault();
1864 /* skip the language */
1865 _getLanguage(localeID
, NULL
, 0, &localeID
);
1866 if(_isIDSeparator(*localeID
)) {
1867 i
=_getScript(localeID
+1, script
, scriptCapacity
, NULL
);
1869 return u_terminateChars(script
, scriptCapacity
, i
, err
);
1872 U_CAPI
int32_t U_EXPORT2
1873 uloc_getCountry(const char* localeID
,
1875 int32_t countryCapacity
,
1880 if(err
==NULL
|| U_FAILURE(*err
)) {
1884 if(localeID
==NULL
) {
1885 localeID
=uloc_getDefault();
1888 /* Skip the language */
1889 _getLanguage(localeID
, NULL
, 0, &localeID
);
1890 if(_isIDSeparator(*localeID
)) {
1891 const char *scriptID
;
1892 /* Skip the script if available */
1893 _getScript(localeID
+1, NULL
, 0, &scriptID
);
1894 if(scriptID
!= localeID
+1) {
1895 /* Found optional script */
1896 localeID
= scriptID
;
1898 if(_isIDSeparator(*localeID
)) {
1899 i
=_getCountry(localeID
+1, country
, countryCapacity
, NULL
);
1902 return u_terminateChars(country
, countryCapacity
, i
, err
);
1905 U_CAPI
int32_t U_EXPORT2
1906 uloc_getVariant(const char* localeID
,
1908 int32_t variantCapacity
,
1913 if(err
==NULL
|| U_FAILURE(*err
)) {
1917 if(localeID
==NULL
) {
1918 localeID
=uloc_getDefault();
1921 /* Skip the language */
1922 _getLanguage(localeID
, NULL
, 0, &localeID
);
1923 if(_isIDSeparator(*localeID
)) {
1924 const char *scriptID
;
1925 /* Skip the script if available */
1926 _getScript(localeID
+1, NULL
, 0, &scriptID
);
1927 if(scriptID
!= localeID
+1) {
1928 /* Found optional script */
1929 localeID
= scriptID
;
1931 /* Skip the Country */
1932 if (_isIDSeparator(*localeID
)) {
1933 _getCountry(localeID
+1, NULL
, 0, &localeID
);
1934 if(_isIDSeparator(*localeID
)) {
1935 i
=_getVariant(localeID
+1, *localeID
, variant
, variantCapacity
);
1940 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1941 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
1943 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1944 i=_getVariant(localeID+1, '@', variant, variantCapacity);
1947 return u_terminateChars(variant
, variantCapacity
, i
, err
);
1950 U_CAPI
int32_t U_EXPORT2
1951 uloc_getName(const char* localeID
,
1953 int32_t nameCapacity
,
1956 return _canonicalize(localeID
, name
, nameCapacity
, 0, err
);
1959 U_CAPI
int32_t U_EXPORT2
1960 uloc_getBaseName(const char* localeID
,
1962 int32_t nameCapacity
,
1965 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_STRIP_KEYWORDS
, err
);
1968 U_CAPI
int32_t U_EXPORT2
1969 uloc_canonicalize(const char* localeID
,
1971 int32_t nameCapacity
,
1974 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_CANONICALIZE
, err
);
1977 U_CAPI
const char* U_EXPORT2
1978 uloc_getISO3Language(const char* localeID
)
1981 char lang
[ULOC_LANG_CAPACITY
];
1982 UErrorCode err
= U_ZERO_ERROR
;
1984 if (localeID
== NULL
)
1986 localeID
= uloc_getDefault();
1988 uloc_getLanguage(localeID
, lang
, ULOC_LANG_CAPACITY
, &err
);
1991 offset
= _findIndex(LANGUAGES
, lang
);
1994 return LANGUAGES_3
[offset
];
1997 U_CAPI
const char* U_EXPORT2
1998 uloc_getISO3Country(const char* localeID
)
2001 char cntry
[ULOC_LANG_CAPACITY
];
2002 UErrorCode err
= U_ZERO_ERROR
;
2004 if (localeID
== NULL
)
2006 localeID
= uloc_getDefault();
2008 uloc_getCountry(localeID
, cntry
, ULOC_LANG_CAPACITY
, &err
);
2011 offset
= _findIndex(COUNTRIES
, cntry
);
2015 return COUNTRIES_3
[offset
];
2018 U_CAPI
uint32_t U_EXPORT2
2019 uloc_getLCID(const char* localeID
)
2021 UErrorCode status
= U_ZERO_ERROR
;
2022 char langID
[ULOC_FULLNAME_CAPACITY
];
2024 uloc_getLanguage(localeID
, langID
, sizeof(langID
), &status
);
2025 if (U_FAILURE(status
)) {
2029 return uprv_convertToLCID(langID
, localeID
, &status
);
2032 U_CAPI
int32_t U_EXPORT2
2033 uloc_getLocaleForLCID(uint32_t hostid
, char *locale
, int32_t localeCapacity
,
2037 const char *posix
= uprv_convertToPosix(hostid
, status
);
2038 if (U_FAILURE(*status
) || posix
== NULL
) {
2041 length
= (int32_t)uprv_strlen(posix
);
2042 if (length
+1 > localeCapacity
) {
2043 *status
= U_BUFFER_OVERFLOW_ERROR
;
2046 uprv_strcpy(locale
, posix
);
2051 /* ### Default locale **************************************************/
2053 U_CAPI
const char* U_EXPORT2
2056 return locale_get_default();
2059 U_CAPI
void U_EXPORT2
2060 uloc_setDefault(const char* newDefaultLocale
,
2063 if (U_FAILURE(*err
))
2065 /* the error code isn't currently used for anything by this function*/
2067 /* propagate change to C++ */
2068 locale_set_default(newDefaultLocale
);
2071 /* ### Display name **************************************************/
2074 * Lookup a resource bundle table item with fallback on the table level.
2075 * Regular resource bundle lookups perform fallback to parent locale bundles
2076 * and eventually the root bundle, but only for top-level items.
2077 * This function takes the name of a top-level table and of an item in that table
2078 * and performs a lookup of both, falling back until a bundle contains a table
2081 * Note: Only the opening of entire bundles falls back through the default locale
2082 * before root. Once a bundle is open, item lookups do not go through the
2083 * default locale because that would result in a mix of languages that is
2084 * unpredictable to the programmer and most likely useless.
/*
 * Fetches the string itemKey from the table tableKey (optionally narrowed to
 * subTableKey) of the bundle opened for (path, locale).
 *
 * Steps that are visible here:
 *  - the bundle is opened with ures_open(); a hard failure is copied to
 *    *pErrorCode, and fallback/default warnings are recorded so that the
 *    "strongest" code wins;
 *  - the table (and sub-table, when subTableKey != NULL) is fetched with
 *    ures_getByKeyWithFallback(), the item with
 *    ures_getStringByKeyWithFallback();
 *  - when the item is missing from "Countries" or "Languages", the lookup is
 *    retried with uloc_getCurrentCountryID() / uloc_getCurrentLanguageID();
 *  - when it is still missing, the table's "Fallback" string names another
 *    locale whose bundle is opened instead; a fallback equal to `locale` is
 *    rejected with U_INTERNAL_PROGRAM_ERROR.
 *
 * NOTE(review): the fallback bundle is opened with a NULL path rather than
 * `path` - confirm that this is intended for non-default data packages.
 */
2086 static const UChar
*
2087 _res_getTableStringWithFallback(const char *path
, const char *locale
,
2088 const char *tableKey
, const char *subTableKey
,
2089 const char *itemKey
,
2091 UErrorCode
*pErrorCode
)
2093 /* char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/
2094 UResourceBundle
*rb
=NULL
, table
, subTable
;
2095 const UChar
*item
=NULL
;
2096 UErrorCode errorCode
;
2097 char explicitFallbackName
[ULOC_FULLNAME_CAPACITY
] = {0};
2100 * open the bundle for the current locale
2101 * this falls back through the locale's chain to root
2103 errorCode
=U_ZERO_ERROR
;
2104 rb
=ures_open(path
, locale
, &errorCode
);
2105 if(U_FAILURE(errorCode
)) {
2106 /* total failure, not even root could be opened */
2107 *pErrorCode
=errorCode
;
2109 } else if(errorCode
==U_USING_DEFAULT_WARNING
||
2110 (errorCode
==U_USING_FALLBACK_WARNING
&& *pErrorCode
!=U_USING_DEFAULT_WARNING
)
2112 /* set the "strongest" error code (success->fallback->default->failure) */
2113 *pErrorCode
=errorCode
;
2117 ures_initStackObject(&table
);
2118 ures_initStackObject(&subTable
);
2119 ures_getByKeyWithFallback(rb
, tableKey
, &table
, &errorCode
);
2120 if (subTableKey
!= NULL
) {
2122 ures_getByKeyWithFallback(&table,subTableKey, &subTable, &errorCode);
2123 item = ures_getStringByKeyWithFallback(&subTable, itemKey, pLength, &errorCode);
2124 if(U_FAILURE(errorCode)){
2125 *pErrorCode = errorCode;
2130 ures_getByKeyWithFallback(&table
,subTableKey
, &table
, &errorCode
);
2132 if(U_SUCCESS(errorCode
)){
2133 item
= ures_getStringByKeyWithFallback(&table
, itemKey
, pLength
, &errorCode
);
2134 if(U_FAILURE(errorCode
)){
2135 const char* replacement
= NULL
;
2136 *pErrorCode
= errorCode
; /*save the errorCode*/
2137 errorCode
= U_ZERO_ERROR
;
2138 /* may be a deprecated code */
2139 if(uprv_strcmp(tableKey
, "Countries")==0){
2140 replacement
= uloc_getCurrentCountryID(itemKey
);
2141 }else if(uprv_strcmp(tableKey
, "Languages")==0){
2142 replacement
= uloc_getCurrentLanguageID(itemKey
);
2144 /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself if a replacement is not found*/
2145 if(replacement
!=NULL
&& itemKey
!= replacement
){
2146 item
= ures_getStringByKeyWithFallback(&table
, replacement
, pLength
, &errorCode
);
2147 if(U_SUCCESS(errorCode
)){
2148 *pErrorCode
= errorCode
;
2157 if(U_FAILURE(errorCode
)){
2159 /* still can't figure out ?.. try the fallback mechanism */
2161 const UChar
* fallbackLocale
= NULL
;
2162 *pErrorCode
= errorCode
;
2163 errorCode
= U_ZERO_ERROR
;
2165 fallbackLocale
= ures_getStringByKeyWithFallback(&table
, "Fallback", &len
, &errorCode
);
2166 if(U_FAILURE(errorCode
)){
2167 *pErrorCode
= errorCode
;
2171 u_UCharsToChars(fallbackLocale
, explicitFallbackName
, len
);
2173 /* guard against recursive fallback */
2174 if(uprv_strcmp(explicitFallbackName
, locale
)==0){
2175 *pErrorCode
= U_INTERNAL_PROGRAM_ERROR
;
2179 rb
= ures_open(NULL
, explicitFallbackName
, &errorCode
);
2180 if(U_FAILURE(errorCode
)){
2181 *pErrorCode
= errorCode
;
2184 /* succeeded in opening the fallback bundle .. continue and try to fetch the item */
2189 /* done with the locale string - ready to close table and rb */
2190 ures_close(&subTable
);
2197 _getStringOrCopyKey(const char *path
, const char *locale
,
2198 const char *tableKey
,
2199 const char* subTableKey
,
2200 const char *itemKey
,
2201 const char *substitute
,
2202 UChar
*dest
, int32_t destCapacity
,
2203 UErrorCode
*pErrorCode
) {
2204 const UChar
*s
= NULL
;
2208 /* top-level item: normal resource bundle access */
2209 UResourceBundle
*rb
;
2211 rb
=ures_open(path
, locale
, pErrorCode
);
2212 if(U_SUCCESS(*pErrorCode
)) {
2213 s
=ures_getStringByKey(rb
, tableKey
, &length
, pErrorCode
);
2214 /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
2218 /* Language code should not be a number. If it is, set the error code. */
2219 if (!uprv_strncmp(tableKey
, "Languages", 9) && uprv_strtol(itemKey
, NULL
, 10)) {
2220 *pErrorCode
= U_MISSING_RESOURCE_ERROR
;
2222 /* second-level item, use special fallback */
2223 s
=_res_getTableStringWithFallback(path
, locale
,
2231 if(U_SUCCESS(*pErrorCode
)) {
2232 int32_t copyLength
=uprv_min(length
, destCapacity
);
2233 if(copyLength
>0 && s
!= NULL
) {
2234 u_memcpy(dest
, s
, copyLength
);
2237 /* no string from a resource bundle: convert the substitute */
2238 length
=(int32_t)uprv_strlen(substitute
);
2239 u_charsToUChars(substitute
, dest
, uprv_min(length
, destCapacity
));
2240 *pErrorCode
=U_USING_DEFAULT_WARNING
;
2243 return u_terminateUChars(dest
, destCapacity
, length
, pErrorCode
);
2247 _getDisplayNameForComponent(const char *locale
,
2248 const char *displayLocale
,
2249 UChar
*dest
, int32_t destCapacity
,
2250 int32_t (*getter
)(const char *, char *, int32_t, UErrorCode
*),
2252 UErrorCode
*pErrorCode
) {
2253 char localeBuffer
[ULOC_FULLNAME_CAPACITY
*4];
2255 UErrorCode localStatus
;
2257 /* argument checking */
2258 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2262 if(destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)) {
2263 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2267 localStatus
= U_ZERO_ERROR
;
2268 length
=(*getter
)(locale
, localeBuffer
, sizeof(localeBuffer
), &localStatus
);
2269 if(U_FAILURE(localStatus
) || localStatus
==U_STRING_NOT_TERMINATED_WARNING
) {
2270 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2274 return u_terminateUChars(dest
, destCapacity
, 0, pErrorCode
);
2277 return _getStringOrCopyKey(NULL
, displayLocale
,
2278 tag
, NULL
, localeBuffer
,
2284 U_CAPI
int32_t U_EXPORT2
2285 uloc_getDisplayLanguage(const char *locale
,
2286 const char *displayLocale
,
2287 UChar
*dest
, int32_t destCapacity
,
2288 UErrorCode
*pErrorCode
) {
2289 return _getDisplayNameForComponent(locale
, displayLocale
, dest
, destCapacity
,
2290 uloc_getLanguage
, _kLanguages
, pErrorCode
);
2293 U_CAPI
int32_t U_EXPORT2
2294 uloc_getDisplayScript(const char* locale
,
2295 const char* displayLocale
,
2296 UChar
*dest
, int32_t destCapacity
,
2297 UErrorCode
*pErrorCode
)
2299 return _getDisplayNameForComponent(locale
, displayLocale
, dest
, destCapacity
,
2300 uloc_getScript
, _kScripts
, pErrorCode
);
2303 U_CAPI
int32_t U_EXPORT2
2304 uloc_getDisplayCountry(const char *locale
,
2305 const char *displayLocale
,
2306 UChar
*dest
, int32_t destCapacity
,
2307 UErrorCode
*pErrorCode
) {
2308 return _getDisplayNameForComponent(locale
, displayLocale
, dest
, destCapacity
,
2309 uloc_getCountry
, _kCountries
, pErrorCode
);
2313 * TODO separate variant1_variant2_variant3...
2314 * by getting each tag's display string and concatenating them with ", "
2315 * in between - similar to uloc_getDisplayName()
2317 U_CAPI
int32_t U_EXPORT2
2318 uloc_getDisplayVariant(const char *locale
,
2319 const char *displayLocale
,
2320 UChar
*dest
, int32_t destCapacity
,
2321 UErrorCode
*pErrorCode
) {
2322 return _getDisplayNameForComponent(locale
, displayLocale
, dest
, destCapacity
,
2323 uloc_getVariant
, _kVariants
, pErrorCode
);
/*
 * Builds the full display name of `locale` in the language of displayLocale.
 *
 * The pieces are produced in this order, each by the matching public
 * function: language, script, country, variant, then every keyword
 * (uloc_getDisplayKeyword) followed, when the keyword has a value, by
 * 0x3D ('=') and uloc_getDisplayKeywordValue. Keywords are separated by
 * 0x2c 0x20 (", ").
 *
 * While dest still has room (length < destCapacity) each piece is written at
 * dest+length; once it is full the same call is made with a NULL/0 buffer so
 * that only the length is accumulated ("preflighting"), and a resulting
 * U_BUFFER_OVERFLOW_ERROR is reset to U_ZERO_ERROR before the next piece.
 * The final length and status come from u_terminateUChars().
 *
 * length/length2 track the language..variant part, length3 the keyword part.
 * NOTE(review): inside "if(keywordCount > 1)" the extra "&& keywordCount"
 * test is always true.
 */
2326 U_CAPI
int32_t U_EXPORT2
2327 uloc_getDisplayName(const char *locale
,
2328 const char *displayLocale
,
2329 UChar
*dest
, int32_t destCapacity
,
2330 UErrorCode
*pErrorCode
)
2332 int32_t length
, length2
, length3
= 0;
2333 UBool hasLanguage
, hasScript
, hasCountry
, hasVariant
, hasKeywords
;
2334 UEnumeration
* keywordEnum
= NULL
;
2335 int32_t keywordCount
= 0;
2336 const char *keyword
= NULL
;
2337 int32_t keywordLen
= 0;
2338 char keywordValue
[256];
2339 int32_t keywordValueLen
= 0;
2341 /* argument checking */
2342 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2346 if(destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)) {
2347 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2352 * if there is a language, then write "language (country, variant)"
2353 * otherwise write "country, variant"
2356 /* write the language */
2357 length
=uloc_getDisplayLanguage(locale
, displayLocale
,
2360 hasLanguage
= length
>0;
2364 if(length
<destCapacity
) {
2368 if(length
<destCapacity
) {
2374 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2375 /* keep preflighting */
2376 *pErrorCode
=U_ZERO_ERROR
;
2379 /* append the script */
2380 if(length
<destCapacity
) {
2381 length2
=uloc_getDisplayScript(locale
, displayLocale
,
2382 dest
+length
, destCapacity
-length
,
2385 length2
=uloc_getDisplayScript(locale
, displayLocale
,
2389 hasScript
= length2
>0;
2394 if(length
<destCapacity
) {
2398 if(length
<destCapacity
) {
2404 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2405 /* keep preflighting */
2406 *pErrorCode
=U_ZERO_ERROR
;
2409 /* append the country */
2410 if(length
<destCapacity
) {
2411 length2
=uloc_getDisplayCountry(locale
, displayLocale
,
2412 dest
+length
, destCapacity
-length
,
2415 length2
=uloc_getDisplayCountry(locale
, displayLocale
,
2419 hasCountry
= length2
>0;
2424 if(length
<destCapacity
) {
2428 if(length
<destCapacity
) {
2434 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2435 /* keep preflighting */
2436 *pErrorCode
=U_ZERO_ERROR
;
2439 /* append the variant */
2440 if(length
<destCapacity
) {
2441 length2
=uloc_getDisplayVariant(locale
, displayLocale
,
2442 dest
+length
, destCapacity
-length
,
2445 length2
=uloc_getDisplayVariant(locale
, displayLocale
,
2449 hasVariant
= length2
>0;
2454 if(length
<destCapacity
) {
2458 if(length
<destCapacity
) {
2464 keywordEnum
= uloc_openKeywords(locale
, pErrorCode
);
2466 for(keywordCount
= uenum_count(keywordEnum
, pErrorCode
); keywordCount
> 0 ; keywordCount
--){
2467 if(U_FAILURE(*pErrorCode
)){
2470 /* the uenum_next returns NUL terminated string */
2471 keyword
= uenum_next(keywordEnum
, &keywordLen
, pErrorCode
);
2472 if(length
+ length3
< destCapacity
) {
2473 length3
+= uloc_getDisplayKeyword(keyword
, displayLocale
, dest
+length
+length3
, destCapacity
-length
-length3
, pErrorCode
);
2475 length3
+= uloc_getDisplayKeyword(keyword
, displayLocale
, NULL
, 0, pErrorCode
);
2477 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2478 /* keep preflighting */
2479 *pErrorCode
=U_ZERO_ERROR
;
2481 keywordValueLen
= uloc_getKeywordValue(locale
, keyword
, keywordValue
, 256, pErrorCode
);
2482 if(keywordValueLen
) {
2483 if(length
+ length3
< destCapacity
) {
2484 dest
[length
+ length3
] = 0x3D;
2487 if(length
+ length3
< destCapacity
) {
2488 length3
+= uloc_getDisplayKeywordValue(locale
, keyword
, displayLocale
, dest
+length
+length3
, destCapacity
-length
-length3
, pErrorCode
);
2490 length3
+= uloc_getDisplayKeywordValue(locale
, keyword
, displayLocale
, NULL
, 0, pErrorCode
);
2492 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2493 /* keep preflighting */
2494 *pErrorCode
=U_ZERO_ERROR
;
2497 if(keywordCount
> 1) {
2498 if(length
+ length3
+ 1 < destCapacity
&& keywordCount
) {
2499 dest
[length
+ length3
]=0x2c;
2500 dest
[length
+ length3
+1]=0x20;
2502 length3
++; /* ',' */
2503 length3
++; /* ' ' */
2506 uenum_close(keywordEnum
);
2508 hasKeywords
= length3
> 0;
2513 if ((hasScript
&& !hasCountry
)
2514 || ((hasScript
|| hasCountry
) && !hasVariant
&& !hasKeywords
)
2515 || ((hasScript
|| hasCountry
|| hasVariant
) && !hasKeywords
)
2516 || (hasLanguage
&& !hasScript
&& !hasCountry
&& !hasVariant
&& !hasKeywords
))
2518 /* remove ", " or " (" */
2522 if (hasLanguage
&& (hasScript
|| hasCountry
|| hasVariant
|| hasKeywords
)) {
2524 if(length
<destCapacity
) {
2530 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2531 /* keep preflighting */
2532 *pErrorCode
=U_ZERO_ERROR
;
2535 return u_terminateUChars(dest
, destCapacity
, length
, pErrorCode
);
/*
 * Display name of a locale keyword, in the language of displayLocale.
 *
 * After the usual argument checks (a NULL or failed status returns at once;
 * a negative capacity, or a NULL dest with a positive capacity, sets
 * U_ILLEGAL_ARGUMENT_ERROR) the work is delegated to _getStringOrCopyKey()
 * on the displayLocale bundle.
 *
 * NOTE(review): the "pass itemKey=NULL" remark below should be checked
 * against the arguments actually passed to _getStringOrCopyKey().
 */
2538 U_CAPI
int32_t U_EXPORT2
2539 uloc_getDisplayKeyword(const char* keyword
,
2540 const char* displayLocale
,
2542 int32_t destCapacity
,
2543 UErrorCode
* status
){
2545 /* argument checking */
2546 if(status
==NULL
|| U_FAILURE(*status
)) {
2550 if(destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)) {
2551 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
2556 /* pass itemKey=NULL to look for a top-level item */
2557 return _getStringOrCopyKey(NULL
, displayLocale
,
2567 #define UCURRENCY_DISPLAY_NAME_INDEX 1
/*
 * Display name of the value that `keyword` has in `locale`, in the language
 * of displayLocale.
 *
 * The raw value is read with uloc_getKeywordValue(). Two paths follow:
 *  - keyword equals _kCurrency (compared case-insensitively with
 *    uprv_stricmp): the displayLocale bundle is opened, the _kCurrencies
 *    table and the entry for the value are fetched, and the string at
 *    UCURRENCY_DISPLAY_NAME_INDEX is used. U_MISSING_RESOURCE_ERROR is
 *    downgraded to U_USING_DEFAULT_WARNING, in which case the raw value is
 *    copied instead. A result longer than destCapacity sets
 *    U_BUFFER_OVERFLOW_ERROR.
 *  - any other keyword: delegated to _getStringOrCopyKey().
 *
 * NOTE(review): dispName is obtained from `currency` and used after the
 * bundles have been closed - confirm that the string storage outlives
 * ures_close().
 */
2569 U_CAPI
int32_t U_EXPORT2
2570 uloc_getDisplayKeywordValue( const char* locale
,
2571 const char* keyword
,
2572 const char* displayLocale
,
2574 int32_t destCapacity
,
2575 UErrorCode
* status
){
2578 char keywordValue
[ULOC_FULLNAME_CAPACITY
*4];
2579 int32_t capacity
= ULOC_FULLNAME_CAPACITY
*4;
2580 int32_t keywordValueLen
=0;
2582 /* argument checking */
2583 if(status
==NULL
|| U_FAILURE(*status
)) {
2587 if(destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)) {
2588 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
2592 /* get the keyword value */
2594 keywordValueLen
= uloc_getKeywordValue(locale
, keyword
, keywordValue
, capacity
, status
);
2597 * if the keyword is equal to currency .. then to get the display name
2598 * we need to do the fallback ourselves
2600 if(uprv_stricmp(keyword
, _kCurrency
)==0){
2602 int32_t dispNameLen
= 0;
2603 const UChar
*dispName
= NULL
;
2605 UResourceBundle
*bundle
= ures_open(NULL
, displayLocale
, status
);
2606 UResourceBundle
*currencies
= ures_getByKey(bundle
, _kCurrencies
, NULL
, status
);
2607 UResourceBundle
*currency
= ures_getByKeyWithFallback(currencies
, keywordValue
, NULL
, status
);
2609 dispName
= ures_getStringByIndex(currency
, UCURRENCY_DISPLAY_NAME_INDEX
, &dispNameLen
, status
);
2611 /*close the bundles */
2612 ures_close(currency
);
2613 ures_close(currencies
);
2616 if(U_FAILURE(*status
)){
2617 if(*status
== U_MISSING_RESOURCE_ERROR
){
2618 /* we just want to write the value over if nothing is available */
2619 *status
= U_USING_DEFAULT_WARNING
;
2625 /* now copy the dispName over if not NULL */
2626 if(dispName
!= NULL
){
2627 if(dispNameLen
<= destCapacity
){
2628 uprv_memcpy(dest
, dispName
, dispNameLen
* U_SIZEOF_UCHAR
);
2629 return u_terminateUChars(dest
, destCapacity
, dispNameLen
, status
);
2631 *status
= U_BUFFER_OVERFLOW_ERROR
;
2635 /* we have not found the display name for the value .. just copy over */
2636 if(keywordValueLen
<= destCapacity
){
2637 u_charsToUChars(keywordValue
, dest
, keywordValueLen
);
2638 return u_terminateUChars(dest
, destCapacity
, keywordValueLen
, status
);
2640 *status
= U_BUFFER_OVERFLOW_ERROR
;
2641 return keywordValueLen
;
2648 return _getStringOrCopyKey(NULL
, displayLocale
,
2657 /* ### Get available **************************************************/
2659 static UBool U_CALLCONV
uloc_cleanup(void) {
2662 if (_installedLocales
) {
2663 temp
= _installedLocales
;
2664 _installedLocales
= NULL
;
2666 _installedLocalesCount
= 0;
/*
 * Fills the _installedLocales cache on first use.
 *
 * UMTX_CHECK reads whether _installedLocales is already set. If it is not,
 * the _kIndexLocaleName bundle is opened directly, its _kIndexTag entry is
 * iterated with ures_getNextString(), and the string pointers are collected
 * in a freshly allocated array of localeCount+1 slots. The array and count
 * are stored in the globals only if _installedLocales is still NULL at that
 * point, and uloc_cleanup is registered for UCLN_COMMON_ULOC.
 *
 * Nothing is stored when the index cannot be read (status failure).
 */
2673 static void _load_installedLocales()
2675 UBool localesLoaded
;
2677 UMTX_CHECK(NULL
, _installedLocales
!= NULL
, localesLoaded
);
2679 if (localesLoaded
== FALSE
) {
2680 UResourceBundle
*index
= NULL
;
2681 UResourceBundle installed
;
2682 UErrorCode status
= U_ZERO_ERROR
;
2685 int32_t localeCount
;
2687 ures_initStackObject(&installed
);
2688 index
= ures_openDirect(NULL
, _kIndexLocaleName
, &status
);
2689 ures_getByKey(index
, _kIndexTag
, &installed
, &status
);
2691 if(U_SUCCESS(status
)) {
2692 localeCount
= ures_getSize(&installed
);
2693 temp
= (char **) uprv_malloc(sizeof(char*) * (localeCount
+1));
2694 /* Check for null pointer */
2696 ures_resetIterator(&installed
);
2697 while(ures_hasNext(&installed
)) {
2698 ures_getNextString(&installed
, NULL
, (const char **)&temp
[i
++], &status
);
2703 if (_installedLocales
== NULL
)
2705 _installedLocalesCount
= localeCount
;
2706 _installedLocales
= temp
;
2708 ucln_common_registerCleanup(UCLN_COMMON_ULOC
, uloc_cleanup
);
2715 ures_close(&installed
);
2720 U_CAPI
const char* U_EXPORT2
2721 uloc_getAvailable(int32_t offset
)
2724 _load_installedLocales();
2726 if (offset
> _installedLocalesCount
)
2728 return _installedLocales
[offset
];
2731 U_CAPI
int32_t U_EXPORT2
2732 uloc_countAvailable()
2734 _load_installedLocales();
2735 return _installedLocalesCount
;
2739 * Returns a list of all language codes defined in ISO 639. This is a pointer
2740 * to an array of pointers to arrays of char. All of these pointers are owned
2741 * by ICU-- do not delete them, and do not write through them. The array is
2742 * terminated with a null pointer.
2744 U_CAPI
const char* const* U_EXPORT2
2745 uloc_getISOLanguages()
2751 * Returns a list of all 2-letter country codes defined in ISO 3166. This is a
2752 * pointer to an array of pointers to arrays of char. All of these pointers are
2753 * owned by ICU-- do not delete them, and do not write through them. The array is
2754 * terminated with a null pointer.
2756 U_CAPI
const char* const* U_EXPORT2
2757 uloc_getISOCountries()
2763 /* this function to be moved into cstring.c later */
2764 static char gDecimal
= 0;
2769 _uloc_strtod(const char *start
, char **end
) {
2776 /* For machines that decide to change the decimal on you,
2777 and try to be too smart with localization.
2778 This normally should be just a '.'. */
2779 sprintf(rep
, "%+1.1f", 1.0);
2783 if(gDecimal
== '.') {
2784 return uprv_strtod(start
, end
); /* fall through to OS */
2786 uprv_strncpy(buf
, start
, 29);
2788 decimal
= uprv_strchr(buf
, '.');
2790 *decimal
= gDecimal
;
2792 return uprv_strtod(start
, end
); /* no decimal point */
2794 rv
= uprv_strtod(buf
, &myEnd
);
2796 *end
= (char*)(start
+(myEnd
-buf
)); /* cast away const (to follow uprv_strtod API.) */
2804 int32_t dummy
; /* to avoid uninitialized memory copy from qsort */
2808 static int32_t U_CALLCONV
2809 uloc_acceptLanguageCompare(const void *context
, const void *a
, const void *b
)
2811 const _acceptLangItem
*aa
= (const _acceptLangItem
*)a
;
2812 const _acceptLangItem
*bb
= (const _acceptLangItem
*)b
;
2816 rc
= -1; /* A > B */
2817 } else if(bb
->q
> aa
->q
) {
2824 rc
= uprv_stricmp(aa
->locale
, bb
->locale
);
2827 #if defined(ULOC_DEBUG)
2828 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
/*
 * Shared implementation of the character/line orientation queries.
 *
 * The locale ID is canonicalized into a local buffer, a string is fetched
 * through _res_getTableStringWithFallback(), and a code unit of that string
 * selects the result: 'b' -> ULOC_LAYOUT_BTT, 'l' -> ULOC_LAYOUT_LTR,
 * 'r' -> ULOC_LAYOUT_RTL, 't' -> ULOC_LAYOUT_TTB. Any other value sets
 * U_INTERNAL_PROGRAM_ERROR. The result starts out as ULOC_LAYOUT_UNKNOWN and
 * stays so when *status is a failure or the fetched string is empty.
 */
2838 _uloc_getOrientationHelper(const char* localeId
,
2842 ULayoutType result
= ULOC_LAYOUT_UNKNOWN
;
2844 if (!U_FAILURE(*status
)) {
2846 char localeBuffer
[ULOC_FULLNAME_CAPACITY
];
2848 uloc_canonicalize(localeId
, localeBuffer
, sizeof(localeBuffer
), status
);
2850 if (!U_FAILURE(*status
)) {
2851 const UChar
* const value
=
2852 _res_getTableStringWithFallback(
2861 if (!U_FAILURE(*status
) && length
!= 0) {
2864 case 0x0062: /* 'b' */
2865 result
= ULOC_LAYOUT_BTT
;
2867 case 0x006C: /* 'l' */
2868 result
= ULOC_LAYOUT_LTR
;
2870 case 0x0072: /* 'r' */
2871 result
= ULOC_LAYOUT_RTL
;
2873 case 0x0074: /* 't' */
2874 result
= ULOC_LAYOUT_TTB
;
2877 *status
= U_INTERNAL_PROGRAM_ERROR
;
2887 U_DRAFT ULayoutType U_EXPORT2
2888 uloc_getCharacterOrientation(const char* localeId
,
2891 return _uloc_getOrientationHelper(localeId
, "characters", status
);
2895 * Get the layout line orientation for the specified locale.
2897 * @param localeId locale name
2898 * @param status Error status
2899 * @return an enum indicating the layout orientation for lines.
2902 U_DRAFT ULayoutType U_EXPORT2
2903 uloc_getLineOrientation(const char* localeId
,
2906 return _uloc_getOrientationHelper(localeId
, "lines", status
);
2910 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2913 U_CAPI
int32_t U_EXPORT2
2914 uloc_acceptLanguageFromHTTP(char *result
, int32_t resultAvailable
, UAcceptResult
*outResult
,
2915 const char *httpAcceptLanguage
,
2916 UEnumeration
* availableLocales
,
2920 _acceptLangItem smallBuffer
[30];
2922 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2924 const char *itemEnd
;
2925 const char *paramEnd
;
2930 int32_t l
= (int32_t)uprv_strlen(httpAcceptLanguage
);
2932 char *tempstr
; /* Use for null pointer check */
2935 jSize
= sizeof(smallBuffer
)/sizeof(smallBuffer
[0]);
2936 if(U_FAILURE(*status
)) {
2940 for(s
=httpAcceptLanguage
;s
&&*s
;) {
2941 while(isspace(*s
)) /* eat space at the beginning */
2943 itemEnd
=uprv_strchr(s
,',');
2944 paramEnd
=uprv_strchr(s
,';');
2946 itemEnd
= httpAcceptLanguage
+l
; /* end of string */
2948 if(paramEnd
&& paramEnd
<itemEnd
) {
2949 /* semicolon (;) is closer than end (,) */
2954 while(isspace(*t
)) {
2960 while(isspace(*t
)) {
2963 j
[n
].q
= (float)_uloc_strtod(t
,NULL
);
2965 /* no semicolon - it's 1.0 */
2970 /* eat spaces prior to semi */
2971 for(t
=(paramEnd
-1);(paramEnd
>s
)&&isspace(*t
);t
--)
2973 /* Check for null pointer from uprv_strndup */
2974 tempstr
= uprv_strndup(s
,(int32_t)((t
+1)-s
));
2975 if (tempstr
== NULL
) {
2976 *status
= U_MEMORY_ALLOCATION_ERROR
;
2979 j
[n
].locale
= tempstr
;
2980 uloc_canonicalize(j
[n
].locale
,tmp
,sizeof(tmp
)/sizeof(tmp
[0]),status
);
2981 if(strcmp(j
[n
].locale
,tmp
)) {
2982 uprv_free(j
[n
].locale
);
2983 j
[n
].locale
=uprv_strdup(tmp
);
2985 #if defined(ULOC_DEBUG)
2986 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2990 while(*s
==',') { /* eat duplicate commas */
2994 if(j
==smallBuffer
) { /* overflowed the small buffer. */
2995 j
= uprv_malloc(sizeof(j
[0])*(jSize
*2));
2997 uprv_memcpy(j
,smallBuffer
,sizeof(j
[0])*jSize
);
2999 #if defined(ULOC_DEBUG)
3000 fprintf(stderr
,"malloced at size %d\n", jSize
);
3003 j
= uprv_realloc(j
, sizeof(j
[0])*jSize
*2);
3004 #if defined(ULOC_DEBUG)
3005 fprintf(stderr
,"re-alloced at size %d\n", jSize
);
3010 *status
= U_MEMORY_ALLOCATION_ERROR
;
3015 uprv_sortArray(j
, n
, sizeof(j
[0]), uloc_acceptLanguageCompare
, NULL
, TRUE
, status
);
3016 if(U_FAILURE(*status
)) {
3017 if(j
!= smallBuffer
) {
3018 #if defined(ULOC_DEBUG)
3019 fprintf(stderr
,"freeing j %p\n", j
);
3025 strs
= uprv_malloc((size_t)(sizeof(strs
[0])*n
));
3026 /* Check for null pointer */
3028 uprv_free(j
); /* Free to avoid memory leak */
3029 *status
= U_MEMORY_ALLOCATION_ERROR
;
3033 #if defined(ULOC_DEBUG)
3034 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
3036 strs
[i
]=j
[i
].locale
;
3038 res
= uloc_acceptLanguage(result
, resultAvailable
, outResult
,
3039 (const char**)strs
, n
, availableLocales
, status
);
3044 if(j
!= smallBuffer
) {
3045 #if defined(ULOC_DEBUG)
3046 fprintf(stderr
,"freeing j %p\n", j
);
3054 U_CAPI
int32_t U_EXPORT2
3055 uloc_acceptLanguage(char *result
, int32_t resultAvailable
,
3056 UAcceptResult
*outResult
, const char **acceptList
,
3057 int32_t acceptListCount
,
3058 UEnumeration
* availableLocales
,
3064 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
3066 char **fallbackList
;
3067 if(U_FAILURE(*status
)) {
3070 fallbackList
= uprv_malloc((size_t)(sizeof(fallbackList
[0])*acceptListCount
));
3071 if(fallbackList
==NULL
) {
3072 *status
= U_MEMORY_ALLOCATION_ERROR
;
3075 for(i
=0;i
<acceptListCount
;i
++) {
3076 #if defined(ULOC_DEBUG)
3077 fprintf(stderr
,"%02d: %s\n", i
, acceptList
[i
]);
3079 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
3080 #if defined(ULOC_DEBUG)
3081 fprintf(stderr
," %s\n", l
);
3083 len
= (int32_t)uprv_strlen(l
);
3084 if(!uprv_strcmp(acceptList
[i
], l
)) {
3086 *outResult
= ULOC_ACCEPT_VALID
;
3088 #if defined(ULOC_DEBUG)
3089 fprintf(stderr
, "MATCH! %s\n", l
);
3092 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
3095 uprv_free(fallbackList
[j
]);
3097 uprv_free(fallbackList
);
3098 return u_terminateChars(result
, resultAvailable
, len
, status
);
3104 uenum_reset(availableLocales
, status
);
3105 /* save off parent info */
3106 if(uloc_getParent(acceptList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
3107 fallbackList
[i
] = uprv_strdup(tmp
);
3113 for(maxLen
--;maxLen
>0;maxLen
--) {
3114 for(i
=0;i
<acceptListCount
;i
++) {
3115 if(fallbackList
[i
] && ((int32_t)uprv_strlen(fallbackList
[i
])==maxLen
)) {
3116 #if defined(ULOC_DEBUG)
3117 fprintf(stderr
,"Try: [%s]", fallbackList
[i
]);
3119 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
3120 #if defined(ULOC_DEBUG)
3121 fprintf(stderr
," %s\n", l
);
3123 len
= (int32_t)uprv_strlen(l
);
3124 if(!uprv_strcmp(fallbackList
[i
], l
)) {
3126 *outResult
= ULOC_ACCEPT_FALLBACK
;
3128 #if defined(ULOC_DEBUG)
3129 fprintf(stderr
, "fallback MATCH! %s\n", l
);
3132 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
3134 for(j
=0;j
<acceptListCount
;j
++) {
3135 uprv_free(fallbackList
[j
]);
3137 uprv_free(fallbackList
);
3138 return u_terminateChars(result
, resultAvailable
, len
, status
);
3141 uenum_reset(availableLocales
, status
);
3143 if(uloc_getParent(fallbackList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
3144 uprv_free(fallbackList
[i
]);
3145 fallbackList
[i
] = uprv_strdup(tmp
);
3147 uprv_free(fallbackList
[i
]);
3153 *outResult
= ULOC_ACCEPT_FAILED
;
3156 for(i
=0;i
<acceptListCount
;i
++) {
3157 uprv_free(fallbackList
[i
]);
3159 uprv_free(fallbackList
);
3165 * This function looks for the localeID in the likelySubtags resource.
3167 * @param localeID The tag to find.
3168 * @param buffer A buffer to hold the matching entry
3169 * @param bufferLength The length of the output buffer
3170 * @return A pointer to "buffer" if found, or a null pointer if not.
3172 static const char* U_CALLCONV
3173 findLikelySubtags(const char* localeID
,
3175 int32_t bufferLength
,
3177 const char* result
= NULL
;
3179 if (!U_FAILURE(*err
)) {
3181 const UChar
* s
= NULL
;
3182 UResourceBundle
* subtags
= ures_openDirect(NULL
, "likelySubtags", err
);
3183 if (!U_FAILURE(*err
)) {
3184 s
= ures_getStringByKey(subtags
, localeID
, &resLen
, err
);
3186 if (U_FAILURE(*err
)) {
3188 * If a resource is missing, it's not really an error, it's
3189 * just that we don't have any data for that particular locale ID.
3191 if (*err
== U_MISSING_RESOURCE_ERROR
) {
3192 *err
= U_ZERO_ERROR
;
3195 else if (resLen
>= bufferLength
) {
3196 /* The buffer should never overflow. */
3197 *err
= U_INTERNAL_PROGRAM_ERROR
;
3200 u_UCharsToChars(s
, buffer
, resLen
+ 1);
3204 ures_close(subtags
);
3212 * Append a tag to a buffer, adding the separator if necessary. The buffer
3213 * must be large enough to contain the resulting tag plus any separator
3214 * necessary. The tag must not be a zero-length string.
3216 * @param tag The tag to add.
3217 * @param tagLength The length of the tag.
3218 * @param buffer The output buffer.
3219 * @param bufferLength The length of the output buffer. This is an input/output parameter.
/*
 * Tag-append helper. When the buffer already holds text (*bufferLength > 0)
 * a '_' separator is stored at buffer[*bufferLength] before the tag; the tag
 * goes in at &buffer[*bufferLength] and *bufferLength grows by tagLength.
 * No capacity check is visible here - the caller must size the buffer.
 */
3221 static void U_CALLCONV
3226 int32_t* bufferLength
) {
3228 if (*bufferLength
> 0) {
3229 buffer
[*bufferLength
] = '_';
3234 &buffer
[*bufferLength
],
3238 *bufferLength
+= tagLength
;
3242 * These are the canonical strings for unknown languages, scripts and regions.
3244 static const char* const unknownLanguage
= "und";
3245 static const char* const unknownScript
= "Zzzz";
3246 static const char* const unknownRegion
= "ZZ";
3249 * Create a tag string from the supplied parameters. The lang, script and region
3250 * parameters may be NULL pointers. If they are, their corresponding length parameters
3251 * must be less than or equal to 0.
3253 * If any of the language, script or region parameters are empty, and the alternateTags
3254 * parameter is not NULL, it will be parsed for potential language, script and region tags
3255 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
3256 * it contains no language tag, the default tag for the unknown language is used.
3258 * If the length of the new string exceeds the capacity of the output buffer,
3259 * the function copies as many bytes to the output buffer as it can, and returns
3260 * the error U_BUFFER_OVERFLOW_ERROR.
3262 * If an illegal argument is provided, the function returns the error
3263 * U_ILLEGAL_ARGUMENT_ERROR.
3265 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
3266 * the tag string fits in the output buffer, but the null terminator doesn't.
3268 * @param lang The language tag to use.
3269 * @param langLength The length of the language tag.
3270 * @param script The script tag to use.
3271 * @param scriptLength The length of the script tag.
3272 * @param region The region tag to use.
3273 * @param regionLength The length of the region tag.
3274 * @param trailing Any trailing data to append to the new tag.
3275 * @param trailingLength The length of the trailing data.
3276 * @param alternateTags A string containing any alternate tags.
3277 * @param tag The output buffer.
3278 * @param tagCapacity The capacity of the output buffer.
3279 * @param err A pointer to a UErrorCode for error reporting.
3280 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
/*
 * Implementation outline:
 *  - arguments are validated first: a prior failure in *err, a NULL tag, or
 *    a component length at or above its ULOC_*_CAPACITY is rejected;
 *  - language, script and region are assembled in a local tagBuffer of
 *    ULOC_FULLNAME_CAPACITY bytes. Each component comes from the explicit
 *    argument when its length is > 0, otherwise from alternateTags when
 *    that is not NULL; a missing language falls back to unknownLanguage;
 *  - regionAppended records whether a region was written, and decides
 *    whether a '_' is inserted before the trailing data;
 *  - at most tagCapacity bytes of the assembled text are copied to `tag`,
 *    then as much of `trailing` as still fits (capacityRemaining);
 *  - tagLength always grows by the full trailingLength, so the returned
 *    length can exceed tagCapacity; termination goes through
 *    u_terminateChars();
 *  - on the error path U_BUFFER_OVERFLOW_ERROR is replaced by
 *    U_ILLEGAL_ARGUMENT_ERROR.
 */
3282 static int32_t U_CALLCONV
3283 createTagStringWithAlternates(
3287 int32_t scriptLength
,
3289 int32_t regionLength
,
3290 const char* trailing
,
3291 int32_t trailingLength
,
3292 const char* alternateTags
,
3294 int32_t tagCapacity
,
3297 if (U_FAILURE(*err
)) {
3300 else if (tag
== NULL
||
3302 langLength
>= ULOC_LANG_CAPACITY
||
3303 scriptLength
>= ULOC_SCRIPT_CAPACITY
||
3304 regionLength
>= ULOC_COUNTRY_CAPACITY
) {
3309 * ULOC_FULLNAME_CAPACITY will provide enough capacity
3310 * that we can build a string that contains the language,
3311 * script and region code without worrying about overrunning
3312 * the user-supplied buffer.
3314 char tagBuffer
[ULOC_FULLNAME_CAPACITY
];
3315 int32_t tagLength
= 0;
3316 int32_t capacityRemaining
= tagCapacity
;
3317 UBool regionAppended
= FALSE
;
3319 if (langLength
> 0) {
3326 else if (alternateTags
== NULL
) {
3328 * Append the value for an unknown language, if
3329 * we found no language.
3333 uprv_strlen(unknownLanguage
),
3339 * Parse the alternateTags string for the language.
3341 char alternateLang
[ULOC_LANG_CAPACITY
];
3342 int32_t alternateLangLength
= sizeof(alternateLang
);
3344 alternateLangLength
=
3348 alternateLangLength
,
3350 if(U_FAILURE(*err
) ||
3351 alternateLangLength
>= ULOC_LANG_CAPACITY
) {
3354 else if (alternateLangLength
== 0) {
3356 * Append the value for an unknown language, if
3357 * we found no language.
3361 uprv_strlen(unknownLanguage
),
3368 alternateLangLength
,
3374 if (scriptLength
> 0) {
3381 else if (alternateTags
!= NULL
) {
3383 * Parse the alternateTags string for the script.
3385 char alternateScript
[ULOC_SCRIPT_CAPACITY
];
3387 const int32_t alternateScriptLength
=
3391 sizeof(alternateScript
),
3394 if (U_FAILURE(*err
) ||
3395 alternateScriptLength
>= ULOC_SCRIPT_CAPACITY
) {
3398 else if (alternateScriptLength
> 0) {
3401 alternateScriptLength
,
3407 if (regionLength
> 0) {
3414 regionAppended
= TRUE
;
3416 else if (alternateTags
!= NULL
) {
3418 * Parse the alternateTags string for the region.
3420 char alternateRegion
[ULOC_COUNTRY_CAPACITY
];
3422 const int32_t alternateRegionLength
=
3426 sizeof(alternateRegion
),
3428 if (U_FAILURE(*err
) ||
3429 alternateRegionLength
>= ULOC_COUNTRY_CAPACITY
) {
3432 else if (alternateRegionLength
> 0) {
3435 alternateRegionLength
,
3439 regionAppended
= TRUE
;
3444 const int32_t toCopy
=
3445 tagLength
>= tagCapacity
? tagCapacity
: tagLength
;
3448 * Copy the partial tag from our internal buffer to the supplied
3456 capacityRemaining
-= toCopy
;
3459 if (trailingLength
> 0) {
3460 if (capacityRemaining
> 0 && !regionAppended
) {
3461 tag
[tagLength
++] = '_';
3462 --capacityRemaining
;
3465 if (capacityRemaining
> 0) {
3467 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we
3468 * don't know if the user-supplied buffers overlap.
3470 const int32_t toCopy
=
3471 trailingLength
>= capacityRemaining
? capacityRemaining
: trailingLength
;
3480 tagLength
+= trailingLength
;
3482 return u_terminateChars(
3492 * An overflow indicates the locale ID passed in
3493 * is ill-formed. If we got here, and there was
3494 * no previous error, it's an implicit overflow.
3496 if (*err
== U_BUFFER_OVERFLOW_ERROR
||
3498 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
3505 * Create a tag string from the supplied parameters. The lang, script and region
3506 * parameters may be NULL pointers. If they are, their corresponding length parameters
3507 * must be less than or equal to 0. If the lang parameter is an empty string, the
3508 * default value for an unknown language is written to the output buffer.
3510 * If the length of the new string exceeds the capacity of the output buffer,
3511 * the function copies as many bytes to the output buffer as it can, and returns
3512 * the error U_BUFFER_OVERFLOW_ERROR.
3514 * If an illegal argument is provided, the function returns the error
3515 * U_ILLEGAL_ARGUMENT_ERROR.
3517 * @param lang The language tag to use.
3518 * @param langLength The length of the language tag.
3519 * @param script The script tag to use.
3520 * @param scriptLength The length of the script tag.
3521 * @param region The region tag to use.
3522 * @param regionLength The length of the region tag.
3523 * @param trailing Any trailing data to append to the new tag.
3524 * @param trailingLength The length of the trailing data.
3525 * @param tag The output buffer.
3526 * @param tagCapacity The capacity of the output buffer.
3527 * @param err A pointer to a UErrorCode for error reporting.
3528 * @return The length of the tag string, which may be greater than tagCapacity.
3530 static int32_t U_CALLCONV
3535 int32_t scriptLength
,
3537 int32_t regionLength
,
3538 const char* trailing
,
3539 int32_t trailingLength
,
3541 int32_t tagCapacity
,
3544 return createTagStringWithAlternates(
3560 * Parse the language, script, and region subtags from a tag string, and copy the
3561 * results into the corresponding output parameters. The buffers are null-terminated,
3562 * unless overflow occurs.
3564 * The langLength, scriptLength, and regionLength parameters are input/output
3565 * parameters, and must contain the capacity of their corresponding buffers on
3566 * input. On output, they will contain the actual length of the buffers, not
3567 * including the null terminator.
3569 * If the length of any of the output subtags exceeds the capacity of the corresponding
3570 * buffer, the function copies as many bytes to the output buffer as it can, and returns
3571 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
3574 * If an illegal argument is provided, the function returns the error
3575 * U_ILLEGAL_ARGUMENT_ERROR.
3577 * @param localeID The locale ID to parse.
3578 * @param lang The language tag buffer.
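3579 * @param langLength On input, the capacity of the language tag buffer; on output, the length of the language tag.
3580 * @param script The script tag buffer.
3581 * @param scriptLength On input, the capacity of the script tag buffer; on output, the length of the script tag.
3582 * @param region The region tag buffer.
3583 * @param regionLength On input, the capacity of the region tag buffer; on output, the length of the region tag.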
3584 * @param err A pointer to a UErrorCode for error reporting.
3585 * @return The number of chars of the localeID parameter consumed.
3587 static int32_t U_CALLCONV
3589 const char* localeID
,
3591 int32_t* langLength
,
3593 int32_t* scriptLength
,
3595 int32_t* regionLength
,
3598 const char* position
= localeID
;
3599 int32_t subtagLength
= 0;
3601 if(U_FAILURE(*err
) ||
3604 langLength
== NULL
||
3606 scriptLength
== NULL
||
3608 regionLength
== NULL
) {
3612 subtagLength
= _getLanguage(position
, lang
, *langLength
, &position
);
3613 u_terminateChars(lang
, *langLength
, subtagLength
, err
);
3616 * Note that we explicitly consider U_STRING_NOT_TERMINATED_WARNING
3617 * to be an error, because it indicates the user-supplied tag is
3620 if(*err
!= U_ZERO_ERROR
) {
3624 *langLength
= subtagLength
;
3627 * If no language was present, use the value of unknownLanguage
3628 * instead. Otherwise, move past any separator.
3630 if (*langLength
== 0) {
3634 *langLength
= uprv_strlen(lang
);
3636 else if (_isIDSeparator(*position
)) {
3640 subtagLength
= _getScript(position
, script
, *scriptLength
, &position
);
3641 u_terminateChars(script
, *scriptLength
, subtagLength
, err
);
3643 if(*err
!= U_ZERO_ERROR
) {
3647 *scriptLength
= subtagLength
;
3649 if (*scriptLength
> 0) {
3650 if (uprv_strnicmp(script
, unknownScript
, *scriptLength
) == 0) {
3652 * If the script part is the "unknown" script, then don't return it.
3658 * Move past any separator.
3660 if (_isIDSeparator(*position
)) {
3665 subtagLength
= _getCountry(position
, region
, *regionLength
, &position
);
3666 u_terminateChars(region
, *regionLength
, subtagLength
, err
);
3668 if(*err
!= U_ZERO_ERROR
) {
3672 *regionLength
= subtagLength
;
3674 if (*regionLength
> 0) {
3675 if (uprv_strnicmp(region
, unknownRegion
, *regionLength
) == 0) {
3677 * If the region part is the "unknown" region, then don't return it.
3685 return (int32_t)(position
- localeID
);
3690 * If we get here, we have no explicit error, it's the result of an
3693 if (!U_FAILURE(*err
)) {
3694 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
3700 static int32_t U_CALLCONV
3701 createLikelySubtagsString(
3705 int32_t scriptLength
,
3707 int32_t regionLength
,
3708 const char* variants
,
3709 int32_t variantsLength
,
3711 int32_t tagCapacity
,
3715 * ULOC_FULLNAME_CAPACITY will provide enough capacity
3716 * that we can build a string that contains the language,
3717 * script and region code without worrying about overrunning
3718 * the user-supplied buffer.
3720 char tagBuffer
[ULOC_FULLNAME_CAPACITY
];
3721 char likelySubtagsBuffer
[ULOC_FULLNAME_CAPACITY
];
3722 int32_t tagBufferLength
= 0;
3724 if(U_FAILURE(*err
)) {
3729 * Try the language with the script and region first.
3731 if (scriptLength
> 0 && regionLength
> 0) {
3733 const char* likelySubtags
= NULL
;
3735 tagBufferLength
= createTagString(
3747 if(U_FAILURE(*err
)) {
3754 likelySubtagsBuffer
,
3755 sizeof(likelySubtagsBuffer
),
3757 if(U_FAILURE(*err
)) {
3761 if (likelySubtags
!= NULL
) {
3762 /* Always use the language tag from the
3763 maximal string, since it may be more
3764 specific than the one provided. */
3765 return createTagStringWithAlternates(
3782 * Try the language with just the script.
3784 if (scriptLength
> 0) {
3786 const char* likelySubtags
= NULL
;
3788 tagBufferLength
= createTagString(
3800 if(U_FAILURE(*err
)) {
3807 likelySubtagsBuffer
,
3808 sizeof(likelySubtagsBuffer
),
3810 if(U_FAILURE(*err
)) {
3814 if (likelySubtags
!= NULL
) {
3815 /* Always use the language tag from the
3816 maximal string, since it may be more
3817 specific than the one provided. */
3818 return createTagStringWithAlternates(
3835 * Try the language with just the region.
3837 if (regionLength
> 0) {
3839 const char* likelySubtags
= NULL
;
3853 if(U_FAILURE(*err
)) {
3860 likelySubtagsBuffer
,
3861 sizeof(likelySubtagsBuffer
),
3863 if(U_FAILURE(*err
)) {
3867 if (likelySubtags
!= NULL
) {
3868 /* Always use the language tag from the
3869 maximal string, since it may be more
3870 specific than the one provided. */
3871 return createTagStringWithAlternates(
3888 * Finally, try just the language.
3891 const char* likelySubtags
= NULL
;
3905 if(U_FAILURE(*err
)) {
3912 likelySubtagsBuffer
,
3913 sizeof(likelySubtagsBuffer
),
3915 if(U_FAILURE(*err
)) {
3919 if (likelySubtags
!= NULL
) {
3920 /* Always use the language tag from the
3921 maximal string, since it may be more
3922 specific than the one provided. */
3923 return createTagStringWithAlternates(
3939 return u_terminateChars(
3947 if (!U_FAILURE(*err
)) {
3948 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
3955 _uloc_addLikelySubtags(const char* localeID
,
3956 char* maximizedLocaleID
,
3957 int32_t maximizedLocaleIDCapacity
,
3960 char lang
[ULOC_LANG_CAPACITY
];
3961 int32_t langLength
= sizeof(lang
);
3962 char script
[ULOC_SCRIPT_CAPACITY
];
3963 int32_t scriptLength
= sizeof(script
);
3964 char region
[ULOC_COUNTRY_CAPACITY
];
3965 int32_t regionLength
= sizeof(region
);
3966 const char* trailing
= "";
3967 int32_t trailingLength
= 0;
3968 int32_t trailingIndex
= 0;
3969 int32_t resultLength
= 0;
3971 if(U_FAILURE(*err
)) {
3974 else if (localeID
== NULL
||
3975 maximizedLocaleID
== NULL
||
3976 maximizedLocaleIDCapacity
<= 0) {
3980 trailingIndex
= parseTagString(
3989 if(U_FAILURE(*err
)) {
3990 /* Overflow indicates an illegal argument error */
3991 if (*err
== U_BUFFER_OVERFLOW_ERROR
) {
3992 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
3998 /* Find the length of the trailing portion. */
3999 trailing
= &localeID
[trailingIndex
];
4000 trailingLength
= uprv_strlen(trailing
);
4003 createLikelySubtagsString(
4013 maximizedLocaleIDCapacity
,
4016 if (resultLength
== 0) {
4017 const int32_t localIDLength
=
4018 uprv_strlen(localeID
);
4021 * If we get here, we need to return localeID.
4026 localIDLength
<= maximizedLocaleIDCapacity
?
4027 localIDLength
: maximizedLocaleIDCapacity
);
4032 maximizedLocaleIDCapacity
,
4037 return resultLength
;
4041 if (!U_FAILURE(*err
)) {
4042 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
4049 _uloc_minimizeSubtags(const char* localeID
,
4050 char* minimizedLocaleID
,
4051 int32_t minimizedLocaleIDCapacity
,
4055 * ULOC_FULLNAME_CAPACITY will provide enough capacity
4056 * that we can build a string that contains the language,
4057 * script and region code without worrying about overrunning
4058 * the user-supplied buffer.
4060 char maximizedTagBuffer
[ULOC_FULLNAME_CAPACITY
];
4061 int32_t maximizedTagBufferLength
= sizeof(maximizedTagBuffer
);
4063 char lang
[ULOC_LANG_CAPACITY
];
4064 int32_t langLength
= sizeof(lang
);
4065 char script
[ULOC_SCRIPT_CAPACITY
];
4066 int32_t scriptLength
= sizeof(script
);
4067 char region
[ULOC_COUNTRY_CAPACITY
];
4068 int32_t regionLength
= sizeof(region
);
4069 const char* trailing
= "";
4070 int32_t trailingLength
= 0;
4071 int32_t trailingIndex
= 0;
4073 if(U_FAILURE(*err
)) {
4076 else if (localeID
== NULL
||
4077 minimizedLocaleID
== NULL
||
4078 minimizedLocaleIDCapacity
<= 0) {
4092 if(U_FAILURE(*err
)) {
4094 /* Overflow indicates an illegal argument error */
4095 if (*err
== U_BUFFER_OVERFLOW_ERROR
) {
4096 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
4102 /* Find the spot where the variants begin, if any. */
4103 trailing
= &localeID
[trailingIndex
];
4104 trailingLength
= uprv_strlen(trailing
);
4116 maximizedTagBufferLength
,
4118 if(U_FAILURE(*err
)) {
4123 * First, we need to get the maximization
4124 * from AddLikelySubtags.
4126 maximizedTagBufferLength
=
4127 uloc_addLikelySubtags(
4130 maximizedTagBufferLength
,
4133 if(U_FAILURE(*err
)) {
4138 * Start first with just the language.
4141 char tagBuffer
[ULOC_FULLNAME_CAPACITY
];
4143 const int32_t tagBufferLength
=
4144 createLikelySubtagsString(
4157 if(U_FAILURE(*err
)) {
4160 else if (uprv_strnicmp(
4163 tagBufferLength
) == 0) {
4165 return createTagString(
4175 minimizedLocaleIDCapacity
,
4181 * Next, try the language and region.
4183 if (regionLength
> 0) {
4185 char tagBuffer
[ULOC_FULLNAME_CAPACITY
];
4187 const int32_t tagBufferLength
=
4188 createLikelySubtagsString(
4201 if(U_FAILURE(*err
)) {
4204 else if (uprv_strnicmp(
4207 tagBufferLength
) == 0) {
4209 return createTagString(
4219 minimizedLocaleIDCapacity
,
4225 * Finally, try the language and script. This is our last chance,
4226 * since trying with all three subtags would only yield the
4227 * maximal version that we already have.
4229 if (scriptLength
> 0 && regionLength
> 0) {
4230 char tagBuffer
[ULOC_FULLNAME_CAPACITY
];
4232 const int32_t tagBufferLength
=
4233 createLikelySubtagsString(
4246 if(U_FAILURE(*err
)) {
4249 else if (uprv_strnicmp(
4252 tagBufferLength
) == 0) {
4254 return createTagString(
4264 minimizedLocaleIDCapacity
,
4271 * If we got here, return the locale ID parameter.
4273 const int32_t localeIDLength
= uprv_strlen(localeID
);
4278 localeIDLength
<= minimizedLocaleIDCapacity
?
4279 localeIDLength
: minimizedLocaleIDCapacity
);
4281 return u_terminateChars(
4283 minimizedLocaleIDCapacity
,
4290 if (!U_FAILURE(*err
)) {
4291 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
4300 do_canonicalize(const char* localeID
,
4302 int32_t bufferCapacity
,
4311 if (*err
== U_STRING_NOT_TERMINATED_WARNING
||
4312 *err
== U_BUFFER_OVERFLOW_ERROR
) {
4313 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
4317 else if (U_FAILURE(*err
)) {
4326 U_DRAFT
int32_t U_EXPORT2
4327 uloc_addLikelySubtags(const char* localeID
,
4328 char* maximizedLocaleID
,
4329 int32_t maximizedLocaleIDCapacity
,
4332 char localeBuffer
[ULOC_FULLNAME_CAPACITY
];
4334 if (!do_canonicalize(
4337 sizeof(localeBuffer
),
4342 return _uloc_addLikelySubtags(
4345 maximizedLocaleIDCapacity
,
4350 U_DRAFT
int32_t U_EXPORT2
4351 uloc_minimizeSubtags(const char* localeID
,
4352 char* minimizedLocaleID
,
4353 int32_t minimizedLocaleIDCapacity
,
4356 char localeBuffer
[ULOC_FULLNAME_CAPACITY
];
4358 if (!do_canonicalize(
4361 sizeof(localeBuffer
),
4366 return _uloc_minimizeSubtags(
4369 minimizedLocaleIDCapacity
,