2 **********************************************************************
3 * Copyright (C) 1997-2007, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
9 * Modification History:
11 * Date Name Description
12 * 04/01/97 aliu Creation.
13 * 08/21/98 stephen JDK 1.2 sync
14 * 12/08/98 rtg New Locale implementation and C API
15 * 03/15/99 damiba overhaul.
16 * 04/06/99 stephen changed setDefault() to realloc and copy
17 * 06/14/99 stephen Changed calls to ures_open for new params
18 * 07/21/99 stephen Modified setDefault() to propagate to C++
19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20 * brought canonicalization code into line with spec
21 *****************************************************************************/
24 POSIX's locale format, from putil.c: [no spaces]
26 ll [ _CC ] [ . MM ] [ @ VV]
28 l = lang, C = ctry, M = charmap, V = variant
31 #include "unicode/utypes.h"
32 #include "unicode/ustring.h"
33 #include "unicode/uloc.h"
34 #include "unicode/ures.h"
49 #include <stdio.h> /* for sprintf */
51 /* ### Declarations **************************************************/
53 /* Locale stuff from locid.cpp */
54 U_CFUNC
void locale_set_default(const char *id
);
55 U_CFUNC
const char *locale_get_default(void);
57 locale_getKeywords(const char *localeID
,
59 char *keywords
, int32_t keywordCapacity
,
60 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
64 /* ### Constants **************************************************/
/*
 * Names of the resources we attempt to load from the locale
 * ResourceBundle data file.
 */
static const char _kLanguages[]       = "Languages";
static const char _kScripts[]         = "Scripts";
static const char _kCountries[]       = "Countries";
static const char _kVariants[]        = "Variants";
static const char _kKeys[]            = "Keys";
static const char _kTypes[]           = "Types";
static const char _kIndexLocaleName[] = "res_index";
static const char _kRootName[]        = "root";
static const char _kIndexTag[]        = "InstalledLocales";
static const char _kCurrency[]        = "currency";
static const char _kCurrencies[]      = "Currencies";

/* Installed-locale table and its entry count; both start out empty. */
static char **_installedLocales       = NULL;
static int32_t _installedLocalesCount = 0;
82 /* ### Data tables **************************************************/
/**
 * Table of language codes, both 2- and 3-letter, with preference
 * given to 2-letter codes where possible.  Includes 3-letter codes
 * that lack a 2-letter equivalent.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with LANGUAGES_3, with corresponding
 * entries at the same index: LANGUAGES[i] and LANGUAGES_3[i] must name
 * the same language.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 * include the revisions up to 2001/7/27 *CWB*
 *
 * The 3 character codes are the terminology codes like RFC 3066.  This
 * is compatible with prior ICU codes.
 *
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
 * table but now at the end of the table because 3 character codes are
 * duplicates.  This avoids bad searches going from 3 to 2 character
 * codes.
 *
 * The range qaa-qtz is reserved for local use.
 */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",
    "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",
    /* "ang", "anp", "apa" have slots in LANGUAGES_3; without them here
       every later index would be off by three. */
    "ang", "anp", "apa",
    "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",
    "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",
    "bai", "bal", "ban", "bas", "bat", "be",  "bej",
    "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",
    "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",
    "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",
    "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",
    "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",
    "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",
    "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",
    "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",
    "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",
    "fr",  "frm", "fro", "frr", "frs", "fur", "fy",
    "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
    "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu",  "gv",  "gwi",
    "ha",  "hai", "haw", "he",  "hi",  "hil", "him",
    "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",
    "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",
    "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",
    "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
    "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",
    "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",
    "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",
    "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",
    "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",
    "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",
    "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",
    "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",
    "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",
    "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",
    "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",
    "niu", "nl",  "nn",  "no",  "nog", "non", "nr",  "nso", "nub",
    "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",
    "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",
    "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",
    "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",
    "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",
    "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",
    "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
    "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
    "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",
    "sv",  "sw",  "syr", "ta",  "tai", "te",  "tem", "ter",
    "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",
    "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",
    "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
    "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",
    "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",
    "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",
    "yi",  "yo",  "ypk", "za",  "zap", "zen", "zh",  "znd",
NULL,
    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
NULL
};
179 static const char* const DEPRECATED_LANGUAGES
[]={
180 "in", "iw", "ji", "jw", NULL
, NULL
182 static const char* const REPLACEMENT_LANGUAGES
[]={
183 "id", "he", "yi", "jv", NULL
, NULL
/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].  The commented-out lines are
 * copied from LANGUAGES to make eyeballing this baby easier.
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in LANGUAGES.
 */
static const char * const LANGUAGES_3[] = {
/*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
/*  "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
    "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
/*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
    "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
/*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
    "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
/*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
    "bai", "bal", "ban", "bas", "bat", "bel", "bej",
/*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
    "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
/*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
    "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
/*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
    "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
/*  "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
    "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
/*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
    "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
/*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
    "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
/*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
    "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
/*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
/*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
    "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
/*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
    "enm", "epo", "spa", "est", "eus", "ewo", "fas",
/*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
    "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
/*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gay",    */
    /* frr/frs/fur must follow LANGUAGES' sorted order; any other order
       pairs each of these codes with the wrong language. */
    "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
/*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
    "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
/*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",     */
    "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
/*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
    "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
/*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
    "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
/*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
    "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
/*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
    "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
/*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
    "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
/*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",*/
    /* "kg" (Kongo) maps to the ISO 639-2 code "kon". */
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kon", "kha", "khi",
/*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
    "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
/*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",     */
    "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
/*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
    "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
/*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
    "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
/*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
    "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
/*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
    "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
/*  "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
    "mdf", "mdr", "men", "mlg", "mga", "mah", "mri", "mic", "min",
/*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
    "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
/*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
    "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
/*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
    "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
/*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
    "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
/*  "niu", "nl",  "nn",  "no",  "nog", "non", "nr",  "nso", "nub",    */
    "niu", "nld", "nno", "nor", "nog", "non", "nbl", "nso", "nub",
/*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
    "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
/*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
    "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
/*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
/*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
    "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
/*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
/*  "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
    "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
/*  "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",    */
    "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
/*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
    "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
/*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
/*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
/*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
    "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
/*  "sv",  "sw",  "syr", "ta",  "tai", "te",  "tem", "ter",    */
    "swe", "swa", "syr", "tam", "tai", "tel", "tem", "ter",
/*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
    "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
/*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",     */
    "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur",
/*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
/*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
    "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
/*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
    "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
/*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
    "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
/*  "yi",  "yo",  "ypk", "za",  "zap", "zen", "zh",  "znd",    */
    "yid", "yor", "ypk", "zha", "zap", "zen", "zho", "znd",
NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL
};
324 * Table of 2-letter country codes.
326 * This list must be in sorted order. This list is returned directly
327 * to the user by some API.
329 * This list must be kept in sync with COUNTRIES_3, with corresponding
332 * This table should be terminated with a NULL entry, followed by a
333 * second list, and another NULL entry. The first list is visible to
334 * user code when this array is returned by API. The second list
335 * contains codes we support, but do not expose through user API.
339 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
340 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
341 * new codes keeping the old ones for compatibility updated to include
342 * 1999/12/03 revisions *CWB*
344 * RO(ROM) is now RO(ROU) according to
345 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
347 static const char * const COUNTRIES
[] = {
348 "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN",
349 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
350 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
351 "BJ", "BM", "BN", "BO", "BR", "BS", "BT", "BV",
352 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
353 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
354 "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK",
355 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
356 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
357 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
358 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
359 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
360 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
361 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
362 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
363 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
364 "LV", "LY", "MA", "MC", "MD", "MG", "MH", "MK",
365 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
366 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
367 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
368 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
369 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
370 "PW", "PY", "QA", "RE", "RO", "RU", "RW", "SA",
371 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
372 "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV",
373 "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
374 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
375 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
376 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
377 "WS", "YE", "YT", "YU", "ZA", "ZM", "ZW", "ZZ",
379 "FX", "RO", "TP", "ZR", /* obsolete country codes */
383 static const char* const DEPRECATED_COUNTRIES
[] ={
384 "BU", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL
, NULL
/* deprecated country list */
386 static const char* const REPLACEMENT_COUNTRIES
[] = {
387 /* "BU", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
388 "MM", "BJ", "FR", "BF", "VU", "ZW", "TL", "CS", "CD", NULL
, NULL
/* replacement country codes */
392 * Table of 3-letter country codes.
394 * This is a lookup table used to convert 3-letter country codes to
395 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
396 * For all valid i, COUNTRIES[i] must refer to the same country as
397 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
398 * to make eyeballing this baby easier.
400 * This table should be terminated with a NULL entry, followed by a
401 * second list, and another NULL entry. The two lists correspond to
402 * the two lists in COUNTRIES.
404 static const char * const COUNTRIES_3
[] = {
405 /* "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", */
406 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
407 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
408 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
409 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
410 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
411 /* "BJ", "BM", "BN", "BO", "BR", "BS", "BT", "BV", */
412 "BEN", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
413 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
414 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
415 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
416 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
417 /* "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", */
418 "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
419 /* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
420 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
421 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
422 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
423 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
424 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
425 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
426 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
427 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
428 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
429 /* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", */
430 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
431 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
432 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
433 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
434 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
435 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
436 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
437 /* "LV", "LY", "MA", "MC", "MD", "MG", "MH", "MK", */
438 "LVA", "LBY", "MAR", "MCO", "MDA", "MDG", "MHL", "MKD",
439 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
440 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
441 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
442 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
443 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
444 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
445 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
446 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
447 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
448 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
449 /* "PW", "PY", "QA", "RE", "RO", "RU", "RW", "SA", */
450 "PLW", "PRY", "QAT", "REU", "ROU", "RUS", "RWA", "SAU",
451 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
452 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
453 /* "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", */
454 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
455 /* "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
456 "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
457 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
458 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
459 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
460 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
461 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
462 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
463 /* "WS", "YE", "YT", "YU", "ZA", "ZM", "ZW", "ZZ", */
464 "WSM", "YEM", "MYT", "YUG", "ZAF", "ZMB", "ZWE", "ZZZ",
466 /* "FX", "RO", "TP", "ZR", */
467 "FXX", "ROM", "TMP", "ZAR",
/* One row of the locale-ID canonicalization table. */
typedef struct CanonicalizationMap {
    /* input ID */
    const char *id;
    /* canonicalized output ID */
    const char *canonicalID;
    /* keyword, or NULL if none */
    const char *keyword;
    /* keyword value, or NULL if keyword is NULL */
    const char *value;
} CanonicalizationMap;
479 * A map to canonicalize locale IDs. This handles a variety of
480 * different semantic kinds of transformations.
482 static const CanonicalizationMap CANONICALIZE_MAP
[] = {
483 { "", "en_US_POSIX", NULL
, NULL
}, /* .NET name */
484 { "C", "en_US_POSIX", NULL
, NULL
}, /* POSIX name */
485 { "posix", "en_US_POSIX", NULL
, NULL
}, /* POSIX name (alias of C) */
486 { "art_LOJBAN", "jbo", NULL
, NULL
}, /* registered name */
487 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL
, NULL
}, /* .NET name */
488 { "az_AZ_LATN", "az_Latn_AZ", NULL
, NULL
}, /* .NET name */
489 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
490 { "cel_GAULISH", "cel__GAULISH", NULL
, NULL
}, /* registered name */
491 { "de_1901", "de__1901", NULL
, NULL
}, /* registered name */
492 { "de_1906", "de__1906", NULL
, NULL
}, /* registered name */
493 { "de__PHONEBOOK", "de", "collation", "phonebook" },
494 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
495 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
496 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
497 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
498 { "en_BOONT", "en__BOONT", NULL
, NULL
}, /* registered name */
499 { "en_SCOUSE", "en__SCOUSE", NULL
, NULL
}, /* registered name */
500 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
501 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
502 { "es__TRADITIONAL", "es", "collation", "traditional" },
503 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
504 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
505 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
506 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
507 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
508 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
509 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
510 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
511 { "hi__DIRECT", "hi", "collation", "direct" },
512 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
513 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" },
514 { "nb_NO_NY", "nn_NO", NULL
, NULL
}, /* "markus said this was ok" :-) */
515 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
516 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
517 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
518 { "sl_ROZAJ", "sl__ROZAJ", NULL
, NULL
}, /* registered name */
519 { "sr_SP_CYRL", "sr_Cyrl_CS", NULL
, NULL
}, /* .NET name */
520 { "sr_SP_LATN", "sr_Latn_CS", NULL
, NULL
}, /* .NET name */
521 { "sr_YU_CYRILLIC", "sr_Cyrl_CS", NULL
, NULL
}, /* Linux name */
522 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL
, NULL
}, /* Linux name */
523 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL
, NULL
}, /* .NET name */
524 { "uz_UZ_LATN", "uz_Latn_UZ", NULL
, NULL
}, /* .NET name */
525 { "zh_CHS", "zh_Hans", NULL
, NULL
}, /* .NET name */
526 { "zh_CHT", "zh_Hant", NULL
, NULL
}, /* .NET name (the earlier TODO is resolved: this now maps to zh_Hant) */
527 { "zh_GAN", "zh__GAN", NULL
, NULL
}, /* registered name */
528 { "zh_GUOYU", "zh", NULL
, NULL
}, /* registered name */
529 { "zh_HAKKA", "zh__HAKKA", NULL
, NULL
}, /* registered name */
530 { "zh_MIN", "zh__MIN", NULL
, NULL
}, /* registered name */
531 { "zh_MIN_NAN", "zh__MINNAN", NULL
, NULL
}, /* registered name */
532 { "zh_WUU", "zh__WUU", NULL
, NULL
}, /* registered name */
533 { "zh_XIANG", "zh__XIANG", NULL
, NULL
}, /* registered name */
534 { "zh_YUE", "zh__YUE", NULL
, NULL
}, /* registered name */
535 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" },
536 { "zh_TW_STROKE", "zh_Hant_TW", "collation", "stroke" },
537 { "zh__PINYIN", "zh", "collation", "pinyin" }
540 /* ### Keywords **************************************************/
542 #define ULOC_KEYWORD_BUFFER_LEN 25
543 #define ULOC_MAX_NO_KEYWORDS 25
546 locale_getKeywordsStart(const char *localeID
) {
547 const char *result
= NULL
;
548 if((result
= uprv_strchr(localeID
, '@')) != NULL
) {
551 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
553 /* We do this because the @ sign is variant, and the @ sign used on one
554 EBCDIC machine won't be compiled the same way on other EBCDIC based
556 static const uint8_t ebcdicSigns
[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
557 const uint8_t *charToFind
= ebcdicSigns
;
559 if((result
= uprv_strchr(localeID
, *charToFind
)) != NULL
) {
570 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
571 * @param keywordName incoming name to be canonicalized
572 * @param status return status (keyword too long)
573 * @return length of the keyword name
575 static int32_t locale_canonKeywordName(char *buf
, const char *keywordName
, UErrorCode
*status
)
578 int32_t keywordNameLen
= (int32_t)uprv_strlen(keywordName
);
580 if(keywordNameLen
>= ULOC_KEYWORD_BUFFER_LEN
) {
581 /* keyword name too long for internal buffer */
582 *status
= U_INTERNAL_PROGRAM_ERROR
;
586 /* normalize the keyword name */
587 for(i
= 0; i
< keywordNameLen
; i
++) {
588 buf
[i
] = uprv_tolower(keywordName
[i
]);
592 return keywordNameLen
;
596 char keyword
[ULOC_KEYWORD_BUFFER_LEN
];
598 const char *valueStart
;
602 static int32_t U_CALLCONV
603 compareKeywordStructs(const void *context
, const void *left
, const void *right
) {
604 const char* leftString
= ((const KeywordStruct
*)left
)->keyword
;
605 const char* rightString
= ((const KeywordStruct
*)right
)->keyword
;
606 return uprv_strcmp(leftString
, rightString
);
610 * Both addKeyword and addValue must already be in canonical form.
611 * Either both addKeyword and addValue are NULL, or neither is NULL.
612 * If they are not NULL they must be zero terminated.
613 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
616 _getKeywords(const char *localeID
,
618 char *keywords
, int32_t keywordCapacity
,
619 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
621 const char* addKeyword
,
622 const char* addValue
,
625 KeywordStruct keywordList
[ULOC_MAX_NO_KEYWORDS
];
627 int32_t maxKeywords
= ULOC_MAX_NO_KEYWORDS
;
628 int32_t numKeywords
= 0;
629 const char* pos
= localeID
;
630 const char* equalSign
= NULL
;
631 const char* semicolon
= NULL
;
633 int32_t keywordsLen
= 0;
634 int32_t valuesLen
= 0;
636 if(prev
== '@') { /* start of keyword definition */
637 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
639 UBool duplicate
= FALSE
;
640 /* skip leading spaces */
644 if (!*pos
) { /* handle trailing "; " */
647 if(numKeywords
== maxKeywords
) {
648 *status
= U_INTERNAL_PROGRAM_ERROR
;
651 equalSign
= uprv_strchr(pos
, '=');
652 semicolon
= uprv_strchr(pos
, ';');
653 /* lack of '=' [foo@currency] is illegal */
654 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
655 if(!equalSign
|| (semicolon
&& semicolon
<equalSign
)) {
656 *status
= U_INVALID_FORMAT_ERROR
;
659 /* need to normalize both keyword and keyword name */
660 if(equalSign
- pos
>= ULOC_KEYWORD_BUFFER_LEN
) {
661 /* keyword name too long for internal buffer */
662 *status
= U_INTERNAL_PROGRAM_ERROR
;
665 for(i
= 0, n
= 0; i
< equalSign
- pos
; ++i
) {
667 keywordList
[numKeywords
].keyword
[n
++] = uprv_tolower(pos
[i
]);
670 keywordList
[numKeywords
].keyword
[n
] = 0;
671 keywordList
[numKeywords
].keywordLen
= n
;
672 /* now grab the value part. First we skip the '=' */
674 /* then we leading spaces */
675 while(*equalSign
== ' ') {
678 keywordList
[numKeywords
].valueStart
= equalSign
;
683 while(*(pos
- i
- 1) == ' ') {
686 keywordList
[numKeywords
].valueLen
= (int32_t)(pos
- equalSign
- i
);
689 i
= (int32_t)uprv_strlen(equalSign
);
690 while(equalSign
[i
-1] == ' ') {
693 keywordList
[numKeywords
].valueLen
= i
;
695 /* If this is a duplicate keyword, then ignore it */
696 for (j
=0; j
<numKeywords
; ++j
) {
697 if (uprv_strcmp(keywordList
[j
].keyword
, keywordList
[numKeywords
].keyword
) == 0) {
707 /* Handle addKeyword/addValue. */
708 if (addKeyword
!= NULL
) {
709 UBool duplicate
= FALSE
;
710 U_ASSERT(addValue
!= NULL
);
711 /* Search for duplicate; if found, do nothing. Explicit keyword
712 overrides addKeyword. */
713 for (j
=0; j
<numKeywords
; ++j
) {
714 if (uprv_strcmp(keywordList
[j
].keyword
, addKeyword
) == 0) {
720 if (numKeywords
== maxKeywords
) {
721 *status
= U_INTERNAL_PROGRAM_ERROR
;
724 uprv_strcpy(keywordList
[numKeywords
].keyword
, addKeyword
);
725 keywordList
[numKeywords
].keywordLen
= (int32_t)uprv_strlen(addKeyword
);
726 keywordList
[numKeywords
].valueStart
= addValue
;
727 keywordList
[numKeywords
].valueLen
= (int32_t)uprv_strlen(addValue
);
731 U_ASSERT(addValue
== NULL
);
734 /* now we have a list of keywords */
735 /* we need to sort it */
736 uprv_sortArray(keywordList
, numKeywords
, sizeof(KeywordStruct
), compareKeywordStructs
, NULL
, FALSE
, status
);
738 /* Now construct the keyword part */
739 for(i
= 0; i
< numKeywords
; i
++) {
740 if(keywordsLen
+ keywordList
[i
].keywordLen
+ 1< keywordCapacity
) {
741 uprv_strcpy(keywords
+keywordsLen
, keywordList
[i
].keyword
);
743 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = '=';
745 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = 0;
748 keywordsLen
+= keywordList
[i
].keywordLen
+ 1;
750 if(keywordsLen
+ keywordList
[i
].valueLen
< keywordCapacity
) {
751 uprv_strncpy(keywords
+keywordsLen
, keywordList
[i
].valueStart
, keywordList
[i
].valueLen
);
753 keywordsLen
+= keywordList
[i
].valueLen
;
755 if(i
< numKeywords
- 1) {
756 if(keywordsLen
< keywordCapacity
) {
757 keywords
[keywordsLen
] = ';';
763 if(valuesLen
+ keywordList
[i
].valueLen
+ 1< valuesCapacity
) {
764 uprv_strcpy(values
+valuesLen
, keywordList
[i
].valueStart
);
765 values
[valuesLen
+ keywordList
[i
].valueLen
] = 0;
767 valuesLen
+= keywordList
[i
].valueLen
+ 1;
771 values
[valuesLen
] = 0;
776 return u_terminateChars(keywords
, keywordCapacity
, keywordsLen
, status
);
783 locale_getKeywords(const char *localeID
,
785 char *keywords
, int32_t keywordCapacity
,
786 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
788 UErrorCode
*status
) {
789 return _getKeywords(localeID
, prev
, keywords
, keywordCapacity
,
790 values
, valuesCapacity
, valLen
, valuesToo
,
794 U_CAPI
int32_t U_EXPORT2
795 uloc_getKeywordValue(const char* localeID
,
796 const char* keywordName
,
797 char* buffer
, int32_t bufferCapacity
,
800 const char* nextSeparator
= NULL
;
801 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
802 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
806 if(status
&& U_SUCCESS(*status
) && localeID
) {
808 const char* startSearchHere
= uprv_strchr(localeID
, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
809 if(startSearchHere
== NULL
) {
810 /* no keywords, return at once */
814 locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
815 if(U_FAILURE(*status
)) {
819 /* find the first keyword */
820 while(startSearchHere
) {
822 /* skip leading spaces (allowed?) */
823 while(*startSearchHere
== ' ') {
826 nextSeparator
= uprv_strchr(startSearchHere
, '=');
827 /* need to normalize both keyword and keyword name */
831 if(nextSeparator
- startSearchHere
>= ULOC_KEYWORD_BUFFER_LEN
) {
832 /* keyword name too long for internal buffer */
833 *status
= U_INTERNAL_PROGRAM_ERROR
;
836 for(i
= 0; i
< nextSeparator
- startSearchHere
; i
++) {
837 localeKeywordNameBuffer
[i
] = uprv_tolower(startSearchHere
[i
]);
839 /* trim trailing spaces */
840 while(startSearchHere
[i
-1] == ' ') {
843 localeKeywordNameBuffer
[i
] = 0;
845 startSearchHere
= uprv_strchr(nextSeparator
, ';');
847 if(uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
) == 0) {
849 while(*nextSeparator
== ' ') {
852 /* we actually found the keyword. Copy the value */
853 if(startSearchHere
&& startSearchHere
- nextSeparator
< bufferCapacity
) {
854 while(*(startSearchHere
-1) == ' ') {
857 uprv_strncpy(buffer
, nextSeparator
, startSearchHere
- nextSeparator
);
858 result
= u_terminateChars(buffer
, bufferCapacity
, (int32_t)(startSearchHere
- nextSeparator
), status
);
859 } else if(!startSearchHere
&& (int32_t)uprv_strlen(nextSeparator
) < bufferCapacity
) { /* last item in string */
860 i
= (int32_t)uprv_strlen(nextSeparator
);
861 while(nextSeparator
[i
- 1] == ' ') {
864 uprv_strncpy(buffer
, nextSeparator
, i
);
865 result
= u_terminateChars(buffer
, bufferCapacity
, i
, status
);
867 /* give a bigger buffer, please */
868 *status
= U_BUFFER_OVERFLOW_ERROR
;
869 if(startSearchHere
) {
870 result
= (int32_t)(startSearchHere
- nextSeparator
);
872 result
= (int32_t)uprv_strlen(nextSeparator
);
882 U_CAPI
int32_t U_EXPORT2
883 uloc_setKeywordValue(const char* keywordName
,
884 const char* keywordValue
,
885 char* buffer
, int32_t bufferCapacity
,
888 /* TODO: sorting. removal. */
889 int32_t keywordNameLen
;
890 int32_t keywordValueLen
;
893 int32_t foundValueLen
;
894 int32_t keywordAtEnd
= 0; /* is the keyword at the end of the string? */
895 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
896 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
899 char* nextSeparator
= NULL
;
900 char* nextEqualsign
= NULL
;
901 char* startSearchHere
= NULL
;
902 char* keywordStart
= NULL
;
903 char *insertHere
= NULL
;
904 if(U_FAILURE(*status
)) {
907 if(bufferCapacity
>1) {
908 bufLen
= (int32_t)uprv_strlen(buffer
);
910 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
913 if(bufferCapacity
<bufLen
) {
914 /* The capacity is less than the length?! Is this NULL terminated? */
915 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
918 if(keywordValue
&& !*keywordValue
) {
922 keywordValueLen
= (int32_t)uprv_strlen(keywordValue
);
926 keywordNameLen
= locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
927 if(U_FAILURE(*status
)) {
930 startSearchHere
= (char*)locale_getKeywordsStart(buffer
);
931 if(startSearchHere
== NULL
|| (startSearchHere
[1]==0)) {
932 if(!keywordValue
) { /* no keywords = nothing to remove */
936 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
937 if(startSearchHere
) { /* had a single @ */
938 needLen
--; /* already had the @ */
939 /* startSearchHere points at the @ */
941 startSearchHere
=buffer
+bufLen
;
943 if(needLen
>= bufferCapacity
) {
944 *status
= U_BUFFER_OVERFLOW_ERROR
;
945 return needLen
; /* no change */
947 *startSearchHere
= '@';
949 uprv_strcpy(startSearchHere
, keywordNameBuffer
);
950 startSearchHere
+= keywordNameLen
;
951 *startSearchHere
= '=';
953 uprv_strcpy(startSearchHere
, keywordValue
);
954 startSearchHere
+=keywordValueLen
;
956 } /* end shortcut - no @ */
958 keywordStart
= startSearchHere
;
959 /* search for keyword */
960 while(keywordStart
) {
962 /* skip leading spaces (allowed?) */
963 while(*keywordStart
== ' ') {
966 nextEqualsign
= uprv_strchr(keywordStart
, '=');
967 /* need to normalize both keyword and keyword name */
971 if(nextEqualsign
- keywordStart
>= ULOC_KEYWORD_BUFFER_LEN
) {
972 /* keyword name too long for internal buffer */
973 *status
= U_INTERNAL_PROGRAM_ERROR
;
976 for(i
= 0; i
< nextEqualsign
- keywordStart
; i
++) {
977 localeKeywordNameBuffer
[i
] = uprv_tolower(keywordStart
[i
]);
979 /* trim trailing spaces */
980 while(keywordStart
[i
-1] == ' ') {
983 localeKeywordNameBuffer
[i
] = 0;
985 nextSeparator
= uprv_strchr(nextEqualsign
, ';');
986 rc
= uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
);
989 while(*nextEqualsign
== ' ') {
992 /* we actually found the keyword. Change the value */
995 foundValueLen
= (int32_t)(nextSeparator
- nextEqualsign
);
998 foundValueLen
= (int32_t)uprv_strlen(nextEqualsign
);
1000 if(keywordValue
) { /* adding a value - not removing */
1001 if(foundValueLen
== keywordValueLen
) {
1002 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1003 return bufLen
; /* no change in size */
1004 } else if(foundValueLen
> keywordValueLen
) {
1005 int32_t delta
= foundValueLen
- keywordValueLen
;
1006 if(nextSeparator
) { /* RH side */
1007 uprv_memmove(nextSeparator
- delta
, nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1009 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1013 } else { /* FVL < KVL */
1014 int32_t delta
= keywordValueLen
- foundValueLen
;
1015 if((bufLen
+delta
) >= bufferCapacity
) {
1016 *status
= U_BUFFER_OVERFLOW_ERROR
;
1017 return bufLen
+delta
;
1019 if(nextSeparator
) { /* RH side */
1020 uprv_memmove(nextSeparator
+delta
,nextSeparator
, bufLen
-(nextSeparator
-buffer
));
1022 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1027 } else { /* removing a keyword */
1029 /* zero out the ';' or '@' just before keywordStart */
1030 keywordStart
[-1] = 0;
1031 return (int32_t)((keywordStart
-buffer
)-1); /* (string length without keyword) minus separator */
1033 uprv_memmove(keywordStart
, nextSeparator
+1, bufLen
-((nextSeparator
+1)-buffer
));
1034 keywordStart
[bufLen
-((nextSeparator
+1)-buffer
)]=0;
1035 return (int32_t)(bufLen
-((nextSeparator
+1)-keywordStart
));
1038 } else if(rc
<0){ /* end match keyword */
1039 /* could insert at this location. */
1040 insertHere
= keywordStart
;
1042 keywordStart
= nextSeparator
;
1043 } /* end loop searching */
1046 return bufLen
; /* removal of non-extant keyword - no change */
1049 /* we know there is at least one keyword. */
1050 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
1051 if(needLen
>= bufferCapacity
) {
1052 *status
= U_BUFFER_OVERFLOW_ERROR
;
1053 return needLen
; /* no change */
1057 uprv_memmove(insertHere
+(1+keywordNameLen
+1+keywordValueLen
), insertHere
, bufLen
-(insertHere
-buffer
));
1058 keywordStart
= insertHere
;
1060 keywordStart
= buffer
+bufLen
;
1061 *keywordStart
= ';';
1064 uprv_strncpy(keywordStart
, keywordNameBuffer
, keywordNameLen
);
1065 keywordStart
+= keywordNameLen
;
1066 *keywordStart
= '=';
1068 uprv_strncpy(keywordStart
, keywordValue
, keywordValueLen
); /* terminates. */
1069 keywordStart
+=keywordValueLen
;
1071 *keywordStart
= ';';
1078 /* ### ID parsing implementation **************************************************/
/*
 * Character-classification helpers for locale ID parsing.
 *
 * Every macro argument is parenthesized in the expansion so that arbitrary
 * expressions (for example `p + 1` or `cond ? x : y`) are classified
 * correctly. NOTE: an argument may still be evaluated more than once, so do
 * not pass expressions with side effects such as `*p++`.
 */

/* Returns TRUE if a is an ID separator ('_' or '-'), FALSE otherwise. */
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')

/* Returns TRUE if a is a letter that may start a special prefix:
   'x', 'X', 'i' or 'I'. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* Returns TRUE if one of the special prefixes is here (s=string):
   a prefix letter immediately followed by an ID separator, e.g. "x-" or "i-". */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1094 static char* _strnchr(const char* str
, int32_t len
, char c
) {
1095 U_ASSERT(str
!= 0 && len
>= 0);
1096 while (len
-- != 0) {
1100 } else if (d
== 0) {
1109 * Lookup 'key' in the array 'list'. The array 'list' should contain
1110 * a NULL entry, followed by more entries, and a second NULL entry.
1112 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or COUNTRIES_3.
1115 static int16_t _findIndex(const char* const* list
, const char* key
)
1117 const char* const* anchor
= list
;
1120 /* Make two passes through two NULL-terminated arrays at 'list' */
1121 while (pass
++ < 2) {
1123 if (uprv_strcmp(key
, *list
) == 0) {
1124 return (int16_t)(list
- anchor
);
1128 ++list
; /* skip final NULL *CWB*/
1133 /* count the length of src while copying it to dest; return strlen(src) */
1134 static U_INLINE
int32_t
1135 _copyCount(char *dest
, int32_t destCapacity
, const char *src
) {
1142 return (int32_t)(src
-anchor
);
1144 if(destCapacity
<=0) {
1145 return (int32_t)((src
-anchor
)+uprv_strlen(src
));
1154 uloc_getCurrentCountryID(const char* oldID
){
1155 int32_t offset
= _findIndex(DEPRECATED_COUNTRIES
, oldID
);
1157 return REPLACEMENT_COUNTRIES
[offset
];
1162 uloc_getCurrentLanguageID(const char* oldID
){
1163 int32_t offset
= _findIndex(DEPRECATED_LANGUAGES
, oldID
);
1165 return REPLACEMENT_LANGUAGES
[offset
];
1170 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1171 * avoid duplicating code to handle the earlier locale ID pieces
1172 * in the functions for the later ones by
1173 * setting the *pEnd pointer to where they stopped parsing
1175 * TODO try to use this in Locale
1178 _getLanguage(const char *localeID
,
1179 char *language
, int32_t languageCapacity
,
1180 const char **pEnd
) {
1183 char lang
[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1185 /* if it starts with i- or x- then copy that prefix */
1186 if(_isIDPrefix(localeID
)) {
1187 if(i
<languageCapacity
) {
1188 language
[i
]=(char)uprv_tolower(*localeID
);
1190 if(i
<languageCapacity
) {
1197 /* copy the language as far as possible and count its length */
1198 while(!_isTerminator(*localeID
) && !_isIDSeparator(*localeID
)) {
1199 if(i
<languageCapacity
) {
1200 language
[i
]=(char)uprv_tolower(*localeID
);
1203 lang
[i
]=(char)uprv_tolower(*localeID
);
1210 /* convert 3 character code to 2 character code if possible *CWB*/
1211 offset
=_findIndex(LANGUAGES_3
, lang
);
1213 i
=_copyCount(language
, languageCapacity
, LANGUAGES
[offset
]);
1224 _getScript(const char *localeID
,
1225 char *script
, int32_t scriptCapacity
,
1234 /* copy the second item as far as possible and count its length */
1235 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])) {
1239 /* If it's exactly 4 characters long, then it's a script and not a country. */
1243 *pEnd
= localeID
+idLen
;
1245 if(idLen
> scriptCapacity
) {
1246 idLen
= scriptCapacity
;
1249 script
[0]=(char)uprv_toupper(*(localeID
++));
1251 for (i
= 1; i
< idLen
; i
++) {
1252 script
[i
]=(char)uprv_tolower(*(localeID
++));
1262 _getCountry(const char *localeID
,
1263 char *country
, int32_t countryCapacity
,
1267 char cnty
[ULOC_COUNTRY_CAPACITY
]={ 0, 0, 0, 0 };
1270 /* copy the country as far as possible and count its length */
1271 while(!_isTerminator(*localeID
) && !_isIDSeparator(*localeID
)) {
1272 if(i
<countryCapacity
) {
1273 country
[i
]=(char)uprv_toupper(*localeID
);
1275 if(i
<(ULOC_COUNTRY_CAPACITY
-1)) { /*CWB*/
1276 cnty
[i
]=(char)uprv_toupper(*localeID
);
1282 /* convert 3 character code to 2 character code if possible *CWB*/
1284 offset
=_findIndex(COUNTRIES_3
, cnty
);
1286 i
=_copyCount(country
, countryCapacity
, COUNTRIES
[offset
]);
1297 * @param needSeparator if true, then add leading '_' if any variants
1298 * are added to 'variant'
1301 _getVariantEx(const char *localeID
,
1303 char *variant
, int32_t variantCapacity
,
1304 UBool needSeparator
) {
1307 /* get one or more variant tags and separate them with '_' */
1308 if(_isIDSeparator(prev
)) {
1309 /* get a variant string after a '-' or '_' */
1310 while(!_isTerminator(*localeID
)) {
1311 if (needSeparator
) {
1312 if (i
<variantCapacity
) {
1316 needSeparator
= FALSE
;
1318 if(i
<variantCapacity
) {
1319 variant
[i
]=(char)uprv_toupper(*localeID
);
1320 if(variant
[i
]=='-') {
1329 /* if there is no variant tag after a '-' or '_' then look for '@' */
1333 } else if((localeID
=locale_getKeywordsStart(localeID
))!=NULL
) {
1334 ++localeID
; /* point after the '@' */
1338 while(!_isTerminator(*localeID
)) {
1339 if (needSeparator
) {
1340 if (i
<variantCapacity
) {
1344 needSeparator
= FALSE
;
1346 if(i
<variantCapacity
) {
1347 variant
[i
]=(char)uprv_toupper(*localeID
);
1348 if(variant
[i
]=='-' || variant
[i
]==',') {
1361 _getVariant(const char *localeID
,
1363 char *variant
, int32_t variantCapacity
) {
1364 return _getVariantEx(localeID
, prev
, variant
, variantCapacity
, FALSE
);
1368 * Delete ALL instances of a variant from the given list of one or
1369 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1370 * @param variants the source string of one or more variants,
1371 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1372 * terminated; if it is, trailing zero will NOT be maintained.
1373 * @param variantsLen length of variants
1374 * @param toDelete variant to delete, without separators, e.g. "EURO"
1375 * or "PREEURO"; not zero terminated
1376 * @param toDeleteLen length of toDelete
1377 * @return number of characters deleted from variants
1380 _deleteVariant(char* variants
, int32_t variantsLen
,
1381 const char* toDelete
, int32_t toDeleteLen
) {
1382 int32_t delta
= 0; /* number of chars deleted */
1385 if (variantsLen
< toDeleteLen
) {
1388 if (uprv_strncmp(variants
, toDelete
, toDeleteLen
) == 0 &&
1389 (variantsLen
== toDeleteLen
||
1390 (flag
=(variants
[toDeleteLen
] == '_')))) {
1391 int32_t d
= toDeleteLen
+ (flag
?1:0);
1394 uprv_memmove(variants
, variants
+d
, variantsLen
);
1396 char* p
= _strnchr(variants
, variantsLen
, '_');
1401 variantsLen
-= (int32_t)(p
- variants
);
1407 /* Keyword enumeration */
1409 typedef struct UKeywordsContext
{
1414 static void U_CALLCONV
1415 uloc_kw_closeKeywords(UEnumeration
*enumerator
) {
1416 uprv_free(((UKeywordsContext
*)enumerator
->context
)->keywords
);
1417 uprv_free(enumerator
->context
);
1418 uprv_free(enumerator
);
1421 static int32_t U_CALLCONV
1422 uloc_kw_countKeywords(UEnumeration
*en
, UErrorCode
*status
) {
1423 char *kw
= ((UKeywordsContext
*)en
->context
)->keywords
;
1427 kw
+= uprv_strlen(kw
)+1;
1432 static const char* U_CALLCONV
1433 uloc_kw_nextKeyword(UEnumeration
* en
,
1434 int32_t* resultLength
,
1435 UErrorCode
* status
) {
1436 const char* result
= ((UKeywordsContext
*)en
->context
)->current
;
1439 len
= (int32_t)uprv_strlen(((UKeywordsContext
*)en
->context
)->current
);
1440 ((UKeywordsContext
*)en
->context
)->current
+= len
+1;
1445 *resultLength
= len
;
1450 static void U_CALLCONV
1451 uloc_kw_resetKeywords(UEnumeration
* en
,
1452 UErrorCode
* status
) {
1453 ((UKeywordsContext
*)en
->context
)->current
= ((UKeywordsContext
*)en
->context
)->keywords
;
1456 static const UEnumeration gKeywordsEnum
= {
1459 uloc_kw_closeKeywords
,
1460 uloc_kw_countKeywords
,
1462 uloc_kw_nextKeyword
,
1463 uloc_kw_resetKeywords
1466 U_CAPI UEnumeration
* U_EXPORT2
1467 uloc_openKeywordList(const char *keywordList
, int32_t keywordListSize
, UErrorCode
* status
)
1469 UKeywordsContext
*myContext
= NULL
;
1470 UEnumeration
*result
= NULL
;
1472 if(U_FAILURE(*status
)) {
1475 result
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
1476 uprv_memcpy(result
, &gKeywordsEnum
, sizeof(UEnumeration
));
1477 myContext
= uprv_malloc(sizeof(UKeywordsContext
));
1478 if (myContext
== NULL
) {
1479 *status
= U_MEMORY_ALLOCATION_ERROR
;
1483 myContext
->keywords
= (char *)uprv_malloc(keywordListSize
+1);
1484 uprv_memcpy(myContext
->keywords
, keywordList
, keywordListSize
);
1485 myContext
->keywords
[keywordListSize
] = 0;
1486 myContext
->current
= myContext
->keywords
;
1487 result
->context
= myContext
;
1491 U_CAPI UEnumeration
* U_EXPORT2
1492 uloc_openKeywords(const char* localeID
,
1497 int32_t keywordsCapacity
= 256;
1498 if(status
==NULL
|| U_FAILURE(*status
)) {
1502 if(localeID
==NULL
) {
1503 localeID
=uloc_getDefault();
1506 /* Skip the language */
1507 _getLanguage(localeID
, NULL
, 0, &localeID
);
1508 if(_isIDSeparator(*localeID
)) {
1509 const char *scriptID
;
1510 /* Skip the script if available */
1511 _getScript(localeID
+1, NULL
, 0, &scriptID
);
1512 if(scriptID
!= localeID
+1) {
1513 /* Found optional script */
1514 localeID
= scriptID
;
1516 /* Skip the Country */
1517 if (_isIDSeparator(*localeID
)) {
1518 _getCountry(localeID
+1, NULL
, 0, &localeID
);
1519 if(_isIDSeparator(*localeID
)) {
1520 _getVariant(localeID
+1, *localeID
, NULL
, 0);
1525 /* keywords are located after '@' */
1526 if((localeID
= locale_getKeywordsStart(localeID
)) != NULL
) {
1527 i
=locale_getKeywords(localeID
+1, '@', keywords
, keywordsCapacity
, NULL
, 0, NULL
, FALSE
, status
);
1531 return uloc_openKeywordList(keywords
, i
, status
);
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'. Both arguments are
   parenthesized so that compound masks such as (A | B) are tested correctly. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The special ID "i-default". Deliberately NOT NUL-terminated: always use it
   together with I_DEFAULT_LENGTH. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
/* Number of chars in i_default, as a signed value so that comparisons with
   int32_t lengths are not silently converted to unsigned. */
#define I_DEFAULT_LENGTH ((int32_t)(sizeof i_default / sizeof i_default[0]))
1548 * Canonicalize the given localeID, to level 1 or to level 2,
1549 * depending on the options. To specify level 1, pass in options=0.
1550 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1552 * This is the code underlying uloc_getName and uloc_canonicalize.
1555 _canonicalize(const char* localeID
,
1557 int32_t resultCapacity
,
1560 int32_t j
, len
, fieldCount
=0, scriptSize
=0, variantSize
=0, nameCapacity
;
1561 char localeBuffer
[ULOC_FULLNAME_CAPACITY
];
1562 const char* origLocaleID
= localeID
;
1563 const char* keywordAssign
= NULL
;
1564 const char* separatorIndicator
= NULL
;
1565 const char* addKeyword
= NULL
;
1566 const char* addValue
= NULL
;
1568 char* variant
= NULL
; /* pointer into name, or NULL */
1569 int32_t sawEuro
= 0;
1571 if (U_FAILURE(*err
)) {
1575 if (localeID
==NULL
) {
1576 localeID
=uloc_getDefault();
1579 /* if we are doing a full canonicalization, then put results in
1580 localeBuffer, if necessary; otherwise send them to result. */
1581 if (OPTION_SET(options
, _ULOC_CANONICALIZE
) &&
1582 (result
== NULL
|| resultCapacity
< sizeof(localeBuffer
))) {
1583 name
= localeBuffer
;
1584 nameCapacity
= sizeof(localeBuffer
);
1587 nameCapacity
= resultCapacity
;
1590 /* get all pieces, one after another, and separate with '_' */
1591 len
=_getLanguage(localeID
, name
, nameCapacity
, &localeID
);
1593 if(len
== I_DEFAULT_LENGTH
&& uprv_strncmp(origLocaleID
, i_default
, len
) == 0) {
1594 const char *d
= uloc_getDefault();
1596 len
= uprv_strlen(d
);
1599 uprv_strncpy(name
, d
, len
);
1601 } else if(_isIDSeparator(*localeID
)) {
1602 const char *scriptID
;
1605 if(len
<nameCapacity
) {
1610 scriptSize
=_getScript(localeID
+1, name
+len
, nameCapacity
-len
, &scriptID
);
1611 if(scriptSize
> 0) {
1612 /* Found optional script */
1613 localeID
= scriptID
;
1616 if (_isIDSeparator(*localeID
)) {
1617 /* If there is something else, then we add the _ */
1618 if(len
<nameCapacity
) {
1625 if (_isIDSeparator(*localeID
)) {
1626 len
+=_getCountry(localeID
+1, name
+len
, nameCapacity
-len
, &localeID
);
1627 if(_isIDSeparator(*localeID
)) {
1629 if(len
<nameCapacity
) {
1633 variantSize
= _getVariant(localeID
+1, *localeID
, name
+len
, nameCapacity
-len
);
1634 if (variantSize
> 0) {
1637 localeID
+= variantSize
+ 1; /* skip '_' and variant */
1643 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1644 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) && *localeID
== '.') {
1654 if (len
<nameCapacity
) {
1664 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1665 After this, localeID either points to '@' or is NULL */
1666 if ((localeID
=locale_getKeywordsStart(localeID
))!=NULL
) {
1667 keywordAssign
= uprv_strchr(localeID
, '=');
1668 separatorIndicator
= uprv_strchr(localeID
, ';');
1671 /* Copy POSIX-style variant, if any [mr@FOO] */
1672 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) &&
1673 localeID
!= NULL
&& keywordAssign
== NULL
) {
1679 if (len
<nameCapacity
) {
1687 if (OPTION_SET(options
, _ULOC_CANONICALIZE
)) {
1688 /* Handle @FOO variant if @ is present and not followed by = */
1689 if (localeID
!=NULL
&& keywordAssign
==NULL
) {
1690 int32_t posixVariantSize
;
1691 /* Add missing '_' if needed */
1692 if (fieldCount
< 2 || (fieldCount
< 3 && scriptSize
> 0)) {
1694 if(len
<nameCapacity
) {
1699 } while(fieldCount
<2);
1701 posixVariantSize
= _getVariantEx(localeID
+1, '@', name
+len
, nameCapacity
-len
,
1702 (UBool
)(variantSize
> 0));
1703 if (posixVariantSize
> 0) {
1704 if (variant
== NULL
) {
1707 len
+= posixVariantSize
;
1708 variantSize
+= posixVariantSize
;
1712 /* Check for EURO variants. */
1713 sawEuro
= _deleteVariant(variant
, variantSize
, "EURO", 4);
1715 if (sawEuro
> 0 && name
[len
-1] == '_') { /* delete trailing '_' */
1719 /* Look up the ID in the canonicalization map */
1720 for (j
=0; j
<(int32_t)(sizeof(CANONICALIZE_MAP
)/sizeof(CANONICALIZE_MAP
[0])); j
++) {
1721 const char* id
= CANONICALIZE_MAP
[j
].id
;
1722 int32_t n
= (int32_t)uprv_strlen(id
);
1723 if (len
== n
&& uprv_strncmp(name
, id
, n
) == 0) {
1724 if (n
== 0 && localeID
!= NULL
) {
1725 break; /* Don't remap "" if keywords present */
1727 len
= _copyCount(name
, nameCapacity
, CANONICALIZE_MAP
[j
].canonicalID
);
1728 addKeyword
= CANONICALIZE_MAP
[j
].keyword
;
1729 addValue
= CANONICALIZE_MAP
[j
].value
;
1734 /* Explicit EURO variant overrides keyword in CANONICALIZE_MAP */
1736 addKeyword
= "currency";
1741 if (!OPTION_SET(options
, _ULOC_STRIP_KEYWORDS
)) {
1742 if (localeID
!=NULL
&& keywordAssign
!=NULL
&&
1743 (!separatorIndicator
|| separatorIndicator
> keywordAssign
)) {
1744 if(len
<nameCapacity
) {
1749 len
+= _getKeywords(localeID
+1, '@', name
+len
, nameCapacity
-len
, NULL
, 0, NULL
, TRUE
,
1750 addKeyword
, addValue
, err
);
1751 } else if (addKeyword
!= NULL
) {
1752 U_ASSERT(addValue
!= NULL
);
1753 /* inelegant but works -- later make _getKeywords do this? */
1754 len
+= _copyCount(name
+len
, nameCapacity
-len
, "@");
1755 len
+= _copyCount(name
+len
, nameCapacity
-len
, addKeyword
);
1756 len
+= _copyCount(name
+len
, nameCapacity
-len
, "=");
1757 len
+= _copyCount(name
+len
, nameCapacity
-len
, addValue
);
1761 if (U_SUCCESS(*err
) && name
== localeBuffer
) {
1762 uprv_strncpy(result
, localeBuffer
, (len
> resultCapacity
) ? resultCapacity
: len
);
1765 return u_terminateChars(result
, resultCapacity
, len
, err
);
1768 /* ### ID parsing API **************************************************/
1770 U_CAPI
int32_t U_EXPORT2
1771 uloc_getParent(const char* localeID
,
1773 int32_t parentCapacity
,
1776 const char *lastUnderscore
;
1779 if (U_FAILURE(*err
))
1782 if (localeID
== NULL
)
1783 localeID
= uloc_getDefault();
1785 lastUnderscore
=uprv_strrchr(localeID
, '_');
1786 if(lastUnderscore
!=NULL
) {
1787 i
=(int32_t)(lastUnderscore
-localeID
);
1792 if(i
>0 && parent
!= localeID
) {
1793 uprv_memcpy(parent
, localeID
, uprv_min(i
, parentCapacity
));
1795 return u_terminateChars(parent
, parentCapacity
, i
, err
);
1798 U_CAPI
int32_t U_EXPORT2
1799 uloc_getLanguage(const char* localeID
,
1801 int32_t languageCapacity
,
1804 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1807 if (err
==NULL
|| U_FAILURE(*err
)) {
1811 if(localeID
==NULL
) {
1812 localeID
=uloc_getDefault();
1815 i
=_getLanguage(localeID
, language
, languageCapacity
, NULL
);
1816 return u_terminateChars(language
, languageCapacity
, i
, err
);
1819 U_CAPI
int32_t U_EXPORT2
1820 uloc_getScript(const char* localeID
,
1822 int32_t scriptCapacity
,
1827 if(err
==NULL
|| U_FAILURE(*err
)) {
1831 if(localeID
==NULL
) {
1832 localeID
=uloc_getDefault();
1835 /* skip the language */
1836 _getLanguage(localeID
, NULL
, 0, &localeID
);
1837 if(_isIDSeparator(*localeID
)) {
1838 i
=_getScript(localeID
+1, script
, scriptCapacity
, NULL
);
1840 return u_terminateChars(script
, scriptCapacity
, i
, err
);
1843 U_CAPI
int32_t U_EXPORT2
1844 uloc_getCountry(const char* localeID
,
1846 int32_t countryCapacity
,
1851 if(err
==NULL
|| U_FAILURE(*err
)) {
1855 if(localeID
==NULL
) {
1856 localeID
=uloc_getDefault();
1859 /* Skip the language */
1860 _getLanguage(localeID
, NULL
, 0, &localeID
);
1861 if(_isIDSeparator(*localeID
)) {
1862 const char *scriptID
;
1863 /* Skip the script if available */
1864 _getScript(localeID
+1, NULL
, 0, &scriptID
);
1865 if(scriptID
!= localeID
+1) {
1866 /* Found optional script */
1867 localeID
= scriptID
;
1869 if(_isIDSeparator(*localeID
)) {
1870 i
=_getCountry(localeID
+1, country
, countryCapacity
, NULL
);
1873 return u_terminateChars(country
, countryCapacity
, i
, err
);
1876 U_CAPI
int32_t U_EXPORT2
1877 uloc_getVariant(const char* localeID
,
1879 int32_t variantCapacity
,
1884 if(err
==NULL
|| U_FAILURE(*err
)) {
1888 if(localeID
==NULL
) {
1889 localeID
=uloc_getDefault();
1892 /* Skip the language */
1893 _getLanguage(localeID
, NULL
, 0, &localeID
);
1894 if(_isIDSeparator(*localeID
)) {
1895 const char *scriptID
;
1896 /* Skip the script if available */
1897 _getScript(localeID
+1, NULL
, 0, &scriptID
);
1898 if(scriptID
!= localeID
+1) {
1899 /* Found optional script */
1900 localeID
= scriptID
;
1902 /* Skip the Country */
1903 if (_isIDSeparator(*localeID
)) {
1904 _getCountry(localeID
+1, NULL
, 0, &localeID
);
1905 if(_isIDSeparator(*localeID
)) {
1906 i
=_getVariant(localeID
+1, *localeID
, variant
, variantCapacity
);
1911 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1912 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
1914 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1915 i=_getVariant(localeID+1, '@', variant, variantCapacity);
1918 return u_terminateChars(variant
, variantCapacity
, i
, err
);
1921 U_CAPI
int32_t U_EXPORT2
1922 uloc_getName(const char* localeID
,
1924 int32_t nameCapacity
,
1927 return _canonicalize(localeID
, name
, nameCapacity
, 0, err
);
1930 U_CAPI
int32_t U_EXPORT2
1931 uloc_getBaseName(const char* localeID
,
1933 int32_t nameCapacity
,
1936 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_STRIP_KEYWORDS
, err
);
1939 U_CAPI
int32_t U_EXPORT2
1940 uloc_canonicalize(const char* localeID
,
1942 int32_t nameCapacity
,
1945 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_CANONICALIZE
, err
);
1948 U_CAPI
const char* U_EXPORT2
1949 uloc_getISO3Language(const char* localeID
)
1952 char lang
[ULOC_LANG_CAPACITY
];
1953 UErrorCode err
= U_ZERO_ERROR
;
1955 if (localeID
== NULL
)
1957 localeID
= uloc_getDefault();
1959 uloc_getLanguage(localeID
, lang
, ULOC_LANG_CAPACITY
, &err
);
1962 offset
= _findIndex(LANGUAGES
, lang
);
1965 return LANGUAGES_3
[offset
];
1968 U_CAPI
const char* U_EXPORT2
1969 uloc_getISO3Country(const char* localeID
)
1972 char cntry
[ULOC_LANG_CAPACITY
];
1973 UErrorCode err
= U_ZERO_ERROR
;
1975 if (localeID
== NULL
)
1977 localeID
= uloc_getDefault();
1979 uloc_getCountry(localeID
, cntry
, ULOC_LANG_CAPACITY
, &err
);
1982 offset
= _findIndex(COUNTRIES
, cntry
);
1986 return COUNTRIES_3
[offset
];
1989 U_CAPI
uint32_t U_EXPORT2
1990 uloc_getLCID(const char* localeID
)
1992 UErrorCode status
= U_ZERO_ERROR
;
1993 char langID
[ULOC_FULLNAME_CAPACITY
];
1995 uloc_getLanguage(localeID
, langID
, sizeof(langID
), &status
);
1996 if (U_FAILURE(status
)) {
2000 return uprv_convertToLCID(langID
, localeID
, &status
);
2003 U_CAPI
int32_t U_EXPORT2
2004 uloc_getLocaleForLCID(uint32_t hostid
, char *locale
, int32_t localeCapacity
,
2008 const char *posix
= uprv_convertToPosix(hostid
, status
);
2009 if (U_FAILURE(*status
) || posix
== NULL
) {
2012 length
= (int32_t)uprv_strlen(posix
);
2013 if (length
+1 > localeCapacity
) {
2014 *status
= U_BUFFER_OVERFLOW_ERROR
;
2017 uprv_strcpy(locale
, posix
);
2022 /* ### Default locale **************************************************/
2024 U_CAPI
const char* U_EXPORT2
2027 return locale_get_default();
2030 U_CAPI
void U_EXPORT2
2031 uloc_setDefault(const char* newDefaultLocale
,
2034 if (U_FAILURE(*err
))
2036 /* the error code isn't currently used for anything by this function*/
2038 /* propagate change to C++ */
2039 locale_set_default(newDefaultLocale
);
2042 /* ### Display name **************************************************/
2045 * Lookup a resource bundle table item with fallback on the table level.
2046 * Regular resource bundle lookups perform fallback to parent locale bundles
2047 * and eventually the root bundle, but only for top-level items.
2048 * This function takes the name of a top-level table and of an item in that table
2049 * and performs a lookup of both, falling back until a bundle contains a table
2052 * Note: Only the opening of entire bundles falls back through the default locale
2053 * before root. Once a bundle is open, item lookups do not go through the
2054 * default locale because that would result in a mix of languages that is
2055 * unpredictable to the programmer and most likely useless.
2057 static const UChar
*
2058 _res_getTableStringWithFallback(const char *path
, const char *locale
,
2059 const char *tableKey
, const char *subTableKey
,
2060 const char *itemKey
,
2062 UErrorCode
*pErrorCode
)
2064 /* char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/
2065 UResourceBundle
*rb
=NULL
, table
, subTable
;
2066 const UChar
*item
=NULL
;
2067 UErrorCode errorCode
;
2068 char explicitFallbackName
[ULOC_FULLNAME_CAPACITY
] = {0};
2071 * open the bundle for the current locale
2072 * this falls back through the locale's chain to root
2074 errorCode
=U_ZERO_ERROR
;
2075 rb
=ures_open(path
, locale
, &errorCode
);
2076 if(U_FAILURE(errorCode
)) {
2077 /* total failure, not even root could be opened */
2078 *pErrorCode
=errorCode
;
2080 } else if(errorCode
==U_USING_DEFAULT_WARNING
||
2081 (errorCode
==U_USING_FALLBACK_WARNING
&& *pErrorCode
!=U_USING_DEFAULT_WARNING
)
2083 /* set the "strongest" error code (success->fallback->default->failure) */
2084 *pErrorCode
=errorCode
;
2088 ures_initStackObject(&table
);
2089 ures_initStackObject(&subTable
);
2090 ures_getByKeyWithFallback(rb
, tableKey
, &table
, &errorCode
);
2091 if (subTableKey
!= NULL
) {
2093 ures_getByKeyWithFallback(&table,subTableKey, &subTable, &errorCode);
2094 item = ures_getStringByKeyWithFallback(&subTable, itemKey, pLength, &errorCode);
2095 if(U_FAILURE(errorCode)){
2096 *pErrorCode = errorCode;
2101 ures_getByKeyWithFallback(&table
,subTableKey
, &table
, &errorCode
);
2103 if(U_SUCCESS(errorCode
)){
2104 item
= ures_getStringByKeyWithFallback(&table
, itemKey
, pLength
, &errorCode
);
2105 if(U_FAILURE(errorCode
)){
2106 const char* replacement
= NULL
;
2107 *pErrorCode
= errorCode
; /*save the errorCode*/
2108 errorCode
= U_ZERO_ERROR
;
2109 /* may be a deprecated code */
2110 if(uprv_strcmp(tableKey
, "Countries")==0){
2111 replacement
= uloc_getCurrentCountryID(itemKey
);
2112 }else if(uprv_strcmp(tableKey
, "Languages")==0){
2113 replacement
= uloc_getCurrentLanguageID(itemKey
);
2115 /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself is replacement is not found*/
2116 if(replacement
!=NULL
&& itemKey
!= replacement
){
2117 item
= ures_getStringByKeyWithFallback(&table
, replacement
, pLength
, &errorCode
);
2118 if(U_SUCCESS(errorCode
)){
2119 *pErrorCode
= errorCode
;
2128 if(U_FAILURE(errorCode
)){
2130 /* still can't figure out ?.. try the fallback mechanism */
2132 const UChar
* fallbackLocale
= NULL
;
2133 *pErrorCode
= errorCode
;
2134 errorCode
= U_ZERO_ERROR
;
2136 fallbackLocale
= ures_getStringByKeyWithFallback(&table
, "Fallback", &len
, &errorCode
);
2137 if(U_FAILURE(errorCode
)){
2138 *pErrorCode
= errorCode
;
2142 u_UCharsToChars(fallbackLocale
, explicitFallbackName
, len
);
2144 /* guard against recursive fallback */
2145 if(uprv_strcmp(explicitFallbackName
, locale
)==0){
2146 *pErrorCode
= U_INTERNAL_PROGRAM_ERROR
;
2150 rb
= ures_open(NULL
, explicitFallbackName
, &errorCode
);
2151 if(U_FAILURE(errorCode
)){
2152 *pErrorCode
= errorCode
;
2155 /* succeeded in opening the fallback bundle .. continue and try to fetch the item */
2160 /* done with the locale string - ready to close table and rb */
2161 ures_close(&subTable
);
2168 _getStringOrCopyKey(const char *path
, const char *locale
,
2169 const char *tableKey
,
2170 const char* subTableKey
,
2171 const char *itemKey
,
2172 const char *substitute
,
2173 UChar
*dest
, int32_t destCapacity
,
2174 UErrorCode
*pErrorCode
) {
2175 const UChar
*s
= NULL
;
2179 /* top-level item: normal resource bundle access */
2180 UResourceBundle
*rb
;
2182 rb
=ures_open(path
, locale
, pErrorCode
);
2183 if(U_SUCCESS(*pErrorCode
)) {
2184 s
=ures_getStringByKey(rb
, tableKey
, &length
, pErrorCode
);
2185 /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
2189 /* second-level item, use special fallback */
2190 s
=_res_getTableStringWithFallback(path
, locale
,
2197 if(U_SUCCESS(*pErrorCode
)) {
2198 int32_t copyLength
=uprv_min(length
, destCapacity
);
2199 if(copyLength
>0 && s
!= NULL
) {
2200 u_memcpy(dest
, s
, copyLength
);
2203 /* no string from a resource bundle: convert the substitute */
2204 length
=(int32_t)uprv_strlen(substitute
);
2205 u_charsToUChars(substitute
, dest
, uprv_min(length
, destCapacity
));
2206 *pErrorCode
=U_USING_DEFAULT_WARNING
;
2209 return u_terminateUChars(dest
, destCapacity
, length
, pErrorCode
);
2213 _getDisplayNameForComponent(const char *locale
,
2214 const char *displayLocale
,
2215 UChar
*dest
, int32_t destCapacity
,
2216 int32_t (*getter
)(const char *, char *, int32_t, UErrorCode
*),
2218 UErrorCode
*pErrorCode
) {
2219 char localeBuffer
[ULOC_FULLNAME_CAPACITY
*4];
2221 UErrorCode localStatus
;
2223 /* argument checking */
2224 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2228 if(destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)) {
2229 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2233 localStatus
= U_ZERO_ERROR
;
2234 length
=(*getter
)(locale
, localeBuffer
, sizeof(localeBuffer
), &localStatus
);
2235 if(U_FAILURE(localStatus
) || localStatus
==U_STRING_NOT_TERMINATED_WARNING
) {
2236 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2240 return u_terminateUChars(dest
, destCapacity
, 0, pErrorCode
);
2243 return _getStringOrCopyKey(NULL
, displayLocale
,
2244 tag
, NULL
, localeBuffer
,
2250 U_CAPI
int32_t U_EXPORT2
2251 uloc_getDisplayLanguage(const char *locale
,
2252 const char *displayLocale
,
2253 UChar
*dest
, int32_t destCapacity
,
2254 UErrorCode
*pErrorCode
) {
2255 return _getDisplayNameForComponent(locale
, displayLocale
, dest
, destCapacity
,
2256 uloc_getLanguage
, _kLanguages
, pErrorCode
);
2259 U_CAPI
int32_t U_EXPORT2
2260 uloc_getDisplayScript(const char* locale
,
2261 const char* displayLocale
,
2262 UChar
*dest
, int32_t destCapacity
,
2263 UErrorCode
*pErrorCode
)
2265 return _getDisplayNameForComponent(locale
, displayLocale
, dest
, destCapacity
,
2266 uloc_getScript
, _kScripts
, pErrorCode
);
2269 U_CAPI
int32_t U_EXPORT2
2270 uloc_getDisplayCountry(const char *locale
,
2271 const char *displayLocale
,
2272 UChar
*dest
, int32_t destCapacity
,
2273 UErrorCode
*pErrorCode
) {
2274 return _getDisplayNameForComponent(locale
, displayLocale
, dest
, destCapacity
,
2275 uloc_getCountry
, _kCountries
, pErrorCode
);
2279 * TODO separate variant1_variant2_variant3...
2280 * by getting each tag's display string and concatenating them with ", "
2281 * in between - similar to uloc_getDisplayName()
2283 U_CAPI
int32_t U_EXPORT2
2284 uloc_getDisplayVariant(const char *locale
,
2285 const char *displayLocale
,
2286 UChar
*dest
, int32_t destCapacity
,
2287 UErrorCode
*pErrorCode
) {
2288 return _getDisplayNameForComponent(locale
, displayLocale
, dest
, destCapacity
,
2289 uloc_getVariant
, _kVariants
, pErrorCode
);
2292 U_CAPI
int32_t U_EXPORT2
2293 uloc_getDisplayName(const char *locale
,
2294 const char *displayLocale
,
2295 UChar
*dest
, int32_t destCapacity
,
2296 UErrorCode
*pErrorCode
)
2298 int32_t length
, length2
, length3
= 0;
2299 UBool hasLanguage
, hasScript
, hasCountry
, hasVariant
, hasKeywords
;
2300 UEnumeration
* keywordEnum
= NULL
;
2301 int32_t keywordCount
= 0;
2302 const char *keyword
= NULL
;
2303 int32_t keywordLen
= 0;
2304 char keywordValue
[256];
2305 int32_t keywordValueLen
= 0;
2307 /* argument checking */
2308 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2312 if(destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)) {
2313 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2318 * if there is a language, then write "language (country, variant)"
2319 * otherwise write "country, variant"
2322 /* write the language */
2323 length
=uloc_getDisplayLanguage(locale
, displayLocale
,
2326 hasLanguage
= length
>0;
2330 if(length
<destCapacity
) {
2334 if(length
<destCapacity
) {
2340 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2341 /* keep preflighting */
2342 *pErrorCode
=U_ZERO_ERROR
;
2345 /* append the script */
2346 if(length
<destCapacity
) {
2347 length2
=uloc_getDisplayScript(locale
, displayLocale
,
2348 dest
+length
, destCapacity
-length
,
2351 length2
=uloc_getDisplayScript(locale
, displayLocale
,
2355 hasScript
= length2
>0;
2360 if(length
<destCapacity
) {
2364 if(length
<destCapacity
) {
2370 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2371 /* keep preflighting */
2372 *pErrorCode
=U_ZERO_ERROR
;
2375 /* append the country */
2376 if(length
<destCapacity
) {
2377 length2
=uloc_getDisplayCountry(locale
, displayLocale
,
2378 dest
+length
, destCapacity
-length
,
2381 length2
=uloc_getDisplayCountry(locale
, displayLocale
,
2385 hasCountry
= length2
>0;
2390 if(length
<destCapacity
) {
2394 if(length
<destCapacity
) {
2400 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2401 /* keep preflighting */
2402 *pErrorCode
=U_ZERO_ERROR
;
2405 /* append the variant */
2406 if(length
<destCapacity
) {
2407 length2
=uloc_getDisplayVariant(locale
, displayLocale
,
2408 dest
+length
, destCapacity
-length
,
2411 length2
=uloc_getDisplayVariant(locale
, displayLocale
,
2415 hasVariant
= length2
>0;
2420 if(length
<destCapacity
) {
2424 if(length
<destCapacity
) {
2430 keywordEnum
= uloc_openKeywords(locale
, pErrorCode
);
2432 for(keywordCount
= uenum_count(keywordEnum
, pErrorCode
); keywordCount
> 0 ; keywordCount
--){
2433 if(U_FAILURE(*pErrorCode
)){
2436 /* the uenum_next returns NUL terminated string */
2437 keyword
= uenum_next(keywordEnum
, &keywordLen
, pErrorCode
);
2438 if(length
+ length3
< destCapacity
) {
2439 length3
+= uloc_getDisplayKeyword(keyword
, displayLocale
, dest
+length
+length3
, destCapacity
-length
-length3
, pErrorCode
);
2441 length3
+= uloc_getDisplayKeyword(keyword
, displayLocale
, NULL
, 0, pErrorCode
);
2443 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2444 /* keep preflighting */
2445 *pErrorCode
=U_ZERO_ERROR
;
2447 keywordValueLen
= uloc_getKeywordValue(locale
, keyword
, keywordValue
, 256, pErrorCode
);
2448 if(keywordValueLen
) {
2449 if(length
+ length3
< destCapacity
) {
2450 dest
[length
+ length3
] = 0x3D;
2453 if(length
+ length3
< destCapacity
) {
2454 length3
+= uloc_getDisplayKeywordValue(locale
, keyword
, displayLocale
, dest
+length
+length3
, destCapacity
-length
-length3
, pErrorCode
);
2456 length3
+= uloc_getDisplayKeywordValue(locale
, keyword
, displayLocale
, NULL
, 0, pErrorCode
);
2458 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2459 /* keep preflighting */
2460 *pErrorCode
=U_ZERO_ERROR
;
2463 if(keywordCount
> 1) {
2464 if(length
+ length3
+ 1 < destCapacity
&& keywordCount
) {
2465 dest
[length
+ length3
]=0x2c;
2466 dest
[length
+ length3
+1]=0x20;
2468 length3
++; /* ',' */
2469 length3
++; /* ' ' */
2472 uenum_close(keywordEnum
);
2474 hasKeywords
= length3
> 0;
2479 if ((hasScript
&& !hasCountry
)
2480 || ((hasScript
|| hasCountry
) && !hasVariant
&& !hasKeywords
)
2481 || ((hasScript
|| hasCountry
|| hasVariant
) && !hasKeywords
)
2482 || (hasLanguage
&& !hasScript
&& !hasCountry
&& !hasVariant
&& !hasKeywords
))
2484 /* remove ", " or " (" */
2488 if (hasLanguage
&& (hasScript
|| hasCountry
|| hasVariant
|| hasKeywords
)) {
2490 if(length
<destCapacity
) {
2496 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2497 /* keep preflighting */
2498 *pErrorCode
=U_ZERO_ERROR
;
2501 return u_terminateUChars(dest
, destCapacity
, length
, pErrorCode
);
2504 U_CAPI
int32_t U_EXPORT2
2505 uloc_getDisplayKeyword(const char* keyword
,
2506 const char* displayLocale
,
2508 int32_t destCapacity
,
2509 UErrorCode
* status
){
2511 /* argument checking */
2512 if(status
==NULL
|| U_FAILURE(*status
)) {
2516 if(destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)) {
2517 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
2522 /* pass itemKey=NULL to look for a top-level item */
2523 return _getStringOrCopyKey(NULL
, displayLocale
,
2533 #define UCURRENCY_DISPLAY_NAME_INDEX 1
2535 U_CAPI
int32_t U_EXPORT2
2536 uloc_getDisplayKeywordValue( const char* locale
,
2537 const char* keyword
,
2538 const char* displayLocale
,
2540 int32_t destCapacity
,
2541 UErrorCode
* status
){
2544 char keywordValue
[ULOC_FULLNAME_CAPACITY
*4];
2545 int32_t capacity
= ULOC_FULLNAME_CAPACITY
*4;
2546 int32_t keywordValueLen
=0;
2548 /* argument checking */
2549 if(status
==NULL
|| U_FAILURE(*status
)) {
2553 if(destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)) {
2554 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
2558 /* get the keyword value */
2560 keywordValueLen
= uloc_getKeywordValue(locale
, keyword
, keywordValue
, capacity
, status
);
2563 * if the keyword is equal to currency .. then to get the display name
2564 * we need to do the fallback ourselves
2566 if(uprv_stricmp(keyword
, _kCurrency
)==0){
2568 int32_t dispNameLen
= 0;
2569 const UChar
*dispName
= NULL
;
2571 UResourceBundle
*bundle
= ures_open(NULL
, displayLocale
, status
);
2572 UResourceBundle
*currencies
= ures_getByKey(bundle
, _kCurrencies
, NULL
, status
);
2573 UResourceBundle
*currency
= ures_getByKeyWithFallback(currencies
, keywordValue
, NULL
, status
);
2575 dispName
= ures_getStringByIndex(currency
, UCURRENCY_DISPLAY_NAME_INDEX
, &dispNameLen
, status
);
2577 /*close the bundles */
2578 ures_close(currency
);
2579 ures_close(currencies
);
2582 if(U_FAILURE(*status
)){
2583 if(*status
== U_MISSING_RESOURCE_ERROR
){
2584 /* we just want to write the value over if nothing is available */
2585 *status
= U_USING_DEFAULT_WARNING
;
2591 /* now copy the dispName over if not NULL */
2592 if(dispName
!= NULL
){
2593 if(dispNameLen
<= destCapacity
){
2594 uprv_memcpy(dest
, dispName
, dispNameLen
* U_SIZEOF_UCHAR
);
2595 return u_terminateUChars(dest
, destCapacity
, dispNameLen
, status
);
2597 *status
= U_BUFFER_OVERFLOW_ERROR
;
2601 /* we have not found the display name for the value .. just copy over */
2602 if(keywordValueLen
<= destCapacity
){
2603 u_charsToUChars(keywordValue
, dest
, keywordValueLen
);
2604 return u_terminateUChars(dest
, destCapacity
, keywordValueLen
, status
);
2606 *status
= U_BUFFER_OVERFLOW_ERROR
;
2607 return keywordValueLen
;
2614 return _getStringOrCopyKey(NULL
, displayLocale
,
2623 /* ### Get available **************************************************/
2625 static UBool U_CALLCONV
uloc_cleanup(void) {
2628 if (_installedLocales
) {
2629 temp
= _installedLocales
;
2630 _installedLocales
= NULL
;
2632 _installedLocalesCount
= 0;
2639 static void _load_installedLocales()
2641 UBool localesLoaded
;
2644 localesLoaded
= _installedLocales
!= NULL
;
2647 if (localesLoaded
== FALSE
) {
2648 UResourceBundle
*index
= NULL
;
2649 UResourceBundle installed
;
2650 UErrorCode status
= U_ZERO_ERROR
;
2653 int32_t localeCount
;
2655 ures_initStackObject(&installed
);
2656 index
= ures_openDirect(NULL
, _kIndexLocaleName
, &status
);
2657 ures_getByKey(index
, _kIndexTag
, &installed
, &status
);
2659 if(U_SUCCESS(status
)) {
2660 localeCount
= ures_getSize(&installed
);
2661 temp
= (char **) uprv_malloc(sizeof(char*) * (localeCount
+1));
2663 ures_resetIterator(&installed
);
2664 while(ures_hasNext(&installed
)) {
2665 ures_getNextString(&installed
, NULL
, (const char **)&temp
[i
++], &status
);
2670 if (_installedLocales
== NULL
)
2672 _installedLocales
= temp
;
2673 _installedLocalesCount
= localeCount
;
2675 ucln_common_registerCleanup(UCLN_COMMON_ULOC
, uloc_cleanup
);
2680 ures_close(&installed
);
2686 U_CAPI
const char* U_EXPORT2
2687 uloc_getAvailable(int32_t offset
)
2690 _load_installedLocales();
2692 if (offset
> _installedLocalesCount
)
2694 return _installedLocales
[offset
];
2697 U_CAPI
int32_t U_EXPORT2
2698 uloc_countAvailable()
2700 _load_installedLocales();
2701 return _installedLocalesCount
;
2705 * Returns a list of all language codes defined in ISO 639. This is a pointer
2706 * to an array of pointers to arrays of char. All of these pointers are owned
2707 * by ICU-- do not delete them, and do not write through them. The array is
2708 * terminated with a null pointer.
2710 U_CAPI
const char* const* U_EXPORT2
2711 uloc_getISOLanguages()
2717 * Returns a list of all 2-letter country codes defined in ISO 3166. This is a
2718 * pointer to an array of pointers to arrays of char. All of these pointers are
2719 * owned by ICU-- do not delete them, and do not write through them. The array is
2720 * terminated with a null pointer.
2722 U_CAPI
const char* const* U_EXPORT2
2723 uloc_getISOCountries()
2729 /* this function to be moved into cstring.c later */
2730 static char gDecimal
= 0;
2735 _uloc_strtod(const char *start
, char **end
) {
2742 /* For machines that decide to change the decimal on you,
2743 and try to be too smart with localization.
2744 This normally should be just a '.'. */
2745 sprintf(rep
, "%+1.1f", 1.0);
2749 if(gDecimal
== '.') {
2750 return uprv_strtod(start
, end
); /* fall through to OS */
2752 uprv_strncpy(buf
, start
, 29);
2754 decimal
= uprv_strchr(buf
, '.');
2756 *decimal
= gDecimal
;
2758 return uprv_strtod(start
, end
); /* no decimal point */
2760 rv
= uprv_strtod(buf
, &myEnd
);
2762 *end
= (char*)(start
+(myEnd
-buf
)); /* cast away const (to follow uprv_strtod API.) */
2771 #if defined(ULOC_DEBUG_PURIFY)
2772 int32_t dummy
; /* to avoid uninitialized memory copy from qsort */
2776 static int32_t U_CALLCONV
2777 uloc_acceptLanguageCompare(const void *context
, const void *a
, const void *b
)
2779 const _acceptLangItem
*aa
= (const _acceptLangItem
*)a
;
2780 const _acceptLangItem
*bb
= (const _acceptLangItem
*)b
;
2784 rc
= -1; /* A > B */
2785 } else if(bb
->q
> aa
->q
) {
2792 rc
= uprv_stricmp(aa
->locale
, bb
->locale
);
2795 #if defined(ULOC_DEBUG)
2796 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2806 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2809 U_CAPI
int32_t U_EXPORT2
2810 uloc_acceptLanguageFromHTTP(char *result
, int32_t resultAvailable
, UAcceptResult
*outResult
,
2811 const char *httpAcceptLanguage
,
2812 UEnumeration
* availableLocales
,
2816 _acceptLangItem smallBuffer
[30];
2818 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2820 const char *itemEnd
;
2821 const char *paramEnd
;
2826 int32_t l
= (int32_t)uprv_strlen(httpAcceptLanguage
);
2830 jSize
= sizeof(smallBuffer
)/sizeof(smallBuffer
[0]);
2831 if(U_FAILURE(*status
)) {
2835 for(s
=httpAcceptLanguage
;s
&&*s
;) {
2836 while(isspace(*s
)) /* eat space at the beginning */
2838 itemEnd
=uprv_strchr(s
,',');
2839 paramEnd
=uprv_strchr(s
,';');
2841 itemEnd
= httpAcceptLanguage
+l
; /* end of string */
2843 if(paramEnd
&& paramEnd
<itemEnd
) {
2844 /* semicolon (;) is closer than end (,) */
2849 while(isspace(*t
)) {
2855 while(isspace(*t
)) {
2858 j
[n
].q
= _uloc_strtod(t
,NULL
);
2860 /* no semicolon - it's 1.0 */
2864 #if defined(ULOC_DEBUG_PURIFY)
2865 j
[n
].dummy
=0xDECAFBAD;
2867 /* eat spaces prior to semi */
2868 for(t
=(paramEnd
-1);(paramEnd
>s
)&&isspace(*t
);t
--)
2870 j
[n
].locale
= uprv_strndup(s
,(int32_t)((t
+1)-s
));
2871 uloc_canonicalize(j
[n
].locale
,tmp
,sizeof(tmp
)/sizeof(tmp
[0]),status
);
2872 if(strcmp(j
[n
].locale
,tmp
)) {
2873 uprv_free(j
[n
].locale
);
2874 j
[n
].locale
=uprv_strdup(tmp
);
2876 #if defined(ULOC_DEBUG)
2877 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2881 while(*s
==',') { /* eat duplicate commas */
2885 if(j
==smallBuffer
) { /* overflowed the small buffer. */
2886 j
= uprv_malloc(sizeof(j
[0])*(jSize
*2));
2888 uprv_memcpy(j
,smallBuffer
,sizeof(j
[0])*jSize
);
2890 #if defined(ULOC_DEBUG)
2891 fprintf(stderr
,"malloced at size %d\n", jSize
);
2894 j
= uprv_realloc(j
, sizeof(j
[0])*jSize
*2);
2895 #if defined(ULOC_DEBUG)
2896 fprintf(stderr
,"re-alloced at size %d\n", jSize
);
2901 *status
= U_MEMORY_ALLOCATION_ERROR
;
2906 uprv_sortArray(j
, n
, sizeof(j
[0]), uloc_acceptLanguageCompare
, NULL
, TRUE
, status
);
2907 if(U_FAILURE(*status
)) {
2908 if(j
!= smallBuffer
) {
2909 #if defined(ULOC_DEBUG)
2910 fprintf(stderr
,"freeing j %p\n", j
);
2916 strs
= uprv_malloc((size_t)(sizeof(strs
[0])*n
));
2918 #if defined(ULOC_DEBUG)
2919 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2921 strs
[i
]=j
[i
].locale
;
2923 res
= uloc_acceptLanguage(result
, resultAvailable
, outResult
,
2924 (const char**)strs
, n
, availableLocales
, status
);
2929 if(j
!= smallBuffer
) {
2930 #if defined(ULOC_DEBUG)
2931 fprintf(stderr
,"freeing j %p\n", j
);
2939 U_CAPI
int32_t U_EXPORT2
2940 uloc_acceptLanguage(char *result
, int32_t resultAvailable
,
2941 UAcceptResult
*outResult
, const char **acceptList
,
2942 int32_t acceptListCount
,
2943 UEnumeration
* availableLocales
,
2949 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2951 char **fallbackList
;
2952 if(U_FAILURE(*status
)) {
2955 fallbackList
= uprv_malloc((size_t)(sizeof(fallbackList
[0])*acceptListCount
));
2956 if(fallbackList
==NULL
) {
2957 *status
= U_MEMORY_ALLOCATION_ERROR
;
2960 for(i
=0;i
<acceptListCount
;i
++) {
2961 #if defined(ULOC_DEBUG)
2962 fprintf(stderr
,"%02d: %s\n", i
, acceptList
[i
]);
2964 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2965 #if defined(ULOC_DEBUG)
2966 fprintf(stderr
," %s\n", l
);
2968 len
= (int32_t)uprv_strlen(l
);
2969 if(!uprv_strcmp(acceptList
[i
], l
)) {
2971 *outResult
= ULOC_ACCEPT_VALID
;
2973 #if defined(ULOC_DEBUG)
2974 fprintf(stderr
, "MATCH! %s\n", l
);
2977 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
2980 uprv_free(fallbackList
[j
]);
2982 uprv_free(fallbackList
);
2983 return u_terminateChars(result
, resultAvailable
, len
, status
);
2989 uenum_reset(availableLocales
, status
);
2990 /* save off parent info */
2991 if(uloc_getParent(acceptList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
2992 fallbackList
[i
] = uprv_strdup(tmp
);
2998 for(maxLen
--;maxLen
>0;maxLen
--) {
2999 for(i
=0;i
<acceptListCount
;i
++) {
3000 if(fallbackList
[i
] && ((int32_t)uprv_strlen(fallbackList
[i
])==maxLen
)) {
3001 #if defined(ULOC_DEBUG)
3002 fprintf(stderr
,"Try: [%s]", fallbackList
[i
]);
3004 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
3005 #if defined(ULOC_DEBUG)
3006 fprintf(stderr
," %s\n", l
);
3008 len
= (int32_t)uprv_strlen(l
);
3009 if(!uprv_strcmp(fallbackList
[i
], l
)) {
3011 *outResult
= ULOC_ACCEPT_FALLBACK
;
3013 #if defined(ULOC_DEBUG)
3014 fprintf(stderr
, "fallback MATCH! %s\n", l
);
3017 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
3019 for(j
=0;j
<acceptListCount
;j
++) {
3020 uprv_free(fallbackList
[j
]);
3022 uprv_free(fallbackList
);
3023 return u_terminateChars(result
, resultAvailable
, len
, status
);
3026 uenum_reset(availableLocales
, status
);
3028 if(uloc_getParent(fallbackList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
3029 uprv_free(fallbackList
[i
]);
3030 fallbackList
[i
] = uprv_strdup(tmp
);
3032 uprv_free(fallbackList
[i
]);
3038 *outResult
= ULOC_ACCEPT_FAILED
;
3041 for(i
=0;i
<acceptListCount
;i
++) {
3042 uprv_free(fallbackList
[i
]);
3044 uprv_free(fallbackList
);