2 **********************************************************************
3 * Copyright (C) 1997-2004, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
9 * Modification History:
11 * Date Name Description
12 * 04/01/97 aliu Creation.
13 * 08/21/98 stephen JDK 1.2 sync
14 * 12/08/98 rtg New Locale implementation and C API
15 * 03/15/99 damiba overhaul.
16 * 04/06/99 stephen changed setDefault() to realloc and copy
17 * 06/14/99 stephen Changed calls to ures_open for new params
18 * 07/21/99 stephen Modified setDefault() to propagate to C++
19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20 * brought canonicalization code into line with spec
21 *****************************************************************************/
/*
 * POSIX's locale format, from putil.c (written without spaces):
 *
 *     ll [ _CC ] [ . MM ] [ @ VV ]
 *
 *     l = lang, C = ctry, M = charmap, V = variant
 */
31 #include "unicode/utypes.h"
32 #include "unicode/ustring.h"
33 #include "unicode/uloc.h"
48 #include <stdio.h> /* for sprintf */
50 /* ### Declarations **************************************************/
52 /* Locale stuff from locid.cpp */
53 U_CFUNC
void locale_set_default(const char *id
);
54 U_CFUNC
const char *locale_get_default(void);
56 locale_getKeywords(const char *localeID
,
58 char *keywords
, int32_t keywordCapacity
,
59 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
63 /* ### Constants **************************************************/
/* These strings describe the resources we attempt to load from
   the locale ResourceBundle data file. */
static const char _kLanguages[]       = "Languages";
static const char _kScripts[]         = "Scripts";
static const char _kCountries[]       = "Countries";
static const char _kVariants[]        = "Variants";
static const char _kKeys[]            = "Keys";
static const char _kTypes[]           = "Types";
static const char _kIndexLocaleName[] = "res_index";
static const char _kRootName[]        = "root";
static const char _kIndexTag[]        = "InstalledLocales";
static const char _kCurrency[]        = "currency";
static const char _kCurrencies[]      = "Currencies";

/* List of installed locale names and its element count; both start out
   empty (NULL / 0). */
static char **_installedLocales       = NULL;
static int32_t _installedLocalesCount = 0;
81 /* ### Data tables **************************************************/
/**
 * Table of language codes, both 2- and 3-letter, with preference
 * given to 2-letter codes where possible.  Includes 3-letter codes
 * that lack a 2-letter equivalent.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with LANGUAGES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 * include the revisions up to 2001/7/27 *CWB*
 *
 * The 3 character codes are the terminology codes like RFC 3066.  This
 * is compatible with prior ICU codes.
 *
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
 * table, now at the end of the table because their 3 character codes
 * are duplicates.  This avoids bad searches going from 3 to 2 character
 * codes.
 *
 * The range qaa-qtz is reserved for local use.
 *
 * NOTE(review): the first list ends at "znd"; no "zu"/"zun" entries are
 * present.  Confirm against upstream whether trailing entries are missing.
 * Anything added here needs its counterpart at the same index in
 * LANGUAGES_3.
 */
static const char * const LANGUAGES[] = {
    "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa",
    "afh", "ak", "akk", "ale", "alg", "am", "an", "ang", "apa",
    "ar", "arc", "arn", "arp", "art", "arw", "as", "ast",
    "ath", "aus", "av", "awa", "ay", "az", "ba", "bad",
    "bai", "bal", "ban", "bas", "bat", "be", "bej",
    "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin",
    "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs",
    "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau",
    "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop",
    "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus",
    "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den",
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu",
    "dz", "ee", "efi", "egy", "eka", "el", "elx", "en",
    "enm", "eo", "es", "et", "eu", "ewo", "fa",
    "fan", "fat", "ff", "fi", "fiu", "fj", "fo", "fon",
    "fr", "frm", "fro", "fur", "fy", "ga", "gaa", "gay",
    "gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn",
    "goh", "gon", "gor", "got", "grb", "grc", "gu", "gv",
    "gwi", "ha", "hai", "haw", "he", "hi", "hil", "him",
    "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz",
    "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik",
    "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it",
    "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab",
    "kac", "kam", "kar", "kaw", "kbd", "kg", "kha", "khi",
    "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn",
    "ko", "kok", "kos", "kpe", "kr", "krc", "kro", "kru", "ks",
    "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad",
    "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol",
    "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus",
    "lv", "mad", "mag", "mai", "mak", "man", "map", "mas",
    "mdf", "mdr", "men", "mg", "mga", "mh", "mi", "mic", "min",
    "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno",
    "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun",
    "mus", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap",
    "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic",
    "niu", "nl", "nn", "no", "nog", "non", "nr", "nso", "nub",
    "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj",
    "om", "or", "os", "osa", "ota", "oto", "pa", "paa",
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu",
    "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom",
    "ru", "rw", "sa", "sad", "sah", "sai", "sal", "sam",
    "sas", "sat", "sc", "sco", "sd", "se", "sel", "sem",
    "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit",
    "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
    "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
    "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux",
    "sv", "sw", "syr", "ta", "tai", "te", "tem", "ter",
    "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl",
    "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr",
    "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
    "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur",
    "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak",
    "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap",
    "yi", "yo", "ypk", "za", "zap", "zen", "zh", "znd",
NULL,   /* end of the list exposed through user API */
    "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
NULL    /* end of the unexposed list */
};
/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].  The commented-out lines are
 * copied from LANGUAGES to make eyeballing this baby easier.
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in LANGUAGES.
 */
static const char * const LANGUAGES_3[] = {
/*  "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa", */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
/*  "afh", "ak", "akk", "ale", "alg", "am", "an", "ang", "apa", */
    "afh", "aka", "akk", "ale", "alg", "amh", "arg", "ang", "apa",
/*  "ar", "arc", "arn", "arp", "art", "arw", "as", "ast", */
    "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
/*  "ath", "aus", "av", "awa", "ay", "az", "ba", "bad", */
    "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
/*  "bai", "bal", "ban", "bas", "bat", "be", "bej", */
    "bai", "bal", "ban", "bas", "bat", "bel", "bej",
/*  "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin", */
    "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
/*  "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs", */
    "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
/*  "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau", */
    "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
/*  "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm", */
    "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
/*  "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop", */
    "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
/*  "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus", */
    "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
/*  "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den", */
    "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
/*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu", */
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
/*  "dz", "ee", "efi", "egy", "eka", "el", "elx", "en", */
    "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
/*  "enm", "eo", "es", "et", "eu", "ewo", "fa", */
    "enm", "epo", "spa", "est", "eus", "ewo", "fas",
/*  "fan", "fat", "ff", "fi", "fiu", "fj", "fo", "fon", */
    "fan", "fat", "ful", "fin", "fiu", "fij", "fao", "fon",
/*  "fr", "frm", "fro", "fur", "fy", "ga", "gaa", "gay", */
    "fra", "frm", "fro", "fur", "fry", "gle", "gaa", "gay",
/*  "gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn", */
    "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
/*  "goh", "gon", "gor", "got", "grb", "grc", "gu", "gv", */
    "goh", "gon", "gor", "got", "grb", "grc", "guj", "glv",
/*  "gwi", "ha", "hai", "haw", "he", "hi", "hil", "him", */
    "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
/*  "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz", */
    "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
/*  "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik", */
    "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
/*  "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it", */
    "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
/*  "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab", */
    "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
/*  "kac", "kam", "kar", "kaw", "kbd", "kg", "kha", "khi", */
    "kac", "kam", "kar", "kaw", "kbd", "kon", "kha", "khi",
/*  "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn", */
    "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
/*  "ko", "kok", "kos", "kpe", "kr", "krc", "kro", "kru", "ks", */
    "kor", "kok", "kos", "kpe", "kau", "krc", "kro", "kru", "kas",
/*  "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", */
    "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
/*  "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol", */
    "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
/*  "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", */
    "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
/*  "lv", "mad", "mag", "mai", "mak", "man", "map", "mas", */
    "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
/*  "mdf", "mdr", "men", "mg", "mga", "mh", "mi", "mic", "min", */
    "mdf", "mdr", "men", "mlg", "mga", "mah", "mri", "mic", "min",
/*  "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno", */
    "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
/*  "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun", */
    "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
/*  "mus", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap", */
    "mus", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
/*  "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic", */
    "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
/*  "niu", "nl", "nn", "no", "nog", "non", "nr", "nso", "nub", */
    "niu", "nld", "nno", "nor", "nog", "non", "nbl", "nso", "nub",
/*  "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj", */
    "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
/*  "om", "or", "os", "osa", "ota", "oto", "pa", "paa", */
    "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
/*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", */
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
/*  "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu", */
    "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
/*  "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom", */
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
/*  "ru", "rw", "sa", "sad", "sah", "sai", "sal", "sam", */
    "rus", "kin", "san", "sad", "sah", "sai", "sal", "sam",
/*  "sas", "sat", "sc", "sco", "sd", "se", "sel", "sem", */
    "sas", "sat", "srd", "sco", "snd", "sme", "sel", "sem",
/*  "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit", */
    "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
/*  "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", */
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
/*  "sms", "sn", "snk", "so", "sog", "son", "sq", "sr", */
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
/*  "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux", */
    "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
/*  "sv", "sw", "syr", "ta", "tai", "te", "tem", "ter", */
    "swe", "swa", "syr", "tam", "tai", "tel", "tem", "ter",
/*  "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", */
    "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
/*  "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", */
    "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur",
/*  "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", */
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
/*  "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur", */
    "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
/*  "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak", */
    "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
/*  "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap", */
    "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
/*  "yi", "yo", "ypk", "za", "zap", "zen", "zh", "znd", */
    "yid", "yor", "ypk", "zha", "zap", "zen", "zho", "znd",
NULL,   /* end of the list that parallels the exposed part of LANGUAGES */
/*  "in", "iw", "ji", "jw", "sh", */
    "ind", "heb", "yid", "jaw", "srp",
NULL    /* end of the list that parallels the obsolete codes */
};
/**
 * Table of 2-letter country codes.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with COUNTRIES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html; added
 * new codes keeping the old ones for compatibility; updated to include
 * 1999/12/03 revisions *CWB*
 *
 * RO(ROM) is now RO(ROU) according to
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 */
static const char * const COUNTRIES[] = {
    "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN",
    "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AZ",
    "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
    "BJ", "BM", "BN", "BO", "BR", "BS", "BT", "BV",
    "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
    "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
    "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK",
    "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
    "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
    "GA", "GB", "GD", "GE", "GF", "GH", "GI", "GL",
    "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
    "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
    "ID", "IE", "IL", "IN", "IO", "IQ", "IR", "IS",
    "IT", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
    "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
    "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
    "LV", "LY", "MA", "MC", "MD", "MG", "MH", "MK",
    "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
    "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
    "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
    "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
    "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
    "PW", "PY", "QA", "RE", "RO", "RU", "RW", "SA",
    "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
    "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV",
    "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
    "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
    "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
    "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
    "WS", "YE", "YT", "YU", "ZA", "ZM", "ZW",
NULL,   /* end of the list exposed through user API */
    "FX", "RO", "TP", "ZR", /* obsolete country codes */
NULL    /* end of the unexposed list */
};
/**
 * Table of 3-letter country codes.
 *
 * This is a lookup table used to convert 3-letter country codes to
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 * For all valid i, COUNTRIES[i] must refer to the same country as
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 * to make eyeballing this baby easier.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in COUNTRIES.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",     */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IN",  "IO",  "IQ",  "IR",  "IS",     */
    "IDN", "IRL", "ISR", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
/*  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "YU",  "ZA",  "ZM",  "ZW",            */
    "WSM", "YEM", "MYT", "YUG", "ZAF", "ZMB", "ZWE",
NULL,   /* end of the list that parallels the exposed part of COUNTRIES */
/*  "FX",  "RO",  "TP",  "ZR",   */
    "FXX", "ROM", "TMP", "ZAR",
NULL    /* end of the list that parallels the obsolete codes */
};
/**
 * One row of the locale-ID canonicalization table: an input ID, the ID it
 * is rewritten to, and an optional keyword/value pair that accompanies the
 * rewritten ID.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} CanonicalizationMap;

/**
 * A map to canonicalize locale IDs.  This handles a variety of
 * different semantic kinds of transformations.
 *
 * In every row keyword and value are either both NULL or both set.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "",                  "en_US_POSIX", NULL, NULL }, /* .NET name */
    { "C",                 "en_US_POSIX", NULL, NULL }, /* POSIX name */
    { "art_LOJBAN",        "jbo",         NULL, NULL }, /* registered name */
    { "az_AZ_CYRL",        "az_Cyrl_AZ",  NULL, NULL }, /* .NET name */
    { "az_AZ_LATN",        "az_Latn_AZ",  NULL, NULL }, /* .NET name */
    { "ca_ES_PREEURO",     "ca_ES",       "currency", "ESP" },
    { "cel_GAULISH",       "cel__GAULISH", NULL, NULL }, /* registered name */
    { "de_1901",           "de__1901",    NULL, NULL }, /* registered name */
    { "de_1906",           "de__1906",    NULL, NULL }, /* registered name */
    { "de__PHONEBOOK",     "de",          "collation", "phonebook" },
    { "de_AT_PREEURO",     "de_AT",       "currency", "ATS" },
    { "de_DE_PREEURO",     "de_DE",       "currency", "DEM" },
    { "de_LU_PREEURO",     "de_LU",       "currency", "LUF" },
    { "el_GR_PREEURO",     "el_GR",       "currency", "GRD" },
    { "en_BOONT",          "en__BOONT",   NULL, NULL }, /* registered name */
    { "en_SCOUSE",         "en__SCOUSE",  NULL, NULL }, /* registered name */
    { "en_BE_PREEURO",     "en_BE",       "currency", "BEF" },
    { "en_IE_PREEURO",     "en_IE",       "currency", "IEP" },
    { "es__TRADITIONAL",   "es",          "collation", "traditional" },
    { "es_ES_PREEURO",     "es_ES",       "currency", "ESP" },
    { "eu_ES_PREEURO",     "eu_ES",       "currency", "ESP" },
    { "fi_FI_PREEURO",     "fi_FI",       "currency", "FIM" },
    { "fr_BE_PREEURO",     "fr_BE",       "currency", "BEF" },
    { "fr_FR_PREEURO",     "fr_FR",       "currency", "FRF" },
    { "fr_LU_PREEURO",     "fr_LU",       "currency", "LUF" },
    { "ga_IE_PREEURO",     "ga_IE",       "currency", "IEP" },
    { "gl_ES_PREEURO",     "gl_ES",       "currency", "ESP" },
    { "hi__DIRECT",        "hi",          "collation", "direct" },
    { "it_IT_PREEURO",     "it_IT",       "currency", "ITL" },
    { "ja_JP_TRADITIONAL", "ja_JP",       "calendar", "japanese" },
    { "nb_NO_NY",          "nn_NO",       NULL, NULL }, /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO",     "nl_BE",       "currency", "BEF" },
    { "nl_NL_PREEURO",     "nl_NL",       "currency", "NLG" },
    { "pt_PT_PREEURO",     "pt_PT",       "currency", "PTE" },
    { "sl_ROZAJ",          "sl__ROZAJ",   NULL, NULL }, /* registered name */
    { "sr_SP_CYRL",        "sr_Cyrl_SP",  NULL, NULL }, /* .NET name */
    { "sr_SP_LATN",        "sr_Latn_SP",  NULL, NULL }, /* .NET name */
    { "uz_UZ_CYRL",        "uz_Cyrl_UZ",  NULL, NULL }, /* .NET name */
    { "uz_UZ_LATN",        "uz_Latn_UZ",  NULL, NULL }, /* .NET name */
    { "zh_CHS",            "zh_Hans",     NULL, NULL }, /* .NET name */
    { "zh_CHT",            "zh_TW",       NULL, NULL }, /* .NET name TODO: This should be zh_Hant once the locale structure is fixed. */
    { "zh_GAN",            "zh__GAN",     NULL, NULL }, /* registered name */
    { "zh_GUOYU",          "zh",          NULL, NULL }, /* registered name */
    { "zh_HAKKA",          "zh__HAKKA",   NULL, NULL }, /* registered name */
    { "zh_MIN",            "zh__MIN",     NULL, NULL }, /* registered name */
    { "zh_MIN_NAN",        "zh__MINNAN",  NULL, NULL }, /* registered name */
    { "zh_WUU",            "zh__WUU",     NULL, NULL }, /* registered name */
    { "zh_XIANG",          "zh__XIANG",   NULL, NULL }, /* registered name */
    { "zh_YUE",            "zh__YUE",     NULL, NULL }, /* registered name */
    { "th_TH_TRADITIONAL", "th_TH",       "calendar", "buddhist" },
    { "zh_TW_STROKE",      "zh_TW",       "collation", "stroke" },
    { "zh__PINYIN",        "zh",          "collation", "pinyin" }
};
520 /* ### Keywords **************************************************/
/* Size, in chars, of the fixed buffers that hold one keyword name
   (terminator included). */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Number of slots in the fixed keyword list built while parsing a locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
526 locale_getKeywordsStart(const char *localeID
) {
527 /* TODO This seems odd. No matter what charset we're on, won't '@'
528 be '@'? Or are we building on one EBCDIC machine and moving the
529 library to another? */
530 const char *result
= NULL
;
531 static const uint8_t ebcdicSigns
[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
532 if((result
= uprv_strchr(localeID
, '@')) != NULL
) {
534 } else if(U_CHARSET_FAMILY
== U_EBCDIC_FAMILY
) {
535 const uint8_t *charToFind
= ebcdicSigns
;
537 if((result
= uprv_strchr(localeID
, *charToFind
)) != NULL
) {
547 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
548 * @param keywordName incoming name to be canonicalized
549 * @param status return status (keyword too long)
550 * @return length of the keyword name
552 static int32_t locale_canonKeywordName(char *buf
, const char *keywordName
, UErrorCode
*status
)
555 int32_t keywordNameLen
= uprv_strlen(keywordName
);
557 if(keywordNameLen
>= ULOC_KEYWORD_BUFFER_LEN
) {
558 /* keyword name too long for internal buffer */
559 *status
= U_INTERNAL_PROGRAM_ERROR
;
563 /* normalize the keyword name */
564 for(i
= 0; i
< keywordNameLen
; i
++) {
565 buf
[i
] = uprv_tolower(keywordName
[i
]);
569 return keywordNameLen
;
573 char keyword
[ULOC_KEYWORD_BUFFER_LEN
];
575 const char *valueStart
;
579 static int32_t U_CALLCONV
580 compareKeywordStructs(const void *context
, const void *left
, const void *right
) {
581 const char* leftString
= ((const KeywordStruct
*)left
)->keyword
;
582 const char* rightString
= ((const KeywordStruct
*)right
)->keyword
;
583 return uprv_strcmp(leftString
, rightString
);
587 * Both addKeyword and addValue must already be in canonical form.
588 * Either both addKeyword and addValue are NULL, or neither is NULL.
589 * If they are not NULL they must be zero terminated.
590 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
593 _getKeywords(const char *localeID
,
595 char *keywords
, int32_t keywordCapacity
,
596 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
598 const char* addKeyword
,
599 const char* addValue
,
602 KeywordStruct keywordList
[ULOC_MAX_NO_KEYWORDS
];
604 int32_t maxKeywords
= ULOC_MAX_NO_KEYWORDS
;
605 int32_t numKeywords
= 0;
606 const char* pos
= localeID
;
607 const char* equalSign
= NULL
;
608 const char* semicolon
= NULL
;
610 int32_t keywordsLen
= 0;
611 int32_t valuesLen
= 0;
613 if(prev
== '@') { /* start of keyword definition */
614 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
616 UBool duplicate
= FALSE
;
617 /* skip leading spaces */
621 if (!*pos
) { /* handle trailing "; " */
624 if(numKeywords
== maxKeywords
) {
625 *status
= U_INTERNAL_PROGRAM_ERROR
;
628 equalSign
= uprv_strchr(pos
, '=');
629 semicolon
= uprv_strchr(pos
, ';');
630 /* lack of '=' [foo@currency] is illegal */
631 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
632 if(!equalSign
|| (semicolon
&& semicolon
<equalSign
)) {
633 *status
= U_INVALID_FORMAT_ERROR
;
636 /* need to normalize both keyword and keyword name */
637 if(equalSign
- pos
>= ULOC_KEYWORD_BUFFER_LEN
) {
638 /* keyword name too long for internal buffer */
639 *status
= U_INTERNAL_PROGRAM_ERROR
;
642 for(i
= 0, n
= 0; i
< equalSign
- pos
; ++i
) {
644 keywordList
[numKeywords
].keyword
[n
++] = uprv_tolower(pos
[i
]);
647 keywordList
[numKeywords
].keyword
[n
] = 0;
648 keywordList
[numKeywords
].keywordLen
= n
;
649 /* now grab the value part. First we skip the '=' */
651 /* then we leading spaces */
652 while(*equalSign
== ' ') {
655 keywordList
[numKeywords
].valueStart
= equalSign
;
660 while(*(pos
- i
- 1) == ' ') {
663 keywordList
[numKeywords
].valueLen
= pos
- equalSign
- i
;
666 i
= uprv_strlen(equalSign
);
667 while(equalSign
[i
-1] == ' ') {
670 keywordList
[numKeywords
].valueLen
= i
;
672 /* If this is a duplicate keyword, then ignore it */
673 for (j
=0; j
<numKeywords
; ++j
) {
674 if (uprv_strcmp(keywordList
[j
].keyword
, keywordList
[numKeywords
].keyword
) == 0) {
684 /* Handle addKeyword/addValue. */
685 if (addKeyword
!= NULL
) {
686 UBool duplicate
= FALSE
;
687 U_ASSERT(addValue
!= NULL
);
688 /* Search for duplicate; if found, do nothing. Explicit keyword
689 overrides addKeyword. */
690 for (j
=0; j
<numKeywords
; ++j
) {
691 if (uprv_strcmp(keywordList
[j
].keyword
, addKeyword
) == 0) {
697 if (numKeywords
== maxKeywords
) {
698 *status
= U_INTERNAL_PROGRAM_ERROR
;
701 uprv_strcpy(keywordList
[numKeywords
].keyword
, addKeyword
);
702 keywordList
[numKeywords
].keywordLen
= uprv_strlen(addKeyword
);
703 keywordList
[numKeywords
].valueStart
= addValue
;
704 keywordList
[numKeywords
].valueLen
= uprv_strlen(addValue
);
708 U_ASSERT(addValue
== NULL
);
711 /* now we have a list of keywords */
712 /* we need to sort it */
713 uprv_sortArray(keywordList
, numKeywords
, sizeof(KeywordStruct
), compareKeywordStructs
, NULL
, FALSE
, status
);
715 /* Now construct the keyword part */
716 for(i
= 0; i
< numKeywords
; i
++) {
717 if(keywordsLen
+ keywordList
[i
].keywordLen
+ 1< keywordCapacity
) {
718 uprv_strcpy(keywords
+keywordsLen
, keywordList
[i
].keyword
);
720 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = '=';
722 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = 0;
725 keywordsLen
+= keywordList
[i
].keywordLen
+ 1;
727 if(keywordsLen
+ keywordList
[i
].valueLen
< keywordCapacity
) {
728 uprv_strncpy(keywords
+keywordsLen
, keywordList
[i
].valueStart
, keywordList
[i
].valueLen
);
730 keywordsLen
+= keywordList
[i
].valueLen
;
732 if(i
< numKeywords
- 1) {
733 if(keywordsLen
< keywordCapacity
) {
734 keywords
[keywordsLen
] = ';';
740 if(valuesLen
+ keywordList
[i
].valueLen
+ 1< valuesCapacity
) {
741 uprv_strcpy(values
+valuesLen
, keywordList
[i
].valueStart
);
742 values
[valuesLen
+ keywordList
[i
].valueLen
] = 0;
744 valuesLen
+= keywordList
[i
].valueLen
+ 1;
748 values
[valuesLen
] = 0;
753 return u_terminateChars(keywords
, keywordCapacity
, keywordsLen
, status
);
760 locale_getKeywords(const char *localeID
,
762 char *keywords
, int32_t keywordCapacity
,
763 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
765 UErrorCode
*status
) {
766 return _getKeywords(localeID
, prev
, keywords
, keywordCapacity
,
767 values
, valuesCapacity
, valLen
, valuesToo
,
771 U_CAPI
int32_t U_EXPORT2
772 uloc_getKeywordValue(const char* localeID
,
773 const char* keywordName
,
774 char* buffer
, int32_t bufferCapacity
,
777 const char* nextSeparator
= NULL
;
778 int32_t keywordNameLen
;
779 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
780 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
784 if(status
&& U_SUCCESS(*status
) && localeID
) {
786 const char* startSearchHere
= uprv_strchr(localeID
, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
787 if(startSearchHere
== NULL
) {
788 /* no keywords, return at once */
792 keywordNameLen
= locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
793 if(U_FAILURE(*status
)) {
797 /* find the first keyword */
798 while(startSearchHere
) {
800 /* skip leading spaces (allowed?) */
801 while(*startSearchHere
== ' ') {
804 nextSeparator
= uprv_strchr(startSearchHere
, '=');
805 /* need to normalize both keyword and keyword name */
809 if(nextSeparator
- startSearchHere
>= ULOC_KEYWORD_BUFFER_LEN
) {
810 /* keyword name too long for internal buffer */
811 *status
= U_INTERNAL_PROGRAM_ERROR
;
814 for(i
= 0; i
< nextSeparator
- startSearchHere
; i
++) {
815 localeKeywordNameBuffer
[i
] = uprv_tolower(startSearchHere
[i
]);
817 /* trim trailing spaces */
818 while(startSearchHere
[i
-1] == ' ') {
821 localeKeywordNameBuffer
[i
] = 0;
823 startSearchHere
= uprv_strchr(nextSeparator
, ';');
825 if(uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
) == 0) {
827 while(*nextSeparator
== ' ') {
830 /* we actually found the keyword. Copy the value */
831 if(startSearchHere
&& startSearchHere
- nextSeparator
< bufferCapacity
) {
832 while(*(startSearchHere
-1) == ' ') {
835 uprv_strncpy(buffer
, nextSeparator
, startSearchHere
- nextSeparator
);
836 result
= u_terminateChars(buffer
, bufferCapacity
, startSearchHere
- nextSeparator
, status
);
837 } else if(!startSearchHere
&& (int32_t)uprv_strlen(nextSeparator
) < bufferCapacity
) { /* last item in string */
838 i
= uprv_strlen(nextSeparator
);
839 while(nextSeparator
[i
- 1] == ' ') {
842 uprv_strncpy(buffer
, nextSeparator
, i
);
843 result
= u_terminateChars(buffer
, bufferCapacity
, i
, status
);
845 /* give a bigger buffer, please */
846 *status
= U_BUFFER_OVERFLOW_ERROR
;
847 if(startSearchHere
) {
848 result
= startSearchHere
- nextSeparator
;
850 result
= uprv_strlen(nextSeparator
);
860 U_CAPI
int32_t U_EXPORT2
861 uloc_setKeywordValue(const char* keywordName
,
862 const char* keywordValue
,
863 char* buffer
, int32_t bufferCapacity
,
866 /* TODO: sorting. removal. */
867 int32_t keywordNameLen
;
868 int32_t keywordValueLen
;
871 int32_t foundValueLen
;
872 int32_t keywordAtEnd
= 0; /* is the keyword at the end of the string? */
873 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
874 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
877 char* nextSeparator
= NULL
;
878 char* nextEqualsign
= NULL
;
879 char* startSearchHere
= NULL
;
880 char* keywordStart
= NULL
;
881 char *insertHere
= NULL
;
882 if(U_FAILURE(*status
)) {
885 if(keywordValue
&& !*keywordValue
) {
889 keywordValueLen
= uprv_strlen(keywordValue
);
893 keywordNameLen
= locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
894 if(U_FAILURE(*status
)) {
897 startSearchHere
= (char*)locale_getKeywordsStart(buffer
);
898 if(bufferCapacity
>1) {
899 bufLen
= uprv_strlen(buffer
);
901 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
904 if(startSearchHere
== NULL
|| (startSearchHere
[1]==0)) {
905 if(!keywordValue
) { /* no keywords = nothing to remove */
909 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
910 if(startSearchHere
) { /* had a single @ */
911 needLen
--; /* already had the @ */
912 /* startSearchHere points at the @ */
914 startSearchHere
=buffer
+bufLen
;
916 if(needLen
>= bufferCapacity
) {
917 *status
= U_BUFFER_OVERFLOW_ERROR
;
918 return needLen
; /* no change */
920 *startSearchHere
= '@';
922 uprv_strcpy(startSearchHere
, keywordNameBuffer
);
923 startSearchHere
+= keywordNameLen
;
924 *startSearchHere
= '=';
926 uprv_strcpy(startSearchHere
, keywordValue
);
927 startSearchHere
+=keywordValueLen
;
929 } /* end shortcut - no @ */
931 keywordStart
= startSearchHere
;
932 /* search for keyword */
933 while(keywordStart
) {
935 /* skip leading spaces (allowed?) */
936 while(*keywordStart
== ' ') {
939 nextEqualsign
= uprv_strchr(keywordStart
, '=');
940 /* need to normalize both keyword and keyword name */
944 if(nextEqualsign
- keywordStart
>= ULOC_KEYWORD_BUFFER_LEN
) {
945 /* keyword name too long for internal buffer */
946 *status
= U_INTERNAL_PROGRAM_ERROR
;
949 for(i
= 0; i
< nextEqualsign
- keywordStart
; i
++) {
950 localeKeywordNameBuffer
[i
] = uprv_tolower(keywordStart
[i
]);
952 /* trim trailing spaces */
953 while(keywordStart
[i
-1] == ' ') {
956 localeKeywordNameBuffer
[i
] = 0;
958 nextSeparator
= uprv_strchr(nextEqualsign
, ';');
959 rc
= uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
);
962 while(*nextEqualsign
== ' ') {
965 /* we actually found the keyword. Change the value */
968 foundValueLen
= nextSeparator
- nextEqualsign
;
971 foundValueLen
= uprv_strlen(nextEqualsign
);
973 if(keywordValue
) { /* adding a value - not removing */
974 if(foundValueLen
== keywordValueLen
) {
975 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
976 return bufLen
; /* no change in size */
977 } else if(foundValueLen
> keywordValueLen
) {
978 int32_t delta
= foundValueLen
- keywordValueLen
;
979 if(nextSeparator
) { /* RH side */
980 uprv_memmove(nextSeparator
- delta
, nextSeparator
, bufLen
-(nextSeparator
-buffer
));
982 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
986 } else { /* FVL < KVL */
987 int32_t delta
= keywordValueLen
- foundValueLen
;
988 if((bufLen
+delta
) >= bufferCapacity
) {
989 *status
= U_BUFFER_OVERFLOW_ERROR
;
992 if(nextSeparator
) { /* RH side */
993 uprv_memmove(nextSeparator
+delta
,nextSeparator
, bufLen
-(nextSeparator
-buffer
));
995 uprv_strncpy(nextEqualsign
, keywordValue
, keywordValueLen
);
1000 } else { /* removing a keyword */
1002 /* zero out the ';' or '@' just before startSearchhere */
1003 keywordStart
[-1] = 0;
1004 return (keywordStart
-buffer
)-1; /* (string length without keyword) minus separator */
1006 uprv_memmove(keywordStart
, nextSeparator
+1, bufLen
-((nextSeparator
+1)-buffer
));
1007 keywordStart
[bufLen
-((nextSeparator
+1)-buffer
)]=0;
1008 return bufLen
-((nextSeparator
+1)-keywordStart
);
1011 } else if(rc
<0){ /* end match keyword */
1012 /* could insert at this location. */
1013 insertHere
= keywordStart
;
1015 keywordStart
= nextSeparator
;
1016 } /* end loop searching */
1019 return bufLen
; /* removal of non-extant keyword - no change */
1022 /* we know there is at least one keyword. */
1023 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
1024 if(needLen
>= bufferCapacity
) {
1025 *status
= U_BUFFER_OVERFLOW_ERROR
;
1026 return needLen
; /* no change */
1030 uprv_memmove(insertHere
+(1+keywordNameLen
+1+keywordValueLen
), insertHere
, bufLen
-(insertHere
-buffer
));
1031 keywordStart
= insertHere
;
1033 keywordStart
= buffer
+bufLen
;
1034 *keywordStart
= ';';
1037 uprv_strncpy(keywordStart
, keywordNameBuffer
, keywordNameLen
);
1038 keywordStart
+= keywordNameLen
;
1039 *keywordStart
= '=';
1041 uprv_strncpy(keywordStart
, keywordValue
, keywordValueLen
); /* terminates. */
1042 keywordStart
+=keywordValueLen
;
1044 *keywordStart
= ';';
1051 /* ### ID parsing implementation **************************************************/
/* Returns TRUE if a is an ID separator ('_' or '-'), FALSE otherwise.
 * The argument is parenthesized so that compound expressions expand correctly;
 * it is still evaluated more than once, so do not pass expressions with
 * side effects. */
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')

/* Returns TRUE if a is one of the letters that may start a special prefix:
 * 'x', 'X', 'i' or 'I'. The argument may be evaluated up to four times. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* Returns TRUE if one of the special prefixes is here (s=string):
 * a prefix letter immediately followed by an ID separator, e.g. "x-" or "i-".
 * s may be any pointer expression (for example p + 1). */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant.
 * Returns TRUE for NUL, '.' and '@'.
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
/*
 * Bounded character search: looks at no more than len characters of str
 * and stops early at a NUL.
 *
 * @return pointer to the first occurrence of c, or NULL if the search ends
 *         (len exhausted or NUL reached) without finding it
 */
static char* _strnchr(const char* str, int32_t len, char c) {
    int32_t pos;

    U_ASSERT(str != 0 && len >= 0);
    for (pos = 0; pos != len; ++pos) {
        const char cur = str[pos];
        if (cur == c) {
            return (char*)(str + pos);
        }
        if (cur == 0) {
            break;      /* end of string before len was used up */
        }
    }
    return NULL;
}
/**
 * Lookup 'key' in the array 'list'.  The array 'list' should contain
 * a NULL entry, followed by more entries, and a second NULL entry.
 * Both NULL-terminated sections are searched, in order.
 *
 * The 'list' param should be one of the code tables used by this file
 * (LANGUAGES, LANGUAGES_3, COUNTRIES, COUNTRIES_3).
 *
 * @return the offset of the matching entry from the start of 'list'
 *         (the first NULL counts as a slot), or -1 if 'key' is not found
 */
static int16_t _findIndex(const char* const* list, const char* key)
{
    const char* const* entry = list;
    int32_t section;

    for (section = 0; section < 2; ++section) {
        for (; *entry != NULL; ++entry) {
            if (uprv_strcmp(key, *entry) == 0) {
                return (int16_t)(entry - list);
            }
        }
        ++entry;    /* step over the NULL that ends this section */
    }
    return -1;
}
1106 /* count the length of src while copying it to dest; return strlen(src) */
1107 static U_INLINE
int32_t
1108 _copyCount(char *dest
, int32_t destCapacity
, const char *src
) {
1115 return (int32_t)(src
-anchor
);
1117 if(destCapacity
<=0) {
1118 return (int32_t)((src
-anchor
)+uprv_strlen(src
));
1127 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1128 * avoid duplicating code to handle the earlier locale ID pieces
1129 * in the functions for the later ones by
1130 * setting the *pEnd pointer to where they stopped parsing
1132 * TODO try to use this in Locale
1135 _getLanguage(const char *localeID
,
1136 char *language
, int32_t languageCapacity
,
1137 const char **pEnd
) {
1140 char lang
[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1142 /* if it starts with i- or x- then copy that prefix */
1143 if(_isIDPrefix(localeID
)) {
1144 if(i
<languageCapacity
) {
1145 language
[i
]=(char)uprv_tolower(*localeID
);
1147 if(i
<languageCapacity
) {
1154 /* copy the language as far as possible and count its length */
1155 while(!_isTerminator(*localeID
) && !_isIDSeparator(*localeID
)) {
1156 if(i
<languageCapacity
) {
1157 language
[i
]=(char)uprv_tolower(*localeID
);
1160 lang
[i
]=(char)uprv_tolower(*localeID
);
1167 /* convert 3 character code to 2 character code if possible *CWB*/
1168 offset
=_findIndex(LANGUAGES_3
, lang
);
1170 i
=_copyCount(language
, languageCapacity
, LANGUAGES
[offset
]);
/*
 * Extract an optional script code from the start of localeID.
 * The field running up to the next terminator or ID separator is a script
 * only if it is exactly 4 characters long; it is then written with an
 * uppercase first letter and lowercase remaining letters.
 *
 * @param localeID       text following the separator after the language
 * @param script         output buffer (not NUL-terminated here)
 * @param scriptCapacity number of chars available in script
 * @param pEnd           if not NULL, receives localeID when no script is found,
 *                       or the position just after the script otherwise
 * @return number of chars stored (4 limited to scriptCapacity), or 0 if the
 *         field is not a script
 *
 * NOTE(review): because the return value is limited to scriptCapacity, a call
 * with capacity 0 reports 0 even when a script is present; callers that only
 * skip the script therefore compare *pEnd instead. Confirm whether this is
 * the intended preflighting behavior.
 */
static int32_t
_getScript(const char *localeID,
            char *script, int32_t scriptCapacity,
            const char **pEnd)
{
    int32_t fieldLen = 0;
    int32_t copyLen;
    int32_t k;

    if (pEnd != NULL) {
        *pEnd = localeID;
    }

    /* measure the second item */
    while (!_isTerminator(localeID[fieldLen]) && !_isIDSeparator(localeID[fieldLen])) {
        ++fieldLen;
    }

    /* Only a field of exactly 4 characters is a script (and not a country). */
    if (fieldLen != 4) {
        return 0;
    }

    if (pEnd != NULL) {
        *pEnd = localeID + fieldLen;
    }
    copyLen = (fieldLen > scriptCapacity) ? scriptCapacity : fieldLen;
    for (k = 0; k < copyLen; ++k) {
        if (k == 0) {
            script[k] = (char)uprv_toupper(localeID[k]);
        } else {
            script[k] = (char)uprv_tolower(localeID[k]);
        }
    }
    return copyLen;
}
1219 _getCountry(const char *localeID
,
1220 char *country
, int32_t countryCapacity
,
1224 char cnty
[ULOC_COUNTRY_CAPACITY
]={ 0, 0, 0, 0 };
1227 /* copy the country as far as possible and count its length */
1228 while(!_isTerminator(*localeID
) && !_isIDSeparator(*localeID
)) {
1229 if(i
<countryCapacity
) {
1230 country
[i
]=(char)uprv_toupper(*localeID
);
1232 if(i
<(ULOC_COUNTRY_CAPACITY
-1)) { /*CWB*/
1233 cnty
[i
]=(char)uprv_toupper(*localeID
);
1239 /* convert 3 character code to 2 character code if possible *CWB*/
1241 offset
=_findIndex(COUNTRIES_3
, cnty
);
1243 i
=_copyCount(country
, countryCapacity
, COUNTRIES
[offset
]);
1254 * @param needSeparator if true, then add leading '_' if any variants
1255 * are added to 'variant'
1258 _getVariantEx(const char *localeID
,
1260 char *variant
, int32_t variantCapacity
,
1261 UBool needSeparator
) {
1264 /* get one or more variant tags and separate them with '_' */
1265 if(_isIDSeparator(prev
)) {
1266 /* get a variant string after a '-' or '_' */
1267 while(!_isTerminator(*localeID
)) {
1268 if (needSeparator
) {
1269 if (i
<variantCapacity
) {
1273 needSeparator
= FALSE
;
1275 if(i
<variantCapacity
) {
1276 variant
[i
]=(char)uprv_toupper(*localeID
);
1277 if(variant
[i
]=='-') {
1286 /* if there is no variant tag after a '-' or '_' then look for '@' */
1290 } else if((localeID
=locale_getKeywordsStart(localeID
))!=NULL
) {
1291 ++localeID
; /* point after the '@' */
1295 while(!_isTerminator(*localeID
)) {
1296 if (needSeparator
) {
1297 if (i
<variantCapacity
) {
1301 needSeparator
= FALSE
;
1303 if(i
<variantCapacity
) {
1304 variant
[i
]=(char)uprv_toupper(*localeID
);
1305 if(variant
[i
]=='-' || variant
[i
]==',') {
1318 _getVariant(const char *localeID
,
1320 char *variant
, int32_t variantCapacity
) {
1321 return _getVariantEx(localeID
, prev
, variant
, variantCapacity
, FALSE
);
1325 * Delete ALL instances of a variant from the given list of one or
1326 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1327 * @param variants the source string of one or more variants,
1328 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1329 * terminated; if it is, trailing zero will NOT be maintained.
1330 * @param variantsLen length of variants
1331 * @param toDelete variant to delete, without separators, e.g. "EURO"
1332 * or "PREEURO"; not zero terminated
1333 * @param toDeleteLen length of toDelete
1334 * @return number of characters deleted from variants
1337 _deleteVariant(char* variants
, int32_t variantsLen
,
1338 const char* toDelete
, int32_t toDeleteLen
) {
1339 int32_t delta
= 0; /* number of chars deleted */
1342 if (variantsLen
< toDeleteLen
) {
1345 if (uprv_strncmp(variants
, toDelete
, toDeleteLen
) == 0 &&
1346 (variantsLen
== toDeleteLen
||
1347 (flag
=(variants
[toDeleteLen
] == '_')))) {
1348 int32_t d
= toDeleteLen
+ (flag
?1:0);
1351 uprv_memmove(variants
, variants
+d
, variantsLen
);
1353 char* p
= _strnchr(variants
, variantsLen
, '_');
1358 variantsLen
-= p
- variants
;
1364 /* Keyword enumeration */
1366 typedef struct UKeywordsContext
{
1371 static void U_CALLCONV
1372 uloc_kw_closeKeywords(UEnumeration
*enumerator
) {
1373 uprv_free(((UKeywordsContext
*)enumerator
->context
)->keywords
);
1374 uprv_free(enumerator
->context
);
1375 uprv_free(enumerator
);
1378 static int32_t U_CALLCONV
1379 uloc_kw_countKeywords(UEnumeration
*en
, UErrorCode
*status
) {
1380 char *kw
= ((UKeywordsContext
*)en
->context
)->keywords
;
1384 kw
+= uprv_strlen(kw
)+1;
1389 static const char* U_CALLCONV
1390 uloc_kw_nextKeyword(UEnumeration
* en
,
1391 int32_t* resultLength
,
1392 UErrorCode
* status
) {
1393 const char* result
= ((UKeywordsContext
*)en
->context
)->current
;
1396 len
= uprv_strlen(((UKeywordsContext
*)en
->context
)->current
);
1397 ((UKeywordsContext
*)en
->context
)->current
+= len
+1;
1402 *resultLength
= len
;
1407 static void U_CALLCONV
1408 uloc_kw_resetKeywords(UEnumeration
* en
,
1409 UErrorCode
* status
) {
1410 ((UKeywordsContext
*)en
->context
)->current
= ((UKeywordsContext
*)en
->context
)->keywords
;
1413 static const UEnumeration gKeywordsEnum
= {
1416 uloc_kw_closeKeywords
,
1417 uloc_kw_countKeywords
,
1419 uloc_kw_nextKeyword
,
1420 uloc_kw_resetKeywords
1423 U_CAPI UEnumeration
* U_EXPORT2
1424 uloc_openKeywordList(const char *keywordList
, int32_t keywordListSize
, UErrorCode
* status
)
1426 UKeywordsContext
*myContext
= NULL
;
1427 UEnumeration
*result
= NULL
;
1429 if(U_FAILURE(*status
)) {
1432 result
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
1433 uprv_memcpy(result
, &gKeywordsEnum
, sizeof(UEnumeration
));
1434 myContext
= uprv_malloc(sizeof(UKeywordsContext
));
1435 if (myContext
== NULL
) {
1436 *status
= U_MEMORY_ALLOCATION_ERROR
;
1440 myContext
->keywords
= (char *)uprv_malloc(keywordListSize
+1);
1441 uprv_memcpy(myContext
->keywords
, keywordList
, keywordListSize
);
1442 myContext
->keywords
[keywordListSize
] = 0;
1443 myContext
->current
= myContext
->keywords
;
1444 result
->context
= myContext
;
1448 U_CAPI UEnumeration
* U_EXPORT2
1449 uloc_openKeywords(const char* localeID
,
1454 int32_t keywordsCapacity
= 256;
1455 if(status
==NULL
|| U_FAILURE(*status
)) {
1459 if(localeID
==NULL
) {
1460 localeID
=uloc_getDefault();
1463 /* Skip the language */
1464 _getLanguage(localeID
, NULL
, 0, &localeID
);
1465 if(_isIDSeparator(*localeID
)) {
1466 const char *scriptID
;
1467 /* Skip the script if available */
1468 _getScript(localeID
+1, NULL
, 0, &scriptID
);
1469 if(scriptID
!= localeID
+1) {
1470 /* Found optional script */
1471 localeID
= scriptID
;
1473 /* Skip the Country */
1474 if (_isIDSeparator(*localeID
)) {
1475 _getCountry(localeID
+1, NULL
, 0, &localeID
);
1476 if(_isIDSeparator(*localeID
)) {
1477 _getVariant(localeID
+1, *localeID
, NULL
, 0);
1482 /* keywords are located after '@' */
1483 if((localeID
= locale_getKeywordsStart(localeID
)) != NULL
) {
1484 i
=locale_getKeywords(localeID
+1, '@', keywords
, keywordsCapacity
, NULL
, 0, NULL
, FALSE
, status
);
1488 return uloc_openKeywordList(keywords
, i
, status
);
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if at least one bit of 'mask' is set in 'options'.
 * Both arguments are parenthesized so that compound expressions such as
 * (a | b) are tested as a whole. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)
1502 * Canonicalize the given localeID, to level 1 or to level 2,
1503 * depending on the options. To specify level 1, pass in options=0.
1504 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1506 * This is the code underlying uloc_getName and uloc_canonicalize.
1509 _canonicalize(const char* localeID
,
1511 int32_t resultCapacity
,
1514 int32_t j
, len
, fieldCount
=0, scriptSize
=0, variantSize
=0, nameCapacity
;
1515 char localeBuffer
[ULOC_FULLNAME_CAPACITY
];
1516 const char* keywordAssign
= NULL
;
1517 const char* separatorIndicator
= NULL
;
1518 const char* addKeyword
= NULL
;
1519 const char* addValue
= NULL
;
1521 char* variant
= NULL
; /* pointer into name, or NULL */
1522 int32_t sawEuro
= 0;
1524 if (U_FAILURE(*err
)) {
1528 if (localeID
==NULL
) {
1529 localeID
=uloc_getDefault();
1532 /* if we are doing a full canonicalization, then put results in
1533 localeBuffer, if necessary; otherwise send them to result. */
1534 if (OPTION_SET(options
, _ULOC_CANONICALIZE
) &&
1535 (result
== NULL
|| resultCapacity
< sizeof(localeBuffer
))) {
1536 name
= localeBuffer
;
1537 nameCapacity
= sizeof(localeBuffer
);
1540 nameCapacity
= resultCapacity
;
1543 /* get all pieces, one after another, and separate with '_' */
1544 len
=_getLanguage(localeID
, name
, nameCapacity
, &localeID
);
1545 if(_isIDSeparator(*localeID
)) {
1546 const char *scriptID
;
1549 if(len
<nameCapacity
) {
1554 scriptSize
=_getScript(localeID
+1, name
+len
, nameCapacity
-len
, &scriptID
);
1555 if(scriptSize
> 0) {
1556 /* Found optional script */
1557 localeID
= scriptID
;
1560 if (_isIDSeparator(*localeID
)) {
1561 /* If there is something else, then we add the _ */
1562 if(len
<nameCapacity
) {
1569 if (_isIDSeparator(*localeID
)) {
1570 len
+=_getCountry(localeID
+1, name
+len
, nameCapacity
-len
, &localeID
);
1571 if(_isIDSeparator(*localeID
)) {
1573 if(len
<nameCapacity
) {
1577 variantSize
= _getVariant(localeID
+1, *localeID
, name
+len
, nameCapacity
-len
);
1578 if (variantSize
> 0) {
1581 localeID
+= variantSize
+ 1; /* skip '_' and variant */
1587 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1588 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) && *localeID
== '.') {
1598 if (len
<nameCapacity
) {
1608 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1609 After this, localeID either points to '@' or is NULL */
1610 if ((localeID
=locale_getKeywordsStart(localeID
))!=NULL
) {
1611 keywordAssign
= uprv_strchr(localeID
, '=');
1612 separatorIndicator
= uprv_strchr(localeID
, ';');
1615 /* Copy POSIX-style variant, if any [mr@FOO] */
1616 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) &&
1617 localeID
!= NULL
&& keywordAssign
== NULL
) {
1623 if (len
<nameCapacity
) {
1631 if (OPTION_SET(options
, _ULOC_CANONICALIZE
)) {
1632 /* Handle @FOO variant if @ is present and not followed by = */
1633 if (localeID
!=NULL
&& keywordAssign
==NULL
) {
1634 int32_t posixVariantSize
;
1635 /* Add missing '_' if needed */
1636 if (fieldCount
< 2 || (fieldCount
< 3 && scriptSize
> 0)) {
1638 if(len
<nameCapacity
) {
1643 } while(fieldCount
<2);
1645 posixVariantSize
= _getVariantEx(localeID
+1, '@', name
+len
, nameCapacity
-len
,
1646 (UBool
)(variantSize
> 0));
1647 if (posixVariantSize
> 0) {
1648 if (variant
== NULL
) {
1651 len
+= posixVariantSize
;
1652 variantSize
+= posixVariantSize
;
1656 /* Check for EURO variants. */
1657 sawEuro
= _deleteVariant(variant
, variantSize
, "EURO", 4);
1659 if (sawEuro
> 0 && name
[len
-1] == '_') { /* delete trailing '_' */
1663 /* Look up the ID in the canonicalization map */
1664 for (j
=0; j
<(int32_t)(sizeof(CANONICALIZE_MAP
)/sizeof(CANONICALIZE_MAP
[0])); j
++) {
1665 const char* id
= CANONICALIZE_MAP
[j
].id
;
1666 int32_t n
= uprv_strlen(id
);
1667 if (len
== n
&& uprv_strncmp(name
, id
, n
) == 0) {
1668 if (n
== 0 && localeID
!= NULL
) {
1669 break; /* Don't remap "" if keywords present */
1671 len
= _copyCount(name
, nameCapacity
, CANONICALIZE_MAP
[j
].canonicalID
);
1672 addKeyword
= CANONICALIZE_MAP
[j
].keyword
;
1673 addValue
= CANONICALIZE_MAP
[j
].value
;
1678 /* Explicit EURO variant overrides keyword in CANONICALIZE_MAP */
1680 addKeyword
= "currency";
1685 if (!OPTION_SET(options
, _ULOC_STRIP_KEYWORDS
)) {
1686 if (localeID
!=NULL
&& keywordAssign
!=NULL
&&
1687 (!separatorIndicator
|| separatorIndicator
> keywordAssign
)) {
1688 if(len
<nameCapacity
) {
1693 len
+= _getKeywords(localeID
+1, '@', name
+len
, nameCapacity
-len
, NULL
, 0, NULL
, TRUE
,
1694 addKeyword
, addValue
, err
);
1695 } else if (addKeyword
!= NULL
) {
1696 U_ASSERT(addValue
!= NULL
);
1697 /* inelegant but works -- later make _getKeywords do this? */
1698 len
+= _copyCount(name
+len
, nameCapacity
-len
, "@");
1699 len
+= _copyCount(name
+len
, nameCapacity
-len
, addKeyword
);
1700 len
+= _copyCount(name
+len
, nameCapacity
-len
, "=");
1701 len
+= _copyCount(name
+len
, nameCapacity
-len
, addValue
);
1705 if (U_SUCCESS(*err
) && name
== localeBuffer
) {
1706 uprv_strncpy(result
, localeBuffer
, (len
> resultCapacity
) ? resultCapacity
: len
);
1709 return u_terminateChars(result
, resultCapacity
, len
, err
);
1712 /* ### ID parsing API **************************************************/
1714 U_CAPI
int32_t U_EXPORT2
1715 uloc_getParent(const char* localeID
,
1717 int32_t parentCapacity
,
1720 const char *lastUnderscore
;
1723 if (U_FAILURE(*err
))
1726 if (localeID
== NULL
)
1727 localeID
= uloc_getDefault();
1729 lastUnderscore
=uprv_strrchr(localeID
, '_');
1730 if(lastUnderscore
!=NULL
) {
1731 i
=(int32_t)(lastUnderscore
-localeID
);
1737 uprv_memcpy(parent
, localeID
, uprv_min(i
, parentCapacity
));
1739 return u_terminateChars(parent
, parentCapacity
, i
, err
);
1742 U_CAPI
int32_t U_EXPORT2
1743 uloc_getLanguage(const char* localeID
,
1745 int32_t languageCapacity
,
1748 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1751 if (err
==NULL
|| U_FAILURE(*err
)) {
1755 if(localeID
==NULL
) {
1756 localeID
=uloc_getDefault();
1759 i
=_getLanguage(localeID
, language
, languageCapacity
, NULL
);
1760 return u_terminateChars(language
, languageCapacity
, i
, err
);
1763 U_CAPI
int32_t U_EXPORT2
1764 uloc_getScript(const char* localeID
,
1766 int32_t scriptCapacity
,
1771 if(err
==NULL
|| U_FAILURE(*err
)) {
1775 if(localeID
==NULL
) {
1776 localeID
=uloc_getDefault();
1779 /* skip the language */
1780 _getLanguage(localeID
, NULL
, 0, &localeID
);
1781 if(_isIDSeparator(*localeID
)) {
1782 i
=_getScript(localeID
+1, script
, scriptCapacity
, NULL
);
1784 return u_terminateChars(script
, scriptCapacity
, i
, err
);
1787 U_CAPI
int32_t U_EXPORT2
1788 uloc_getCountry(const char* localeID
,
1790 int32_t countryCapacity
,
1795 if(err
==NULL
|| U_FAILURE(*err
)) {
1799 if(localeID
==NULL
) {
1800 localeID
=uloc_getDefault();
1803 /* Skip the language */
1804 _getLanguage(localeID
, NULL
, 0, &localeID
);
1805 if(_isIDSeparator(*localeID
)) {
1806 const char *scriptID
;
1807 /* Skip the script if available */
1808 _getScript(localeID
+1, NULL
, 0, &scriptID
);
1809 if(scriptID
!= localeID
+1) {
1810 /* Found optional script */
1811 localeID
= scriptID
;
1813 if(_isIDSeparator(*localeID
)) {
1814 i
=_getCountry(localeID
+1, country
, countryCapacity
, NULL
);
1817 return u_terminateChars(country
, countryCapacity
, i
, err
);
1820 U_CAPI
int32_t U_EXPORT2
1821 uloc_getVariant(const char* localeID
,
1823 int32_t variantCapacity
,
1827 UBool haveVariant
=FALSE
;
1829 if(err
==NULL
|| U_FAILURE(*err
)) {
1833 if(localeID
==NULL
) {
1834 localeID
=uloc_getDefault();
1837 /* Skip the language */
1838 _getLanguage(localeID
, NULL
, 0, &localeID
);
1839 if(_isIDSeparator(*localeID
)) {
1840 const char *scriptID
;
1841 /* Skip the script if available */
1842 _getScript(localeID
+1, NULL
, 0, &scriptID
);
1843 if(scriptID
!= localeID
+1) {
1844 /* Found optional script */
1845 localeID
= scriptID
;
1847 /* Skip the Country */
1848 if (_isIDSeparator(*localeID
)) {
1849 _getCountry(localeID
+1, NULL
, 0, &localeID
);
1850 if(_isIDSeparator(*localeID
)) {
1852 i
=_getVariant(localeID
+1, *localeID
, variant
, variantCapacity
);
1857 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1858 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
1860 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1861 i=_getVariant(localeID+1, '@', variant, variantCapacity);
1864 return u_terminateChars(variant
, variantCapacity
, i
, err
);
1867 U_CAPI
int32_t U_EXPORT2
1868 uloc_getName(const char* localeID
,
1870 int32_t nameCapacity
,
1873 return _canonicalize(localeID
, name
, nameCapacity
, 0, err
);
1876 U_CAPI
int32_t U_EXPORT2
1877 uloc_getBaseName(const char* localeID
,
1879 int32_t nameCapacity
,
1882 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_STRIP_KEYWORDS
, err
);
1885 U_CAPI
int32_t U_EXPORT2
1886 uloc_canonicalize(const char* localeID
,
1888 int32_t nameCapacity
,
1891 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_CANONICALIZE
, err
);
1894 U_CAPI
const char* U_EXPORT2
1895 uloc_getISO3Language(const char* localeID
)
1898 char lang
[ULOC_LANG_CAPACITY
];
1899 UErrorCode err
= U_ZERO_ERROR
;
1901 if (localeID
== NULL
)
1903 localeID
= uloc_getDefault();
1905 uloc_getLanguage(localeID
, lang
, ULOC_LANG_CAPACITY
, &err
);
1908 offset
= _findIndex(LANGUAGES
, lang
);
1911 return LANGUAGES_3
[offset
];
1914 U_CAPI
const char* U_EXPORT2
1915 uloc_getISO3Country(const char* localeID
)
1918 char cntry
[ULOC_LANG_CAPACITY
];
1919 UErrorCode err
= U_ZERO_ERROR
;
1921 if (localeID
== NULL
)
1923 localeID
= uloc_getDefault();
1925 uloc_getCountry(localeID
, cntry
, ULOC_LANG_CAPACITY
, &err
);
1928 offset
= _findIndex(COUNTRIES
, cntry
);
1932 return COUNTRIES_3
[offset
];
1935 U_CAPI
uint32_t U_EXPORT2
1936 uloc_getLCID(const char* localeID
)
1938 UErrorCode status
= U_ZERO_ERROR
;
1939 char langID
[ULOC_FULLNAME_CAPACITY
];
1941 uloc_getLanguage(localeID
, langID
, sizeof(langID
), &status
);
1942 if (U_FAILURE(status
)) {
1946 return uprv_convertToLCID(langID
, localeID
, &status
);
1949 /* ### Default locale **************************************************/
1951 U_CAPI
const char* U_EXPORT2
1954 return locale_get_default();
1957 U_CAPI
void U_EXPORT2
1958 uloc_setDefault(const char* newDefaultLocale
,
1961 if (U_FAILURE(*err
))
1963 /* the error code isn't currently used for anything by this function*/
1965 /* propagate change to C++ */
1966 locale_set_default(newDefaultLocale
);
1969 /* ### Display name **************************************************/
1972 * Lookup a resource bundle table item with fallback on the table level.
1973 * Regular resource bundle lookups perform fallback to parent locale bundles
1974 * and eventually the root bundle, but only for top-level items.
1975 * This function takes the name of a top-level table and of an item in that table
1976 * and performs a lookup of both, falling back until a bundle contains a table
1979 * Note: Only the opening of entire bundles falls back through the default locale
1980 * before root. Once a bundle is open, item lookups do not go through the
1981 * default locale because that would result in a mix of languages that is
1982 * unpredictable to the programmer and most likely useless.
1984 static const UChar
*
1985 _res_getTableStringWithFallback(const char *path
, const char *locale
,
1986 const char *tableKey
, const char *subTableKey
,
1987 const char *itemKey
,
1989 UErrorCode
*pErrorCode
)
1991 char localeBuffer
[ULOC_FULLNAME_CAPACITY
*4];
1992 UResourceBundle
*rb
, table
;
1994 UErrorCode errorCode
;
1995 char explicitFallbackName
[ULOC_FULLNAME_CAPACITY
] = {0};
1997 const UChar
* ef
= NULL
;
1998 UBool overrideExplicitFallback
= FALSE
;
2001 * open the bundle for the current locale
2002 * this falls back through the locale's chain to root
2004 errorCode
=U_ZERO_ERROR
;
2005 rb
=ures_open(path
, locale
, &errorCode
);
2006 if(U_FAILURE(errorCode
)) {
2007 /* total failure, not even root could be opened */
2008 *pErrorCode
=errorCode
;
2010 } else if(errorCode
==U_USING_DEFAULT_WARNING
||
2011 (errorCode
==U_USING_FALLBACK_WARNING
&& *pErrorCode
!=U_USING_DEFAULT_WARNING
)
2013 /* set the "strongest" error code (success->fallback->default->failure) */
2014 *pErrorCode
=errorCode
;
2018 * try to open the requested table
2019 * this falls back through the locale's chain to root, but not through the default locale
2021 errorCode
=U_ZERO_ERROR
;
2022 ures_initStackObject(&table
);
2023 ures_getByKey(rb
, tableKey
, &table
, &errorCode
);
2024 if(U_FAILURE(errorCode
)) {
2025 /* no such table anywhere in this fallback chain */
2027 *pErrorCode
=errorCode
;
2029 } else if(errorCode
==U_USING_DEFAULT_WARNING
||
2030 (errorCode
==U_USING_FALLBACK_WARNING
&& *pErrorCode
!=U_USING_DEFAULT_WARNING
)
2032 /* set the "strongest" error code (success->fallback->default->failure) */
2033 *pErrorCode
=errorCode
;
2036 /* check if the fallback token is set */
2037 ef
= ures_getStringByKey(&table
, "Fallback", &efnLen
, &errorCode
);
2038 if(U_SUCCESS(errorCode
)){
2039 /* set the fallback chain */
2040 u_UCharsToChars(ef
, explicitFallbackName
, efnLen
);
2041 /* null terminate the buffer */
2042 explicitFallbackName
[efnLen
]=0;
2043 }else if(errorCode
==U_USING_DEFAULT_WARNING
||
2044 (errorCode
==U_USING_FALLBACK_WARNING
&& *pErrorCode
!=U_USING_DEFAULT_WARNING
)
2046 /* set the "strongest" error code (success->fallback->default->failure) */
2047 *pErrorCode
=errorCode
;
2050 /* try to open the requested item in the table */
2051 errorCode
=U_ZERO_ERROR
;
2052 if(subTableKey
== NULL
){
2053 item
=ures_getStringByKey(&table
, itemKey
, pLength
, &errorCode
);
2055 UResourceBundle subTable
;
2056 ures_initStackObject(&subTable
);
2057 ures_getByKey(&table
, subTableKey
, &subTable
, &errorCode
);
2058 item
= ures_getStringByKey(&subTable
, itemKey
, pLength
, &errorCode
);
2059 ures_close(&subTable
);
2061 if(U_SUCCESS(errorCode
)) {
2062 /* if the item for the key is empty ... override the explicit fall back set */
2063 if(item
[0]==0 && efnLen
> 0){
2064 overrideExplicitFallback
= TRUE
;
2066 /* we got the requested item! */
2070 if(errorCode
==U_USING_DEFAULT_WARNING
||
2071 (errorCode
==U_USING_FALLBACK_WARNING
&& *pErrorCode
!=U_USING_DEFAULT_WARNING
)
2073 /* set the "strongest" error code (success->fallback->default->failure) */
2074 *pErrorCode
=errorCode
;
2078 * It is safe to close the bundle and still return the
2079 * string pointer because resource bundles are
2080 * cached until u_cleanup().
2087 * We get here if the item was not found.
2088 * We will follow the chain to the parent locale bundle and look in
2092 /* get the real locale ID for this table */
2093 errorCode
=U_ZERO_ERROR
;
2094 locale
=ures_getLocale(&table
, &errorCode
);
2095 /* keep table and rb open until we are done using the locale string owned by the table bundle */
2096 if(U_FAILURE(errorCode
)) {
2097 /* error getting the locale ID for an open RB - should never happen */
2100 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
2104 if(*locale
==0 || 0==uprv_strcmp(locale
, _kRootName
) || 0==uprv_strcmp(locale
,explicitFallbackName
)) {
2105 /* end of fallback; even root does not have the requested item either */
2108 *pErrorCode
=U_MISSING_RESOURCE_ERROR
;
2112 /* could not find the table, or its item, try to fall back to a different RB and table */
2113 errorCode
=U_ZERO_ERROR
;
2114 if(efnLen
> 0 && overrideExplicitFallback
== FALSE
){
2115 /* continue the fallback lookup with the explicit fallback that is requested */
2116 locale
= explicitFallbackName
;
2118 uloc_getParent(locale
, localeBuffer
, sizeof(localeBuffer
), &errorCode
);
2119 if(U_FAILURE(errorCode
) || errorCode
==U_STRING_NOT_TERMINATED_WARNING
) {
2120 /* error getting the parent locale ID - should never happen */
2121 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
2125 /* continue the fallback lookup with the parent locale ID */
2126 locale
=localeBuffer
;
2128 /* adjust error code as we fall back */
2129 if (uprv_strlen(locale
) == 0) /* Falling back to root locale? */
2130 *pErrorCode
= U_USING_DEFAULT_WARNING
;
2131 else if (*pErrorCode
!= U_USING_DEFAULT_WARNING
)
2132 *pErrorCode
= U_USING_FALLBACK_WARNING
;
2134 /* done with the locale string - ready to close table and rb */
2141 _getStringOrCopyKey(const char *path
, const char *locale
,
2142 const char *tableKey
,
2143 const char* subTableKey
,
2144 const char *itemKey
,
2145 const char *substitute
,
2146 UChar
*dest
, int32_t destCapacity
,
2147 UErrorCode
*pErrorCode
) {
2148 const UChar
*s
= NULL
;
2152 /* top-level item: normal resource bundle access */
2153 UResourceBundle
*rb
;
2155 rb
=ures_open(path
, locale
, pErrorCode
);
2156 if(U_SUCCESS(*pErrorCode
)) {
2157 s
=ures_getStringByKey(rb
, tableKey
, &length
, pErrorCode
);
2158 /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
2162 /* second-level item, use special fallback */
2163 s
=_res_getTableStringWithFallback(path
, locale
,
2170 if(U_SUCCESS(*pErrorCode
)) {
2171 int32_t copyLength
=uprv_min(length
, destCapacity
);
2172 if(copyLength
>0 && s
!= NULL
) {
2173 u_memcpy(dest
, s
, copyLength
);
2176 /* no string from a resource bundle: convert the substitute */
2177 length
=(int32_t)uprv_strlen(substitute
);
2178 u_charsToUChars(substitute
, dest
, uprv_min(length
, destCapacity
));
2179 *pErrorCode
=U_USING_DEFAULT_WARNING
;
2182 return u_terminateUChars(dest
, destCapacity
, length
, pErrorCode
);
2186 _getDisplayNameForComponent(const char *locale
,
2187 const char *displayLocale
,
2188 UChar
*dest
, int32_t destCapacity
,
2189 int32_t (*getter
)(const char *, char *, int32_t, UErrorCode
*),
2191 UErrorCode
*pErrorCode
) {
2192 char localeBuffer
[ULOC_FULLNAME_CAPACITY
*4];
2194 UErrorCode localStatus
;
2196 /* argument checking */
2197 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2201 if(destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)) {
2202 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2206 localStatus
= U_ZERO_ERROR
;
2207 length
=(*getter
)(locale
, localeBuffer
, sizeof(localeBuffer
), &localStatus
);
2208 if(U_FAILURE(localStatus
) || localStatus
==U_STRING_NOT_TERMINATED_WARNING
) {
2209 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2213 return u_terminateUChars(dest
, destCapacity
, 0, pErrorCode
);
2216 return _getStringOrCopyKey(NULL
, displayLocale
,
2217 tag
, NULL
, localeBuffer
,
2223 U_CAPI
int32_t U_EXPORT2
2224 uloc_getDisplayLanguage(const char *locale
,
2225 const char *displayLocale
,
2226 UChar
*dest
, int32_t destCapacity
,
2227 UErrorCode
*pErrorCode
) {
2228 return _getDisplayNameForComponent(locale
, displayLocale
, dest
, destCapacity
,
2229 uloc_getLanguage
, _kLanguages
, pErrorCode
);
2232 U_CAPI
int32_t U_EXPORT2
2233 uloc_getDisplayScript(const char* locale
,
2234 const char* displayLocale
,
2235 UChar
*dest
, int32_t destCapacity
,
2236 UErrorCode
*pErrorCode
)
2238 return _getDisplayNameForComponent(locale
, displayLocale
, dest
, destCapacity
,
2239 uloc_getScript
, _kScripts
, pErrorCode
);
2242 U_CAPI
int32_t U_EXPORT2
2243 uloc_getDisplayCountry(const char *locale
,
2244 const char *displayLocale
,
2245 UChar
*dest
, int32_t destCapacity
,
2246 UErrorCode
*pErrorCode
) {
2247 return _getDisplayNameForComponent(locale
, displayLocale
, dest
, destCapacity
,
2248 uloc_getCountry
, _kCountries
, pErrorCode
);
2252 * TODO separate variant1_variant2_variant3...
2253 * by getting each tag's display string and concatenating them with ", "
2254 * in between - similar to uloc_getDisplayName()
2256 U_CAPI
int32_t U_EXPORT2
2257 uloc_getDisplayVariant(const char *locale
,
2258 const char *displayLocale
,
2259 UChar
*dest
, int32_t destCapacity
,
2260 UErrorCode
*pErrorCode
) {
2261 return _getDisplayNameForComponent(locale
, displayLocale
, dest
, destCapacity
,
2262 uloc_getVariant
, _kVariants
, pErrorCode
);
2265 U_CAPI
int32_t U_EXPORT2
2266 uloc_getDisplayName(const char *locale
,
2267 const char *displayLocale
,
2268 UChar
*dest
, int32_t destCapacity
,
2269 UErrorCode
*pErrorCode
)
2271 int32_t length
, length2
, length3
= 0;
2272 UBool hasLanguage
, hasScript
, hasCountry
, hasVariant
, hasKeywords
;
2273 UEnumeration
* keywordEnum
= NULL
;
2274 int32_t keywordCount
= 0;
2275 const char *keyword
= NULL
;
2276 int32_t keywordLen
= 0;
2277 char keywordValue
[256];
2278 int32_t keywordValueLen
= 0;
2280 /* argument checking */
2281 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2285 if(destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)) {
2286 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2291 * if there is a language, then write "language (country, variant)"
2292 * otherwise write "country, variant"
2295 /* write the language */
2296 length
=uloc_getDisplayLanguage(locale
, displayLocale
,
2299 hasLanguage
= length
>0;
2303 if(length
<destCapacity
) {
2307 if(length
<destCapacity
) {
2313 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2314 /* keep preflighting */
2315 *pErrorCode
=U_ZERO_ERROR
;
2318 /* append the script */
2319 if(length
<destCapacity
) {
2320 length2
=uloc_getDisplayScript(locale
, displayLocale
,
2321 dest
+length
, destCapacity
-length
,
2324 length2
=uloc_getDisplayScript(locale
, displayLocale
,
2328 hasScript
= length2
>0;
2333 if(length
<destCapacity
) {
2337 if(length
<destCapacity
) {
2343 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2344 /* keep preflighting */
2345 *pErrorCode
=U_ZERO_ERROR
;
2348 /* append the country */
2349 if(length
<destCapacity
) {
2350 length2
=uloc_getDisplayCountry(locale
, displayLocale
,
2351 dest
+length
, destCapacity
-length
,
2354 length2
=uloc_getDisplayCountry(locale
, displayLocale
,
2358 hasCountry
= length2
>0;
2363 if(length
<destCapacity
) {
2367 if(length
<destCapacity
) {
2373 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2374 /* keep preflighting */
2375 *pErrorCode
=U_ZERO_ERROR
;
2378 /* append the variant */
2379 if(length
<destCapacity
) {
2380 length2
=uloc_getDisplayVariant(locale
, displayLocale
,
2381 dest
+length
, destCapacity
-length
,
2384 length2
=uloc_getDisplayVariant(locale
, displayLocale
,
2388 hasVariant
= length2
>0;
2393 if(length
<destCapacity
) {
2397 if(length
<destCapacity
) {
2403 keywordEnum
= uloc_openKeywords(locale
, pErrorCode
);
2405 for(keywordCount
= uenum_count(keywordEnum
, pErrorCode
); keywordCount
> 0 ; keywordCount
--){
2406 if(U_FAILURE(*pErrorCode
)){
2409 /* the uenum_next returns NUL terminated string */
2410 keyword
= uenum_next(keywordEnum
, &keywordLen
, pErrorCode
);
2411 if(length
+ length3
< destCapacity
) {
2412 length3
+= uloc_getDisplayKeyword(keyword
, displayLocale
, dest
+length
+length3
, destCapacity
-length
-length3
, pErrorCode
);
2414 length3
+= uloc_getDisplayKeyword(keyword
, displayLocale
, NULL
, 0, pErrorCode
);
2416 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2417 /* keep preflighting */
2418 *pErrorCode
=U_ZERO_ERROR
;
2420 keywordValueLen
= uloc_getKeywordValue(locale
, keyword
, keywordValue
, 256, pErrorCode
);
2421 if(keywordValueLen
) {
2422 if(length
+ length3
< destCapacity
) {
2423 dest
[length
+ length3
] = 0x3D;
2426 if(length
+ length3
< destCapacity
) {
2427 length3
+= uloc_getDisplayKeywordValue(locale
, keyword
, displayLocale
, dest
+length
+length3
, destCapacity
-length
-length3
, pErrorCode
);
2429 length3
+= uloc_getDisplayKeywordValue(locale
, keyword
, displayLocale
, NULL
, 0, pErrorCode
);
2431 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2432 /* keep preflighting */
2433 *pErrorCode
=U_ZERO_ERROR
;
2436 if(keywordCount
> 1) {
2437 if(length
+ length3
+ 1 < destCapacity
&& keywordCount
) {
2438 dest
[length
+ length3
]=0x2c;
2439 dest
[length
+ length3
+1]=0x20;
2441 length3
++; /* ',' */
2442 length3
++; /* ' ' */
2445 uenum_close(keywordEnum
);
2447 hasKeywords
= length3
> 0;
2452 if ((hasScript
&& !hasCountry
)
2453 || ((hasScript
|| hasCountry
) && !hasVariant
&& !hasKeywords
)
2454 || ((hasScript
|| hasCountry
|| hasVariant
) && !hasKeywords
)
2455 || (hasLanguage
&& !hasScript
&& !hasCountry
&& !hasVariant
&& !hasKeywords
))
2457 /* remove ", " or " (" */
2461 if (hasLanguage
&& (hasScript
|| hasCountry
|| hasVariant
|| hasKeywords
)) {
2463 if(length
<destCapacity
) {
2469 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2470 /* keep preflighting */
2471 *pErrorCode
=U_ZERO_ERROR
;
2474 return u_terminateUChars(dest
, destCapacity
, length
, pErrorCode
);
2477 U_CAPI
int32_t U_EXPORT2
2478 uloc_getDisplayKeyword(const char* keyword
,
2479 const char* displayLocale
,
2481 int32_t destCapacity
,
2482 UErrorCode
* status
){
2484 /* argument checking */
2485 if(status
==NULL
|| U_FAILURE(*status
)) {
2489 if(destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)) {
2490 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
2495 /* pass itemKey=NULL to look for a top-level item */
2496 return _getStringOrCopyKey(NULL
, displayLocale
,
2506 #define UCURRENCY_DISPLAY_NAME_INDEX 1
2508 U_CAPI
int32_t U_EXPORT2
2509 uloc_getDisplayKeywordValue( const char* locale
,
2510 const char* keyword
,
2511 const char* displayLocale
,
2513 int32_t destCapacity
,
2514 UErrorCode
* status
){
2517 char keywordValue
[ULOC_FULLNAME_CAPACITY
*4];
2518 int32_t capacity
= ULOC_FULLNAME_CAPACITY
*4;
2519 int32_t keywordValueLen
=0;
2521 /* argument checking */
2522 if(status
==NULL
|| U_FAILURE(*status
)) {
2526 if(destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)) {
2527 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
2531 /* get the keyword value */
2533 keywordValueLen
= uloc_getKeywordValue(locale
, keyword
, keywordValue
, capacity
, status
);
2536 * if the keyword is equal to currency .. then to get the display name
2537 * we need to do the fallback ourselves
2539 if(uprv_stricmp(keyword
, _kCurrency
)==0){
2541 int32_t dispNameLen
= 0;
2542 const UChar
*dispName
= NULL
;
2544 UResourceBundle
*bundle
= ures_open(NULL
, displayLocale
, status
);
2545 UResourceBundle
*currencies
= ures_getByKey(bundle
, _kCurrencies
, NULL
, status
);
2546 UResourceBundle
*currency
= ures_getByKeyWithFallback(currencies
, keywordValue
, NULL
, status
);
2548 dispName
= ures_getStringByIndex(currency
, UCURRENCY_DISPLAY_NAME_INDEX
, &dispNameLen
, status
);
2550 /*close the bundles */
2551 ures_close(currency
);
2552 ures_close(currencies
);
2555 if(U_FAILURE(*status
)){
2556 if(*status
== U_MISSING_RESOURCE_ERROR
){
2557 /* we just want to write the value over if nothing is available */
2558 *status
= U_USING_DEFAULT_WARNING
;
2564 /* now copy the dispName over if not NULL */
2565 if(dispName
!= NULL
){
2566 if(dispNameLen
<= destCapacity
){
2567 uprv_memcpy(dest
, dispName
, dispNameLen
* U_SIZEOF_UCHAR
);
2568 return u_terminateUChars(dest
, destCapacity
, dispNameLen
, status
);
2570 *status
= U_BUFFER_OVERFLOW_ERROR
;
2574 /* we have not found the display name for the value .. just copy over */
2575 if(keywordValueLen
<= destCapacity
){
2576 u_charsToUChars(keywordValue
, dest
, keywordValueLen
);
2577 return u_terminateUChars(dest
, destCapacity
, keywordValueLen
, status
);
2579 *status
= U_BUFFER_OVERFLOW_ERROR
;
2580 return keywordValueLen
;
2587 return _getStringOrCopyKey(NULL
, displayLocale
,
2596 /* ### Get available **************************************************/
2598 static UBool U_CALLCONV
uloc_cleanup(void) {
2601 if (_installedLocales
) {
2602 temp
= _installedLocales
;
2603 _installedLocales
= NULL
;
2605 _installedLocalesCount
= 0;
2612 static void _load_installedLocales()
2614 UBool localesLoaded
;
2617 localesLoaded
= _installedLocales
!= NULL
;
2620 if (localesLoaded
== FALSE
) {
2621 UResourceBundle
*index
= NULL
;
2622 UResourceBundle installed
;
2623 UErrorCode status
= U_ZERO_ERROR
;
2626 int32_t localeCount
;
2628 ures_initStackObject(&installed
);
2629 index
= ures_openDirect(NULL
, _kIndexLocaleName
, &status
);
2630 ures_getByKey(index
, _kIndexTag
, &installed
, &status
);
2632 if(U_SUCCESS(status
)) {
2633 localeCount
= ures_getSize(&installed
);
2634 temp
= (char **) uprv_malloc(sizeof(char*) * (localeCount
+1));
2636 ures_resetIterator(&installed
);
2637 while(ures_hasNext(&installed
)) {
2638 ures_getNextString(&installed
, NULL
, (const char **)&temp
[i
++], &status
);
2643 if (_installedLocales
== NULL
)
2645 _installedLocales
= temp
;
2646 _installedLocalesCount
= localeCount
;
2648 ucln_common_registerCleanup(UCLN_COMMON_ULOC
, uloc_cleanup
);
2653 ures_close(&installed
);
2659 U_CAPI
const char* U_EXPORT2
2660 uloc_getAvailable(int32_t offset
)
2663 _load_installedLocales();
2665 if (offset
> _installedLocalesCount
)
2667 return _installedLocales
[offset
];
2670 U_CAPI
int32_t U_EXPORT2
2671 uloc_countAvailable()
2673 _load_installedLocales();
2674 return _installedLocalesCount
;
2678 * Returns a list of all language codes defined in ISO 639. This is a pointer
2679 * to an array of pointers to arrays of char. All of these pointers are owned
2680 * by ICU-- do not delete them, and do not write through them. The array is
2681 * terminated with a null pointer.
2683 U_CAPI
const char* const* U_EXPORT2
2684 uloc_getISOLanguages()
2690 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2691 * pointer to an array of pointers to arrays of char. All of these pointers are
2692 * owned by ICU-- do not delete them, and do not write through them. The array is
2693 * terminated with a null pointer.
2695 U_CAPI
const char* const* U_EXPORT2
2696 uloc_getISOCountries()
/* this function to be moved into cstring.c later */
/* decimal separator that the C runtime prints; 0 until first probed */
static char gDecimal = 0;

/*
 * strtod() variant for input that always uses '.' as decimal point.
 *
 * If the runtime formats 1.0 with some other separator, the first 29
 * characters of `start` are copied, the first '.' in the copy is replaced
 * by that separator, and the copy is parsed instead. *end (if not NULL)
 * is reported relative to `start`.
 */
static /* U_CAPI */
double
/* U_EXPORT2 */
_uloc_strtod(const char *start, char **end) {
    char scratch[30];
    char *point;
    char *stop;
    double value;

    if (gDecimal == 0) {
        char probe[5];
        /* For machines that decide to change the decimal on you,
        and try to be too smart with localization.
        This normally should be just a '.'. */
        sprintf(probe, "%+1.1f", 1.0);
        gDecimal = probe[2];
    }

    if (gDecimal == '.') {
        return uprv_strtod(start, end); /* fall through to OS */
    }

    uprv_strncpy(scratch, start, 29);
    scratch[29] = 0;

    point = uprv_strchr(scratch, '.');
    if (point == NULL) {
        return uprv_strtod(start, end); /* no decimal point */
    }
    *point = gDecimal;

    value = uprv_strtod(scratch, &stop);
    if (end != NULL) {
        *end = (char*)(start + (stop - scratch)); /* cast away const (to follow uprv_strtod API.) */
    }
    return value;
}
/* One parsed entry of an HTTP Accept-Language list. */
typedef struct {
    double q;       /* quality value of the entry */
    char *locale;   /* locale ID string of the entry */
#if defined(ULOC_DEBUG_PURIFY)
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
#endif
} _acceptLangItem;
2749 static int32_t U_CALLCONV
2750 uloc_acceptLanguageCompare(const void *context
, const void *a
, const void *b
)
2752 const _acceptLangItem
*aa
= (const _acceptLangItem
*)a
;
2753 const _acceptLangItem
*bb
= (const _acceptLangItem
*)b
;
2757 rc
= -1; /* A > B */
2758 } else if(bb
->q
> aa
->q
) {
2765 rc
= uprv_stricmp(aa
->locale
, bb
->locale
);
2768 #if defined(ULOC_DEBUG)
2769 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2779 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2782 U_CAPI
int32_t U_EXPORT2
2783 uloc_acceptLanguageFromHTTP(char *result
, int32_t resultAvailable
, UAcceptResult
*outResult
,
2784 const char *httpAcceptLanguage
,
2785 UEnumeration
* availableLocales
,
2789 _acceptLangItem smallBuffer
[30];
2791 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2793 const char *itemEnd
;
2794 const char *paramEnd
;
2799 int32_t l
= uprv_strlen(httpAcceptLanguage
);
2803 jSize
= sizeof(smallBuffer
)/sizeof(smallBuffer
[0]);
2804 if(U_FAILURE(*status
)) {
2808 for(s
=httpAcceptLanguage
;s
&&*s
;) {
2809 while(isspace(*s
)) /* eat space at the beginning */
2811 itemEnd
=uprv_strchr(s
,',');
2812 paramEnd
=uprv_strchr(s
,';');
2814 itemEnd
= httpAcceptLanguage
+l
; /* end of string */
2816 if(paramEnd
&& paramEnd
<itemEnd
) {
2817 /* semicolon (;) is closer than end (,) */
2822 while(isspace(*t
)) {
2828 while(isspace(*t
)) {
2831 j
[n
].q
= _uloc_strtod(t
,NULL
);
2833 /* no semicolon - it's 1.0 */
2837 #if defined(ULOC_DEBUG_PURIFY)
2838 j
[n
].dummy
=0xDECAFBAD;
2840 /* eat spaces prior to semi */
2841 for(t
=(paramEnd
-1);(paramEnd
>s
)&&isspace(*t
);t
--)
2843 j
[n
].locale
= uprv_strndup(s
,(t
+1)-s
);
2844 uloc_canonicalize(j
[n
].locale
,tmp
,sizeof(tmp
)/sizeof(tmp
[0]),status
);
2845 if(strcmp(j
[n
].locale
,tmp
)) {
2846 uprv_free(j
[n
].locale
);
2847 j
[n
].locale
=uprv_strdup(tmp
);
2849 #if defined(ULOC_DEBUG)
2850 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2854 while(*s
==',') { /* eat duplicate commas */
2858 if(j
==smallBuffer
) { /* overflowed the small buffer. */
2859 j
= uprv_malloc(sizeof(j
[0])*(jSize
*2));
2861 uprv_memcpy(j
,smallBuffer
,sizeof(j
[0])*jSize
);
2863 #if defined(ULOC_DEBUG)
2864 fprintf(stderr
,"malloced at size %d\n", jSize
);
2867 j
= uprv_realloc(j
, sizeof(j
[0])*jSize
*2);
2868 #if defined(ULOC_DEBUG)
2869 fprintf(stderr
,"re-alloced at size %d\n", jSize
);
2874 *status
= U_MEMORY_ALLOCATION_ERROR
;
2879 uprv_sortArray(j
, n
, sizeof(j
[0]), uloc_acceptLanguageCompare
, NULL
, TRUE
, status
);
2880 if(U_FAILURE(*status
)) {
2881 if(j
!= smallBuffer
) {
2882 #if defined(ULOC_DEBUG)
2883 fprintf(stderr
,"freeing j %p\n", j
);
2889 strs
= uprv_malloc((size_t)(sizeof(strs
[0])*n
));
2891 #if defined(ULOC_DEBUG)
2892 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2894 strs
[i
]=j
[i
].locale
;
2896 res
= uloc_acceptLanguage(result
, resultAvailable
, outResult
,
2897 (const char**)strs
, n
, availableLocales
, status
);
2902 if(j
!= smallBuffer
) {
2903 #if defined(ULOC_DEBUG)
2904 fprintf(stderr
,"freeing j %p\n", j
);
2912 U_CAPI
int32_t U_EXPORT2
2913 uloc_acceptLanguage(char *result
, int32_t resultAvailable
,
2914 UAcceptResult
*outResult
, const char **acceptList
,
2915 int32_t acceptListCount
,
2916 UEnumeration
* availableLocales
,
2922 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2924 char **fallbackList
;
2925 if(U_FAILURE(*status
)) {
2928 fallbackList
= uprv_malloc((size_t)(sizeof(fallbackList
[0])*acceptListCount
));
2929 if(fallbackList
==NULL
) {
2930 *status
= U_MEMORY_ALLOCATION_ERROR
;
2933 for(i
=0;i
<acceptListCount
;i
++) {
2934 #if defined(ULOC_DEBUG)
2935 fprintf(stderr
,"%02d: %s\n", i
, acceptList
[i
]);
2937 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2938 #if defined(ULOC_DEBUG)
2939 fprintf(stderr
," %s\n", l
);
2941 len
= uprv_strlen(l
);
2942 if(!uprv_strcmp(acceptList
[i
], l
)) {
2944 *outResult
= ULOC_ACCEPT_VALID
;
2946 #if defined(ULOC_DEBUG)
2947 fprintf(stderr
, "MATCH! %s\n", l
);
2950 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
2953 uprv_free(fallbackList
[j
]);
2955 uprv_free(fallbackList
);
2956 return u_terminateChars(result
, resultAvailable
, len
, status
);
2962 uenum_reset(availableLocales
, status
);
2963 /* save off parent info */
2964 if(uloc_getParent(acceptList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
2965 fallbackList
[i
] = uprv_strdup(tmp
);
2971 for(maxLen
--;maxLen
>0;maxLen
--) {
2972 for(i
=0;i
<acceptListCount
;i
++) {
2973 if(fallbackList
[i
] && ((int32_t)uprv_strlen(fallbackList
[i
])==maxLen
)) {
2974 #if defined(ULOC_DEBUG)
2975 fprintf(stderr
,"Try: [%s]", fallbackList
[i
]);
2977 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2978 #if defined(ULOC_DEBUG)
2979 fprintf(stderr
," %s\n", l
);
2981 len
= uprv_strlen(l
);
2982 if(!uprv_strcmp(fallbackList
[i
], l
)) {
2984 *outResult
= ULOC_ACCEPT_FALLBACK
;
2986 #if defined(ULOC_DEBUG)
2987 fprintf(stderr
, "fallback MATCH! %s\n", l
);
2990 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
2992 for(i
=0;i
<acceptListCount
;i
++) {
2993 uprv_free(fallbackList
[i
]);
2995 uprv_free(fallbackList
);
2996 return u_terminateChars(result
, resultAvailable
, len
, status
);
2999 uenum_reset(availableLocales
, status
);
3001 if(uloc_getParent(fallbackList
[i
], tmp
, sizeof(tmp
)/sizeof(tmp
[0]), status
)!=0) {
3002 uprv_free(fallbackList
[i
]);
3003 fallbackList
[i
] = uprv_strdup(tmp
);
3005 uprv_free(fallbackList
[i
]);
3011 *outResult
= ULOC_ACCEPT_FAILED
;
3014 for(i
=0;i
<acceptListCount
;i
++) {
3015 uprv_free(fallbackList
[i
]);
3017 uprv_free(fallbackList
);