1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
11 * Modification History:
13 * Date Name Description
14 * 04/01/97 aliu Creation.
15 * 08/21/98 stephen JDK 1.2 sync
16 * 12/08/98 rtg New Locale implementation and C API
17 * 03/15/99 damiba overhaul.
18 * 04/06/99 stephen changed setDefault() to realloc and copy
19 * 06/14/99 stephen Changed calls to ures_open for new params
20 * 07/21/99 stephen Modified setDefault() to propagate to C++
21 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
22 * brought canonicalization code into line with spec
23 *****************************************************************************/
26 POSIX's locale format, from putil.c: [no spaces]
28 ll [ _CC ] [ . MM ] [ @ VV]
30 l = lang, C = ctry, M = charmap, V = variant
33 #include "unicode/utypes.h"
34 #include "unicode/ustring.h"
35 #include "unicode/uloc.h"
49 #include <stdio.h> /* for sprintf */
53 /* ### Declarations **************************************************/
55 /* Locale stuff from locid.cpp */
56 U_CFUNC
void locale_set_default(const char *id
);
57 U_CFUNC
const char *locale_get_default(void);
59 locale_getKeywords(const char *localeID
,
61 char *keywords
, int32_t keywordCapacity
,
62 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
66 /* ### Data tables **************************************************/
69 * Table of language codes, both 2- and 3-letter, with preference
70 * given to 2-letter codes where possible. Includes 3-letter codes
71 * that lack a 2-letter equivalent.
73 * This list must be in sorted order. This list is returned directly
74 * to the user by some API.
76 * This list must be kept in sync with LANGUAGES_3, with corresponding
79 * This table should be terminated with a NULL entry, followed by a
80 * second list, and another NULL entry. The first list is visible to
81 * user code when this array is returned by API. The second list
82 * contains codes we support, but do not expose through user API.
86 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
87 * include the revisions up to 2001/7/27 *CWB*
89 * The 3 character codes are the terminology codes like RFC 3066. This
90 * is compatible with prior ICU codes
92 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
93 * table but now at the end of the table because 3 character codes are
94 * duplicates. This avoids bad searches going from 3 to 2 character
97 * The range qaa-qtz is reserved for local use
99 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
100 /* ISO639 table version is 20150505 */
101 static const char * const LANGUAGES
[] = {
102 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
103 "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
104 "aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
105 "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
106 "asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
107 "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
108 "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
109 "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
110 "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
111 "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
112 "ca", "cad", "car", "cay", "cch", "ce", "ceb", "cgg",
113 "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
114 "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
115 "cs", "csb", "cu", "cv", "cy",
116 "da", "dak", "dar", "dav", "de", "del", "den", "dgr",
117 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
118 "dyo", "dyu", "dz", "dzg",
119 "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
120 "en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
122 "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
123 "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
125 "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
126 "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
127 "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
128 "gur", "guz", "gv", "gwi",
129 "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
130 "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
132 "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
133 "ilo", "inh", "io", "is", "it", "iu", "izh",
134 "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
136 "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
137 "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
138 "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
139 "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
140 "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
141 "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
143 "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
144 "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
145 "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
146 "lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
147 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
148 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
149 "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
150 "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj",
151 "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
152 "my", "mye", "myv", "mzn",
153 "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
154 "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
155 "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
156 "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
157 "oc", "oj", "om", "or", "os", "osa", "ota",
158 "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
159 "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
160 "pon", "prg", "pro", "ps", "pt",
162 "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
163 "rof", "rom", "rtm", "ru", "rue", "rug", "rup",
165 "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
166 "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
167 "se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
168 "sgs", "shi", "shn", "shu", "si", "sid", "sk",
169 "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
170 "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
171 "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
172 "sv", "sw", "swb", "swc", "syc", "syr", "szl",
173 "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
174 "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
175 "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
176 "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
177 "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
178 "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
179 "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
181 "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
182 "xal", "xh", "xmf", "xog",
183 "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
184 "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
187 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
191 static const char* const DEPRECATED_LANGUAGES
[]={
192 "in", "iw", "ji", "jw", NULL
, NULL
194 static const char* const REPLACEMENT_LANGUAGES
[]={
195 "id", "he", "yi", "jv", NULL
, NULL
199 * Table of 3-letter language codes.
201 * This is a lookup table used to convert 3-letter language codes to
202 * their 2-letter equivalent, where possible. It must be kept in sync
203 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
204 * same language as LANGUAGES_3[i]. The commented-out lines are
205 * copied from LANGUAGES to make eyeballing this baby easier.
207 * Where a 3-letter language code has no 2-letter equivalent, the
208 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
210 * This table should be terminated with a NULL entry, followed by a
211 * second list, and another NULL entry. The two lists correspond to
212 * the two lists in LANGUAGES.
214 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
215 /* ISO639 table version is 20150505 */
216 static const char * const LANGUAGES_3
[] = {
217 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
218 "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
219 "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
220 "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
221 "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
222 "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
223 "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
224 "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
225 "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
226 "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
227 "cat", "cad", "car", "cay", "cch", "che", "ceb", "cgg",
228 "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
229 "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
230 "ces", "csb", "chu", "chv", "cym",
231 "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
232 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
233 "dyo", "dyu", "dzo", "dzg",
234 "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
235 "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
237 "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
238 "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
240 "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
241 "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
242 "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
243 "gur", "guz", "glv", "gwi",
244 "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
245 "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
247 "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
248 "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
249 "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
251 "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
252 "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
253 "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
254 "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
255 "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
256 "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
258 "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
259 "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
260 "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
261 "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
262 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
263 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
264 "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
265 "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
266 "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
267 "mya", "mye", "myv", "mzn",
268 "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
269 "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
270 "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
271 "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
272 "oci", "oji", "orm", "ori", "oss", "osa", "ota",
273 "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
274 "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
275 "pon", "prg", "pro", "pus", "por",
277 "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
278 "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
280 "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
281 "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
282 "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
283 "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
284 "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
285 "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
286 "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
287 "swe", "swa", "swb", "swc", "syc", "syr", "szl",
288 "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
289 "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
290 "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
291 "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
292 "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
293 "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
294 "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
296 "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
297 "xal", "xho", "xmf", "xog",
298 "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
299 "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
302 /* "in", "iw", "ji", "jw", "sh", */
303 "ind", "heb", "yid", "jaw", "srp",
308 * Table of 2-letter country codes.
310 * This list must be in sorted order. This list is returned directly
311 * to the user by some API.
313 * This list must be kept in sync with COUNTRIES_3, with corresponding
316 * This table should be terminated with a NULL entry, followed by a
317 * second list, and another NULL entry. The first list is visible to
318 * user code when this array is returned by API. The second list
319 * contains codes we support, but do not expose through user API.
323 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
324 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
325 * new codes keeping the old ones for compatibility updated to include
326 * 1999/12/03 revisions *CWB*
328 * RO(ROM) is now RO(ROU) according to
329 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
331 static const char * const COUNTRIES
[] = {
332 "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM",
333 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
334 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
335 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
336 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
337 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR",
338 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
339 "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
340 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
341 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
342 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
343 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
344 "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
345 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
346 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
347 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
348 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
349 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
350 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
351 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
352 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
353 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
354 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
355 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
356 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
357 "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ",
358 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
359 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
360 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
361 "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
363 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
367 static const char* const DEPRECATED_COUNTRIES
[] = {
368 "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL
, NULL
/* deprecated country list */
370 static const char* const REPLACEMENT_COUNTRIES
[] = {
371 /* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
372 "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL
, NULL
/* replacement country codes */
376 * Table of 3-letter country codes.
378 * This is a lookup table used to convert 3-letter country codes to
379 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
380 * For all valid i, COUNTRIES[i] must refer to the same country as
381 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
382 * to make eyeballing this baby easier.
384 * This table should be terminated with a NULL entry, followed by a
385 * second list, and another NULL entry. The two lists correspond to
386 * the two lists in COUNTRIES.
388 static const char * const COUNTRIES_3
[] = {
389 /* "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
390 "ASC", "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
391 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
392 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
393 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
394 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
395 /* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
396 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
397 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
398 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
399 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR", */
400 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CPT", "CRI",
401 /* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
402 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
403 /* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
404 "DMA", "DOM", "DZA", "EA ", "ECU", "EST", "EGY", "ESH", "ERI", /* no valid 3-letter code for EA */
405 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
406 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
407 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
408 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
409 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
410 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
411 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
412 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
413 /* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
414 "IC ", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", /* no valid 3-letter code for IC */
415 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
416 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
417 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
418 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
419 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
420 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
421 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
422 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
423 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
424 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
425 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
426 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
427 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
428 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
429 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
430 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
431 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
432 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
433 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
434 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
435 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
436 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
437 /* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
438 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
439 /* "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ", */
440 "SXM", "SYR", "SWZ", "TAA", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
441 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
442 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
443 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
444 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
445 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
446 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
447 /* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
448 "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
450 /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
451 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
/**
 * One row of the locale-ID canonicalization table.
 *
 * Each entry pairs an input locale ID with its canonical replacement and,
 * optionally, a single keyword/value pair. keyword and value are either
 * both set or both NULL.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if keyword == NULL */
} CanonicalizationMap;
463 * A map to canonicalize locale IDs. This handles a variety of
464 * different semantic kinds of transformations.
466 static const CanonicalizationMap CANONICALIZE_MAP
[] = {
467 { "", "en_US_POSIX", NULL
, NULL
}, /* .NET name */
468 { "c", "en_US_POSIX", NULL
, NULL
}, /* POSIX name */
469 { "posix", "en_US_POSIX", NULL
, NULL
}, /* POSIX name (alias of C) */
470 { "art_LOJBAN", "jbo", NULL
, NULL
}, /* registered name */
471 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL
, NULL
}, /* .NET name */
472 { "az_AZ_LATN", "az_Latn_AZ", NULL
, NULL
}, /* .NET name */
473 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
474 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
475 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
476 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
477 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
478 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
479 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
480 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
481 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
482 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
483 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
484 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
485 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
486 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
487 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
488 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
489 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
490 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
491 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
492 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
493 { "nb_NO_NY", "nn_NO", NULL
, NULL
}, /* "markus said this was ok" :-) */
494 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
495 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
496 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
497 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL
, NULL
}, /* .NET name */
498 { "sr_SP_LATN", "sr_Latn_RS", NULL
, NULL
}, /* .NET name */
499 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL
, NULL
}, /* Linux name */
500 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
501 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL
, NULL
}, /* Linux name */
502 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL
, NULL
}, /* .NET name */
503 { "uz_UZ_LATN", "uz_Latn_UZ", NULL
, NULL
}, /* .NET name */
504 { "zh_CHS", "zh_Hans", NULL
, NULL
}, /* .NET name */
505 { "zh_CHT", "zh_Hant", NULL
, NULL
}, /* .NET name */
506 { "zh_GAN", "gan", NULL
, NULL
}, /* registered name */
507 { "zh_GUOYU", "zh", NULL
, NULL
}, /* registered name */
508 { "zh_HAKKA", "hak", NULL
, NULL
}, /* registered name */
509 { "zh_MIN_NAN", "nan", NULL
, NULL
}, /* registered name */
510 { "zh_WUU", "wuu", NULL
, NULL
}, /* registered name */
511 { "zh_XIANG", "hsn", NULL
, NULL
}, /* registered name */
512 { "zh_YUE", "yue", NULL
, NULL
}, /* registered name */
515 typedef struct VariantMap
{
516 const char *variant
; /* input ID */
517 const char *keyword
; /* keyword, or NULL if none */
518 const char *value
; /* keyword value, or NULL if kw==NULL */
521 static const VariantMap VARIANT_MAP
[] = {
522 { "EURO", "currency", "EUR" },
523 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
524 { "STROKE", "collation", "stroke" } /* Solaris variant */
527 /* ### BCP47 Conversion *******************************************/
/*
 * Test whether a locale ID looks like a BCP47 language tag: it is non-NULL,
 * contains no '@', and its shortest subtag (as reported by
 * getShortestSubtagLength) is exactly one character long.
 *
 * The argument is evaluated more than once, so it must have no side effects.
 * The macro operates on its own argument only; it does not depend on any
 * variable named in the caller's scope.
 */
#define _hasBCP47Extension(id) \
    ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
530 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
531 #define _ConvertBCP47(finalID, id, buffer, length,err) \
532 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
537 /* Gets the size of the shortest subtag in the given localeID. */
538 static int32_t getShortestSubtagLength(const char *localeID
) {
539 int32_t localeIDLength
= uprv_strlen(localeID
);
540 int32_t length
= localeIDLength
;
541 int32_t tmpLength
= 0;
545 for (i
= 0; i
< localeIDLength
; i
++) {
546 if (localeID
[i
] != '_' && localeID
[i
] != '-') {
553 if (tmpLength
!= 0 && tmpLength
< length
) {
563 /* ### Keywords **************************************************/
/* True if c is an ASCII decimal digit ('0'..'9'). Evaluates c twice. */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
/* True if c is accepted by uprv_isASCIILetter or is an ASCII digit. Evaluates c more than once. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c))
/* Punctuation/symbols allowed in legacy key values: '_', '-', '+' and '/'. */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size, including the terminating NUL, of the buffers that hold a canonicalized keyword name. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of keywords collected from a single locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
572 U_CAPI
const char * U_EXPORT2
573 locale_getKeywordsStart(const char *localeID
) {
574 const char *result
= NULL
;
575 if((result
= uprv_strchr(localeID
, '@')) != NULL
) {
578 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
580 /* We do this because the @ sign is variant, and the @ sign used on one
581 EBCDIC machine won't be compiled the same way on other EBCDIC based
583 static const uint8_t ebcdicSigns
[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
584 const uint8_t *charToFind
= ebcdicSigns
;
586 if((result
= uprv_strchr(localeID
, *charToFind
)) != NULL
) {
597 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
598 * @param keywordName incoming name to be canonicalized
599 * @param status return status (keyword too long)
600 * @return length of the keyword name
602 static int32_t locale_canonKeywordName(char *buf
, const char *keywordName
, UErrorCode
*status
)
604 int32_t keywordNameLen
= 0;
606 for (; *keywordName
!= 0; keywordName
++) {
607 if (!UPRV_ISALPHANUM(*keywordName
)) {
608 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* malformed keyword name */
611 if (keywordNameLen
< ULOC_KEYWORD_BUFFER_LEN
- 1) {
612 buf
[keywordNameLen
++] = uprv_tolower(*keywordName
);
614 /* keyword name too long for internal buffer */
615 *status
= U_INTERNAL_PROGRAM_ERROR
;
619 if (keywordNameLen
== 0) {
620 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* empty keyword name */
623 buf
[keywordNameLen
] = 0; /* terminate */
625 return keywordNameLen
;
629 char keyword
[ULOC_KEYWORD_BUFFER_LEN
];
631 const char *valueStart
;
635 static int32_t U_CALLCONV
636 compareKeywordStructs(const void * /*context*/, const void *left
, const void *right
) {
637 const char* leftString
= ((const KeywordStruct
*)left
)->keyword
;
638 const char* rightString
= ((const KeywordStruct
*)right
)->keyword
;
639 return uprv_strcmp(leftString
, rightString
);
643 * Both addKeyword and addValue must already be in canonical form.
644 * Either both addKeyword and addValue are NULL, or neither is NULL.
645 * If they are not NULL they must be zero terminated.
 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
649 _getKeywords(const char *localeID
,
651 char *keywords
, int32_t keywordCapacity
,
652 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
654 const char* addKeyword
,
655 const char* addValue
,
658 KeywordStruct keywordList
[ULOC_MAX_NO_KEYWORDS
];
660 int32_t maxKeywords
= ULOC_MAX_NO_KEYWORDS
;
661 int32_t numKeywords
= 0;
662 const char* pos
= localeID
;
663 const char* equalSign
= NULL
;
664 const char* semicolon
= NULL
;
666 int32_t keywordsLen
= 0;
667 int32_t valuesLen
= 0;
669 if(prev
== '@') { /* start of keyword definition */
670 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
672 UBool duplicate
= FALSE
;
673 /* skip leading spaces */
677 if (!*pos
) { /* handle trailing "; " */
680 if(numKeywords
== maxKeywords
) {
681 *status
= U_INTERNAL_PROGRAM_ERROR
;
684 equalSign
= uprv_strchr(pos
, '=');
685 semicolon
= uprv_strchr(pos
, ';');
686 /* lack of '=' [foo@currency] is illegal */
687 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
688 if(!equalSign
|| (semicolon
&& semicolon
<equalSign
)) {
689 *status
= U_INVALID_FORMAT_ERROR
;
692 /* need to normalize both keyword and keyword name */
693 if(equalSign
- pos
>= ULOC_KEYWORD_BUFFER_LEN
) {
694 /* keyword name too long for internal buffer */
695 *status
= U_INTERNAL_PROGRAM_ERROR
;
698 for(i
= 0, n
= 0; i
< equalSign
- pos
; ++i
) {
700 keywordList
[numKeywords
].keyword
[n
++] = uprv_tolower(pos
[i
]);
704 /* zero-length keyword is an error. */
706 *status
= U_INVALID_FORMAT_ERROR
;
710 keywordList
[numKeywords
].keyword
[n
] = 0;
711 keywordList
[numKeywords
].keywordLen
= n
;
712 /* now grab the value part. First we skip the '=' */
/* then we skip leading spaces */
715 while(*equalSign
== ' ') {
719 /* Premature end or zero-length value */
720 if (!*equalSign
|| equalSign
== semicolon
) {
721 *status
= U_INVALID_FORMAT_ERROR
;
725 keywordList
[numKeywords
].valueStart
= equalSign
;
730 while(*(pos
- i
- 1) == ' ') {
733 keywordList
[numKeywords
].valueLen
= (int32_t)(pos
- equalSign
- i
);
736 i
= (int32_t)uprv_strlen(equalSign
);
737 while(i
&& equalSign
[i
-1] == ' ') {
740 keywordList
[numKeywords
].valueLen
= i
;
742 /* If this is a duplicate keyword, then ignore it */
743 for (j
=0; j
<numKeywords
; ++j
) {
744 if (uprv_strcmp(keywordList
[j
].keyword
, keywordList
[numKeywords
].keyword
) == 0) {
754 /* Handle addKeyword/addValue. */
755 if (addKeyword
!= NULL
) {
756 UBool duplicate
= FALSE
;
757 U_ASSERT(addValue
!= NULL
);
758 /* Search for duplicate; if found, do nothing. Explicit keyword
759 overrides addKeyword. */
760 for (j
=0; j
<numKeywords
; ++j
) {
761 if (uprv_strcmp(keywordList
[j
].keyword
, addKeyword
) == 0) {
767 if (numKeywords
== maxKeywords
) {
768 *status
= U_INTERNAL_PROGRAM_ERROR
;
771 uprv_strcpy(keywordList
[numKeywords
].keyword
, addKeyword
);
772 keywordList
[numKeywords
].keywordLen
= (int32_t)uprv_strlen(addKeyword
);
773 keywordList
[numKeywords
].valueStart
= addValue
;
774 keywordList
[numKeywords
].valueLen
= (int32_t)uprv_strlen(addValue
);
778 U_ASSERT(addValue
== NULL
);
781 /* now we have a list of keywords */
782 /* we need to sort it */
783 uprv_sortArray(keywordList
, numKeywords
, sizeof(KeywordStruct
), compareKeywordStructs
, NULL
, FALSE
, status
);
785 /* Now construct the keyword part */
786 for(i
= 0; i
< numKeywords
; i
++) {
787 if(keywordsLen
+ keywordList
[i
].keywordLen
+ 1< keywordCapacity
) {
788 uprv_strcpy(keywords
+keywordsLen
, keywordList
[i
].keyword
);
790 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = '=';
792 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = 0;
795 keywordsLen
+= keywordList
[i
].keywordLen
+ 1;
797 if(keywordsLen
+ keywordList
[i
].valueLen
< keywordCapacity
) {
798 uprv_strncpy(keywords
+keywordsLen
, keywordList
[i
].valueStart
, keywordList
[i
].valueLen
);
800 keywordsLen
+= keywordList
[i
].valueLen
;
802 if(i
< numKeywords
- 1) {
803 if(keywordsLen
< keywordCapacity
) {
804 keywords
[keywordsLen
] = ';';
810 if(valuesLen
+ keywordList
[i
].valueLen
+ 1< valuesCapacity
) {
811 uprv_strcpy(values
+valuesLen
, keywordList
[i
].valueStart
);
812 values
[valuesLen
+ keywordList
[i
].valueLen
] = 0;
814 valuesLen
+= keywordList
[i
].valueLen
+ 1;
818 values
[valuesLen
] = 0;
823 return u_terminateChars(keywords
, keywordCapacity
, keywordsLen
, status
);
830 locale_getKeywords(const char *localeID
,
832 char *keywords
, int32_t keywordCapacity
,
833 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
835 UErrorCode
*status
) {
836 return _getKeywords(localeID
, prev
, keywords
, keywordCapacity
,
837 values
, valuesCapacity
, valLen
, valuesToo
,
841 U_CAPI
int32_t U_EXPORT2
842 uloc_getKeywordValue(const char* localeID
,
843 const char* keywordName
,
844 char* buffer
, int32_t bufferCapacity
,
847 const char* startSearchHere
= NULL
;
848 const char* nextSeparator
= NULL
;
849 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
850 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
853 if(status
&& U_SUCCESS(*status
) && localeID
) {
854 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
855 const char* tmpLocaleID
;
857 if (keywordName
== NULL
|| keywordName
[0] == 0) {
858 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
862 locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
863 if(U_FAILURE(*status
)) {
867 if (_hasBCP47Extension(localeID
)) {
868 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
870 tmpLocaleID
=localeID
;
873 startSearchHere
= locale_getKeywordsStart(tmpLocaleID
);
874 if(startSearchHere
== NULL
) {
875 /* no keywords, return at once */
879 /* find the first keyword */
880 while(startSearchHere
) {
881 const char* keyValueTail
;
884 startSearchHere
++; /* skip @ or ; */
885 nextSeparator
= uprv_strchr(startSearchHere
, '=');
887 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* key must have =value */
890 /* strip leading & trailing spaces (TC decided to tolerate these) */
891 while(*startSearchHere
== ' ') {
894 keyValueTail
= nextSeparator
;
895 while (keyValueTail
> startSearchHere
&& *(keyValueTail
-1) == ' ') {
898 /* now keyValueTail points to first char after the keyName */
899 /* copy & normalize keyName from locale */
900 if (startSearchHere
== keyValueTail
) {
901 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* empty keyword name in passed-in locale */
905 while (startSearchHere
< keyValueTail
) {
906 if (!UPRV_ISALPHANUM(*startSearchHere
)) {
907 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* malformed keyword name */
910 if (keyValueLen
< ULOC_KEYWORD_BUFFER_LEN
- 1) {
911 localeKeywordNameBuffer
[keyValueLen
++] = uprv_tolower(*startSearchHere
++);
913 /* keyword name too long for internal buffer */
914 *status
= U_INTERNAL_PROGRAM_ERROR
;
918 localeKeywordNameBuffer
[keyValueLen
] = 0; /* terminate */
920 startSearchHere
= uprv_strchr(nextSeparator
, ';');
922 if(uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
) == 0) {
923 /* current entry matches the keyword. */
924 nextSeparator
++; /* skip '=' */
925 /* First strip leading & trailing spaces (TC decided to tolerate these) */
926 while(*nextSeparator
== ' ') {
929 keyValueTail
= (startSearchHere
)? startSearchHere
: nextSeparator
+ uprv_strlen(nextSeparator
);
930 while(keyValueTail
> nextSeparator
&& *(keyValueTail
-1) == ' ') {
933 /* Now copy the value, but check well-formedness */
934 if (nextSeparator
== keyValueTail
) {
935 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* empty key value name in passed-in locale */
939 while (nextSeparator
< keyValueTail
) {
940 if (!UPRV_ISALPHANUM(*nextSeparator
) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator
)) {
941 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* malformed key value */
944 if (keyValueLen
< bufferCapacity
) {
945 /* Should we lowercase value to return here? Tests expect as-is. */
946 buffer
[keyValueLen
++] = *nextSeparator
++;
947 } else { /* keep advancing so we return correct length in case of overflow */
952 result
= u_terminateChars(buffer
, bufferCapacity
, keyValueLen
, status
);
960 U_CAPI
int32_t U_EXPORT2
961 uloc_setKeywordValue(const char* keywordName
,
962 const char* keywordValue
,
963 char* buffer
, int32_t bufferCapacity
,
966 /* TODO: sorting. removal. */
967 int32_t keywordNameLen
;
968 int32_t keywordValueLen
;
971 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
972 char keywordValueBuffer
[ULOC_KEYWORDS_CAPACITY
+1];
973 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
975 char* nextSeparator
= NULL
;
976 char* nextEqualsign
= NULL
;
977 char* startSearchHere
= NULL
;
978 char* keywordStart
= NULL
;
979 CharString updatedKeysAndValues
;
980 int32_t updatedKeysAndValuesLen
;
981 UBool handledInputKeyAndValue
= FALSE
;
982 char keyValuePrefix
= '@';
984 if(U_FAILURE(*status
)) {
987 if (keywordName
== NULL
|| keywordName
[0] == 0 || bufferCapacity
<= 1) {
988 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
991 bufLen
= (int32_t)uprv_strlen(buffer
);
992 if(bufferCapacity
<bufLen
) {
993 /* The capacity is less than the length?! Is this NULL terminated? */
994 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
997 keywordNameLen
= locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
998 if(U_FAILURE(*status
)) {
1002 keywordValueLen
= 0;
1004 while (*keywordValue
!= 0) {
1005 if (!UPRV_ISALPHANUM(*keywordValue
) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue
)) {
1006 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* malformed key value */
1009 if (keywordValueLen
< ULOC_KEYWORDS_CAPACITY
) {
1010 /* Should we force lowercase in value to set? */
1011 keywordValueBuffer
[keywordValueLen
++] = *keywordValue
++;
1013 /* keywordValue too long for internal buffer */
1014 *status
= U_INTERNAL_PROGRAM_ERROR
;
1019 keywordValueBuffer
[keywordValueLen
] = 0; /* terminate */
1021 startSearchHere
= (char*)locale_getKeywordsStart(buffer
);
1022 if(startSearchHere
== NULL
|| (startSearchHere
[1]==0)) {
1023 if(keywordValueLen
== 0) { /* no keywords = nothing to remove */
1027 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
1028 if(startSearchHere
) { /* had a single @ */
1029 needLen
--; /* already had the @ */
1030 /* startSearchHere points at the @ */
1032 startSearchHere
=buffer
+bufLen
;
1034 if(needLen
>= bufferCapacity
) {
1035 *status
= U_BUFFER_OVERFLOW_ERROR
;
1036 return needLen
; /* no change */
1038 *startSearchHere
++ = '@';
1039 uprv_strcpy(startSearchHere
, keywordNameBuffer
);
1040 startSearchHere
+= keywordNameLen
;
1041 *startSearchHere
++ = '=';
1042 uprv_strcpy(startSearchHere
, keywordValueBuffer
);
1044 } /* end shortcut - no @ */
1046 keywordStart
= startSearchHere
;
1047 /* search for keyword */
1048 while(keywordStart
) {
1049 const char* keyValueTail
;
1050 int32_t keyValueLen
;
1052 keywordStart
++; /* skip @ or ; */
1053 nextEqualsign
= uprv_strchr(keywordStart
, '=');
1054 if (!nextEqualsign
) {
1055 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* key must have =value */
1058 /* strip leading & trailing spaces (TC decided to tolerate these) */
1059 while(*keywordStart
== ' ') {
1062 keyValueTail
= nextEqualsign
;
1063 while (keyValueTail
> keywordStart
&& *(keyValueTail
-1) == ' ') {
1066 /* now keyValueTail points to first char after the keyName */
1067 /* copy & normalize keyName from locale */
1068 if (keywordStart
== keyValueTail
) {
1069 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* empty keyword name in passed-in locale */
1073 while (keywordStart
< keyValueTail
) {
1074 if (!UPRV_ISALPHANUM(*keywordStart
)) {
1075 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* malformed keyword name */
1078 if (keyValueLen
< ULOC_KEYWORD_BUFFER_LEN
- 1) {
1079 localeKeywordNameBuffer
[keyValueLen
++] = uprv_tolower(*keywordStart
++);
1081 /* keyword name too long for internal buffer */
1082 *status
= U_INTERNAL_PROGRAM_ERROR
;
1086 localeKeywordNameBuffer
[keyValueLen
] = 0; /* terminate */
1088 nextSeparator
= uprv_strchr(nextEqualsign
, ';');
1090 /* start processing the value part */
1091 nextEqualsign
++; /* skip '=' */
1092 /* First strip leading & trailing spaces (TC decided to tolerate these) */
1093 while(*nextEqualsign
== ' ') {
1096 keyValueTail
= (nextSeparator
)? nextSeparator
: nextEqualsign
+ uprv_strlen(nextEqualsign
);
1097 while(keyValueTail
> nextEqualsign
&& *(keyValueTail
-1) == ' ') {
1100 if (nextEqualsign
== keyValueTail
) {
1101 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* empty key value in passed-in locale */
1105 rc
= uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
);
1107 /* Current entry matches the input keyword. Update the entry */
1108 if(keywordValueLen
> 0) { /* updating a value */
1109 updatedKeysAndValues
.append(keyValuePrefix
, *status
);
1110 keyValuePrefix
= ';'; /* for any subsequent key-value pair */
1111 updatedKeysAndValues
.append(keywordNameBuffer
, keywordNameLen
, *status
);
1112 updatedKeysAndValues
.append('=', *status
);
1113 updatedKeysAndValues
.append(keywordValueBuffer
, keywordValueLen
, *status
);
1114 } /* else removing this entry, don't emit anything */
1115 handledInputKeyAndValue
= TRUE
;
1117 /* input keyword sorts earlier than current entry, add before current entry */
1118 if (rc
< 0 && keywordValueLen
> 0 && !handledInputKeyAndValue
) {
1119 /* insert new entry at this location */
1120 updatedKeysAndValues
.append(keyValuePrefix
, *status
);
1121 keyValuePrefix
= ';'; /* for any subsequent key-value pair */
1122 updatedKeysAndValues
.append(keywordNameBuffer
, keywordNameLen
, *status
);
1123 updatedKeysAndValues
.append('=', *status
);
1124 updatedKeysAndValues
.append(keywordValueBuffer
, keywordValueLen
, *status
);
1125 handledInputKeyAndValue
= TRUE
;
1127 /* copy the current entry */
1128 updatedKeysAndValues
.append(keyValuePrefix
, *status
);
1129 keyValuePrefix
= ';'; /* for any subsequent key-value pair */
1130 updatedKeysAndValues
.append(localeKeywordNameBuffer
, keyValueLen
, *status
);
1131 updatedKeysAndValues
.append('=', *status
);
1132 updatedKeysAndValues
.append(nextEqualsign
, keyValueTail
-nextEqualsign
, *status
);
1134 if (!nextSeparator
&& keywordValueLen
> 0 && !handledInputKeyAndValue
) {
1135 /* append new entry at the end, it sorts later than existing entries */
1136 updatedKeysAndValues
.append(keyValuePrefix
, *status
);
1137 /* skip keyValuePrefix update, no subsequent key-value pair */
1138 updatedKeysAndValues
.append(keywordNameBuffer
, keywordNameLen
, *status
);
1139 updatedKeysAndValues
.append('=', *status
);
1140 updatedKeysAndValues
.append(keywordValueBuffer
, keywordValueLen
, *status
);
1141 handledInputKeyAndValue
= TRUE
;
1143 keywordStart
= nextSeparator
;
1144 } /* end loop searching */
1146 /* Any error from updatedKeysAndValues.append above would be internal and not due to
1147 * problems with the passed-in locale. So if we did encounter problems with the
1148 * passed-in locale above, those errors took precedence and overrode any error
1149 * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1150 * are errors here they are from updatedKeysAndValues.append; they do cause an
1151 * error return but the passed-in locale is unmodified and the original bufLen is
1154 if (!handledInputKeyAndValue
|| U_FAILURE(*status
)) {
1155 /* if input key/value specified removal of a keyword not present in locale, or
1156 * there was an error in CharString.append, leave original locale alone. */
1160 updatedKeysAndValuesLen
= updatedKeysAndValues
.length();
1161 /* needLen = length of the part before '@' + length of updated key-value part including '@' */
1162 needLen
= (int32_t)(startSearchHere
- buffer
) + updatedKeysAndValuesLen
;
1163 if(needLen
>= bufferCapacity
) {
1164 *status
= U_BUFFER_OVERFLOW_ERROR
;
1165 return needLen
; /* no change */
1167 if (updatedKeysAndValuesLen
> 0) {
1168 uprv_strncpy(startSearchHere
, updatedKeysAndValues
.data(), updatedKeysAndValuesLen
);
1174 /* ### ID parsing implementation **************************************************/
1176 #define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))
1178 /*returns TRUE if one of the special prefixes is here (s=string)
1180 #define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
1182 /* Dot terminates it because of POSIX form where dot precedes the codepage
1183 * except for variant
1185 #define _isTerminator(a) ((a==0)||(a=='.')||(a=='@'))
1187 static char* _strnchr(const char* str
, int32_t len
, char c
) {
1188 U_ASSERT(str
!= 0 && len
>= 0);
1189 while (len
-- != 0) {
1193 } else if (d
== 0) {
1202 * Lookup 'key' in the array 'list'. The array 'list' should contain
1203 * a NULL entry, followed by more entries, and a second NULL entry.
1205 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1208 static int16_t _findIndex(const char* const* list
, const char* key
)
1210 const char* const* anchor
= list
;
1213 /* Make two passes through two NULL-terminated arrays at 'list' */
1214 while (pass
++ < 2) {
1216 if (uprv_strcmp(key
, *list
) == 0) {
1217 return (int16_t)(list
- anchor
);
1221 ++list
; /* skip final NULL *CWB*/
1226 /* count the length of src while copying it to dest; return strlen(src) */
1227 static inline int32_t
1228 _copyCount(char *dest
, int32_t destCapacity
, const char *src
) {
1235 return (int32_t)(src
-anchor
);
1237 if(destCapacity
<=0) {
1238 return (int32_t)((src
-anchor
)+uprv_strlen(src
));
1247 uloc_getCurrentCountryID(const char* oldID
){
1248 int32_t offset
= _findIndex(DEPRECATED_COUNTRIES
, oldID
);
1250 return REPLACEMENT_COUNTRIES
[offset
];
1255 uloc_getCurrentLanguageID(const char* oldID
){
1256 int32_t offset
= _findIndex(DEPRECATED_LANGUAGES
, oldID
);
1258 return REPLACEMENT_LANGUAGES
[offset
];
1263 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1264 * avoid duplicating code to handle the earlier locale ID pieces
1265 * in the functions for the later ones by
1266 * setting the *pEnd pointer to where they stopped parsing
1268 * TODO try to use this in Locale
1271 ulocimp_getLanguage(const char *localeID
,
1272 char *language
, int32_t languageCapacity
,
1273 const char **pEnd
) {
1276 char lang
[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1278 /* if it starts with i- or x- then copy that prefix */
1279 if(_isIDPrefix(localeID
)) {
1280 if(i
<languageCapacity
) {
1281 language
[i
]=(char)uprv_tolower(*localeID
);
1283 if(i
<languageCapacity
) {
1290 /* copy the language as far as possible and count its length */
1291 while(!_isTerminator(*localeID
) && !_isIDSeparator(*localeID
)) {
1292 if(i
<languageCapacity
) {
1293 language
[i
]=(char)uprv_tolower(*localeID
);
1297 lang
[i
]=(char)uprv_tolower(*localeID
);
1304 /* convert 3 character code to 2 character code if possible *CWB*/
1305 offset
=_findIndex(LANGUAGES_3
, lang
);
1307 i
=_copyCount(language
, languageCapacity
, LANGUAGES
[offset
]);
1318 ulocimp_getScript(const char *localeID
,
1319 char *script
, int32_t scriptCapacity
,
1328 /* copy the second item as far as possible and count its length */
1329 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])
1330 && uprv_isASCIILetter(localeID
[idLen
])) {
1334 /* If it's exactly 4 characters long, then it's a script and not a country. */
1338 *pEnd
= localeID
+idLen
;
1340 if(idLen
> scriptCapacity
) {
1341 idLen
= scriptCapacity
;
1344 script
[0]=(char)uprv_toupper(*(localeID
++));
1346 for (i
= 1; i
< idLen
; i
++) {
1347 script
[i
]=(char)uprv_tolower(*(localeID
++));
1357 ulocimp_getCountry(const char *localeID
,
1358 char *country
, int32_t countryCapacity
,
1362 char cnty
[ULOC_COUNTRY_CAPACITY
]={ 0, 0, 0, 0 };
1365 /* copy the country as far as possible and count its length */
1366 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])) {
1367 if(idLen
<(ULOC_COUNTRY_CAPACITY
-1)) { /*CWB*/
1368 cnty
[idLen
]=(char)uprv_toupper(localeID
[idLen
]);
1373 /* the country should be either length 2 or 3 */
1374 if (idLen
== 2 || idLen
== 3) {
1375 UBool gotCountry
= FALSE
;
1376 /* convert 3 character code to 2 character code if possible *CWB*/
1378 offset
=_findIndex(COUNTRIES_3
, cnty
);
1380 idLen
=_copyCount(country
, countryCapacity
, COUNTRIES
[offset
]);
1386 for (i
= 0; i
< idLen
; i
++) {
1387 if (i
< countryCapacity
) {
1388 country
[i
]=(char)uprv_toupper(localeID
[i
]);
1405 * @param needSeparator if true, then add leading '_' if any variants
1406 * are added to 'variant'
1409 _getVariantEx(const char *localeID
,
1411 char *variant
, int32_t variantCapacity
,
1412 UBool needSeparator
) {
1415 /* get one or more variant tags and separate them with '_' */
1416 if(_isIDSeparator(prev
)) {
1417 /* get a variant string after a '-' or '_' */
1418 while(!_isTerminator(*localeID
)) {
1419 if (needSeparator
) {
1420 if (i
<variantCapacity
) {
1424 needSeparator
= FALSE
;
1426 if(i
<variantCapacity
) {
1427 variant
[i
]=(char)uprv_toupper(*localeID
);
1428 if(variant
[i
]=='-') {
1437 /* if there is no variant tag after a '-' or '_' then look for '@' */
1441 } else if((localeID
=locale_getKeywordsStart(localeID
))!=NULL
) {
1442 ++localeID
; /* point after the '@' */
1446 while(!_isTerminator(*localeID
)) {
1447 if (needSeparator
) {
1448 if (i
<variantCapacity
) {
1452 needSeparator
= FALSE
;
1454 if(i
<variantCapacity
) {
1455 variant
[i
]=(char)uprv_toupper(*localeID
);
1456 if(variant
[i
]=='-' || variant
[i
]==',') {
1469 _getVariant(const char *localeID
,
1471 char *variant
, int32_t variantCapacity
) {
1472 return _getVariantEx(localeID
, prev
, variant
, variantCapacity
, FALSE
);
1476 * Delete ALL instances of a variant from the given list of one or
1477 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1478 * @param variants the source string of one or more variants,
1479 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1480 * terminated; if it is, trailing zero will NOT be maintained.
1481 * @param variantsLen length of variants
1482 * @param toDelete variant to delete, without separators, e.g. "EURO"
1483 * or "PREEURO"; not zero terminated
1484 * @param toDeleteLen length of toDelete
1485 * @return number of characters deleted from variants
1488 _deleteVariant(char* variants
, int32_t variantsLen
,
1489 const char* toDelete
, int32_t toDeleteLen
)
1491 int32_t delta
= 0; /* number of chars deleted */
1494 if (variantsLen
< toDeleteLen
) {
1497 if (uprv_strncmp(variants
, toDelete
, toDeleteLen
) == 0 &&
1498 (variantsLen
== toDeleteLen
||
1499 (flag
=(variants
[toDeleteLen
] == '_'))))
1501 int32_t d
= toDeleteLen
+ (flag
?1:0);
1504 if (variantsLen
> 0) {
1505 uprv_memmove(variants
, variants
+d
, variantsLen
);
1508 char* p
= _strnchr(variants
, variantsLen
, '_');
1513 variantsLen
-= (int32_t)(p
- variants
);
1519 /* Keyword enumeration */
1521 typedef struct UKeywordsContext
{
1528 static void U_CALLCONV
1529 uloc_kw_closeKeywords(UEnumeration
*enumerator
) {
1530 uprv_free(((UKeywordsContext
*)enumerator
->context
)->keywords
);
1531 uprv_free(enumerator
->context
);
1532 uprv_free(enumerator
);
1535 static int32_t U_CALLCONV
1536 uloc_kw_countKeywords(UEnumeration
*en
, UErrorCode
* /*status*/) {
1537 char *kw
= ((UKeywordsContext
*)en
->context
)->keywords
;
1541 kw
+= uprv_strlen(kw
)+1;
1546 static const char * U_CALLCONV
1547 uloc_kw_nextKeyword(UEnumeration
* en
,
1548 int32_t* resultLength
,
1549 UErrorCode
* /*status*/) {
1550 const char* result
= ((UKeywordsContext
*)en
->context
)->current
;
1553 len
= (int32_t)uprv_strlen(((UKeywordsContext
*)en
->context
)->current
);
1554 ((UKeywordsContext
*)en
->context
)->current
+= len
+1;
1559 *resultLength
= len
;
1564 static void U_CALLCONV
1565 uloc_kw_resetKeywords(UEnumeration
* en
,
1566 UErrorCode
* /*status*/) {
1567 ((UKeywordsContext
*)en
->context
)->current
= ((UKeywordsContext
*)en
->context
)->keywords
;
1573 static const UEnumeration gKeywordsEnum
= {
1576 uloc_kw_closeKeywords
,
1577 uloc_kw_countKeywords
,
1579 uloc_kw_nextKeyword
,
1580 uloc_kw_resetKeywords
1583 U_CAPI UEnumeration
* U_EXPORT2
1584 uloc_openKeywordList(const char *keywordList
, int32_t keywordListSize
, UErrorCode
* status
)
1586 UKeywordsContext
*myContext
= NULL
;
1587 UEnumeration
*result
= NULL
;
1589 if(U_FAILURE(*status
)) {
1592 result
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
1593 /* Null pointer test */
1594 if (result
== NULL
) {
1595 *status
= U_MEMORY_ALLOCATION_ERROR
;
1598 uprv_memcpy(result
, &gKeywordsEnum
, sizeof(UEnumeration
));
1599 myContext
= static_cast<UKeywordsContext
*>(uprv_malloc(sizeof(UKeywordsContext
)));
1600 if (myContext
== NULL
) {
1601 *status
= U_MEMORY_ALLOCATION_ERROR
;
1605 myContext
->keywords
= (char *)uprv_malloc(keywordListSize
+1);
1606 uprv_memcpy(myContext
->keywords
, keywordList
, keywordListSize
);
1607 myContext
->keywords
[keywordListSize
] = 0;
1608 myContext
->current
= myContext
->keywords
;
1609 result
->context
= myContext
;
1613 U_CAPI UEnumeration
* U_EXPORT2
1614 uloc_openKeywords(const char* localeID
,
1619 int32_t keywordsCapacity
= 256;
1620 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1621 const char* tmpLocaleID
;
1623 if(status
==NULL
|| U_FAILURE(*status
)) {
1627 if (_hasBCP47Extension(localeID
)) {
1628 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
1630 if (localeID
==NULL
) {
1631 localeID
=uloc_getDefault();
1633 tmpLocaleID
=localeID
;
1636 /* Skip the language */
1637 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
1638 if(_isIDSeparator(*tmpLocaleID
)) {
1639 const char *scriptID
;
1640 /* Skip the script if available */
1641 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
1642 if(scriptID
!= tmpLocaleID
+1) {
1643 /* Found optional script */
1644 tmpLocaleID
= scriptID
;
1646 /* Skip the Country */
1647 if (_isIDSeparator(*tmpLocaleID
)) {
1648 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &tmpLocaleID
);
1649 if(_isIDSeparator(*tmpLocaleID
)) {
1650 _getVariant(tmpLocaleID
+1, *tmpLocaleID
, NULL
, 0);
1655 /* keywords are located after '@' */
1656 if((tmpLocaleID
= locale_getKeywordsStart(tmpLocaleID
)) != NULL
) {
1657 i
=locale_getKeywords(tmpLocaleID
+1, '@', keywords
, keywordsCapacity
, NULL
, 0, NULL
, FALSE
, status
);
1661 return uloc_openKeywordList(keywords
, i
, status
);
1668 /* bit-flags for 'options' parameter of _canonicalize */
1669 #define _ULOC_STRIP_KEYWORDS 0x2
1670 #define _ULOC_CANONICALIZE 0x1
1672 #define OPTION_SET(options, mask) ((options & mask) != 0)
1674 static const char i_default
[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
1675 #define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1678 * Canonicalize the given localeID, to level 1 or to level 2,
1679 * depending on the options. To specify level 1, pass in options=0.
1680 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1682 * This is the code underlying uloc_getName and uloc_canonicalize.
1685 _canonicalize(const char* localeID
,
1687 int32_t resultCapacity
,
1690 int32_t j
, len
, fieldCount
=0, scriptSize
=0, variantSize
=0, nameCapacity
;
1691 char localeBuffer
[ULOC_FULLNAME_CAPACITY
];
1692 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1693 const char* origLocaleID
;
1694 const char* tmpLocaleID
;
1695 const char* keywordAssign
= NULL
;
1696 const char* separatorIndicator
= NULL
;
1697 const char* addKeyword
= NULL
;
1698 const char* addValue
= NULL
;
1700 char* variant
= NULL
; /* pointer into name, or NULL */
1702 if (U_FAILURE(*err
)) {
1706 if (_hasBCP47Extension(localeID
)) {
1707 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
1709 if (localeID
==NULL
) {
1710 localeID
=uloc_getDefault();
1712 tmpLocaleID
=localeID
;
1715 origLocaleID
=tmpLocaleID
;
1717 /* if we are doing a full canonicalization, then put results in
1718 localeBuffer, if necessary; otherwise send them to result. */
1719 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1720 (result
== NULL
|| resultCapacity
< (int32_t)sizeof(localeBuffer
))) {
1721 name
= localeBuffer
;
1722 nameCapacity
= (int32_t)sizeof(localeBuffer
);
1725 nameCapacity
= resultCapacity
;
1728 /* get all pieces, one after another, and separate with '_' */
1729 len
=ulocimp_getLanguage(tmpLocaleID
, name
, nameCapacity
, &tmpLocaleID
);
1731 if(len
== I_DEFAULT_LENGTH
&& uprv_strncmp(origLocaleID
, i_default
, len
) == 0) {
1732 const char *d
= uloc_getDefault();
1734 len
= (int32_t)uprv_strlen(d
);
1737 uprv_strncpy(name
, d
, len
);
1739 } else if(_isIDSeparator(*tmpLocaleID
)) {
1740 const char *scriptID
;
1743 if(len
<nameCapacity
) {
1748 scriptSize
=ulocimp_getScript(tmpLocaleID
+1,
1749 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
, &scriptID
);
1750 if(scriptSize
> 0) {
1751 /* Found optional script */
1752 tmpLocaleID
= scriptID
;
1755 if (_isIDSeparator(*tmpLocaleID
)) {
1756 /* If there is something else, then we add the _ */
1757 if(len
<nameCapacity
) {
1764 if (_isIDSeparator(*tmpLocaleID
)) {
1765 const char *cntryID
;
1766 int32_t cntrySize
= ulocimp_getCountry(tmpLocaleID
+1,
1767 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
, &cntryID
);
1768 if (cntrySize
> 0) {
1769 /* Found optional country */
1770 tmpLocaleID
= cntryID
;
1773 if(_isIDSeparator(*tmpLocaleID
)) {
1774 /* If there is something else, then we add the _ if we found country before. */
1775 if (cntrySize
>= 0 && ! _isIDSeparator(*(tmpLocaleID
+1)) ) {
1777 if(len
<nameCapacity
) {
1783 variantSize
= _getVariant(tmpLocaleID
+1, *tmpLocaleID
,
1784 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
);
1785 if (variantSize
> 0) {
1786 variant
= len
<nameCapacity
? name
+len
: NULL
;
1788 tmpLocaleID
+= variantSize
+ 1; /* skip '_' and variant */
1794 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1795 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) && *tmpLocaleID
== '.') {
1798 char c
= *tmpLocaleID
;
1805 if (len
<nameCapacity
) {
1815 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1816 After this, tmpLocaleID either points to '@' or is NULL */
1817 if ((tmpLocaleID
=locale_getKeywordsStart(tmpLocaleID
))!=NULL
) {
1818 keywordAssign
= uprv_strchr(tmpLocaleID
, '=');
1819 separatorIndicator
= uprv_strchr(tmpLocaleID
, ';');
1822 /* Copy POSIX-style variant, if any [mr@FOO] */
1823 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) &&
1824 tmpLocaleID
!= NULL
&& keywordAssign
== NULL
) {
1826 char c
= *tmpLocaleID
;
1830 if (len
<nameCapacity
) {
1838 if (OPTION_SET(options
, _ULOC_CANONICALIZE
)) {
1839 /* Handle @FOO variant if @ is present and not followed by = */
1840 if (tmpLocaleID
!=NULL
&& keywordAssign
==NULL
) {
1841 int32_t posixVariantSize
;
1842 /* Add missing '_' if needed */
1843 if (fieldCount
< 2 || (fieldCount
< 3 && scriptSize
> 0)) {
1845 if(len
<nameCapacity
) {
1850 } while(fieldCount
<2);
1852 posixVariantSize
= _getVariantEx(tmpLocaleID
+1, '@', name
+len
, nameCapacity
-len
,
1853 (UBool
)(variantSize
> 0));
1854 if (posixVariantSize
> 0) {
1855 if (variant
== NULL
) {
1858 len
+= posixVariantSize
;
1859 variantSize
+= posixVariantSize
;
1863 /* Handle generic variants first */
1865 for (j
=0; j
<UPRV_LENGTHOF(VARIANT_MAP
); j
++) {
1866 const char* variantToCompare
= VARIANT_MAP
[j
].variant
;
1867 int32_t n
= (int32_t)uprv_strlen(variantToCompare
);
1868 int32_t variantLen
= _deleteVariant(variant
, uprv_min(variantSize
, (nameCapacity
-len
)), variantToCompare
, n
);
1870 if (variantLen
> 0) {
1871 if (len
> 0 && name
[len
-1] == '_') { /* delete trailing '_' */
1874 addKeyword
= VARIANT_MAP
[j
].keyword
;
1875 addValue
= VARIANT_MAP
[j
].value
;
1879 if (len
> 0 && len
<= nameCapacity
&& name
[len
-1] == '_') { /* delete trailing '_' */
1884 /* Look up the ID in the canonicalization map */
1885 for (j
=0; j
<UPRV_LENGTHOF(CANONICALIZE_MAP
); j
++) {
1886 const char* id
= CANONICALIZE_MAP
[j
].id
;
1887 int32_t n
= (int32_t)uprv_strlen(id
);
1888 if (len
== n
&& uprv_strncmp(name
, id
, n
) == 0) {
1889 if (n
== 0 && tmpLocaleID
!= NULL
) {
1890 break; /* Don't remap "" if keywords present */
1892 len
= _copyCount(name
, nameCapacity
, CANONICALIZE_MAP
[j
].canonicalID
);
1893 if (CANONICALIZE_MAP
[j
].keyword
) {
1894 addKeyword
= CANONICALIZE_MAP
[j
].keyword
;
1895 addValue
= CANONICALIZE_MAP
[j
].value
;
1902 if (!OPTION_SET(options
, _ULOC_STRIP_KEYWORDS
)) {
1903 if (tmpLocaleID
!=NULL
&& keywordAssign
!=NULL
&&
1904 (!separatorIndicator
|| separatorIndicator
> keywordAssign
)) {
1905 if(len
<nameCapacity
) {
1910 len
+= _getKeywords(tmpLocaleID
+1, '@', (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
,
1911 NULL
, 0, NULL
, TRUE
, addKeyword
, addValue
, err
);
1912 } else if (addKeyword
!= NULL
) {
1913 U_ASSERT(addValue
!= NULL
&& len
< nameCapacity
);
1914 /* inelegant but works -- later make _getKeywords do this? */
1915 len
+= _copyCount(name
+len
, nameCapacity
-len
, "@");
1916 len
+= _copyCount(name
+len
, nameCapacity
-len
, addKeyword
);
1917 len
+= _copyCount(name
+len
, nameCapacity
-len
, "=");
1918 len
+= _copyCount(name
+len
, nameCapacity
-len
, addValue
);
1922 if (U_SUCCESS(*err
) && result
!= NULL
&& name
== localeBuffer
) {
1923 uprv_strncpy(result
, localeBuffer
, (len
> resultCapacity
) ? resultCapacity
: len
);
1926 return u_terminateChars(result
, resultCapacity
, len
, err
);
1929 /* ### ID parsing API **************************************************/
1931 U_CAPI
int32_t U_EXPORT2
1932 uloc_getParent(const char* localeID
,
1934 int32_t parentCapacity
,
1937 const char *lastUnderscore
;
1940 if (U_FAILURE(*err
))
1943 if (localeID
== NULL
)
1944 localeID
= uloc_getDefault();
1946 lastUnderscore
=uprv_strrchr(localeID
, '_');
1947 if(lastUnderscore
!=NULL
) {
1948 i
=(int32_t)(lastUnderscore
-localeID
);
1953 if(i
>0 && parent
!= localeID
) {
1954 uprv_memcpy(parent
, localeID
, uprv_min(i
, parentCapacity
));
1956 return u_terminateChars(parent
, parentCapacity
, i
, err
);
1959 U_CAPI
int32_t U_EXPORT2
1960 uloc_getLanguage(const char* localeID
,
1962 int32_t languageCapacity
,
1965 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1968 if (err
==NULL
|| U_FAILURE(*err
)) {
1972 if(localeID
==NULL
) {
1973 localeID
=uloc_getDefault();
1976 i
=ulocimp_getLanguage(localeID
, language
, languageCapacity
, NULL
);
1977 return u_terminateChars(language
, languageCapacity
, i
, err
);
1980 U_CAPI
int32_t U_EXPORT2
1981 uloc_getScript(const char* localeID
,
1983 int32_t scriptCapacity
,
1988 if(err
==NULL
|| U_FAILURE(*err
)) {
1992 if(localeID
==NULL
) {
1993 localeID
=uloc_getDefault();
1996 /* skip the language */
1997 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
1998 if(_isIDSeparator(*localeID
)) {
1999 i
=ulocimp_getScript(localeID
+1, script
, scriptCapacity
, NULL
);
2001 return u_terminateChars(script
, scriptCapacity
, i
, err
);
2004 U_CAPI
int32_t U_EXPORT2
2005 uloc_getCountry(const char* localeID
,
2007 int32_t countryCapacity
,
2012 if(err
==NULL
|| U_FAILURE(*err
)) {
2016 if(localeID
==NULL
) {
2017 localeID
=uloc_getDefault();
2020 /* Skip the language */
2021 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
2022 if(_isIDSeparator(*localeID
)) {
2023 const char *scriptID
;
2024 /* Skip the script if available */
2025 ulocimp_getScript(localeID
+1, NULL
, 0, &scriptID
);
2026 if(scriptID
!= localeID
+1) {
2027 /* Found optional script */
2028 localeID
= scriptID
;
2030 if(_isIDSeparator(*localeID
)) {
2031 i
=ulocimp_getCountry(localeID
+1, country
, countryCapacity
, NULL
);
2034 return u_terminateChars(country
, countryCapacity
, i
, err
);
2037 U_CAPI
int32_t U_EXPORT2
2038 uloc_getVariant(const char* localeID
,
2040 int32_t variantCapacity
,
2043 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
2044 const char* tmpLocaleID
;
2047 if(err
==NULL
|| U_FAILURE(*err
)) {
2051 if (_hasBCP47Extension(localeID
)) {
2052 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
2054 if (localeID
==NULL
) {
2055 localeID
=uloc_getDefault();
2057 tmpLocaleID
=localeID
;
2060 /* Skip the language */
2061 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
2062 if(_isIDSeparator(*tmpLocaleID
)) {
2063 const char *scriptID
;
2064 /* Skip the script if available */
2065 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
2066 if(scriptID
!= tmpLocaleID
+1) {
2067 /* Found optional script */
2068 tmpLocaleID
= scriptID
;
2070 /* Skip the Country */
2071 if (_isIDSeparator(*tmpLocaleID
)) {
2072 const char *cntryID
;
2073 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &cntryID
);
2074 if (cntryID
!= tmpLocaleID
+1) {
2075 /* Found optional country */
2076 tmpLocaleID
= cntryID
;
2078 if(_isIDSeparator(*tmpLocaleID
)) {
2079 /* If there was no country ID, skip a possible extra IDSeparator */
2080 if (tmpLocaleID
!= cntryID
&& _isIDSeparator(tmpLocaleID
[1])) {
2083 i
=_getVariant(tmpLocaleID
+1, *tmpLocaleID
, variant
, variantCapacity
);
2088 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2089 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2091 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2092 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2095 return u_terminateChars(variant
, variantCapacity
, i
, err
);
2098 U_CAPI
int32_t U_EXPORT2
2099 uloc_getName(const char* localeID
,
2101 int32_t nameCapacity
,
2104 return _canonicalize(localeID
, name
, nameCapacity
, 0, err
);
2107 U_CAPI
int32_t U_EXPORT2
2108 uloc_getBaseName(const char* localeID
,
2110 int32_t nameCapacity
,
2113 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_STRIP_KEYWORDS
, err
);
2116 U_CAPI
int32_t U_EXPORT2
2117 uloc_canonicalize(const char* localeID
,
2119 int32_t nameCapacity
,
2122 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_CANONICALIZE
, err
);
2125 U_CAPI
const char* U_EXPORT2
2126 uloc_getISO3Language(const char* localeID
)
2129 char lang
[ULOC_LANG_CAPACITY
];
2130 UErrorCode err
= U_ZERO_ERROR
;
2132 if (localeID
== NULL
)
2134 localeID
= uloc_getDefault();
2136 uloc_getLanguage(localeID
, lang
, ULOC_LANG_CAPACITY
, &err
);
2139 offset
= _findIndex(LANGUAGES
, lang
);
2142 return LANGUAGES_3
[offset
];
2145 U_CAPI
const char* U_EXPORT2
2146 uloc_getISO3Country(const char* localeID
)
2149 char cntry
[ULOC_LANG_CAPACITY
];
2150 UErrorCode err
= U_ZERO_ERROR
;
2152 if (localeID
== NULL
)
2154 localeID
= uloc_getDefault();
2156 uloc_getCountry(localeID
, cntry
, ULOC_LANG_CAPACITY
, &err
);
2159 offset
= _findIndex(COUNTRIES
, cntry
);
2163 return COUNTRIES_3
[offset
];
2166 U_CAPI
uint32_t U_EXPORT2
2167 uloc_getLCID(const char* localeID
)
2169 UErrorCode status
= U_ZERO_ERROR
;
2170 char langID
[ULOC_FULLNAME_CAPACITY
];
2173 /* Check for incomplete id. */
2174 if (!localeID
|| uprv_strlen(localeID
) < 2) {
2178 // Attempt platform lookup if available
2179 lcid
= uprv_convertToLCIDPlatform(localeID
);
2182 // Windows found an LCID, return that
2186 uloc_getLanguage(localeID
, langID
, sizeof(langID
), &status
);
2187 if (U_FAILURE(status
)) {
2191 if (uprv_strchr(localeID
, '@')) {
2192 // uprv_convertToLCID does not support keywords other than collation.
2193 // Remove all keywords except collation.
2195 char collVal
[ULOC_KEYWORDS_CAPACITY
];
2196 char tmpLocaleID
[ULOC_FULLNAME_CAPACITY
];
2198 len
= uloc_getKeywordValue(localeID
, "collation", collVal
,
2199 UPRV_LENGTHOF(collVal
) - 1, &status
);
2201 if (U_SUCCESS(status
) && len
> 0) {
2204 len
= uloc_getBaseName(localeID
, tmpLocaleID
,
2205 UPRV_LENGTHOF(tmpLocaleID
) - 1, &status
);
2207 if (U_SUCCESS(status
) && len
> 0) {
2208 tmpLocaleID
[len
] = 0;
2210 len
= uloc_setKeywordValue("collation", collVal
, tmpLocaleID
,
2211 UPRV_LENGTHOF(tmpLocaleID
) - len
- 1, &status
);
2213 if (U_SUCCESS(status
) && len
> 0) {
2214 tmpLocaleID
[len
] = 0;
2215 return uprv_convertToLCID(langID
, tmpLocaleID
, &status
);
2220 // fall through - all keywords are simply ignored
2221 status
= U_ZERO_ERROR
;
2224 return uprv_convertToLCID(langID
, localeID
, &status
);
2227 U_CAPI
int32_t U_EXPORT2
2228 uloc_getLocaleForLCID(uint32_t hostid
, char *locale
, int32_t localeCapacity
,
2231 return uprv_convertToPosix(hostid
, locale
, localeCapacity
, status
);
2234 /* ### Default locale **************************************************/
2236 U_CAPI
const char* U_EXPORT2
2239 return locale_get_default();
2242 U_CAPI
void U_EXPORT2
2243 uloc_setDefault(const char* newDefaultLocale
,
2246 if (U_FAILURE(*err
))
2248 /* the error code isn't currently used for anything by this function*/
2250 /* propagate change to C++ */
2251 locale_set_default(newDefaultLocale
);
2255 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2256 * to an array of pointers to arrays of char. All of these pointers are owned
2257 * by ICU-- do not delete them, and do not write through them. The array is
2258 * terminated with a null pointer.
2260 U_CAPI
const char* const* U_EXPORT2
2261 uloc_getISOLanguages()
2267 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2268 * pointer to an array of pointers to arrays of char. All of these pointers are
2269 * owned by ICU-- do not delete them, and do not write through them. The array is
2270 * terminated with a null pointer.
2272 U_CAPI
const char* const* U_EXPORT2
2273 uloc_getISOCountries()
2279 /* this function to be moved into cstring.c later */
2280 static char gDecimal
= 0;
2285 _uloc_strtod(const char *start
, char **end
) {
2292 /* For machines that decide to change the decimal on you,
2293 and try to be too smart with localization.
2294 This normally should be just a '.'. */
2295 sprintf(rep
, "%+1.1f", 1.0);
2299 if(gDecimal
== '.') {
2300 return uprv_strtod(start
, end
); /* fall through to OS */
2302 uprv_strncpy(buf
, start
, 29);
2304 decimal
= uprv_strchr(buf
, '.');
2306 *decimal
= gDecimal
;
2308 return uprv_strtod(start
, end
); /* no decimal point */
2310 rv
= uprv_strtod(buf
, &myEnd
);
2312 *end
= (char*)(start
+(myEnd
-buf
)); /* cast away const (to follow uprv_strtod API.) */
2320 int32_t dummy
; /* to avoid uninitialized memory copy from qsort */
2321 char locale
[ULOC_FULLNAME_CAPACITY
+1];
2324 static int32_t U_CALLCONV
2325 uloc_acceptLanguageCompare(const void * /*context*/, const void *a
, const void *b
)
2327 const _acceptLangItem
*aa
= (const _acceptLangItem
*)a
;
2328 const _acceptLangItem
*bb
= (const _acceptLangItem
*)b
;
2332 rc
= -1; /* A > B */
2333 } else if(bb
->q
> aa
->q
) {
2340 rc
= uprv_stricmp(aa
->locale
, bb
->locale
);
2343 #if defined(ULOC_DEBUG)
2344 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2354 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2357 U_CAPI
int32_t U_EXPORT2
2358 uloc_acceptLanguageFromHTTP(char *result
, int32_t resultAvailable
, UAcceptResult
*outResult
,
2359 const char *httpAcceptLanguage
,
2360 UEnumeration
* availableLocales
,
2363 MaybeStackArray
<_acceptLangItem
, 4> items
; // Struct for collecting items.
2364 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2366 const char *itemEnd
;
2367 const char *paramEnd
;
2372 int32_t l
= (int32_t)uprv_strlen(httpAcceptLanguage
);
2374 if(U_FAILURE(*status
)) {
2378 for(s
=httpAcceptLanguage
;s
&&*s
;) {
2379 while(isspace(*s
)) /* eat space at the beginning */
2381 itemEnd
=uprv_strchr(s
,',');
2382 paramEnd
=uprv_strchr(s
,';');
2384 itemEnd
= httpAcceptLanguage
+l
; /* end of string */
2386 if(paramEnd
&& paramEnd
<itemEnd
) {
2387 /* semicolon (;) is closer than end (,) */
2392 while(isspace(*t
)) {
2398 while(isspace(*t
)) {
2401 items
[n
].q
= (float)_uloc_strtod(t
,NULL
);
2403 /* no semicolon - it's 1.0 */
2408 /* eat spaces prior to semi */
2409 for(t
=(paramEnd
-1);(paramEnd
>s
)&&isspace(*t
);t
--)
2411 int32_t slen
= ((t
+1)-s
);
2412 if(slen
> ULOC_FULLNAME_CAPACITY
) {
2413 *status
= U_BUFFER_OVERFLOW_ERROR
;
2414 return -1; // too big
2416 uprv_strncpy(items
[n
].locale
, s
, slen
);
2417 items
[n
].locale
[slen
]=0; // terminate
2418 int32_t clen
= uloc_canonicalize(items
[n
].locale
, tmp
, UPRV_LENGTHOF(tmp
)-1, status
);
2419 if(U_FAILURE(*status
)) return -1;
2420 if((clen
!=slen
) || (uprv_strncmp(items
[n
].locale
, tmp
, slen
))) {
2421 // canonicalization had an effect- copy back
2422 uprv_strncpy(items
[n
].locale
, tmp
, clen
);
2423 items
[n
].locale
[clen
] = 0; // terminate
2425 #if defined(ULOC_DEBUG)
2426 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2430 while(*s
==',') { /* eat duplicate commas */
2433 if(n
>=items
.getCapacity()) { // If we need more items
2434 if(NULL
== items
.resize(items
.getCapacity()*2, items
.getCapacity())) {
2435 *status
= U_MEMORY_ALLOCATION_ERROR
;
2438 #if defined(ULOC_DEBUG)
2439 fprintf(stderr
,"malloced at size %d\n", items
.getCapacity());
2443 uprv_sortArray(items
.getAlias(), n
, sizeof(items
[0]), uloc_acceptLanguageCompare
, NULL
, TRUE
, status
);
2444 if (U_FAILURE(*status
)) {
2447 LocalMemory
<const char*> strs(NULL
);
2448 if (strs
.allocateInsteadAndReset(n
) == NULL
) {
2449 *status
= U_MEMORY_ALLOCATION_ERROR
;
2453 #if defined(ULOC_DEBUG)
2454 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2456 strs
[i
]=items
[i
].locale
;
2458 res
= uloc_acceptLanguage(result
, resultAvailable
, outResult
,
2459 strs
.getAlias(), n
, availableLocales
, status
);
2464 U_CAPI
int32_t U_EXPORT2
2465 uloc_acceptLanguage(char *result
, int32_t resultAvailable
,
2466 UAcceptResult
*outResult
, const char **acceptList
,
2467 int32_t acceptListCount
,
2468 UEnumeration
* availableLocales
,
2474 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2476 char **fallbackList
;
2477 if(U_FAILURE(*status
)) {
2480 fallbackList
= static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList
[0])*acceptListCount
)));
2481 if(fallbackList
==NULL
) {
2482 *status
= U_MEMORY_ALLOCATION_ERROR
;
2485 for(i
=0;i
<acceptListCount
;i
++) {
2486 #if defined(ULOC_DEBUG)
2487 fprintf(stderr
,"%02d: %s\n", i
, acceptList
[i
]);
2489 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2490 #if defined(ULOC_DEBUG)
2491 fprintf(stderr
," %s\n", l
);
2493 len
= (int32_t)uprv_strlen(l
);
2494 if(!uprv_strcmp(acceptList
[i
], l
)) {
2496 *outResult
= ULOC_ACCEPT_VALID
;
2498 #if defined(ULOC_DEBUG)
2499 fprintf(stderr
, "MATCH! %s\n", l
);
2502 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
2505 uprv_free(fallbackList
[j
]);
2507 uprv_free(fallbackList
);
2508 return u_terminateChars(result
, resultAvailable
, len
, status
);
2514 uenum_reset(availableLocales
, status
);
2515 /* save off parent info */
2516 if(uloc_getParent(acceptList
[i
], tmp
, UPRV_LENGTHOF(tmp
), status
)!=0) {
2517 fallbackList
[i
] = uprv_strdup(tmp
);
2523 for(maxLen
--;maxLen
>0;maxLen
--) {
2524 for(i
=0;i
<acceptListCount
;i
++) {
2525 if(fallbackList
[i
] && ((int32_t)uprv_strlen(fallbackList
[i
])==maxLen
)) {
2526 #if defined(ULOC_DEBUG)
2527 fprintf(stderr
,"Try: [%s]", fallbackList
[i
]);
2529 while((l
=uenum_next(availableLocales
, NULL
, status
))) {
2530 #if defined(ULOC_DEBUG)
2531 fprintf(stderr
," %s\n", l
);
2533 len
= (int32_t)uprv_strlen(l
);
2534 if(!uprv_strcmp(fallbackList
[i
], l
)) {
2536 *outResult
= ULOC_ACCEPT_FALLBACK
;
2538 #if defined(ULOC_DEBUG)
2539 fprintf(stderr
, "fallback MATCH! %s\n", l
);
2542 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
2544 for(j
=0;j
<acceptListCount
;j
++) {
2545 uprv_free(fallbackList
[j
]);
2547 uprv_free(fallbackList
);
2548 return u_terminateChars(result
, resultAvailable
, len
, status
);
2551 uenum_reset(availableLocales
, status
);
2553 if(uloc_getParent(fallbackList
[i
], tmp
, UPRV_LENGTHOF(tmp
), status
)!=0) {
2554 uprv_free(fallbackList
[i
]);
2555 fallbackList
[i
] = uprv_strdup(tmp
);
2557 uprv_free(fallbackList
[i
]);
2563 *outResult
= ULOC_ACCEPT_FAILED
;
2566 for(i
=0;i
<acceptListCount
;i
++) {
2567 uprv_free(fallbackList
[i
]);
2569 uprv_free(fallbackList
);
2573 U_CAPI
const char* U_EXPORT2
2574 uloc_toUnicodeLocaleKey(const char* keyword
)
2576 const char* bcpKey
= ulocimp_toBcpKey(keyword
);
2577 if (bcpKey
== NULL
&& ultag_isUnicodeLocaleKey(keyword
, -1)) {
2578 // unknown keyword, but syntax is fine..
2584 U_CAPI
const char* U_EXPORT2
2585 uloc_toUnicodeLocaleType(const char* keyword
, const char* value
)
2587 const char* bcpType
= ulocimp_toBcpType(keyword
, value
, NULL
, NULL
);
2588 if (bcpType
== NULL
&& ultag_isUnicodeLocaleType(value
, -1)) {
2589 // unknown keyword, but syntax is fine..
2596 isWellFormedLegacyKey(const char* legacyKey
)
2598 const char* p
= legacyKey
;
2600 if (!UPRV_ISALPHANUM(*p
)) {
2609 isWellFormedLegacyType(const char* legacyType
)
2611 const char* p
= legacyType
;
2612 int32_t alphaNumLen
= 0;
2614 if (*p
== '_' || *p
== '/' || *p
== '-') {
2615 if (alphaNumLen
== 0) {
2619 } else if (UPRV_ISALPHANUM(*p
)) {
2626 return (alphaNumLen
!= 0);
2629 U_CAPI
const char* U_EXPORT2
2630 uloc_toLegacyKey(const char* keyword
)
2632 const char* legacyKey
= ulocimp_toLegacyKey(keyword
);
2633 if (legacyKey
== NULL
) {
2634 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2637 // LDML/CLDR provides some definition of keyword syntax in
2638 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2639 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2640 // Keys can only consist of [0-9a-zA-Z].
2641 if (isWellFormedLegacyKey(keyword
)) {
2648 U_CAPI
const char* U_EXPORT2
2649 uloc_toLegacyType(const char* keyword
, const char* value
)
2651 const char* legacyType
= ulocimp_toLegacyType(keyword
, value
, NULL
, NULL
);
2652 if (legacyType
== NULL
) {
2653 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2656 // LDML/CLDR provides some definition of keyword syntax in
2657 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2658 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2659 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2660 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
2661 if (isWellFormedLegacyType(value
)) {