1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
11 * Modification History:
13 * Date Name Description
14 * 04/01/97 aliu Creation.
15 * 08/21/98 stephen JDK 1.2 sync
16 * 12/08/98 rtg New Locale implementation and C API
17 * 03/15/99 damiba overhaul.
18 * 04/06/99 stephen changed setDefault() to realloc and copy
19 * 06/14/99 stephen Changed calls to ures_open for new params
20 * 07/21/99 stephen Modified setDefault() to propagate to C++
21 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
22 * brought canonicalization code into line with spec
23 *****************************************************************************/
26 POSIX's locale format, from putil.c: [no spaces]
28 ll [ _CC ] [ . MM ] [ @ VV]
30 l = lang, C = ctry, M = charmap, V = variant
33 #include "unicode/utypes.h"
34 #include "unicode/ustring.h"
35 #include "unicode/uloc.h"
49 #include <stdio.h> /* for sprintf */
53 /* ### Declarations **************************************************/
55 /* Locale stuff from locid.cpp */
56 U_CFUNC
void locale_set_default(const char *id
);
57 U_CFUNC
const char *locale_get_default(void);
59 locale_getKeywords(const char *localeID
,
61 char *keywords
, int32_t keywordCapacity
,
62 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
66 /* ### Data tables **************************************************/
69 * Table of language codes, both 2- and 3-letter, with preference
70 * given to 2-letter codes where possible. Includes 3-letter codes
71 * that lack a 2-letter equivalent.
73 * This list must be in sorted order. This list is returned directly
74 * to the user by some API.
76 * This list must be kept in sync with LANGUAGES_3, with corresponding
79 * This table should be terminated with a NULL entry, followed by a
80 * second list, and another NULL entry. The first list is visible to
81 * user code when this array is returned by API. The second list
82 * contains codes we support, but do not expose through user API.
86 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
87 * include the revisions up to 2001/7/27 *CWB*
89 * The 3 character codes are the terminology codes like RFC 3066. This
90 * is compatible with prior ICU codes
92 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
93 * table but now at the end of the table because 3 character codes are
94 * duplicates. This avoids bad searches going from 3 to 2 character
97 * The range qaa-qtz is reserved for local use
99 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
100 /* ISO639 table version is 20150505 */
101 /* Subsequent hand addition of selected languages */
102 static const char * const LANGUAGES
[] = {
103 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
104 "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
105 "aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
106 "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
107 "asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
108 "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
109 "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
110 "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
111 "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
112 "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
113 "ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
114 "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
115 "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
116 "cs", "csb", "cu", "cv", "cy",
117 "da", "dak", "dar", "dav", "de", "del", "den", "dgr",
118 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
119 "dyo", "dyu", "dz", "dzg",
120 "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
121 "en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
123 "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
124 "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
126 "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
127 "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
128 "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
129 "gur", "guz", "gv", "gwi",
130 "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
131 "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
133 "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
134 "ilo", "inh", "io", "is", "it", "iu", "izh",
135 "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
137 "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
138 "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
139 "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
140 "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
141 "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
142 "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
144 "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
145 "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
146 "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
147 "lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
148 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
149 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
150 "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
151 "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj",
152 "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
153 "my", "mye", "myv", "mzn",
154 "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
155 "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
156 "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
157 "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
158 "oc", "oj", "om", "or", "os", "osa", "ota",
159 "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
160 "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
161 "pon", "prg", "pro", "ps", "pt",
163 "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
164 "rof", "rom", "rtm", "ru", "rue", "rug", "rup",
166 "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
167 "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
168 "se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
169 "sgs", "shi", "shn", "shu", "si", "sid", "sk",
170 "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
171 "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
172 "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
173 "sv", "sw", "swb", "swc", "syc", "syr", "szl",
174 "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
175 "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
176 "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
177 "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
178 "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
179 "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
180 "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
182 "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
183 "xal", "xh", "xmf", "xog",
184 "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
185 "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
188 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
192 static const char* const DEPRECATED_LANGUAGES
[]={
193 "in", "iw", "ji", "jw", NULL
, NULL
195 static const char* const REPLACEMENT_LANGUAGES
[]={
196 "id", "he", "yi", "jv", NULL
, NULL
200 * Table of 3-letter language codes.
202 * This is a lookup table used to convert 3-letter language codes to
203 * their 2-letter equivalent, where possible. It must be kept in sync
204 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
205 * same language as LANGUAGES_3[i]. The commented-out lines are
206 * copied from LANGUAGES to make eyeballing this baby easier.
208 * Where a 3-letter language code has no 2-letter equivalent, the
209 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
211 * This table should be terminated with a NULL entry, followed by a
212 * second list, and another NULL entry. The two lists correspond to
213 * the two lists in LANGUAGES.
215 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
216 /* ISO639 table version is 20150505 */
217 /* Subsequent hand addition of selected languages */
218 static const char * const LANGUAGES_3
[] = {
219 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
220 "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
221 "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
222 "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
223 "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
224 "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
225 "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
226 "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
227 "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
228 "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
229 "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
230 "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
231 "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
232 "ces", "csb", "chu", "chv", "cym",
233 "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
234 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
235 "dyo", "dyu", "dzo", "dzg",
236 "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
237 "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
239 "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
240 "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
242 "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
243 "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
244 "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
245 "gur", "guz", "glv", "gwi",
246 "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
247 "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
249 "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
250 "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
251 "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
253 "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
254 "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
255 "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
256 "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
257 "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
258 "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
260 "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
261 "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
262 "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
263 "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
264 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
265 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
266 "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
267 "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
268 "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
269 "mya", "mye", "myv", "mzn",
270 "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
271 "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
272 "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
273 "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
274 "oci", "oji", "orm", "ori", "oss", "osa", "ota",
275 "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
276 "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
277 "pon", "prg", "pro", "pus", "por",
279 "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
280 "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
282 "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
283 "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
284 "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
285 "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
286 "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
287 "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
288 "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
289 "swe", "swa", "swb", "swc", "syc", "syr", "szl",
290 "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
291 "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
292 "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
293 "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
294 "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
295 "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
296 "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
298 "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
299 "xal", "xho", "xmf", "xog",
300 "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
301 "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
304 /* "in", "iw", "ji", "jw", "sh", */
305 "ind", "heb", "yid", "jaw", "srp",
310 * Table of 2-letter country codes.
312 * This list must be in sorted order. This list is returned directly
313 * to the user by some API.
315 * This list must be kept in sync with COUNTRIES_3, with corresponding
318 * This table should be terminated with a NULL entry, followed by a
319 * second list, and another NULL entry. The first list is visible to
320 * user code when this array is returned by API. The second list
321 * contains codes we support, but do not expose through user API.
325 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
326 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
327 * new codes keeping the old ones for compatibility updated to include
328 * 1999/12/03 revisions *CWB*
330 * RO(ROM) is now RO(ROU) according to
331 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
333 static const char * const COUNTRIES
[] = {
334 "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM",
335 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
336 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
337 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
338 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
339 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR",
340 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
341 "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
342 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
343 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
344 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
345 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
346 "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
347 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
348 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
349 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
350 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
351 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
352 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
353 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
354 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
355 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
356 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
357 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
358 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
359 "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ",
360 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
361 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
362 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
363 "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
365 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
369 static const char* const DEPRECATED_COUNTRIES
[] = {
370 "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL
, NULL
/* deprecated country list */
372 static const char* const REPLACEMENT_COUNTRIES
[] = {
373 /* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
374 "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL
, NULL
/* replacement country codes */
378 * Table of 3-letter country codes.
380 * This is a lookup table used to convert 3-letter country codes to
381 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
382 * For all valid i, COUNTRIES[i] must refer to the same country as
383 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
384 * to make eyeballing this baby easier.
386 * This table should be terminated with a NULL entry, followed by a
387 * second list, and another NULL entry. The two lists correspond to
388 * the two lists in COUNTRIES.
390 static const char * const COUNTRIES_3
[] = {
391 /* "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
392 "ASC", "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
393 /* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
394 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
395 /* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
396 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
397 /* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
398 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
399 /* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
400 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
401 /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR", */
402 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CPT", "CRI",
403 /* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
404 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
405 /* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
406 "DMA", "DOM", "DZA", "EA ", "ECU", "EST", "EGY", "ESH", "ERI", /* no valid 3-letter code for EA */
407 /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
408 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
409 /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
410 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
411 /* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
412 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
413 /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
414 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
415 /* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
416 "IC ", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", /* no valid 3-letter code for IC */
417 /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
418 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
419 /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
420 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
421 /* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
422 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
423 /* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
424 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
425 /* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
426 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
427 /* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
428 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
429 /* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
430 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
431 /* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
432 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
433 /* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
434 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
435 /* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
436 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
437 /* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
438 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
439 /* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
440 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
441 /* "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ", */
442 "SXM", "SYR", "SWZ", "TAA", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
443 /* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
444 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
445 /* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
446 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
447 /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
448 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
449 /* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
450 "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
452 /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
453 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
/*
 * One row of a locale-ID canonicalization table: an input ID, the ID it is
 * rewritten to, and an optional keyword/value pair that accompanies the
 * rewritten ID (for example the CANONICALIZE_MAP row that turns
 * "de__PHONEBOOK" into "de" with collation=phonebook).
 */
457 typedef struct CanonicalizationMap
{
458 const char *id
; /* input ID to match */
459 const char *canonicalID
; /* canonicalized output ID */
460 const char *keyword
; /* keyword to attach, or NULL if none */
461 const char *value
; /* value for that keyword, or NULL if keyword==NULL */
462 } CanonicalizationMap
;
465 * A map to canonicalize locale IDs. This handles a variety of
466 * different semantic kinds of transformations.
468 static const CanonicalizationMap CANONICALIZE_MAP
[] = {
469 { "", "en_US_POSIX", NULL
, NULL
}, /* .NET name */
470 { "c", "en_US_POSIX", NULL
, NULL
}, /* POSIX name */
471 { "posix", "en_US_POSIX", NULL
, NULL
}, /* POSIX name (alias of C) */
472 { "art_LOJBAN", "jbo", NULL
, NULL
}, /* registered name */
473 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL
, NULL
}, /* .NET name */
474 { "az_AZ_LATN", "az_Latn_AZ", NULL
, NULL
}, /* .NET name */
475 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
476 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
477 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
478 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
479 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
480 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
481 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
482 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
483 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
484 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
485 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
486 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
487 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
488 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
489 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
490 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
491 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
492 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
493 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
494 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
495 { "nb_NO_NY", "nn_NO", NULL
, NULL
}, /* "markus said this was ok" :-) */
496 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
497 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
498 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
499 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL
, NULL
}, /* .NET name */
500 { "sr_SP_LATN", "sr_Latn_RS", NULL
, NULL
}, /* .NET name */
501 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL
, NULL
}, /* Linux name */
502 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
503 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL
, NULL
}, /* Linux name */
504 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL
, NULL
}, /* .NET name */
505 { "uz_UZ_LATN", "uz_Latn_UZ", NULL
, NULL
}, /* .NET name */
506 { "zh_CHS", "zh_Hans", NULL
, NULL
}, /* .NET name */
507 { "zh_CHT", "zh_Hant", NULL
, NULL
}, /* .NET name */
508 { "zh_GAN", "gan", NULL
, NULL
}, /* registered name */
509 { "zh_GUOYU", "zh", NULL
, NULL
}, /* registered name */
510 { "zh_HAKKA", "hak", NULL
, NULL
}, /* registered name */
511 { "zh_MIN_NAN", "nan", NULL
, NULL
}, /* registered name */
512 { "zh_WUU", "wuu", NULL
, NULL
}, /* registered name */
513 { "zh_XIANG", "hsn", NULL
, NULL
}, /* registered name */
514 { "zh_YUE", "yue", NULL
, NULL
}, /* registered name */
517 typedef struct VariantMap
{
518 const char *variant
; /* input ID */
519 const char *keyword
; /* keyword, or NULL if none */
520 const char *value
; /* keyword value, or NULL if kw==NULL */
523 static const VariantMap VARIANT_MAP
[] = {
524 { "EURO", "currency", "EUR" },
525 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
526 { "STROKE", "collation", "stroke" } /* Solaris variant */
529 /* ### BCP47 Conversion *******************************************/
/*
 * Tests whether a locale ID looks like a BCP 47 language tag that carries a
 * one-character subtag (such as the "u" extension singleton): the ID must be
 * non-NULL, must not contain '@' (the character that starts the keyword
 * section of an ICU locale ID), and its shortest subtag must be exactly one
 * character long.
 *
 * The check is applied to the macro argument itself; it does not depend on
 * the caller having a variable named localeID in scope.
 * The argument is evaluated more than once, so it must have no side effects.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
532 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
533 #define _ConvertBCP47(finalID, id, buffer, length,err) \
534 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \
535 U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
537 if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
541 /* Gets the size of the shortest subtag in the given localeID. */
/*
 * Scans localeID character by character, treating '_' and '-' as subtag
 * separators, and tracks the length of the shortest subtag seen.
 * localeID must not be NULL: it is passed straight to uprv_strlen.
 * NOTE(review): parts of the loop body and the return statement are not
 * visible in this excerpt; the description covers only the visible lines.
 */
542 static int32_t getShortestSubtagLength(const char *localeID
) {
543 int32_t localeIDLength
= static_cast<int32_t>(uprv_strlen(localeID
));
/* Start from the whole-string length so the running minimum can only shrink. */
544 int32_t length
= localeIDLength
;
/* Length of the subtag currently being scanned. */
545 int32_t tmpLength
= 0;
549 for (i
= 0; i
< localeIDLength
; i
++) {
/* Any character other than '_' or '-' belongs to the current subtag. */
550 if (localeID
[i
] != '_' && localeID
[i
] != '-') {
/* Only a non-empty subtag shorter than the current minimum is of interest. */
557 if (tmpLength
!= 0 && tmpLength
< length
) {
567 /* ### Keywords **************************************************/
/* True when c lies in the range '0'..'9'. The argument is evaluated twice. */
568 #define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
/* True when uprv_isASCIILetter accepts c or c is a digit per UPRV_ISDIGIT. */
569 #define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
570 /* Punctuation/symbols allowed in legacy key values: '_', '-', '+' and '/' */
571 #define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')
/* Size of the fixed buffers that hold a keyword name, terminating NUL included. */
573 #define ULOC_KEYWORD_BUFFER_LEN 25
/* Capacity of the fixed keywordList array used when collecting keywords. */
574 #define ULOC_MAX_NO_KEYWORDS 25
/*
 * Looks for the '@' character in localeID with uprv_strchr. When
 * U_CHARSET_FAMILY is U_EBCDIC_FAMILY the code also declares ebcdicSigns, a
 * zero-terminated list of byte values, and searches localeID for the byte
 * that charToFind points at.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the pointer that was found (or NULL) is returned - confirm.
 */
576 U_CAPI
const char * U_EXPORT2
577 locale_getKeywordsStart(const char *localeID
) {
578 const char *result
= NULL
;
579 if((result
= uprv_strchr(localeID
, '@')) != NULL
) {
582 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
584 /* We do this because the @ sign is variant, and the @ sign used on one
585 EBCDIC machine won't be compiled the same way on other EBCDIC based
587 static const uint8_t ebcdicSigns
[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
588 const uint8_t *charToFind
= ebcdicSigns
;
590 if((result
= uprv_strchr(localeID
, *charToFind
)) != NULL
) {
601 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
602 * @param keywordName incoming name to be canonicalized
603 * @param status return status (keyword too long)
604 * @return length of the keyword name
/*
 * Copies keywordName into buf in canonical form: every character must pass
 * UPRV_ISALPHANUM and is stored after conversion with uprv_tolower. buf is
 * NUL-terminated and the number of characters written is returned.
 * *status is set to U_ILLEGAL_ARGUMENT_ERROR for a character that fails
 * UPRV_ISALPHANUM or for an empty name, and to U_INTERNAL_PROGRAM_ERROR when
 * the name does not fit in ULOC_KEYWORD_BUFFER_LEN - 1 characters.
 * NOTE(review): the statements that follow each error assignment are not
 * visible in this excerpt; presumably they return early - confirm.
 */
606 static int32_t locale_canonKeywordName(char *buf
, const char *keywordName
, UErrorCode
*status
)
608 int32_t keywordNameLen
= 0;
/* Walk the incoming name up to its terminating NUL. */
610 for (; *keywordName
!= 0; keywordName
++) {
611 if (!UPRV_ISALPHANUM(*keywordName
)) {
612 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* malformed keyword name */
/* Keep one slot free for the terminating NUL written after the loop. */
615 if (keywordNameLen
< ULOC_KEYWORD_BUFFER_LEN
- 1) {
616 buf
[keywordNameLen
++] = uprv_tolower(*keywordName
);
618 /* keyword name too long for internal buffer */
619 *status
= U_INTERNAL_PROGRAM_ERROR
;
623 if (keywordNameLen
== 0) {
624 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* empty keyword name */
627 buf
[keywordNameLen
] = 0; /* terminate */
629 return keywordNameLen
;
633 char keyword
[ULOC_KEYWORD_BUFFER_LEN
];
635 const char *valueStart
;
/*
 * Comparison callback handed to uprv_sortArray: orders two KeywordStruct
 * entries by their keyword strings using uprv_strcmp. The first (context)
 * parameter is unnamed and unused.
 */
639 static int32_t U_CALLCONV
640 compareKeywordStructs(const void * /*context*/, const void *left
, const void *right
) {
/* Both operands point at KeywordStruct elements; only the keyword name is compared. */
641 const char* leftString
= ((const KeywordStruct
*)left
)->keyword
;
642 const char* rightString
= ((const KeywordStruct
*)right
)->keyword
;
643 return uprv_strcmp(leftString
, rightString
);
647 * Both addKeyword and addValue must already be in canonical form.
648 * Either both addKeyword and addValue are NULL, or neither is NULL.
649 * If they are not NULL they must be zero terminated.
650 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
653 _getKeywords(const char *localeID
,
655 char *keywords
, int32_t keywordCapacity
,
656 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
658 const char* addKeyword
,
659 const char* addValue
,
662 KeywordStruct keywordList
[ULOC_MAX_NO_KEYWORDS
];
664 int32_t maxKeywords
= ULOC_MAX_NO_KEYWORDS
;
665 int32_t numKeywords
= 0;
666 const char* pos
= localeID
;
667 const char* equalSign
= NULL
;
668 const char* semicolon
= NULL
;
670 int32_t keywordsLen
= 0;
671 int32_t valuesLen
= 0;
673 if(prev
== '@') { /* start of keyword definition */
674 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
676 UBool duplicate
= FALSE
;
677 /* skip leading spaces */
681 if (!*pos
) { /* handle trailing "; " */
684 if(numKeywords
== maxKeywords
) {
685 *status
= U_INTERNAL_PROGRAM_ERROR
;
688 equalSign
= uprv_strchr(pos
, '=');
689 semicolon
= uprv_strchr(pos
, ';');
690 /* lack of '=' [foo@currency] is illegal */
691 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
692 if(!equalSign
|| (semicolon
&& semicolon
<equalSign
)) {
693 *status
= U_INVALID_FORMAT_ERROR
;
696 /* need to normalize both keyword and keyword name */
697 if(equalSign
- pos
>= ULOC_KEYWORD_BUFFER_LEN
) {
698 /* keyword name too long for internal buffer */
699 *status
= U_INTERNAL_PROGRAM_ERROR
;
702 for(i
= 0, n
= 0; i
< equalSign
- pos
; ++i
) {
704 keywordList
[numKeywords
].keyword
[n
++] = uprv_tolower(pos
[i
]);
708 /* zero-length keyword is an error. */
710 *status
= U_INVALID_FORMAT_ERROR
;
714 keywordList
[numKeywords
].keyword
[n
] = 0;
715 keywordList
[numKeywords
].keywordLen
= n
;
716 /* now grab the value part. First we skip the '=' */
718 /* then we skip leading spaces */
719 while(*equalSign
== ' ') {
723 /* Premature end or zero-length value */
724 if (!*equalSign
|| equalSign
== semicolon
) {
725 *status
= U_INVALID_FORMAT_ERROR
;
729 keywordList
[numKeywords
].valueStart
= equalSign
;
734 while(*(pos
- i
- 1) == ' ') {
737 keywordList
[numKeywords
].valueLen
= (int32_t)(pos
- equalSign
- i
);
740 i
= (int32_t)uprv_strlen(equalSign
);
741 while(i
&& equalSign
[i
-1] == ' ') {
744 keywordList
[numKeywords
].valueLen
= i
;
746 /* If this is a duplicate keyword, then ignore it */
747 for (j
=0; j
<numKeywords
; ++j
) {
748 if (uprv_strcmp(keywordList
[j
].keyword
, keywordList
[numKeywords
].keyword
) == 0) {
758 /* Handle addKeyword/addValue. */
759 if (addKeyword
!= NULL
) {
760 UBool duplicate
= FALSE
;
761 U_ASSERT(addValue
!= NULL
);
762 /* Search for duplicate; if found, do nothing. Explicit keyword
763 overrides addKeyword. */
764 for (j
=0; j
<numKeywords
; ++j
) {
765 if (uprv_strcmp(keywordList
[j
].keyword
, addKeyword
) == 0) {
771 if (numKeywords
== maxKeywords
) {
772 *status
= U_INTERNAL_PROGRAM_ERROR
;
775 uprv_strcpy(keywordList
[numKeywords
].keyword
, addKeyword
);
776 keywordList
[numKeywords
].keywordLen
= (int32_t)uprv_strlen(addKeyword
);
777 keywordList
[numKeywords
].valueStart
= addValue
;
778 keywordList
[numKeywords
].valueLen
= (int32_t)uprv_strlen(addValue
);
782 U_ASSERT(addValue
== NULL
);
785 /* now we have a list of keywords */
786 /* we need to sort it */
787 uprv_sortArray(keywordList
, numKeywords
, sizeof(KeywordStruct
), compareKeywordStructs
, NULL
, FALSE
, status
);
789 /* Now construct the keyword part */
790 for(i
= 0; i
< numKeywords
; i
++) {
791 if(keywordsLen
+ keywordList
[i
].keywordLen
+ 1< keywordCapacity
) {
792 uprv_strcpy(keywords
+keywordsLen
, keywordList
[i
].keyword
);
794 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = '=';
796 keywords
[keywordsLen
+ keywordList
[i
].keywordLen
] = 0;
799 keywordsLen
+= keywordList
[i
].keywordLen
+ 1;
801 if(keywordsLen
+ keywordList
[i
].valueLen
< keywordCapacity
) {
802 uprv_strncpy(keywords
+keywordsLen
, keywordList
[i
].valueStart
, keywordList
[i
].valueLen
);
804 keywordsLen
+= keywordList
[i
].valueLen
;
806 if(i
< numKeywords
- 1) {
807 if(keywordsLen
< keywordCapacity
) {
808 keywords
[keywordsLen
] = ';';
814 if(valuesLen
+ keywordList
[i
].valueLen
+ 1< valuesCapacity
) {
815 uprv_strcpy(values
+valuesLen
, keywordList
[i
].valueStart
);
816 values
[valuesLen
+ keywordList
[i
].valueLen
] = 0;
818 valuesLen
+= keywordList
[i
].valueLen
+ 1;
822 values
[valuesLen
] = 0;
827 return u_terminateChars(keywords
, keywordCapacity
, keywordsLen
, status
);
834 locale_getKeywords(const char *localeID
,
836 char *keywords
, int32_t keywordCapacity
,
837 char *values
, int32_t valuesCapacity
, int32_t *valLen
,
839 UErrorCode
*status
) {
840 return _getKeywords(localeID
, prev
, keywords
, keywordCapacity
,
841 values
, valuesCapacity
, valLen
, valuesToo
,
845 U_CAPI
int32_t U_EXPORT2
846 uloc_getKeywordValue(const char* localeID
,
847 const char* keywordName
,
848 char* buffer
, int32_t bufferCapacity
,
851 const char* startSearchHere
= NULL
;
852 const char* nextSeparator
= NULL
;
853 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
854 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
857 if(status
&& U_SUCCESS(*status
) && localeID
) {
858 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
859 const char* tmpLocaleID
;
861 if (keywordName
== NULL
|| keywordName
[0] == 0) {
862 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
866 locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
867 if(U_FAILURE(*status
)) {
871 if (_hasBCP47Extension(localeID
)) {
872 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
874 tmpLocaleID
=localeID
;
877 startSearchHere
= locale_getKeywordsStart(tmpLocaleID
);
878 if(startSearchHere
== NULL
) {
879 /* no keywords, return at once */
883 /* find the first keyword */
884 while(startSearchHere
) {
885 const char* keyValueTail
;
888 startSearchHere
++; /* skip @ or ; */
889 nextSeparator
= uprv_strchr(startSearchHere
, '=');
891 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* key must have =value */
894 /* strip leading & trailing spaces (TC decided to tolerate these) */
895 while(*startSearchHere
== ' ') {
898 keyValueTail
= nextSeparator
;
899 while (keyValueTail
> startSearchHere
&& *(keyValueTail
-1) == ' ') {
902 /* now keyValueTail points to first char after the keyName */
903 /* copy & normalize keyName from locale */
904 if (startSearchHere
== keyValueTail
) {
905 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* empty keyword name in passed-in locale */
909 while (startSearchHere
< keyValueTail
) {
910 if (!UPRV_ISALPHANUM(*startSearchHere
)) {
911 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* malformed keyword name */
914 if (keyValueLen
< ULOC_KEYWORD_BUFFER_LEN
- 1) {
915 localeKeywordNameBuffer
[keyValueLen
++] = uprv_tolower(*startSearchHere
++);
917 /* keyword name too long for internal buffer */
918 *status
= U_INTERNAL_PROGRAM_ERROR
;
922 localeKeywordNameBuffer
[keyValueLen
] = 0; /* terminate */
924 startSearchHere
= uprv_strchr(nextSeparator
, ';');
926 if(uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
) == 0) {
927 /* current entry matches the keyword. */
928 nextSeparator
++; /* skip '=' */
929 /* First strip leading & trailing spaces (TC decided to tolerate these) */
930 while(*nextSeparator
== ' ') {
933 keyValueTail
= (startSearchHere
)? startSearchHere
: nextSeparator
+ uprv_strlen(nextSeparator
);
934 while(keyValueTail
> nextSeparator
&& *(keyValueTail
-1) == ' ') {
937 /* Now copy the value, but check well-formedness */
938 if (nextSeparator
== keyValueTail
) {
939 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* empty key value name in passed-in locale */
943 while (nextSeparator
< keyValueTail
) {
944 if (!UPRV_ISALPHANUM(*nextSeparator
) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator
)) {
945 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* malformed key value */
948 if (keyValueLen
< bufferCapacity
) {
949 /* Should we lowercase value to return here? Tests expect as-is. */
950 buffer
[keyValueLen
++] = *nextSeparator
++;
951 } else { /* keep advancing so we return correct length in case of overflow */
956 result
= u_terminateChars(buffer
, bufferCapacity
, keyValueLen
, status
);
964 U_CAPI
int32_t U_EXPORT2
965 uloc_setKeywordValue(const char* keywordName
,
966 const char* keywordValue
,
967 char* buffer
, int32_t bufferCapacity
,
970 /* TODO: sorting. removal. */
971 int32_t keywordNameLen
;
972 int32_t keywordValueLen
;
975 char keywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
976 char keywordValueBuffer
[ULOC_KEYWORDS_CAPACITY
+1];
977 char localeKeywordNameBuffer
[ULOC_KEYWORD_BUFFER_LEN
];
979 char* nextSeparator
= NULL
;
980 char* nextEqualsign
= NULL
;
981 char* startSearchHere
= NULL
;
982 char* keywordStart
= NULL
;
983 CharString updatedKeysAndValues
;
984 int32_t updatedKeysAndValuesLen
;
985 UBool handledInputKeyAndValue
= FALSE
;
986 char keyValuePrefix
= '@';
988 if(U_FAILURE(*status
)) {
991 if (keywordName
== NULL
|| keywordName
[0] == 0 || bufferCapacity
<= 1) {
992 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
995 bufLen
= (int32_t)uprv_strlen(buffer
);
996 if(bufferCapacity
<bufLen
) {
997 /* The capacity is less than the length?! Is this NULL terminated? */
998 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
1001 keywordNameLen
= locale_canonKeywordName(keywordNameBuffer
, keywordName
, status
);
1002 if(U_FAILURE(*status
)) {
1006 keywordValueLen
= 0;
1008 while (*keywordValue
!= 0) {
1009 if (!UPRV_ISALPHANUM(*keywordValue
) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue
)) {
1010 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* malformed key value */
1013 if (keywordValueLen
< ULOC_KEYWORDS_CAPACITY
) {
1014 /* Should we force lowercase in value to set? */
1015 keywordValueBuffer
[keywordValueLen
++] = *keywordValue
++;
1017 /* keywordValue too long for internal buffer */
1018 *status
= U_INTERNAL_PROGRAM_ERROR
;
1023 keywordValueBuffer
[keywordValueLen
] = 0; /* terminate */
1025 startSearchHere
= (char*)locale_getKeywordsStart(buffer
);
1026 if(startSearchHere
== NULL
|| (startSearchHere
[1]==0)) {
1027 if(keywordValueLen
== 0) { /* no keywords = nothing to remove */
1031 needLen
= bufLen
+1+keywordNameLen
+1+keywordValueLen
;
1032 if(startSearchHere
) { /* had a single @ */
1033 needLen
--; /* already had the @ */
1034 /* startSearchHere points at the @ */
1036 startSearchHere
=buffer
+bufLen
;
1038 if(needLen
>= bufferCapacity
) {
1039 *status
= U_BUFFER_OVERFLOW_ERROR
;
1040 return needLen
; /* no change */
1042 *startSearchHere
++ = '@';
1043 uprv_strcpy(startSearchHere
, keywordNameBuffer
);
1044 startSearchHere
+= keywordNameLen
;
1045 *startSearchHere
++ = '=';
1046 uprv_strcpy(startSearchHere
, keywordValueBuffer
);
1048 } /* end shortcut - no @ */
1050 keywordStart
= startSearchHere
;
1051 /* search for keyword */
1052 while(keywordStart
) {
1053 const char* keyValueTail
;
1054 int32_t keyValueLen
;
1056 keywordStart
++; /* skip @ or ; */
1057 nextEqualsign
= uprv_strchr(keywordStart
, '=');
1058 if (!nextEqualsign
) {
1059 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* key must have =value */
1062 /* strip leading & trailing spaces (TC decided to tolerate these) */
1063 while(*keywordStart
== ' ') {
1066 keyValueTail
= nextEqualsign
;
1067 while (keyValueTail
> keywordStart
&& *(keyValueTail
-1) == ' ') {
1070 /* now keyValueTail points to first char after the keyName */
1071 /* copy & normalize keyName from locale */
1072 if (keywordStart
== keyValueTail
) {
1073 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* empty keyword name in passed-in locale */
1077 while (keywordStart
< keyValueTail
) {
1078 if (!UPRV_ISALPHANUM(*keywordStart
)) {
1079 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* malformed keyword name */
1082 if (keyValueLen
< ULOC_KEYWORD_BUFFER_LEN
- 1) {
1083 localeKeywordNameBuffer
[keyValueLen
++] = uprv_tolower(*keywordStart
++);
1085 /* keyword name too long for internal buffer */
1086 *status
= U_INTERNAL_PROGRAM_ERROR
;
1090 localeKeywordNameBuffer
[keyValueLen
] = 0; /* terminate */
1092 nextSeparator
= uprv_strchr(nextEqualsign
, ';');
1094 /* start processing the value part */
1095 nextEqualsign
++; /* skip '=' */
1096 /* First strip leading & trailing spaces (TC decided to tolerate these) */
1097 while(*nextEqualsign
== ' ') {
1100 keyValueTail
= (nextSeparator
)? nextSeparator
: nextEqualsign
+ uprv_strlen(nextEqualsign
);
1101 while(keyValueTail
> nextEqualsign
&& *(keyValueTail
-1) == ' ') {
1104 if (nextEqualsign
== keyValueTail
) {
1105 *status
= U_ILLEGAL_ARGUMENT_ERROR
; /* empty key value in passed-in locale */
1109 rc
= uprv_strcmp(keywordNameBuffer
, localeKeywordNameBuffer
);
1111 /* Current entry matches the input keyword. Update the entry */
1112 if(keywordValueLen
> 0) { /* updating a value */
1113 updatedKeysAndValues
.append(keyValuePrefix
, *status
);
1114 keyValuePrefix
= ';'; /* for any subsequent key-value pair */
1115 updatedKeysAndValues
.append(keywordNameBuffer
, keywordNameLen
, *status
);
1116 updatedKeysAndValues
.append('=', *status
);
1117 updatedKeysAndValues
.append(keywordValueBuffer
, keywordValueLen
, *status
);
1118 } /* else removing this entry, don't emit anything */
1119 handledInputKeyAndValue
= TRUE
;
1121 /* input keyword sorts earlier than current entry, add before current entry */
1122 if (rc
< 0 && keywordValueLen
> 0 && !handledInputKeyAndValue
) {
1123 /* insert new entry at this location */
1124 updatedKeysAndValues
.append(keyValuePrefix
, *status
);
1125 keyValuePrefix
= ';'; /* for any subsequent key-value pair */
1126 updatedKeysAndValues
.append(keywordNameBuffer
, keywordNameLen
, *status
);
1127 updatedKeysAndValues
.append('=', *status
);
1128 updatedKeysAndValues
.append(keywordValueBuffer
, keywordValueLen
, *status
);
1129 handledInputKeyAndValue
= TRUE
;
1131 /* copy the current entry */
1132 updatedKeysAndValues
.append(keyValuePrefix
, *status
);
1133 keyValuePrefix
= ';'; /* for any subsequent key-value pair */
1134 updatedKeysAndValues
.append(localeKeywordNameBuffer
, keyValueLen
, *status
);
1135 updatedKeysAndValues
.append('=', *status
);
1136 updatedKeysAndValues
.append(nextEqualsign
, keyValueTail
-nextEqualsign
, *status
);
1138 if (!nextSeparator
&& keywordValueLen
> 0 && !handledInputKeyAndValue
) {
1139 /* append new entry at the end, it sorts later than existing entries */
1140 updatedKeysAndValues
.append(keyValuePrefix
, *status
);
1141 /* skip keyValuePrefix update, no subsequent key-value pair */
1142 updatedKeysAndValues
.append(keywordNameBuffer
, keywordNameLen
, *status
);
1143 updatedKeysAndValues
.append('=', *status
);
1144 updatedKeysAndValues
.append(keywordValueBuffer
, keywordValueLen
, *status
);
1145 handledInputKeyAndValue
= TRUE
;
1147 keywordStart
= nextSeparator
;
1148 } /* end loop searching */
1150 /* Any error from updatedKeysAndValues.append above would be internal and not due to
1151 * problems with the passed-in locale. So if we did encounter problems with the
1152 * passed-in locale above, those errors took precedence and overrode any error
1153 * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1154 * are errors here they are from updatedKeysAndValues.append; they do cause an
1155 * error return but the passed-in locale is unmodified and the original bufLen is
1158 if (!handledInputKeyAndValue
|| U_FAILURE(*status
)) {
1159 /* if input key/value specified removal of a keyword not present in locale, or
1160 * there was an error in CharString.append, leave original locale alone. */
1164 updatedKeysAndValuesLen
= updatedKeysAndValues
.length();
1165 /* needLen = length of the part before '@' + length of updated key-value part including '@' */
1166 needLen
= (int32_t)(startSearchHere
- buffer
) + updatedKeysAndValuesLen
;
1167 if(needLen
>= bufferCapacity
) {
1168 *status
= U_BUFFER_OVERFLOW_ERROR
;
1169 return needLen
; /* no change */
1171 if (updatedKeysAndValuesLen
> 0) {
1172 uprv_strncpy(startSearchHere
, updatedKeysAndValues
.data(), updatedKeysAndValuesLen
);
1178 /* ### ID parsing implementation **************************************************/
/* Nonzero if 'a' is one of the special prefix letters 'x', 'X', 'i' or 'I'.
 * The argument is parenthesized at each use so that an expression argument
 * (e.g. a conditional) is compared as a whole. Note: 'a' is evaluated up to
 * four times, so it must not have side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))
1182 /*returns TRUE if one of the special prefixes is here (s=string)
/* 's' is parenthesized before indexing so that pointer expressions such as
 * (p + 1) index correctly. Relies on _isPrefixLetter (above) and on
 * _isIDSeparator, which is defined outside this section of the file. */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))
1186 /* Dot terminates it because of POSIX form where dot precedes the codepage
1187 * except for variant
/* Nonzero if 'a' is NUL, '.' or '@'. The argument is parenthesized at each
 * use so that an expression argument is compared as a whole. 'a' is evaluated
 * up to three times, so it must not have side effects. */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1191 static char* _strnchr(const char* str
, int32_t len
, char c
) {
1192 U_ASSERT(str
!= 0 && len
>= 0);
1193 while (len
-- != 0) {
1197 } else if (d
== 0) {
1206 * Lookup 'key' in the array 'list'. The array 'list' should contain
1207 * a NULL entry, followed by more entries, and a second NULL entry.
1209 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1212 static int16_t _findIndex(const char* const* list
, const char* key
)
1214 const char* const* anchor
= list
;
1217 /* Make two passes through two NULL-terminated arrays at 'list' */
1218 while (pass
++ < 2) {
1220 if (uprv_strcmp(key
, *list
) == 0) {
1221 return (int16_t)(list
- anchor
);
1225 ++list
; /* skip final NULL *CWB*/
1230 /* count the length of src while copying it to dest; return strlen(src) */
1231 static inline int32_t
1232 _copyCount(char *dest
, int32_t destCapacity
, const char *src
) {
1239 return (int32_t)(src
-anchor
);
1241 if(destCapacity
<=0) {
1242 return (int32_t)((src
-anchor
)+uprv_strlen(src
));
1251 uloc_getCurrentCountryID(const char* oldID
){
1252 int32_t offset
= _findIndex(DEPRECATED_COUNTRIES
, oldID
);
1254 return REPLACEMENT_COUNTRIES
[offset
];
1259 uloc_getCurrentLanguageID(const char* oldID
){
1260 int32_t offset
= _findIndex(DEPRECATED_LANGUAGES
, oldID
);
1262 return REPLACEMENT_LANGUAGES
[offset
];
1267 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1268 * avoid duplicating code to handle the earlier locale ID pieces
1269 * in the functions for the later ones by
1270 * setting the *pEnd pointer to where they stopped parsing
1272 * TODO try to use this in Locale
1275 ulocimp_getLanguage(const char *localeID
,
1276 char *language
, int32_t languageCapacity
,
1277 const char **pEnd
) {
1280 char lang
[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1282 /* if it starts with i- or x- then copy that prefix */
1283 if(_isIDPrefix(localeID
)) {
1284 if(i
<languageCapacity
) {
1285 language
[i
]=(char)uprv_tolower(*localeID
);
1287 if(i
<languageCapacity
) {
1294 /* copy the language as far as possible and count its length */
1295 while(!_isTerminator(*localeID
) && !_isIDSeparator(*localeID
)) {
1296 if(i
<languageCapacity
) {
1297 language
[i
]=(char)uprv_tolower(*localeID
);
1301 lang
[i
]=(char)uprv_tolower(*localeID
);
1308 /* convert 3 character code to 2 character code if possible *CWB*/
1309 offset
=_findIndex(LANGUAGES_3
, lang
);
1311 i
=_copyCount(language
, languageCapacity
, LANGUAGES
[offset
]);
1322 ulocimp_getScript(const char *localeID
,
1323 char *script
, int32_t scriptCapacity
,
1332 /* copy the second item as far as possible and count its length */
1333 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])
1334 && uprv_isASCIILetter(localeID
[idLen
])) {
1338 /* If it's exactly 4 characters long, then it's a script and not a country. */
1342 *pEnd
= localeID
+idLen
;
1344 if(idLen
> scriptCapacity
) {
1345 idLen
= scriptCapacity
;
1348 script
[0]=(char)uprv_toupper(*(localeID
++));
1350 for (i
= 1; i
< idLen
; i
++) {
1351 script
[i
]=(char)uprv_tolower(*(localeID
++));
1361 ulocimp_getCountry(const char *localeID
,
1362 char *country
, int32_t countryCapacity
,
1366 char cnty
[ULOC_COUNTRY_CAPACITY
]={ 0, 0, 0, 0 };
1369 /* copy the country as far as possible and count its length */
1370 while(!_isTerminator(localeID
[idLen
]) && !_isIDSeparator(localeID
[idLen
])) {
1371 if(idLen
<(ULOC_COUNTRY_CAPACITY
-1)) { /*CWB*/
1372 cnty
[idLen
]=(char)uprv_toupper(localeID
[idLen
]);
1377 /* the country should be either length 2 or 3 */
1378 if (idLen
== 2 || idLen
== 3) {
1379 UBool gotCountry
= FALSE
;
1380 /* convert 3 character code to 2 character code if possible *CWB*/
1382 offset
=_findIndex(COUNTRIES_3
, cnty
);
1384 idLen
=_copyCount(country
, countryCapacity
, COUNTRIES
[offset
]);
1390 for (i
= 0; i
< idLen
; i
++) {
1391 if (i
< countryCapacity
) {
1392 country
[i
]=(char)uprv_toupper(localeID
[i
]);
1409 * @param needSeparator if true, then add leading '_' if any variants
1410 * are added to 'variant'
1413 _getVariantEx(const char *localeID
,
1415 char *variant
, int32_t variantCapacity
,
1416 UBool needSeparator
) {
1419 /* get one or more variant tags and separate them with '_' */
1420 if(_isIDSeparator(prev
)) {
1421 /* get a variant string after a '-' or '_' */
1422 while(!_isTerminator(*localeID
)) {
1423 if (needSeparator
) {
1424 if (i
<variantCapacity
) {
1428 needSeparator
= FALSE
;
1430 if(i
<variantCapacity
) {
1431 variant
[i
]=(char)uprv_toupper(*localeID
);
1432 if(variant
[i
]=='-') {
1441 /* if there is no variant tag after a '-' or '_' then look for '@' */
1445 } else if((localeID
=locale_getKeywordsStart(localeID
))!=NULL
) {
1446 ++localeID
; /* point after the '@' */
1450 while(!_isTerminator(*localeID
)) {
1451 if (needSeparator
) {
1452 if (i
<variantCapacity
) {
1456 needSeparator
= FALSE
;
1458 if(i
<variantCapacity
) {
1459 variant
[i
]=(char)uprv_toupper(*localeID
);
1460 if(variant
[i
]=='-' || variant
[i
]==',') {
1473 _getVariant(const char *localeID
,
1475 char *variant
, int32_t variantCapacity
) {
1476 return _getVariantEx(localeID
, prev
, variant
, variantCapacity
, FALSE
);
1480 * Delete ALL instances of a variant from the given list of one or
1481 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1482 * @param variants the source string of one or more variants,
1483 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1484 * terminated; if it is, trailing zero will NOT be maintained.
1485 * @param variantsLen length of variants
1486 * @param toDelete variant to delete, without separators, e.g. "EURO"
1487 * or "PREEURO"; not zero terminated
1488 * @param toDeleteLen length of toDelete
1489 * @return number of characters deleted from variants
1492 _deleteVariant(char* variants
, int32_t variantsLen
,
1493 const char* toDelete
, int32_t toDeleteLen
)
1495 int32_t delta
= 0; /* number of chars deleted */
1498 if (variantsLen
< toDeleteLen
) {
1501 if (uprv_strncmp(variants
, toDelete
, toDeleteLen
) == 0 &&
1502 (variantsLen
== toDeleteLen
||
1503 (flag
=(variants
[toDeleteLen
] == '_'))))
1505 int32_t d
= toDeleteLen
+ (flag
?1:0);
1508 if (variantsLen
> 0) {
1509 uprv_memmove(variants
, variants
+d
, variantsLen
);
1512 char* p
= _strnchr(variants
, variantsLen
, '_');
1517 variantsLen
-= (int32_t)(p
- variants
);
1523 /* Keyword enumeration */
1525 typedef struct UKeywordsContext
{
1532 static void U_CALLCONV
1533 uloc_kw_closeKeywords(UEnumeration
*enumerator
) {
1534 uprv_free(((UKeywordsContext
*)enumerator
->context
)->keywords
);
1535 uprv_free(enumerator
->context
);
1536 uprv_free(enumerator
);
1539 static int32_t U_CALLCONV
1540 uloc_kw_countKeywords(UEnumeration
*en
, UErrorCode
* /*status*/) {
1541 char *kw
= ((UKeywordsContext
*)en
->context
)->keywords
;
1545 kw
+= uprv_strlen(kw
)+1;
1550 static const char * U_CALLCONV
1551 uloc_kw_nextKeyword(UEnumeration
* en
,
1552 int32_t* resultLength
,
1553 UErrorCode
* /*status*/) {
1554 const char* result
= ((UKeywordsContext
*)en
->context
)->current
;
1557 len
= (int32_t)uprv_strlen(((UKeywordsContext
*)en
->context
)->current
);
1558 ((UKeywordsContext
*)en
->context
)->current
+= len
+1;
1563 *resultLength
= len
;
1568 static void U_CALLCONV
1569 uloc_kw_resetKeywords(UEnumeration
* en
,
1570 UErrorCode
* /*status*/) {
1571 ((UKeywordsContext
*)en
->context
)->current
= ((UKeywordsContext
*)en
->context
)->keywords
;
1577 static const UEnumeration gKeywordsEnum
= {
1580 uloc_kw_closeKeywords
,
1581 uloc_kw_countKeywords
,
1583 uloc_kw_nextKeyword
,
1584 uloc_kw_resetKeywords
1587 U_CAPI UEnumeration
* U_EXPORT2
1588 uloc_openKeywordList(const char *keywordList
, int32_t keywordListSize
, UErrorCode
* status
)
1590 UKeywordsContext
*myContext
= NULL
;
1591 UEnumeration
*result
= NULL
;
1593 if(U_FAILURE(*status
)) {
1596 result
= (UEnumeration
*)uprv_malloc(sizeof(UEnumeration
));
1597 /* Null pointer test */
1598 if (result
== NULL
) {
1599 *status
= U_MEMORY_ALLOCATION_ERROR
;
1602 uprv_memcpy(result
, &gKeywordsEnum
, sizeof(UEnumeration
));
1603 myContext
= static_cast<UKeywordsContext
*>(uprv_malloc(sizeof(UKeywordsContext
)));
1604 if (myContext
== NULL
) {
1605 *status
= U_MEMORY_ALLOCATION_ERROR
;
1609 myContext
->keywords
= (char *)uprv_malloc(keywordListSize
+1);
1610 uprv_memcpy(myContext
->keywords
, keywordList
, keywordListSize
);
1611 myContext
->keywords
[keywordListSize
] = 0;
1612 myContext
->current
= myContext
->keywords
;
1613 result
->context
= myContext
;
1617 U_CAPI UEnumeration
* U_EXPORT2
1618 uloc_openKeywords(const char* localeID
,
1623 int32_t keywordsCapacity
= 256;
1624 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1625 const char* tmpLocaleID
;
1627 if(status
==NULL
|| U_FAILURE(*status
)) {
1631 if (_hasBCP47Extension(localeID
)) {
1632 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), status
);
1634 if (localeID
==NULL
) {
1635 localeID
=uloc_getDefault();
1637 tmpLocaleID
=localeID
;
1640 /* Skip the language */
1641 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
1642 if(_isIDSeparator(*tmpLocaleID
)) {
1643 const char *scriptID
;
1644 /* Skip the script if available */
1645 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
1646 if(scriptID
!= tmpLocaleID
+1) {
1647 /* Found optional script */
1648 tmpLocaleID
= scriptID
;
1650 /* Skip the Country */
1651 if (_isIDSeparator(*tmpLocaleID
)) {
1652 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &tmpLocaleID
);
1653 if(_isIDSeparator(*tmpLocaleID
)) {
1654 _getVariant(tmpLocaleID
+1, *tmpLocaleID
, NULL
, 0);
1659 /* keywords are located after '@' */
1660 if((tmpLocaleID
= locale_getKeywordsStart(tmpLocaleID
)) != NULL
) {
1661 i
=locale_getKeywords(tmpLocaleID
+1, '@', keywords
, keywordsCapacity
, NULL
, 0, NULL
, FALSE
, status
);
1665 return uloc_openKeywordList(keywords
, i
, status
);
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* Nonzero if any bit of 'mask' is set in 'options'. Both arguments are
 * parenthesized so that a compound mask such as (A | B) is ANDed as a whole
 * instead of being split by operator precedence. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)
/* The characters of "i-default", listed one by one. The initializer has no
 * trailing NUL, so the array holds exactly the nine characters shown and
 * I_DEFAULT_LENGTH is its element count. */
static const char i_default[] = {
    'i', '-',
    'd', 'e', 'f', 'a', 'u', 'l', 't'
};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1682 * Canonicalize the given localeID, to level 1 or to level 2,
1683 * depending on the options. To specify level 1, pass in options=0.
1684 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1686 * This is the code underlying uloc_getName and uloc_canonicalize.
1689 _canonicalize(const char* localeID
,
1691 int32_t resultCapacity
,
1694 int32_t j
, len
, fieldCount
=0, scriptSize
=0, variantSize
=0, nameCapacity
;
1695 char localeBuffer
[ULOC_FULLNAME_CAPACITY
];
1696 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
1697 const char* origLocaleID
;
1698 const char* tmpLocaleID
;
1699 const char* keywordAssign
= NULL
;
1700 const char* separatorIndicator
= NULL
;
1701 const char* addKeyword
= NULL
;
1702 const char* addValue
= NULL
;
1704 char* variant
= NULL
; /* pointer into name, or NULL */
1706 if (U_FAILURE(*err
)) {
1710 if (_hasBCP47Extension(localeID
)) {
1711 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
1713 if (localeID
==NULL
) {
1714 localeID
=uloc_getDefault();
1716 tmpLocaleID
=localeID
;
1719 origLocaleID
=tmpLocaleID
;
1721 /* if we are doing a full canonicalization, then put results in
1722 localeBuffer, if necessary; otherwise send them to result. */
1723 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1724 (result
== NULL
|| resultCapacity
< (int32_t)sizeof(localeBuffer
))) {
1725 name
= localeBuffer
;
1726 nameCapacity
= (int32_t)sizeof(localeBuffer
);
1729 nameCapacity
= resultCapacity
;
1732 /* get all pieces, one after another, and separate with '_' */
1733 len
=ulocimp_getLanguage(tmpLocaleID
, name
, nameCapacity
, &tmpLocaleID
);
1735 if(len
== I_DEFAULT_LENGTH
&& uprv_strncmp(origLocaleID
, i_default
, len
) == 0) {
1736 const char *d
= uloc_getDefault();
1738 len
= (int32_t)uprv_strlen(d
);
1741 uprv_strncpy(name
, d
, len
);
1743 } else if(_isIDSeparator(*tmpLocaleID
)) {
1744 const char *scriptID
;
1747 if(len
<nameCapacity
) {
1752 scriptSize
=ulocimp_getScript(tmpLocaleID
+1,
1753 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
, &scriptID
);
1754 if(scriptSize
> 0) {
1755 /* Found optional script */
1756 tmpLocaleID
= scriptID
;
1759 if (_isIDSeparator(*tmpLocaleID
)) {
1760 /* If there is something else, then we add the _ */
1761 if(len
<nameCapacity
) {
1768 if (_isIDSeparator(*tmpLocaleID
)) {
1769 const char *cntryID
;
1770 int32_t cntrySize
= ulocimp_getCountry(tmpLocaleID
+1,
1771 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
, &cntryID
);
1772 if (cntrySize
> 0) {
1773 /* Found optional country */
1774 tmpLocaleID
= cntryID
;
1777 if(_isIDSeparator(*tmpLocaleID
)) {
1778 /* If there is something else, then we add the _ if we found country before. */
1779 if (cntrySize
>= 0 && ! _isIDSeparator(*(tmpLocaleID
+1)) ) {
1781 if(len
<nameCapacity
) {
1787 variantSize
= _getVariant(tmpLocaleID
+1, *tmpLocaleID
,
1788 (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
);
1789 if (variantSize
> 0) {
1790 variant
= len
<nameCapacity
? name
+len
: NULL
;
1792 tmpLocaleID
+= variantSize
+ 1; /* skip '_' and variant */
1798 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1799 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) && *tmpLocaleID
== '.') {
1802 char c
= *tmpLocaleID
;
1809 if (len
<nameCapacity
) {
1819 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1820 After this, tmpLocaleID either points to '@' or is NULL */
1821 if ((tmpLocaleID
=locale_getKeywordsStart(tmpLocaleID
))!=NULL
) {
1822 keywordAssign
= uprv_strchr(tmpLocaleID
, '=');
1823 separatorIndicator
= uprv_strchr(tmpLocaleID
, ';');
1826 /* Copy POSIX-style variant, if any [mr@FOO] */
1827 if (!OPTION_SET(options
, _ULOC_CANONICALIZE
) &&
1828 tmpLocaleID
!= NULL
&& keywordAssign
== NULL
) {
1830 char c
= *tmpLocaleID
;
1834 if (len
<nameCapacity
) {
1842 if (OPTION_SET(options
, _ULOC_CANONICALIZE
)) {
1843 /* Handle @FOO variant if @ is present and not followed by = */
1844 if (tmpLocaleID
!=NULL
&& keywordAssign
==NULL
) {
1845 int32_t posixVariantSize
;
1846 /* Add missing '_' if needed */
1847 if (fieldCount
< 2 || (fieldCount
< 3 && scriptSize
> 0)) {
1849 if(len
<nameCapacity
) {
1854 } while(fieldCount
<2);
1856 posixVariantSize
= _getVariantEx(tmpLocaleID
+1, '@', name
+len
, nameCapacity
-len
,
1857 (UBool
)(variantSize
> 0));
1858 if (posixVariantSize
> 0) {
1859 if (variant
== NULL
) {
1862 len
+= posixVariantSize
;
1863 variantSize
+= posixVariantSize
;
1867 /* Handle generic variants first */
1869 for (j
=0; j
<UPRV_LENGTHOF(VARIANT_MAP
); j
++) {
1870 const char* variantToCompare
= VARIANT_MAP
[j
].variant
;
1871 int32_t n
= (int32_t)uprv_strlen(variantToCompare
);
1872 int32_t variantLen
= _deleteVariant(variant
, uprv_min(variantSize
, (nameCapacity
-len
)), variantToCompare
, n
);
1874 if (variantLen
> 0) {
1875 if (len
> 0 && name
[len
-1] == '_') { /* delete trailing '_' */
1878 addKeyword
= VARIANT_MAP
[j
].keyword
;
1879 addValue
= VARIANT_MAP
[j
].value
;
1883 if (len
> 0 && len
<= nameCapacity
&& name
[len
-1] == '_') { /* delete trailing '_' */
1888 /* Look up the ID in the canonicalization map */
1889 for (j
=0; j
<UPRV_LENGTHOF(CANONICALIZE_MAP
); j
++) {
1890 const char* id
= CANONICALIZE_MAP
[j
].id
;
1891 int32_t n
= (int32_t)uprv_strlen(id
);
1892 if (len
== n
&& uprv_strncmp(name
, id
, n
) == 0) {
1893 if (n
== 0 && tmpLocaleID
!= NULL
) {
1894 break; /* Don't remap "" if keywords present */
1896 len
= _copyCount(name
, nameCapacity
, CANONICALIZE_MAP
[j
].canonicalID
);
1897 if (CANONICALIZE_MAP
[j
].keyword
) {
1898 addKeyword
= CANONICALIZE_MAP
[j
].keyword
;
1899 addValue
= CANONICALIZE_MAP
[j
].value
;
1906 if (!OPTION_SET(options
, _ULOC_STRIP_KEYWORDS
)) {
1907 if (tmpLocaleID
!=NULL
&& keywordAssign
!=NULL
&&
1908 (!separatorIndicator
|| separatorIndicator
> keywordAssign
)) {
1909 if(len
<nameCapacity
) {
1914 len
+= _getKeywords(tmpLocaleID
+1, '@', (len
<nameCapacity
? name
+len
: NULL
), nameCapacity
-len
,
1915 NULL
, 0, NULL
, TRUE
, addKeyword
, addValue
, err
);
1916 } else if (addKeyword
!= NULL
) {
1917 U_ASSERT(addValue
!= NULL
&& len
< nameCapacity
);
1918 /* inelegant but works -- later make _getKeywords do this? */
1919 len
+= _copyCount(name
+len
, nameCapacity
-len
, "@");
1920 len
+= _copyCount(name
+len
, nameCapacity
-len
, addKeyword
);
1921 len
+= _copyCount(name
+len
, nameCapacity
-len
, "=");
1922 len
+= _copyCount(name
+len
, nameCapacity
-len
, addValue
);
1926 if (U_SUCCESS(*err
) && result
!= NULL
&& name
== localeBuffer
) {
1927 uprv_strncpy(result
, localeBuffer
, (len
> resultCapacity
) ? resultCapacity
: len
);
1930 return u_terminateChars(result
, resultCapacity
, len
, err
);
1933 /* ### ID parsing API **************************************************/
1935 U_CAPI
int32_t U_EXPORT2
1936 uloc_getParent(const char* localeID
,
1938 int32_t parentCapacity
,
1941 const char *lastUnderscore
;
1944 if (U_FAILURE(*err
))
1947 if (localeID
== NULL
)
1948 localeID
= uloc_getDefault();
1950 lastUnderscore
=uprv_strrchr(localeID
, '_');
1951 if(lastUnderscore
!=NULL
) {
1952 i
=(int32_t)(lastUnderscore
-localeID
);
1957 if(i
>0 && parent
!= localeID
) {
1958 uprv_memcpy(parent
, localeID
, uprv_min(i
, parentCapacity
));
1960 return u_terminateChars(parent
, parentCapacity
, i
, err
);
1963 U_CAPI
int32_t U_EXPORT2
1964 uloc_getLanguage(const char* localeID
,
1966 int32_t languageCapacity
,
1969 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1972 if (err
==NULL
|| U_FAILURE(*err
)) {
1976 if(localeID
==NULL
) {
1977 localeID
=uloc_getDefault();
1980 i
=ulocimp_getLanguage(localeID
, language
, languageCapacity
, NULL
);
1981 return u_terminateChars(language
, languageCapacity
, i
, err
);
1984 U_CAPI
int32_t U_EXPORT2
1985 uloc_getScript(const char* localeID
,
1987 int32_t scriptCapacity
,
1992 if(err
==NULL
|| U_FAILURE(*err
)) {
1996 if(localeID
==NULL
) {
1997 localeID
=uloc_getDefault();
2000 /* skip the language */
2001 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
2002 if(_isIDSeparator(*localeID
)) {
2003 i
=ulocimp_getScript(localeID
+1, script
, scriptCapacity
, NULL
);
2005 return u_terminateChars(script
, scriptCapacity
, i
, err
);
2008 U_CAPI
int32_t U_EXPORT2
2009 uloc_getCountry(const char* localeID
,
2011 int32_t countryCapacity
,
2016 if(err
==NULL
|| U_FAILURE(*err
)) {
2020 if(localeID
==NULL
) {
2021 localeID
=uloc_getDefault();
2024 /* Skip the language */
2025 ulocimp_getLanguage(localeID
, NULL
, 0, &localeID
);
2026 if(_isIDSeparator(*localeID
)) {
2027 const char *scriptID
;
2028 /* Skip the script if available */
2029 ulocimp_getScript(localeID
+1, NULL
, 0, &scriptID
);
2030 if(scriptID
!= localeID
+1) {
2031 /* Found optional script */
2032 localeID
= scriptID
;
2034 if(_isIDSeparator(*localeID
)) {
2035 i
=ulocimp_getCountry(localeID
+1, country
, countryCapacity
, NULL
);
2038 return u_terminateChars(country
, countryCapacity
, i
, err
);
2041 U_CAPI
int32_t U_EXPORT2
2042 uloc_getVariant(const char* localeID
,
2044 int32_t variantCapacity
,
2047 char tempBuffer
[ULOC_FULLNAME_CAPACITY
];
2048 const char* tmpLocaleID
;
2051 if(err
==NULL
|| U_FAILURE(*err
)) {
2055 if (_hasBCP47Extension(localeID
)) {
2056 _ConvertBCP47(tmpLocaleID
, localeID
, tempBuffer
, sizeof(tempBuffer
), err
);
2058 if (localeID
==NULL
) {
2059 localeID
=uloc_getDefault();
2061 tmpLocaleID
=localeID
;
2064 /* Skip the language */
2065 ulocimp_getLanguage(tmpLocaleID
, NULL
, 0, &tmpLocaleID
);
2066 if(_isIDSeparator(*tmpLocaleID
)) {
2067 const char *scriptID
;
2068 /* Skip the script if available */
2069 ulocimp_getScript(tmpLocaleID
+1, NULL
, 0, &scriptID
);
2070 if(scriptID
!= tmpLocaleID
+1) {
2071 /* Found optional script */
2072 tmpLocaleID
= scriptID
;
2074 /* Skip the Country */
2075 if (_isIDSeparator(*tmpLocaleID
)) {
2076 const char *cntryID
;
2077 ulocimp_getCountry(tmpLocaleID
+1, NULL
, 0, &cntryID
);
2078 if (cntryID
!= tmpLocaleID
+1) {
2079 /* Found optional country */
2080 tmpLocaleID
= cntryID
;
2082 if(_isIDSeparator(*tmpLocaleID
)) {
2083 /* If there was no country ID, skip a possible extra IDSeparator */
2084 if (tmpLocaleID
!= cntryID
&& _isIDSeparator(tmpLocaleID
[1])) {
2087 i
=_getVariant(tmpLocaleID
+1, *tmpLocaleID
, variant
, variantCapacity
);
2092 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2093 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2095 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2096 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2099 return u_terminateChars(variant
, variantCapacity
, i
, err
);
2102 U_CAPI
int32_t U_EXPORT2
2103 uloc_getName(const char* localeID
,
2105 int32_t nameCapacity
,
2108 return _canonicalize(localeID
, name
, nameCapacity
, 0, err
);
2111 U_CAPI
int32_t U_EXPORT2
2112 uloc_getBaseName(const char* localeID
,
2114 int32_t nameCapacity
,
2117 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_STRIP_KEYWORDS
, err
);
2120 U_CAPI
int32_t U_EXPORT2
2121 uloc_canonicalize(const char* localeID
,
2123 int32_t nameCapacity
,
2126 return _canonicalize(localeID
, name
, nameCapacity
, _ULOC_CANONICALIZE
, err
);
/*
 * uloc_getISO3Language: maps the language part of localeID to the entry at the
 * same index in LANGUAGES_3.
 *
 * Steps visible here: a NULL localeID is replaced by uloc_getDefault(); the
 * language is extracted with uloc_getLanguage() into a local buffer; the
 * buffer is looked up in LANGUAGES with _findIndex(); the returned pointer is
 * LANGUAGES_3[offset].
 *
 * NOTE(review): the handling of a failed uloc_getLanguage() call or of a
 * lookup miss is on lines missing from this excerpt - confirm before relying
 * on the return value for unknown languages.
 */
2129 U_CAPI
const char* U_EXPORT2
2130 uloc_getISO3Language(const char* localeID
)
2133 char lang
[ULOC_LANG_CAPACITY
];
/* Local status only; this function has no error parameter. */
2134 UErrorCode err
= U_ZERO_ERROR
;
/* Fall back to the default locale when the caller passes NULL. */
2136 if (localeID
== NULL
)
2138 localeID
= uloc_getDefault();
2140 uloc_getLanguage(localeID
, lang
, ULOC_LANG_CAPACITY
, &err
);
/* LANGUAGES and LANGUAGES_3 are indexed in parallel (see the table comment near the top of the file). */
2143 offset
= _findIndex(LANGUAGES
, lang
);
2146 return LANGUAGES_3
[offset
];
/*
 * uloc_getISO3Country: maps the country part of localeID to the entry at the
 * same index in COUNTRIES_3.
 *
 * Steps visible here: a NULL localeID is replaced by uloc_getDefault(); the
 * country is extracted with uloc_getCountry() into a local buffer; the buffer
 * is looked up in COUNTRIES with _findIndex(); the returned pointer is
 * COUNTRIES_3[offset].
 *
 * NOTE(review): the handling of a failed uloc_getCountry() call or of a
 * lookup miss is on lines missing from this excerpt - confirm.
 * NOTE(review): the country buffer and the capacity passed to
 * uloc_getCountry() both use ULOC_LANG_CAPACITY rather than a country-specific
 * constant - confirm this is intentional.
 */
2149 U_CAPI
const char* U_EXPORT2
2150 uloc_getISO3Country(const char* localeID
)
2153 char cntry
[ULOC_LANG_CAPACITY
];
/* Local status only; this function has no error parameter. */
2154 UErrorCode err
= U_ZERO_ERROR
;
/* Fall back to the default locale when the caller passes NULL. */
2156 if (localeID
== NULL
)
2158 localeID
= uloc_getDefault();
2160 uloc_getCountry(localeID
, cntry
, ULOC_LANG_CAPACITY
, &err
);
/* The index found in COUNTRIES is reused to index COUNTRIES_3. */
2163 offset
= _findIndex(COUNTRIES
, cntry
);
2167 return COUNTRIES_3
[offset
];
/*
 * uloc_getLCID: converts localeID to a 32-bit host locale identifier.
 *
 * Flow visible in this excerpt:
 *  1. A NULL localeID, or one shorter than 2 characters, takes the
 *     incomplete-id branch (its body is on lines missing here).
 *  2. uprv_convertToLCIDPlatform() is tried first; per the comment below, a
 *     value found by the platform is returned directly.
 *  3. The language is extracted into langID with uloc_getLanguage(); a failure
 *     takes an early branch whose body is missing here.
 *  4. When localeID contains an at-sign, a temporary ID is rebuilt from the
 *     base name plus only the collation keyword and handed to
 *     uprv_convertToLCID(). If any step fails, status is reset and the code
 *     falls through.
 *  5. Otherwise uprv_convertToLCID(langID, localeID, &status) is returned.
 */
2170 U_CAPI
uint32_t U_EXPORT2
2171 uloc_getLCID(const char* localeID
)
2173 UErrorCode status
= U_ZERO_ERROR
;
2174 char langID
[ULOC_FULLNAME_CAPACITY
];
2177 /* Check for incomplete id. */
2178 if (!localeID
|| uprv_strlen(localeID
) < 2) {
2182 // Attempt platform lookup if available
2183 lcid
= uprv_convertToLCIDPlatform(localeID
);
2186 // Windows found an LCID, return that
2190 uloc_getLanguage(localeID
, langID
, sizeof(langID
), &status
);
2191 if (U_FAILURE(status
)) {
2195 if (uprv_strchr(localeID
, '@')) {
2196 // uprv_convertToLCID does not support keywords other than collation.
2197 // Remove all keywords except collation.
2199 char collVal
[ULOC_KEYWORDS_CAPACITY
];
2200 char tmpLocaleID
[ULOC_FULLNAME_CAPACITY
];
/* Capacity is one less than the array size, leaving room for a terminator. */
2202 len
= uloc_getKeywordValue(localeID
, "collation", collVal
,
2203 UPRV_LENGTHOF(collVal
) - 1, &status
);
2205 if (U_SUCCESS(status
) && len
> 0) {
/* Keyword-free base name goes into tmpLocaleID and is terminated explicitly below. */
2208 len
= uloc_getBaseName(localeID
, tmpLocaleID
,
2209 UPRV_LENGTHOF(tmpLocaleID
) - 1, &status
);
2211 if (U_SUCCESS(status
) && len
> 0) {
2212 tmpLocaleID
[len
] = 0;
/* NOTE(review): the capacity passed here is the array size minus the base-name
   length minus 1, i.e. the remaining space rather than the whole buffer -
   confirm which of the two uloc_setKeywordValue() expects. */
2214 len
= uloc_setKeywordValue("collation", collVal
, tmpLocaleID
,
2215 UPRV_LENGTHOF(tmpLocaleID
) - len
- 1, &status
);
2217 if (U_SUCCESS(status
) && len
> 0) {
2218 tmpLocaleID
[len
] = 0;
2219 return uprv_convertToLCID(langID
, tmpLocaleID
, &status
);
2224 // fall through - all keywords are simply ignored
2225 status
= U_ZERO_ERROR
;
/* Default path: convert the caller-supplied ID as given. */
2228 return uprv_convertToLCID(langID
, localeID
, &status
);
/*
 * uloc_getLocaleForLCID: thin wrapper that forwards hostid, locale,
 * localeCapacity and status to uprv_convertToPosix() and returns its result.
 * (The declaration of status is on a line missing from this excerpt.)
 */
2231 U_CAPI
int32_t U_EXPORT2
2232 uloc_getLocaleForLCID(uint32_t hostid
, char *locale
, int32_t localeCapacity
,
2235 return uprv_convertToPosix(hostid
, locale
, localeCapacity
, status
);
2238 /* ### Default locale **************************************************/
/*
 * Returns the value of locale_get_default(), the accessor declared near the
 * top of the file under the heading for locale code from locid.cpp.
 * NOTE(review): the line carrying this function's name is missing from this
 * excerpt; presumably this is uloc_getDefault(), which other code in this file
 * calls - confirm.
 */
2240 U_CAPI
const char* U_EXPORT2
2243 return locale_get_default();
/*
 * uloc_setDefault: hands newDefaultLocale to locale_set_default().
 * The incoming error code is tested with U_FAILURE first (the body of that
 * branch is on a line missing from this excerpt); per the comment below, the
 * error code is not otherwise used by this function.
 */
2246 U_CAPI
void U_EXPORT2
2247 uloc_setDefault(const char* newDefaultLocale
,
2250 if (U_FAILURE(*err
))
2252 /* the error code isn't currently used for anything by this function*/
2254 /* propagate change to C++ */
2255 locale_set_default(newDefaultLocale
);
2259 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2260 * to an array of pointers to arrays of char. All of these pointers are owned
2261 * by ICU-- do not delete them, and do not write through them. The array is
2262 * terminated with a null pointer.
/*
 * uloc_getISOLanguages: takes no arguments and returns a const pointer to
 * const char pointers.
 * NOTE(review): the body is on lines missing from this excerpt, so the table
 * actually returned cannot be confirmed here.
 */
2264 U_CAPI
const char* const* U_EXPORT2
2265 uloc_getISOLanguages()
2271 * Returns a list of all 2-letter country codes defined in ISO 3166. This is a
2272 * pointer to an array of pointers to arrays of char. All of these pointers are
2273 * owned by ICU-- do not delete them, and do not write through them. The array is
2274 * terminated with a null pointer.
/*
 * uloc_getISOCountries: takes no arguments and returns a const pointer to
 * const char pointers.
 * NOTE(review): the body is on lines missing from this excerpt, so the table
 * actually returned cannot be confirmed here.
 */
2276 U_CAPI
const char* const* U_EXPORT2
2277 uloc_getISOCountries()
/*
 * _uloc_strtod: string-to-double helper that copes with a runtime whose
 * decimal separator is not a period.
 *
 * gDecimal is a file-level cache that starts at 0. The sprintf() call below
 * formats 1.0 into rep; presumably the separator character is read back from
 * rep into gDecimal on a line missing from this excerpt - confirm.
 *
 * When gDecimal is a period, the input is parsed directly with uprv_strtod().
 * Otherwise at most 29 characters of start are copied into buf, the first
 * period in buf is overwritten with gDecimal, buf is parsed, and the end
 * position is translated back into the caller's string. Input with no period
 * is parsed directly.
 *
 * NOTE(review): the store through end near the bottom is shown without a
 * visible NULL test, while uloc_acceptLanguageFromHTTP passes NULL for end;
 * a guard may sit on a line missing from this excerpt - confirm.
 * NOTE(review): termination of buf after the bounded copy is not visible
 * here - confirm.
 */
2283 /* this function to be moved into cstring.c later */
2284 static char gDecimal
= 0;
2289 _uloc_strtod(const char *start
, char **end
) {
2296 /* For machines that decide to change the decimal on you,
2297 and try to be too smart with localization.
2298 This normally should be just a '.'. */
2299 sprintf(rep
, "%+1.1f", 1.0);
/* Fast path: the runtime already uses a period. */
2303 if(gDecimal
== '.') {
2304 return uprv_strtod(start
, end
); /* fall through to OS */
/* Slow path: work on a bounded private copy so the separator can be swapped. */
2306 uprv_strncpy(buf
, start
, 29);
2308 decimal
= uprv_strchr(buf
, '.');
2310 *decimal
= gDecimal
;
2312 return uprv_strtod(start
, end
); /* no decimal point */
2314 rv
= uprv_strtod(buf
, &myEnd
);
/* Map the stop position inside buf to the same offset inside start. */
2316 *end
= (char*)(start
+(myEnd
-buf
)); /* cast away const (to follow uprv_strtod API.) */
/*
 * Members of the record that later code refers to as _acceptLangItem.
 * The opening of the declaration, the type name and the q member (which
 * uloc_acceptLanguageCompare reads) are on lines missing from this excerpt.
 * locale is sized ULOC_FULLNAME_CAPACITY+1: uloc_acceptLanguageFromHTTP rejects
 * text longer than ULOC_FULLNAME_CAPACITY and then writes a terminator after it.
 */
2324 int32_t dummy
; /* to avoid uninitialized memory copy from qsort */
2325 char locale
[ULOC_FULLNAME_CAPACITY
+1];
/*
 * uloc_acceptLanguageCompare: comparison callback that
 * uloc_acceptLanguageFromHTTP passes to uprv_sortArray().
 *
 * The first parameter (context) is unnamed and unused. a and b are treated as
 * pointers to _acceptLangItem records. The visible branches set rc to -1 with
 * the annotation that A is greater than B, test whether the q of b exceeds the
 * q of a, and otherwise compare the two locale strings with uprv_stricmp().
 * Presumably the result orders records by descending q with a
 * case-insensitive locale tie-break; the first condition and the value
 * assigned in the second branch are on lines missing from this excerpt -
 * confirm.
 */
2328 static int32_t U_CALLCONV
2329 uloc_acceptLanguageCompare(const void * /*context*/, const void *a
, const void *b
)
2331 const _acceptLangItem
*aa
= (const _acceptLangItem
*)a
;
2332 const _acceptLangItem
*bb
= (const _acceptLangItem
*)b
;
2336 rc
= -1; /* A > B */
2337 } else if(bb
->q
> aa
->q
) {
2344 rc
= uprv_stricmp(aa
->locale
, bb
->locale
);
2347 #if defined(ULOC_DEBUG)
2348 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2358 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
// uloc_acceptLanguageFromHTTP: parses an HTTP Accept-Language style string and
// delegates the matching to uloc_acceptLanguage().
//
// Steps visible in this excerpt:
//  - The input is walked item by item; items end at a comma or at the end of
//    the string, and a semicolon that comes before the item end introduces a
//    parameter whose text is parsed as the q weight with _uloc_strtod().
//  - The locale text of each item is copied into items[n].locale, then
//    canonicalized with uloc_canonicalize(); when canonicalization changed the
//    text, the canonical form is copied back.
//  - items is grown (capacity doubled) when n reaches its capacity.
//  - The collected items are sorted with uprv_sortArray() using
//    uloc_acceptLanguageCompare, their locale pointers are gathered in strs,
//    and uloc_acceptLanguage() is called with that list; its result goes to res.
//
// Visible error paths: locale text longer than ULOC_FULLNAME_CAPACITY sets
// U_BUFFER_OVERFLOW_ERROR and returns -1; a failing uloc_canonicalize()
// returns -1; a failed resize or allocation sets U_MEMORY_ALLOCATION_ERROR.
2361 U_CAPI
int32_t U_EXPORT2
2362 uloc_acceptLanguageFromHTTP(char *result
, int32_t resultAvailable
, UAcceptResult
*outResult
,
2363 const char *httpAcceptLanguage
,
2364 UEnumeration
* availableLocales
,
2367 MaybeStackArray
<_acceptLangItem
, 4> items
; // Struct for collecting items.
2368 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2370 const char *itemEnd
;
2371 const char *paramEnd
;
// NOTE(review): uprv_strlen() is applied to httpAcceptLanguage here, before the
// status check and before the loop below tests s for NULL - confirm that
// callers never pass NULL.
2376 int32_t l
= (int32_t)uprv_strlen(httpAcceptLanguage
);
2378 if(U_FAILURE(*status
)) {
2382 for(s
=httpAcceptLanguage
;s
&&*s
;) {
// NOTE(review): isspace() receives a plain char here and in the loops below;
// presumably bytes above 0x7F can be negative where char is signed - confirm.
2383 while(isspace(*s
)) /* eat space at the beginning */
2385 itemEnd
=uprv_strchr(s
,',');
2386 paramEnd
=uprv_strchr(s
,';');
2388 itemEnd
= httpAcceptLanguage
+l
; /* end of string */
2390 if(paramEnd
&& paramEnd
<itemEnd
) {
2391 /* semicolon (;) is closer than end (,) */
2396 while(isspace(*t
)) {
2402 while(isspace(*t
)) {
// The q weight is parsed from t; NULL is passed as the end pointer.
2405 items
[n
].q
= (float)_uloc_strtod(t
,NULL
);
2407 /* no semicolon - it's 1.0 */
2412 /* eat spaces prior to semi */
2413 for(t
=(paramEnd
-1);(paramEnd
>s
)&&isspace(*t
);t
--)
// slen counts the characters from s through t inclusive.
2415 int32_t slen
= ((t
+1)-s
);
2416 if(slen
> ULOC_FULLNAME_CAPACITY
) {
2417 *status
= U_BUFFER_OVERFLOW_ERROR
;
2418 return -1; // too big
2420 uprv_strncpy(items
[n
].locale
, s
, slen
);
2421 items
[n
].locale
[slen
]=0; // terminate
// The capacity passed is one less than the size of tmp.
2422 int32_t clen
= uloc_canonicalize(items
[n
].locale
, tmp
, UPRV_LENGTHOF(tmp
)-1, status
);
2423 if(U_FAILURE(*status
)) return -1;
2424 if((clen
!=slen
) || (uprv_strncmp(items
[n
].locale
, tmp
, slen
))) {
2425 // canonicalization had an effect- copy back
2426 uprv_strncpy(items
[n
].locale
, tmp
, clen
);
2427 items
[n
].locale
[clen
] = 0; // terminate
2429 #if defined(ULOC_DEBUG)
2430 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2434 while(*s
==',') { /* eat duplicate commas */
// Capacity is doubled; presumably the second resize() argument is the number
// of existing entries to keep - confirm against MaybeStackArray.
2437 if(n
>=items
.getCapacity()) { // If we need more items
2438 if(NULL
== items
.resize(items
.getCapacity()*2, items
.getCapacity())) {
2439 *status
= U_MEMORY_ALLOCATION_ERROR
;
2442 #if defined(ULOC_DEBUG)
2443 fprintf(stderr
,"malloced at size %d\n", items
.getCapacity());
// Sort the n collected items with the comparator defined earlier in this file.
2447 uprv_sortArray(items
.getAlias(), n
, sizeof(items
[0]), uloc_acceptLanguageCompare
, NULL
, TRUE
, status
);
2448 if (U_FAILURE(*status
)) {
// strs holds one pointer per item, each aimed at the locale text of that item.
2451 LocalMemory
<const char*> strs(NULL
);
2452 if (strs
.allocateInsteadAndReset(n
) == NULL
) {
2453 *status
= U_MEMORY_ALLOCATION_ERROR
;
2457 #if defined(ULOC_DEBUG)
2458 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2460 strs
[i
]=items
[i
].locale
;
2462 res
= uloc_acceptLanguage(result
, resultAvailable
, outResult
,
2463 strs
.getAlias(), n
, availableLocales
, status
);
/*
 * uloc_acceptLanguage: picks the first entry of acceptList that is present in
 * availableLocales, falling back to parent locales when no exact match exists.
 *
 * Steps visible in this excerpt:
 *  - fallbackList is allocated with one char pointer per accept entry; a NULL
 *    allocation sets U_MEMORY_ALLOCATION_ERROR.
 *  - Pass 1: each acceptList[i] is compared with uprv_strcmp() against every
 *    string produced by uenum_next(availableLocales). On equality *outResult
 *    becomes ULOC_ACCEPT_VALID, at most min(len, resultAvailable) bytes are
 *    copied into result, fallbackList is released, and the value of
 *    u_terminateChars() is returned. After each entry the enumeration is reset
 *    and, when uloc_getParent() reports a non-zero length, a copy of the
 *    parent is stored in fallbackList[i].
 *  - Pass 2: maxLen counts down; every fallback whose length equals maxLen is
 *    compared against the available locales the same way, reporting
 *    ULOC_ACCEPT_FALLBACK on a match. A fallback that did not match is either
 *    replaced by a copy of its own parent or freed.
 *  - With no match *outResult becomes ULOC_ACCEPT_FAILED and all fallback
 *    storage is released.
 *
 * NOTE(review): the computation of the initial maxLen, the initialisation of
 * fallbackList slots that get no parent, and the final return are on lines
 * missing from this excerpt - confirm.
 * NOTE(review): the results of uprv_strdup() are stored unchecked; presumably
 * a NULL result is simply skipped by the non-NULL test in pass 2 - confirm.
 */
2468 U_CAPI
int32_t U_EXPORT2
2469 uloc_acceptLanguage(char *result
, int32_t resultAvailable
,
2470 UAcceptResult
*outResult
, const char **acceptList
,
2471 int32_t acceptListCount
,
2472 UEnumeration
* availableLocales
,
2478 char tmp
[ULOC_FULLNAME_CAPACITY
+1];
2480 char **fallbackList
;
2481 if(U_FAILURE(*status
)) {
/* One pointer slot per accept entry. */
2484 fallbackList
= static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList
[0])*acceptListCount
)));
2485 if(fallbackList
==NULL
) {
2486 *status
= U_MEMORY_ALLOCATION_ERROR
;
/* Pass 1: exact matches, in accept-list order. */
2489 for(i
=0;i
<acceptListCount
;i
++) {
2490 #if defined(ULOC_DEBUG)
2491 fprintf(stderr
,"%02d: %s\n", i
, acceptList
[i
]);
2493 while((l
=uenum_next(availableLocales
, NULL
, status
)) != NULL
) {
2494 #if defined(ULOC_DEBUG)
2495 fprintf(stderr
," %s\n", l
);
2497 len
= (int32_t)uprv_strlen(l
);
2498 if(!uprv_strcmp(acceptList
[i
], l
)) {
2500 *outResult
= ULOC_ACCEPT_VALID
;
2502 #if defined(ULOC_DEBUG)
2503 fprintf(stderr
, "MATCH! %s\n", l
);
/* The copy is bounded by the smaller of the match length and the caller capacity. */
2506 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
2509 uprv_free(fallbackList
[j
]);
2511 uprv_free(fallbackList
);
2512 return u_terminateChars(result
, resultAvailable
, len
, status
);
/* Rewind the enumeration so the next accept entry scans it from the start. */
2518 uenum_reset(availableLocales
, status
);
2519 /* save off parent info */
2520 if(uloc_getParent(acceptList
[i
], tmp
, UPRV_LENGTHOF(tmp
), status
)!=0) {
2521 fallbackList
[i
] = uprv_strdup(tmp
);
/* Pass 2: try fallbacks, longest first. */
2527 for(maxLen
--;maxLen
>0;maxLen
--) {
2528 for(i
=0;i
<acceptListCount
;i
++) {
2529 if(fallbackList
[i
] && ((int32_t)uprv_strlen(fallbackList
[i
])==maxLen
)) {
2530 #if defined(ULOC_DEBUG)
2531 fprintf(stderr
,"Try: [%s]", fallbackList
[i
]);
2533 while((l
=uenum_next(availableLocales
, NULL
, status
)) != NULL
) {
2534 #if defined(ULOC_DEBUG)
2535 fprintf(stderr
," %s\n", l
);
2537 len
= (int32_t)uprv_strlen(l
);
2538 if(!uprv_strcmp(fallbackList
[i
], l
)) {
2540 *outResult
= ULOC_ACCEPT_FALLBACK
;
2542 #if defined(ULOC_DEBUG)
2543 fprintf(stderr
, "fallback MATCH! %s\n", l
);
2546 uprv_strncpy(result
, l
, uprv_min(len
, resultAvailable
));
/* Release every fallback string, then the array itself, before returning. */
2548 for(j
=0;j
<acceptListCount
;j
++) {
2549 uprv_free(fallbackList
[j
]);
2551 uprv_free(fallbackList
);
2552 return u_terminateChars(result
, resultAvailable
, len
, status
);
2555 uenum_reset(availableLocales
, status
);
/* No match at this level: step the fallback up to its own parent, or drop it. */
2557 if(uloc_getParent(fallbackList
[i
], tmp
, UPRV_LENGTHOF(tmp
), status
)!=0) {
2558 uprv_free(fallbackList
[i
]);
2559 fallbackList
[i
] = uprv_strdup(tmp
);
2561 uprv_free(fallbackList
[i
]);
/* Nothing matched, neither exactly nor through any fallback. */
2567 *outResult
= ULOC_ACCEPT_FAILED
;
2570 for(i
=0;i
<acceptListCount
;i
++) {
2571 uprv_free(fallbackList
[i
]);
2573 uprv_free(fallbackList
);
/*
 * uloc_toUnicodeLocaleKey: looks keyword up with ulocimp_toBcpKey().
 * When the lookup yields NULL and ultag_isUnicodeLocaleKey(keyword, -1)
 * accepts the keyword, the branch for an unknown but well-formed keyword is
 * taken.
 * NOTE(review): the return statements are on lines missing from this excerpt;
 * presumably the keyword itself is returned in that branch and bcpKey
 * otherwise - confirm.
 */
2577 U_CAPI
const char* U_EXPORT2
2578 uloc_toUnicodeLocaleKey(const char* keyword
)
2580 const char* bcpKey
= ulocimp_toBcpKey(keyword
);
2581 if (bcpKey
== NULL
&& ultag_isUnicodeLocaleKey(keyword
, -1)) {
2582 // unknown keyword, but syntax is fine..
/*
 * uloc_toUnicodeLocaleType: looks the keyword and value pair up with
 * ulocimp_toBcpType(), passing NULL for both of its trailing arguments.
 * When the lookup yields NULL and ultag_isUnicodeLocaleType(value, -1) accepts
 * the value, the branch for an unknown but well-formed value is taken.
 * NOTE(review): the return statements are on lines missing from this excerpt;
 * presumably value itself is returned in that branch and bcpType otherwise -
 * confirm.
 */
2588 U_CAPI
const char* U_EXPORT2
2589 uloc_toUnicodeLocaleType(const char* keyword
, const char* value
)
2591 const char* bcpType
= ulocimp_toBcpType(keyword
, value
, NULL
, NULL
);
2592 if (bcpType
== NULL
&& ultag_isUnicodeLocaleType(value
, -1)) {
2593 // unknown keyword, but syntax is fine..
/*
 * isWellFormedLegacyKey: walks legacyKey through the cursor p and tests each
 * examined character with UPRV_ISALPHANUM; a character that fails the test
 * enters the rejecting branch.
 * NOTE(review): the loop header, the return type and the return statements are
 * on lines missing from this excerpt - confirm the result for an empty string.
 */
2600 isWellFormedLegacyKey(const char* legacyKey
)
2602 const char* p
= legacyKey
;
2604 if (!UPRV_ISALPHANUM(*p
)) {
/*
 * isWellFormedLegacyType: walks legacyType through the cursor p while tracking
 * alphaNumLen, which starts at 0.
 * An underscore, slash or hyphen is treated as a separator, and a separator
 * seen while alphaNumLen is 0 enters its own branch (presumably a rejection -
 * confirm). Alphanumeric characters, as judged by UPRV_ISALPHANUM, take the
 * other visible branch.
 * The final result is true only when alphaNumLen is non-zero at the end.
 * NOTE(review): the loop header and the updates to alphaNumLen are on lines
 * missing from this excerpt - confirm.
 */
2613 isWellFormedLegacyType(const char* legacyType
)
2615 const char* p
= legacyType
;
2616 int32_t alphaNumLen
= 0;
2618 if (*p
== '_' || *p
== '/' || *p
== '-') {
2619 if (alphaNumLen
== 0) {
2623 } else if (UPRV_ISALPHANUM(*p
)) {
2630 return (alphaNumLen
!= 0);
/*
 * uloc_toLegacyKey: looks keyword up with ulocimp_toLegacyKey().
 * When the lookup yields NULL, the keyword is checked with
 * isWellFormedLegacyKey() as a syntax-only fallback.
 * NOTE(review): the return statements are on lines missing from this excerpt;
 * presumably a well-formed keyword is returned as is and legacyKey otherwise -
 * confirm.
 */
2633 U_CAPI
const char* U_EXPORT2
2634 uloc_toLegacyKey(const char* keyword
)
2636 const char* legacyKey
= ulocimp_toLegacyKey(keyword
);
2637 if (legacyKey
== NULL
) {
2638 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2641 // LDML/CLDR provides some definition of keyword syntax in
2642 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2643 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2644 // Keys can only consist of [0-9a-zA-Z].
2645 if (isWellFormedLegacyKey(keyword
)) {
2652 U_CAPI
const char* U_EXPORT2
2653 uloc_toLegacyType(const char* keyword
, const char* value
)
2655 const char* legacyType
= ulocimp_toLegacyType(keyword
, value
, NULL
, NULL
);
2656 if (legacyType
== NULL
) {
2657 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2660 // LDML/CLDR provides some definition of keyword syntax in
2661 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2662 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2663 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2664 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
2665 if (isWellFormedLegacyType(value
)) {