]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/uloc.cpp
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / common / uloc.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4**********************************************************************
2ca993e8 5* Copyright (C) 1997-2016, International Business Machines
b75a7d8f
A
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8*
9* File ULOC.CPP
10*
11* Modification History:
12*
13* Date Name Description
14* 04/01/97 aliu Creation.
15* 08/21/98 stephen JDK 1.2 sync
16* 12/08/98 rtg New Locale implementation and C API
17* 03/15/99 damiba overhaul.
18* 04/06/99 stephen changed setDefault() to realloc and copy
19* 06/14/99 stephen Changed calls to ures_open for new params
20* 07/21/99 stephen Modified setDefault() to propagate to C++
374ca955
A
21* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
22* brought canonicalization code into line with spec
b75a7d8f
A
23*****************************************************************************/
24
25/*
26 POSIX's locale format, from putil.c: [no spaces]
27
28 ll [ _CC ] [ . MM ] [ @ VV]
29
30 l = lang, C = ctry, M = charmap, V = variant
31*/
32
b75a7d8f
A
33#include "unicode/utypes.h"
34#include "unicode/ustring.h"
35#include "unicode/uloc.h"
36
374ca955 37#include "putilimp.h"
b75a7d8f 38#include "ustr_imp.h"
374ca955 39#include "ulocimp.h"
b75a7d8f
A
40#include "umutex.h"
41#include "cstring.h"
42#include "cmemory.h"
374ca955
A
43#include "locmap.h"
44#include "uarrsort.h"
45#include "uenumimp.h"
46#include "uassert.h"
f3c0d7a5 47#include "charstr.h"
b75a7d8f 48
374ca955
A
49#include <stdio.h> /* for sprintf */
50
f3c0d7a5
A
51U_NAMESPACE_USE
52
374ca955 53/* ### Declarations **************************************************/
b75a7d8f
A
54
55/* Locale stuff from locid.cpp */
56U_CFUNC void locale_set_default(const char *id);
57U_CFUNC const char *locale_get_default(void);
374ca955
A
58U_CFUNC int32_t
59locale_getKeywords(const char *localeID,
60 char prev,
61 char *keywords, int32_t keywordCapacity,
62 char *values, int32_t valuesCapacity, int32_t *valLen,
63 UBool valuesToo,
64 UErrorCode *status);
65
374ca955
A
66/* ### Data tables **************************************************/
67
68/**
69 * Table of language codes, both 2- and 3-letter, with preference
70 * given to 2-letter codes where possible. Includes 3-letter codes
71 * that lack a 2-letter equivalent.
72 *
73 * This list must be in sorted order. This list is returned directly
74 * to the user by some API.
75 *
76 * This list must be kept in sync with LANGUAGES_3, with corresponding
77 * entries matched.
78 *
79 * This table should be terminated with a NULL entry, followed by a
80 * second list, and another NULL entry. The first list is visible to
81 * user code when this array is returned by API. The second list
82 * contains codes we support, but do not expose through user API.
83 *
84 * Notes
85 *
86 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
87 * include the revisions up to 2001/7/27 *CWB*
88 *
89 * The 3 character codes are the terminology codes like RFC 3066. This
90 * is compatible with prior ICU codes
91 *
92 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
93 * table but now at the end of the table because 3 character codes are
94 * duplicates. This avoids bad searches going from 3 to 2 character
95 * codes.
96 *
97 * The range qaa-qtz is reserved for local use
98 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
/*
 * Layout: a sorted list of language codes, a NULL terminator, then a short
 * list of obsolete codes, then a final NULL terminator.
 */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
    "cs",  "csb", "cu",  "cv",  "cy",
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz",  "dzg",
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
    "ext",
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
    "gur", "guz", "gv",  "gwi",
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
    "hup", "hy",  "hz",
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
    "kv",  "kw",  "ky",
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "ml",  "mn",  "mnc", "mni", "moh", "mos", "mr",  "mrj",
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my",  "mye", "myv", "mzn",
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
    "pon", "prg", "pro", "ps",  "pt",
    "qu",  "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
    "rw",  "rwk",
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
    "sv",  "sw",  "swb", "swc", "syc", "syr", "szl",
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
    "xal", "xh",  "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
    "zun", "zxx", "zza",
NULL,
    "in",  "iw",  "ji",  "jw",  "sh",                          /* obsolete language codes */
NULL
};
51004dcb 191
73c04bcf
A
/*
 * Deprecated language codes, index-aligned with REPLACEMENT_LANGUAGES.
 * The list ends with two NULL entries.
 */
static const char* const DEPRECATED_LANGUAGES[] = {
    "in",
    "iw",
    "ji",
    "jw",
    NULL,
    NULL
};
/*
 * Current codes for the deprecated language codes, index-aligned with
 * DEPRECATED_LANGUAGES. The list ends with two NULL entries.
 */
static const char* const REPLACEMENT_LANGUAGES[] = {
    "id",
    "he",
    "yi",
    "jv",
    NULL,
    NULL
};
b75a7d8f 198
374ca955
A
199/**
200 * Table of 3-letter language codes.
201 *
202 * This is a lookup table used to convert 3-letter language codes to
203 * their 2-letter equivalent, where possible. It must be kept in sync
204 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
205 * same language as LANGUAGES_3[i]. The commented-out lines are
206 * copied from LANGUAGES to make eyeballing this baby easier.
207 *
208 * Where a 3-letter language code has no 2-letter equivalent, the
209 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
210 *
211 * This table should be terminated with a NULL entry, followed by a
212 * second list, and another NULL entry. The two lists correspond to
213 * the two lists in LANGUAGES.
214 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
/*
 * Layout mirrors LANGUAGES: main list, NULL, 3-letter codes for the obsolete
 * 2-letter codes, final NULL.
 */
static const char * const LANGUAGES_3[] = {
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL
};
308
374ca955
A
309/**
310 * Table of 2-letter country codes.
311 *
312 * This list must be in sorted order. This list is returned directly
313 * to the user by some API.
314 *
315 * This list must be kept in sync with COUNTRIES_3, with corresponding
316 * entries matched.
317 *
318 * This table should be terminated with a NULL entry, followed by a
319 * second list, and another NULL entry. The first list is visible to
320 * user code when this array is returned by API. The second list
321 * contains codes we support, but do not expose through user API.
322 *
323 * Notes:
324 *
325 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
326 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
327 * new codes keeping the old ones for compatibility updated to include
328 * 1999/12/03 revisions *CWB*
329 *
330 * RO(ROM) is now RO(ROU) according to
331 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
332 */
/*
 * Layout: sorted list of 2-letter country codes, NULL, obsolete codes,
 * final NULL.
 */
static const char * const COUNTRIES[] = {
    "AC",  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CP",  "CR",
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    "SX",  "SY",  "SZ",  "TA",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL,
    "AN",  "BU",  "CS",  "FX",  "RO",  "SU",  "TP",  "YD",  "YU",  "ZR",   /* obsolete country codes */
NULL
};
368
51004dcb
A
/*
 * Deprecated country codes. Entry i corresponds to REPLACEMENT_COUNTRIES[i].
 * The list ends with two NULL entries.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD",
    "DY", "FX", "HV", "NH",
    "RH", "SU", "TP", "UK",
    "VD", "YD", "YU", "ZR",
    NULL, NULL      /* deprecated country list */
};
/*
 * Replacement country codes. Each row shows the deprecated codes in a comment
 * above their replacements. The list ends with two NULL entries.
 */
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", */
    "CW", "MM", "RS", "DE",
/*  "DY", "FX", "HV", "NH", */
    "BJ", "FR", "BF", "VU",
/*  "RH", "SU", "TP", "UK", */
    "ZW", "RU", "TL", "GB",
/*  "VD", "YD", "YU", "ZR"  */
    "VN", "YE", "RS", "CD",
    NULL, NULL      /* replacement country codes */
};
f3c0d7a5 376
374ca955
A
377/**
378 * Table of 3-letter country codes.
379 *
380 * This is a lookup table used to convert 3-letter country codes to
381 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
382 * For all valid i, COUNTRIES[i] must refer to the same country as
383 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
384 * to make eyeballing this baby easier.
385 *
386 * This table should be terminated with a NULL entry, followed by a
387 * second list, and another NULL entry. The two lists correspond to
388 * the two lists in COUNTRIES.
389 */
/*
 * Layout mirrors COUNTRIES: main list, NULL, 3-letter codes for the obsolete
 * 2-letter codes, final NULL. Each row is preceded by the matching COUNTRIES
 * row as a comment.
 */
static const char * const COUNTRIES_3[] = {
/*  "AC",  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    "ASC", "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CP",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CPT", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",      */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "EA ", "ECU", "EST", "EGY", "ESH", "ERI", /* no valid 3-letter code for EA */
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    "IC ", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", /* no valid 3-letter code for IC */
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TA",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SXM", "SYR", "SWZ", "TAA", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL,
/*  "AN",  "BU",  "CS",  "FX",  "RO",  "SU",  "TP",  "YD",  "YU",  "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
NULL
};
456
374ca955
A
/* One entry of the locale-ID canonicalization table. */
typedef struct CanonicalizationMap {
    const char *id;          /* locale ID as it appears on input */
    const char *canonicalID; /* ID that replaces it after canonicalization */
} CanonicalizationMap;

/**
 * Lookup table of locale IDs and their canonical forms. The entries
 * cover several semantically different kinds of transformation.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "",               "en_US_POSIX" }, /* .NET name */ // open ICU 64 deleted, we restore
    { "c",              "en_US_POSIX" }, /* POSIX name */ // open ICU 64 deleted, we restore
    { "posix",          "en_US_POSIX" }, /* POSIX name (alias of C) */ // open ICU 64 deleted, we restore
    { "art_LOJBAN",     "jbo" },         /* registered name */
    { "hy__AREVELA",    "hy" },          /* Registered IANA variant */
    { "hy__AREVMDA",    "hyw" },         /* Registered IANA variant */
    { "zh_GAN",         "gan" },         /* registered name */
    { "zh_GUOYU",       "zh" },          /* registered name */
    { "zh_HAKKA",       "hak" },         /* registered name */
    { "zh_MIN_NAN",     "nan" },         /* registered name */
    { "zh_WUU",         "wuu" },         /* registered name */
    { "zh_XIANG",       "hsn" },         /* registered name */
    { "zh_YUE",         "yue" },         /* registered name */
};
481
729e4ab9
A
482/* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id has a BCP47 extension and does not have '@'.
 * True when id is non-NULL, contains no '@', and its shortest
 * separator-terminated subtag (see getShortestSubtagLength) is one character.
 *
 * The macro works on its own argument only; it does not rely on the caller
 * having a variable named localeID in scope. id is evaluated more than once,
 * so it must not have side effects.
 */
#define _hasBCP47Extension(id) \
    ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)

/*
 * Converts the BCP47 id to a Unicode locale id via uloc_forLanguageTag.
 * On success finalID is set to buffer. If the conversion fails, yields a
 * non-positive length, or leaves the buffer unterminated, finalID is set to
 * the original id; an unterminated result is additionally reported as
 * U_BUFFER_OVERFLOW_ERROR.
 *
 * NOTE: this expands to a bare if/else statement, not do { } while (0), to
 * keep existing call sites compiling unchanged. Use it only as a full
 * statement inside braces.
 */
#define _ConvertBCP47(finalID, id, buffer, length, err) \
    if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 || \
            U_FAILURE(*(err)) || *(err) == U_STRING_NOT_TERMINATED_WARNING) { \
        finalID = (id); \
        if (*(err) == U_STRING_NOT_TERMINATED_WARNING) { *(err) = U_BUFFER_OVERFLOW_ERROR; } \
    } else { \
        finalID = (buffer); \
    }
494/* Gets the size of the shortest subtag in the given localeID. */
495static int32_t getShortestSubtagLength(const char *localeID) {
0f5d89e8 496 int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
729e4ab9
A
497 int32_t length = localeIDLength;
498 int32_t tmpLength = 0;
499 int32_t i;
500 UBool reset = TRUE;
501
502 for (i = 0; i < localeIDLength; i++) {
503 if (localeID[i] != '_' && localeID[i] != '-') {
504 if (reset) {
505 tmpLength = 0;
506 reset = FALSE;
507 }
508 tmpLength++;
509 } else {
510 if (tmpLength != 0 && tmpLength < length) {
511 length = tmpLength;
512 }
513 reset = TRUE;
514 }
515 }
516
517 return length;
518}
519
374ca955 520/* ### Keywords **************************************************/
f3c0d7a5
A
/* True when c is one of the ASCII digits '0'..'9'. */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
/* True when c is an ASCII letter or an ASCII digit. c is evaluated more than once. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size, including the terminating NUL, of the fixed buffers that hold a keyword name. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Capacity of the fixed keyword array used while parsing a locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
528
729e4ab9 529U_CAPI const char * U_EXPORT2
374ca955 530locale_getKeywordsStart(const char *localeID) {
374ca955 531 const char *result = NULL;
374ca955
A
532 if((result = uprv_strchr(localeID, '@')) != NULL) {
533 return result;
73c04bcf
A
534 }
535#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
536 else {
537 /* We do this because the @ sign is variant, and the @ sign used on one
538 EBCDIC machine won't be compiled the same way on other EBCDIC based
539 machines. */
540 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
374ca955
A
541 const uint8_t *charToFind = ebcdicSigns;
542 while(*charToFind) {
543 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
544 return result;
545 }
546 charToFind++;
547 }
548 }
73c04bcf 549#endif
374ca955
A
550 return NULL;
551}
552
553/**
554 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
555 * @param keywordName incoming name to be canonicalized
556 * @param status return status (keyword too long)
557 * @return length of the keyword name
558 */
559static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
560{
f3c0d7a5
A
561 int32_t keywordNameLen = 0;
562
563 for (; *keywordName != 0; keywordName++) {
564 if (!UPRV_ISALPHANUM(*keywordName)) {
565 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
566 return 0;
567 }
568 if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
569 buf[keywordNameLen++] = uprv_tolower(*keywordName);
570 } else {
571 /* keyword name too long for internal buffer */
572 *status = U_INTERNAL_PROGRAM_ERROR;
573 return 0;
574 }
374ca955 575 }
f3c0d7a5
A
576 if (keywordNameLen == 0) {
577 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
578 return 0;
374ca955 579 }
f3c0d7a5
A
580 buf[keywordNameLen] = 0; /* terminate */
581
374ca955
A
582 return keywordNameLen;
583}
584
585typedef struct {
586 char keyword[ULOC_KEYWORD_BUFFER_LEN];
587 int32_t keywordLen;
588 const char *valueStart;
589 int32_t valueLen;
590} KeywordStruct;
591
592static int32_t U_CALLCONV
4388f060 593compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
374ca955
A
594 const char* leftString = ((const KeywordStruct *)left)->keyword;
595 const char* rightString = ((const KeywordStruct *)right)->keyword;
596 return uprv_strcmp(leftString, rightString);
597}
598
374ca955
A
599static int32_t
600_getKeywords(const char *localeID,
601 char prev,
602 char *keywords, int32_t keywordCapacity,
603 char *values, int32_t valuesCapacity, int32_t *valLen,
604 UBool valuesToo,
374ca955
A
605 UErrorCode *status)
606{
607 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
f3c0d7a5 608
374ca955
A
609 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
610 int32_t numKeywords = 0;
611 const char* pos = localeID;
612 const char* equalSign = NULL;
613 const char* semicolon = NULL;
614 int32_t i = 0, j, n;
615 int32_t keywordsLen = 0;
616 int32_t valuesLen = 0;
617
618 if(prev == '@') { /* start of keyword definition */
619 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
620 do {
621 UBool duplicate = FALSE;
622 /* skip leading spaces */
623 while(*pos == ' ') {
624 pos++;
625 }
626 if (!*pos) { /* handle trailing "; " */
627 break;
628 }
629 if(numKeywords == maxKeywords) {
630 *status = U_INTERNAL_PROGRAM_ERROR;
631 return 0;
632 }
633 equalSign = uprv_strchr(pos, '=');
634 semicolon = uprv_strchr(pos, ';');
635 /* lack of '=' [foo@currency] is illegal */
636 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
637 if(!equalSign || (semicolon && semicolon<equalSign)) {
638 *status = U_INVALID_FORMAT_ERROR;
639 return 0;
640 }
641 /* need to normalize both keyword and keyword name */
642 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
643 /* keyword name too long for internal buffer */
644 *status = U_INTERNAL_PROGRAM_ERROR;
645 return 0;
646 }
647 for(i = 0, n = 0; i < equalSign - pos; ++i) {
648 if (pos[i] != ' ') {
649 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
650 }
651 }
57a6839d
A
652
653 /* zero-length keyword is an error. */
654 if (n == 0) {
655 *status = U_INVALID_FORMAT_ERROR;
656 return 0;
657 }
658
374ca955
A
659 keywordList[numKeywords].keyword[n] = 0;
660 keywordList[numKeywords].keywordLen = n;
661 /* now grab the value part. First we skip the '=' */
662 equalSign++;
663 /* then we leading spaces */
664 while(*equalSign == ' ') {
665 equalSign++;
666 }
57a6839d
A
667
668 /* Premature end or zero-length value */
2ca993e8 669 if (!*equalSign || equalSign == semicolon) {
57a6839d
A
670 *status = U_INVALID_FORMAT_ERROR;
671 return 0;
672 }
673
374ca955 674 keywordList[numKeywords].valueStart = equalSign;
57a6839d 675
374ca955
A
676 pos = semicolon;
677 i = 0;
678 if(pos) {
679 while(*(pos - i - 1) == ' ') {
680 i++;
681 }
73c04bcf 682 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
374ca955
A
683 pos++;
684 } else {
73c04bcf 685 i = (int32_t)uprv_strlen(equalSign);
4388f060 686 while(i && equalSign[i-1] == ' ') {
374ca955
A
687 i--;
688 }
689 keywordList[numKeywords].valueLen = i;
690 }
691 /* If this is a duplicate keyword, then ignore it */
692 for (j=0; j<numKeywords; ++j) {
693 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
694 duplicate = TRUE;
695 break;
696 }
697 }
698 if (!duplicate) {
699 ++numKeywords;
700 }
701 } while(pos);
702
374ca955
A
703 /* now we have a list of keywords */
704 /* we need to sort it */
705 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
f3c0d7a5 706
374ca955
A
707 /* Now construct the keyword part */
708 for(i = 0; i < numKeywords; i++) {
709 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
710 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
711 if(valuesToo) {
712 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
713 } else {
714 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
715 }
716 }
717 keywordsLen += keywordList[i].keywordLen + 1;
718 if(valuesToo) {
3d1f044b 719 if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
374ca955
A
720 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
721 }
722 keywordsLen += keywordList[i].valueLen;
f3c0d7a5 723
374ca955 724 if(i < numKeywords - 1) {
f3c0d7a5 725 if(keywordsLen < keywordCapacity) {
374ca955
A
726 keywords[keywordsLen] = ';';
727 }
728 keywordsLen++;
729 }
730 }
731 if(values) {
732 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
733 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
734 values[valuesLen + keywordList[i].valueLen] = 0;
735 }
736 valuesLen += keywordList[i].valueLen + 1;
737 }
738 }
739 if(values) {
740 values[valuesLen] = 0;
741 if(valLen) {
742 *valLen = valuesLen;
743 }
744 }
f3c0d7a5 745 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
374ca955
A
746 } else {
747 return 0;
748 }
749}
750
751U_CFUNC int32_t
752locale_getKeywords(const char *localeID,
753 char prev,
754 char *keywords, int32_t keywordCapacity,
755 char *values, int32_t valuesCapacity, int32_t *valLen,
756 UBool valuesToo,
757 UErrorCode *status) {
758 return _getKeywords(localeID, prev, keywords, keywordCapacity,
759 values, valuesCapacity, valLen, valuesToo,
3d1f044b 760 status);
374ca955
A
761}
762
763U_CAPI int32_t U_EXPORT2
764uloc_getKeywordValue(const char* localeID,
765 const char* keywordName,
766 char* buffer, int32_t bufferCapacity,
767 UErrorCode* status)
f3c0d7a5 768{
729e4ab9 769 const char* startSearchHere = NULL;
374ca955 770 const char* nextSeparator = NULL;
374ca955
A
771 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
772 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
374ca955
A
773 int32_t result = 0;
774
775 if(status && U_SUCCESS(*status) && localeID) {
729e4ab9
A
776 char tempBuffer[ULOC_FULLNAME_CAPACITY];
777 const char* tmpLocaleID;
778
f3c0d7a5
A
779 if (keywordName == NULL || keywordName[0] == 0) {
780 *status = U_ILLEGAL_ARGUMENT_ERROR;
781 return 0;
782 }
783
784 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
785 if(U_FAILURE(*status)) {
786 return 0;
787 }
788
729e4ab9
A
789 if (_hasBCP47Extension(localeID)) {
790 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
791 } else {
792 tmpLocaleID=localeID;
793 }
f3c0d7a5
A
794
795 startSearchHere = locale_getKeywordsStart(tmpLocaleID);
374ca955
A
796 if(startSearchHere == NULL) {
797 /* no keywords, return at once */
798 return 0;
799 }
800
374ca955
A
801 /* find the first keyword */
802 while(startSearchHere) {
f3c0d7a5
A
803 const char* keyValueTail;
804 int32_t keyValueLen;
805
806 startSearchHere++; /* skip @ or ; */
807 nextSeparator = uprv_strchr(startSearchHere, '=');
808 if(!nextSeparator) {
809 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
810 return 0;
811 }
812 /* strip leading & trailing spaces (TC decided to tolerate these) */
374ca955
A
813 while(*startSearchHere == ' ') {
814 startSearchHere++;
815 }
f3c0d7a5
A
816 keyValueTail = nextSeparator;
817 while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
818 keyValueTail--;
819 }
820 /* now keyValueTail points to first char after the keyName */
821 /* copy & normalize keyName from locale */
822 if (startSearchHere == keyValueTail) {
823 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
824 return 0;
374ca955 825 }
f3c0d7a5
A
826 keyValueLen = 0;
827 while (startSearchHere < keyValueTail) {
828 if (!UPRV_ISALPHANUM(*startSearchHere)) {
829 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
830 return 0;
831 }
832 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
833 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
834 } else {
374ca955
A
835 /* keyword name too long for internal buffer */
836 *status = U_INTERNAL_PROGRAM_ERROR;
837 return 0;
f3c0d7a5 838 }
374ca955 839 }
f3c0d7a5
A
840 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
841
374ca955 842 startSearchHere = uprv_strchr(nextSeparator, ';');
f3c0d7a5 843
374ca955 844 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
f3c0d7a5
A
845 /* current entry matches the keyword. */
846 nextSeparator++; /* skip '=' */
847 /* First strip leading & trailing spaces (TC decided to tolerate these) */
374ca955 848 while(*nextSeparator == ' ') {
f3c0d7a5 849 nextSeparator++;
374ca955 850 }
f3c0d7a5
A
851 keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
852 while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
853 keyValueTail--;
854 }
855 /* Now copy the value, but check well-formedness */
856 if (nextSeparator == keyValueTail) {
857 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
858 return 0;
374ca955 859 }
f3c0d7a5
A
860 keyValueLen = 0;
861 while (nextSeparator < keyValueTail) {
862 if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
863 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
864 return 0;
865 }
866 if (keyValueLen < bufferCapacity) {
867 /* Should we lowercase value to return here? Tests expect as-is. */
868 buffer[keyValueLen++] = *nextSeparator++;
869 } else { /* keep advancing so we return correct length in case of overflow */
870 keyValueLen++;
871 nextSeparator++;
872 }
873 }
874 result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
374ca955
A
875 return result;
876 }
877 }
878 }
879 return 0;
880}
881
882U_CAPI int32_t U_EXPORT2
883uloc_setKeywordValue(const char* keywordName,
884 const char* keywordValue,
885 char* buffer, int32_t bufferCapacity,
886 UErrorCode* status)
887{
888 /* TODO: sorting. removal. */
889 int32_t keywordNameLen;
890 int32_t keywordValueLen;
891 int32_t bufLen;
892 int32_t needLen = 0;
374ca955 893 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
f3c0d7a5 894 char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
374ca955 895 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
374ca955
A
896 int32_t rc;
897 char* nextSeparator = NULL;
898 char* nextEqualsign = NULL;
899 char* startSearchHere = NULL;
900 char* keywordStart = NULL;
f3c0d7a5
A
901 CharString updatedKeysAndValues;
902 int32_t updatedKeysAndValuesLen;
903 UBool handledInputKeyAndValue = FALSE;
904 char keyValuePrefix = '@';
905
906 if(U_FAILURE(*status)) {
907 return -1;
374ca955 908 }
f3c0d7a5 909 if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
73c04bcf
A
910 *status = U_ILLEGAL_ARGUMENT_ERROR;
911 return 0;
912 }
f3c0d7a5 913 bufLen = (int32_t)uprv_strlen(buffer);
73c04bcf
A
914 if(bufferCapacity<bufLen) {
915 /* The capacity is less than the length?! Is this NULL terminated? */
916 *status = U_ILLEGAL_ARGUMENT_ERROR;
917 return 0;
918 }
374ca955
A
919 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
920 if(U_FAILURE(*status)) {
921 return 0;
922 }
f3c0d7a5
A
923
924 keywordValueLen = 0;
925 if(keywordValue) {
926 while (*keywordValue != 0) {
927 if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
928 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
929 return 0;
930 }
931 if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
932 /* Should we force lowercase in value to set? */
933 keywordValueBuffer[keywordValueLen++] = *keywordValue++;
934 } else {
935 /* keywordValue too long for internal buffer */
936 *status = U_INTERNAL_PROGRAM_ERROR;
937 return 0;
938 }
939 }
940 }
941 keywordValueBuffer[keywordValueLen] = 0; /* terminate */
942
374ca955 943 startSearchHere = (char*)locale_getKeywordsStart(buffer);
374ca955 944 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
f3c0d7a5
A
945 if(keywordValueLen == 0) { /* no keywords = nothing to remove */
946 return bufLen;
374ca955
A
947 }
948
949 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
f3c0d7a5 950 if(startSearchHere) { /* had a single @ */
374ca955
A
951 needLen--; /* already had the @ */
952 /* startSearchHere points at the @ */
953 } else {
954 startSearchHere=buffer+bufLen;
955 }
956 if(needLen >= bufferCapacity) {
957 *status = U_BUFFER_OVERFLOW_ERROR;
958 return needLen; /* no change */
959 }
f3c0d7a5 960 *startSearchHere++ = '@';
374ca955
A
961 uprv_strcpy(startSearchHere, keywordNameBuffer);
962 startSearchHere += keywordNameLen;
f3c0d7a5
A
963 *startSearchHere++ = '=';
964 uprv_strcpy(startSearchHere, keywordValueBuffer);
374ca955
A
965 return needLen;
966 } /* end shortcut - no @ */
f3c0d7a5 967
374ca955
A
968 keywordStart = startSearchHere;
969 /* search for keyword */
970 while(keywordStart) {
f3c0d7a5
A
971 const char* keyValueTail;
972 int32_t keyValueLen;
973
974 keywordStart++; /* skip @ or ; */
975 nextEqualsign = uprv_strchr(keywordStart, '=');
976 if (!nextEqualsign) {
977 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
978 return 0;
979 }
980 /* strip leading & trailing spaces (TC decided to tolerate these) */
374ca955
A
981 while(*keywordStart == ' ') {
982 keywordStart++;
983 }
f3c0d7a5
A
984 keyValueTail = nextEqualsign;
985 while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
986 keyValueTail--;
374ca955 987 }
f3c0d7a5
A
988 /* now keyValueTail points to first char after the keyName */
989 /* copy & normalize keyName from locale */
990 if (keywordStart == keyValueTail) {
991 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
374ca955
A
992 return 0;
993 }
f3c0d7a5
A
994 keyValueLen = 0;
995 while (keywordStart < keyValueTail) {
996 if (!UPRV_ISALPHANUM(*keywordStart)) {
997 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
998 return 0;
999 }
1000 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
1001 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
1002 } else {
1003 /* keyword name too long for internal buffer */
1004 *status = U_INTERNAL_PROGRAM_ERROR;
1005 return 0;
1006 }
374ca955 1007 }
f3c0d7a5 1008 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
374ca955
A
1009
1010 nextSeparator = uprv_strchr(nextEqualsign, ';');
f3c0d7a5
A
1011
1012 /* start processing the value part */
1013 nextEqualsign++; /* skip '=' */
1014 /* First strip leading & trailing spaces (TC decided to tolerate these) */
1015 while(*nextEqualsign == ' ') {
1016 nextEqualsign++;
1017 }
1018 keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
1019 while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
1020 keyValueTail--;
1021 }
1022 if (nextEqualsign == keyValueTail) {
1023 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1024 return 0;
1025 }
1026
374ca955
A
1027 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1028 if(rc == 0) {
f3c0d7a5
A
1029 /* Current entry matches the input keyword. Update the entry */
1030 if(keywordValueLen > 0) { /* updating a value */
1031 updatedKeysAndValues.append(keyValuePrefix, *status);
1032 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1033 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1034 updatedKeysAndValues.append('=', *status);
1035 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1036 } /* else removing this entry, don't emit anything */
1037 handledInputKeyAndValue = TRUE;
1038 } else {
1039 /* input keyword sorts earlier than current entry, add before current entry */
1040 if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1041 /* insert new entry at this location */
1042 updatedKeysAndValues.append(keyValuePrefix, *status);
1043 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1044 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1045 updatedKeysAndValues.append('=', *status);
1046 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1047 handledInputKeyAndValue = TRUE;
374ca955 1048 }
f3c0d7a5
A
1049 /* copy the current entry */
1050 updatedKeysAndValues.append(keyValuePrefix, *status);
1051 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1052 updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1053 updatedKeysAndValues.append('=', *status);
3d1f044b 1054 updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
f3c0d7a5
A
1055 }
1056 if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1057 /* append new entry at the end, it sorts later than existing entries */
1058 updatedKeysAndValues.append(keyValuePrefix, *status);
1059 /* skip keyValuePrefix update, no subsequent key-value pair */
1060 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1061 updatedKeysAndValues.append('=', *status);
1062 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1063 handledInputKeyAndValue = TRUE;
374ca955
A
1064 }
1065 keywordStart = nextSeparator;
1066 } /* end loop searching */
374ca955 1067
f3c0d7a5
A
1068 /* Any error from updatedKeysAndValues.append above would be internal and not due to
1069 * problems with the passed-in locale. So if we did encounter problems with the
1070 * passed-in locale above, those errors took precedence and overrode any error
1071 * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1072 * are errors here they are from updatedKeysAndValues.append; they do cause an
1073 * error return but the passed-in locale is unmodified and the original bufLen is
1074 * returned.
1075 */
1076 if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1077 /* if input key/value specified removal of a keyword not present in locale, or
1078 * there was an error in CharString.append, leave original locale alone. */
1079 return bufLen;
1080 }
1081
1082 updatedKeysAndValuesLen = updatedKeysAndValues.length();
1083 /* needLen = length of the part before '@' + length of updated key-value part including '@' */
1084 needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
374ca955
A
1085 if(needLen >= bufferCapacity) {
1086 *status = U_BUFFER_OVERFLOW_ERROR;
1087 return needLen; /* no change */
1088 }
f3c0d7a5
A
1089 if (updatedKeysAndValuesLen > 0) {
1090 uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
374ca955
A
1091 }
1092 buffer[needLen]=0;
1093 return needLen;
1094}
b75a7d8f 1095
374ca955 1096/* ### ID parsing implementation **************************************************/
b75a7d8f 1097
/* TRUE if 'a' is one of the letters that can start a special prefix: x, X, i, I.
 * The argument is parenthesized so that any expression may be passed. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1108
374ca955
A
1109/**
1110 * Lookup 'key' in the array 'list'. The array 'list' should contain
1111 * a NULL entry, followed by more entries, and a second NULL entry.
1112 *
1113 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1114 * COUNTRIES_3.
1115 */
b75a7d8f
A
1116static int16_t _findIndex(const char* const* list, const char* key)
1117{
1118 const char* const* anchor = list;
374ca955
A
1119 int32_t pass = 0;
1120
1121 /* Make two passes through two NULL-terminated arrays at 'list' */
1122 while (pass++ < 2) {
1123 while (*list) {
1124 if (uprv_strcmp(key, *list) == 0) {
1125 return (int16_t)(list - anchor);
1126 }
1127 list++;
b75a7d8f 1128 }
374ca955 1129 ++list; /* skip final NULL *CWB*/
b75a7d8f
A
1130 }
1131 return -1;
1132}
1133
1134/* count the length of src while copying it to dest; return strlen(src) */
4388f060 1135static inline int32_t
b75a7d8f
A
1136_copyCount(char *dest, int32_t destCapacity, const char *src) {
1137 const char *anchor;
1138 char c;
1139
1140 anchor=src;
1141 for(;;) {
1142 if((c=*src)==0) {
1143 return (int32_t)(src-anchor);
1144 }
1145 if(destCapacity<=0) {
1146 return (int32_t)((src-anchor)+uprv_strlen(src));
1147 }
1148 ++src;
1149 *dest++=c;
1150 --destCapacity;
1151 }
1152}
1153
f3c0d7a5 1154U_CFUNC const char*
73c04bcf
A
1155uloc_getCurrentCountryID(const char* oldID){
1156 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1157 if (offset >= 0) {
1158 return REPLACEMENT_COUNTRIES[offset];
1159 }
1160 return oldID;
1161}
f3c0d7a5 1162U_CFUNC const char*
73c04bcf
A
1163uloc_getCurrentLanguageID(const char* oldID){
1164 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1165 if (offset >= 0) {
1166 return REPLACEMENT_LANGUAGES[offset];
1167 }
f3c0d7a5 1168 return oldID;
73c04bcf 1169}
b75a7d8f
A
1170/*
1171 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1172 * avoid duplicating code to handle the earlier locale ID pieces
1173 * in the functions for the later ones by
1174 * setting the *pEnd pointer to where they stopped parsing
1175 *
1176 * TODO try to use this in Locale
1177 */
729e4ab9
A
1178U_CFUNC int32_t
1179ulocimp_getLanguage(const char *localeID,
1180 char *language, int32_t languageCapacity,
1181 const char **pEnd) {
b75a7d8f
A
1182 int32_t i=0;
1183 int32_t offset;
1184 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1185
1186 /* if it starts with i- or x- then copy that prefix */
1187 if(_isIDPrefix(localeID)) {
1188 if(i<languageCapacity) {
1189 language[i]=(char)uprv_tolower(*localeID);
1190 }
1191 if(i<languageCapacity) {
1192 language[i+1]='-';
1193 }
1194 i+=2;
1195 localeID+=2;
1196 }
f3c0d7a5 1197
b75a7d8f
A
1198 /* copy the language as far as possible and count its length */
1199 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1200 if(i<languageCapacity) {
1201 language[i]=(char)uprv_tolower(*localeID);
1202 }
1203 if(i<3) {
4388f060 1204 U_ASSERT(i>=0);
b75a7d8f
A
1205 lang[i]=(char)uprv_tolower(*localeID);
1206 }
1207 i++;
1208 localeID++;
1209 }
1210
1211 if(i==3) {
1212 /* convert 3 character code to 2 character code if possible *CWB*/
374ca955 1213 offset=_findIndex(LANGUAGES_3, lang);
b75a7d8f 1214 if(offset>=0) {
374ca955 1215 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
b75a7d8f
A
1216 }
1217 }
1218
1219 if(pEnd!=NULL) {
1220 *pEnd=localeID;
1221 }
1222 return i;
1223}
1224
729e4ab9
A
1225U_CFUNC int32_t
1226ulocimp_getScript(const char *localeID,
1227 char *script, int32_t scriptCapacity,
1228 const char **pEnd)
b75a7d8f 1229{
374ca955 1230 int32_t idLen = 0;
b75a7d8f 1231
374ca955
A
1232 if (pEnd != NULL) {
1233 *pEnd = localeID;
b75a7d8f 1234 }
374ca955
A
1235
1236 /* copy the second item as far as possible and count its length */
4388f060
A
1237 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1238 && uprv_isASCIILetter(localeID[idLen])) {
374ca955 1239 idLen++;
b75a7d8f
A
1240 }
1241
374ca955
A
1242 /* If it's exactly 4 characters long, then it's a script and not a country. */
1243 if (idLen == 4) {
1244 int32_t i;
1245 if (pEnd != NULL) {
1246 *pEnd = localeID+idLen;
1247 }
1248 if(idLen > scriptCapacity) {
1249 idLen = scriptCapacity;
1250 }
1251 if (idLen >= 1) {
1252 script[0]=(char)uprv_toupper(*(localeID++));
1253 }
1254 for (i = 1; i < idLen; i++) {
1255 script[i]=(char)uprv_tolower(*(localeID++));
1256 }
1257 }
1258 else {
1259 idLen = 0;
1260 }
1261 return idLen;
b75a7d8f
A
1262}
1263
729e4ab9
A
1264U_CFUNC int32_t
1265ulocimp_getCountry(const char *localeID,
1266 char *country, int32_t countryCapacity,
1267 const char **pEnd)
374ca955 1268{
729e4ab9 1269 int32_t idLen=0;
374ca955 1270 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
b75a7d8f
A
1271 int32_t offset;
1272
1273 /* copy the country as far as possible and count its length */
729e4ab9
A
1274 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1275 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1276 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
b75a7d8f 1277 }
729e4ab9 1278 idLen++;
b75a7d8f
A
1279 }
1280
729e4ab9
A
1281 /* the country should be either length 2 or 3 */
1282 if (idLen == 2 || idLen == 3) {
1283 UBool gotCountry = FALSE;
1284 /* convert 3 character code to 2 character code if possible *CWB*/
1285 if(idLen==3) {
1286 offset=_findIndex(COUNTRIES_3, cnty);
1287 if(offset>=0) {
1288 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1289 gotCountry = TRUE;
1290 }
1291 }
1292 if (!gotCountry) {
1293 int32_t i = 0;
1294 for (i = 0; i < idLen; i++) {
1295 if (i < countryCapacity) {
1296 country[i]=(char)uprv_toupper(localeID[i]);
1297 }
1298 }
b75a7d8f 1299 }
729e4ab9
A
1300 localeID+=idLen;
1301 } else {
1302 idLen = 0;
b75a7d8f
A
1303 }
1304
1305 if(pEnd!=NULL) {
1306 *pEnd=localeID;
1307 }
729e4ab9
A
1308
1309 return idLen;
b75a7d8f
A
1310}
1311
374ca955
A
1312/**
1313 * @param needSeparator if true, then add leading '_' if any variants
1314 * are added to 'variant'
1315 */
1316static int32_t
1317_getVariantEx(const char *localeID,
1318 char prev,
1319 char *variant, int32_t variantCapacity,
1320 UBool needSeparator) {
b75a7d8f
A
1321 int32_t i=0;
1322
1323 /* get one or more variant tags and separate them with '_' */
1324 if(_isIDSeparator(prev)) {
1325 /* get a variant string after a '-' or '_' */
1326 while(!_isTerminator(*localeID)) {
374ca955
A
1327 if (needSeparator) {
1328 if (i<variantCapacity) {
1329 variant[i] = '_';
1330 }
1331 ++i;
1332 needSeparator = FALSE;
1333 }
b75a7d8f
A
1334 if(i<variantCapacity) {
1335 variant[i]=(char)uprv_toupper(*localeID);
1336 if(variant[i]=='-') {
1337 variant[i]='_';
1338 }
1339 }
1340 i++;
1341 localeID++;
1342 }
1343 }
1344
1345 /* if there is no variant tag after a '-' or '_' then look for '@' */
1346 if(i==0) {
1347 if(prev=='@') {
1348 /* keep localeID */
374ca955 1349 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
b75a7d8f
A
1350 ++localeID; /* point after the '@' */
1351 } else {
1352 return 0;
1353 }
1354 while(!_isTerminator(*localeID)) {
374ca955
A
1355 if (needSeparator) {
1356 if (i<variantCapacity) {
1357 variant[i] = '_';
1358 }
1359 ++i;
1360 needSeparator = FALSE;
1361 }
b75a7d8f
A
1362 if(i<variantCapacity) {
1363 variant[i]=(char)uprv_toupper(*localeID);
1364 if(variant[i]=='-' || variant[i]==',') {
1365 variant[i]='_';
1366 }
1367 }
1368 i++;
1369 localeID++;
1370 }
1371 }
f3c0d7a5 1372
b75a7d8f
A
1373 return i;
1374}
1375
374ca955
A
1376static int32_t
1377_getVariant(const char *localeID,
1378 char prev,
1379 char *variant, int32_t variantCapacity) {
1380 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1381}
1382
374ca955
A
1383/* Keyword enumeration */
1384
/*
 * State behind a keyword UEnumeration (stored in UEnumeration::context).
 */
typedef struct UKeywordsContext {
    char *keywords;     /* start of the keyword list: NUL-separated names ended by an empty string; freed on close */
    char *current;      /* next name to hand out; points into 'keywords' */
} UKeywordsContext;
1389
f3c0d7a5
A
1390U_CDECL_BEGIN
1391
374ca955
A
1392static void U_CALLCONV
1393uloc_kw_closeKeywords(UEnumeration *enumerator) {
1394 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1395 uprv_free(enumerator->context);
1396 uprv_free(enumerator);
1397}
1398
1399static int32_t U_CALLCONV
4388f060 1400uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
374ca955
A
1401 char *kw = ((UKeywordsContext *)en->context)->keywords;
1402 int32_t result = 0;
1403 while(*kw) {
1404 result++;
1405 kw += uprv_strlen(kw)+1;
1406 }
1407 return result;
1408}
1409
f3c0d7a5 1410static const char * U_CALLCONV
374ca955
A
1411uloc_kw_nextKeyword(UEnumeration* en,
1412 int32_t* resultLength,
4388f060 1413 UErrorCode* /*status*/) {
374ca955
A
1414 const char* result = ((UKeywordsContext *)en->context)->current;
1415 int32_t len = 0;
1416 if(*result) {
73c04bcf 1417 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
374ca955
A
1418 ((UKeywordsContext *)en->context)->current += len+1;
1419 } else {
1420 result = NULL;
1421 }
1422 if (resultLength) {
1423 *resultLength = len;
1424 }
1425 return result;
1426}
1427
f3c0d7a5
A
1428static void U_CALLCONV
1429uloc_kw_resetKeywords(UEnumeration* en,
4388f060 1430 UErrorCode* /*status*/) {
374ca955
A
1431 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1432}
1433
f3c0d7a5
A
1434U_CDECL_END
1435
1436
374ca955
A
1437static const UEnumeration gKeywordsEnum = {
1438 NULL,
1439 NULL,
1440 uloc_kw_closeKeywords,
1441 uloc_kw_countKeywords,
1442 uenum_unextDefault,
1443 uloc_kw_nextKeyword,
1444 uloc_kw_resetKeywords
1445};
1446
1447U_CAPI UEnumeration* U_EXPORT2
1448uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
b75a7d8f 1449{
46f4442e
A
1450 UKeywordsContext *myContext = NULL;
1451 UEnumeration *result = NULL;
b75a7d8f 1452
46f4442e
A
1453 if(U_FAILURE(*status)) {
1454 return NULL;
1455 }
1456 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1457 /* Null pointer test */
1458 if (result == NULL) {
1459 *status = U_MEMORY_ALLOCATION_ERROR;
1460 return NULL;
1461 }
1462 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
51004dcb 1463 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
46f4442e
A
1464 if (myContext == NULL) {
1465 *status = U_MEMORY_ALLOCATION_ERROR;
1466 uprv_free(result);
1467 return NULL;
1468 }
1469 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1470 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1471 myContext->keywords[keywordListSize] = 0;
1472 myContext->current = myContext->keywords;
1473 result->context = myContext;
1474 return result;
374ca955
A
1475}
1476
1477U_CAPI UEnumeration* U_EXPORT2
1478uloc_openKeywords(const char* localeID,
f3c0d7a5 1479 UErrorCode* status)
374ca955
A
1480{
1481 int32_t i=0;
1482 char keywords[256];
1483 int32_t keywordsCapacity = 256;
729e4ab9
A
1484 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1485 const char* tmpLocaleID;
1486
374ca955 1487 if(status==NULL || U_FAILURE(*status)) {
b75a7d8f
A
1488 return 0;
1489 }
f3c0d7a5 1490
729e4ab9
A
1491 if (_hasBCP47Extension(localeID)) {
1492 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1493 } else {
1494 if (localeID==NULL) {
1495 localeID=uloc_getDefault();
1496 }
1497 tmpLocaleID=localeID;
b75a7d8f
A
1498 }
1499
374ca955 1500 /* Skip the language */
729e4ab9
A
1501 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1502 if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1503 const char *scriptID;
1504 /* Skip the script if available */
729e4ab9
A
1505 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1506 if(scriptID != tmpLocaleID+1) {
374ca955 1507 /* Found optional script */
729e4ab9 1508 tmpLocaleID = scriptID;
374ca955
A
1509 }
1510 /* Skip the Country */
729e4ab9
A
1511 if (_isIDSeparator(*tmpLocaleID)) {
1512 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1513 if(_isIDSeparator(*tmpLocaleID)) {
1514 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
374ca955 1515 }
b75a7d8f
A
1516 }
1517 }
1518
374ca955 1519 /* keywords are located after '@' */
729e4ab9
A
1520 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1521 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
374ca955
A
1522 }
1523
1524 if(i) {
1525 return uloc_openKeywordList(keywords, i, status);
1526 } else {
1527 return NULL;
b75a7d8f 1528 }
b75a7d8f
A
1529}
1530
b75a7d8f 1531
374ca955
A
1532/* bit-flags for 'options' parameter of _canonicalize */
1533#define _ULOC_STRIP_KEYWORDS 0x2
1534#define _ULOC_CANONICALIZE 0x1
1535
1536#define OPTION_SET(options, mask) ((options & mask) != 0)
1537
73c04bcf 1538static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
2ca993e8 1539#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
73c04bcf 1540
374ca955
A
1541/**
1542 * Canonicalize the given localeID, to level 1 or to level 2,
1543 * depending on the options. To specify level 1, pass in options=0.
1544 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1545 *
1546 * This is the code underlying uloc_getName and uloc_canonicalize.
1547 */
1548static int32_t
1549_canonicalize(const char* localeID,
1550 char* result,
1551 int32_t resultCapacity,
1552 uint32_t options,
1553 UErrorCode* err) {
1554 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1555 char localeBuffer[ULOC_FULLNAME_CAPACITY];
729e4ab9 1556 char tempBuffer[ULOC_FULLNAME_CAPACITY];
46f4442e 1557 const char* origLocaleID;
729e4ab9 1558 const char* tmpLocaleID;
374ca955
A
1559 const char* keywordAssign = NULL;
1560 const char* separatorIndicator = NULL;
374ca955
A
1561 char* name;
1562 char* variant = NULL; /* pointer into name, or NULL */
374ca955
A
1563
1564 if (U_FAILURE(*err)) {
b75a7d8f
A
1565 return 0;
1566 }
f3c0d7a5 1567
729e4ab9
A
1568 if (_hasBCP47Extension(localeID)) {
1569 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1570 } else {
1571 if (localeID==NULL) {
1572 localeID=uloc_getDefault();
1573 }
1574 tmpLocaleID=localeID;
b75a7d8f 1575 }
729e4ab9
A
1576
1577 origLocaleID=tmpLocaleID;
b75a7d8f 1578
374ca955
A
1579 /* if we are doing a full canonicalization, then put results in
1580 localeBuffer, if necessary; otherwise send them to result. */
729e4ab9 1581 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
4388f060 1582 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
374ca955 1583 name = localeBuffer;
4388f060 1584 nameCapacity = (int32_t)sizeof(localeBuffer);
374ca955
A
1585 } else {
1586 name = result;
1587 nameCapacity = resultCapacity;
1588 }
1589
b75a7d8f 1590 /* get all pieces, one after another, and separate with '_' */
729e4ab9 1591 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
73c04bcf
A
1592
1593 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1594 const char *d = uloc_getDefault();
f3c0d7a5 1595
729e4ab9 1596 len = (int32_t)uprv_strlen(d);
73c04bcf
A
1597
1598 if (name != NULL) {
3d1f044b 1599 uprv_memcpy(name, d, len);
73c04bcf 1600 }
729e4ab9 1601 } else if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1602 const char *scriptID;
1603
b75a7d8f 1604 ++fieldCount;
374ca955
A
1605 if(len<nameCapacity) {
1606 name[len]='_';
b75a7d8f 1607 }
374ca955
A
1608 ++len;
1609
4388f060
A
1610 scriptSize=ulocimp_getScript(tmpLocaleID+1,
1611 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
374ca955
A
1612 if(scriptSize > 0) {
1613 /* Found optional script */
729e4ab9 1614 tmpLocaleID = scriptID;
b75a7d8f 1615 ++fieldCount;
374ca955 1616 len+=scriptSize;
729e4ab9 1617 if (_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1618 /* If there is something else, then we add the _ */
1619 if(len<nameCapacity) {
1620 name[len]='_';
1621 }
1622 ++len;
1623 }
1624 }
1625
729e4ab9
A
1626 if (_isIDSeparator(*tmpLocaleID)) {
1627 const char *cntryID;
4388f060
A
1628 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1629 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
729e4ab9
A
1630 if (cntrySize > 0) {
1631 /* Found optional country */
1632 tmpLocaleID = cntryID;
1633 len+=cntrySize;
1634 }
1635 if(_isIDSeparator(*tmpLocaleID)) {
51004dcb
A
1636 /* If there is something else, then we add the _ if we found country before. */
1637 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
729e4ab9
A
1638 ++fieldCount;
1639 if(len<nameCapacity) {
1640 name[len]='_';
1641 }
1642 ++len;
374ca955 1643 }
729e4ab9 1644
4388f060
A
1645 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1646 (len<nameCapacity ? name+len : NULL), nameCapacity-len);
374ca955 1647 if (variantSize > 0) {
4388f060 1648 variant = len<nameCapacity ? name+len : NULL;
374ca955 1649 len += variantSize;
729e4ab9 1650 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
374ca955 1651 }
b75a7d8f 1652 }
b75a7d8f
A
1653 }
1654 }
1655
374ca955 1656 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
729e4ab9 1657 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
374ca955 1658 UBool done = FALSE;
b75a7d8f 1659 do {
729e4ab9 1660 char c = *tmpLocaleID;
374ca955
A
1661 switch (c) {
1662 case 0:
1663 case '@':
1664 done = TRUE;
1665 break;
1666 default:
1667 if (len<nameCapacity) {
1668 name[len] = c;
1669 }
1670 ++len;
729e4ab9 1671 ++tmpLocaleID;
374ca955
A
1672 break;
1673 }
1674 } while (!done);
1675 }
1676
1677 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
729e4ab9
A
1678 After this, tmpLocaleID either points to '@' or is NULL */
1679 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1680 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1681 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
374ca955
A
1682 }
1683
1684 /* Copy POSIX-style variant, if any [mr@FOO] */
1685 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
729e4ab9 1686 tmpLocaleID != NULL && keywordAssign == NULL) {
374ca955 1687 for (;;) {
729e4ab9 1688 char c = *tmpLocaleID;
374ca955
A
1689 if (c == 0) {
1690 break;
1691 }
1692 if (len<nameCapacity) {
1693 name[len] = c;
1694 }
1695 ++len;
729e4ab9 1696 ++tmpLocaleID;
374ca955
A
1697 }
1698 }
1699
1700 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1701 /* Handle @FOO variant if @ is present and not followed by = */
729e4ab9 1702 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
374ca955
A
1703 int32_t posixVariantSize;
1704 /* Add missing '_' if needed */
1705 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1706 do {
1707 if(len<nameCapacity) {
1708 name[len]='_';
1709 }
1710 ++len;
1711 ++fieldCount;
1712 } while(fieldCount<2);
1713 }
729e4ab9 1714 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
374ca955
A
1715 (UBool)(variantSize > 0));
1716 if (posixVariantSize > 0) {
1717 if (variant == NULL) {
1718 variant = name+len;
1719 }
1720 len += posixVariantSize;
1721 variantSize += posixVariantSize;
b75a7d8f 1722 }
374ca955
A
1723 }
1724
374ca955 1725 /* Look up the ID in the canonicalization map */
2ca993e8 1726 for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
374ca955 1727 const char* id = CANONICALIZE_MAP[j].id;
73c04bcf 1728 int32_t n = (int32_t)uprv_strlen(id);
374ca955 1729 if (len == n && uprv_strncmp(name, id, n) == 0) {
729e4ab9 1730 if (n == 0 && tmpLocaleID != NULL) {
374ca955
A
1731 break; /* Don't remap "" if keywords present */
1732 }
1733 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
374ca955
A
1734 break;
1735 }
1736 }
374ca955
A
1737 }
1738
1739 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
729e4ab9 1740 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
374ca955
A
1741 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1742 if(len<nameCapacity) {
1743 name[len]='@';
1744 }
1745 ++len;
b75a7d8f 1746 ++fieldCount;
4388f060 1747 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
3d1f044b 1748 NULL, 0, NULL, TRUE, err);
374ca955
A
1749 }
1750 }
1751
46f4442e 1752 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
374ca955
A
1753 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1754 }
1755
1756 return u_terminateChars(result, resultCapacity, len, err);
1757}
1758
1759/* ### ID parsing API **************************************************/
1760
1761U_CAPI int32_t U_EXPORT2
1762uloc_getParent(const char* localeID,
1763 char* parent,
1764 int32_t parentCapacity,
1765 UErrorCode* err)
1766{
1767 const char *lastUnderscore;
1768 int32_t i;
f3c0d7a5 1769
374ca955
A
1770 if (U_FAILURE(*err))
1771 return 0;
f3c0d7a5 1772
374ca955
A
1773 if (localeID == NULL)
1774 localeID = uloc_getDefault();
1775
1776 lastUnderscore=uprv_strrchr(localeID, '_');
1777 if(lastUnderscore!=NULL) {
1778 i=(int32_t)(lastUnderscore-localeID);
1779 } else {
1780 i=0;
b75a7d8f 1781 }
374ca955 1782
73c04bcf 1783 if(i>0 && parent != localeID) {
374ca955
A
1784 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1785 }
3d1f044b 1786
374ca955 1787 return u_terminateChars(parent, parentCapacity, i, err);
b75a7d8f 1788}
374ca955
A
1789
1790U_CAPI int32_t U_EXPORT2
1791uloc_getLanguage(const char* localeID,
1792 char* language,
1793 int32_t languageCapacity,
1794 UErrorCode* err)
1795{
1796 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1797 int32_t i=0;
1798
1799 if (err==NULL || U_FAILURE(*err)) {
1800 return 0;
1801 }
f3c0d7a5 1802
374ca955
A
1803 if(localeID==NULL) {
1804 localeID=uloc_getDefault();
1805 }
1806
729e4ab9 1807 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
374ca955
A
1808 return u_terminateChars(language, languageCapacity, i, err);
1809}
1810
1811U_CAPI int32_t U_EXPORT2
1812uloc_getScript(const char* localeID,
1813 char* script,
1814 int32_t scriptCapacity,
1815 UErrorCode* err)
1816{
1817 int32_t i=0;
1818
1819 if(err==NULL || U_FAILURE(*err)) {
1820 return 0;
1821 }
1822
1823 if(localeID==NULL) {
1824 localeID=uloc_getDefault();
1825 }
1826
1827 /* skip the language */
729e4ab9 1828 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955 1829 if(_isIDSeparator(*localeID)) {
729e4ab9 1830 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
374ca955
A
1831 }
1832 return u_terminateChars(script, scriptCapacity, i, err);
1833}
1834
1835U_CAPI int32_t U_EXPORT2
1836uloc_getCountry(const char* localeID,
1837 char* country,
1838 int32_t countryCapacity,
f3c0d7a5 1839 UErrorCode* err)
374ca955
A
1840{
1841 int32_t i=0;
1842
1843 if(err==NULL || U_FAILURE(*err)) {
1844 return 0;
1845 }
1846
1847 if(localeID==NULL) {
1848 localeID=uloc_getDefault();
1849 }
1850
1851 /* Skip the language */
729e4ab9 1852 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955
A
1853 if(_isIDSeparator(*localeID)) {
1854 const char *scriptID;
1855 /* Skip the script if available */
729e4ab9 1856 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
374ca955
A
1857 if(scriptID != localeID+1) {
1858 /* Found optional script */
1859 localeID = scriptID;
1860 }
1861 if(_isIDSeparator(*localeID)) {
729e4ab9 1862 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
374ca955
A
1863 }
1864 }
1865 return u_terminateChars(country, countryCapacity, i, err);
1866}
1867
1868U_CAPI int32_t U_EXPORT2
1869uloc_getVariant(const char* localeID,
1870 char* variant,
1871 int32_t variantCapacity,
f3c0d7a5 1872 UErrorCode* err)
374ca955 1873{
729e4ab9
A
1874 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1875 const char* tmpLocaleID;
374ca955 1876 int32_t i=0;
f3c0d7a5 1877
374ca955
A
1878 if(err==NULL || U_FAILURE(*err)) {
1879 return 0;
1880 }
f3c0d7a5 1881
729e4ab9
A
1882 if (_hasBCP47Extension(localeID)) {
1883 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1884 } else {
1885 if (localeID==NULL) {
1886 localeID=uloc_getDefault();
1887 }
1888 tmpLocaleID=localeID;
374ca955 1889 }
f3c0d7a5 1890
374ca955 1891 /* Skip the language */
729e4ab9
A
1892 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1893 if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1894 const char *scriptID;
1895 /* Skip the script if available */
729e4ab9
A
1896 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1897 if(scriptID != tmpLocaleID+1) {
374ca955 1898 /* Found optional script */
729e4ab9 1899 tmpLocaleID = scriptID;
374ca955
A
1900 }
1901 /* Skip the Country */
729e4ab9
A
1902 if (_isIDSeparator(*tmpLocaleID)) {
1903 const char *cntryID;
1904 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
1905 if (cntryID != tmpLocaleID+1) {
1906 /* Found optional country */
1907 tmpLocaleID = cntryID;
1908 }
1909 if(_isIDSeparator(*tmpLocaleID)) {
1910 /* If there was no country ID, skip a possible extra IDSeparator */
1911 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
1912 tmpLocaleID++;
1913 }
1914 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
374ca955
A
1915 }
1916 }
1917 }
f3c0d7a5 1918
374ca955
A
1919 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1920 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
1921/*
1922 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1923 i=_getVariant(localeID+1, '@', variant, variantCapacity);
1924 }
1925*/
1926 return u_terminateChars(variant, variantCapacity, i, err);
1927}
1928
1929U_CAPI int32_t U_EXPORT2
1930uloc_getName(const char* localeID,
1931 char* name,
1932 int32_t nameCapacity,
f3c0d7a5 1933 UErrorCode* err)
374ca955
A
1934{
1935 return _canonicalize(localeID, name, nameCapacity, 0, err);
1936}
1937
1938U_CAPI int32_t U_EXPORT2
1939uloc_getBaseName(const char* localeID,
1940 char* name,
1941 int32_t nameCapacity,
f3c0d7a5 1942 UErrorCode* err)
374ca955
A
1943{
1944 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
1945}
1946
1947U_CAPI int32_t U_EXPORT2
1948uloc_canonicalize(const char* localeID,
1949 char* name,
1950 int32_t nameCapacity,
f3c0d7a5 1951 UErrorCode* err)
374ca955
A
1952{
1953 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
1954}
f3c0d7a5 1955
b75a7d8f 1956U_CAPI const char* U_EXPORT2
f3c0d7a5 1957uloc_getISO3Language(const char* localeID)
b75a7d8f 1958{
374ca955
A
1959 int16_t offset;
1960 char lang[ULOC_LANG_CAPACITY];
1961 UErrorCode err = U_ZERO_ERROR;
f3c0d7a5 1962
374ca955
A
1963 if (localeID == NULL)
1964 {
1965 localeID = uloc_getDefault();
1966 }
1967 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1968 if (U_FAILURE(err))
1969 return "";
1970 offset = _findIndex(LANGUAGES, lang);
1971 if (offset < 0)
1972 return "";
1973 return LANGUAGES_3[offset];
b75a7d8f
A
1974}
1975
1976U_CAPI const char* U_EXPORT2
f3c0d7a5 1977uloc_getISO3Country(const char* localeID)
b75a7d8f
A
1978{
1979 int16_t offset;
374ca955 1980 char cntry[ULOC_LANG_CAPACITY];
b75a7d8f 1981 UErrorCode err = U_ZERO_ERROR;
f3c0d7a5 1982
b75a7d8f
A
1983 if (localeID == NULL)
1984 {
1985 localeID = uloc_getDefault();
1986 }
374ca955 1987 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
b75a7d8f
A
1988 if (U_FAILURE(err))
1989 return "";
374ca955 1990 offset = _findIndex(COUNTRIES, cntry);
b75a7d8f
A
1991 if (offset < 0)
1992 return "";
f3c0d7a5 1993
374ca955 1994 return COUNTRIES_3[offset];
b75a7d8f
A
1995}
1996
1997U_CAPI uint32_t U_EXPORT2
f3c0d7a5 1998uloc_getLCID(const char* localeID)
b75a7d8f 1999{
374ca955
A
2000 UErrorCode status = U_ZERO_ERROR;
2001 char langID[ULOC_FULLNAME_CAPACITY];
f3c0d7a5
A
2002 uint32_t lcid = 0;
2003
2004 /* Check for incomplete id. */
2005 if (!localeID || uprv_strlen(localeID) < 2) {
2006 return 0;
2007 }
2008
3d1f044b
A
2009 // First, attempt Windows platform lookup if available, but fall
2010 // through to catch any special cases (ICU vs Windows name differences).
2011 lcid = uprv_convertToLCIDPlatform(localeID, &status);
2012 if (U_FAILURE(status)) {
2013 return 0;
2014 }
2015 if (lcid > 0) {
f3c0d7a5
A
2016 // Windows found an LCID, return that
2017 return lcid;
2018 }
374ca955
A
2019
2020 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
3d1f044b 2021 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
374ca955 2022 return 0;
b75a7d8f 2023 }
374ca955 2024
57a6839d
A
2025 if (uprv_strchr(localeID, '@')) {
2026 // uprv_convertToLCID does not support keywords other than collation.
2027 // Remove all keywords except collation.
2028 int32_t len;
2029 char collVal[ULOC_KEYWORDS_CAPACITY];
2030 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2031
2032 len = uloc_getKeywordValue(localeID, "collation", collVal,
2ca993e8 2033 UPRV_LENGTHOF(collVal) - 1, &status);
57a6839d
A
2034
2035 if (U_SUCCESS(status) && len > 0) {
2036 collVal[len] = 0;
2037
2038 len = uloc_getBaseName(localeID, tmpLocaleID,
2ca993e8 2039 UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
57a6839d 2040
2ca993e8 2041 if (U_SUCCESS(status) && len > 0) {
57a6839d
A
2042 tmpLocaleID[len] = 0;
2043
2044 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2ca993e8 2045 UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
57a6839d 2046
2ca993e8 2047 if (U_SUCCESS(status) && len > 0) {
57a6839d
A
2048 tmpLocaleID[len] = 0;
2049 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2050 }
2051 }
2052 }
2053
2054 // fall through - all keywords are simply ignored
2055 status = U_ZERO_ERROR;
2056 }
2057
374ca955
A
2058 return uprv_convertToLCID(langID, localeID, &status);
2059}
2060
73c04bcf
A
2061U_CAPI int32_t U_EXPORT2
2062uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2063 UErrorCode *status)
2064{
57a6839d 2065 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
73c04bcf
A
2066}
2067
374ca955
A
2068/* ### Default locale **************************************************/
2069
2070U_CAPI const char* U_EXPORT2
2071uloc_getDefault()
2072{
2073 return locale_get_default();
2074}
2075
2076U_CAPI void U_EXPORT2
2077uloc_setDefault(const char* newDefaultLocale,
f3c0d7a5 2078 UErrorCode* err)
374ca955
A
2079{
2080 if (U_FAILURE(*err))
2081 return;
2082 /* the error code isn't currently used for anything by this function*/
f3c0d7a5 2083
374ca955
A
2084 /* propagate change to C++ */
2085 locale_set_default(newDefaultLocale);
b75a7d8f
A
2086}
2087
729e4ab9 2088/**
51004dcb 2089 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
729e4ab9
A
2090 * to an array of pointers to arrays of char. All of these pointers are owned
2091 * by ICU-- do not delete them, and do not write through them. The array is
2092 * terminated with a null pointer.
2093 */
2094U_CAPI const char* const* U_EXPORT2
f3c0d7a5 2095uloc_getISOLanguages()
729e4ab9
A
2096{
2097 return LANGUAGES;
2098}
374ca955 2099
729e4ab9
A
2100/**
2101 * Returns a list of all 2-letter country codes defined in ISO 3166. This is a
2102 * pointer to an array of pointers to arrays of char. All of these pointers are
2103 * owned by ICU-- do not delete them, and do not write through them. The array is
2104 * terminated with a null pointer.
b75a7d8f 2105 */
729e4ab9 2106U_CAPI const char* const* U_EXPORT2
f3c0d7a5 2107uloc_getISOCountries()
b75a7d8f 2108{
729e4ab9
A
2109 return COUNTRIES;
2110}
73c04bcf 2111
b75a7d8f 2112
729e4ab9
A
2113/* this function to be moved into cstring.c later */
2114static char gDecimal = 0;
b75a7d8f 2115
729e4ab9
A
2116static /* U_CAPI */
2117double
2118/* U_EXPORT2 */
2119_uloc_strtod(const char *start, char **end) {
2120 char *decimal;
2121 char *myEnd;
2122 char buf[30];
2123 double rv;
2124 if (!gDecimal) {
2125 char rep[5];
2126 /* For machines that decide to change the decimal on you,
2127 and try to be too smart with localization.
2128 This normally should be just a '.'. */
2129 sprintf(rep, "%+1.1f", 1.0);
2130 gDecimal = rep[2];
b75a7d8f 2131 }
b75a7d8f 2132
729e4ab9
A
2133 if(gDecimal == '.') {
2134 return uprv_strtod(start, end); /* fall through to OS */
b75a7d8f 2135 } else {
729e4ab9
A
2136 uprv_strncpy(buf, start, 29);
2137 buf[29]=0;
2138 decimal = uprv_strchr(buf, '.');
2139 if(decimal) {
2140 *decimal = gDecimal;
46f4442e 2141 } else {
729e4ab9 2142 return uprv_strtod(start, end); /* no decimal point */
46f4442e 2143 }
729e4ab9
A
2144 rv = uprv_strtod(buf, &myEnd);
2145 if(end) {
2146 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
b75a7d8f 2147 }
729e4ab9 2148 return rv;
374ca955 2149 }
374ca955
A
2150}
2151
f3c0d7a5 2152typedef struct {
729e4ab9
A
2153 float q;
2154 int32_t dummy; /* to avoid uninitialized memory copy from qsort */
f3c0d7a5 2155 char locale[ULOC_FULLNAME_CAPACITY+1];
729e4ab9 2156} _acceptLangItem;
b75a7d8f 2157
729e4ab9 2158static int32_t U_CALLCONV
4388f060 2159uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
729e4ab9
A
2160{
2161 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2162 const _acceptLangItem *bb = (const _acceptLangItem*)b;
b75a7d8f 2163
729e4ab9
A
2164 int32_t rc = 0;
2165 if(bb->q < aa->q) {
2166 rc = -1; /* A > B */
2167 } else if(bb->q > aa->q) {
2168 rc = 1; /* A < B */
2169 } else {
2170 rc = 0; /* A = B */
b75a7d8f
A
2171 }
2172
729e4ab9
A
2173 if(rc==0) {
2174 rc = uprv_stricmp(aa->locale, bb->locale);
b75a7d8f
A
2175 }
2176
729e4ab9 2177#if defined(ULOC_DEBUG)
f3c0d7a5
A
2178 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2179 aa->locale, aa->q,
729e4ab9
A
2180 bb->locale, bb->q,
2181 rc);*/
2182#endif
374ca955 2183
729e4ab9 2184 return rc;
374ca955
A
2185}
2186
f3c0d7a5 2187/*
729e4ab9
A
2188mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2189*/
374ca955 2190
b75a7d8f 2191U_CAPI int32_t U_EXPORT2
729e4ab9
A
2192uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2193 const char *httpAcceptLanguage,
2194 UEnumeration* availableLocales,
2195 UErrorCode *status)
374ca955 2196{
f3c0d7a5 2197 MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
729e4ab9
A
2198 char tmp[ULOC_FULLNAME_CAPACITY +1];
2199 int32_t n = 0;
2200 const char *itemEnd;
2201 const char *paramEnd;
2202 const char *s;
2203 const char *t;
2204 int32_t res;
2205 int32_t i;
2206 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
b75a7d8f 2207
729e4ab9
A
2208 if(U_FAILURE(*status)) {
2209 return -1;
b75a7d8f
A
2210 }
2211
729e4ab9
A
2212 for(s=httpAcceptLanguage;s&&*s;) {
2213 while(isspace(*s)) /* eat space at the beginning */
2214 s++;
2215 itemEnd=uprv_strchr(s,',');
2216 paramEnd=uprv_strchr(s,';');
2217 if(!itemEnd) {
2218 itemEnd = httpAcceptLanguage+l; /* end of string */
b75a7d8f 2219 }
f3c0d7a5 2220 if(paramEnd && paramEnd<itemEnd) {
729e4ab9
A
2221 /* semicolon (;) is closer than end (,) */
2222 t = paramEnd+1;
2223 if(*t=='q') {
2224 t++;
2225 }
2226 while(isspace(*t)) {
2227 t++;
2228 }
2229 if(*t=='=') {
2230 t++;
2231 }
2232 while(isspace(*t)) {
2233 t++;
2234 }
f3c0d7a5 2235 items[n].q = (float)_uloc_strtod(t,NULL);
729e4ab9
A
2236 } else {
2237 /* no semicolon - it's 1.0 */
f3c0d7a5 2238 items[n].q = 1.0f;
729e4ab9 2239 paramEnd = itemEnd;
374ca955 2240 }
f3c0d7a5 2241 items[n].dummy=0;
374ca955
A
2242 /* eat spaces prior to semi */
2243 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2244 ;
3d1f044b 2245 int32_t slen = static_cast<int32_t>(((t+1)-s));
f3c0d7a5
A
2246 if(slen > ULOC_FULLNAME_CAPACITY) {
2247 *status = U_BUFFER_OVERFLOW_ERROR;
2248 return -1; // too big
2249 }
2250 uprv_strncpy(items[n].locale, s, slen);
2251 items[n].locale[slen]=0; // terminate
2252 int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
2253 if(U_FAILURE(*status)) return -1;
2254 if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {
2255 // canonicalization had an effect- copy back
2256 uprv_strncpy(items[n].locale, tmp, clen);
2257 items[n].locale[clen] = 0; // terminate
374ca955
A
2258 }
2259#if defined(ULOC_DEBUG)
2260 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2261#endif
2262 n++;
2263 s = itemEnd;
2264 while(*s==',') { /* eat duplicate commas */
2265 s++;
2266 }
f3c0d7a5
A
2267 if(n>=items.getCapacity()) { // If we need more items
2268 if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
2269 *status = U_MEMORY_ALLOCATION_ERROR;
2270 return -1;
2271 }
374ca955 2272#if defined(ULOC_DEBUG)
f3c0d7a5 2273 fprintf(stderr,"malloced at size %d\n", items.getCapacity());
374ca955 2274#endif
374ca955
A
2275 }
2276 }
f3c0d7a5
A
2277 uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2278 if (U_FAILURE(*status)) {
46f4442e 2279 return -1;
374ca955 2280 }
f3c0d7a5
A
2281 LocalMemory<const char*> strs(NULL);
2282 if (strs.allocateInsteadAndReset(n) == NULL) {
46f4442e
A
2283 *status = U_MEMORY_ALLOCATION_ERROR;
2284 return -1;
2285 }
374ca955
A
2286 for(i=0;i<n;i++) {
2287#if defined(ULOC_DEBUG)
2288 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2289#endif
f3c0d7a5 2290 strs[i]=items[i].locale;
374ca955 2291 }
f3c0d7a5
A
2292 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2293 strs.getAlias(), n, availableLocales, status);
374ca955
A
2294 return res;
2295}
2296
2297
2298U_CAPI int32_t U_EXPORT2
f3c0d7a5 2299uloc_acceptLanguage(char *result, int32_t resultAvailable,
374ca955
A
2300 UAcceptResult *outResult, const char **acceptList,
2301 int32_t acceptListCount,
2302 UEnumeration* availableLocales,
2303 UErrorCode *status)
2304{
2305 int32_t i,j;
2306 int32_t len;
2307 int32_t maxLen=0;
2308 char tmp[ULOC_FULLNAME_CAPACITY+1];
2309 const char *l;
2310 char **fallbackList;
2311 if(U_FAILURE(*status)) {
2312 return -1;
2313 }
51004dcb 2314 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
374ca955 2315 if(fallbackList==NULL) {
46f4442e
A
2316 *status = U_MEMORY_ALLOCATION_ERROR;
2317 return -1;
374ca955
A
2318 }
2319 for(i=0;i<acceptListCount;i++) {
2320#if defined(ULOC_DEBUG)
2321 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2322#endif
0f5d89e8 2323 while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
374ca955
A
2324#if defined(ULOC_DEBUG)
2325 fprintf(stderr," %s\n", l);
2326#endif
73c04bcf 2327 len = (int32_t)uprv_strlen(l);
374ca955 2328 if(!uprv_strcmp(acceptList[i], l)) {
f3c0d7a5 2329 if(outResult) {
374ca955
A
2330 *outResult = ULOC_ACCEPT_VALID;
2331 }
2332#if defined(ULOC_DEBUG)
2333 fprintf(stderr, "MATCH! %s\n", l);
2334#endif
2335 if(len>0) {
2336 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2337 }
2338 for(j=0;j<i;j++) {
2339 uprv_free(fallbackList[j]);
2340 }
2341 uprv_free(fallbackList);
f3c0d7a5 2342 return u_terminateChars(result, resultAvailable, len, status);
374ca955
A
2343 }
2344 if(len>maxLen) {
2345 maxLen = len;
2346 }
2347 }
f3c0d7a5 2348 uenum_reset(availableLocales, status);
374ca955 2349 /* save off parent info */
2ca993e8 2350 if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
374ca955
A
2351 fallbackList[i] = uprv_strdup(tmp);
2352 } else {
2353 fallbackList[i]=0;
2354 }
2355 }
2356
2357 for(maxLen--;maxLen>0;maxLen--) {
2358 for(i=0;i<acceptListCount;i++) {
2359 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2360#if defined(ULOC_DEBUG)
2361 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2362#endif
0f5d89e8 2363 while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
374ca955
A
2364#if defined(ULOC_DEBUG)
2365 fprintf(stderr," %s\n", l);
2366#endif
73c04bcf 2367 len = (int32_t)uprv_strlen(l);
374ca955 2368 if(!uprv_strcmp(fallbackList[i], l)) {
f3c0d7a5 2369 if(outResult) {
374ca955
A
2370 *outResult = ULOC_ACCEPT_FALLBACK;
2371 }
2372#if defined(ULOC_DEBUG)
2373 fprintf(stderr, "fallback MATCH! %s\n", l);
2374#endif
2375 if(len>0) {
2376 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2377 }
73c04bcf
A
2378 for(j=0;j<acceptListCount;j++) {
2379 uprv_free(fallbackList[j]);
374ca955
A
2380 }
2381 uprv_free(fallbackList);
73c04bcf 2382 return u_terminateChars(result, resultAvailable, len, status);
374ca955
A
2383 }
2384 }
f3c0d7a5 2385 uenum_reset(availableLocales, status);
374ca955 2386
2ca993e8 2387 if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
374ca955
A
2388 uprv_free(fallbackList[i]);
2389 fallbackList[i] = uprv_strdup(tmp);
2390 } else {
2391 uprv_free(fallbackList[i]);
2392 fallbackList[i]=0;
2393 }
2394 }
2395 }
f3c0d7a5 2396 if(outResult) {
374ca955
A
2397 *outResult = ULOC_ACCEPT_FAILED;
2398 }
2399 }
2400 for(i=0;i<acceptListCount;i++) {
2401 uprv_free(fallbackList[i]);
2402 }
2403 uprv_free(fallbackList);
2404 return -1;
b75a7d8f 2405}
374ca955 2406
b331163b
A
2407U_CAPI const char* U_EXPORT2
2408uloc_toUnicodeLocaleKey(const char* keyword)
2409{
2410 const char* bcpKey = ulocimp_toBcpKey(keyword);
2411 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2412 // unknown keyword, but syntax is fine..
2413 return keyword;
2414 }
2415 return bcpKey;
2416}
2417
2418U_CAPI const char* U_EXPORT2
2419uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2420{
2421 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2422 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2423 // unknown keyword, but syntax is fine..
2424 return value;
2425 }
2426 return bcpType;
2427}
2428
b331163b
A
2429static UBool
2430isWellFormedLegacyKey(const char* legacyKey)
2431{
2432 const char* p = legacyKey;
2433 while (*p) {
2434 if (!UPRV_ISALPHANUM(*p)) {
2435 return FALSE;
2436 }
2437 p++;
2438 }
2439 return TRUE;
2440}
2441
2442static UBool
2443isWellFormedLegacyType(const char* legacyType)
2444{
2445 const char* p = legacyType;
2446 int32_t alphaNumLen = 0;
2447 while (*p) {
2448 if (*p == '_' || *p == '/' || *p == '-') {
2449 if (alphaNumLen == 0) {
2450 return FALSE;
2451 }
2452 alphaNumLen = 0;
2453 } else if (UPRV_ISALPHANUM(*p)) {
2454 alphaNumLen++;
2455 } else {
2456 return FALSE;
2457 }
2458 p++;
2459 }
2460 return (alphaNumLen != 0);
2461}
2462
2463U_CAPI const char* U_EXPORT2
2464uloc_toLegacyKey(const char* keyword)
2465{
2466 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2467 if (legacyKey == NULL) {
2468 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2469 //
2470 // Note:
f3c0d7a5
A
2471 // LDML/CLDR provides some definition of keyword syntax in
2472 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2473 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2474 // Keys can only consist of [0-9a-zA-Z].
b331163b
A
2475 if (isWellFormedLegacyKey(keyword)) {
2476 return keyword;
2477 }
2478 }
2479 return legacyKey;
2480}
2481
2482U_CAPI const char* U_EXPORT2
2483uloc_toLegacyType(const char* keyword, const char* value)
2484{
2485 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2486 if (legacyType == NULL) {
2487 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2488 //
2489 // Note:
f3c0d7a5
A
2490 // LDML/CLDR provides some definition of keyword syntax in
2491 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2492 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2493 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2494 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
b331163b
A
2495 if (isWellFormedLegacyType(value)) {
2496 return value;
2497 }
2498 }
2499 return legacyType;
2500}
2501
374ca955 2502/*eof*/