]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/uloc.cpp
ICU-551.51.4.tar.gz
[apple/icu.git] / icuSources / common / uloc.cpp
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
b331163b 3* Copyright (C) 1997-2014, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6*
7* File ULOC.CPP
8*
9* Modification History:
10*
11* Date Name Description
12* 04/01/97 aliu Creation.
13* 08/21/98 stephen JDK 1.2 sync
14* 12/08/98 rtg New Locale implementation and C API
15* 03/15/99 damiba overhaul.
16* 04/06/99 stephen changed setDefault() to realloc and copy
17* 06/14/99 stephen Changed calls to ures_open for new params
18* 07/21/99 stephen Modified setDefault() to propagate to C++
374ca955
A
19* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20* brought canonicalization code into line with spec
b75a7d8f
A
21*****************************************************************************/
22
23/*
24 POSIX's locale format, from putil.c: [no spaces]
25
26 ll [ _CC ] [ . MM ] [ @ VV]
27
28 l = lang, C = ctry, M = charmap, V = variant
29*/
30
b75a7d8f
A
31#include "unicode/utypes.h"
32#include "unicode/ustring.h"
33#include "unicode/uloc.h"
34
374ca955 35#include "putilimp.h"
b75a7d8f 36#include "ustr_imp.h"
374ca955 37#include "ulocimp.h"
b75a7d8f
A
38#include "umutex.h"
39#include "cstring.h"
40#include "cmemory.h"
374ca955
A
41#include "locmap.h"
42#include "uarrsort.h"
43#include "uenumimp.h"
44#include "uassert.h"
b75a7d8f 45
374ca955
A
46#include <stdio.h> /* for sprintf */
47
48/* ### Declarations **************************************************/
b75a7d8f
A
49
50/* Locale stuff from locid.cpp */
51U_CFUNC void locale_set_default(const char *id);
52U_CFUNC const char *locale_get_default(void);
374ca955
A
53U_CFUNC int32_t
54locale_getKeywords(const char *localeID,
55 char prev,
56 char *keywords, int32_t keywordCapacity,
57 char *values, int32_t valuesCapacity, int32_t *valLen,
58 UBool valuesToo,
59 UErrorCode *status);
60
374ca955
A
61/* ### Data tables **************************************************/
62
63/**
64 * Table of language codes, both 2- and 3-letter, with preference
65 * given to 2-letter codes where possible. Includes 3-letter codes
66 * that lack a 2-letter equivalent.
67 *
68 * This list must be in sorted order. This list is returned directly
69 * to the user by some API.
70 *
71 * This list must be kept in sync with LANGUAGES_3, with corresponding
72 * entries matched.
73 *
74 * This table should be terminated with a NULL entry, followed by a
75 * second list, and another NULL entry. The first list is visible to
76 * user code when this array is returned by API. The second list
77 * contains codes we support, but do not expose through user API.
78 *
79 * Notes
80 *
81 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
82 * include the revisions up to 2001/7/27 *CWB*
83 *
84 * The 3 character codes are the terminology codes like RFC 3066. This
85 * is compatible with prior ICU codes
86 *
87 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
88 * table but now at the end of the table because 3 character codes are
89 * duplicates. This avoids bad searches going from 3 to 2 character
90 * codes.
91 *
92 * The range qaa-qtz is reserved for local use
93 */
51004dcb 94/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
57a6839d 95/* ISO639 table version is 20130531 */
/*
 * ISO 639 language codes, 2-letter where one exists, otherwise 3-letter.
 * Layout: a sorted public list, NULL, a private list of obsolete codes, NULL.
 * Entry i here corresponds to entry i of LANGUAGES_3.
 */
static const char * const LANGUAGES[] = {
    /* a */
    "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af",
    "afa", "afh", "agq", "ain", "ak", "akk", "ale", "alg",
    "alt", "am", "an", "ang", "anp", "apa", "ar", "arc",
    "arn", "arp", "art", "arw", "as", "asa", "ast", "ath",
    "aus", "av", "awa", "ay", "az",
    /* b */
    "ba", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    "bbj", "be", "bej", "bem", "ber", "bez", "bfd", "bg",
    "bh", "bho", "bi", "bik", "bin", "bkm", "bla", "bm",
    "bn", "bnt", "bo", "br", "bra", "brx", "bs", "bss",
    "btk", "bua", "bug", "bum", "byn", "byv",
    /* c */
    "ca", "cad", "cai", "car", "cau", "cay", "cch", "ce",
    "ceb", "cel", "cgg", "ch", "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
    "cop", "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs",
    "csb", "cu", "cus", "cv", "cy",
    /* d */
    "da", "dak", "dar", "dav", "day", "de", "del", "den",
    "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    "dv", "dyo", "dyu", "dz", "dzg",
    /* e */
    "ebu", "ee", "efi", "egy", "eka", "el", "elx", "en",
    "enm", "eo", "es", "et", "eu", "ewo",
    /* f */
    "fa", "fan", "fat", "ff", "fi", "fil", "fiu", "fj",
    "fo", "fon", "fr", "frm", "fro", "frr", "frs", "fur",
    "fy",
    /* g */
    "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil",
    "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu", "guz", "gv", "gwi",
    /* h */
    "ha", "hai", "haw", "he", "hi", "hil", "him", "hit",
    "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy",
    "hz",
    /* i */
    "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ijo",
    "ik", "ilo", "inc", "ine", "inh", "io", "ira", "iro",
    "is", "it", "iu",
    /* j */
    "ja", "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
    /* k */
    "ka", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg", "kha",
    "khi", "kho", "khq", "ki", "kj", "kk", "kkj", "kl",
    "kln", "km", "kmb", "kn", "ko", "kok", "kos", "kpe",
    "kr", "krc", "krl", "kro", "kru", "ks", "ksb", "ksf",
    "ksh", "ku", "kum", "kut", "kv", "kw", "ky",
    /* l */
    "la", "lad", "lag", "lah", "lam", "lb", "lez", "lg",
    "li", "lkt", "ln", "lo", "lol", "loz", "lt", "lu",
    "lua", "lui", "lun", "luo", "lus", "luy", "lv",
    /* m */
    "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    "mde", "mdf", "mdr", "men", "mer", "mfe", "mg", "mga",
    "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
    "mkh", "ml", "mn", "mnc", "mni", "mno", "mo", "moh",
    "mos", "mr", "ms", "mt", "mua", "mul", "mun", "mus",
    "mwl", "mwr", "my", "mye", "myn", "myv",
    /* n */
    "na", "nah", "nai", "nap", "naq", "nb", "nd", "nds",
    "ne", "new", "ng", "nia", "nic", "niu", "nl", "nmg",
    "nn", "nnh", "no", "nog", "non", "nqo", "nr", "nso",
    "nub", "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo",
    "nzi",
    /* o */
    "oc", "oj", "om", "or", "os", "osa", "ota", "oto",
    /* p */
    "pa", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    "phi", "phn", "pi", "pl", "pon", "pra", "pro", "ps",
    "pt",
    /* q */
    "qu",
    /* r */
    "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rof",
    "rom", "ru", "rup", "rw", "rwk",
    /* s */
    "sa", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    "sat", "sba", "sbp", "sc", "scn", "sco", "sd", "se",
    "see", "seh", "sel", "sem", "ses", "sg", "sga", "sgn",
    "shi", "shn", "shu", "si", "sid", "sio", "sit",
    "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
    "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
    "srn", "srr", "ss", "ssa", "ssy", "st", "su", "suk",
    "sus", "sux", "sv", "sw", "swb", "swc", "syc", "syr",
    /* t */
    "ta", "tai", "te", "tem", "teo", "ter", "tet", "tg",
    "th", "ti", "tig", "tiv", "tk", "tkl", "tl", "tlh",
    "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv",
    "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
    "twq", "ty", "tyv", "tzm",
    /* u */
    "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
    /* v */
    "vai", "ve", "vi", "vo", "vot", "vun",
    /* w */
    "wa", "wae", "wak", "wal", "war", "was", "wen", "wo",
    /* x */
    "xal", "xh", "xog",
    /* y */
    "yao", "yap", "yav", "ybb", "yi", "yo", "ypk", "yue",
    /* z */
    "za", "zap", "zbl", "zen", "zgh", "zh", "znd", "zu",
    "zun", "zxx", "zza",
    NULL, /* end of the public list */
    "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
    NULL  /* end of the private list */
};
51004dcb 181
73c04bcf
A
/* Deprecated language codes; entry i is replaced by REPLACEMENT_LANGUAGES[i]. */
static const char * const DEPRECATED_LANGUAGES[] = {
    "in",
    "iw",
    "ji",
    "jw",
    NULL,
    NULL
};
/* Current codes for the entries of DEPRECATED_LANGUAGES, matched by index. */
static const char * const REPLACEMENT_LANGUAGES[] = {
    "id",
    "he",
    "yi",
    "jv",
    NULL,
    NULL
};
b75a7d8f 188
374ca955
A
189/**
190 * Table of 3-letter language codes.
191 *
192 * This is a lookup table used to convert 3-letter language codes to
193 * their 2-letter equivalent, where possible. It must be kept in sync
194 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
195 * same language as LANGUAGES_3[i]. The commented-out lines are
196 * copied from LANGUAGES to make eyeballing this baby easier.
197 *
198 * Where a 3-letter language code has no 2-letter equivalent, the
199 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
200 *
201 * This table should be terminated with a NULL entry, followed by a
202 * second list, and another NULL entry. The two lists correspond to
203 * the two lists in LANGUAGES.
204 */
51004dcb 205/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
57a6839d 206/* ISO639 table version is 20130531 */
/*
 * 3-letter language codes, index-aligned with LANGUAGES: entry i names the
 * same language as LANGUAGES[i]. Layout: public list, NULL, private list of
 * codes for the obsolete 2-letter entries, NULL.
 */
static const char * const LANGUAGES_3[] = {
    /* a */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
    "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
    "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
    "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
    "aus", "ava", "awa", "aym", "aze",
    /* b */
    "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
    "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
    "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
    "btk", "bua", "bug", "bum", "byn", "byv",
    /* c */
    "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
    "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
    "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
    "csb", "chu", "cus", "chv", "cym",
    /* d */
    "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
    "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    "div", "dyo", "dyu", "dzo", "dzg",
    /* e */
    "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
    "enm", "epo", "spa", "est", "eus", "ewo",
    /* f */
    "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
    "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
    "fry",
    /* g */
    "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
    "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "guj", "guz", "glv", "gwi",
    /* h */
    "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
    "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
    "her",
    /* i */
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
    "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
    "isl", "ita", "iku",
    /* j */
    "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
    /* k */
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
    "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
    "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
    "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
    "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
    /* l */
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
    "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
    "lua", "lui", "lun", "luo", "lus", "luy", "lav",
    /* m */
    "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
    "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
    "mwl", "mwr", "mya", "mye", "myn", "myv",
    /* n */
    "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
    "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
    "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
    "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
    "nzi",
    /* o */
    "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
    /* p */
    "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
    "por",
    /* q */
    "que",
    /* r */
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
    "rom", "rus", "rup", "kin", "rwk",
    /* s */
    "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
    "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
    "shi", "shn", "shu", "sin", "sid", "sio", "sit",
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
    "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
    "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
    /* t */
    "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
    "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
    "twq", "tah", "tyv", "tzm",
    /* u */
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    /* v */
    "vai", "ven", "vie", "vol", "vot", "vun",
    /* w */
    "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
    /* x */
    "xal", "xho", "xog",
    /* y */
    "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
    /* z */
    "zha", "zap", "zbl", "zen", "zgh", "zho", "znd", "zul",
    "zun", "zxx", "zza",
    NULL, /* end of the public list */
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
    NULL  /* end of the private list */
};
293
374ca955
A
294/**
295 * Table of 2-letter country codes.
296 *
297 * This list must be in sorted order. This list is returned directly
298 * to the user by some API.
299 *
300 * This list must be kept in sync with COUNTRIES_3, with corresponding
301 * entries matched.
302 *
303 * This table should be terminated with a NULL entry, followed by a
304 * second list, and another NULL entry. The first list is visible to
305 * user code when this array is returned by API. The second list
306 * contains codes we support, but do not expose through user API.
307 *
308 * Notes:
309 *
310 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
311 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
312 * new codes keeping the old ones for compatibility updated to include
313 * 1999/12/03 revisions *CWB*
314 *
315 * RO(ROM) is now RO(ROU) according to
316 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
317 */
/*
 * 2-letter country codes. Layout: a sorted public list, NULL, a private list
 * of obsolete codes, NULL. Entry i corresponds to entry i of COUNTRIES_3.
 */
static const char * const COUNTRIES[] = {
    "AD", "AE", "AF", "AG", "AI", "AL", "AM",
    "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
    "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
    "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
    "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
    "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
    "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
    "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
    "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
    "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
    "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
    "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
    "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
    "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
    "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
    "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
    "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
    "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
    "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
    "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
    "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
    "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
    "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
    "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
    "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
    "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
    "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
    "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
    "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
    "WS", "YE", "YT", "ZA", "ZM", "ZW",
    NULL, /* end of the public list */
    "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
    NULL  /* end of the private list */
};
353
51004dcb
A
/* Deprecated country codes; entry i is replaced by REPLACEMENT_COUNTRIES[i]. */
static const char * const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD",
    "DY", "FX", "HV", "NH",
    "RH", "SU", "TP", "UK",
    "VD", "YD", "YU", "ZR",
    NULL, NULL
};
/* Current codes for the entries of DEPRECATED_COUNTRIES, matched by index. */
static const char * const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", */
    "CW", "MM", "RS", "DE",
/*  "DY", "FX", "HV", "NH", */
    "BJ", "FR", "BF", "VU",
/*  "RH", "SU", "TP", "UK", */
    "ZW", "RU", "TL", "GB",
/*  "VD", "YD", "YU", "ZR"  */
    "VN", "YE", "RS", "CD",
    NULL, NULL
};
361
374ca955
A
362/**
363 * Table of 3-letter country codes.
364 *
365 * This is a lookup table used to convert 3-letter country codes to
366 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
367 * For all valid i, COUNTRIES[i] must refer to the same country as
368 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
369 * to make eyeballing this baby easier.
370 *
371 * This table should be terminated with a NULL entry, followed by a
372 * second list, and another NULL entry. The two lists correspond to
373 * the two lists in COUNTRIES.
374 */
/*
 * 3-letter country codes, index-aligned with COUNTRIES: entry i names the
 * same country as COUNTRIES[i]. The commented rows repeat the matching
 * 2-letter codes. Layout: public list, NULL, private (obsolete) list, NULL.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",      */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",      */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",      */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",      */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",      */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",      */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",      */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",      */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
    NULL, /* end of the public list */
/*  "AN",  "BU",  "CS",  "FX",  "RO",  "SU",  "TP",  "YD",  "YU",  "ZR"      */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
    NULL  /* end of the private list */
};
441
374ca955
A
/* One row of the locale-ID canonicalization table. */
typedef struct CanonicalizationMap {
    /* Locale ID as it may appear on input. */
    const char *id;
    /* Canonical locale ID that replaces the input ID. */
    const char *canonicalID;
    /* Keyword associated with the canonical ID, or NULL if none. */
    const char *keyword;
    /* Value for that keyword, or NULL when keyword is NULL. */
    const char *value;
} CanonicalizationMap;
448
449/**
450 * A map to canonicalize locale IDs. This handles a variety of
451 * different semantic kinds of transformations.
452 */
453static const CanonicalizationMap CANONICALIZE_MAP[] = {
454 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */
729e4ab9 455 { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */
73c04bcf 456 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
374ca955
A
457 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
458 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
459 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
460 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
46f4442e 461 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
374ca955
A
462 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
463 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
464 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
465 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
374ca955
A
466 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
467 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
46f4442e 468 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
374ca955
A
469 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
470 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
471 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
472 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
473 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
474 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
475 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
476 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
46f4442e 477 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
374ca955 478 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
46f4442e 479 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
374ca955
A
480 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */
481 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
482 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
483 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
46f4442e
A
484 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
485 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */
486 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
487 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
73c04bcf 488 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
374ca955
A
489 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
490 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
491 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
46f4442e 492 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */
4388f060 493 { "zh_GAN", "gan", NULL, NULL }, /* registered name */
374ca955 494 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
4388f060
A
495 { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */
496 { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */
497 { "zh_WUU", "wuu", NULL, NULL }, /* registered name */
498 { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */
499 { "zh_YUE", "yue", NULL, NULL }, /* registered name */
46f4442e
A
500};
501
/* One row of the variant-to-keyword table. */
typedef struct VariantMap {
    /* Variant string as it appears in the input ID. */
    const char *variant;
    /* Keyword the variant maps to, or NULL if none. */
    const char *keyword;
    /* Value for that keyword, or NULL when keyword is NULL. */
    const char *value;
} VariantMap;
507
508static const VariantMap VARIANT_MAP[] = {
509 { "EURO", "currency", "EUR" },
510 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
511 { "STROKE", "collation", "stroke" } /* Solaris variant */
374ca955
A
512};
513
729e4ab9
A
514/* ### BCP47 Conversion *******************************************/
/*
 * Test whether a locale ID should be treated as a BCP47 language tag: it is
 * non-NULL, contains no '@', and its shortest separator-terminated subtag
 * (see getShortestSubtagLength) is a single character.
 *
 * The argument is evaluated more than once, so it must be free of side
 * effects. Earlier versions of this macro ignored the argument in the last
 * test and read a variable literally named `localeID` from the caller's
 * scope; the macro now uses its own parameter throughout.
 */
#define _hasBCP47Extension(id) \
    ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Convert a BCP47 language tag to a Unicode locale ID.
 *
 * finalID  lvalue that receives the ID to use afterwards
 * id       the incoming ID (a C string)
 * buffer   scratch buffer that receives the converted ID
 * length   capacity of buffer
 * err      pointer to the UErrorCode passed to uloc_forLanguageTag
 *
 * finalID is set to buffer only when the conversion produced a non-empty,
 * NUL-terminated result; otherwise it is set to id, unchanged. A result that
 * exactly fills buffer (U_STRING_NOT_TERMINATED_WARNING) counts as a failed
 * conversion, because finalID is subsequently read as a C string.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement, e.g. as the unbraced body of an if/else. Invoke it with a
 * trailing semicolon.
 */
#define _ConvertBCP47(finalID, id, buffer, length, err) \
    do { \
        if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 || \
                U_FAILURE(*(err)) || *(err) == U_STRING_NOT_TERMINATED_WARNING) { \
            (finalID) = (id); \
        } else { \
            (finalID) = (buffer); \
        } \
    } while (0)
/*
 * Returns the length of the shortest subtag in localeID, where subtags are
 * separated by '_' or '-'. Only subtags that are followed by a separator are
 * measured; the final subtag is not. If no such subtag exists, the length of
 * the whole string is returned.
 */
static int32_t getShortestSubtagLength(const char *localeID) {
    int32_t shortest = -1; /* -1 until a separator-terminated subtag is seen */
    int32_t run = 0;       /* length of the subtag currently being scanned */
    int32_t total = 0;     /* number of characters consumed so far */
    const char *p;

    for (p = localeID; *p != 0; ++p) {
        ++total;
        if (*p == '_' || *p == '-') {
            /* A separator closes the current subtag; empty subtags are ignored. */
            if (run != 0 && (shortest < 0 || run < shortest)) {
                shortest = run;
            }
            run = 0;
        } else {
            ++run;
        }
    }

    return (shortest < 0) ? total : shortest;
}
549
374ca955
A
550/* ### Keywords **************************************************/
551
/* Size of the buffer that holds one keyword name, terminating NUL included. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of keywords collected from a single locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
554
729e4ab9 555U_CAPI const char * U_EXPORT2
374ca955 556locale_getKeywordsStart(const char *localeID) {
374ca955 557 const char *result = NULL;
374ca955
A
558 if((result = uprv_strchr(localeID, '@')) != NULL) {
559 return result;
73c04bcf
A
560 }
561#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
562 else {
563 /* We do this because the @ sign is variant, and the @ sign used on one
564 EBCDIC machine won't be compiled the same way on other EBCDIC based
565 machines. */
566 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
374ca955
A
567 const uint8_t *charToFind = ebcdicSigns;
568 while(*charToFind) {
569 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
570 return result;
571 }
572 charToFind++;
573 }
574 }
73c04bcf 575#endif
374ca955
A
576 return NULL;
577}
578
579/**
580 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
581 * @param keywordName incoming name to be canonicalized
582 * @param status return status (keyword too long)
583 * @return length of the keyword name
584 */
585static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
586{
587 int32_t i;
73c04bcf 588 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
374ca955
A
589
590 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
591 /* keyword name too long for internal buffer */
592 *status = U_INTERNAL_PROGRAM_ERROR;
593 return 0;
594 }
595
596 /* normalize the keyword name */
597 for(i = 0; i < keywordNameLen; i++) {
598 buf[i] = uprv_tolower(keywordName[i]);
599 }
600 buf[i] = 0;
601
602 return keywordNameLen;
603}
604
605typedef struct {
606 char keyword[ULOC_KEYWORD_BUFFER_LEN];
607 int32_t keywordLen;
608 const char *valueStart;
609 int32_t valueLen;
610} KeywordStruct;
611
612static int32_t U_CALLCONV
4388f060 613compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
374ca955
A
614 const char* leftString = ((const KeywordStruct *)left)->keyword;
615 const char* rightString = ((const KeywordStruct *)right)->keyword;
616 return uprv_strcmp(leftString, rightString);
617}
618
/**
 * Both addKeyword and addValue must already be in canonical form.
 * Either both addKeyword and addValue are NULL, or neither is NULL.
 * If they are not NULL, they must be zero-terminated.
 * If addKeyword is not NULL, it must be short enough to fit in KeywordStruct.keyword.
 */
625static int32_t
626_getKeywords(const char *localeID,
627 char prev,
628 char *keywords, int32_t keywordCapacity,
629 char *values, int32_t valuesCapacity, int32_t *valLen,
630 UBool valuesToo,
631 const char* addKeyword,
632 const char* addValue,
633 UErrorCode *status)
634{
635 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
636
637 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
638 int32_t numKeywords = 0;
639 const char* pos = localeID;
640 const char* equalSign = NULL;
641 const char* semicolon = NULL;
642 int32_t i = 0, j, n;
643 int32_t keywordsLen = 0;
644 int32_t valuesLen = 0;
645
646 if(prev == '@') { /* start of keyword definition */
647 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
648 do {
649 UBool duplicate = FALSE;
650 /* skip leading spaces */
651 while(*pos == ' ') {
652 pos++;
653 }
654 if (!*pos) { /* handle trailing "; " */
655 break;
656 }
657 if(numKeywords == maxKeywords) {
658 *status = U_INTERNAL_PROGRAM_ERROR;
659 return 0;
660 }
661 equalSign = uprv_strchr(pos, '=');
662 semicolon = uprv_strchr(pos, ';');
663 /* lack of '=' [foo@currency] is illegal */
664 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
665 if(!equalSign || (semicolon && semicolon<equalSign)) {
666 *status = U_INVALID_FORMAT_ERROR;
667 return 0;
668 }
669 /* need to normalize both keyword and keyword name */
670 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
671 /* keyword name too long for internal buffer */
672 *status = U_INTERNAL_PROGRAM_ERROR;
673 return 0;
674 }
675 for(i = 0, n = 0; i < equalSign - pos; ++i) {
676 if (pos[i] != ' ') {
677 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
678 }
679 }
57a6839d
A
680
681 /* zero-length keyword is an error. */
682 if (n == 0) {
683 *status = U_INVALID_FORMAT_ERROR;
684 return 0;
685 }
686
374ca955
A
687 keywordList[numKeywords].keyword[n] = 0;
688 keywordList[numKeywords].keywordLen = n;
689 /* now grab the value part. First we skip the '=' */
690 equalSign++;
691 /* then we leading spaces */
692 while(*equalSign == ' ') {
693 equalSign++;
694 }
57a6839d
A
695
696 /* Premature end or zero-length value */
697 if (!equalSign || equalSign == semicolon) {
698 *status = U_INVALID_FORMAT_ERROR;
699 return 0;
700 }
701
374ca955 702 keywordList[numKeywords].valueStart = equalSign;
57a6839d 703
374ca955
A
704 pos = semicolon;
705 i = 0;
706 if(pos) {
707 while(*(pos - i - 1) == ' ') {
708 i++;
709 }
73c04bcf 710 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
374ca955
A
711 pos++;
712 } else {
73c04bcf 713 i = (int32_t)uprv_strlen(equalSign);
4388f060 714 while(i && equalSign[i-1] == ' ') {
374ca955
A
715 i--;
716 }
717 keywordList[numKeywords].valueLen = i;
718 }
719 /* If this is a duplicate keyword, then ignore it */
720 for (j=0; j<numKeywords; ++j) {
721 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
722 duplicate = TRUE;
723 break;
724 }
725 }
726 if (!duplicate) {
727 ++numKeywords;
728 }
729 } while(pos);
730
731 /* Handle addKeyword/addValue. */
732 if (addKeyword != NULL) {
733 UBool duplicate = FALSE;
734 U_ASSERT(addValue != NULL);
735 /* Search for duplicate; if found, do nothing. Explicit keyword
736 overrides addKeyword. */
737 for (j=0; j<numKeywords; ++j) {
738 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
739 duplicate = TRUE;
740 break;
741 }
742 }
743 if (!duplicate) {
744 if (numKeywords == maxKeywords) {
745 *status = U_INTERNAL_PROGRAM_ERROR;
746 return 0;
747 }
748 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
73c04bcf 749 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
374ca955 750 keywordList[numKeywords].valueStart = addValue;
73c04bcf 751 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
374ca955
A
752 ++numKeywords;
753 }
754 } else {
755 U_ASSERT(addValue == NULL);
756 }
757
758 /* now we have a list of keywords */
759 /* we need to sort it */
760 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
761
762 /* Now construct the keyword part */
763 for(i = 0; i < numKeywords; i++) {
764 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
765 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
766 if(valuesToo) {
767 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
768 } else {
769 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
770 }
771 }
772 keywordsLen += keywordList[i].keywordLen + 1;
773 if(valuesToo) {
774 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
775 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
776 }
777 keywordsLen += keywordList[i].valueLen;
778
779 if(i < numKeywords - 1) {
780 if(keywordsLen < keywordCapacity) {
781 keywords[keywordsLen] = ';';
782 }
783 keywordsLen++;
784 }
785 }
786 if(values) {
787 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
788 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
789 values[valuesLen + keywordList[i].valueLen] = 0;
790 }
791 valuesLen += keywordList[i].valueLen + 1;
792 }
793 }
794 if(values) {
795 values[valuesLen] = 0;
796 if(valLen) {
797 *valLen = valuesLen;
798 }
799 }
800 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
801 } else {
802 return 0;
803 }
804}
805
806U_CFUNC int32_t
807locale_getKeywords(const char *localeID,
808 char prev,
809 char *keywords, int32_t keywordCapacity,
810 char *values, int32_t valuesCapacity, int32_t *valLen,
811 UBool valuesToo,
812 UErrorCode *status) {
813 return _getKeywords(localeID, prev, keywords, keywordCapacity,
814 values, valuesCapacity, valLen, valuesToo,
815 NULL, NULL, status);
816}
817
818U_CAPI int32_t U_EXPORT2
819uloc_getKeywordValue(const char* localeID,
820 const char* keywordName,
821 char* buffer, int32_t bufferCapacity,
822 UErrorCode* status)
823{
729e4ab9 824 const char* startSearchHere = NULL;
374ca955 825 const char* nextSeparator = NULL;
374ca955
A
826 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
827 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
828 int32_t i = 0;
829 int32_t result = 0;
830
831 if(status && U_SUCCESS(*status) && localeID) {
729e4ab9
A
832 char tempBuffer[ULOC_FULLNAME_CAPACITY];
833 const char* tmpLocaleID;
834
835 if (_hasBCP47Extension(localeID)) {
836 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
837 } else {
838 tmpLocaleID=localeID;
839 }
374ca955 840
729e4ab9 841 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
374ca955
A
842 if(startSearchHere == NULL) {
843 /* no keywords, return at once */
844 return 0;
845 }
846
73c04bcf 847 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
374ca955
A
848 if(U_FAILURE(*status)) {
849 return 0;
850 }
851
852 /* find the first keyword */
853 while(startSearchHere) {
854 startSearchHere++;
855 /* skip leading spaces (allowed?) */
856 while(*startSearchHere == ' ') {
857 startSearchHere++;
858 }
859 nextSeparator = uprv_strchr(startSearchHere, '=');
860 /* need to normalize both keyword and keyword name */
861 if(!nextSeparator) {
862 break;
863 }
864 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
865 /* keyword name too long for internal buffer */
866 *status = U_INTERNAL_PROGRAM_ERROR;
867 return 0;
868 }
869 for(i = 0; i < nextSeparator - startSearchHere; i++) {
870 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
871 }
872 /* trim trailing spaces */
873 while(startSearchHere[i-1] == ' ') {
874 i--;
4388f060 875 U_ASSERT(i>=0);
374ca955
A
876 }
877 localeKeywordNameBuffer[i] = 0;
878
879 startSearchHere = uprv_strchr(nextSeparator, ';');
880
881 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
882 nextSeparator++;
883 while(*nextSeparator == ' ') {
884 nextSeparator++;
885 }
886 /* we actually found the keyword. Copy the value */
887 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
888 while(*(startSearchHere-1) == ' ') {
889 startSearchHere--;
890 }
891 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
73c04bcf 892 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
374ca955 893 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
73c04bcf 894 i = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
895 while(nextSeparator[i - 1] == ' ') {
896 i--;
897 }
898 uprv_strncpy(buffer, nextSeparator, i);
899 result = u_terminateChars(buffer, bufferCapacity, i, status);
900 } else {
901 /* give a bigger buffer, please */
902 *status = U_BUFFER_OVERFLOW_ERROR;
903 if(startSearchHere) {
73c04bcf 904 result = (int32_t)(startSearchHere - nextSeparator);
374ca955 905 } else {
73c04bcf 906 result = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
907 }
908 }
909 return result;
910 }
911 }
912 }
913 return 0;
914}
915
916U_CAPI int32_t U_EXPORT2
917uloc_setKeywordValue(const char* keywordName,
918 const char* keywordValue,
919 char* buffer, int32_t bufferCapacity,
920 UErrorCode* status)
921{
922 /* TODO: sorting. removal. */
923 int32_t keywordNameLen;
924 int32_t keywordValueLen;
925 int32_t bufLen;
926 int32_t needLen = 0;
927 int32_t foundValueLen;
928 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
929 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
930 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
931 int32_t i = 0;
932 int32_t rc;
933 char* nextSeparator = NULL;
934 char* nextEqualsign = NULL;
935 char* startSearchHere = NULL;
936 char* keywordStart = NULL;
937 char *insertHere = NULL;
938 if(U_FAILURE(*status)) {
939 return -1;
940 }
73c04bcf
A
941 if(bufferCapacity>1) {
942 bufLen = (int32_t)uprv_strlen(buffer);
943 } else {
944 *status = U_ILLEGAL_ARGUMENT_ERROR;
945 return 0;
946 }
947 if(bufferCapacity<bufLen) {
948 /* The capacity is less than the length?! Is this NULL terminated? */
949 *status = U_ILLEGAL_ARGUMENT_ERROR;
950 return 0;
951 }
374ca955
A
952 if(keywordValue && !*keywordValue) {
953 keywordValue = NULL;
954 }
955 if(keywordValue) {
73c04bcf 956 keywordValueLen = (int32_t)uprv_strlen(keywordValue);
374ca955
A
957 } else {
958 keywordValueLen = 0;
959 }
960 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
961 if(U_FAILURE(*status)) {
962 return 0;
963 }
964 startSearchHere = (char*)locale_getKeywordsStart(buffer);
374ca955
A
965 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
966 if(!keywordValue) { /* no keywords = nothing to remove */
967 return bufLen;
968 }
969
970 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
971 if(startSearchHere) { /* had a single @ */
972 needLen--; /* already had the @ */
973 /* startSearchHere points at the @ */
974 } else {
975 startSearchHere=buffer+bufLen;
976 }
977 if(needLen >= bufferCapacity) {
978 *status = U_BUFFER_OVERFLOW_ERROR;
979 return needLen; /* no change */
980 }
981 *startSearchHere = '@';
982 startSearchHere++;
983 uprv_strcpy(startSearchHere, keywordNameBuffer);
984 startSearchHere += keywordNameLen;
985 *startSearchHere = '=';
986 startSearchHere++;
987 uprv_strcpy(startSearchHere, keywordValue);
988 startSearchHere+=keywordValueLen;
989 return needLen;
990 } /* end shortcut - no @ */
991
992 keywordStart = startSearchHere;
993 /* search for keyword */
994 while(keywordStart) {
995 keywordStart++;
996 /* skip leading spaces (allowed?) */
997 while(*keywordStart == ' ') {
998 keywordStart++;
999 }
1000 nextEqualsign = uprv_strchr(keywordStart, '=');
1001 /* need to normalize both keyword and keyword name */
1002 if(!nextEqualsign) {
1003 break;
1004 }
1005 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
1006 /* keyword name too long for internal buffer */
1007 *status = U_INTERNAL_PROGRAM_ERROR;
1008 return 0;
1009 }
1010 for(i = 0; i < nextEqualsign - keywordStart; i++) {
1011 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
1012 }
1013 /* trim trailing spaces */
1014 while(keywordStart[i-1] == ' ') {
1015 i--;
1016 }
51004dcb 1017 U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
374ca955
A
1018 localeKeywordNameBuffer[i] = 0;
1019
1020 nextSeparator = uprv_strchr(nextEqualsign, ';');
1021 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1022 if(rc == 0) {
1023 nextEqualsign++;
1024 while(*nextEqualsign == ' ') {
1025 nextEqualsign++;
1026 }
1027 /* we actually found the keyword. Change the value */
1028 if (nextSeparator) {
1029 keywordAtEnd = 0;
73c04bcf 1030 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
374ca955
A
1031 } else {
1032 keywordAtEnd = 1;
73c04bcf 1033 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
374ca955
A
1034 }
1035 if(keywordValue) { /* adding a value - not removing */
1036 if(foundValueLen == keywordValueLen) {
1037 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1038 return bufLen; /* no change in size */
1039 } else if(foundValueLen > keywordValueLen) {
1040 int32_t delta = foundValueLen - keywordValueLen;
1041 if(nextSeparator) { /* RH side */
1042 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1043 }
1044 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1045 bufLen -= delta;
1046 buffer[bufLen]=0;
1047 return bufLen;
1048 } else { /* FVL < KVL */
1049 int32_t delta = keywordValueLen - foundValueLen;
1050 if((bufLen+delta) >= bufferCapacity) {
1051 *status = U_BUFFER_OVERFLOW_ERROR;
1052 return bufLen+delta;
1053 }
1054 if(nextSeparator) { /* RH side */
1055 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1056 }
1057 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1058 bufLen += delta;
1059 buffer[bufLen]=0;
1060 return bufLen;
1061 }
1062 } else { /* removing a keyword */
1063 if(keywordAtEnd) {
1064 /* zero out the ';' or '@' just before startSearchhere */
1065 keywordStart[-1] = 0;
73c04bcf 1066 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
374ca955
A
1067 } else {
1068 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1069 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
73c04bcf 1070 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
374ca955
A
1071 }
1072 }
1073 } else if(rc<0){ /* end match keyword */
1074 /* could insert at this location. */
1075 insertHere = keywordStart;
1076 }
1077 keywordStart = nextSeparator;
1078 } /* end loop searching */
1079
1080 if(!keywordValue) {
1081 return bufLen; /* removal of non-extant keyword - no change */
1082 }
1083
1084 /* we know there is at least one keyword. */
1085 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1086 if(needLen >= bufferCapacity) {
1087 *status = U_BUFFER_OVERFLOW_ERROR;
1088 return needLen; /* no change */
1089 }
1090
1091 if(insertHere) {
1092 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1093 keywordStart = insertHere;
1094 } else {
1095 keywordStart = buffer+bufLen;
1096 *keywordStart = ';';
1097 keywordStart++;
1098 }
1099 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1100 keywordStart += keywordNameLen;
1101 *keywordStart = '=';
1102 keywordStart++;
1103 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1104 keywordStart+=keywordValueLen;
1105 if(insertHere) {
1106 *keywordStart = ';';
1107 keywordStart++;
1108 }
1109 buffer[needLen]=0;
1110 return needLen;
1111}
b75a7d8f 1112
374ca955 1113/* ### ID parsing implementation **************************************************/
b75a7d8f 1114
/* TRUE if a is one of the letters that can start a special prefix: x, X, i, I.
   Note: the argument is evaluated more than once. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant.
 * Note: the argument is evaluated more than once.
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1125
374ca955
A
1126static char* _strnchr(const char* str, int32_t len, char c) {
1127 U_ASSERT(str != 0 && len >= 0);
1128 while (len-- != 0) {
1129 char d = *str;
1130 if (d == c) {
1131 return (char*) str;
1132 } else if (d == 0) {
1133 break;
1134 }
1135 ++str;
1136 }
1137 return NULL;
1138}
1139
1140/**
1141 * Lookup 'key' in the array 'list'. The array 'list' should contain
1142 * a NULL entry, followed by more entries, and a second NULL entry.
1143 *
1144 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1145 * COUNTRIES_3.
1146 */
b75a7d8f
A
1147static int16_t _findIndex(const char* const* list, const char* key)
1148{
1149 const char* const* anchor = list;
374ca955
A
1150 int32_t pass = 0;
1151
1152 /* Make two passes through two NULL-terminated arrays at 'list' */
1153 while (pass++ < 2) {
1154 while (*list) {
1155 if (uprv_strcmp(key, *list) == 0) {
1156 return (int16_t)(list - anchor);
1157 }
1158 list++;
b75a7d8f 1159 }
374ca955 1160 ++list; /* skip final NULL *CWB*/
b75a7d8f
A
1161 }
1162 return -1;
1163}
1164
1165/* count the length of src while copying it to dest; return strlen(src) */
4388f060 1166static inline int32_t
b75a7d8f
A
1167_copyCount(char *dest, int32_t destCapacity, const char *src) {
1168 const char *anchor;
1169 char c;
1170
1171 anchor=src;
1172 for(;;) {
1173 if((c=*src)==0) {
1174 return (int32_t)(src-anchor);
1175 }
1176 if(destCapacity<=0) {
1177 return (int32_t)((src-anchor)+uprv_strlen(src));
1178 }
1179 ++src;
1180 *dest++=c;
1181 --destCapacity;
1182 }
1183}
1184
729e4ab9 1185U_CFUNC const char*
73c04bcf
A
1186uloc_getCurrentCountryID(const char* oldID){
1187 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1188 if (offset >= 0) {
1189 return REPLACEMENT_COUNTRIES[offset];
1190 }
1191 return oldID;
1192}
729e4ab9 1193U_CFUNC const char*
73c04bcf
A
1194uloc_getCurrentLanguageID(const char* oldID){
1195 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1196 if (offset >= 0) {
1197 return REPLACEMENT_LANGUAGES[offset];
1198 }
1199 return oldID;
1200}
b75a7d8f
A
1201/*
1202 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1203 * avoid duplicating code to handle the earlier locale ID pieces
1204 * in the functions for the later ones by
1205 * setting the *pEnd pointer to where they stopped parsing
1206 *
1207 * TODO try to use this in Locale
1208 */
729e4ab9
A
1209U_CFUNC int32_t
1210ulocimp_getLanguage(const char *localeID,
1211 char *language, int32_t languageCapacity,
1212 const char **pEnd) {
b75a7d8f
A
1213 int32_t i=0;
1214 int32_t offset;
1215 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1216
1217 /* if it starts with i- or x- then copy that prefix */
1218 if(_isIDPrefix(localeID)) {
1219 if(i<languageCapacity) {
1220 language[i]=(char)uprv_tolower(*localeID);
1221 }
1222 if(i<languageCapacity) {
1223 language[i+1]='-';
1224 }
1225 i+=2;
1226 localeID+=2;
1227 }
1228
1229 /* copy the language as far as possible and count its length */
1230 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1231 if(i<languageCapacity) {
1232 language[i]=(char)uprv_tolower(*localeID);
1233 }
1234 if(i<3) {
4388f060 1235 U_ASSERT(i>=0);
b75a7d8f
A
1236 lang[i]=(char)uprv_tolower(*localeID);
1237 }
1238 i++;
1239 localeID++;
1240 }
1241
1242 if(i==3) {
1243 /* convert 3 character code to 2 character code if possible *CWB*/
374ca955 1244 offset=_findIndex(LANGUAGES_3, lang);
b75a7d8f 1245 if(offset>=0) {
374ca955 1246 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
b75a7d8f
A
1247 }
1248 }
1249
1250 if(pEnd!=NULL) {
1251 *pEnd=localeID;
1252 }
1253 return i;
1254}
1255
729e4ab9
A
1256U_CFUNC int32_t
1257ulocimp_getScript(const char *localeID,
1258 char *script, int32_t scriptCapacity,
1259 const char **pEnd)
b75a7d8f 1260{
374ca955 1261 int32_t idLen = 0;
b75a7d8f 1262
374ca955
A
1263 if (pEnd != NULL) {
1264 *pEnd = localeID;
b75a7d8f 1265 }
374ca955
A
1266
1267 /* copy the second item as far as possible and count its length */
4388f060
A
1268 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1269 && uprv_isASCIILetter(localeID[idLen])) {
374ca955 1270 idLen++;
b75a7d8f
A
1271 }
1272
374ca955
A
1273 /* If it's exactly 4 characters long, then it's a script and not a country. */
1274 if (idLen == 4) {
1275 int32_t i;
1276 if (pEnd != NULL) {
1277 *pEnd = localeID+idLen;
1278 }
1279 if(idLen > scriptCapacity) {
1280 idLen = scriptCapacity;
1281 }
1282 if (idLen >= 1) {
1283 script[0]=(char)uprv_toupper(*(localeID++));
1284 }
1285 for (i = 1; i < idLen; i++) {
1286 script[i]=(char)uprv_tolower(*(localeID++));
1287 }
1288 }
1289 else {
1290 idLen = 0;
1291 }
1292 return idLen;
b75a7d8f
A
1293}
1294
729e4ab9
A
1295U_CFUNC int32_t
1296ulocimp_getCountry(const char *localeID,
1297 char *country, int32_t countryCapacity,
1298 const char **pEnd)
374ca955 1299{
729e4ab9 1300 int32_t idLen=0;
374ca955 1301 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
b75a7d8f
A
1302 int32_t offset;
1303
1304 /* copy the country as far as possible and count its length */
729e4ab9
A
1305 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1306 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1307 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
b75a7d8f 1308 }
729e4ab9 1309 idLen++;
b75a7d8f
A
1310 }
1311
729e4ab9
A
1312 /* the country should be either length 2 or 3 */
1313 if (idLen == 2 || idLen == 3) {
1314 UBool gotCountry = FALSE;
1315 /* convert 3 character code to 2 character code if possible *CWB*/
1316 if(idLen==3) {
1317 offset=_findIndex(COUNTRIES_3, cnty);
1318 if(offset>=0) {
1319 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1320 gotCountry = TRUE;
1321 }
1322 }
1323 if (!gotCountry) {
1324 int32_t i = 0;
1325 for (i = 0; i < idLen; i++) {
1326 if (i < countryCapacity) {
1327 country[i]=(char)uprv_toupper(localeID[i]);
1328 }
1329 }
b75a7d8f 1330 }
729e4ab9
A
1331 localeID+=idLen;
1332 } else {
1333 idLen = 0;
b75a7d8f
A
1334 }
1335
1336 if(pEnd!=NULL) {
1337 *pEnd=localeID;
1338 }
729e4ab9
A
1339
1340 return idLen;
b75a7d8f
A
1341}
1342
374ca955
A
1343/**
1344 * @param needSeparator if true, then add leading '_' if any variants
1345 * are added to 'variant'
1346 */
1347static int32_t
1348_getVariantEx(const char *localeID,
1349 char prev,
1350 char *variant, int32_t variantCapacity,
1351 UBool needSeparator) {
b75a7d8f
A
1352 int32_t i=0;
1353
1354 /* get one or more variant tags and separate them with '_' */
1355 if(_isIDSeparator(prev)) {
1356 /* get a variant string after a '-' or '_' */
1357 while(!_isTerminator(*localeID)) {
374ca955
A
1358 if (needSeparator) {
1359 if (i<variantCapacity) {
1360 variant[i] = '_';
1361 }
1362 ++i;
1363 needSeparator = FALSE;
1364 }
b75a7d8f
A
1365 if(i<variantCapacity) {
1366 variant[i]=(char)uprv_toupper(*localeID);
1367 if(variant[i]=='-') {
1368 variant[i]='_';
1369 }
1370 }
1371 i++;
1372 localeID++;
1373 }
1374 }
1375
1376 /* if there is no variant tag after a '-' or '_' then look for '@' */
1377 if(i==0) {
1378 if(prev=='@') {
1379 /* keep localeID */
374ca955 1380 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
b75a7d8f
A
1381 ++localeID; /* point after the '@' */
1382 } else {
1383 return 0;
1384 }
1385 while(!_isTerminator(*localeID)) {
374ca955
A
1386 if (needSeparator) {
1387 if (i<variantCapacity) {
1388 variant[i] = '_';
1389 }
1390 ++i;
1391 needSeparator = FALSE;
1392 }
b75a7d8f
A
1393 if(i<variantCapacity) {
1394 variant[i]=(char)uprv_toupper(*localeID);
1395 if(variant[i]=='-' || variant[i]==',') {
1396 variant[i]='_';
1397 }
1398 }
1399 i++;
1400 localeID++;
1401 }
1402 }
374ca955 1403
b75a7d8f
A
1404 return i;
1405}
1406
374ca955
A
1407static int32_t
1408_getVariant(const char *localeID,
1409 char prev,
1410 char *variant, int32_t variantCapacity) {
1411 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1412}
1413
1414/**
1415 * Delete ALL instances of a variant from the given list of one or
1416 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1417 * @param variants the source string of one or more variants,
1418 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1419 * terminated; if it is, trailing zero will NOT be maintained.
1420 * @param variantsLen length of variants
1421 * @param toDelete variant to delete, without separators, e.g. "EURO"
1422 * or "PREEURO"; not zero terminated
1423 * @param toDeleteLen length of toDelete
1424 * @return number of characters deleted from variants
1425 */
1426static int32_t
1427_deleteVariant(char* variants, int32_t variantsLen,
46f4442e
A
1428 const char* toDelete, int32_t toDeleteLen)
1429{
374ca955
A
1430 int32_t delta = 0; /* number of chars deleted */
1431 for (;;) {
1432 UBool flag = FALSE;
1433 if (variantsLen < toDeleteLen) {
1434 return delta;
1435 }
1436 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1437 (variantsLen == toDeleteLen ||
46f4442e
A
1438 (flag=(variants[toDeleteLen] == '_'))))
1439 {
374ca955
A
1440 int32_t d = toDeleteLen + (flag?1:0);
1441 variantsLen -= d;
1442 delta += d;
46f4442e
A
1443 if (variantsLen > 0) {
1444 uprv_memmove(variants, variants+d, variantsLen);
1445 }
374ca955
A
1446 } else {
1447 char* p = _strnchr(variants, variantsLen, '_');
1448 if (p == NULL) {
1449 return delta;
1450 }
1451 ++p;
73c04bcf 1452 variantsLen -= (int32_t)(p - variants);
374ca955
A
1453 variants = p;
1454 }
1455 }
1456}
1457
1458/* Keyword enumeration */
1459
/* State of one keyword enumeration: a buffer of NUL-separated keyword names
   that ends with an empty string, plus the read position inside it. */
1460typedef struct UKeywordsContext {
/* start of the keyword buffer; released with uprv_free() by uloc_kw_closeKeywords() */
1461 char* keywords;
/* next keyword to hand out; advanced by uloc_kw_nextKeyword(), set back to `keywords` by uloc_kw_resetKeywords() */
1462 char* current;
1463} UKeywordsContext;
1464
1465static void U_CALLCONV
1466uloc_kw_closeKeywords(UEnumeration *enumerator) {
1467 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1468 uprv_free(enumerator->context);
1469 uprv_free(enumerator);
1470}
1471
1472static int32_t U_CALLCONV
4388f060 1473uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
374ca955
A
1474 char *kw = ((UKeywordsContext *)en->context)->keywords;
1475 int32_t result = 0;
1476 while(*kw) {
1477 result++;
1478 kw += uprv_strlen(kw)+1;
1479 }
1480 return result;
1481}
1482
1483static const char* U_CALLCONV
1484uloc_kw_nextKeyword(UEnumeration* en,
1485 int32_t* resultLength,
4388f060 1486 UErrorCode* /*status*/) {
374ca955
A
1487 const char* result = ((UKeywordsContext *)en->context)->current;
1488 int32_t len = 0;
1489 if(*result) {
73c04bcf 1490 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
374ca955
A
1491 ((UKeywordsContext *)en->context)->current += len+1;
1492 } else {
1493 result = NULL;
1494 }
1495 if (resultLength) {
1496 *resultLength = len;
1497 }
1498 return result;
1499}
1500
1501static void U_CALLCONV
1502uloc_kw_resetKeywords(UEnumeration* en,
4388f060 1503 UErrorCode* /*status*/) {
374ca955
A
1504 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1505}
1506
/* Template for keyword enumerations. uloc_openKeywordList() copies it into
   each newly allocated UEnumeration and then sets that copy's `context`.
   NOTE(review): the initializer is positional; the meaning of each slot
   comes from the UEnumeration layout, which is declared outside this file
   (presumably uenumimp.h) - confirm the order there before editing. */
1507static const UEnumeration gKeywordsEnum = {
/* the two leading pointer slots are left NULL in the template */
1508 NULL,
1509 NULL,
1510 uloc_kw_closeKeywords,
1511 uloc_kw_countKeywords,
1512 uenum_unextDefault,
1513 uloc_kw_nextKeyword,
1514 uloc_kw_resetKeywords
1515};
1516
1517U_CAPI UEnumeration* U_EXPORT2
1518uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
b75a7d8f 1519{
46f4442e
A
1520 UKeywordsContext *myContext = NULL;
1521 UEnumeration *result = NULL;
b75a7d8f 1522
46f4442e
A
1523 if(U_FAILURE(*status)) {
1524 return NULL;
1525 }
1526 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1527 /* Null pointer test */
1528 if (result == NULL) {
1529 *status = U_MEMORY_ALLOCATION_ERROR;
1530 return NULL;
1531 }
1532 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
51004dcb 1533 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
46f4442e
A
1534 if (myContext == NULL) {
1535 *status = U_MEMORY_ALLOCATION_ERROR;
1536 uprv_free(result);
1537 return NULL;
1538 }
1539 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1540 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1541 myContext->keywords[keywordListSize] = 0;
1542 myContext->current = myContext->keywords;
1543 result->context = myContext;
1544 return result;
374ca955
A
1545}
1546
1547U_CAPI UEnumeration* U_EXPORT2
1548uloc_openKeywords(const char* localeID,
1549 UErrorCode* status)
1550{
1551 int32_t i=0;
1552 char keywords[256];
1553 int32_t keywordsCapacity = 256;
729e4ab9
A
1554 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1555 const char* tmpLocaleID;
1556
374ca955 1557 if(status==NULL || U_FAILURE(*status)) {
b75a7d8f
A
1558 return 0;
1559 }
1560
729e4ab9
A
1561 if (_hasBCP47Extension(localeID)) {
1562 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1563 } else {
1564 if (localeID==NULL) {
1565 localeID=uloc_getDefault();
1566 }
1567 tmpLocaleID=localeID;
b75a7d8f
A
1568 }
1569
374ca955 1570 /* Skip the language */
729e4ab9
A
1571 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1572 if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1573 const char *scriptID;
1574 /* Skip the script if available */
729e4ab9
A
1575 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1576 if(scriptID != tmpLocaleID+1) {
374ca955 1577 /* Found optional script */
729e4ab9 1578 tmpLocaleID = scriptID;
374ca955
A
1579 }
1580 /* Skip the Country */
729e4ab9
A
1581 if (_isIDSeparator(*tmpLocaleID)) {
1582 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1583 if(_isIDSeparator(*tmpLocaleID)) {
1584 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
374ca955 1585 }
b75a7d8f
A
1586 }
1587 }
1588
374ca955 1589 /* keywords are located after '@' */
729e4ab9
A
1590 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1591 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
374ca955
A
1592 }
1593
1594 if(i) {
1595 return uloc_openKeywordList(keywords, i, status);
1596 } else {
1597 return NULL;
b75a7d8f 1598 }
b75a7d8f
A
1599}
1600
b75a7d8f 1601
374ca955
A
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'. Both arguments are
   parenthesized so that expressions such as A|B can be passed as the mask. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The tag "i-default", stored without a terminating NUL. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
/* Number of chars in i_default; the value has type size_t. */
#define I_DEFAULT_LENGTH (sizeof(i_default) / sizeof(i_default[0]))
1610
374ca955
A
1611/**
1612 * Canonicalize the given localeID, to level 1 or to level 2,
1613 * depending on the options. To specify level 1, pass in options=0.
1614 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1615 *
1616 * This is the code underlying uloc_getName and uloc_canonicalize.
1617 */
1618static int32_t
1619_canonicalize(const char* localeID,
1620 char* result,
1621 int32_t resultCapacity,
1622 uint32_t options,
1623 UErrorCode* err) {
1624 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1625 char localeBuffer[ULOC_FULLNAME_CAPACITY];
729e4ab9 1626 char tempBuffer[ULOC_FULLNAME_CAPACITY];
46f4442e 1627 const char* origLocaleID;
729e4ab9 1628 const char* tmpLocaleID;
374ca955
A
1629 const char* keywordAssign = NULL;
1630 const char* separatorIndicator = NULL;
1631 const char* addKeyword = NULL;
1632 const char* addValue = NULL;
1633 char* name;
1634 char* variant = NULL; /* pointer into name, or NULL */
374ca955
A
1635
1636 if (U_FAILURE(*err)) {
b75a7d8f
A
1637 return 0;
1638 }
1639
729e4ab9
A
1640 if (_hasBCP47Extension(localeID)) {
1641 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1642 } else {
1643 if (localeID==NULL) {
1644 localeID=uloc_getDefault();
1645 }
1646 tmpLocaleID=localeID;
b75a7d8f 1647 }
729e4ab9
A
1648
1649 origLocaleID=tmpLocaleID;
b75a7d8f 1650
374ca955
A
1651 /* if we are doing a full canonicalization, then put results in
1652 localeBuffer, if necessary; otherwise send them to result. */
729e4ab9 1653 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
4388f060 1654 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
374ca955 1655 name = localeBuffer;
4388f060 1656 nameCapacity = (int32_t)sizeof(localeBuffer);
374ca955
A
1657 } else {
1658 name = result;
1659 nameCapacity = resultCapacity;
1660 }
1661
b75a7d8f 1662 /* get all pieces, one after another, and separate with '_' */
729e4ab9 1663 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
73c04bcf
A
1664
1665 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1666 const char *d = uloc_getDefault();
1667
729e4ab9 1668 len = (int32_t)uprv_strlen(d);
73c04bcf
A
1669
1670 if (name != NULL) {
1671 uprv_strncpy(name, d, len);
1672 }
729e4ab9 1673 } else if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1674 const char *scriptID;
1675
b75a7d8f 1676 ++fieldCount;
374ca955
A
1677 if(len<nameCapacity) {
1678 name[len]='_';
b75a7d8f 1679 }
374ca955
A
1680 ++len;
1681
4388f060
A
1682 scriptSize=ulocimp_getScript(tmpLocaleID+1,
1683 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
374ca955
A
1684 if(scriptSize > 0) {
1685 /* Found optional script */
729e4ab9 1686 tmpLocaleID = scriptID;
b75a7d8f 1687 ++fieldCount;
374ca955 1688 len+=scriptSize;
729e4ab9 1689 if (_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1690 /* If there is something else, then we add the _ */
1691 if(len<nameCapacity) {
1692 name[len]='_';
1693 }
1694 ++len;
1695 }
1696 }
1697
729e4ab9
A
1698 if (_isIDSeparator(*tmpLocaleID)) {
1699 const char *cntryID;
4388f060
A
1700 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1701 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
729e4ab9
A
1702 if (cntrySize > 0) {
1703 /* Found optional country */
1704 tmpLocaleID = cntryID;
1705 len+=cntrySize;
1706 }
1707 if(_isIDSeparator(*tmpLocaleID)) {
51004dcb
A
1708 /* If there is something else, then we add the _ if we found country before. */
1709 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
729e4ab9
A
1710 ++fieldCount;
1711 if(len<nameCapacity) {
1712 name[len]='_';
1713 }
1714 ++len;
374ca955 1715 }
729e4ab9 1716
4388f060
A
1717 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1718 (len<nameCapacity ? name+len : NULL), nameCapacity-len);
374ca955 1719 if (variantSize > 0) {
4388f060 1720 variant = len<nameCapacity ? name+len : NULL;
374ca955 1721 len += variantSize;
729e4ab9 1722 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
374ca955 1723 }
b75a7d8f 1724 }
b75a7d8f
A
1725 }
1726 }
1727
374ca955 1728 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
729e4ab9 1729 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
374ca955 1730 UBool done = FALSE;
b75a7d8f 1731 do {
729e4ab9 1732 char c = *tmpLocaleID;
374ca955
A
1733 switch (c) {
1734 case 0:
1735 case '@':
1736 done = TRUE;
1737 break;
1738 default:
1739 if (len<nameCapacity) {
1740 name[len] = c;
1741 }
1742 ++len;
729e4ab9 1743 ++tmpLocaleID;
374ca955
A
1744 break;
1745 }
1746 } while (!done);
1747 }
1748
1749 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
729e4ab9
A
1750 After this, tmpLocaleID either points to '@' or is NULL */
1751 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1752 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1753 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
374ca955
A
1754 }
1755
1756 /* Copy POSIX-style variant, if any [mr@FOO] */
1757 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
729e4ab9 1758 tmpLocaleID != NULL && keywordAssign == NULL) {
374ca955 1759 for (;;) {
729e4ab9 1760 char c = *tmpLocaleID;
374ca955
A
1761 if (c == 0) {
1762 break;
1763 }
1764 if (len<nameCapacity) {
1765 name[len] = c;
1766 }
1767 ++len;
729e4ab9 1768 ++tmpLocaleID;
374ca955
A
1769 }
1770 }
1771
1772 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1773 /* Handle @FOO variant if @ is present and not followed by = */
729e4ab9 1774 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
374ca955
A
1775 int32_t posixVariantSize;
1776 /* Add missing '_' if needed */
1777 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1778 do {
1779 if(len<nameCapacity) {
1780 name[len]='_';
1781 }
1782 ++len;
1783 ++fieldCount;
1784 } while(fieldCount<2);
1785 }
729e4ab9 1786 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
374ca955
A
1787 (UBool)(variantSize > 0));
1788 if (posixVariantSize > 0) {
1789 if (variant == NULL) {
1790 variant = name+len;
1791 }
1792 len += posixVariantSize;
1793 variantSize += posixVariantSize;
b75a7d8f 1794 }
374ca955
A
1795 }
1796
46f4442e
A
1797 /* Handle generic variants first */
1798 if (variant) {
1799 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1800 const char* variantToCompare = VARIANT_MAP[j].variant;
1801 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1802 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1803 len -= variantLen;
1804 if (variantLen > 0) {
b25be066 1805 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
46f4442e
A
1806 --len;
1807 }
1808 addKeyword = VARIANT_MAP[j].keyword;
1809 addValue = VARIANT_MAP[j].value;
1810 break;
1811 }
1812 }
b25be066 1813 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
46f4442e
A
1814 --len;
1815 }
374ca955
A
1816 }
1817
1818 /* Look up the ID in the canonicalization map */
1819 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1820 const char* id = CANONICALIZE_MAP[j].id;
73c04bcf 1821 int32_t n = (int32_t)uprv_strlen(id);
374ca955 1822 if (len == n && uprv_strncmp(name, id, n) == 0) {
729e4ab9 1823 if (n == 0 && tmpLocaleID != NULL) {
374ca955
A
1824 break; /* Don't remap "" if keywords present */
1825 }
1826 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
46f4442e
A
1827 if (CANONICALIZE_MAP[j].keyword) {
1828 addKeyword = CANONICALIZE_MAP[j].keyword;
1829 addValue = CANONICALIZE_MAP[j].value;
1830 }
374ca955
A
1831 break;
1832 }
1833 }
374ca955
A
1834 }
1835
1836 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
729e4ab9 1837 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
374ca955
A
1838 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1839 if(len<nameCapacity) {
1840 name[len]='@';
1841 }
1842 ++len;
b75a7d8f 1843 ++fieldCount;
4388f060
A
1844 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1845 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
374ca955 1846 } else if (addKeyword != NULL) {
51004dcb 1847 U_ASSERT(addValue != NULL && len < nameCapacity);
374ca955
A
1848 /* inelegant but works -- later make _getKeywords do this? */
1849 len += _copyCount(name+len, nameCapacity-len, "@");
1850 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1851 len += _copyCount(name+len, nameCapacity-len, "=");
1852 len += _copyCount(name+len, nameCapacity-len, addValue);
1853 }
1854 }
1855
46f4442e 1856 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
374ca955
A
1857 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1858 }
1859
1860 return u_terminateChars(result, resultCapacity, len, err);
1861}
1862
1863/* ### ID parsing API **************************************************/
1864
1865U_CAPI int32_t U_EXPORT2
1866uloc_getParent(const char* localeID,
1867 char* parent,
1868 int32_t parentCapacity,
1869 UErrorCode* err)
1870{
1871 const char *lastUnderscore;
1872 int32_t i;
1873
1874 if (U_FAILURE(*err))
1875 return 0;
1876
1877 if (localeID == NULL)
1878 localeID = uloc_getDefault();
1879
1880 lastUnderscore=uprv_strrchr(localeID, '_');
1881 if(lastUnderscore!=NULL) {
1882 i=(int32_t)(lastUnderscore-localeID);
1883 } else {
1884 i=0;
b75a7d8f 1885 }
374ca955 1886
73c04bcf 1887 if(i>0 && parent != localeID) {
374ca955
A
1888 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1889 }
1890 return u_terminateChars(parent, parentCapacity, i, err);
b75a7d8f 1891}
374ca955
A
1892
1893U_CAPI int32_t U_EXPORT2
1894uloc_getLanguage(const char* localeID,
1895 char* language,
1896 int32_t languageCapacity,
1897 UErrorCode* err)
1898{
1899 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1900 int32_t i=0;
1901
1902 if (err==NULL || U_FAILURE(*err)) {
1903 return 0;
1904 }
1905
1906 if(localeID==NULL) {
1907 localeID=uloc_getDefault();
1908 }
1909
729e4ab9 1910 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
374ca955
A
1911 return u_terminateChars(language, languageCapacity, i, err);
1912}
1913
1914U_CAPI int32_t U_EXPORT2
1915uloc_getScript(const char* localeID,
1916 char* script,
1917 int32_t scriptCapacity,
1918 UErrorCode* err)
1919{
1920 int32_t i=0;
1921
1922 if(err==NULL || U_FAILURE(*err)) {
1923 return 0;
1924 }
1925
1926 if(localeID==NULL) {
1927 localeID=uloc_getDefault();
1928 }
1929
1930 /* skip the language */
729e4ab9 1931 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955 1932 if(_isIDSeparator(*localeID)) {
729e4ab9 1933 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
374ca955
A
1934 }
1935 return u_terminateChars(script, scriptCapacity, i, err);
1936}
1937
1938U_CAPI int32_t U_EXPORT2
1939uloc_getCountry(const char* localeID,
1940 char* country,
1941 int32_t countryCapacity,
1942 UErrorCode* err)
1943{
1944 int32_t i=0;
1945
1946 if(err==NULL || U_FAILURE(*err)) {
1947 return 0;
1948 }
1949
1950 if(localeID==NULL) {
1951 localeID=uloc_getDefault();
1952 }
1953
1954 /* Skip the language */
729e4ab9 1955 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955
A
1956 if(_isIDSeparator(*localeID)) {
1957 const char *scriptID;
1958 /* Skip the script if available */
729e4ab9 1959 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
374ca955
A
1960 if(scriptID != localeID+1) {
1961 /* Found optional script */
1962 localeID = scriptID;
1963 }
1964 if(_isIDSeparator(*localeID)) {
729e4ab9 1965 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
374ca955
A
1966 }
1967 }
1968 return u_terminateChars(country, countryCapacity, i, err);
1969}
1970
1971U_CAPI int32_t U_EXPORT2
1972uloc_getVariant(const char* localeID,
1973 char* variant,
1974 int32_t variantCapacity,
1975 UErrorCode* err)
1976{
729e4ab9
A
1977 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1978 const char* tmpLocaleID;
374ca955 1979 int32_t i=0;
374ca955
A
1980
1981 if(err==NULL || U_FAILURE(*err)) {
1982 return 0;
1983 }
1984
729e4ab9
A
1985 if (_hasBCP47Extension(localeID)) {
1986 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1987 } else {
1988 if (localeID==NULL) {
1989 localeID=uloc_getDefault();
1990 }
1991 tmpLocaleID=localeID;
374ca955
A
1992 }
1993
1994 /* Skip the language */
729e4ab9
A
1995 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1996 if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1997 const char *scriptID;
1998 /* Skip the script if available */
729e4ab9
A
1999 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
2000 if(scriptID != tmpLocaleID+1) {
374ca955 2001 /* Found optional script */
729e4ab9 2002 tmpLocaleID = scriptID;
374ca955
A
2003 }
2004 /* Skip the Country */
729e4ab9
A
2005 if (_isIDSeparator(*tmpLocaleID)) {
2006 const char *cntryID;
2007 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
2008 if (cntryID != tmpLocaleID+1) {
2009 /* Found optional country */
2010 tmpLocaleID = cntryID;
2011 }
2012 if(_isIDSeparator(*tmpLocaleID)) {
2013 /* If there was no country ID, skip a possible extra IDSeparator */
2014 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2015 tmpLocaleID++;
2016 }
2017 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
374ca955
A
2018 }
2019 }
2020 }
2021
2022 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2023 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2024/*
2025 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2026 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2027 }
2028*/
2029 return u_terminateChars(variant, variantCapacity, i, err);
2030}
2031
2032U_CAPI int32_t U_EXPORT2
2033uloc_getName(const char* localeID,
2034 char* name,
2035 int32_t nameCapacity,
2036 UErrorCode* err)
2037{
2038 return _canonicalize(localeID, name, nameCapacity, 0, err);
2039}
2040
2041U_CAPI int32_t U_EXPORT2
2042uloc_getBaseName(const char* localeID,
2043 char* name,
2044 int32_t nameCapacity,
2045 UErrorCode* err)
2046{
2047 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2048}
2049
2050U_CAPI int32_t U_EXPORT2
2051uloc_canonicalize(const char* localeID,
2052 char* name,
2053 int32_t nameCapacity,
2054 UErrorCode* err)
2055{
2056 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2057}
2058
b75a7d8f
A
2059U_CAPI const char* U_EXPORT2
2060uloc_getISO3Language(const char* localeID)
2061{
374ca955
A
2062 int16_t offset;
2063 char lang[ULOC_LANG_CAPACITY];
2064 UErrorCode err = U_ZERO_ERROR;
2065
2066 if (localeID == NULL)
2067 {
2068 localeID = uloc_getDefault();
2069 }
2070 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2071 if (U_FAILURE(err))
2072 return "";
2073 offset = _findIndex(LANGUAGES, lang);
2074 if (offset < 0)
2075 return "";
2076 return LANGUAGES_3[offset];
b75a7d8f
A
2077}
2078
2079U_CAPI const char* U_EXPORT2
2080uloc_getISO3Country(const char* localeID)
2081{
2082 int16_t offset;
374ca955 2083 char cntry[ULOC_LANG_CAPACITY];
b75a7d8f
A
2084 UErrorCode err = U_ZERO_ERROR;
2085
2086 if (localeID == NULL)
2087 {
2088 localeID = uloc_getDefault();
2089 }
374ca955 2090 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
b75a7d8f
A
2091 if (U_FAILURE(err))
2092 return "";
374ca955 2093 offset = _findIndex(COUNTRIES, cntry);
b75a7d8f
A
2094 if (offset < 0)
2095 return "";
2096
374ca955 2097 return COUNTRIES_3[offset];
b75a7d8f
A
2098}
2099
2100U_CAPI uint32_t U_EXPORT2
2101uloc_getLCID(const char* localeID)
2102{
374ca955
A
2103 UErrorCode status = U_ZERO_ERROR;
2104 char langID[ULOC_FULLNAME_CAPACITY];
2105
2106 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2107 if (U_FAILURE(status)) {
2108 return 0;
b75a7d8f 2109 }
374ca955 2110
57a6839d
A
2111 if (uprv_strchr(localeID, '@')) {
2112 // uprv_convertToLCID does not support keywords other than collation.
2113 // Remove all keywords except collation.
2114 int32_t len;
2115 char collVal[ULOC_KEYWORDS_CAPACITY];
2116 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2117
2118 len = uloc_getKeywordValue(localeID, "collation", collVal,
2119 sizeof(collVal)/sizeof(collVal[0]) - 1, &status);
2120
2121 if (U_SUCCESS(status) && len > 0) {
2122 collVal[len] = 0;
2123
2124 len = uloc_getBaseName(localeID, tmpLocaleID,
2125 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - 1, &status);
2126
2127 if (U_SUCCESS(status)) {
2128 tmpLocaleID[len] = 0;
2129
2130 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2131 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - len - 1, &status);
2132
2133 if (U_SUCCESS(status)) {
2134 tmpLocaleID[len] = 0;
2135 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2136 }
2137 }
2138 }
2139
2140 // fall through - all keywords are simply ignored
2141 status = U_ZERO_ERROR;
2142 }
2143
374ca955
A
2144 return uprv_convertToLCID(langID, localeID, &status);
2145}
2146
73c04bcf
A
2147U_CAPI int32_t U_EXPORT2
2148uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2149 UErrorCode *status)
2150{
57a6839d 2151 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
73c04bcf
A
2152}
2153
374ca955
A
2154/* ### Default locale **************************************************/
2155
2156U_CAPI const char* U_EXPORT2
2157uloc_getDefault()
2158{
2159 return locale_get_default();
2160}
2161
2162U_CAPI void U_EXPORT2
2163uloc_setDefault(const char* newDefaultLocale,
2164 UErrorCode* err)
2165{
2166 if (U_FAILURE(*err))
2167 return;
2168 /* the error code isn't currently used for anything by this function*/
b75a7d8f 2169
374ca955
A
2170 /* propagate change to C++ */
2171 locale_set_default(newDefaultLocale);
b75a7d8f
A
2172}
2173
729e4ab9 2174/**
51004dcb 2175 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
729e4ab9
A
2176 * to an array of pointers to arrays of char. All of these pointers are owned
2177 * by ICU-- do not delete them, and do not write through them. The array is
2178 * terminated with a null pointer.
2179 */
2180U_CAPI const char* const* U_EXPORT2
2181uloc_getISOLanguages()
2182{
2183 return LANGUAGES;
2184}
374ca955 2185
729e4ab9
A
2186/**
2187 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2188 * pointer to an array of pointers to arrays of char. All of these pointers are
2189 * owned by ICU-- do not delete them, and do not write through them. The array is
2190 * terminated with a null pointer.
b75a7d8f 2191 */
729e4ab9
A
2192U_CAPI const char* const* U_EXPORT2
2193uloc_getISOCountries()
b75a7d8f 2194{
729e4ab9
A
2195 return COUNTRIES;
2196}
73c04bcf 2197
b75a7d8f 2198
729e4ab9
A
2199/* this function to be moved into cstring.c later */
2200static char gDecimal = 0;
b75a7d8f 2201
729e4ab9
A
2202static /* U_CAPI */
2203double
2204/* U_EXPORT2 */
2205_uloc_strtod(const char *start, char **end) {
2206 char *decimal;
2207 char *myEnd;
2208 char buf[30];
2209 double rv;
2210 if (!gDecimal) {
2211 char rep[5];
2212 /* For machines that decide to change the decimal on you,
2213 and try to be too smart with localization.
2214 This normally should be just a '.'. */
2215 sprintf(rep, "%+1.1f", 1.0);
2216 gDecimal = rep[2];
b75a7d8f 2217 }
b75a7d8f 2218
729e4ab9
A
2219 if(gDecimal == '.') {
2220 return uprv_strtod(start, end); /* fall through to OS */
b75a7d8f 2221 } else {
729e4ab9
A
2222 uprv_strncpy(buf, start, 29);
2223 buf[29]=0;
2224 decimal = uprv_strchr(buf, '.');
2225 if(decimal) {
2226 *decimal = gDecimal;
46f4442e 2227 } else {
729e4ab9 2228 return uprv_strtod(start, end); /* no decimal point */
46f4442e 2229 }
729e4ab9
A
2230 rv = uprv_strtod(buf, &myEnd);
2231 if(end) {
2232 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
b75a7d8f 2233 }
729e4ab9 2234 return rv;
374ca955 2235 }
374ca955
A
2236}
2237
729e4ab9
A
2238typedef struct {
2239 float q;
2240 int32_t dummy; /* to avoid uninitialized memory copy from qsort */
2241 char *locale;
2242} _acceptLangItem;
b75a7d8f 2243
729e4ab9 2244static int32_t U_CALLCONV
4388f060 2245uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
729e4ab9
A
2246{
2247 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2248 const _acceptLangItem *bb = (const _acceptLangItem*)b;
b75a7d8f 2249
729e4ab9
A
2250 int32_t rc = 0;
2251 if(bb->q < aa->q) {
2252 rc = -1; /* A > B */
2253 } else if(bb->q > aa->q) {
2254 rc = 1; /* A < B */
2255 } else {
2256 rc = 0; /* A = B */
b75a7d8f
A
2257 }
2258
729e4ab9
A
2259 if(rc==0) {
2260 rc = uprv_stricmp(aa->locale, bb->locale);
b75a7d8f
A
2261 }
2262
729e4ab9
A
2263#if defined(ULOC_DEBUG)
2264 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2265 aa->locale, aa->q,
2266 bb->locale, bb->q,
2267 rc);*/
2268#endif
374ca955 2269
729e4ab9 2270 return rc;
374ca955
A
2271}
2272
729e4ab9
A
2273/*
2274mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2275*/
374ca955 2276
b75a7d8f 2277U_CAPI int32_t U_EXPORT2
729e4ab9
A
2278uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2279 const char *httpAcceptLanguage,
2280 UEnumeration* availableLocales,
2281 UErrorCode *status)
374ca955 2282{
729e4ab9
A
2283 _acceptLangItem *j;
2284 _acceptLangItem smallBuffer[30];
2285 char **strs;
2286 char tmp[ULOC_FULLNAME_CAPACITY +1];
2287 int32_t n = 0;
2288 const char *itemEnd;
2289 const char *paramEnd;
2290 const char *s;
2291 const char *t;
2292 int32_t res;
2293 int32_t i;
2294 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2295 int32_t jSize;
2296 char *tempstr; /* Use for null pointer check */
b75a7d8f 2297
729e4ab9
A
2298 j = smallBuffer;
2299 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2300 if(U_FAILURE(*status)) {
2301 return -1;
b75a7d8f
A
2302 }
2303
729e4ab9
A
2304 for(s=httpAcceptLanguage;s&&*s;) {
2305 while(isspace(*s)) /* eat space at the beginning */
2306 s++;
2307 itemEnd=uprv_strchr(s,',');
2308 paramEnd=uprv_strchr(s,';');
2309 if(!itemEnd) {
2310 itemEnd = httpAcceptLanguage+l; /* end of string */
b75a7d8f 2311 }
729e4ab9
A
2312 if(paramEnd && paramEnd<itemEnd) {
2313 /* semicolon (;) is closer than end (,) */
2314 t = paramEnd+1;
2315 if(*t=='q') {
2316 t++;
2317 }
2318 while(isspace(*t)) {
2319 t++;
2320 }
2321 if(*t=='=') {
2322 t++;
2323 }
2324 while(isspace(*t)) {
2325 t++;
2326 }
2327 j[n].q = (float)_uloc_strtod(t,NULL);
2328 } else {
2329 /* no semicolon - it's 1.0 */
2330 j[n].q = 1.0f;
2331 paramEnd = itemEnd;
374ca955 2332 }
46f4442e 2333 j[n].dummy=0;
374ca955
A
2334 /* eat spaces prior to semi */
2335 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2336 ;
46f4442e
A
2337 /* Check for null pointer from uprv_strndup */
2338 tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2339 if (tempstr == NULL) {
2340 *status = U_MEMORY_ALLOCATION_ERROR;
2341 return -1;
2342 }
2343 j[n].locale = tempstr;
374ca955
A
2344 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2345 if(strcmp(j[n].locale,tmp)) {
2346 uprv_free(j[n].locale);
2347 j[n].locale=uprv_strdup(tmp);
2348 }
2349#if defined(ULOC_DEBUG)
2350 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2351#endif
2352 n++;
2353 s = itemEnd;
2354 while(*s==',') { /* eat duplicate commas */
2355 s++;
2356 }
2357 if(n>=jSize) {
46f4442e 2358 if(j==smallBuffer) { /* overflowed the small buffer. */
51004dcb 2359 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
46f4442e
A
2360 if(j!=NULL) {
2361 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2362 }
374ca955 2363#if defined(ULOC_DEBUG)
46f4442e 2364 fprintf(stderr,"malloced at size %d\n", jSize);
374ca955 2365#endif
46f4442e 2366 } else {
51004dcb 2367 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
374ca955 2368#if defined(ULOC_DEBUG)
46f4442e 2369 fprintf(stderr,"re-alloced at size %d\n", jSize);
374ca955 2370#endif
46f4442e
A
2371 }
2372 jSize *= 2;
2373 if(j==NULL) {
2374 *status = U_MEMORY_ALLOCATION_ERROR;
2375 return -1;
2376 }
374ca955
A
2377 }
2378 }
2379 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2380 if(U_FAILURE(*status)) {
46f4442e 2381 if(j != smallBuffer) {
374ca955 2382#if defined(ULOC_DEBUG)
46f4442e 2383 fprintf(stderr,"freeing j %p\n", j);
374ca955 2384#endif
46f4442e
A
2385 uprv_free(j);
2386 }
2387 return -1;
374ca955 2388 }
51004dcb 2389 strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
46f4442e
A
2390 /* Check for null pointer */
2391 if (strs == NULL) {
2392 uprv_free(j); /* Free to avoid memory leak */
2393 *status = U_MEMORY_ALLOCATION_ERROR;
2394 return -1;
2395 }
374ca955
A
2396 for(i=0;i<n;i++) {
2397#if defined(ULOC_DEBUG)
2398 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2399#endif
2400 strs[i]=j[i].locale;
2401 }
2402 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2403 (const char**)strs, n, availableLocales, status);
2404 for(i=0;i<n;i++) {
2405 uprv_free(strs[i]);
2406 }
2407 uprv_free(strs);
2408 if(j != smallBuffer) {
2409#if defined(ULOC_DEBUG)
46f4442e 2410 fprintf(stderr,"freeing j %p\n", j);
374ca955 2411#endif
46f4442e 2412 uprv_free(j);
374ca955
A
2413 }
2414 return res;
2415}
2416
2417
2418U_CAPI int32_t U_EXPORT2
2419uloc_acceptLanguage(char *result, int32_t resultAvailable,
2420 UAcceptResult *outResult, const char **acceptList,
2421 int32_t acceptListCount,
2422 UEnumeration* availableLocales,
2423 UErrorCode *status)
2424{
2425 int32_t i,j;
2426 int32_t len;
2427 int32_t maxLen=0;
2428 char tmp[ULOC_FULLNAME_CAPACITY+1];
2429 const char *l;
2430 char **fallbackList;
2431 if(U_FAILURE(*status)) {
2432 return -1;
2433 }
51004dcb 2434 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
374ca955 2435 if(fallbackList==NULL) {
46f4442e
A
2436 *status = U_MEMORY_ALLOCATION_ERROR;
2437 return -1;
374ca955
A
2438 }
2439 for(i=0;i<acceptListCount;i++) {
2440#if defined(ULOC_DEBUG)
2441 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2442#endif
2443 while((l=uenum_next(availableLocales, NULL, status))) {
2444#if defined(ULOC_DEBUG)
2445 fprintf(stderr," %s\n", l);
2446#endif
73c04bcf 2447 len = (int32_t)uprv_strlen(l);
374ca955
A
2448 if(!uprv_strcmp(acceptList[i], l)) {
2449 if(outResult) {
2450 *outResult = ULOC_ACCEPT_VALID;
2451 }
2452#if defined(ULOC_DEBUG)
2453 fprintf(stderr, "MATCH! %s\n", l);
2454#endif
2455 if(len>0) {
2456 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2457 }
2458 for(j=0;j<i;j++) {
2459 uprv_free(fallbackList[j]);
2460 }
2461 uprv_free(fallbackList);
2462 return u_terminateChars(result, resultAvailable, len, status);
2463 }
2464 if(len>maxLen) {
2465 maxLen = len;
2466 }
2467 }
2468 uenum_reset(availableLocales, status);
2469 /* save off parent info */
2470 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2471 fallbackList[i] = uprv_strdup(tmp);
2472 } else {
2473 fallbackList[i]=0;
2474 }
2475 }
2476
2477 for(maxLen--;maxLen>0;maxLen--) {
2478 for(i=0;i<acceptListCount;i++) {
2479 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2480#if defined(ULOC_DEBUG)
2481 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2482#endif
2483 while((l=uenum_next(availableLocales, NULL, status))) {
2484#if defined(ULOC_DEBUG)
2485 fprintf(stderr," %s\n", l);
2486#endif
73c04bcf 2487 len = (int32_t)uprv_strlen(l);
374ca955
A
2488 if(!uprv_strcmp(fallbackList[i], l)) {
2489 if(outResult) {
2490 *outResult = ULOC_ACCEPT_FALLBACK;
2491 }
2492#if defined(ULOC_DEBUG)
2493 fprintf(stderr, "fallback MATCH! %s\n", l);
2494#endif
2495 if(len>0) {
2496 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2497 }
73c04bcf
A
2498 for(j=0;j<acceptListCount;j++) {
2499 uprv_free(fallbackList[j]);
374ca955
A
2500 }
2501 uprv_free(fallbackList);
73c04bcf 2502 return u_terminateChars(result, resultAvailable, len, status);
374ca955
A
2503 }
2504 }
2505 uenum_reset(availableLocales, status);
2506
2507 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2508 uprv_free(fallbackList[i]);
2509 fallbackList[i] = uprv_strdup(tmp);
2510 } else {
2511 uprv_free(fallbackList[i]);
2512 fallbackList[i]=0;
2513 }
2514 }
2515 }
2516 if(outResult) {
2517 *outResult = ULOC_ACCEPT_FAILED;
2518 }
2519 }
2520 for(i=0;i<acceptListCount;i++) {
2521 uprv_free(fallbackList[i]);
2522 }
2523 uprv_free(fallbackList);
2524 return -1;
b75a7d8f 2525}
374ca955 2526
b331163b
A
2527U_CAPI const char* U_EXPORT2
2528uloc_toUnicodeLocaleKey(const char* keyword)
2529{
2530 const char* bcpKey = ulocimp_toBcpKey(keyword);
2531 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2532 // unknown keyword, but syntax is fine..
2533 return keyword;
2534 }
2535 return bcpKey;
2536}
2537
2538U_CAPI const char* U_EXPORT2
2539uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2540{
2541 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2542 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2543 // unknown keyword, but syntax is fine..
2544 return value;
2545 }
2546 return bcpType;
2547}
2548
2549#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
2550#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
2551
2552static UBool
2553isWellFormedLegacyKey(const char* legacyKey)
2554{
2555 const char* p = legacyKey;
2556 while (*p) {
2557 if (!UPRV_ISALPHANUM(*p)) {
2558 return FALSE;
2559 }
2560 p++;
2561 }
2562 return TRUE;
2563}
2564
2565static UBool
2566isWellFormedLegacyType(const char* legacyType)
2567{
2568 const char* p = legacyType;
2569 int32_t alphaNumLen = 0;
2570 while (*p) {
2571 if (*p == '_' || *p == '/' || *p == '-') {
2572 if (alphaNumLen == 0) {
2573 return FALSE;
2574 }
2575 alphaNumLen = 0;
2576 } else if (UPRV_ISALPHANUM(*p)) {
2577 alphaNumLen++;
2578 } else {
2579 return FALSE;
2580 }
2581 p++;
2582 }
2583 return (alphaNumLen != 0);
2584}
2585
2586U_CAPI const char* U_EXPORT2
2587uloc_toLegacyKey(const char* keyword)
2588{
2589 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2590 if (legacyKey == NULL) {
2591 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2592 //
2593 // Note:
2594 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
2595 // However, a key should not contain '=' obviously. For now, all existing
2596 // keys are using ASCII alphabetic letters only. We won't add any new key
2597 // that is not compatible with the BCP 47 syntax. Therefore, we assume
2598 // a valid key consist from [0-9a-zA-Z], no symbols.
2599 if (isWellFormedLegacyKey(keyword)) {
2600 return keyword;
2601 }
2602 }
2603 return legacyKey;
2604}
2605
2606U_CAPI const char* U_EXPORT2
2607uloc_toLegacyType(const char* keyword, const char* value)
2608{
2609 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2610 if (legacyType == NULL) {
2611 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2612 //
2613 // Note:
2614 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
2615 // However, a type should not contain '=' obviously. For now, all existing
2616 // types are using ASCII alphabetic letters with a few symbol letters. We won't
2617 // add any new type that is not compatible with the BCP 47 syntax except timezone
2618 // IDs. For now, we assume a valid type start with [0-9a-zA-Z], but may contain
2619 // '-' '_' '/' in the middle.
2620 if (isWellFormedLegacyType(value)) {
2621 return value;
2622 }
2623 }
2624 return legacyType;
2625}
2626
374ca955 2627/*eof*/