]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/uloc.cpp
ICU-531.30.tar.gz
[apple/icu.git] / icuSources / common / uloc.cpp
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
51004dcb 3* Copyright (C) 1997-2013, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6*
7* File ULOC.CPP
8*
9* Modification History:
10*
11* Date Name Description
12* 04/01/97 aliu Creation.
13* 08/21/98 stephen JDK 1.2 sync
14* 12/08/98 rtg New Locale implementation and C API
15* 03/15/99 damiba overhaul.
16* 04/06/99 stephen changed setDefault() to realloc and copy
17* 06/14/99 stephen Changed calls to ures_open for new params
18* 07/21/99 stephen Modified setDefault() to propagate to C++
374ca955
A
19* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20* brought canonicalization code into line with spec
b75a7d8f
A
21*****************************************************************************/
22
23/*
24 POSIX's locale format, from putil.c: [no spaces]
25
26 ll [ _CC ] [ . MM ] [ @ VV]
27
28 l = lang, C = ctry, M = charmap, V = variant
29*/
30
b75a7d8f
A
31#include "unicode/utypes.h"
32#include "unicode/ustring.h"
33#include "unicode/uloc.h"
34
374ca955 35#include "putilimp.h"
b75a7d8f 36#include "ustr_imp.h"
374ca955 37#include "ulocimp.h"
b75a7d8f
A
38#include "umutex.h"
39#include "cstring.h"
40#include "cmemory.h"
41#include "ucln_cmn.h"
374ca955
A
42#include "locmap.h"
43#include "uarrsort.h"
44#include "uenumimp.h"
45#include "uassert.h"
b75a7d8f 46
374ca955
A
47#include <stdio.h> /* for sprintf */
48
49/* ### Declarations **************************************************/
b75a7d8f
A
50
51/* Locale stuff from locid.cpp */
52U_CFUNC void locale_set_default(const char *id);
53U_CFUNC const char *locale_get_default(void);
374ca955
A
54U_CFUNC int32_t
55locale_getKeywords(const char *localeID,
56 char prev,
57 char *keywords, int32_t keywordCapacity,
58 char *values, int32_t valuesCapacity, int32_t *valLen,
59 UBool valuesToo,
60 UErrorCode *status);
61
374ca955
A
62/* ### Data tables **************************************************/
63
64/**
65 * Table of language codes, both 2- and 3-letter, with preference
66 * given to 2-letter codes where possible. Includes 3-letter codes
67 * that lack a 2-letter equivalent.
68 *
69 * This list must be in sorted order. This list is returned directly
70 * to the user by some API.
71 *
72 * This list must be kept in sync with LANGUAGES_3, with corresponding
73 * entries matched.
74 *
75 * This table should be terminated with a NULL entry, followed by a
76 * second list, and another NULL entry. The first list is visible to
77 * user code when this array is returned by API. The second list
78 * contains codes we support, but do not expose through user API.
79 *
80 * Notes
81 *
82 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
83 * include the revisions up to 2001/7/27 *CWB*
84 *
85 * The 3 character codes are the terminology codes like RFC 3066. This
86 * is compatible with prior ICU codes
87 *
88 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
89 * table but now at the end of the table because 3 character codes are
90 * duplicates. This avoids bad searches going from 3 to 2 character
91 * codes.
92 *
93 * The range qaa-qtz is reserved for local use
94 */
51004dcb 95/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
57a6839d 96/* ISO639 table version is 20130531 */
374ca955 97static const char * const LANGUAGES[] = {
51004dcb
A
98 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af",
99 "afa", "afh", "agq", "ain", "ak", "akk", "ale", "alg",
100 "alt", "am", "an", "ang", "anp", "apa", "ar", "arc",
101 "arn", "arp", "art", "arw", "as", "asa", "ast", "ath",
102 "aus", "av", "awa", "ay", "az",
103 "ba", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
104 "bbj", "be", "bej", "bem", "ber", "bez", "bfd", "bg",
105 "bh", "bho", "bi", "bik", "bin", "bkm", "bla", "bm",
106 "bn", "bnt", "bo", "br", "bra", "brx", "bs", "bss",
107 "btk", "bua", "bug", "bum", "byn", "byv",
108 "ca", "cad", "cai", "car", "cau", "cay", "cch", "ce",
109 "ceb", "cel", "cgg", "ch", "chb", "chg", "chk", "chm",
110 "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
111 "cop", "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs",
112 "csb", "cu", "cus", "cv", "cy",
113 "da", "dak", "dar", "dav", "day", "de", "del", "den",
114 "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
115 "dv", "dyo", "dyu", "dz", "dzg",
116 "ebu", "ee", "efi", "egy", "eka", "el", "elx", "en",
117 "enm", "eo", "es", "et", "eu", "ewo",
118 "fa", "fan", "fat", "ff", "fi", "fil", "fiu", "fj",
119 "fo", "fon", "fr", "frm", "fro", "frr", "frs", "fur",
120 "fy",
121 "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil",
73c04bcf 122 "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",
51004dcb
A
123 "grc", "gsw", "gu", "guz", "gv", "gwi",
124 "ha", "hai", "haw", "he", "hi", "hil", "him", "hit",
125 "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy",
126 "hz",
127 "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ijo",
128 "ik", "ilo", "inc", "ine", "inh", "io", "ira", "iro",
129 "is", "it", "iu",
130 "ja", "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
131 "ka", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
132 "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg", "kha",
133 "khi", "kho", "khq", "ki", "kj", "kk", "kkj", "kl",
134 "kln", "km", "kmb", "kn", "ko", "kok", "kos", "kpe",
135 "kr", "krc", "krl", "kro", "kru", "ks", "ksb", "ksf",
136 "ksh", "ku", "kum", "kut", "kv", "kw", "ky",
137 "la", "lad", "lag", "lah", "lam", "lb", "lez", "lg",
138 "li", "lkt", "ln", "lo", "lol", "loz", "lt", "lu",
139 "lua", "lui", "lun", "luo", "lus", "luy", "lv",
140 "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
141 "mde", "mdf", "mdr", "men", "mer", "mfe", "mg", "mga",
142 "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
143 "mkh", "ml", "mn", "mnc", "mni", "mno", "mo", "moh",
144 "mos", "mr", "ms", "mt", "mua", "mul", "mun", "mus",
145 "mwl", "mwr", "my", "mye", "myn", "myv",
146 "na", "nah", "nai", "nap", "naq", "nb", "nd", "nds",
147 "ne", "new", "ng", "nia", "nic", "niu", "nl", "nmg",
148 "nn", "nnh", "no", "nog", "non", "nqo", "nr", "nso",
149 "nub", "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo",
150 "nzi",
151 "oc", "oj", "om", "or", "os", "osa", "ota", "oto",
152 "pa", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
153 "phi", "phn", "pi", "pl", "pon", "pra", "pro", "ps",
154 "pt",
155 "qu",
156 "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rof",
157 "rom", "ru", "rup", "rw", "rwk",
158 "sa", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
159 "sat", "sba", "sbp", "sc", "scn", "sco", "sd", "se",
160 "see", "seh", "sel", "sem", "ses", "sg", "sga", "sgn",
161 "shi", "shn", "shu", "si", "sid", "sio", "sit",
162 "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
163 "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
164 "srn", "srr", "ss", "ssa", "ssy", "st", "su", "suk",
165 "sus", "sux", "sv", "sw", "swb", "swc", "syc", "syr",
166 "ta", "tai", "te", "tem", "teo", "ter", "tet", "tg",
167 "th", "ti", "tig", "tiv", "tk", "tkl", "tl", "tlh",
168 "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv",
169 "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
170 "twq", "ty", "tyv", "tzm",
171 "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
172 "vai", "ve", "vi", "vo", "vot", "vun",
173 "wa", "wae", "wak", "wal", "war", "was", "wen", "wo",
174 "xal", "xh", "xog",
175 "yao", "yap", "yav", "ybb", "yi", "yo", "ypk", "yue",
57a6839d
A
176 "za", "zap", "zbl", "zen", "zgh", "zh", "znd", "zu",
177 "zun", "zxx", "zza",
b75a7d8f
A
178NULL,
179 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
180NULL
181};
51004dcb 182
73c04bcf
A
/*
 * Parallel tables of withdrawn language codes and their replacements:
 * REPLACEMENT_LANGUAGES[i] is the code listed for DEPRECATED_LANGUAGES[i]
 * ("in" -> "id", "iw" -> "he", "ji" -> "yi", "jw" -> "jv").
 * The two arrays must stay the same length and in the same order.
 * Each ends with two NULL entries.
 * NOTE(review): the double NULL presumably matches the two-list layout used
 * by LANGUAGES so the same search helper can walk these arrays -- confirm
 * against the lookup code.
 */
183static const char* const DEPRECATED_LANGUAGES[]={
184 "in", "iw", "ji", "jw", NULL, NULL
185};
186static const char* const REPLACEMENT_LANGUAGES[]={
187 "id", "he", "yi", "jv", NULL, NULL
188};
b75a7d8f 189
374ca955
A
190/**
191 * Table of 3-letter language codes.
192 *
193 * This is a lookup table used to convert 3-letter language codes to
194 * their 2-letter equivalent, where possible. It must be kept in sync
195 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
196 * same language as LANGUAGES_3[i]. The commented-out lines are
197 * copied from LANGUAGES to make eyeballing this baby easier.
198 *
199 * Where a 3-letter language code has no 2-letter equivalent, the
200 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
201 *
202 * This table should be terminated with a NULL entry, followed by a
203 * second list, and another NULL entry. The two lists correspond to
204 * the two lists in LANGUAGES.
205 */
51004dcb 206/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
57a6839d 207/* ISO639 table version is 20130531 */
374ca955 208static const char * const LANGUAGES_3[] = {
51004dcb
A
209 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
210 "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
211 "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
212 "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
213 "aus", "ava", "awa", "aym", "aze",
214 "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
215 "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
216 "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
217 "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
218 "btk", "bua", "bug", "bum", "byn", "byv",
219 "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
220 "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
221 "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
222 "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
223 "csb", "chu", "cus", "chv", "cym",
224 "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
225 "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
226 "div", "dyo", "dyu", "dzo", "dzg",
227 "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
228 "enm", "epo", "spa", "est", "eus", "ewo",
229 "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
230 "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
231 "fry",
232 "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
233 "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
234 "grc", "gsw", "guj", "guz", "glv", "gwi",
235 "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
236 "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
237 "her",
238 "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
239 "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
240 "isl", "ita", "iku",
241 "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
242 "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
243 "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
244 "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
245 "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
246 "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
247 "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
248 "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
249 "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
250 "lua", "lui", "lun", "luo", "lus", "luy", "lav",
251 "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
252 "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
253 "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
254 "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
255 "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
256 "mwl", "mwr", "mya", "mye", "myn", "myv",
257 "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
258 "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
259 "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
260 "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
261 "nzi",
262 "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
263 "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
264 "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
265 "por",
266 "que",
267 "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
268 "rom", "rus", "rup", "kin", "rwk",
269 "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
270 "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
271 "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
272 "shi", "shn", "shu", "sin", "sid", "sio", "sit",
273 "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
274 "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
275 "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
276 "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
277 "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
278 "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
279 "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
280 "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
281 "twq", "tah", "tyv", "tzm",
282 "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
283 "vai", "ven", "vie", "vol", "vot", "vun",
284 "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
285 "xal", "xho", "xog",
286 "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
57a6839d
A
287 "zha", "zap", "zbl", "zen", "zgh", "zho", "znd", "zul",
288 "zun", "zxx", "zza",
b75a7d8f
A
289NULL,
290/* "in", "iw", "ji", "jw", "sh", */
291 "ind", "heb", "yid", "jaw", "srp",
292NULL
293};
294
374ca955
A
295/**
296 * Table of 2-letter country codes.
297 *
298 * This list must be in sorted order. This list is returned directly
299 * to the user by some API.
300 *
301 * This list must be kept in sync with COUNTRIES_3, with corresponding
302 * entries matched.
303 *
304 * This table should be terminated with a NULL entry, followed by a
305 * second list, and another NULL entry. The first list is visible to
306 * user code when this array is returned by API. The second list
307 * contains codes we support, but do not expose through user API.
308 *
309 * Notes:
310 *
311 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
312 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
313 * new codes keeping the old ones for compatibility updated to include
314 * 1999/12/03 revisions *CWB*
315 *
316 * RO(ROM) is now RO(ROU) according to
317 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
318 */
319static const char * const COUNTRIES[] = {
51004dcb 320 "AD", "AE", "AF", "AG", "AI", "AL", "AM",
73c04bcf 321 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
b75a7d8f 322 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
51004dcb 323 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
b75a7d8f
A
324 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
325 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
51004dcb 326 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
b75a7d8f
A
327 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
328 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
73c04bcf 329 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
b75a7d8f
A
330 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
331 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
73c04bcf
A
332 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
333 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
b75a7d8f
A
334 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
335 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
46f4442e 336 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
b75a7d8f
A
337 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
338 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
339 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
340 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
341 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
46f4442e 342 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
b75a7d8f 343 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
51004dcb
A
344 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
345 "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
b75a7d8f
A
346 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
347 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
348 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
46f4442e 349 "WS", "YE", "YT", "ZA", "ZM", "ZW",
b75a7d8f 350NULL,
51004dcb 351 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
b75a7d8f
A
352NULL
353};
354
51004dcb
A
/*
 * Parallel tables of deprecated country codes and their replacements:
 * REPLACEMENT_COUNTRIES[i] is the code listed for DEPRECATED_COUNTRIES[i].
 * The two arrays must stay the same length and in the same order; the
 * commented-out row inside REPLACEMENT_COUNTRIES repeats the deprecated
 * codes so the pairing can be checked by eye.
 * Each array ends with two NULL entries.
 */
355static const char* const DEPRECATED_COUNTRIES[] = {
356 "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
73c04bcf
A
357};
358static const char* const REPLACEMENT_COUNTRIES[] = {
51004dcb
A
359/* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
360 "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */
73c04bcf
A
361};
362
374ca955
A
363/**
364 * Table of 3-letter country codes.
365 *
366 * This is a lookup table used to convert 3-letter country codes to
367 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
368 * For all valid i, COUNTRIES[i] must refer to the same country as
369 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
370 * to make eyeballing this baby easier.
371 *
372 * This table should be terminated with a NULL entry, followed by a
373 * second list, and another NULL entry. The two lists correspond to
374 * the two lists in COUNTRIES.
375 */
376static const char * const COUNTRIES_3[] = {
51004dcb
A
377/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
378 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
73c04bcf
A
379/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
380 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
b75a7d8f
A
381/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
382 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
51004dcb
A
383/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
384 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
b75a7d8f
A
385/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
386 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
387/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
388 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
51004dcb
A
389/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
390 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
b75a7d8f
A
391/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
392 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
393/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
394 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
46f4442e 395/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
73c04bcf 396 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
b75a7d8f
A
397/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
398 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
399/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
400 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
46f4442e
A
401/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
402 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
403/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
73c04bcf 404 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
b75a7d8f
A
405/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
406 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
407/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
408 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
46f4442e
A
409/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
410 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
b75a7d8f
A
411/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
412 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
413/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
414 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
415/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
416 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
417/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
418 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
419/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
420 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
46f4442e
A
421/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
422 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
b75a7d8f
A
423/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
424 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
51004dcb
A
425/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
426 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
427/* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
428 "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
b75a7d8f
A
429/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
430 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
431/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
432 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
433/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
434 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
46f4442e
A
435/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
436 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
b75a7d8f 437NULL,
51004dcb
A
438/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
439 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
b75a7d8f
A
440NULL
441};
442
374ca955
A
/*
 * One row of CANONICALIZE_MAP: an input locale ID, the ID it canonicalizes
 * to, and an optional keyword/value pair that accompanies the canonical ID
 * (for example "ca_ES_PREEURO" -> "ca_ES" with currency=ESP).
 * keyword and value are either both set or both NULL.
 */
443typedef struct CanonicalizationMap {
444 const char *id; /* input ID to match */
445 const char *canonicalID; /* canonicalized output ID */
446 const char *keyword; /* keyword to attach, or NULL if none */
447 const char *value; /* value for keyword, or NULL if keyword==NULL */
448} CanonicalizationMap;
449
450/**
451 * A map to canonicalize locale IDs. This handles a variety of
452 * different semantic kinds of transformations.
453 */
454static const CanonicalizationMap CANONICALIZE_MAP[] = {
455 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */
729e4ab9 456 { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */
73c04bcf 457 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
374ca955
A
458 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
459 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
460 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
461 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
46f4442e 462 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
374ca955
A
463 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
464 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
465 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
466 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
374ca955
A
467 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
468 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
46f4442e 469 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
374ca955
A
470 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
471 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
472 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
473 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
474 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
475 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
476 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
477 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
46f4442e 478 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
374ca955 479 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
46f4442e 480 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
374ca955
A
481 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */
482 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
483 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
484 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
46f4442e
A
485 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
486 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */
487 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
488 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
73c04bcf 489 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
374ca955
A
490 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
491 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
492 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
46f4442e 493 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */
4388f060 494 { "zh_GAN", "gan", NULL, NULL }, /* registered name */
374ca955 495 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
4388f060
A
496 { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */
497 { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */
498 { "zh_WUU", "wuu", NULL, NULL }, /* registered name */
499 { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */
500 { "zh_YUE", "yue", NULL, NULL }, /* registered name */
46f4442e
A
501};
502
/*
 * One row of VARIANT_MAP: a variant name and the keyword/value pair it
 * stands for (for example "EURO" -> currency=EUR).
 */
503typedef struct VariantMap {
504 const char *variant; /* variant name to match, e.g. "EURO" */
505 const char *keyword; /* keyword, or NULL if none */
506 const char *value; /* keyword value, or NULL if keyword==NULL */
507} VariantMap;
508
/*
 * Variant names that are expressed as a keyword/value pair instead:
 * "EURO" as currency=EUR, "PINYIN" and "STROKE" as collation types.
 */
509static const VariantMap VARIANT_MAP[] = {
510 { "EURO", "currency", "EUR" },
511 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
512 { "STROKE", "collation", "stroke" } /* Solaris variant */
374ca955
A
513};
514
729e4ab9
A
515/* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id has a BCP47 extension and does not have '@'.
 *
 * Evaluates to true when id is non-NULL, contains no '@' keyword separator,
 * and getShortestSubtagLength(id) reports a one-character subtag.
 *
 * The test is applied to the macro's own argument. (It previously passed a
 * variable named `localeID` from the caller's scope to
 * getShortestSubtagLength(), which only worked because every caller happened
 * to use that variable name.)
 *
 * The argument is evaluated more than once; pass a side-effect-free
 * expression.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts a BCP47 id to a Unicode locale id, falling back to the input.
 *
 * Calls uloc_forLanguageTag(id, buffer, length, NULL, err). When that call
 * returns a positive length and *err is not a failure code, finalID is set to
 * buffer; in every other case finalID is set to the unconverted id. The macro
 * does not reset *err on the fallback path.
 *
 * Expands to a bare if/else statement, so it must be used as a statement of
 * its own (not as the unbraced body of another if/else).
 */
#define _ConvertBCP47(finalID, id, buffer, length, err) \
    if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) > 0 && !U_FAILURE(*(err))) { \
        finalID = (buffer); \
    } else { \
        finalID = (id); \
    }
525/* Gets the size of the shortest subtag in the given localeID. */
526static int32_t getShortestSubtagLength(const char *localeID) {
527 int32_t localeIDLength = uprv_strlen(localeID);
528 int32_t length = localeIDLength;
529 int32_t tmpLength = 0;
530 int32_t i;
531 UBool reset = TRUE;
532
533 for (i = 0; i < localeIDLength; i++) {
534 if (localeID[i] != '_' && localeID[i] != '-') {
535 if (reset) {
536 tmpLength = 0;
537 reset = FALSE;
538 }
539 tmpLength++;
540 } else {
541 if (tmpLength != 0 && tmpLength < length) {
542 length = tmpLength;
543 }
544 reset = TRUE;
545 }
546 }
547
548 return length;
549}
550
374ca955
A
551/* ### Keywords **************************************************/
552
553#define ULOC_KEYWORD_BUFFER_LEN 25
554#define ULOC_MAX_NO_KEYWORDS 25
555
729e4ab9 556U_CAPI const char * U_EXPORT2
374ca955 557locale_getKeywordsStart(const char *localeID) {
374ca955 558 const char *result = NULL;
374ca955
A
559 if((result = uprv_strchr(localeID, '@')) != NULL) {
560 return result;
73c04bcf
A
561 }
562#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
563 else {
564 /* We do this because the @ sign is variant, and the @ sign used on one
565 EBCDIC machine won't be compiled the same way on other EBCDIC based
566 machines. */
567 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
374ca955
A
568 const uint8_t *charToFind = ebcdicSigns;
569 while(*charToFind) {
570 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
571 return result;
572 }
573 charToFind++;
574 }
575 }
73c04bcf 576#endif
374ca955
A
577 return NULL;
578}
579
580/**
581 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
582 * @param keywordName incoming name to be canonicalized
583 * @param status return status (keyword too long)
584 * @return length of the keyword name
585 */
586static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
587{
588 int32_t i;
73c04bcf 589 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
374ca955
A
590
591 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
592 /* keyword name too long for internal buffer */
593 *status = U_INTERNAL_PROGRAM_ERROR;
594 return 0;
595 }
596
597 /* normalize the keyword name */
598 for(i = 0; i < keywordNameLen; i++) {
599 buf[i] = uprv_tolower(keywordName[i]);
600 }
601 buf[i] = 0;
602
603 return keywordNameLen;
604}
605
/*
 * One keyword=value pair collected by _getKeywords while it scans a
 * locale ID's keyword section.
 */
606typedef struct {
607 char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* keyword name: lowercased, spaces removed, zero-terminated */
608 int32_t keywordLen; /* number of characters in keyword, excluding the terminator */
609 const char *valueStart; /* start of the value text; not copied, and not terminated at valueLen */
610 int32_t valueLen; /* number of value characters, trailing spaces excluded */
611} KeywordStruct;
612
613static int32_t U_CALLCONV
4388f060 614compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
374ca955
A
615 const char* leftString = ((const KeywordStruct *)left)->keyword;
616 const char* rightString = ((const KeywordStruct *)right)->keyword;
617 return uprv_strcmp(leftString, rightString);
618}
619
620/**
621 * Both addKeyword and addValue must already be in canonical form.
622 * Either both addKeyword and addValue are NULL, or neither is NULL.
623 * If they are not NULL they must be zero terminated.
624 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
625 */
626static int32_t
627_getKeywords(const char *localeID,
628 char prev,
629 char *keywords, int32_t keywordCapacity,
630 char *values, int32_t valuesCapacity, int32_t *valLen,
631 UBool valuesToo,
632 const char* addKeyword,
633 const char* addValue,
634 UErrorCode *status)
635{
636 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
637
638 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
639 int32_t numKeywords = 0;
640 const char* pos = localeID;
641 const char* equalSign = NULL;
642 const char* semicolon = NULL;
643 int32_t i = 0, j, n;
644 int32_t keywordsLen = 0;
645 int32_t valuesLen = 0;
646
647 if(prev == '@') { /* start of keyword definition */
648 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
649 do {
650 UBool duplicate = FALSE;
651 /* skip leading spaces */
652 while(*pos == ' ') {
653 pos++;
654 }
655 if (!*pos) { /* handle trailing "; " */
656 break;
657 }
658 if(numKeywords == maxKeywords) {
659 *status = U_INTERNAL_PROGRAM_ERROR;
660 return 0;
661 }
662 equalSign = uprv_strchr(pos, '=');
663 semicolon = uprv_strchr(pos, ';');
664 /* lack of '=' [foo@currency] is illegal */
665 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
666 if(!equalSign || (semicolon && semicolon<equalSign)) {
667 *status = U_INVALID_FORMAT_ERROR;
668 return 0;
669 }
670 /* need to normalize both keyword and keyword name */
671 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
672 /* keyword name too long for internal buffer */
673 *status = U_INTERNAL_PROGRAM_ERROR;
674 return 0;
675 }
676 for(i = 0, n = 0; i < equalSign - pos; ++i) {
677 if (pos[i] != ' ') {
678 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
679 }
680 }
57a6839d
A
681
682 /* zero-length keyword is an error. */
683 if (n == 0) {
684 *status = U_INVALID_FORMAT_ERROR;
685 return 0;
686 }
687
374ca955
A
688 keywordList[numKeywords].keyword[n] = 0;
689 keywordList[numKeywords].keywordLen = n;
690 /* now grab the value part. First we skip the '=' */
691 equalSign++;
692 /* then we leading spaces */
693 while(*equalSign == ' ') {
694 equalSign++;
695 }
57a6839d
A
696
697 /* Premature end or zero-length value */
698 if (!equalSign || equalSign == semicolon) {
699 *status = U_INVALID_FORMAT_ERROR;
700 return 0;
701 }
702
374ca955 703 keywordList[numKeywords].valueStart = equalSign;
57a6839d 704
374ca955
A
705 pos = semicolon;
706 i = 0;
707 if(pos) {
708 while(*(pos - i - 1) == ' ') {
709 i++;
710 }
73c04bcf 711 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
374ca955
A
712 pos++;
713 } else {
73c04bcf 714 i = (int32_t)uprv_strlen(equalSign);
4388f060 715 while(i && equalSign[i-1] == ' ') {
374ca955
A
716 i--;
717 }
718 keywordList[numKeywords].valueLen = i;
719 }
720 /* If this is a duplicate keyword, then ignore it */
721 for (j=0; j<numKeywords; ++j) {
722 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
723 duplicate = TRUE;
724 break;
725 }
726 }
727 if (!duplicate) {
728 ++numKeywords;
729 }
730 } while(pos);
731
732 /* Handle addKeyword/addValue. */
733 if (addKeyword != NULL) {
734 UBool duplicate = FALSE;
735 U_ASSERT(addValue != NULL);
736 /* Search for duplicate; if found, do nothing. Explicit keyword
737 overrides addKeyword. */
738 for (j=0; j<numKeywords; ++j) {
739 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
740 duplicate = TRUE;
741 break;
742 }
743 }
744 if (!duplicate) {
745 if (numKeywords == maxKeywords) {
746 *status = U_INTERNAL_PROGRAM_ERROR;
747 return 0;
748 }
749 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
73c04bcf 750 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
374ca955 751 keywordList[numKeywords].valueStart = addValue;
73c04bcf 752 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
374ca955
A
753 ++numKeywords;
754 }
755 } else {
756 U_ASSERT(addValue == NULL);
757 }
758
759 /* now we have a list of keywords */
760 /* we need to sort it */
761 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
762
763 /* Now construct the keyword part */
764 for(i = 0; i < numKeywords; i++) {
765 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
766 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
767 if(valuesToo) {
768 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
769 } else {
770 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
771 }
772 }
773 keywordsLen += keywordList[i].keywordLen + 1;
774 if(valuesToo) {
775 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
776 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
777 }
778 keywordsLen += keywordList[i].valueLen;
779
780 if(i < numKeywords - 1) {
781 if(keywordsLen < keywordCapacity) {
782 keywords[keywordsLen] = ';';
783 }
784 keywordsLen++;
785 }
786 }
787 if(values) {
788 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
789 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
790 values[valuesLen + keywordList[i].valueLen] = 0;
791 }
792 valuesLen += keywordList[i].valueLen + 1;
793 }
794 }
795 if(values) {
796 values[valuesLen] = 0;
797 if(valLen) {
798 *valLen = valuesLen;
799 }
800 }
801 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
802 } else {
803 return 0;
804 }
805}
806
807U_CFUNC int32_t
808locale_getKeywords(const char *localeID,
809 char prev,
810 char *keywords, int32_t keywordCapacity,
811 char *values, int32_t valuesCapacity, int32_t *valLen,
812 UBool valuesToo,
813 UErrorCode *status) {
814 return _getKeywords(localeID, prev, keywords, keywordCapacity,
815 values, valuesCapacity, valLen, valuesToo,
816 NULL, NULL, status);
817}
818
819U_CAPI int32_t U_EXPORT2
820uloc_getKeywordValue(const char* localeID,
821 const char* keywordName,
822 char* buffer, int32_t bufferCapacity,
823 UErrorCode* status)
824{
729e4ab9 825 const char* startSearchHere = NULL;
374ca955 826 const char* nextSeparator = NULL;
374ca955
A
827 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
828 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
829 int32_t i = 0;
830 int32_t result = 0;
831
832 if(status && U_SUCCESS(*status) && localeID) {
729e4ab9
A
833 char tempBuffer[ULOC_FULLNAME_CAPACITY];
834 const char* tmpLocaleID;
835
836 if (_hasBCP47Extension(localeID)) {
837 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
838 } else {
839 tmpLocaleID=localeID;
840 }
374ca955 841
729e4ab9 842 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
374ca955
A
843 if(startSearchHere == NULL) {
844 /* no keywords, return at once */
845 return 0;
846 }
847
73c04bcf 848 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
374ca955
A
849 if(U_FAILURE(*status)) {
850 return 0;
851 }
852
853 /* find the first keyword */
854 while(startSearchHere) {
855 startSearchHere++;
856 /* skip leading spaces (allowed?) */
857 while(*startSearchHere == ' ') {
858 startSearchHere++;
859 }
860 nextSeparator = uprv_strchr(startSearchHere, '=');
861 /* need to normalize both keyword and keyword name */
862 if(!nextSeparator) {
863 break;
864 }
865 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
866 /* keyword name too long for internal buffer */
867 *status = U_INTERNAL_PROGRAM_ERROR;
868 return 0;
869 }
870 for(i = 0; i < nextSeparator - startSearchHere; i++) {
871 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
872 }
873 /* trim trailing spaces */
874 while(startSearchHere[i-1] == ' ') {
875 i--;
4388f060 876 U_ASSERT(i>=0);
374ca955
A
877 }
878 localeKeywordNameBuffer[i] = 0;
879
880 startSearchHere = uprv_strchr(nextSeparator, ';');
881
882 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
883 nextSeparator++;
884 while(*nextSeparator == ' ') {
885 nextSeparator++;
886 }
887 /* we actually found the keyword. Copy the value */
888 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
889 while(*(startSearchHere-1) == ' ') {
890 startSearchHere--;
891 }
892 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
73c04bcf 893 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
374ca955 894 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
73c04bcf 895 i = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
896 while(nextSeparator[i - 1] == ' ') {
897 i--;
898 }
899 uprv_strncpy(buffer, nextSeparator, i);
900 result = u_terminateChars(buffer, bufferCapacity, i, status);
901 } else {
902 /* give a bigger buffer, please */
903 *status = U_BUFFER_OVERFLOW_ERROR;
904 if(startSearchHere) {
73c04bcf 905 result = (int32_t)(startSearchHere - nextSeparator);
374ca955 906 } else {
73c04bcf 907 result = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
908 }
909 }
910 return result;
911 }
912 }
913 }
914 return 0;
915}
916
917U_CAPI int32_t U_EXPORT2
918uloc_setKeywordValue(const char* keywordName,
919 const char* keywordValue,
920 char* buffer, int32_t bufferCapacity,
921 UErrorCode* status)
922{
923 /* TODO: sorting. removal. */
924 int32_t keywordNameLen;
925 int32_t keywordValueLen;
926 int32_t bufLen;
927 int32_t needLen = 0;
928 int32_t foundValueLen;
929 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
930 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
931 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
932 int32_t i = 0;
933 int32_t rc;
934 char* nextSeparator = NULL;
935 char* nextEqualsign = NULL;
936 char* startSearchHere = NULL;
937 char* keywordStart = NULL;
938 char *insertHere = NULL;
939 if(U_FAILURE(*status)) {
940 return -1;
941 }
73c04bcf
A
942 if(bufferCapacity>1) {
943 bufLen = (int32_t)uprv_strlen(buffer);
944 } else {
945 *status = U_ILLEGAL_ARGUMENT_ERROR;
946 return 0;
947 }
948 if(bufferCapacity<bufLen) {
949 /* The capacity is less than the length?! Is this NULL terminated? */
950 *status = U_ILLEGAL_ARGUMENT_ERROR;
951 return 0;
952 }
374ca955
A
953 if(keywordValue && !*keywordValue) {
954 keywordValue = NULL;
955 }
956 if(keywordValue) {
73c04bcf 957 keywordValueLen = (int32_t)uprv_strlen(keywordValue);
374ca955
A
958 } else {
959 keywordValueLen = 0;
960 }
961 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
962 if(U_FAILURE(*status)) {
963 return 0;
964 }
965 startSearchHere = (char*)locale_getKeywordsStart(buffer);
374ca955
A
966 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
967 if(!keywordValue) { /* no keywords = nothing to remove */
968 return bufLen;
969 }
970
971 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
972 if(startSearchHere) { /* had a single @ */
973 needLen--; /* already had the @ */
974 /* startSearchHere points at the @ */
975 } else {
976 startSearchHere=buffer+bufLen;
977 }
978 if(needLen >= bufferCapacity) {
979 *status = U_BUFFER_OVERFLOW_ERROR;
980 return needLen; /* no change */
981 }
982 *startSearchHere = '@';
983 startSearchHere++;
984 uprv_strcpy(startSearchHere, keywordNameBuffer);
985 startSearchHere += keywordNameLen;
986 *startSearchHere = '=';
987 startSearchHere++;
988 uprv_strcpy(startSearchHere, keywordValue);
989 startSearchHere+=keywordValueLen;
990 return needLen;
991 } /* end shortcut - no @ */
992
993 keywordStart = startSearchHere;
994 /* search for keyword */
995 while(keywordStart) {
996 keywordStart++;
997 /* skip leading spaces (allowed?) */
998 while(*keywordStart == ' ') {
999 keywordStart++;
1000 }
1001 nextEqualsign = uprv_strchr(keywordStart, '=');
1002 /* need to normalize both keyword and keyword name */
1003 if(!nextEqualsign) {
1004 break;
1005 }
1006 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
1007 /* keyword name too long for internal buffer */
1008 *status = U_INTERNAL_PROGRAM_ERROR;
1009 return 0;
1010 }
1011 for(i = 0; i < nextEqualsign - keywordStart; i++) {
1012 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
1013 }
1014 /* trim trailing spaces */
1015 while(keywordStart[i-1] == ' ') {
1016 i--;
1017 }
51004dcb 1018 U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
374ca955
A
1019 localeKeywordNameBuffer[i] = 0;
1020
1021 nextSeparator = uprv_strchr(nextEqualsign, ';');
1022 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1023 if(rc == 0) {
1024 nextEqualsign++;
1025 while(*nextEqualsign == ' ') {
1026 nextEqualsign++;
1027 }
1028 /* we actually found the keyword. Change the value */
1029 if (nextSeparator) {
1030 keywordAtEnd = 0;
73c04bcf 1031 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
374ca955
A
1032 } else {
1033 keywordAtEnd = 1;
73c04bcf 1034 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
374ca955
A
1035 }
1036 if(keywordValue) { /* adding a value - not removing */
1037 if(foundValueLen == keywordValueLen) {
1038 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1039 return bufLen; /* no change in size */
1040 } else if(foundValueLen > keywordValueLen) {
1041 int32_t delta = foundValueLen - keywordValueLen;
1042 if(nextSeparator) { /* RH side */
1043 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1044 }
1045 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1046 bufLen -= delta;
1047 buffer[bufLen]=0;
1048 return bufLen;
1049 } else { /* FVL < KVL */
1050 int32_t delta = keywordValueLen - foundValueLen;
1051 if((bufLen+delta) >= bufferCapacity) {
1052 *status = U_BUFFER_OVERFLOW_ERROR;
1053 return bufLen+delta;
1054 }
1055 if(nextSeparator) { /* RH side */
1056 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1057 }
1058 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1059 bufLen += delta;
1060 buffer[bufLen]=0;
1061 return bufLen;
1062 }
1063 } else { /* removing a keyword */
1064 if(keywordAtEnd) {
1065 /* zero out the ';' or '@' just before startSearchhere */
1066 keywordStart[-1] = 0;
73c04bcf 1067 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
374ca955
A
1068 } else {
1069 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1070 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
73c04bcf 1071 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
374ca955
A
1072 }
1073 }
1074 } else if(rc<0){ /* end match keyword */
1075 /* could insert at this location. */
1076 insertHere = keywordStart;
1077 }
1078 keywordStart = nextSeparator;
1079 } /* end loop searching */
1080
1081 if(!keywordValue) {
1082 return bufLen; /* removal of non-extant keyword - no change */
1083 }
1084
1085 /* we know there is at least one keyword. */
1086 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1087 if(needLen >= bufferCapacity) {
1088 *status = U_BUFFER_OVERFLOW_ERROR;
1089 return needLen; /* no change */
1090 }
1091
1092 if(insertHere) {
1093 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1094 keywordStart = insertHere;
1095 } else {
1096 keywordStart = buffer+bufLen;
1097 *keywordStart = ';';
1098 keywordStart++;
1099 }
1100 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1101 keywordStart += keywordNameLen;
1102 *keywordStart = '=';
1103 keywordStart++;
1104 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1105 keywordStart+=keywordValueLen;
1106 if(insertHere) {
1107 *keywordStart = ';';
1108 keywordStart++;
1109 }
1110 buffer[needLen]=0;
1111 return needLen;
1112}
b75a7d8f 1113
374ca955 1114/* ### ID parsing implementation **************************************************/
b75a7d8f 1115
/* TRUE if 'a' is one of the letters that can start a special prefix.
   Arguments are parenthesized so that an expression can be passed safely. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1126
374ca955
A
1127static char* _strnchr(const char* str, int32_t len, char c) {
1128 U_ASSERT(str != 0 && len >= 0);
1129 while (len-- != 0) {
1130 char d = *str;
1131 if (d == c) {
1132 return (char*) str;
1133 } else if (d == 0) {
1134 break;
1135 }
1136 ++str;
1137 }
1138 return NULL;
1139}
1140
1141/**
1142 * Lookup 'key' in the array 'list'. The array 'list' should contain
1143 * a NULL entry, followed by more entries, and a second NULL entry.
1144 *
1145 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1146 * COUNTRIES_3.
1147 */
b75a7d8f
A
1148static int16_t _findIndex(const char* const* list, const char* key)
1149{
1150 const char* const* anchor = list;
374ca955
A
1151 int32_t pass = 0;
1152
1153 /* Make two passes through two NULL-terminated arrays at 'list' */
1154 while (pass++ < 2) {
1155 while (*list) {
1156 if (uprv_strcmp(key, *list) == 0) {
1157 return (int16_t)(list - anchor);
1158 }
1159 list++;
b75a7d8f 1160 }
374ca955 1161 ++list; /* skip final NULL *CWB*/
b75a7d8f
A
1162 }
1163 return -1;
1164}
1165
1166/* count the length of src while copying it to dest; return strlen(src) */
4388f060 1167static inline int32_t
b75a7d8f
A
1168_copyCount(char *dest, int32_t destCapacity, const char *src) {
1169 const char *anchor;
1170 char c;
1171
1172 anchor=src;
1173 for(;;) {
1174 if((c=*src)==0) {
1175 return (int32_t)(src-anchor);
1176 }
1177 if(destCapacity<=0) {
1178 return (int32_t)((src-anchor)+uprv_strlen(src));
1179 }
1180 ++src;
1181 *dest++=c;
1182 --destCapacity;
1183 }
1184}
1185
729e4ab9 1186U_CFUNC const char*
73c04bcf
A
1187uloc_getCurrentCountryID(const char* oldID){
1188 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1189 if (offset >= 0) {
1190 return REPLACEMENT_COUNTRIES[offset];
1191 }
1192 return oldID;
1193}
729e4ab9 1194U_CFUNC const char*
73c04bcf
A
1195uloc_getCurrentLanguageID(const char* oldID){
1196 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1197 if (offset >= 0) {
1198 return REPLACEMENT_LANGUAGES[offset];
1199 }
1200 return oldID;
1201}
b75a7d8f
A
1202/*
1203 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1204 * avoid duplicating code to handle the earlier locale ID pieces
1205 * in the functions for the later ones by
1206 * setting the *pEnd pointer to where they stopped parsing
1207 *
1208 * TODO try to use this in Locale
1209 */
729e4ab9
A
1210U_CFUNC int32_t
1211ulocimp_getLanguage(const char *localeID,
1212 char *language, int32_t languageCapacity,
1213 const char **pEnd) {
b75a7d8f
A
1214 int32_t i=0;
1215 int32_t offset;
1216 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1217
1218 /* if it starts with i- or x- then copy that prefix */
1219 if(_isIDPrefix(localeID)) {
1220 if(i<languageCapacity) {
1221 language[i]=(char)uprv_tolower(*localeID);
1222 }
1223 if(i<languageCapacity) {
1224 language[i+1]='-';
1225 }
1226 i+=2;
1227 localeID+=2;
1228 }
1229
1230 /* copy the language as far as possible and count its length */
1231 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1232 if(i<languageCapacity) {
1233 language[i]=(char)uprv_tolower(*localeID);
1234 }
1235 if(i<3) {
4388f060 1236 U_ASSERT(i>=0);
b75a7d8f
A
1237 lang[i]=(char)uprv_tolower(*localeID);
1238 }
1239 i++;
1240 localeID++;
1241 }
1242
1243 if(i==3) {
1244 /* convert 3 character code to 2 character code if possible *CWB*/
374ca955 1245 offset=_findIndex(LANGUAGES_3, lang);
b75a7d8f 1246 if(offset>=0) {
374ca955 1247 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
b75a7d8f
A
1248 }
1249 }
1250
1251 if(pEnd!=NULL) {
1252 *pEnd=localeID;
1253 }
1254 return i;
1255}
1256
729e4ab9
A
1257U_CFUNC int32_t
1258ulocimp_getScript(const char *localeID,
1259 char *script, int32_t scriptCapacity,
1260 const char **pEnd)
b75a7d8f 1261{
374ca955 1262 int32_t idLen = 0;
b75a7d8f 1263
374ca955
A
1264 if (pEnd != NULL) {
1265 *pEnd = localeID;
b75a7d8f 1266 }
374ca955
A
1267
1268 /* copy the second item as far as possible and count its length */
4388f060
A
1269 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1270 && uprv_isASCIILetter(localeID[idLen])) {
374ca955 1271 idLen++;
b75a7d8f
A
1272 }
1273
374ca955
A
1274 /* If it's exactly 4 characters long, then it's a script and not a country. */
1275 if (idLen == 4) {
1276 int32_t i;
1277 if (pEnd != NULL) {
1278 *pEnd = localeID+idLen;
1279 }
1280 if(idLen > scriptCapacity) {
1281 idLen = scriptCapacity;
1282 }
1283 if (idLen >= 1) {
1284 script[0]=(char)uprv_toupper(*(localeID++));
1285 }
1286 for (i = 1; i < idLen; i++) {
1287 script[i]=(char)uprv_tolower(*(localeID++));
1288 }
1289 }
1290 else {
1291 idLen = 0;
1292 }
1293 return idLen;
b75a7d8f
A
1294}
1295
729e4ab9
A
1296U_CFUNC int32_t
1297ulocimp_getCountry(const char *localeID,
1298 char *country, int32_t countryCapacity,
1299 const char **pEnd)
374ca955 1300{
729e4ab9 1301 int32_t idLen=0;
374ca955 1302 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
b75a7d8f
A
1303 int32_t offset;
1304
1305 /* copy the country as far as possible and count its length */
729e4ab9
A
1306 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1307 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1308 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
b75a7d8f 1309 }
729e4ab9 1310 idLen++;
b75a7d8f
A
1311 }
1312
729e4ab9
A
1313 /* the country should be either length 2 or 3 */
1314 if (idLen == 2 || idLen == 3) {
1315 UBool gotCountry = FALSE;
1316 /* convert 3 character code to 2 character code if possible *CWB*/
1317 if(idLen==3) {
1318 offset=_findIndex(COUNTRIES_3, cnty);
1319 if(offset>=0) {
1320 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1321 gotCountry = TRUE;
1322 }
1323 }
1324 if (!gotCountry) {
1325 int32_t i = 0;
1326 for (i = 0; i < idLen; i++) {
1327 if (i < countryCapacity) {
1328 country[i]=(char)uprv_toupper(localeID[i]);
1329 }
1330 }
b75a7d8f 1331 }
729e4ab9
A
1332 localeID+=idLen;
1333 } else {
1334 idLen = 0;
b75a7d8f
A
1335 }
1336
1337 if(pEnd!=NULL) {
1338 *pEnd=localeID;
1339 }
729e4ab9
A
1340
1341 return idLen;
b75a7d8f
A
1342}
1343
374ca955
A
1344/**
1345 * @param needSeparator if true, then add leading '_' if any variants
1346 * are added to 'variant'
1347 */
1348static int32_t
1349_getVariantEx(const char *localeID,
1350 char prev,
1351 char *variant, int32_t variantCapacity,
1352 UBool needSeparator) {
b75a7d8f
A
1353 int32_t i=0;
1354
1355 /* get one or more variant tags and separate them with '_' */
1356 if(_isIDSeparator(prev)) {
1357 /* get a variant string after a '-' or '_' */
1358 while(!_isTerminator(*localeID)) {
374ca955
A
1359 if (needSeparator) {
1360 if (i<variantCapacity) {
1361 variant[i] = '_';
1362 }
1363 ++i;
1364 needSeparator = FALSE;
1365 }
b75a7d8f
A
1366 if(i<variantCapacity) {
1367 variant[i]=(char)uprv_toupper(*localeID);
1368 if(variant[i]=='-') {
1369 variant[i]='_';
1370 }
1371 }
1372 i++;
1373 localeID++;
1374 }
1375 }
1376
1377 /* if there is no variant tag after a '-' or '_' then look for '@' */
1378 if(i==0) {
1379 if(prev=='@') {
1380 /* keep localeID */
374ca955 1381 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
b75a7d8f
A
1382 ++localeID; /* point after the '@' */
1383 } else {
1384 return 0;
1385 }
1386 while(!_isTerminator(*localeID)) {
374ca955
A
1387 if (needSeparator) {
1388 if (i<variantCapacity) {
1389 variant[i] = '_';
1390 }
1391 ++i;
1392 needSeparator = FALSE;
1393 }
b75a7d8f
A
1394 if(i<variantCapacity) {
1395 variant[i]=(char)uprv_toupper(*localeID);
1396 if(variant[i]=='-' || variant[i]==',') {
1397 variant[i]='_';
1398 }
1399 }
1400 i++;
1401 localeID++;
1402 }
1403 }
374ca955 1404
b75a7d8f
A
1405 return i;
1406}
1407
374ca955
A
1408static int32_t
1409_getVariant(const char *localeID,
1410 char prev,
1411 char *variant, int32_t variantCapacity) {
1412 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1413}
1414
1415/**
1416 * Delete ALL instances of a variant from the given list of one or
1417 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1418 * @param variants the source string of one or more variants,
1419 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1420 * terminated; if it is, trailing zero will NOT be maintained.
1421 * @param variantsLen length of variants
1422 * @param toDelete variant to delete, without separators, e.g. "EURO"
1423 * or "PREEURO"; not zero terminated
1424 * @param toDeleteLen length of toDelete
1425 * @return number of characters deleted from variants
1426 */
1427static int32_t
1428_deleteVariant(char* variants, int32_t variantsLen,
46f4442e
A
1429 const char* toDelete, int32_t toDeleteLen)
1430{
374ca955
A
1431 int32_t delta = 0; /* number of chars deleted */
1432 for (;;) {
1433 UBool flag = FALSE;
1434 if (variantsLen < toDeleteLen) {
1435 return delta;
1436 }
1437 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1438 (variantsLen == toDeleteLen ||
46f4442e
A
1439 (flag=(variants[toDeleteLen] == '_'))))
1440 {
374ca955
A
1441 int32_t d = toDeleteLen + (flag?1:0);
1442 variantsLen -= d;
1443 delta += d;
46f4442e
A
1444 if (variantsLen > 0) {
1445 uprv_memmove(variants, variants+d, variantsLen);
1446 }
374ca955
A
1447 } else {
1448 char* p = _strnchr(variants, variantsLen, '_');
1449 if (p == NULL) {
1450 return delta;
1451 }
1452 ++p;
73c04bcf 1453 variantsLen -= (int32_t)(p - variants);
374ca955
A
1454 variants = p;
1455 }
1456 }
1457}
1458
1459/* Keyword enumeration */
1460
/* Iteration state stored in the 'context' of a keyword UEnumeration. */
typedef struct UKeywordsContext {
    char* keywords; /* start of the keyword list; freed when the enumeration is closed */
    char* current;  /* keyword that the next call to next() will return */
} UKeywordsContext;
1465
1466static void U_CALLCONV
1467uloc_kw_closeKeywords(UEnumeration *enumerator) {
1468 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1469 uprv_free(enumerator->context);
1470 uprv_free(enumerator);
1471}
1472
1473static int32_t U_CALLCONV
4388f060 1474uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
374ca955
A
1475 char *kw = ((UKeywordsContext *)en->context)->keywords;
1476 int32_t result = 0;
1477 while(*kw) {
1478 result++;
1479 kw += uprv_strlen(kw)+1;
1480 }
1481 return result;
1482}
1483
1484static const char* U_CALLCONV
1485uloc_kw_nextKeyword(UEnumeration* en,
1486 int32_t* resultLength,
4388f060 1487 UErrorCode* /*status*/) {
374ca955
A
1488 const char* result = ((UKeywordsContext *)en->context)->current;
1489 int32_t len = 0;
1490 if(*result) {
73c04bcf 1491 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
374ca955
A
1492 ((UKeywordsContext *)en->context)->current += len+1;
1493 } else {
1494 result = NULL;
1495 }
1496 if (resultLength) {
1497 *resultLength = len;
1498 }
1499 return result;
1500}
1501
1502static void U_CALLCONV
1503uloc_kw_resetKeywords(UEnumeration* en,
4388f060 1504 UErrorCode* /*status*/) {
374ca955
A
1505 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1506}
1507
1508static const UEnumeration gKeywordsEnum = {
1509 NULL,
1510 NULL,
1511 uloc_kw_closeKeywords,
1512 uloc_kw_countKeywords,
1513 uenum_unextDefault,
1514 uloc_kw_nextKeyword,
1515 uloc_kw_resetKeywords
1516};
1517
1518U_CAPI UEnumeration* U_EXPORT2
1519uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
b75a7d8f 1520{
46f4442e
A
1521 UKeywordsContext *myContext = NULL;
1522 UEnumeration *result = NULL;
b75a7d8f 1523
46f4442e
A
1524 if(U_FAILURE(*status)) {
1525 return NULL;
1526 }
1527 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1528 /* Null pointer test */
1529 if (result == NULL) {
1530 *status = U_MEMORY_ALLOCATION_ERROR;
1531 return NULL;
1532 }
1533 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
51004dcb 1534 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
46f4442e
A
1535 if (myContext == NULL) {
1536 *status = U_MEMORY_ALLOCATION_ERROR;
1537 uprv_free(result);
1538 return NULL;
1539 }
1540 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1541 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1542 myContext->keywords[keywordListSize] = 0;
1543 myContext->current = myContext->keywords;
1544 result->context = myContext;
1545 return result;
374ca955
A
1546}
1547
1548U_CAPI UEnumeration* U_EXPORT2
1549uloc_openKeywords(const char* localeID,
1550 UErrorCode* status)
1551{
1552 int32_t i=0;
1553 char keywords[256];
1554 int32_t keywordsCapacity = 256;
729e4ab9
A
1555 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1556 const char* tmpLocaleID;
1557
374ca955 1558 if(status==NULL || U_FAILURE(*status)) {
b75a7d8f
A
1559 return 0;
1560 }
1561
729e4ab9
A
1562 if (_hasBCP47Extension(localeID)) {
1563 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1564 } else {
1565 if (localeID==NULL) {
1566 localeID=uloc_getDefault();
1567 }
1568 tmpLocaleID=localeID;
b75a7d8f
A
1569 }
1570
374ca955 1571 /* Skip the language */
729e4ab9
A
1572 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1573 if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1574 const char *scriptID;
1575 /* Skip the script if available */
729e4ab9
A
1576 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1577 if(scriptID != tmpLocaleID+1) {
374ca955 1578 /* Found optional script */
729e4ab9 1579 tmpLocaleID = scriptID;
374ca955
A
1580 }
1581 /* Skip the Country */
729e4ab9
A
1582 if (_isIDSeparator(*tmpLocaleID)) {
1583 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1584 if(_isIDSeparator(*tmpLocaleID)) {
1585 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
374ca955 1586 }
b75a7d8f
A
1587 }
1588 }
1589
374ca955 1590 /* keywords are located after '@' */
729e4ab9
A
1591 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1592 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
374ca955
A
1593 }
1594
1595 if(i) {
1596 return uloc_openKeywordList(keywords, i, status);
1597 } else {
1598 return NULL;
b75a7d8f 1599 }
b75a7d8f
A
1600}
1601
b75a7d8f 1602
374ca955
A
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'. Both arguments are
   parenthesized so that expressions such as (a | b) can be passed safely. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* the "i-default" language tag; deliberately not NUL-terminated, always
   used together with I_DEFAULT_LENGTH */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
374ca955
A
1612/**
1613 * Canonicalize the given localeID, to level 1 or to level 2,
1614 * depending on the options. To specify level 1, pass in options=0.
1615 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1616 *
1617 * This is the code underlying uloc_getName and uloc_canonicalize.
1618 */
1619static int32_t
1620_canonicalize(const char* localeID,
1621 char* result,
1622 int32_t resultCapacity,
1623 uint32_t options,
1624 UErrorCode* err) {
1625 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1626 char localeBuffer[ULOC_FULLNAME_CAPACITY];
729e4ab9 1627 char tempBuffer[ULOC_FULLNAME_CAPACITY];
46f4442e 1628 const char* origLocaleID;
729e4ab9 1629 const char* tmpLocaleID;
374ca955
A
1630 const char* keywordAssign = NULL;
1631 const char* separatorIndicator = NULL;
1632 const char* addKeyword = NULL;
1633 const char* addValue = NULL;
1634 char* name;
1635 char* variant = NULL; /* pointer into name, or NULL */
374ca955
A
1636
1637 if (U_FAILURE(*err)) {
b75a7d8f
A
1638 return 0;
1639 }
1640
729e4ab9
A
1641 if (_hasBCP47Extension(localeID)) {
1642 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1643 } else {
1644 if (localeID==NULL) {
1645 localeID=uloc_getDefault();
1646 }
1647 tmpLocaleID=localeID;
b75a7d8f 1648 }
729e4ab9
A
1649
1650 origLocaleID=tmpLocaleID;
b75a7d8f 1651
374ca955
A
1652 /* if we are doing a full canonicalization, then put results in
1653 localeBuffer, if necessary; otherwise send them to result. */
729e4ab9 1654 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
4388f060 1655 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
374ca955 1656 name = localeBuffer;
4388f060 1657 nameCapacity = (int32_t)sizeof(localeBuffer);
374ca955
A
1658 } else {
1659 name = result;
1660 nameCapacity = resultCapacity;
1661 }
1662
b75a7d8f 1663 /* get all pieces, one after another, and separate with '_' */
729e4ab9 1664 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
73c04bcf
A
1665
1666 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1667 const char *d = uloc_getDefault();
1668
729e4ab9 1669 len = (int32_t)uprv_strlen(d);
73c04bcf
A
1670
1671 if (name != NULL) {
1672 uprv_strncpy(name, d, len);
1673 }
729e4ab9 1674 } else if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1675 const char *scriptID;
1676
b75a7d8f 1677 ++fieldCount;
374ca955
A
1678 if(len<nameCapacity) {
1679 name[len]='_';
b75a7d8f 1680 }
374ca955
A
1681 ++len;
1682
4388f060
A
1683 scriptSize=ulocimp_getScript(tmpLocaleID+1,
1684 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
374ca955
A
1685 if(scriptSize > 0) {
1686 /* Found optional script */
729e4ab9 1687 tmpLocaleID = scriptID;
b75a7d8f 1688 ++fieldCount;
374ca955 1689 len+=scriptSize;
729e4ab9 1690 if (_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1691 /* If there is something else, then we add the _ */
1692 if(len<nameCapacity) {
1693 name[len]='_';
1694 }
1695 ++len;
1696 }
1697 }
1698
729e4ab9
A
1699 if (_isIDSeparator(*tmpLocaleID)) {
1700 const char *cntryID;
4388f060
A
1701 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1702 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
729e4ab9
A
1703 if (cntrySize > 0) {
1704 /* Found optional country */
1705 tmpLocaleID = cntryID;
1706 len+=cntrySize;
1707 }
1708 if(_isIDSeparator(*tmpLocaleID)) {
51004dcb
A
1709 /* If there is something else, then we add the _ if we found country before. */
1710 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
729e4ab9
A
1711 ++fieldCount;
1712 if(len<nameCapacity) {
1713 name[len]='_';
1714 }
1715 ++len;
374ca955 1716 }
729e4ab9 1717
4388f060
A
1718 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1719 (len<nameCapacity ? name+len : NULL), nameCapacity-len);
374ca955 1720 if (variantSize > 0) {
4388f060 1721 variant = len<nameCapacity ? name+len : NULL;
374ca955 1722 len += variantSize;
729e4ab9 1723 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
374ca955 1724 }
b75a7d8f 1725 }
b75a7d8f
A
1726 }
1727 }
1728
374ca955 1729 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
729e4ab9 1730 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
374ca955 1731 UBool done = FALSE;
b75a7d8f 1732 do {
729e4ab9 1733 char c = *tmpLocaleID;
374ca955
A
1734 switch (c) {
1735 case 0:
1736 case '@':
1737 done = TRUE;
1738 break;
1739 default:
1740 if (len<nameCapacity) {
1741 name[len] = c;
1742 }
1743 ++len;
729e4ab9 1744 ++tmpLocaleID;
374ca955
A
1745 break;
1746 }
1747 } while (!done);
1748 }
1749
1750 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
729e4ab9
A
1751 After this, tmpLocaleID either points to '@' or is NULL */
1752 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1753 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1754 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
374ca955
A
1755 }
1756
1757 /* Copy POSIX-style variant, if any [mr@FOO] */
1758 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
729e4ab9 1759 tmpLocaleID != NULL && keywordAssign == NULL) {
374ca955 1760 for (;;) {
729e4ab9 1761 char c = *tmpLocaleID;
374ca955
A
1762 if (c == 0) {
1763 break;
1764 }
1765 if (len<nameCapacity) {
1766 name[len] = c;
1767 }
1768 ++len;
729e4ab9 1769 ++tmpLocaleID;
374ca955
A
1770 }
1771 }
1772
1773 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1774 /* Handle @FOO variant if @ is present and not followed by = */
729e4ab9 1775 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
374ca955
A
1776 int32_t posixVariantSize;
1777 /* Add missing '_' if needed */
1778 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1779 do {
1780 if(len<nameCapacity) {
1781 name[len]='_';
1782 }
1783 ++len;
1784 ++fieldCount;
1785 } while(fieldCount<2);
1786 }
729e4ab9 1787 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
374ca955
A
1788 (UBool)(variantSize > 0));
1789 if (posixVariantSize > 0) {
1790 if (variant == NULL) {
1791 variant = name+len;
1792 }
1793 len += posixVariantSize;
1794 variantSize += posixVariantSize;
b75a7d8f 1795 }
374ca955
A
1796 }
1797
46f4442e
A
1798 /* Handle generic variants first */
1799 if (variant) {
1800 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1801 const char* variantToCompare = VARIANT_MAP[j].variant;
1802 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1803 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1804 len -= variantLen;
1805 if (variantLen > 0) {
b25be066 1806 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
46f4442e
A
1807 --len;
1808 }
1809 addKeyword = VARIANT_MAP[j].keyword;
1810 addValue = VARIANT_MAP[j].value;
1811 break;
1812 }
1813 }
b25be066 1814 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
46f4442e
A
1815 --len;
1816 }
374ca955
A
1817 }
1818
1819 /* Look up the ID in the canonicalization map */
1820 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1821 const char* id = CANONICALIZE_MAP[j].id;
73c04bcf 1822 int32_t n = (int32_t)uprv_strlen(id);
374ca955 1823 if (len == n && uprv_strncmp(name, id, n) == 0) {
729e4ab9 1824 if (n == 0 && tmpLocaleID != NULL) {
374ca955
A
1825 break; /* Don't remap "" if keywords present */
1826 }
1827 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
46f4442e
A
1828 if (CANONICALIZE_MAP[j].keyword) {
1829 addKeyword = CANONICALIZE_MAP[j].keyword;
1830 addValue = CANONICALIZE_MAP[j].value;
1831 }
374ca955
A
1832 break;
1833 }
1834 }
374ca955
A
1835 }
1836
1837 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
729e4ab9 1838 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
374ca955
A
1839 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1840 if(len<nameCapacity) {
1841 name[len]='@';
1842 }
1843 ++len;
b75a7d8f 1844 ++fieldCount;
4388f060
A
1845 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1846 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
374ca955 1847 } else if (addKeyword != NULL) {
51004dcb 1848 U_ASSERT(addValue != NULL && len < nameCapacity);
374ca955
A
1849 /* inelegant but works -- later make _getKeywords do this? */
1850 len += _copyCount(name+len, nameCapacity-len, "@");
1851 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1852 len += _copyCount(name+len, nameCapacity-len, "=");
1853 len += _copyCount(name+len, nameCapacity-len, addValue);
1854 }
1855 }
1856
46f4442e 1857 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
374ca955
A
1858 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1859 }
1860
1861 return u_terminateChars(result, resultCapacity, len, err);
1862}
1863
1864/* ### ID parsing API **************************************************/
1865
1866U_CAPI int32_t U_EXPORT2
1867uloc_getParent(const char* localeID,
1868 char* parent,
1869 int32_t parentCapacity,
1870 UErrorCode* err)
1871{
1872 const char *lastUnderscore;
1873 int32_t i;
1874
1875 if (U_FAILURE(*err))
1876 return 0;
1877
1878 if (localeID == NULL)
1879 localeID = uloc_getDefault();
1880
1881 lastUnderscore=uprv_strrchr(localeID, '_');
1882 if(lastUnderscore!=NULL) {
1883 i=(int32_t)(lastUnderscore-localeID);
1884 } else {
1885 i=0;
b75a7d8f 1886 }
374ca955 1887
73c04bcf 1888 if(i>0 && parent != localeID) {
374ca955
A
1889 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1890 }
1891 return u_terminateChars(parent, parentCapacity, i, err);
b75a7d8f 1892}
374ca955
A
1893
1894U_CAPI int32_t U_EXPORT2
1895uloc_getLanguage(const char* localeID,
1896 char* language,
1897 int32_t languageCapacity,
1898 UErrorCode* err)
1899{
1900 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1901 int32_t i=0;
1902
1903 if (err==NULL || U_FAILURE(*err)) {
1904 return 0;
1905 }
1906
1907 if(localeID==NULL) {
1908 localeID=uloc_getDefault();
1909 }
1910
729e4ab9 1911 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
374ca955
A
1912 return u_terminateChars(language, languageCapacity, i, err);
1913}
1914
1915U_CAPI int32_t U_EXPORT2
1916uloc_getScript(const char* localeID,
1917 char* script,
1918 int32_t scriptCapacity,
1919 UErrorCode* err)
1920{
1921 int32_t i=0;
1922
1923 if(err==NULL || U_FAILURE(*err)) {
1924 return 0;
1925 }
1926
1927 if(localeID==NULL) {
1928 localeID=uloc_getDefault();
1929 }
1930
1931 /* skip the language */
729e4ab9 1932 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955 1933 if(_isIDSeparator(*localeID)) {
729e4ab9 1934 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
374ca955
A
1935 }
1936 return u_terminateChars(script, scriptCapacity, i, err);
1937}
1938
1939U_CAPI int32_t U_EXPORT2
1940uloc_getCountry(const char* localeID,
1941 char* country,
1942 int32_t countryCapacity,
1943 UErrorCode* err)
1944{
1945 int32_t i=0;
1946
1947 if(err==NULL || U_FAILURE(*err)) {
1948 return 0;
1949 }
1950
1951 if(localeID==NULL) {
1952 localeID=uloc_getDefault();
1953 }
1954
1955 /* Skip the language */
729e4ab9 1956 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955
A
1957 if(_isIDSeparator(*localeID)) {
1958 const char *scriptID;
1959 /* Skip the script if available */
729e4ab9 1960 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
374ca955
A
1961 if(scriptID != localeID+1) {
1962 /* Found optional script */
1963 localeID = scriptID;
1964 }
1965 if(_isIDSeparator(*localeID)) {
729e4ab9 1966 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
374ca955
A
1967 }
1968 }
1969 return u_terminateChars(country, countryCapacity, i, err);
1970}
1971
1972U_CAPI int32_t U_EXPORT2
1973uloc_getVariant(const char* localeID,
1974 char* variant,
1975 int32_t variantCapacity,
1976 UErrorCode* err)
1977{
729e4ab9
A
1978 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1979 const char* tmpLocaleID;
374ca955 1980 int32_t i=0;
374ca955
A
1981
1982 if(err==NULL || U_FAILURE(*err)) {
1983 return 0;
1984 }
1985
729e4ab9
A
1986 if (_hasBCP47Extension(localeID)) {
1987 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1988 } else {
1989 if (localeID==NULL) {
1990 localeID=uloc_getDefault();
1991 }
1992 tmpLocaleID=localeID;
374ca955
A
1993 }
1994
1995 /* Skip the language */
729e4ab9
A
1996 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1997 if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1998 const char *scriptID;
1999 /* Skip the script if available */
729e4ab9
A
2000 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
2001 if(scriptID != tmpLocaleID+1) {
374ca955 2002 /* Found optional script */
729e4ab9 2003 tmpLocaleID = scriptID;
374ca955
A
2004 }
2005 /* Skip the Country */
729e4ab9
A
2006 if (_isIDSeparator(*tmpLocaleID)) {
2007 const char *cntryID;
2008 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
2009 if (cntryID != tmpLocaleID+1) {
2010 /* Found optional country */
2011 tmpLocaleID = cntryID;
2012 }
2013 if(_isIDSeparator(*tmpLocaleID)) {
2014 /* If there was no country ID, skip a possible extra IDSeparator */
2015 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2016 tmpLocaleID++;
2017 }
2018 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
374ca955
A
2019 }
2020 }
2021 }
2022
2023 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2024 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2025/*
2026 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2027 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2028 }
2029*/
2030 return u_terminateChars(variant, variantCapacity, i, err);
2031}
2032
2033U_CAPI int32_t U_EXPORT2
2034uloc_getName(const char* localeID,
2035 char* name,
2036 int32_t nameCapacity,
2037 UErrorCode* err)
2038{
2039 return _canonicalize(localeID, name, nameCapacity, 0, err);
2040}
2041
2042U_CAPI int32_t U_EXPORT2
2043uloc_getBaseName(const char* localeID,
2044 char* name,
2045 int32_t nameCapacity,
2046 UErrorCode* err)
2047{
2048 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2049}
2050
2051U_CAPI int32_t U_EXPORT2
2052uloc_canonicalize(const char* localeID,
2053 char* name,
2054 int32_t nameCapacity,
2055 UErrorCode* err)
2056{
2057 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2058}
2059
b75a7d8f
A
2060U_CAPI const char* U_EXPORT2
2061uloc_getISO3Language(const char* localeID)
2062{
374ca955
A
2063 int16_t offset;
2064 char lang[ULOC_LANG_CAPACITY];
2065 UErrorCode err = U_ZERO_ERROR;
2066
2067 if (localeID == NULL)
2068 {
2069 localeID = uloc_getDefault();
2070 }
2071 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2072 if (U_FAILURE(err))
2073 return "";
2074 offset = _findIndex(LANGUAGES, lang);
2075 if (offset < 0)
2076 return "";
2077 return LANGUAGES_3[offset];
b75a7d8f
A
2078}
2079
2080U_CAPI const char* U_EXPORT2
2081uloc_getISO3Country(const char* localeID)
2082{
2083 int16_t offset;
374ca955 2084 char cntry[ULOC_LANG_CAPACITY];
b75a7d8f
A
2085 UErrorCode err = U_ZERO_ERROR;
2086
2087 if (localeID == NULL)
2088 {
2089 localeID = uloc_getDefault();
2090 }
374ca955 2091 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
b75a7d8f
A
2092 if (U_FAILURE(err))
2093 return "";
374ca955 2094 offset = _findIndex(COUNTRIES, cntry);
b75a7d8f
A
2095 if (offset < 0)
2096 return "";
2097
374ca955 2098 return COUNTRIES_3[offset];
b75a7d8f
A
2099}
2100
2101U_CAPI uint32_t U_EXPORT2
2102uloc_getLCID(const char* localeID)
2103{
374ca955
A
2104 UErrorCode status = U_ZERO_ERROR;
2105 char langID[ULOC_FULLNAME_CAPACITY];
2106
2107 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2108 if (U_FAILURE(status)) {
2109 return 0;
b75a7d8f 2110 }
374ca955 2111
57a6839d
A
2112 if (uprv_strchr(localeID, '@')) {
2113 // uprv_convertToLCID does not support keywords other than collation.
2114 // Remove all keywords except collation.
2115 int32_t len;
2116 char collVal[ULOC_KEYWORDS_CAPACITY];
2117 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2118
2119 len = uloc_getKeywordValue(localeID, "collation", collVal,
2120 sizeof(collVal)/sizeof(collVal[0]) - 1, &status);
2121
2122 if (U_SUCCESS(status) && len > 0) {
2123 collVal[len] = 0;
2124
2125 len = uloc_getBaseName(localeID, tmpLocaleID,
2126 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - 1, &status);
2127
2128 if (U_SUCCESS(status)) {
2129 tmpLocaleID[len] = 0;
2130
2131 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2132 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - len - 1, &status);
2133
2134 if (U_SUCCESS(status)) {
2135 tmpLocaleID[len] = 0;
2136 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2137 }
2138 }
2139 }
2140
2141 // fall through - all keywords are simply ignored
2142 status = U_ZERO_ERROR;
2143 }
2144
374ca955
A
2145 return uprv_convertToLCID(langID, localeID, &status);
2146}
2147
73c04bcf
A
2148U_CAPI int32_t U_EXPORT2
2149uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2150 UErrorCode *status)
2151{
57a6839d 2152 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
73c04bcf
A
2153}
2154
374ca955
A
2155/* ### Default locale **************************************************/
2156
2157U_CAPI const char* U_EXPORT2
2158uloc_getDefault()
2159{
2160 return locale_get_default();
2161}
2162
2163U_CAPI void U_EXPORT2
2164uloc_setDefault(const char* newDefaultLocale,
2165 UErrorCode* err)
2166{
2167 if (U_FAILURE(*err))
2168 return;
2169 /* the error code isn't currently used for anything by this function*/
b75a7d8f 2170
374ca955
A
2171 /* propagate change to C++ */
2172 locale_set_default(newDefaultLocale);
b75a7d8f
A
2173}
2174
729e4ab9 2175/**
51004dcb 2176 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
729e4ab9
A
2177 * to an array of pointers to arrays of char. All of these pointers are owned
2178 * by ICU-- do not delete them, and do not write through them. The array is
2179 * terminated with a null pointer.
2180 */
2181U_CAPI const char* const* U_EXPORT2
2182uloc_getISOLanguages()
2183{
2184 return LANGUAGES;
2185}
374ca955 2186
729e4ab9
A
2187/**
2188 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2189 * pointer to an array of pointers to arrays of char. All of these pointers are
2190 * owned by ICU-- do not delete them, and do not write through them. The array is
2191 * terminated with a null pointer.
b75a7d8f 2192 */
729e4ab9
A
2193U_CAPI const char* const* U_EXPORT2
2194uloc_getISOCountries()
b75a7d8f 2195{
729e4ab9
A
2196 return COUNTRIES;
2197}
73c04bcf 2198
b75a7d8f 2199
729e4ab9
A
2200/* this function to be moved into cstring.c later */
2201static char gDecimal = 0;
b75a7d8f 2202
729e4ab9
A
2203static /* U_CAPI */
2204double
2205/* U_EXPORT2 */
2206_uloc_strtod(const char *start, char **end) {
2207 char *decimal;
2208 char *myEnd;
2209 char buf[30];
2210 double rv;
2211 if (!gDecimal) {
2212 char rep[5];
2213 /* For machines that decide to change the decimal on you,
2214 and try to be too smart with localization.
2215 This normally should be just a '.'. */
2216 sprintf(rep, "%+1.1f", 1.0);
2217 gDecimal = rep[2];
b75a7d8f 2218 }
b75a7d8f 2219
729e4ab9
A
2220 if(gDecimal == '.') {
2221 return uprv_strtod(start, end); /* fall through to OS */
b75a7d8f 2222 } else {
729e4ab9
A
2223 uprv_strncpy(buf, start, 29);
2224 buf[29]=0;
2225 decimal = uprv_strchr(buf, '.');
2226 if(decimal) {
2227 *decimal = gDecimal;
46f4442e 2228 } else {
729e4ab9 2229 return uprv_strtod(start, end); /* no decimal point */
46f4442e 2230 }
729e4ab9
A
2231 rv = uprv_strtod(buf, &myEnd);
2232 if(end) {
2233 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
b75a7d8f 2234 }
729e4ab9 2235 return rv;
374ca955 2236 }
374ca955
A
2237}
2238
729e4ab9
A
2239typedef struct {
2240 float q;
2241 int32_t dummy; /* to avoid uninitialized memory copy from qsort */
2242 char *locale;
2243} _acceptLangItem;
b75a7d8f 2244
729e4ab9 2245static int32_t U_CALLCONV
4388f060 2246uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
729e4ab9
A
2247{
2248 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2249 const _acceptLangItem *bb = (const _acceptLangItem*)b;
b75a7d8f 2250
729e4ab9
A
2251 int32_t rc = 0;
2252 if(bb->q < aa->q) {
2253 rc = -1; /* A > B */
2254 } else if(bb->q > aa->q) {
2255 rc = 1; /* A < B */
2256 } else {
2257 rc = 0; /* A = B */
b75a7d8f
A
2258 }
2259
729e4ab9
A
2260 if(rc==0) {
2261 rc = uprv_stricmp(aa->locale, bb->locale);
b75a7d8f
A
2262 }
2263
729e4ab9
A
2264#if defined(ULOC_DEBUG)
2265 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2266 aa->locale, aa->q,
2267 bb->locale, bb->q,
2268 rc);*/
2269#endif
374ca955 2270
729e4ab9 2271 return rc;
374ca955
A
2272}
2273
729e4ab9
A
2274/*
2275mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2276*/
374ca955 2277
b75a7d8f 2278U_CAPI int32_t U_EXPORT2
729e4ab9
A
2279uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2280 const char *httpAcceptLanguage,
2281 UEnumeration* availableLocales,
2282 UErrorCode *status)
374ca955 2283{
729e4ab9
A
2284 _acceptLangItem *j;
2285 _acceptLangItem smallBuffer[30];
2286 char **strs;
2287 char tmp[ULOC_FULLNAME_CAPACITY +1];
2288 int32_t n = 0;
2289 const char *itemEnd;
2290 const char *paramEnd;
2291 const char *s;
2292 const char *t;
2293 int32_t res;
2294 int32_t i;
2295 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2296 int32_t jSize;
2297 char *tempstr; /* Use for null pointer check */
b75a7d8f 2298
729e4ab9
A
2299 j = smallBuffer;
2300 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2301 if(U_FAILURE(*status)) {
2302 return -1;
b75a7d8f
A
2303 }
2304
729e4ab9
A
2305 for(s=httpAcceptLanguage;s&&*s;) {
2306 while(isspace(*s)) /* eat space at the beginning */
2307 s++;
2308 itemEnd=uprv_strchr(s,',');
2309 paramEnd=uprv_strchr(s,';');
2310 if(!itemEnd) {
2311 itemEnd = httpAcceptLanguage+l; /* end of string */
b75a7d8f 2312 }
729e4ab9
A
2313 if(paramEnd && paramEnd<itemEnd) {
2314 /* semicolon (;) is closer than end (,) */
2315 t = paramEnd+1;
2316 if(*t=='q') {
2317 t++;
2318 }
2319 while(isspace(*t)) {
2320 t++;
2321 }
2322 if(*t=='=') {
2323 t++;
2324 }
2325 while(isspace(*t)) {
2326 t++;
2327 }
2328 j[n].q = (float)_uloc_strtod(t,NULL);
2329 } else {
2330 /* no semicolon - it's 1.0 */
2331 j[n].q = 1.0f;
2332 paramEnd = itemEnd;
374ca955 2333 }
46f4442e 2334 j[n].dummy=0;
374ca955
A
2335 /* eat spaces prior to semi */
2336 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2337 ;
46f4442e
A
2338 /* Check for null pointer from uprv_strndup */
2339 tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2340 if (tempstr == NULL) {
2341 *status = U_MEMORY_ALLOCATION_ERROR;
2342 return -1;
2343 }
2344 j[n].locale = tempstr;
374ca955
A
2345 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2346 if(strcmp(j[n].locale,tmp)) {
2347 uprv_free(j[n].locale);
2348 j[n].locale=uprv_strdup(tmp);
2349 }
2350#if defined(ULOC_DEBUG)
2351 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2352#endif
2353 n++;
2354 s = itemEnd;
2355 while(*s==',') { /* eat duplicate commas */
2356 s++;
2357 }
2358 if(n>=jSize) {
46f4442e 2359 if(j==smallBuffer) { /* overflowed the small buffer. */
51004dcb 2360 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
46f4442e
A
2361 if(j!=NULL) {
2362 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2363 }
374ca955 2364#if defined(ULOC_DEBUG)
46f4442e 2365 fprintf(stderr,"malloced at size %d\n", jSize);
374ca955 2366#endif
46f4442e 2367 } else {
51004dcb 2368 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
374ca955 2369#if defined(ULOC_DEBUG)
46f4442e 2370 fprintf(stderr,"re-alloced at size %d\n", jSize);
374ca955 2371#endif
46f4442e
A
2372 }
2373 jSize *= 2;
2374 if(j==NULL) {
2375 *status = U_MEMORY_ALLOCATION_ERROR;
2376 return -1;
2377 }
374ca955
A
2378 }
2379 }
2380 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2381 if(U_FAILURE(*status)) {
46f4442e 2382 if(j != smallBuffer) {
374ca955 2383#if defined(ULOC_DEBUG)
46f4442e 2384 fprintf(stderr,"freeing j %p\n", j);
374ca955 2385#endif
46f4442e
A
2386 uprv_free(j);
2387 }
2388 return -1;
374ca955 2389 }
51004dcb 2390 strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
46f4442e
A
2391 /* Check for null pointer */
2392 if (strs == NULL) {
2393 uprv_free(j); /* Free to avoid memory leak */
2394 *status = U_MEMORY_ALLOCATION_ERROR;
2395 return -1;
2396 }
374ca955
A
2397 for(i=0;i<n;i++) {
2398#if defined(ULOC_DEBUG)
2399 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2400#endif
2401 strs[i]=j[i].locale;
2402 }
2403 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2404 (const char**)strs, n, availableLocales, status);
2405 for(i=0;i<n;i++) {
2406 uprv_free(strs[i]);
2407 }
2408 uprv_free(strs);
2409 if(j != smallBuffer) {
2410#if defined(ULOC_DEBUG)
46f4442e 2411 fprintf(stderr,"freeing j %p\n", j);
374ca955 2412#endif
46f4442e 2413 uprv_free(j);
374ca955
A
2414 }
2415 return res;
2416}
2417
2418
2419U_CAPI int32_t U_EXPORT2
2420uloc_acceptLanguage(char *result, int32_t resultAvailable,
2421 UAcceptResult *outResult, const char **acceptList,
2422 int32_t acceptListCount,
2423 UEnumeration* availableLocales,
2424 UErrorCode *status)
2425{
2426 int32_t i,j;
2427 int32_t len;
2428 int32_t maxLen=0;
2429 char tmp[ULOC_FULLNAME_CAPACITY+1];
2430 const char *l;
2431 char **fallbackList;
2432 if(U_FAILURE(*status)) {
2433 return -1;
2434 }
51004dcb 2435 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
374ca955 2436 if(fallbackList==NULL) {
46f4442e
A
2437 *status = U_MEMORY_ALLOCATION_ERROR;
2438 return -1;
374ca955
A
2439 }
2440 for(i=0;i<acceptListCount;i++) {
2441#if defined(ULOC_DEBUG)
2442 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2443#endif
2444 while((l=uenum_next(availableLocales, NULL, status))) {
2445#if defined(ULOC_DEBUG)
2446 fprintf(stderr," %s\n", l);
2447#endif
73c04bcf 2448 len = (int32_t)uprv_strlen(l);
374ca955
A
2449 if(!uprv_strcmp(acceptList[i], l)) {
2450 if(outResult) {
2451 *outResult = ULOC_ACCEPT_VALID;
2452 }
2453#if defined(ULOC_DEBUG)
2454 fprintf(stderr, "MATCH! %s\n", l);
2455#endif
2456 if(len>0) {
2457 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2458 }
2459 for(j=0;j<i;j++) {
2460 uprv_free(fallbackList[j]);
2461 }
2462 uprv_free(fallbackList);
2463 return u_terminateChars(result, resultAvailable, len, status);
2464 }
2465 if(len>maxLen) {
2466 maxLen = len;
2467 }
2468 }
2469 uenum_reset(availableLocales, status);
2470 /* save off parent info */
2471 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2472 fallbackList[i] = uprv_strdup(tmp);
2473 } else {
2474 fallbackList[i]=0;
2475 }
2476 }
2477
2478 for(maxLen--;maxLen>0;maxLen--) {
2479 for(i=0;i<acceptListCount;i++) {
2480 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2481#if defined(ULOC_DEBUG)
2482 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2483#endif
2484 while((l=uenum_next(availableLocales, NULL, status))) {
2485#if defined(ULOC_DEBUG)
2486 fprintf(stderr," %s\n", l);
2487#endif
73c04bcf 2488 len = (int32_t)uprv_strlen(l);
374ca955
A
2489 if(!uprv_strcmp(fallbackList[i], l)) {
2490 if(outResult) {
2491 *outResult = ULOC_ACCEPT_FALLBACK;
2492 }
2493#if defined(ULOC_DEBUG)
2494 fprintf(stderr, "fallback MATCH! %s\n", l);
2495#endif
2496 if(len>0) {
2497 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2498 }
73c04bcf
A
2499 for(j=0;j<acceptListCount;j++) {
2500 uprv_free(fallbackList[j]);
374ca955
A
2501 }
2502 uprv_free(fallbackList);
73c04bcf 2503 return u_terminateChars(result, resultAvailable, len, status);
374ca955
A
2504 }
2505 }
2506 uenum_reset(availableLocales, status);
2507
2508 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2509 uprv_free(fallbackList[i]);
2510 fallbackList[i] = uprv_strdup(tmp);
2511 } else {
2512 uprv_free(fallbackList[i]);
2513 fallbackList[i]=0;
2514 }
2515 }
2516 }
2517 if(outResult) {
2518 *outResult = ULOC_ACCEPT_FAILED;
2519 }
2520 }
2521 for(i=0;i<acceptListCount;i++) {
2522 uprv_free(fallbackList[i]);
2523 }
2524 uprv_free(fallbackList);
2525 return -1;
b75a7d8f 2526}
374ca955
A
2527
2528/*eof*/