]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/uloc.cpp
ICU-511.35.tar.gz
[apple/icu.git] / icuSources / common / uloc.cpp
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
51004dcb 3* Copyright (C) 1997-2013, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6*
7* File ULOC.CPP
8*
9* Modification History:
10*
11* Date Name Description
12* 04/01/97 aliu Creation.
13* 08/21/98 stephen JDK 1.2 sync
14* 12/08/98 rtg New Locale implementation and C API
15* 03/15/99 damiba overhaul.
16* 04/06/99 stephen changed setDefault() to realloc and copy
17* 06/14/99 stephen Changed calls to ures_open for new params
18* 07/21/99 stephen Modified setDefault() to propagate to C++
374ca955
A
19* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20* brought canonicalization code into line with spec
b75a7d8f
A
21*****************************************************************************/
22
23/*
24 POSIX's locale format, from putil.c: [no spaces]
25
26 ll [ _CC ] [ . MM ] [ @ VV]
27
28 l = lang, C = ctry, M = charmap, V = variant
29*/
30
b75a7d8f
A
31#include "unicode/utypes.h"
32#include "unicode/ustring.h"
33#include "unicode/uloc.h"
34
374ca955 35#include "putilimp.h"
b75a7d8f 36#include "ustr_imp.h"
374ca955 37#include "ulocimp.h"
b75a7d8f
A
38#include "umutex.h"
39#include "cstring.h"
40#include "cmemory.h"
41#include "ucln_cmn.h"
374ca955
A
42#include "locmap.h"
43#include "uarrsort.h"
44#include "uenumimp.h"
45#include "uassert.h"
b75a7d8f 46
374ca955
A
47#include <stdio.h> /* for sprintf */
48
49/* ### Declarations **************************************************/
b75a7d8f
A
50
51/* Locale stuff from locid.cpp */
52U_CFUNC void locale_set_default(const char *id);
53U_CFUNC const char *locale_get_default(void);
374ca955
A
54U_CFUNC int32_t
55locale_getKeywords(const char *localeID,
56 char prev,
57 char *keywords, int32_t keywordCapacity,
58 char *values, int32_t valuesCapacity, int32_t *valLen,
59 UBool valuesToo,
60 UErrorCode *status);
61
374ca955
A
62/* ### Data tables **************************************************/
63
64/**
65 * Table of language codes, both 2- and 3-letter, with preference
66 * given to 2-letter codes where possible. Includes 3-letter codes
67 * that lack a 2-letter equivalent.
68 *
69 * This list must be in sorted order. This list is returned directly
70 * to the user by some API.
71 *
72 * This list must be kept in sync with LANGUAGES_3, with corresponding
73 * entries matched.
74 *
75 * This table should be terminated with a NULL entry, followed by a
76 * second list, and another NULL entry. The first list is visible to
77 * user code when this array is returned by API. The second list
78 * contains codes we support, but do not expose through user API.
79 *
80 * Notes
81 *
82 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
83 * include the revisions up to 2001/7/27 *CWB*
84 *
85 * The 3 character codes are the terminology codes like RFC 3066. This
86 * is compatible with prior ICU codes
87 *
88 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
89 * table but now at the end of the table because 3 character codes are
90 * duplicates. This avoids bad searches going from 3 to 2 character
91 * codes.
92 *
93 * The range qaa-qtz is reserved for local use
94 */
51004dcb
A
95/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
96/* ISO639 table version is 20130123 */
374ca955 97static const char * const LANGUAGES[] = {
51004dcb
A
98 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af",
99 "afa", "afh", "agq", "ain", "ak", "akk", "ale", "alg",
100 "alt", "am", "an", "ang", "anp", "apa", "ar", "arc",
101 "arn", "arp", "art", "arw", "as", "asa", "ast", "ath",
102 "aus", "av", "awa", "ay", "az",
103 "ba", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
104 "bbj", "be", "bej", "bem", "ber", "bez", "bfd", "bg",
105 "bh", "bho", "bi", "bik", "bin", "bkm", "bla", "bm",
106 "bn", "bnt", "bo", "br", "bra", "brx", "bs", "bss",
107 "btk", "bua", "bug", "bum", "byn", "byv",
108 "ca", "cad", "cai", "car", "cau", "cay", "cch", "ce",
109 "ceb", "cel", "cgg", "ch", "chb", "chg", "chk", "chm",
110 "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
111 "cop", "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs",
112 "csb", "cu", "cus", "cv", "cy",
113 "da", "dak", "dar", "dav", "day", "de", "del", "den",
114 "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
115 "dv", "dyo", "dyu", "dz", "dzg",
116 "ebu", "ee", "efi", "egy", "eka", "el", "elx", "en",
117 "enm", "eo", "es", "et", "eu", "ewo",
118 "fa", "fan", "fat", "ff", "fi", "fil", "fiu", "fj",
119 "fo", "fon", "fr", "frm", "fro", "frr", "frs", "fur",
120 "fy",
121 "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil",
73c04bcf 122 "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",
51004dcb
A
123 "grc", "gsw", "gu", "guz", "gv", "gwi",
124 "ha", "hai", "haw", "he", "hi", "hil", "him", "hit",
125 "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy",
126 "hz",
127 "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ijo",
128 "ik", "ilo", "inc", "ine", "inh", "io", "ira", "iro",
129 "is", "it", "iu",
130 "ja", "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
131 "ka", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
132 "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg", "kha",
133 "khi", "kho", "khq", "ki", "kj", "kk", "kkj", "kl",
134 "kln", "km", "kmb", "kn", "ko", "kok", "kos", "kpe",
135 "kr", "krc", "krl", "kro", "kru", "ks", "ksb", "ksf",
136 "ksh", "ku", "kum", "kut", "kv", "kw", "ky",
137 "la", "lad", "lag", "lah", "lam", "lb", "lez", "lg",
138 "li", "lkt", "ln", "lo", "lol", "loz", "lt", "lu",
139 "lua", "lui", "lun", "luo", "lus", "luy", "lv",
140 "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
141 "mde", "mdf", "mdr", "men", "mer", "mfe", "mg", "mga",
142 "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
143 "mkh", "ml", "mn", "mnc", "mni", "mno", "mo", "moh",
144 "mos", "mr", "ms", "mt", "mua", "mul", "mun", "mus",
145 "mwl", "mwr", "my", "mye", "myn", "myv",
146 "na", "nah", "nai", "nap", "naq", "nb", "nd", "nds",
147 "ne", "new", "ng", "nia", "nic", "niu", "nl", "nmg",
148 "nn", "nnh", "no", "nog", "non", "nqo", "nr", "nso",
149 "nub", "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo",
150 "nzi",
151 "oc", "oj", "om", "or", "os", "osa", "ota", "oto",
152 "pa", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
153 "phi", "phn", "pi", "pl", "pon", "pra", "pro", "ps",
154 "pt",
155 "qu",
156 "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rof",
157 "rom", "ru", "rup", "rw", "rwk",
158 "sa", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
159 "sat", "sba", "sbp", "sc", "scn", "sco", "sd", "se",
160 "see", "seh", "sel", "sem", "ses", "sg", "sga", "sgn",
161 "shi", "shn", "shu", "si", "sid", "sio", "sit",
162 "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
163 "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
164 "srn", "srr", "ss", "ssa", "ssy", "st", "su", "suk",
165 "sus", "sux", "sv", "sw", "swb", "swc", "syc", "syr",
166 "ta", "tai", "te", "tem", "teo", "ter", "tet", "tg",
167 "th", "ti", "tig", "tiv", "tk", "tkl", "tl", "tlh",
168 "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv",
169 "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
170 "twq", "ty", "tyv", "tzm",
171 "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
172 "vai", "ve", "vi", "vo", "vot", "vun",
173 "wa", "wae", "wak", "wal", "war", "was", "wen", "wo",
174 "xal", "xh", "xog",
175 "yao", "yap", "yav", "ybb", "yi", "yo", "ypk", "yue",
176 "za", "zap", "zbl", "zen", "zh", "znd", "zu", "zun",
177 "zxx", "zza",
b75a7d8f
A
178NULL,
179 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
180NULL
181};
51004dcb 182
73c04bcf
A
/*
 * Deprecated two-letter language codes and their replacements.
 *
 * The two arrays are parallel: entry i of REPLACEMENT_LANGUAGES is the code
 * listed for entry i of DEPRECATED_LANGUAGES ("in" -> "id", "iw" -> "he",
 * "ji" -> "yi", "jw" -> "jv"). Both arrays end with two NULL entries.
 * NOTE(review): the double NULL presumably mirrors the "list, NULL, second
 * list, NULL" layout described for LANGUAGES with an empty second list;
 * confirm against the lookup code, which is not visible here.
 */
static const char* const DEPRECATED_LANGUAGES[] = {
    "in",
    "iw",
    "ji",
    "jw",
    NULL,
    NULL
};
static const char* const REPLACEMENT_LANGUAGES[] = {
    "id",   /* in */
    "he",   /* iw */
    "yi",   /* ji */
    "jv",   /* jw */
    NULL,
    NULL
};
b75a7d8f 189
374ca955
A
190/**
191 * Table of 3-letter language codes.
192 *
193 * This is a lookup table used to convert 3-letter language codes to
194 * their 2-letter equivalent, where possible. It must be kept in sync
195 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
196 * same language as LANGUAGES_3[i]. The commented-out lines are
197 * copied from LANGUAGES to make eyeballing this baby easier.
198 *
199 * Where a 3-letter language code has no 2-letter equivalent, the
200 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
201 *
202 * This table should be terminated with a NULL entry, followed by a
203 * second list, and another NULL entry. The two lists correspond to
204 * the two lists in LANGUAGES.
205 */
51004dcb
A
206/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
207/* ISO639 table version is 20130123 */
374ca955 208static const char * const LANGUAGES_3[] = {
51004dcb
A
209 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
210 "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
211 "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
212 "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
213 "aus", "ava", "awa", "aym", "aze",
214 "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
215 "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
216 "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
217 "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
218 "btk", "bua", "bug", "bum", "byn", "byv",
219 "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
220 "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
221 "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
222 "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
223 "csb", "chu", "cus", "chv", "cym",
224 "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
225 "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
226 "div", "dyo", "dyu", "dzo", "dzg",
227 "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
228 "enm", "epo", "spa", "est", "eus", "ewo",
229 "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
230 "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
231 "fry",
232 "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
233 "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
234 "grc", "gsw", "guj", "guz", "glv", "gwi",
235 "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
236 "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
237 "her",
238 "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
239 "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
240 "isl", "ita", "iku",
241 "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
242 "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
243 "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
244 "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
245 "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
246 "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
247 "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
248 "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
249 "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
250 "lua", "lui", "lun", "luo", "lus", "luy", "lav",
251 "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
252 "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
253 "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
254 "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
255 "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
256 "mwl", "mwr", "mya", "mye", "myn", "myv",
257 "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
258 "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
259 "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
260 "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
261 "nzi",
262 "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
263 "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
264 "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
265 "por",
266 "que",
267 "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
268 "rom", "rus", "rup", "kin", "rwk",
269 "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
270 "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
271 "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
272 "shi", "shn", "shu", "sin", "sid", "sio", "sit",
273 "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
274 "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
275 "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
276 "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
277 "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
278 "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
279 "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
280 "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
281 "twq", "tah", "tyv", "tzm",
282 "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
283 "vai", "ven", "vie", "vol", "vot", "vun",
284 "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
285 "xal", "xho", "xog",
286 "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
287 "zha", "zap", "zbl", "zen", "zho", "znd", "zul", "zun",
288 "zxx", "zza",
b75a7d8f
A
289NULL,
290/* "in", "iw", "ji", "jw", "sh", */
291 "ind", "heb", "yid", "jaw", "srp",
292NULL
293};
294
374ca955
A
295/**
296 * Table of 2-letter country codes.
297 *
298 * This list must be in sorted order. This list is returned directly
299 * to the user by some API.
300 *
301 * This list must be kept in sync with COUNTRIES_3, with corresponding
302 * entries matched.
303 *
304 * This table should be terminated with a NULL entry, followed by a
305 * second list, and another NULL entry. The first list is visible to
306 * user code when this array is returned by API. The second list
307 * contains codes we support, but do not expose through user API.
308 *
309 * Notes:
310 *
311 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
312 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
313 * new codes keeping the old ones for compatibility updated to include
314 * 1999/12/03 revisions *CWB*
315 *
316 * RO(ROM) is now RO(ROU) according to
317 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
318 */
319static const char * const COUNTRIES[] = {
51004dcb 320 "AD", "AE", "AF", "AG", "AI", "AL", "AM",
73c04bcf 321 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
b75a7d8f 322 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
51004dcb 323 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
b75a7d8f
A
324 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
325 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
51004dcb 326 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
b75a7d8f
A
327 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
328 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
73c04bcf 329 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
b75a7d8f
A
330 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
331 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
73c04bcf
A
332 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
333 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
b75a7d8f
A
334 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
335 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
46f4442e 336 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
b75a7d8f
A
337 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
338 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
339 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
340 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
341 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
46f4442e 342 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
b75a7d8f 343 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
51004dcb
A
344 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
345 "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
b75a7d8f
A
346 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
347 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
348 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
46f4442e 349 "WS", "YE", "YT", "ZA", "ZM", "ZW",
b75a7d8f 350NULL,
51004dcb 351 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
b75a7d8f
A
352NULL
353};
354
51004dcb
A
/*
 * Deprecated two-letter country codes and their replacements.
 *
 * The arrays are parallel: REPLACEMENT_COUNTRIES[i] is the code listed for
 * DEPRECATED_COUNTRIES[i]. Each array ends with two NULL entries.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD",
    "DY", "FX", "HV", "NH",
    "RH", "SU", "TP", "UK",
    "VD", "YD", "YU", "ZR",
    NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", */
    "CW", "MM", "RS", "DE",
/*  "DY", "FX", "HV", "NH", */
    "BJ", "FR", "BF", "VU",
/*  "RH", "SU", "TP", "UK", */
    "ZW", "RU", "TL", "GB",
/*  "VD", "YD", "YU", "ZR"  */
    "VN", "YE", "RS", "CD",
    NULL, NULL /* replacement country codes */
};
362
374ca955
A
363/**
364 * Table of 3-letter country codes.
365 *
366 * This is a lookup table used to convert 3-letter country codes to
367 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
368 * For all valid i, COUNTRIES[i] must refer to the same country as
369 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
370 * to make eyeballing this baby easier.
371 *
372 * This table should be terminated with a NULL entry, followed by a
373 * second list, and another NULL entry. The two lists correspond to
374 * the two lists in COUNTRIES.
375 */
376static const char * const COUNTRIES_3[] = {
51004dcb
A
377/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
378 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
73c04bcf
A
379/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
380 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
b75a7d8f
A
381/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
382 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
51004dcb
A
383/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
384 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
b75a7d8f
A
385/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
386 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
387/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
388 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
51004dcb
A
389/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
390 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
b75a7d8f
A
391/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
392 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
393/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
394 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
46f4442e 395/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
73c04bcf 396 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
b75a7d8f
A
397/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
398 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
399/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
400 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
46f4442e
A
401/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
402 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
403/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
73c04bcf 404 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
b75a7d8f
A
405/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
406 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
407/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
408 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
46f4442e
A
409/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
410 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
b75a7d8f
A
411/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
412 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
413/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
414 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
415/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
416 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
417/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
418 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
419/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
420 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
46f4442e
A
421/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
422 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
b75a7d8f
A
423/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
424 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
51004dcb
A
425/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
426 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
427/* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
428 "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
b75a7d8f
A
429/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
430 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
431/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
432 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
433/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
434 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
46f4442e
A
435/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
436 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
b75a7d8f 437NULL,
51004dcb
A
438/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
439 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
b75a7d8f
A
440NULL
441};
442
374ca955
A
/**
 * One row of the locale-ID canonicalization table: an input ID, the ID it
 * is rewritten to, and an optional keyword/value pair that accompanies the
 * rewrite.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* locale ID as supplied on input */
    const char *canonicalID; /* canonical ID produced for that input */
    const char *keyword;     /* keyword attached to the result; NULL when there is none */
    const char *value;       /* value for keyword; NULL whenever keyword is NULL */
} CanonicalizationMap;
449
450/**
451 * A map to canonicalize locale IDs. This handles a variety of
452 * different semantic kinds of transformations.
453 */
454static const CanonicalizationMap CANONICALIZE_MAP[] = {
455 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */
729e4ab9 456 { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */
73c04bcf 457 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
374ca955
A
458 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
459 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
460 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
461 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
46f4442e 462 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
374ca955
A
463 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
464 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
465 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
466 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
374ca955
A
467 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
468 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
46f4442e 469 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
374ca955
A
470 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
471 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
472 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
473 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
474 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
475 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
476 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
477 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
46f4442e 478 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
374ca955 479 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
46f4442e 480 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
374ca955
A
481 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */
482 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
483 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
484 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
46f4442e
A
485 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
486 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */
487 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
488 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
73c04bcf 489 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
374ca955
A
490 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
491 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
492 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
46f4442e 493 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */
4388f060 494 { "zh_GAN", "gan", NULL, NULL }, /* registered name */
374ca955 495 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
4388f060
A
496 { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */
497 { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */
498 { "zh_WUU", "wuu", NULL, NULL }, /* registered name */
499 { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */
500 { "zh_YUE", "yue", NULL, NULL }, /* registered name */
46f4442e
A
501};
502
/**
 * One row of the variant table: a variant name and the keyword/value pair
 * associated with it.
 */
typedef struct VariantMap {
    const char *variant; /* variant name as it appears in the input ID */
    const char *keyword; /* keyword for this variant, or NULL if none */
    const char *value;   /* value for keyword, or NULL if keyword is NULL */
} VariantMap;

/* Variant names that have a keyword/value equivalent. */
static const VariantMap VARIANT_MAP[] = {
    { "EURO",   "currency",  "EUR"    },
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    { "STROKE", "collation", "stroke" }  /* Solaris variant */
};
514
729e4ab9
A
515/* ### BCP47 Conversion *******************************************/
/*
 * Evaluates to non-zero when `id` is non-NULL, contains no '@', and its
 * shortest subtag (as reported by getShortestSubtagLength) is exactly one
 * character long -- i.e. it looks like a BCP47 tag with an extension
 * singleton rather than an ICU locale ID with keywords.
 *
 * The macro inspects its own argument. An earlier form passed the
 * caller-scope name `localeID` to getShortestSubtagLength, which only worked
 * when the argument happened to be a variable of that name.
 * `id` is evaluated more than once; do not pass an expression with side
 * effects.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP47 tag `id` into `buffer` (capacity `length`) via
 * uloc_forLanguageTag and points `finalID` at the result. If the conversion
 * yields no output or sets a failure code in *err, `finalID` is set to the
 * unmodified `id` instead.
 *
 * This expands to a bare if/else statement (not a do/while(0) wrapper) so
 * that existing call sites keep compiling unchanged; use it only where a
 * full statement is valid and do not follow it with a dangling `else`.
 */
#define _ConvertBCP47(finalID, id, buffer, length,err) \
        if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 || U_FAILURE(*(err))) { \
            finalID=(id); \
        } else { \
            finalID=(buffer); \
        }
525/* Gets the size of the shortest subtag in the given localeID. */
526static int32_t getShortestSubtagLength(const char *localeID) {
527 int32_t localeIDLength = uprv_strlen(localeID);
528 int32_t length = localeIDLength;
529 int32_t tmpLength = 0;
530 int32_t i;
531 UBool reset = TRUE;
532
533 for (i = 0; i < localeIDLength; i++) {
534 if (localeID[i] != '_' && localeID[i] != '-') {
535 if (reset) {
536 tmpLength = 0;
537 reset = FALSE;
538 }
539 tmpLength++;
540 } else {
541 if (tmpLength != 0 && tmpLength < length) {
542 length = tmpLength;
543 }
544 reset = TRUE;
545 }
546 }
547
548 return length;
549}
550
374ca955
A
551/* ### Keywords **************************************************/
552
553#define ULOC_KEYWORD_BUFFER_LEN 25
554#define ULOC_MAX_NO_KEYWORDS 25
555
729e4ab9 556U_CAPI const char * U_EXPORT2
374ca955 557locale_getKeywordsStart(const char *localeID) {
374ca955 558 const char *result = NULL;
374ca955
A
559 if((result = uprv_strchr(localeID, '@')) != NULL) {
560 return result;
73c04bcf
A
561 }
562#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
563 else {
564 /* We do this because the @ sign is variant, and the @ sign used on one
565 EBCDIC machine won't be compiled the same way on other EBCDIC based
566 machines. */
567 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
374ca955
A
568 const uint8_t *charToFind = ebcdicSigns;
569 while(*charToFind) {
570 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
571 return result;
572 }
573 charToFind++;
574 }
575 }
73c04bcf 576#endif
374ca955
A
577 return NULL;
578}
579
580/**
581 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
582 * @param keywordName incoming name to be canonicalized
583 * @param status return status (keyword too long)
584 * @return length of the keyword name
585 */
586static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
587{
588 int32_t i;
73c04bcf 589 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
374ca955
A
590
591 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
592 /* keyword name too long for internal buffer */
593 *status = U_INTERNAL_PROGRAM_ERROR;
594 return 0;
595 }
596
597 /* normalize the keyword name */
598 for(i = 0; i < keywordNameLen; i++) {
599 buf[i] = uprv_tolower(keywordName[i]);
600 }
601 buf[i] = 0;
602
603 return keywordNameLen;
604}
605
606typedef struct {
607 char keyword[ULOC_KEYWORD_BUFFER_LEN];
608 int32_t keywordLen;
609 const char *valueStart;
610 int32_t valueLen;
611} KeywordStruct;
612
613static int32_t U_CALLCONV
4388f060 614compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
374ca955
A
615 const char* leftString = ((const KeywordStruct *)left)->keyword;
616 const char* rightString = ((const KeywordStruct *)right)->keyword;
617 return uprv_strcmp(leftString, rightString);
618}
619
620/**
621 * Both addKeyword and addValue must already be in canonical form.
622 * Either both addKeyword and addValue are NULL, or neither is NULL.
623 * If they are not NULL they must be zero terminated.
624 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
625 */
626static int32_t
627_getKeywords(const char *localeID,
628 char prev,
629 char *keywords, int32_t keywordCapacity,
630 char *values, int32_t valuesCapacity, int32_t *valLen,
631 UBool valuesToo,
632 const char* addKeyword,
633 const char* addValue,
634 UErrorCode *status)
635{
636 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
637
638 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
639 int32_t numKeywords = 0;
640 const char* pos = localeID;
641 const char* equalSign = NULL;
642 const char* semicolon = NULL;
643 int32_t i = 0, j, n;
644 int32_t keywordsLen = 0;
645 int32_t valuesLen = 0;
646
647 if(prev == '@') { /* start of keyword definition */
648 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
649 do {
650 UBool duplicate = FALSE;
651 /* skip leading spaces */
652 while(*pos == ' ') {
653 pos++;
654 }
655 if (!*pos) { /* handle trailing "; " */
656 break;
657 }
658 if(numKeywords == maxKeywords) {
659 *status = U_INTERNAL_PROGRAM_ERROR;
660 return 0;
661 }
662 equalSign = uprv_strchr(pos, '=');
663 semicolon = uprv_strchr(pos, ';');
664 /* lack of '=' [foo@currency] is illegal */
665 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
666 if(!equalSign || (semicolon && semicolon<equalSign)) {
667 *status = U_INVALID_FORMAT_ERROR;
668 return 0;
669 }
670 /* need to normalize both keyword and keyword name */
671 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
672 /* keyword name too long for internal buffer */
673 *status = U_INTERNAL_PROGRAM_ERROR;
674 return 0;
675 }
676 for(i = 0, n = 0; i < equalSign - pos; ++i) {
677 if (pos[i] != ' ') {
678 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
679 }
680 }
681 keywordList[numKeywords].keyword[n] = 0;
682 keywordList[numKeywords].keywordLen = n;
683 /* now grab the value part. First we skip the '=' */
684 equalSign++;
685 /* then we skip leading spaces */
686 while(*equalSign == ' ') {
687 equalSign++;
688 }
689 keywordList[numKeywords].valueStart = equalSign;
690
691 pos = semicolon;
692 i = 0;
693 if(pos) {
694 while(*(pos - i - 1) == ' ') {
695 i++;
696 }
73c04bcf 697 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
374ca955
A
698 pos++;
699 } else {
73c04bcf 700 i = (int32_t)uprv_strlen(equalSign);
4388f060 701 while(i && equalSign[i-1] == ' ') {
374ca955
A
702 i--;
703 }
704 keywordList[numKeywords].valueLen = i;
705 }
706 /* If this is a duplicate keyword, then ignore it */
707 for (j=0; j<numKeywords; ++j) {
708 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
709 duplicate = TRUE;
710 break;
711 }
712 }
713 if (!duplicate) {
714 ++numKeywords;
715 }
716 } while(pos);
717
718 /* Handle addKeyword/addValue. */
719 if (addKeyword != NULL) {
720 UBool duplicate = FALSE;
721 U_ASSERT(addValue != NULL);
722 /* Search for duplicate; if found, do nothing. Explicit keyword
723 overrides addKeyword. */
724 for (j=0; j<numKeywords; ++j) {
725 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
726 duplicate = TRUE;
727 break;
728 }
729 }
730 if (!duplicate) {
731 if (numKeywords == maxKeywords) {
732 *status = U_INTERNAL_PROGRAM_ERROR;
733 return 0;
734 }
735 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
73c04bcf 736 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
374ca955 737 keywordList[numKeywords].valueStart = addValue;
73c04bcf 738 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
374ca955
A
739 ++numKeywords;
740 }
741 } else {
742 U_ASSERT(addValue == NULL);
743 }
744
745 /* now we have a list of keywords */
746 /* we need to sort it */
747 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
748
749 /* Now construct the keyword part */
750 for(i = 0; i < numKeywords; i++) {
751 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
752 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
753 if(valuesToo) {
754 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
755 } else {
756 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
757 }
758 }
759 keywordsLen += keywordList[i].keywordLen + 1;
760 if(valuesToo) {
761 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
762 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
763 }
764 keywordsLen += keywordList[i].valueLen;
765
766 if(i < numKeywords - 1) {
767 if(keywordsLen < keywordCapacity) {
768 keywords[keywordsLen] = ';';
769 }
770 keywordsLen++;
771 }
772 }
773 if(values) {
774 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
775 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
776 values[valuesLen + keywordList[i].valueLen] = 0;
777 }
778 valuesLen += keywordList[i].valueLen + 1;
779 }
780 }
781 if(values) {
782 values[valuesLen] = 0;
783 if(valLen) {
784 *valLen = valuesLen;
785 }
786 }
787 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
788 } else {
789 return 0;
790 }
791}
792
793U_CFUNC int32_t
794locale_getKeywords(const char *localeID,
795 char prev,
796 char *keywords, int32_t keywordCapacity,
797 char *values, int32_t valuesCapacity, int32_t *valLen,
798 UBool valuesToo,
799 UErrorCode *status) {
800 return _getKeywords(localeID, prev, keywords, keywordCapacity,
801 values, valuesCapacity, valLen, valuesToo,
802 NULL, NULL, status);
803}
804
805U_CAPI int32_t U_EXPORT2
806uloc_getKeywordValue(const char* localeID,
807 const char* keywordName,
808 char* buffer, int32_t bufferCapacity,
809 UErrorCode* status)
810{
729e4ab9 811 const char* startSearchHere = NULL;
374ca955 812 const char* nextSeparator = NULL;
374ca955
A
813 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
814 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
815 int32_t i = 0;
816 int32_t result = 0;
817
818 if(status && U_SUCCESS(*status) && localeID) {
729e4ab9
A
819 char tempBuffer[ULOC_FULLNAME_CAPACITY];
820 const char* tmpLocaleID;
821
822 if (_hasBCP47Extension(localeID)) {
823 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
824 } else {
825 tmpLocaleID=localeID;
826 }
374ca955 827
729e4ab9 828 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
374ca955
A
829 if(startSearchHere == NULL) {
830 /* no keywords, return at once */
831 return 0;
832 }
833
73c04bcf 834 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
374ca955
A
835 if(U_FAILURE(*status)) {
836 return 0;
837 }
838
839 /* find the first keyword */
840 while(startSearchHere) {
841 startSearchHere++;
842 /* skip leading spaces (allowed?) */
843 while(*startSearchHere == ' ') {
844 startSearchHere++;
845 }
846 nextSeparator = uprv_strchr(startSearchHere, '=');
847 /* need to normalize both keyword and keyword name */
848 if(!nextSeparator) {
849 break;
850 }
851 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
852 /* keyword name too long for internal buffer */
853 *status = U_INTERNAL_PROGRAM_ERROR;
854 return 0;
855 }
856 for(i = 0; i < nextSeparator - startSearchHere; i++) {
857 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
858 }
859 /* trim trailing spaces */
860 while(startSearchHere[i-1] == ' ') {
861 i--;
4388f060 862 U_ASSERT(i>=0);
374ca955
A
863 }
864 localeKeywordNameBuffer[i] = 0;
865
866 startSearchHere = uprv_strchr(nextSeparator, ';');
867
868 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
869 nextSeparator++;
870 while(*nextSeparator == ' ') {
871 nextSeparator++;
872 }
873 /* we actually found the keyword. Copy the value */
874 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
875 while(*(startSearchHere-1) == ' ') {
876 startSearchHere--;
877 }
878 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
73c04bcf 879 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
374ca955 880 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
73c04bcf 881 i = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
882 while(nextSeparator[i - 1] == ' ') {
883 i--;
884 }
885 uprv_strncpy(buffer, nextSeparator, i);
886 result = u_terminateChars(buffer, bufferCapacity, i, status);
887 } else {
888 /* give a bigger buffer, please */
889 *status = U_BUFFER_OVERFLOW_ERROR;
890 if(startSearchHere) {
73c04bcf 891 result = (int32_t)(startSearchHere - nextSeparator);
374ca955 892 } else {
73c04bcf 893 result = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
894 }
895 }
896 return result;
897 }
898 }
899 }
900 return 0;
901}
902
903U_CAPI int32_t U_EXPORT2
904uloc_setKeywordValue(const char* keywordName,
905 const char* keywordValue,
906 char* buffer, int32_t bufferCapacity,
907 UErrorCode* status)
908{
909 /* TODO: sorting. removal. */
910 int32_t keywordNameLen;
911 int32_t keywordValueLen;
912 int32_t bufLen;
913 int32_t needLen = 0;
914 int32_t foundValueLen;
915 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
916 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
917 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
918 int32_t i = 0;
919 int32_t rc;
920 char* nextSeparator = NULL;
921 char* nextEqualsign = NULL;
922 char* startSearchHere = NULL;
923 char* keywordStart = NULL;
924 char *insertHere = NULL;
925 if(U_FAILURE(*status)) {
926 return -1;
927 }
73c04bcf
A
928 if(bufferCapacity>1) {
929 bufLen = (int32_t)uprv_strlen(buffer);
930 } else {
931 *status = U_ILLEGAL_ARGUMENT_ERROR;
932 return 0;
933 }
934 if(bufferCapacity<bufLen) {
935 /* The capacity is less than the length?! Is this NULL terminated? */
936 *status = U_ILLEGAL_ARGUMENT_ERROR;
937 return 0;
938 }
374ca955
A
939 if(keywordValue && !*keywordValue) {
940 keywordValue = NULL;
941 }
942 if(keywordValue) {
73c04bcf 943 keywordValueLen = (int32_t)uprv_strlen(keywordValue);
374ca955
A
944 } else {
945 keywordValueLen = 0;
946 }
947 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
948 if(U_FAILURE(*status)) {
949 return 0;
950 }
951 startSearchHere = (char*)locale_getKeywordsStart(buffer);
374ca955
A
952 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
953 if(!keywordValue) { /* no keywords = nothing to remove */
954 return bufLen;
955 }
956
957 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
958 if(startSearchHere) { /* had a single @ */
959 needLen--; /* already had the @ */
960 /* startSearchHere points at the @ */
961 } else {
962 startSearchHere=buffer+bufLen;
963 }
964 if(needLen >= bufferCapacity) {
965 *status = U_BUFFER_OVERFLOW_ERROR;
966 return needLen; /* no change */
967 }
968 *startSearchHere = '@';
969 startSearchHere++;
970 uprv_strcpy(startSearchHere, keywordNameBuffer);
971 startSearchHere += keywordNameLen;
972 *startSearchHere = '=';
973 startSearchHere++;
974 uprv_strcpy(startSearchHere, keywordValue);
975 startSearchHere+=keywordValueLen;
976 return needLen;
977 } /* end shortcut - no @ */
978
979 keywordStart = startSearchHere;
980 /* search for keyword */
981 while(keywordStart) {
982 keywordStart++;
983 /* skip leading spaces (allowed?) */
984 while(*keywordStart == ' ') {
985 keywordStart++;
986 }
987 nextEqualsign = uprv_strchr(keywordStart, '=');
988 /* need to normalize both keyword and keyword name */
989 if(!nextEqualsign) {
990 break;
991 }
992 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
993 /* keyword name too long for internal buffer */
994 *status = U_INTERNAL_PROGRAM_ERROR;
995 return 0;
996 }
997 for(i = 0; i < nextEqualsign - keywordStart; i++) {
998 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
999 }
1000 /* trim trailing spaces */
1001 while(keywordStart[i-1] == ' ') {
1002 i--;
1003 }
51004dcb 1004 U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
374ca955
A
1005 localeKeywordNameBuffer[i] = 0;
1006
1007 nextSeparator = uprv_strchr(nextEqualsign, ';');
1008 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1009 if(rc == 0) {
1010 nextEqualsign++;
1011 while(*nextEqualsign == ' ') {
1012 nextEqualsign++;
1013 }
1014 /* we actually found the keyword. Change the value */
1015 if (nextSeparator) {
1016 keywordAtEnd = 0;
73c04bcf 1017 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
374ca955
A
1018 } else {
1019 keywordAtEnd = 1;
73c04bcf 1020 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
374ca955
A
1021 }
1022 if(keywordValue) { /* adding a value - not removing */
1023 if(foundValueLen == keywordValueLen) {
1024 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1025 return bufLen; /* no change in size */
1026 } else if(foundValueLen > keywordValueLen) {
1027 int32_t delta = foundValueLen - keywordValueLen;
1028 if(nextSeparator) { /* RH side */
1029 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1030 }
1031 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1032 bufLen -= delta;
1033 buffer[bufLen]=0;
1034 return bufLen;
1035 } else { /* FVL < KVL */
1036 int32_t delta = keywordValueLen - foundValueLen;
1037 if((bufLen+delta) >= bufferCapacity) {
1038 *status = U_BUFFER_OVERFLOW_ERROR;
1039 return bufLen+delta;
1040 }
1041 if(nextSeparator) { /* RH side */
1042 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1043 }
1044 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1045 bufLen += delta;
1046 buffer[bufLen]=0;
1047 return bufLen;
1048 }
1049 } else { /* removing a keyword */
1050 if(keywordAtEnd) {
1051 /* zero out the ';' or '@' just before startSearchhere */
1052 keywordStart[-1] = 0;
73c04bcf 1053 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
374ca955
A
1054 } else {
1055 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1056 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
73c04bcf 1057 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
374ca955
A
1058 }
1059 }
1060 } else if(rc<0){ /* end match keyword */
1061 /* could insert at this location. */
1062 insertHere = keywordStart;
1063 }
1064 keywordStart = nextSeparator;
1065 } /* end loop searching */
1066
1067 if(!keywordValue) {
1068 return bufLen; /* removal of non-extant keyword - no change */
1069 }
1070
1071 /* we know there is at least one keyword. */
1072 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1073 if(needLen >= bufferCapacity) {
1074 *status = U_BUFFER_OVERFLOW_ERROR;
1075 return needLen; /* no change */
1076 }
1077
1078 if(insertHere) {
1079 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1080 keywordStart = insertHere;
1081 } else {
1082 keywordStart = buffer+bufLen;
1083 *keywordStart = ';';
1084 keywordStart++;
1085 }
1086 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1087 keywordStart += keywordNameLen;
1088 *keywordStart = '=';
1089 keywordStart++;
1090 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1091 keywordStart+=keywordValueLen;
1092 if(insertHere) {
1093 *keywordStart = ';';
1094 keywordStart++;
1095 }
1096 buffer[needLen]=0;
1097 return needLen;
1098}
b75a7d8f 1099
/* ### ID parsing implementation **************************************************/

/* TRUE if 'a' is one of the letters that may start a special prefix: x, X, i, I.
   The argument is parenthesized so that an expression can be passed safely;
   note that it is evaluated more than once. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1112
374ca955
A
1113static char* _strnchr(const char* str, int32_t len, char c) {
1114 U_ASSERT(str != 0 && len >= 0);
1115 while (len-- != 0) {
1116 char d = *str;
1117 if (d == c) {
1118 return (char*) str;
1119 } else if (d == 0) {
1120 break;
1121 }
1122 ++str;
1123 }
1124 return NULL;
1125}
1126
1127/**
1128 * Lookup 'key' in the array 'list'. The array 'list' should contain
1129 * a NULL entry, followed by more entries, and a second NULL entry.
1130 *
1131 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1132 * COUNTRIES_3.
1133 */
b75a7d8f
A
1134static int16_t _findIndex(const char* const* list, const char* key)
1135{
1136 const char* const* anchor = list;
374ca955
A
1137 int32_t pass = 0;
1138
1139 /* Make two passes through two NULL-terminated arrays at 'list' */
1140 while (pass++ < 2) {
1141 while (*list) {
1142 if (uprv_strcmp(key, *list) == 0) {
1143 return (int16_t)(list - anchor);
1144 }
1145 list++;
b75a7d8f 1146 }
374ca955 1147 ++list; /* skip final NULL *CWB*/
b75a7d8f
A
1148 }
1149 return -1;
1150}
1151
1152/* count the length of src while copying it to dest; return strlen(src) */
4388f060 1153static inline int32_t
b75a7d8f
A
1154_copyCount(char *dest, int32_t destCapacity, const char *src) {
1155 const char *anchor;
1156 char c;
1157
1158 anchor=src;
1159 for(;;) {
1160 if((c=*src)==0) {
1161 return (int32_t)(src-anchor);
1162 }
1163 if(destCapacity<=0) {
1164 return (int32_t)((src-anchor)+uprv_strlen(src));
1165 }
1166 ++src;
1167 *dest++=c;
1168 --destCapacity;
1169 }
1170}
1171
729e4ab9 1172U_CFUNC const char*
73c04bcf
A
1173uloc_getCurrentCountryID(const char* oldID){
1174 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1175 if (offset >= 0) {
1176 return REPLACEMENT_COUNTRIES[offset];
1177 }
1178 return oldID;
1179}
729e4ab9 1180U_CFUNC const char*
73c04bcf
A
1181uloc_getCurrentLanguageID(const char* oldID){
1182 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1183 if (offset >= 0) {
1184 return REPLACEMENT_LANGUAGES[offset];
1185 }
1186 return oldID;
1187}
b75a7d8f
A
1188/*
1189 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1190 * avoid duplicating code to handle the earlier locale ID pieces
1191 * in the functions for the later ones by
1192 * setting the *pEnd pointer to where they stopped parsing
1193 *
1194 * TODO try to use this in Locale
1195 */
729e4ab9
A
1196U_CFUNC int32_t
1197ulocimp_getLanguage(const char *localeID,
1198 char *language, int32_t languageCapacity,
1199 const char **pEnd) {
b75a7d8f
A
1200 int32_t i=0;
1201 int32_t offset;
1202 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1203
1204 /* if it starts with i- or x- then copy that prefix */
1205 if(_isIDPrefix(localeID)) {
1206 if(i<languageCapacity) {
1207 language[i]=(char)uprv_tolower(*localeID);
1208 }
1209 if(i<languageCapacity) {
1210 language[i+1]='-';
1211 }
1212 i+=2;
1213 localeID+=2;
1214 }
1215
1216 /* copy the language as far as possible and count its length */
1217 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1218 if(i<languageCapacity) {
1219 language[i]=(char)uprv_tolower(*localeID);
1220 }
1221 if(i<3) {
4388f060 1222 U_ASSERT(i>=0);
b75a7d8f
A
1223 lang[i]=(char)uprv_tolower(*localeID);
1224 }
1225 i++;
1226 localeID++;
1227 }
1228
1229 if(i==3) {
1230 /* convert 3 character code to 2 character code if possible *CWB*/
374ca955 1231 offset=_findIndex(LANGUAGES_3, lang);
b75a7d8f 1232 if(offset>=0) {
374ca955 1233 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
b75a7d8f
A
1234 }
1235 }
1236
1237 if(pEnd!=NULL) {
1238 *pEnd=localeID;
1239 }
1240 return i;
1241}
1242
729e4ab9
A
1243U_CFUNC int32_t
1244ulocimp_getScript(const char *localeID,
1245 char *script, int32_t scriptCapacity,
1246 const char **pEnd)
b75a7d8f 1247{
374ca955 1248 int32_t idLen = 0;
b75a7d8f 1249
374ca955
A
1250 if (pEnd != NULL) {
1251 *pEnd = localeID;
b75a7d8f 1252 }
374ca955
A
1253
1254 /* copy the second item as far as possible and count its length */
4388f060
A
1255 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1256 && uprv_isASCIILetter(localeID[idLen])) {
374ca955 1257 idLen++;
b75a7d8f
A
1258 }
1259
374ca955
A
1260 /* If it's exactly 4 characters long, then it's a script and not a country. */
1261 if (idLen == 4) {
1262 int32_t i;
1263 if (pEnd != NULL) {
1264 *pEnd = localeID+idLen;
1265 }
1266 if(idLen > scriptCapacity) {
1267 idLen = scriptCapacity;
1268 }
1269 if (idLen >= 1) {
1270 script[0]=(char)uprv_toupper(*(localeID++));
1271 }
1272 for (i = 1; i < idLen; i++) {
1273 script[i]=(char)uprv_tolower(*(localeID++));
1274 }
1275 }
1276 else {
1277 idLen = 0;
1278 }
1279 return idLen;
b75a7d8f
A
1280}
1281
729e4ab9
A
1282U_CFUNC int32_t
1283ulocimp_getCountry(const char *localeID,
1284 char *country, int32_t countryCapacity,
1285 const char **pEnd)
374ca955 1286{
729e4ab9 1287 int32_t idLen=0;
374ca955 1288 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
b75a7d8f
A
1289 int32_t offset;
1290
1291 /* copy the country as far as possible and count its length */
729e4ab9
A
1292 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1293 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1294 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
b75a7d8f 1295 }
729e4ab9 1296 idLen++;
b75a7d8f
A
1297 }
1298
729e4ab9
A
1299 /* the country should be either length 2 or 3 */
1300 if (idLen == 2 || idLen == 3) {
1301 UBool gotCountry = FALSE;
1302 /* convert 3 character code to 2 character code if possible *CWB*/
1303 if(idLen==3) {
1304 offset=_findIndex(COUNTRIES_3, cnty);
1305 if(offset>=0) {
1306 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1307 gotCountry = TRUE;
1308 }
1309 }
1310 if (!gotCountry) {
1311 int32_t i = 0;
1312 for (i = 0; i < idLen; i++) {
1313 if (i < countryCapacity) {
1314 country[i]=(char)uprv_toupper(localeID[i]);
1315 }
1316 }
b75a7d8f 1317 }
729e4ab9
A
1318 localeID+=idLen;
1319 } else {
1320 idLen = 0;
b75a7d8f
A
1321 }
1322
1323 if(pEnd!=NULL) {
1324 *pEnd=localeID;
1325 }
729e4ab9
A
1326
1327 return idLen;
b75a7d8f
A
1328}
1329
374ca955
A
1330/**
1331 * @param needSeparator if true, then add leading '_' if any variants
1332 * are added to 'variant'
1333 */
1334static int32_t
1335_getVariantEx(const char *localeID,
1336 char prev,
1337 char *variant, int32_t variantCapacity,
1338 UBool needSeparator) {
b75a7d8f
A
1339 int32_t i=0;
1340
1341 /* get one or more variant tags and separate them with '_' */
1342 if(_isIDSeparator(prev)) {
1343 /* get a variant string after a '-' or '_' */
1344 while(!_isTerminator(*localeID)) {
374ca955
A
1345 if (needSeparator) {
1346 if (i<variantCapacity) {
1347 variant[i] = '_';
1348 }
1349 ++i;
1350 needSeparator = FALSE;
1351 }
b75a7d8f
A
1352 if(i<variantCapacity) {
1353 variant[i]=(char)uprv_toupper(*localeID);
1354 if(variant[i]=='-') {
1355 variant[i]='_';
1356 }
1357 }
1358 i++;
1359 localeID++;
1360 }
1361 }
1362
1363 /* if there is no variant tag after a '-' or '_' then look for '@' */
1364 if(i==0) {
1365 if(prev=='@') {
1366 /* keep localeID */
374ca955 1367 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
b75a7d8f
A
1368 ++localeID; /* point after the '@' */
1369 } else {
1370 return 0;
1371 }
1372 while(!_isTerminator(*localeID)) {
374ca955
A
1373 if (needSeparator) {
1374 if (i<variantCapacity) {
1375 variant[i] = '_';
1376 }
1377 ++i;
1378 needSeparator = FALSE;
1379 }
b75a7d8f
A
1380 if(i<variantCapacity) {
1381 variant[i]=(char)uprv_toupper(*localeID);
1382 if(variant[i]=='-' || variant[i]==',') {
1383 variant[i]='_';
1384 }
1385 }
1386 i++;
1387 localeID++;
1388 }
1389 }
374ca955 1390
b75a7d8f
A
1391 return i;
1392}
1393
374ca955
A
1394static int32_t
1395_getVariant(const char *localeID,
1396 char prev,
1397 char *variant, int32_t variantCapacity) {
1398 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1399}
1400
1401/**
1402 * Delete ALL instances of a variant from the given list of one or
1403 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1404 * @param variants the source string of one or more variants,
1405 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1406 * terminated; if it is, trailing zero will NOT be maintained.
1407 * @param variantsLen length of variants
1408 * @param toDelete variant to delete, without separators, e.g. "EURO"
1409 * or "PREEURO"; not zero terminated
1410 * @param toDeleteLen length of toDelete
1411 * @return number of characters deleted from variants
1412 */
1413static int32_t
1414_deleteVariant(char* variants, int32_t variantsLen,
46f4442e
A
1415 const char* toDelete, int32_t toDeleteLen)
1416{
374ca955
A
1417 int32_t delta = 0; /* number of chars deleted */
1418 for (;;) {
1419 UBool flag = FALSE;
1420 if (variantsLen < toDeleteLen) {
1421 return delta;
1422 }
1423 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1424 (variantsLen == toDeleteLen ||
46f4442e
A
1425 (flag=(variants[toDeleteLen] == '_'))))
1426 {
374ca955
A
1427 int32_t d = toDeleteLen + (flag?1:0);
1428 variantsLen -= d;
1429 delta += d;
46f4442e
A
1430 if (variantsLen > 0) {
1431 uprv_memmove(variants, variants+d, variantsLen);
1432 }
374ca955
A
1433 } else {
1434 char* p = _strnchr(variants, variantsLen, '_');
1435 if (p == NULL) {
1436 return delta;
1437 }
1438 ++p;
73c04bcf 1439 variantsLen -= (int32_t)(p - variants);
374ca955
A
1440 variants = p;
1441 }
1442 }
1443}
1444
/* Keyword enumeration */

/*
 * State stored in the 'context' field of a keyword UEnumeration.
 */
typedef struct UKeywordsContext {
    char *keywords; /* start of the keyword list: NUL-separated names ending with an empty string */
    char *current;  /* the next name to hand out; points into 'keywords' */
} UKeywordsContext;
1451
1452static void U_CALLCONV
1453uloc_kw_closeKeywords(UEnumeration *enumerator) {
1454 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1455 uprv_free(enumerator->context);
1456 uprv_free(enumerator);
1457}
1458
1459static int32_t U_CALLCONV
4388f060 1460uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
374ca955
A
1461 char *kw = ((UKeywordsContext *)en->context)->keywords;
1462 int32_t result = 0;
1463 while(*kw) {
1464 result++;
1465 kw += uprv_strlen(kw)+1;
1466 }
1467 return result;
1468}
1469
1470static const char* U_CALLCONV
1471uloc_kw_nextKeyword(UEnumeration* en,
1472 int32_t* resultLength,
4388f060 1473 UErrorCode* /*status*/) {
374ca955
A
1474 const char* result = ((UKeywordsContext *)en->context)->current;
1475 int32_t len = 0;
1476 if(*result) {
73c04bcf 1477 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
374ca955
A
1478 ((UKeywordsContext *)en->context)->current += len+1;
1479 } else {
1480 result = NULL;
1481 }
1482 if (resultLength) {
1483 *resultLength = len;
1484 }
1485 return result;
1486}
1487
1488static void U_CALLCONV
1489uloc_kw_resetKeywords(UEnumeration* en,
4388f060 1490 UErrorCode* /*status*/) {
374ca955
A
1491 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1492}
1493
1494static const UEnumeration gKeywordsEnum = {
1495 NULL,
1496 NULL,
1497 uloc_kw_closeKeywords,
1498 uloc_kw_countKeywords,
1499 uenum_unextDefault,
1500 uloc_kw_nextKeyword,
1501 uloc_kw_resetKeywords
1502};
1503
1504U_CAPI UEnumeration* U_EXPORT2
1505uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
b75a7d8f 1506{
46f4442e
A
1507 UKeywordsContext *myContext = NULL;
1508 UEnumeration *result = NULL;
b75a7d8f 1509
46f4442e
A
1510 if(U_FAILURE(*status)) {
1511 return NULL;
1512 }
1513 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1514 /* Null pointer test */
1515 if (result == NULL) {
1516 *status = U_MEMORY_ALLOCATION_ERROR;
1517 return NULL;
1518 }
1519 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
51004dcb 1520 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
46f4442e
A
1521 if (myContext == NULL) {
1522 *status = U_MEMORY_ALLOCATION_ERROR;
1523 uprv_free(result);
1524 return NULL;
1525 }
1526 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1527 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1528 myContext->keywords[keywordListSize] = 0;
1529 myContext->current = myContext->keywords;
1530 result->context = myContext;
1531 return result;
374ca955
A
1532}
1533
1534U_CAPI UEnumeration* U_EXPORT2
1535uloc_openKeywords(const char* localeID,
1536 UErrorCode* status)
1537{
1538 int32_t i=0;
1539 char keywords[256];
1540 int32_t keywordsCapacity = 256;
729e4ab9
A
1541 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1542 const char* tmpLocaleID;
1543
374ca955 1544 if(status==NULL || U_FAILURE(*status)) {
b75a7d8f
A
1545 return 0;
1546 }
1547
729e4ab9
A
1548 if (_hasBCP47Extension(localeID)) {
1549 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1550 } else {
1551 if (localeID==NULL) {
1552 localeID=uloc_getDefault();
1553 }
1554 tmpLocaleID=localeID;
b75a7d8f
A
1555 }
1556
374ca955 1557 /* Skip the language */
729e4ab9
A
1558 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1559 if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1560 const char *scriptID;
1561 /* Skip the script if available */
729e4ab9
A
1562 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1563 if(scriptID != tmpLocaleID+1) {
374ca955 1564 /* Found optional script */
729e4ab9 1565 tmpLocaleID = scriptID;
374ca955
A
1566 }
1567 /* Skip the Country */
729e4ab9
A
1568 if (_isIDSeparator(*tmpLocaleID)) {
1569 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1570 if(_isIDSeparator(*tmpLocaleID)) {
1571 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
374ca955 1572 }
b75a7d8f
A
1573 }
1574 }
1575
374ca955 1576 /* keywords are located after '@' */
729e4ab9
A
1577 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1578 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
374ca955
A
1579 }
1580
1581 if(i) {
1582 return uloc_openKeywordList(keywords, i, status);
1583 } else {
1584 return NULL;
b75a7d8f 1585 }
b75a7d8f
A
1586}
1587
b75a7d8f 1588
374ca955
A
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'. Both arguments are
   parenthesized so that compound expressions such as (A | B) work. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The special ID "i-default"; deliberately NOT NUL-terminated, so it must
   only be used together with I_DEFAULT_LENGTH. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
/* Number of characters in i_default, as int32_t so that it can be compared
   with int32_t lengths without a signed/unsigned mismatch. */
#define I_DEFAULT_LENGTH ((int32_t)(sizeof(i_default) / sizeof(i_default[0])))
374ca955
A
1598/**
1599 * Canonicalize the given localeID, to level 1 or to level 2,
1600 * depending on the options. To specify level 1, pass in options=0.
1601 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1602 *
1603 * This is the code underlying uloc_getName and uloc_canonicalize.
1604 */
1605static int32_t
1606_canonicalize(const char* localeID,
1607 char* result,
1608 int32_t resultCapacity,
1609 uint32_t options,
1610 UErrorCode* err) {
1611 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1612 char localeBuffer[ULOC_FULLNAME_CAPACITY];
729e4ab9 1613 char tempBuffer[ULOC_FULLNAME_CAPACITY];
46f4442e 1614 const char* origLocaleID;
729e4ab9 1615 const char* tmpLocaleID;
374ca955
A
1616 const char* keywordAssign = NULL;
1617 const char* separatorIndicator = NULL;
1618 const char* addKeyword = NULL;
1619 const char* addValue = NULL;
1620 char* name;
1621 char* variant = NULL; /* pointer into name, or NULL */
374ca955
A
1622
1623 if (U_FAILURE(*err)) {
b75a7d8f
A
1624 return 0;
1625 }
1626
729e4ab9
A
1627 if (_hasBCP47Extension(localeID)) {
1628 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1629 } else {
1630 if (localeID==NULL) {
1631 localeID=uloc_getDefault();
1632 }
1633 tmpLocaleID=localeID;
b75a7d8f 1634 }
729e4ab9
A
1635
1636 origLocaleID=tmpLocaleID;
b75a7d8f 1637
374ca955
A
1638 /* if we are doing a full canonicalization, then put results in
1639 localeBuffer, if necessary; otherwise send them to result. */
729e4ab9 1640 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
4388f060 1641 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
374ca955 1642 name = localeBuffer;
4388f060 1643 nameCapacity = (int32_t)sizeof(localeBuffer);
374ca955
A
1644 } else {
1645 name = result;
1646 nameCapacity = resultCapacity;
1647 }
1648
b75a7d8f 1649 /* get all pieces, one after another, and separate with '_' */
729e4ab9 1650 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
73c04bcf
A
1651
1652 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1653 const char *d = uloc_getDefault();
1654
729e4ab9 1655 len = (int32_t)uprv_strlen(d);
73c04bcf
A
1656
1657 if (name != NULL) {
1658 uprv_strncpy(name, d, len);
1659 }
729e4ab9 1660 } else if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1661 const char *scriptID;
1662
b75a7d8f 1663 ++fieldCount;
374ca955
A
1664 if(len<nameCapacity) {
1665 name[len]='_';
b75a7d8f 1666 }
374ca955
A
1667 ++len;
1668
4388f060
A
1669 scriptSize=ulocimp_getScript(tmpLocaleID+1,
1670 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
374ca955
A
1671 if(scriptSize > 0) {
1672 /* Found optional script */
729e4ab9 1673 tmpLocaleID = scriptID;
b75a7d8f 1674 ++fieldCount;
374ca955 1675 len+=scriptSize;
729e4ab9 1676 if (_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1677 /* If there is something else, then we add the _ */
1678 if(len<nameCapacity) {
1679 name[len]='_';
1680 }
1681 ++len;
1682 }
1683 }
1684
729e4ab9
A
1685 if (_isIDSeparator(*tmpLocaleID)) {
1686 const char *cntryID;
4388f060
A
1687 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1688 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
729e4ab9
A
1689 if (cntrySize > 0) {
1690 /* Found optional country */
1691 tmpLocaleID = cntryID;
1692 len+=cntrySize;
1693 }
1694 if(_isIDSeparator(*tmpLocaleID)) {
51004dcb
A
1695 /* If there is something else, then we add the _ if we found country before. */
1696 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
729e4ab9
A
1697 ++fieldCount;
1698 if(len<nameCapacity) {
1699 name[len]='_';
1700 }
1701 ++len;
374ca955 1702 }
729e4ab9 1703
4388f060
A
1704 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1705 (len<nameCapacity ? name+len : NULL), nameCapacity-len);
374ca955 1706 if (variantSize > 0) {
4388f060 1707 variant = len<nameCapacity ? name+len : NULL;
374ca955 1708 len += variantSize;
729e4ab9 1709 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
374ca955 1710 }
b75a7d8f 1711 }
b75a7d8f
A
1712 }
1713 }
1714
374ca955 1715 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
729e4ab9 1716 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
374ca955 1717 UBool done = FALSE;
b75a7d8f 1718 do {
729e4ab9 1719 char c = *tmpLocaleID;
374ca955
A
1720 switch (c) {
1721 case 0:
1722 case '@':
1723 done = TRUE;
1724 break;
1725 default:
1726 if (len<nameCapacity) {
1727 name[len] = c;
1728 }
1729 ++len;
729e4ab9 1730 ++tmpLocaleID;
374ca955
A
1731 break;
1732 }
1733 } while (!done);
1734 }
1735
1736 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
729e4ab9
A
1737 After this, tmpLocaleID either points to '@' or is NULL */
1738 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1739 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1740 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
374ca955
A
1741 }
1742
1743 /* Copy POSIX-style variant, if any [mr@FOO] */
1744 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
729e4ab9 1745 tmpLocaleID != NULL && keywordAssign == NULL) {
374ca955 1746 for (;;) {
729e4ab9 1747 char c = *tmpLocaleID;
374ca955
A
1748 if (c == 0) {
1749 break;
1750 }
1751 if (len<nameCapacity) {
1752 name[len] = c;
1753 }
1754 ++len;
729e4ab9 1755 ++tmpLocaleID;
374ca955
A
1756 }
1757 }
1758
1759 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1760 /* Handle @FOO variant if @ is present and not followed by = */
729e4ab9 1761 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
374ca955
A
1762 int32_t posixVariantSize;
1763 /* Add missing '_' if needed */
1764 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1765 do {
1766 if(len<nameCapacity) {
1767 name[len]='_';
1768 }
1769 ++len;
1770 ++fieldCount;
1771 } while(fieldCount<2);
1772 }
729e4ab9 1773 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
374ca955
A
1774 (UBool)(variantSize > 0));
1775 if (posixVariantSize > 0) {
1776 if (variant == NULL) {
1777 variant = name+len;
1778 }
1779 len += posixVariantSize;
1780 variantSize += posixVariantSize;
b75a7d8f 1781 }
374ca955
A
1782 }
1783
46f4442e
A
1784 /* Handle generic variants first */
1785 if (variant) {
1786 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1787 const char* variantToCompare = VARIANT_MAP[j].variant;
1788 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1789 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1790 len -= variantLen;
1791 if (variantLen > 0) {
b25be066 1792 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
46f4442e
A
1793 --len;
1794 }
1795 addKeyword = VARIANT_MAP[j].keyword;
1796 addValue = VARIANT_MAP[j].value;
1797 break;
1798 }
1799 }
b25be066 1800 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
46f4442e
A
1801 --len;
1802 }
374ca955
A
1803 }
1804
1805 /* Look up the ID in the canonicalization map */
1806 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1807 const char* id = CANONICALIZE_MAP[j].id;
73c04bcf 1808 int32_t n = (int32_t)uprv_strlen(id);
374ca955 1809 if (len == n && uprv_strncmp(name, id, n) == 0) {
729e4ab9 1810 if (n == 0 && tmpLocaleID != NULL) {
374ca955
A
1811 break; /* Don't remap "" if keywords present */
1812 }
1813 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
46f4442e
A
1814 if (CANONICALIZE_MAP[j].keyword) {
1815 addKeyword = CANONICALIZE_MAP[j].keyword;
1816 addValue = CANONICALIZE_MAP[j].value;
1817 }
374ca955
A
1818 break;
1819 }
1820 }
374ca955
A
1821 }
1822
1823 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
729e4ab9 1824 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
374ca955
A
1825 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1826 if(len<nameCapacity) {
1827 name[len]='@';
1828 }
1829 ++len;
b75a7d8f 1830 ++fieldCount;
4388f060
A
1831 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1832 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
374ca955 1833 } else if (addKeyword != NULL) {
51004dcb 1834 U_ASSERT(addValue != NULL && len < nameCapacity);
374ca955
A
1835 /* inelegant but works -- later make _getKeywords do this? */
1836 len += _copyCount(name+len, nameCapacity-len, "@");
1837 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1838 len += _copyCount(name+len, nameCapacity-len, "=");
1839 len += _copyCount(name+len, nameCapacity-len, addValue);
1840 }
1841 }
1842
46f4442e 1843 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
374ca955
A
1844 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1845 }
1846
1847 return u_terminateChars(result, resultCapacity, len, err);
1848}
1849
1850/* ### ID parsing API **************************************************/
1851
1852U_CAPI int32_t U_EXPORT2
1853uloc_getParent(const char* localeID,
1854 char* parent,
1855 int32_t parentCapacity,
1856 UErrorCode* err)
1857{
1858 const char *lastUnderscore;
1859 int32_t i;
1860
1861 if (U_FAILURE(*err))
1862 return 0;
1863
1864 if (localeID == NULL)
1865 localeID = uloc_getDefault();
1866
1867 lastUnderscore=uprv_strrchr(localeID, '_');
1868 if(lastUnderscore!=NULL) {
1869 i=(int32_t)(lastUnderscore-localeID);
1870 } else {
1871 i=0;
b75a7d8f 1872 }
374ca955 1873
73c04bcf 1874 if(i>0 && parent != localeID) {
374ca955
A
1875 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1876 }
1877 return u_terminateChars(parent, parentCapacity, i, err);
b75a7d8f 1878}
374ca955
A
1879
1880U_CAPI int32_t U_EXPORT2
1881uloc_getLanguage(const char* localeID,
1882 char* language,
1883 int32_t languageCapacity,
1884 UErrorCode* err)
1885{
1886 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1887 int32_t i=0;
1888
1889 if (err==NULL || U_FAILURE(*err)) {
1890 return 0;
1891 }
1892
1893 if(localeID==NULL) {
1894 localeID=uloc_getDefault();
1895 }
1896
729e4ab9 1897 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
374ca955
A
1898 return u_terminateChars(language, languageCapacity, i, err);
1899}
1900
1901U_CAPI int32_t U_EXPORT2
1902uloc_getScript(const char* localeID,
1903 char* script,
1904 int32_t scriptCapacity,
1905 UErrorCode* err)
1906{
1907 int32_t i=0;
1908
1909 if(err==NULL || U_FAILURE(*err)) {
1910 return 0;
1911 }
1912
1913 if(localeID==NULL) {
1914 localeID=uloc_getDefault();
1915 }
1916
1917 /* skip the language */
729e4ab9 1918 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955 1919 if(_isIDSeparator(*localeID)) {
729e4ab9 1920 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
374ca955
A
1921 }
1922 return u_terminateChars(script, scriptCapacity, i, err);
1923}
1924
1925U_CAPI int32_t U_EXPORT2
1926uloc_getCountry(const char* localeID,
1927 char* country,
1928 int32_t countryCapacity,
1929 UErrorCode* err)
1930{
1931 int32_t i=0;
1932
1933 if(err==NULL || U_FAILURE(*err)) {
1934 return 0;
1935 }
1936
1937 if(localeID==NULL) {
1938 localeID=uloc_getDefault();
1939 }
1940
1941 /* Skip the language */
729e4ab9 1942 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955
A
1943 if(_isIDSeparator(*localeID)) {
1944 const char *scriptID;
1945 /* Skip the script if available */
729e4ab9 1946 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
374ca955
A
1947 if(scriptID != localeID+1) {
1948 /* Found optional script */
1949 localeID = scriptID;
1950 }
1951 if(_isIDSeparator(*localeID)) {
729e4ab9 1952 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
374ca955
A
1953 }
1954 }
1955 return u_terminateChars(country, countryCapacity, i, err);
1956}
1957
1958U_CAPI int32_t U_EXPORT2
1959uloc_getVariant(const char* localeID,
1960 char* variant,
1961 int32_t variantCapacity,
1962 UErrorCode* err)
1963{
729e4ab9
A
1964 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1965 const char* tmpLocaleID;
374ca955 1966 int32_t i=0;
374ca955
A
1967
1968 if(err==NULL || U_FAILURE(*err)) {
1969 return 0;
1970 }
1971
729e4ab9
A
1972 if (_hasBCP47Extension(localeID)) {
1973 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1974 } else {
1975 if (localeID==NULL) {
1976 localeID=uloc_getDefault();
1977 }
1978 tmpLocaleID=localeID;
374ca955
A
1979 }
1980
1981 /* Skip the language */
729e4ab9
A
1982 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1983 if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1984 const char *scriptID;
1985 /* Skip the script if available */
729e4ab9
A
1986 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1987 if(scriptID != tmpLocaleID+1) {
374ca955 1988 /* Found optional script */
729e4ab9 1989 tmpLocaleID = scriptID;
374ca955
A
1990 }
1991 /* Skip the Country */
729e4ab9
A
1992 if (_isIDSeparator(*tmpLocaleID)) {
1993 const char *cntryID;
1994 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
1995 if (cntryID != tmpLocaleID+1) {
1996 /* Found optional country */
1997 tmpLocaleID = cntryID;
1998 }
1999 if(_isIDSeparator(*tmpLocaleID)) {
2000 /* If there was no country ID, skip a possible extra IDSeparator */
2001 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2002 tmpLocaleID++;
2003 }
2004 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
374ca955
A
2005 }
2006 }
2007 }
2008
2009 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2010 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2011/*
2012 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2013 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2014 }
2015*/
2016 return u_terminateChars(variant, variantCapacity, i, err);
2017}
2018
2019U_CAPI int32_t U_EXPORT2
2020uloc_getName(const char* localeID,
2021 char* name,
2022 int32_t nameCapacity,
2023 UErrorCode* err)
2024{
2025 return _canonicalize(localeID, name, nameCapacity, 0, err);
2026}
2027
2028U_CAPI int32_t U_EXPORT2
2029uloc_getBaseName(const char* localeID,
2030 char* name,
2031 int32_t nameCapacity,
2032 UErrorCode* err)
2033{
2034 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2035}
2036
2037U_CAPI int32_t U_EXPORT2
2038uloc_canonicalize(const char* localeID,
2039 char* name,
2040 int32_t nameCapacity,
2041 UErrorCode* err)
2042{
2043 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2044}
2045
b75a7d8f
A
2046U_CAPI const char* U_EXPORT2
2047uloc_getISO3Language(const char* localeID)
2048{
374ca955
A
2049 int16_t offset;
2050 char lang[ULOC_LANG_CAPACITY];
2051 UErrorCode err = U_ZERO_ERROR;
2052
2053 if (localeID == NULL)
2054 {
2055 localeID = uloc_getDefault();
2056 }
2057 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2058 if (U_FAILURE(err))
2059 return "";
2060 offset = _findIndex(LANGUAGES, lang);
2061 if (offset < 0)
2062 return "";
2063 return LANGUAGES_3[offset];
b75a7d8f
A
2064}
2065
2066U_CAPI const char* U_EXPORT2
2067uloc_getISO3Country(const char* localeID)
2068{
2069 int16_t offset;
374ca955 2070 char cntry[ULOC_LANG_CAPACITY];
b75a7d8f
A
2071 UErrorCode err = U_ZERO_ERROR;
2072
2073 if (localeID == NULL)
2074 {
2075 localeID = uloc_getDefault();
2076 }
374ca955 2077 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
b75a7d8f
A
2078 if (U_FAILURE(err))
2079 return "";
374ca955 2080 offset = _findIndex(COUNTRIES, cntry);
b75a7d8f
A
2081 if (offset < 0)
2082 return "";
2083
374ca955 2084 return COUNTRIES_3[offset];
b75a7d8f
A
2085}
2086
2087U_CAPI uint32_t U_EXPORT2
2088uloc_getLCID(const char* localeID)
2089{
374ca955
A
2090 UErrorCode status = U_ZERO_ERROR;
2091 char langID[ULOC_FULLNAME_CAPACITY];
2092
2093 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2094 if (U_FAILURE(status)) {
2095 return 0;
b75a7d8f 2096 }
374ca955
A
2097
2098 return uprv_convertToLCID(langID, localeID, &status);
2099}
2100
73c04bcf
A
2101U_CAPI int32_t U_EXPORT2
2102uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2103 UErrorCode *status)
2104{
2105 int32_t length;
2106 const char *posix = uprv_convertToPosix(hostid, status);
2107 if (U_FAILURE(*status) || posix == NULL) {
2108 return 0;
2109 }
2110 length = (int32_t)uprv_strlen(posix);
2111 if (length+1 > localeCapacity) {
2112 *status = U_BUFFER_OVERFLOW_ERROR;
2113 }
2114 else {
2115 uprv_strcpy(locale, posix);
2116 }
2117 return length;
2118}
2119
374ca955
A
2120/* ### Default locale **************************************************/
2121
2122U_CAPI const char* U_EXPORT2
2123uloc_getDefault()
2124{
2125 return locale_get_default();
2126}
2127
2128U_CAPI void U_EXPORT2
2129uloc_setDefault(const char* newDefaultLocale,
2130 UErrorCode* err)
2131{
2132 if (U_FAILURE(*err))
2133 return;
2134 /* the error code isn't currently used for anything by this function*/
b75a7d8f 2135
374ca955
A
2136 /* propagate change to C++ */
2137 locale_set_default(newDefaultLocale);
b75a7d8f
A
2138}
2139
729e4ab9 2140/**
51004dcb 2141 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
729e4ab9
A
2142 * to an array of pointers to arrays of char. All of these pointers are owned
2143 * by ICU-- do not delete them, and do not write through them. The array is
2144 * terminated with a null pointer.
2145 */
2146U_CAPI const char* const* U_EXPORT2
2147uloc_getISOLanguages()
2148{
2149 return LANGUAGES;
2150}
374ca955 2151
729e4ab9
A
2152/**
2153 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2154 * pointer to an array of pointers to arrays of char. All of these pointers are
2155 * owned by ICU-- do not delete them, and do not write through them. The array is
2156 * terminated with a null pointer.
b75a7d8f 2157 */
729e4ab9
A
2158U_CAPI const char* const* U_EXPORT2
2159uloc_getISOCountries()
b75a7d8f 2160{
729e4ab9
A
2161 return COUNTRIES;
2162}
73c04bcf 2163
b75a7d8f 2164
729e4ab9
A
2165/* this function to be moved into cstring.c later */
2166static char gDecimal = 0;
b75a7d8f 2167
729e4ab9
A
2168static /* U_CAPI */
2169double
2170/* U_EXPORT2 */
2171_uloc_strtod(const char *start, char **end) {
2172 char *decimal;
2173 char *myEnd;
2174 char buf[30];
2175 double rv;
2176 if (!gDecimal) {
2177 char rep[5];
2178 /* For machines that decide to change the decimal on you,
2179 and try to be too smart with localization.
2180 This normally should be just a '.'. */
2181 sprintf(rep, "%+1.1f", 1.0);
2182 gDecimal = rep[2];
b75a7d8f 2183 }
b75a7d8f 2184
729e4ab9
A
2185 if(gDecimal == '.') {
2186 return uprv_strtod(start, end); /* fall through to OS */
b75a7d8f 2187 } else {
729e4ab9
A
2188 uprv_strncpy(buf, start, 29);
2189 buf[29]=0;
2190 decimal = uprv_strchr(buf, '.');
2191 if(decimal) {
2192 *decimal = gDecimal;
46f4442e 2193 } else {
729e4ab9 2194 return uprv_strtod(start, end); /* no decimal point */
46f4442e 2195 }
729e4ab9
A
2196 rv = uprv_strtod(buf, &myEnd);
2197 if(end) {
2198 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
b75a7d8f 2199 }
729e4ab9 2200 return rv;
374ca955 2201 }
374ca955
A
2202}
2203
729e4ab9
A
2204typedef struct {
2205 float q;
2206 int32_t dummy; /* to avoid uninitialized memory copy from qsort */
2207 char *locale;
2208} _acceptLangItem;
b75a7d8f 2209
729e4ab9 2210static int32_t U_CALLCONV
4388f060 2211uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
729e4ab9
A
2212{
2213 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2214 const _acceptLangItem *bb = (const _acceptLangItem*)b;
b75a7d8f 2215
729e4ab9
A
2216 int32_t rc = 0;
2217 if(bb->q < aa->q) {
2218 rc = -1; /* A > B */
2219 } else if(bb->q > aa->q) {
2220 rc = 1; /* A < B */
2221 } else {
2222 rc = 0; /* A = B */
b75a7d8f
A
2223 }
2224
729e4ab9
A
2225 if(rc==0) {
2226 rc = uprv_stricmp(aa->locale, bb->locale);
b75a7d8f
A
2227 }
2228
729e4ab9
A
2229#if defined(ULOC_DEBUG)
2230 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2231 aa->locale, aa->q,
2232 bb->locale, bb->q,
2233 rc);*/
2234#endif
374ca955 2235
729e4ab9 2236 return rc;
374ca955
A
2237}
2238
729e4ab9
A
2239/*
2240mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2241*/
374ca955 2242
b75a7d8f 2243U_CAPI int32_t U_EXPORT2
729e4ab9
A
2244uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2245 const char *httpAcceptLanguage,
2246 UEnumeration* availableLocales,
2247 UErrorCode *status)
374ca955 2248{
729e4ab9
A
2249 _acceptLangItem *j;
2250 _acceptLangItem smallBuffer[30];
2251 char **strs;
2252 char tmp[ULOC_FULLNAME_CAPACITY +1];
2253 int32_t n = 0;
2254 const char *itemEnd;
2255 const char *paramEnd;
2256 const char *s;
2257 const char *t;
2258 int32_t res;
2259 int32_t i;
2260 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2261 int32_t jSize;
2262 char *tempstr; /* Use for null pointer check */
b75a7d8f 2263
729e4ab9
A
2264 j = smallBuffer;
2265 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2266 if(U_FAILURE(*status)) {
2267 return -1;
b75a7d8f
A
2268 }
2269
729e4ab9
A
2270 for(s=httpAcceptLanguage;s&&*s;) {
2271 while(isspace(*s)) /* eat space at the beginning */
2272 s++;
2273 itemEnd=uprv_strchr(s,',');
2274 paramEnd=uprv_strchr(s,';');
2275 if(!itemEnd) {
2276 itemEnd = httpAcceptLanguage+l; /* end of string */
b75a7d8f 2277 }
729e4ab9
A
2278 if(paramEnd && paramEnd<itemEnd) {
2279 /* semicolon (;) is closer than end (,) */
2280 t = paramEnd+1;
2281 if(*t=='q') {
2282 t++;
2283 }
2284 while(isspace(*t)) {
2285 t++;
2286 }
2287 if(*t=='=') {
2288 t++;
2289 }
2290 while(isspace(*t)) {
2291 t++;
2292 }
2293 j[n].q = (float)_uloc_strtod(t,NULL);
2294 } else {
2295 /* no semicolon - it's 1.0 */
2296 j[n].q = 1.0f;
2297 paramEnd = itemEnd;
374ca955 2298 }
46f4442e 2299 j[n].dummy=0;
374ca955
A
2300 /* eat spaces prior to semi */
2301 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2302 ;
46f4442e
A
2303 /* Check for null pointer from uprv_strndup */
2304 tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2305 if (tempstr == NULL) {
2306 *status = U_MEMORY_ALLOCATION_ERROR;
2307 return -1;
2308 }
2309 j[n].locale = tempstr;
374ca955
A
2310 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2311 if(strcmp(j[n].locale,tmp)) {
2312 uprv_free(j[n].locale);
2313 j[n].locale=uprv_strdup(tmp);
2314 }
2315#if defined(ULOC_DEBUG)
2316 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2317#endif
2318 n++;
2319 s = itemEnd;
2320 while(*s==',') { /* eat duplicate commas */
2321 s++;
2322 }
2323 if(n>=jSize) {
46f4442e 2324 if(j==smallBuffer) { /* overflowed the small buffer. */
51004dcb 2325 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
46f4442e
A
2326 if(j!=NULL) {
2327 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2328 }
374ca955 2329#if defined(ULOC_DEBUG)
46f4442e 2330 fprintf(stderr,"malloced at size %d\n", jSize);
374ca955 2331#endif
46f4442e 2332 } else {
51004dcb 2333 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
374ca955 2334#if defined(ULOC_DEBUG)
46f4442e 2335 fprintf(stderr,"re-alloced at size %d\n", jSize);
374ca955 2336#endif
46f4442e
A
2337 }
2338 jSize *= 2;
2339 if(j==NULL) {
2340 *status = U_MEMORY_ALLOCATION_ERROR;
2341 return -1;
2342 }
374ca955
A
2343 }
2344 }
2345 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2346 if(U_FAILURE(*status)) {
46f4442e 2347 if(j != smallBuffer) {
374ca955 2348#if defined(ULOC_DEBUG)
46f4442e 2349 fprintf(stderr,"freeing j %p\n", j);
374ca955 2350#endif
46f4442e
A
2351 uprv_free(j);
2352 }
2353 return -1;
374ca955 2354 }
51004dcb 2355 strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
46f4442e
A
2356 /* Check for null pointer */
2357 if (strs == NULL) {
2358 uprv_free(j); /* Free to avoid memory leak */
2359 *status = U_MEMORY_ALLOCATION_ERROR;
2360 return -1;
2361 }
374ca955
A
2362 for(i=0;i<n;i++) {
2363#if defined(ULOC_DEBUG)
2364 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2365#endif
2366 strs[i]=j[i].locale;
2367 }
2368 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2369 (const char**)strs, n, availableLocales, status);
2370 for(i=0;i<n;i++) {
2371 uprv_free(strs[i]);
2372 }
2373 uprv_free(strs);
2374 if(j != smallBuffer) {
2375#if defined(ULOC_DEBUG)
46f4442e 2376 fprintf(stderr,"freeing j %p\n", j);
374ca955 2377#endif
46f4442e 2378 uprv_free(j);
374ca955
A
2379 }
2380 return res;
2381}
2382
2383
2384U_CAPI int32_t U_EXPORT2
2385uloc_acceptLanguage(char *result, int32_t resultAvailable,
2386 UAcceptResult *outResult, const char **acceptList,
2387 int32_t acceptListCount,
2388 UEnumeration* availableLocales,
2389 UErrorCode *status)
2390{
2391 int32_t i,j;
2392 int32_t len;
2393 int32_t maxLen=0;
2394 char tmp[ULOC_FULLNAME_CAPACITY+1];
2395 const char *l;
2396 char **fallbackList;
2397 if(U_FAILURE(*status)) {
2398 return -1;
2399 }
51004dcb 2400 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
374ca955 2401 if(fallbackList==NULL) {
46f4442e
A
2402 *status = U_MEMORY_ALLOCATION_ERROR;
2403 return -1;
374ca955
A
2404 }
2405 for(i=0;i<acceptListCount;i++) {
2406#if defined(ULOC_DEBUG)
2407 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2408#endif
2409 while((l=uenum_next(availableLocales, NULL, status))) {
2410#if defined(ULOC_DEBUG)
2411 fprintf(stderr," %s\n", l);
2412#endif
73c04bcf 2413 len = (int32_t)uprv_strlen(l);
374ca955
A
2414 if(!uprv_strcmp(acceptList[i], l)) {
2415 if(outResult) {
2416 *outResult = ULOC_ACCEPT_VALID;
2417 }
2418#if defined(ULOC_DEBUG)
2419 fprintf(stderr, "MATCH! %s\n", l);
2420#endif
2421 if(len>0) {
2422 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2423 }
2424 for(j=0;j<i;j++) {
2425 uprv_free(fallbackList[j]);
2426 }
2427 uprv_free(fallbackList);
2428 return u_terminateChars(result, resultAvailable, len, status);
2429 }
2430 if(len>maxLen) {
2431 maxLen = len;
2432 }
2433 }
2434 uenum_reset(availableLocales, status);
2435 /* save off parent info */
2436 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2437 fallbackList[i] = uprv_strdup(tmp);
2438 } else {
2439 fallbackList[i]=0;
2440 }
2441 }
2442
2443 for(maxLen--;maxLen>0;maxLen--) {
2444 for(i=0;i<acceptListCount;i++) {
2445 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2446#if defined(ULOC_DEBUG)
2447 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2448#endif
2449 while((l=uenum_next(availableLocales, NULL, status))) {
2450#if defined(ULOC_DEBUG)
2451 fprintf(stderr," %s\n", l);
2452#endif
73c04bcf 2453 len = (int32_t)uprv_strlen(l);
374ca955
A
2454 if(!uprv_strcmp(fallbackList[i], l)) {
2455 if(outResult) {
2456 *outResult = ULOC_ACCEPT_FALLBACK;
2457 }
2458#if defined(ULOC_DEBUG)
2459 fprintf(stderr, "fallback MATCH! %s\n", l);
2460#endif
2461 if(len>0) {
2462 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2463 }
73c04bcf
A
2464 for(j=0;j<acceptListCount;j++) {
2465 uprv_free(fallbackList[j]);
374ca955
A
2466 }
2467 uprv_free(fallbackList);
73c04bcf 2468 return u_terminateChars(result, resultAvailable, len, status);
374ca955
A
2469 }
2470 }
2471 uenum_reset(availableLocales, status);
2472
2473 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2474 uprv_free(fallbackList[i]);
2475 fallbackList[i] = uprv_strdup(tmp);
2476 } else {
2477 uprv_free(fallbackList[i]);
2478 fallbackList[i]=0;
2479 }
2480 }
2481 }
2482 if(outResult) {
2483 *outResult = ULOC_ACCEPT_FAILED;
2484 }
2485 }
2486 for(i=0;i<acceptListCount;i++) {
2487 uprv_free(fallbackList[i]);
2488 }
2489 uprv_free(fallbackList);
2490 return -1;
b75a7d8f 2491}
374ca955
A
2492
2493/*eof*/