]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/uloc.c
ICU-8.11.1.tar.gz
[apple/icu.git] / icuSources / common / uloc.c
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
73c04bcf 3* Copyright (C) 1997-2007, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6*
7* File ULOC.CPP
8*
9* Modification History:
10*
11* Date Name Description
12* 04/01/97 aliu Creation.
13* 08/21/98 stephen JDK 1.2 sync
14* 12/08/98 rtg New Locale implementation and C API
15* 03/15/99 damiba overhaul.
16* 04/06/99 stephen changed setDefault() to realloc and copy
17* 06/14/99 stephen Changed calls to ures_open for new params
18* 07/21/99 stephen Modified setDefault() to propagate to C++
374ca955
A
19* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20* brought canonicalization code into line with spec
b75a7d8f
A
21*****************************************************************************/
22
23/*
24 POSIX's locale format, from putil.c: [no spaces]
25
26 ll [ _CC ] [ . MM ] [ @ VV]
27
28 l = lang, C = ctry, M = charmap, V = variant
29*/
30
b75a7d8f
A
31#include "unicode/utypes.h"
32#include "unicode/ustring.h"
33#include "unicode/uloc.h"
73c04bcf 34#include "unicode/ures.h"
b75a7d8f 35
374ca955 36#include "putilimp.h"
b75a7d8f 37#include "ustr_imp.h"
374ca955 38#include "ulocimp.h"
b75a7d8f
A
39#include "uresimp.h"
40#include "umutex.h"
41#include "cstring.h"
42#include "cmemory.h"
43#include "ucln_cmn.h"
374ca955
A
44#include "locmap.h"
45#include "uarrsort.h"
46#include "uenumimp.h"
47#include "uassert.h"
b75a7d8f 48
374ca955
A
49#include <stdio.h> /* for sprintf */
50
51/* ### Declarations **************************************************/
b75a7d8f
A
52
53/* Locale stuff from locid.cpp */
54U_CFUNC void locale_set_default(const char *id);
55U_CFUNC const char *locale_get_default(void);
374ca955
A
56U_CFUNC int32_t
57locale_getKeywords(const char *localeID,
58 char prev,
59 char *keywords, int32_t keywordCapacity,
60 char *values, int32_t valuesCapacity, int32_t *valLen,
61 UBool valuesToo,
62 UErrorCode *status);
63
64/* ### Constants **************************************************/
b75a7d8f
A
65
66/* These strings describe the resources we attempt to load from
67 the locale ResourceBundle data file.*/
374ca955
A
68static const char _kLanguages[] = "Languages";
69static const char _kScripts[] = "Scripts";
70static const char _kCountries[] = "Countries";
71static const char _kVariants[] = "Variants";
72static const char _kKeys[] = "Keys";
73static const char _kTypes[] = "Types";
b75a7d8f 74static const char _kIndexLocaleName[] = "res_index";
374ca955
A
75static const char _kRootName[] = "root";
76static const char _kIndexTag[] = "InstalledLocales";
77static const char _kCurrency[] = "currency";
78static const char _kCurrencies[] = "Currencies";
b75a7d8f
A
79static char** _installedLocales = NULL;
80static int32_t _installedLocalesCount = 0;
81
374ca955
A
82/* ### Data tables **************************************************/
83
84/**
85 * Table of language codes, both 2- and 3-letter, with preference
86 * given to 2-letter codes where possible. Includes 3-letter codes
87 * that lack a 2-letter equivalent.
88 *
89 * This list must be in sorted order. This list is returned directly
90 * to the user by some API.
91 *
92 * This list must be kept in sync with LANGUAGES_3, with corresponding
93 * entries matched.
94 *
95 * This table should be terminated with a NULL entry, followed by a
96 * second list, and another NULL entry. The first list is visible to
97 * user code when this array is returned by API. The second list
98 * contains codes we support, but do not expose through user API.
99 *
100 * Notes
101 *
102 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
103 * include the revisions up to 2001/7/27 *CWB*
104 *
105 * The 3 character codes are the terminology codes like RFC 3066. This
106 * is compatible with prior ICU codes
107 *
108 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
109 * table but now at the end of the table because 3 character codes are
110 * duplicates. This avoids bad searches going from 3 to 2 character
111 * codes.
112 *
113 * The range qaa-qtz is reserved for local use
114 */
/* Two NULL-terminated lists stored back to back: the sorted, user-visible
   codes first, then the withdrawn codes that are supported but not exposed. */
115static const char * const LANGUAGES[] = {
116 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa",
73c04bcf
A
117 "afh", "ain", "ak", "akk", "ale", "alg", "alt", "am", "an",
118 "ang", "anp", "apa",
b75a7d8f
A
119 "ar", "arc", "arn", "arp", "art", "arw", "as", "ast",
120 "ath", "aus", "av", "awa", "ay", "az", "ba", "bad",
374ca955 121 "bai", "bal", "ban", "bas", "bat", "be", "bej",
b75a7d8f
A
122 "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin",
123 "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs",
374ca955 124 "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau",
73c04bcf 125 "cch", "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm",
b75a7d8f 126 "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop",
374ca955
A
127 "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus",
128 "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den",
129 "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu",
b75a7d8f 130 "dz", "ee", "efi", "egy", "eka", "el", "elx", "en",
374ca955 131 "enm", "eo", "es", "et", "eu", "ewo", "fa",
73c04bcf
A
132 "fan", "fat", "ff", "fi", "fil", "fiu", "fj", "fo", "fon",
133 "fr", "frm", "fro", "frr", "frs", "fur", "fy",
134 "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil",
135 "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",
136 "grc", "gsw", "gu", "gv", "gwi",
137 "ha", "hai", "haw", "he", "hi", "hil", "him",
374ca955 138 "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz",
b75a7d8f 139 "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik",
374ca955
A
140 "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it",
141 "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab",
73c04bcf 142 "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi",
b75a7d8f 143 "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn",
73c04bcf 144 "ko", "kok", "kos", "kpe", "kr", "krc", "krl", "kro", "kru", "ks",
b75a7d8f 145 "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad",
374ca955 146 "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol",
b75a7d8f
A
147 "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus",
148 "lv", "mad", "mag", "mai", "mak", "man", "map", "mas",
374ca955 149 "mdf", "mdr", "men", "mg", "mga", "mh", "mi", "mic", "min",
b75a7d8f
A
150 "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno",
151 "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun",
73c04bcf 152 "mus", "mwl", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap",
b75a7d8f 153 "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic",
374ca955
A
154 "niu", "nl", "nn", "no", "nog", "non", "nr", "nso", "nub",
155 "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj",
b75a7d8f
A
156 "om", "or", "os", "osa", "ota", "oto", "pa", "paa",
157 "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
158 "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu",
159 "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom",
73c04bcf
A
160 "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam",
161 "sas", "sat", "sc", "scn", "sco", "sd", "se", "sel", "sem",
b75a7d8f
A
162 "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit",
163 "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
164 "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
73c04bcf 165 "srn", "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux",
b75a7d8f
A
166 "sv", "sw", "syr", "ta", "tai", "te", "tem", "ter",
167 "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl",
374ca955 168 "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr",
b75a7d8f 169 "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
374ca955 170 "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur",
b75a7d8f 171 "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak",
374ca955 172 "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap",
b75a7d8f 173 "yi", "yo", "ypk", "za", "zap", "zen", "zh", "znd",
73c04bcf 174 "zu", "zun", "zxx",
b75a7d8f
A
/* sentinel: end of the first (user-visible, sorted) list */
175NULL,
176 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
/* sentinel: end of the second (supported but not exposed) list */
177NULL
178};
73c04bcf
A
/* Withdrawn 2-letter language codes, each list ending in two NULL entries.
   NOTE(review): the two arrays have the same shape and appear to be paired
   by index (in->id, iw->he, ji->yi, jw->jv) - confirm against the lookup
   code that consumes them. */
179static const char* const DEPRECATED_LANGUAGES[]={
180 "in", "iw", "ji", "jw", NULL, NULL
181};
182static const char* const REPLACEMENT_LANGUAGES[]={
183 "id", "he", "yi", "jv", NULL, NULL
184};
b75a7d8f 185
374ca955
A
186/**
187 * Table of 3-letter language codes.
188 *
189 * This is a lookup table used to convert 3-letter language codes to
190 * their 2-letter equivalent, where possible. It must be kept in sync
191 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
192 * same language as LANGUAGES_3[i]. The commented-out lines are
193 * copied from LANGUAGES to make eyeballing this baby easier.
194 *
195 * Where a 3-letter language code has no 2-letter equivalent, the
196 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
197 *
198 * This table should be terminated with a NULL entry, followed by a
199 * second list, and another NULL entry. The two lists correspond to
200 * the two lists in LANGUAGES.
201 */
/*
 * Three-letter language codes, index-aligned with LANGUAGES: for every
 * valid i, LANGUAGES[i] and LANGUAGES_3[i] must name the same language.
 * Each commented row repeats the matching LANGUAGES entries so the pairing
 * can be checked by eye. Where no 2-letter code exists, the 3-letter code
 * appears in both tables.
 *
 * Layout: first list, NULL, second list (codes for the withdrawn 2-letter
 * entries of LANGUAGES), NULL.
 *
 * Fixes relative to the previous revision of this table:
 *  - "frr", "frs", "fur" are now in the same order as in LANGUAGES (they
 *    were stored as "fur", "frr", "frs", mis-pairing three entries);
 *  - the slot for "kg" now holds the 3-letter code "kon" instead of
 *    repeating the 2-letter code.
 */
static const char * const LANGUAGES_3[] = {
/*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
/*  "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
    "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
/*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
    "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
/*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
    "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
/*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
    "bai", "bal", "ban", "bas", "bat", "bel", "bej",
/*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
    "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
/*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
    "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
/*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
    "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
/*  "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
    "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
/*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
    "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
/*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
    "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
/*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
    "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
/*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
/*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
    "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
/*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
    "enm", "epo", "spa", "est", "eus", "ewo", "fas",
/*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
    "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
/*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gay",    */
    "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
/*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
    "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
/*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",     */
    "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
/*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
    "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
/*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
    "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
/*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
    "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
/*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",     */
    "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
/*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",    */
    "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
/*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",    */
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kon", "kha", "khi",
/*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
    "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
/*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",     */
    "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
/*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
    "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
/*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
    "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
/*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
    "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
/*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
    "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
/*  "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
    "mdf", "mdr", "men", "mlg", "mga", "mah", "mri", "mic", "min",
/*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
    "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
/*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
    "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
/*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
    "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
/*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
    "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
/*  "niu", "nl",  "nn",  "no",  "nog", "non", "nr",  "nso", "nub",    */
    "niu", "nld", "nno", "nor", "nog", "non", "nbl", "nso", "nub",
/*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
    "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
/*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
    "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
/*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
/*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
    "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
/*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
/*  "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
    "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
/*  "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",    */
    "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
/*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
    "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
/*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
/*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
/*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
    "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
/*  "sv",  "sw",  "syr", "ta",  "tai", "te",  "tem", "ter",    */
    "swe", "swa", "syr", "tam", "tai", "tel", "tem", "ter",
/*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
    "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
/*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",     */
    "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur",
/*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
/*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
    "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
/*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
    "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
/*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
    "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
/*  "yi",  "yo",  "ypk", "za",  "zap", "zen", "zh",  "znd",    */
    "yid", "yor", "ypk", "zha", "zap", "zen", "zho", "znd",
/*  "zu",  "zun", "zxx",    */
    "zul", "zun", "zxx",
NULL,   /* end of the first list */
/*  "in",  "iw",  "ji",  "jw",  "sh",     */
    "ind", "heb", "yid", "jaw", "srp",
NULL    /* end of the second list */
};
322
374ca955
A
323/**
324 * Table of 2-letter country codes.
325 *
326 * This list must be in sorted order. This list is returned directly
327 * to the user by some API.
328 *
329 * This list must be kept in sync with COUNTRIES_3, with corresponding
330 * entries matched.
331 *
332 * This table should be terminated with a NULL entry, followed by a
333 * second list, and another NULL entry. The first list is visible to
334 * user code when this array is returned by API. The second list
335 * contains codes we support, but do not expose through user API.
336 *
337 * Notes:
338 *
339 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
340 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
341 * new codes keeping the old ones for compatibility updated to include
342 * 1999/12/03 revisions *CWB*
343 *
344 * RO(ROM) is now RO(ROU) according to
345 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
346 */
/* Two NULL-terminated lists stored back to back: the sorted, user-visible
   codes first, then the obsolete codes that are supported but not exposed. */
347static const char * const COUNTRIES[] = {
b75a7d8f 348 "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN",
73c04bcf 349 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
b75a7d8f
A
350 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
351 "BJ", "BM", "BN", "BO", "BR", "BS", "BT", "BV",
352 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
353 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
354 "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK",
355 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
356 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
73c04bcf 357 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
b75a7d8f
A
358 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
359 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
73c04bcf
A
360 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
361 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
b75a7d8f
A
362 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
363 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
364 "LV", "LY", "MA", "MC", "MD", "MG", "MH", "MK",
365 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
366 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
367 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
368 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
369 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
370 "PW", "PY", "QA", "RE", "RO", "RU", "RW", "SA",
371 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
372 "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV",
373 "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
374 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
375 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
376 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
73c04bcf 377 "WS", "YE", "YT", "YU", "ZA", "ZM", "ZW", "ZZ",
b75a7d8f
A
/* sentinel: end of the first (user-visible, sorted) list */
378NULL,
/* "RO" is listed a second time here; at this position COUNTRIES_3 holds the
   older 3-letter code "ROM", while the first-list "RO" pairs with "ROU". */
379 "FX", "RO", "TP", "ZR", /* obsolete country codes */
/* sentinel: end of the second (supported but not exposed) list */
380NULL
381};
382
73c04bcf
A
/* Deprecated 2-letter country codes and their replacements, each list ending
   in two NULL entries. The commented row inside REPLACEMENT_COUNTRIES repeats
   DEPRECATED_COUNTRIES so the index-by-index pairing can be checked by eye.
   NOTE(review): the pairing by index is presumably what the consuming lookup
   relies on - confirm against the caller. */
383static const char* const DEPRECATED_COUNTRIES[] ={
384 "BU", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
385};
386static const char* const REPLACEMENT_COUNTRIES[] = {
387/* "BU", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
388 "MM", "BJ", "FR", "BF", "VU", "ZW", "TL", "CS", "CD", NULL, NULL /* replacement country codes */
389};
390
374ca955
A
391/**
392 * Table of 3-letter country codes.
393 *
394 * This is a lookup table used to convert 3-letter country codes to
395 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
396 * For all valid i, COUNTRIES[i] must refer to the same country as
397 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
398 * to make eyeballing this baby easier.
399 *
400 * This table should be terminated with a NULL entry, followed by a
401 * second list, and another NULL entry. The two lists correspond to
402 * the two lists in COUNTRIES.
403 */
/* Index-aligned with COUNTRIES; each commented row repeats the matching
   COUNTRIES entries. Layout: first list, NULL, second list, NULL. */
404static const char * const COUNTRIES_3[] = {
b75a7d8f
A
405/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", */
406 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
73c04bcf
A
407/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
408 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
b75a7d8f
A
409/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
410 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
411/* "BJ", "BM", "BN", "BO", "BR", "BS", "BT", "BV", */
412 "BEN", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
413/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
414 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
415/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
416 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
417/* "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", */
418 "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
419/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
420 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
421/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
422 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
73c04bcf
A
423/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
424 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
b75a7d8f
A
425/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
426 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
427/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
428 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
73c04bcf
A
429/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", */
430 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
431/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
432 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
b75a7d8f
A
433/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
434 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
435/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
436 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
437/* "LV", "LY", "MA", "MC", "MD", "MG", "MH", "MK", */
438 "LVA", "LBY", "MAR", "MCO", "MDA", "MDG", "MHL", "MKD",
439/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
440 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
441/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
442 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
443/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
444 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
445/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
446 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
447/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
448 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
449/* "PW", "PY", "QA", "RE", "RO", "RU", "RW", "SA", */
450 "PLW", "PRY", "QAT", "REU", "ROU", "RUS", "RWA", "SAU",
451/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
452 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
453/* "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", */
454 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
455/* "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
456 "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
457/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
458 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
459/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
460 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
461/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
462 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
73c04bcf
A
463/* "WS", "YE", "YT", "YU", "ZA", "ZM", "ZW", "ZZ", */
464 "WSM", "YEM", "MYT", "YUG", "ZAF", "ZMB", "ZWE", "ZZZ",
b75a7d8f
A
/* sentinel: end of the first list */
465NULL,
466/* "FX", "RO", "TP", "ZR", */
467 "FXX", "ROM", "TMP", "ZAR",
/* sentinel: end of the second list */
468NULL
469};
470
374ca955
A
/* One row of CANONICALIZE_MAP: an input locale ID, the ID it is rewritten
   to, and an optional keyword/value pair associated with the rewrite. */
471typedef struct CanonicalizationMap {
472 const char *id; /* input ID */
473 const char *canonicalID; /* canonicalized output ID */
474 const char *keyword; /* keyword, or NULL if none */
475 const char *value; /* keyword value, or NULL if kw==NULL */
476} CanonicalizationMap;
477
478/**
479 * A map to canonicalize locale IDs. This handles a variety of
480 * different semantic kinds of transformations.
481 */
/* Each row is { id, canonicalID, keyword, value }; keyword and value are
   either both NULL or both set. */
482static const CanonicalizationMap CANONICALIZE_MAP[] = {
483 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */
484 { "C", "en_US_POSIX", NULL, NULL }, /* POSIX name */
73c04bcf 485 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
374ca955
A
486 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
487 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
488 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
489 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
490 { "cel_GAULISH", "cel__GAULISH", NULL, NULL }, /* registered name */
491 { "de_1901", "de__1901", NULL, NULL }, /* registered name */
492 { "de_1906", "de__1906", NULL, NULL }, /* registered name */
493 { "de__PHONEBOOK", "de", "collation", "phonebook" },
494 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
495 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
496 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
497 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
498 { "en_BOONT", "en__BOONT", NULL, NULL }, /* registered name */
499 { "en_SCOUSE", "en__SCOUSE", NULL, NULL }, /* registered name */
500 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
501 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
502 { "es__TRADITIONAL", "es", "collation", "traditional" },
503 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
504 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
505 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
506 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
507 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
508 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
509 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
510 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
511 { "hi__DIRECT", "hi", "collation", "direct" },
512 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
513 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" },
514 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */
515 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
516 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
517 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
518 { "sl_ROZAJ", "sl__ROZAJ", NULL, NULL }, /* registered name */
73c04bcf
A
519 { "sr_SP_CYRL", "sr_Cyrl_CS", NULL, NULL }, /* .NET name */
520 { "sr_SP_LATN", "sr_Latn_CS", NULL, NULL }, /* .NET name */
521 { "sr_YU_CYRILLIC", "sr_Cyrl_CS", NULL, NULL }, /* Linux name */
522 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
374ca955
A
523 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
524 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
525 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
73c04bcf 526 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name. NOTE(review): an earlier TODO here said "This should be zh_Hant once the locale structure is fixed"; the mapping already is zh_Hant, so that TODO looks stale - confirm and drop. */
374ca955
A
527 { "zh_GAN", "zh__GAN", NULL, NULL }, /* registered name */
528 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
529 { "zh_HAKKA", "zh__HAKKA", NULL, NULL }, /* registered name */
530 { "zh_MIN", "zh__MIN", NULL, NULL }, /* registered name */
531 { "zh_MIN_NAN", "zh__MINNAN", NULL, NULL }, /* registered name */
532 { "zh_WUU", "zh__WUU", NULL, NULL }, /* registered name */
533 { "zh_XIANG", "zh__XIANG", NULL, NULL }, /* registered name */
534 { "zh_YUE", "zh__YUE", NULL, NULL }, /* registered name */
535 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" },
73c04bcf 536 { "zh_TW_STROKE", "zh_Hant_TW", "collation", "stroke" },
374ca955
A
537 { "zh__PINYIN", "zh", "collation", "pinyin" }
538};
539
540/* ### Keywords **************************************************/
541
/* Size in bytes of the keyword-name buffer (KeywordStruct.keyword). Names of
   this length or longer are rejected, which leaves room for the final NUL. */
542#define ULOC_KEYWORD_BUFFER_LEN 25
/* Capacity of the keywordList array in _getKeywords; a locale ID carrying
   more distinct keywords than this is reported as an error. */
543#define ULOC_MAX_NO_KEYWORDS 25
544
/**
 * Locates the character that introduces the keyword section of a locale ID.
 *
 * @param localeID zero-terminated locale ID to scan
 * @return pointer to the first '@' in localeID, or NULL if there is none.
 *         On EBCDIC builds, when no '@' is found, the known variant
 *         encodings of the at-sign are tried in table order and the first
 *         one present in localeID is returned instead.
 */
static const char *
locale_getKeywordsStart(const char *localeID) {
    const char *atSign = uprv_strchr(localeID, '@');

    if (atSign != NULL) {
        return atSign;
    }
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    {
        /* The @ sign is a variant character: the code point the compiler
           picked for '@' may differ from the one used on another EBCDIC
           machine, so every known encoding is probed. Zero ends the table. */
        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
        int32_t k;

        for (k = 0; ebcdicSigns[k] != 0; ++k) {
            const char *hit = uprv_strchr(localeID, ebcdicSigns[k]);
            if (hit != NULL) {
                return hit;
            }
        }
    }
#endif
    return NULL;
}
568
569/**
570 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
571 * @param keywordName incoming name to be canonicalized
572 * @param status return status (keyword too long)
573 * @return length of the keyword name
574 */
575static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
576{
577 int32_t i;
73c04bcf 578 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
374ca955
A
579
580 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
581 /* keyword name too long for internal buffer */
582 *status = U_INTERNAL_PROGRAM_ERROR;
583 return 0;
584 }
585
586 /* normalize the keyword name */
587 for(i = 0; i < keywordNameLen; i++) {
588 buf[i] = uprv_tolower(keywordName[i]);
589 }
590 buf[i] = 0;
591
592 return keywordNameLen;
593}
594
/* One keyword=value pair collected by _getKeywords while scanning the
   keyword section of a locale ID. */
595typedef struct {
596 char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* keyword name, lowercased with spaces removed, zero-terminated */
597 int32_t keywordLen; /* number of characters stored in keyword, excluding the terminator */
598 const char *valueStart; /* points into the scanned locale ID, just past '=' and any spaces after it */
599 int32_t valueLen; /* length of the value counted from valueStart, trailing spaces excluded */
600} KeywordStruct;
601
602static int32_t U_CALLCONV
603compareKeywordStructs(const void *context, const void *left, const void *right) {
604 const char* leftString = ((const KeywordStruct *)left)->keyword;
605 const char* rightString = ((const KeywordStruct *)right)->keyword;
606 return uprv_strcmp(leftString, rightString);
607}
608
609/**
610 * Both addKeyword and addValue must already be in canonical form.
611 * Either both addKeyword and addValue are NULL, or neither is NULL.
612 * If they are not NULL they must be zero terminated.
613 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
614 */
615static int32_t
616_getKeywords(const char *localeID,
617 char prev,
618 char *keywords, int32_t keywordCapacity,
619 char *values, int32_t valuesCapacity, int32_t *valLen,
620 UBool valuesToo,
621 const char* addKeyword,
622 const char* addValue,
623 UErrorCode *status)
624{
625 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
626
627 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
628 int32_t numKeywords = 0;
629 const char* pos = localeID;
630 const char* equalSign = NULL;
631 const char* semicolon = NULL;
632 int32_t i = 0, j, n;
633 int32_t keywordsLen = 0;
634 int32_t valuesLen = 0;
635
636 if(prev == '@') { /* start of keyword definition */
637 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
638 do {
639 UBool duplicate = FALSE;
640 /* skip leading spaces */
641 while(*pos == ' ') {
642 pos++;
643 }
644 if (!*pos) { /* handle trailing "; " */
645 break;
646 }
647 if(numKeywords == maxKeywords) {
648 *status = U_INTERNAL_PROGRAM_ERROR;
649 return 0;
650 }
651 equalSign = uprv_strchr(pos, '=');
652 semicolon = uprv_strchr(pos, ';');
653 /* lack of '=' [foo@currency] is illegal */
654 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
655 if(!equalSign || (semicolon && semicolon<equalSign)) {
656 *status = U_INVALID_FORMAT_ERROR;
657 return 0;
658 }
659 /* need to normalize both keyword and keyword name */
660 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
661 /* keyword name too long for internal buffer */
662 *status = U_INTERNAL_PROGRAM_ERROR;
663 return 0;
664 }
665 for(i = 0, n = 0; i < equalSign - pos; ++i) {
666 if (pos[i] != ' ') {
667 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
668 }
669 }
670 keywordList[numKeywords].keyword[n] = 0;
671 keywordList[numKeywords].keywordLen = n;
672 /* now grab the value part. First we skip the '=' */
673 equalSign++;
674 /* then we leading spaces */
675 while(*equalSign == ' ') {
676 equalSign++;
677 }
678 keywordList[numKeywords].valueStart = equalSign;
679
680 pos = semicolon;
681 i = 0;
682 if(pos) {
683 while(*(pos - i - 1) == ' ') {
684 i++;
685 }
73c04bcf 686 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
374ca955
A
687 pos++;
688 } else {
73c04bcf 689 i = (int32_t)uprv_strlen(equalSign);
374ca955
A
690 while(equalSign[i-1] == ' ') {
691 i--;
692 }
693 keywordList[numKeywords].valueLen = i;
694 }
695 /* If this is a duplicate keyword, then ignore it */
696 for (j=0; j<numKeywords; ++j) {
697 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
698 duplicate = TRUE;
699 break;
700 }
701 }
702 if (!duplicate) {
703 ++numKeywords;
704 }
705 } while(pos);
706
707 /* Handle addKeyword/addValue. */
708 if (addKeyword != NULL) {
709 UBool duplicate = FALSE;
710 U_ASSERT(addValue != NULL);
711 /* Search for duplicate; if found, do nothing. Explicit keyword
712 overrides addKeyword. */
713 for (j=0; j<numKeywords; ++j) {
714 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
715 duplicate = TRUE;
716 break;
717 }
718 }
719 if (!duplicate) {
720 if (numKeywords == maxKeywords) {
721 *status = U_INTERNAL_PROGRAM_ERROR;
722 return 0;
723 }
724 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
73c04bcf 725 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
374ca955 726 keywordList[numKeywords].valueStart = addValue;
73c04bcf 727 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
374ca955
A
728 ++numKeywords;
729 }
730 } else {
731 U_ASSERT(addValue == NULL);
732 }
733
734 /* now we have a list of keywords */
735 /* we need to sort it */
736 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
737
738 /* Now construct the keyword part */
739 for(i = 0; i < numKeywords; i++) {
740 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
741 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
742 if(valuesToo) {
743 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
744 } else {
745 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
746 }
747 }
748 keywordsLen += keywordList[i].keywordLen + 1;
749 if(valuesToo) {
750 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
751 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
752 }
753 keywordsLen += keywordList[i].valueLen;
754
755 if(i < numKeywords - 1) {
756 if(keywordsLen < keywordCapacity) {
757 keywords[keywordsLen] = ';';
758 }
759 keywordsLen++;
760 }
761 }
762 if(values) {
763 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
764 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
765 values[valuesLen + keywordList[i].valueLen] = 0;
766 }
767 valuesLen += keywordList[i].valueLen + 1;
768 }
769 }
770 if(values) {
771 values[valuesLen] = 0;
772 if(valLen) {
773 *valLen = valuesLen;
774 }
775 }
776 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
777 } else {
778 return 0;
779 }
780}
781
782U_CFUNC int32_t
783locale_getKeywords(const char *localeID,
784 char prev,
785 char *keywords, int32_t keywordCapacity,
786 char *values, int32_t valuesCapacity, int32_t *valLen,
787 UBool valuesToo,
788 UErrorCode *status) {
789 return _getKeywords(localeID, prev, keywords, keywordCapacity,
790 values, valuesCapacity, valLen, valuesToo,
791 NULL, NULL, status);
792}
793
794U_CAPI int32_t U_EXPORT2
795uloc_getKeywordValue(const char* localeID,
796 const char* keywordName,
797 char* buffer, int32_t bufferCapacity,
798 UErrorCode* status)
799{
800 const char* nextSeparator = NULL;
374ca955
A
801 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
802 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
803 int32_t i = 0;
804 int32_t result = 0;
805
806 if(status && U_SUCCESS(*status) && localeID) {
807
808 const char* startSearchHere = uprv_strchr(localeID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
809 if(startSearchHere == NULL) {
810 /* no keywords, return at once */
811 return 0;
812 }
813
73c04bcf 814 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
374ca955
A
815 if(U_FAILURE(*status)) {
816 return 0;
817 }
818
819 /* find the first keyword */
820 while(startSearchHere) {
821 startSearchHere++;
822 /* skip leading spaces (allowed?) */
823 while(*startSearchHere == ' ') {
824 startSearchHere++;
825 }
826 nextSeparator = uprv_strchr(startSearchHere, '=');
827 /* need to normalize both keyword and keyword name */
828 if(!nextSeparator) {
829 break;
830 }
831 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
832 /* keyword name too long for internal buffer */
833 *status = U_INTERNAL_PROGRAM_ERROR;
834 return 0;
835 }
836 for(i = 0; i < nextSeparator - startSearchHere; i++) {
837 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
838 }
839 /* trim trailing spaces */
840 while(startSearchHere[i-1] == ' ') {
841 i--;
842 }
843 localeKeywordNameBuffer[i] = 0;
844
845 startSearchHere = uprv_strchr(nextSeparator, ';');
846
847 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
848 nextSeparator++;
849 while(*nextSeparator == ' ') {
850 nextSeparator++;
851 }
852 /* we actually found the keyword. Copy the value */
853 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
854 while(*(startSearchHere-1) == ' ') {
855 startSearchHere--;
856 }
857 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
73c04bcf 858 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
374ca955 859 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
73c04bcf 860 i = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
861 while(nextSeparator[i - 1] == ' ') {
862 i--;
863 }
864 uprv_strncpy(buffer, nextSeparator, i);
865 result = u_terminateChars(buffer, bufferCapacity, i, status);
866 } else {
867 /* give a bigger buffer, please */
868 *status = U_BUFFER_OVERFLOW_ERROR;
869 if(startSearchHere) {
73c04bcf 870 result = (int32_t)(startSearchHere - nextSeparator);
374ca955 871 } else {
73c04bcf 872 result = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
873 }
874 }
875 return result;
876 }
877 }
878 }
879 return 0;
880}
881
882U_CAPI int32_t U_EXPORT2
883uloc_setKeywordValue(const char* keywordName,
884 const char* keywordValue,
885 char* buffer, int32_t bufferCapacity,
886 UErrorCode* status)
887{
888 /* TODO: sorting. removal. */
889 int32_t keywordNameLen;
890 int32_t keywordValueLen;
891 int32_t bufLen;
892 int32_t needLen = 0;
893 int32_t foundValueLen;
894 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
895 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
896 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
897 int32_t i = 0;
898 int32_t rc;
899 char* nextSeparator = NULL;
900 char* nextEqualsign = NULL;
901 char* startSearchHere = NULL;
902 char* keywordStart = NULL;
903 char *insertHere = NULL;
904 if(U_FAILURE(*status)) {
905 return -1;
906 }
73c04bcf
A
907 if(bufferCapacity>1) {
908 bufLen = (int32_t)uprv_strlen(buffer);
909 } else {
910 *status = U_ILLEGAL_ARGUMENT_ERROR;
911 return 0;
912 }
913 if(bufferCapacity<bufLen) {
914 /* The capacity is less than the length?! Is this NULL terminated? */
915 *status = U_ILLEGAL_ARGUMENT_ERROR;
916 return 0;
917 }
374ca955
A
918 if(keywordValue && !*keywordValue) {
919 keywordValue = NULL;
920 }
921 if(keywordValue) {
73c04bcf 922 keywordValueLen = (int32_t)uprv_strlen(keywordValue);
374ca955
A
923 } else {
924 keywordValueLen = 0;
925 }
926 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
927 if(U_FAILURE(*status)) {
928 return 0;
929 }
930 startSearchHere = (char*)locale_getKeywordsStart(buffer);
374ca955
A
931 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
932 if(!keywordValue) { /* no keywords = nothing to remove */
933 return bufLen;
934 }
935
936 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
937 if(startSearchHere) { /* had a single @ */
938 needLen--; /* already had the @ */
939 /* startSearchHere points at the @ */
940 } else {
941 startSearchHere=buffer+bufLen;
942 }
943 if(needLen >= bufferCapacity) {
944 *status = U_BUFFER_OVERFLOW_ERROR;
945 return needLen; /* no change */
946 }
947 *startSearchHere = '@';
948 startSearchHere++;
949 uprv_strcpy(startSearchHere, keywordNameBuffer);
950 startSearchHere += keywordNameLen;
951 *startSearchHere = '=';
952 startSearchHere++;
953 uprv_strcpy(startSearchHere, keywordValue);
954 startSearchHere+=keywordValueLen;
955 return needLen;
956 } /* end shortcut - no @ */
957
958 keywordStart = startSearchHere;
959 /* search for keyword */
960 while(keywordStart) {
961 keywordStart++;
962 /* skip leading spaces (allowed?) */
963 while(*keywordStart == ' ') {
964 keywordStart++;
965 }
966 nextEqualsign = uprv_strchr(keywordStart, '=');
967 /* need to normalize both keyword and keyword name */
968 if(!nextEqualsign) {
969 break;
970 }
971 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
972 /* keyword name too long for internal buffer */
973 *status = U_INTERNAL_PROGRAM_ERROR;
974 return 0;
975 }
976 for(i = 0; i < nextEqualsign - keywordStart; i++) {
977 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
978 }
979 /* trim trailing spaces */
980 while(keywordStart[i-1] == ' ') {
981 i--;
982 }
983 localeKeywordNameBuffer[i] = 0;
984
985 nextSeparator = uprv_strchr(nextEqualsign, ';');
986 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
987 if(rc == 0) {
988 nextEqualsign++;
989 while(*nextEqualsign == ' ') {
990 nextEqualsign++;
991 }
992 /* we actually found the keyword. Change the value */
993 if (nextSeparator) {
994 keywordAtEnd = 0;
73c04bcf 995 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
374ca955
A
996 } else {
997 keywordAtEnd = 1;
73c04bcf 998 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
374ca955
A
999 }
1000 if(keywordValue) { /* adding a value - not removing */
1001 if(foundValueLen == keywordValueLen) {
1002 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1003 return bufLen; /* no change in size */
1004 } else if(foundValueLen > keywordValueLen) {
1005 int32_t delta = foundValueLen - keywordValueLen;
1006 if(nextSeparator) { /* RH side */
1007 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1008 }
1009 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1010 bufLen -= delta;
1011 buffer[bufLen]=0;
1012 return bufLen;
1013 } else { /* FVL < KVL */
1014 int32_t delta = keywordValueLen - foundValueLen;
1015 if((bufLen+delta) >= bufferCapacity) {
1016 *status = U_BUFFER_OVERFLOW_ERROR;
1017 return bufLen+delta;
1018 }
1019 if(nextSeparator) { /* RH side */
1020 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1021 }
1022 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1023 bufLen += delta;
1024 buffer[bufLen]=0;
1025 return bufLen;
1026 }
1027 } else { /* removing a keyword */
1028 if(keywordAtEnd) {
1029 /* zero out the ';' or '@' just before startSearchhere */
1030 keywordStart[-1] = 0;
73c04bcf 1031 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
374ca955
A
1032 } else {
1033 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1034 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
73c04bcf 1035 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
374ca955
A
1036 }
1037 }
1038 } else if(rc<0){ /* end match keyword */
1039 /* could insert at this location. */
1040 insertHere = keywordStart;
1041 }
1042 keywordStart = nextSeparator;
1043 } /* end loop searching */
1044
1045 if(!keywordValue) {
1046 return bufLen; /* removal of non-extant keyword - no change */
1047 }
1048
1049 /* we know there is at least one keyword. */
1050 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1051 if(needLen >= bufferCapacity) {
1052 *status = U_BUFFER_OVERFLOW_ERROR;
1053 return needLen; /* no change */
1054 }
1055
1056 if(insertHere) {
1057 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1058 keywordStart = insertHere;
1059 } else {
1060 keywordStart = buffer+bufLen;
1061 *keywordStart = ';';
1062 keywordStart++;
1063 }
1064 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1065 keywordStart += keywordNameLen;
1066 *keywordStart = '=';
1067 keywordStart++;
1068 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1069 keywordStart+=keywordValueLen;
1070 if(insertHere) {
1071 *keywordStart = ';';
1072 keywordStart++;
1073 }
1074 buffer[needLen]=0;
1075 return needLen;
1076}
b75a7d8f 1077
374ca955 1078/* ### ID parsing implementation **************************************************/
b75a7d8f
A
1079
/* Note: each of these macros may evaluate its argument more than once;
   do not pass expressions with side effects. */

/* returns TRUE if a is an ID separator, FALSE otherwise */
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')

/* returns TRUE if a is one of the letters that can start a special prefix */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1093
374ca955
A
/**
 * Search at most len characters of str for c, stopping early at a zero byte.
 * @return pointer to the first occurrence of c, or NULL if c was not found
 *         before the zero byte or within len characters
 */
static char* _strnchr(const char* str, int32_t len, char c) {
    const char *cursor;
    int32_t remaining;

    U_ASSERT(str != 0 && len >= 0);
    for (cursor = str, remaining = len; remaining != 0; --remaining, ++cursor) {
        if (*cursor == c) {
            return (char*) cursor;
        }
        if (*cursor == 0) {
            return NULL;
        }
    }
    return NULL;
}
1107
/**
 * Lookup 'key' in the array 'list'.  The array is scanned as two
 * consecutive runs of entries, each run ended by a NULL entry.
 *
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
 * COUNTRIES_3.
 *
 * @return index of the matching entry counted from the start of 'list'
 *         (the NULL between the runs counts as one slot), or -1 if the key
 *         is in neither run
 */
static int16_t _findIndex(const char* const* list, const char* key)
{
    const char* const* cursor = list;
    int32_t section;

    for (section = 0; section < 2; ++section) {
        for (; *cursor != NULL; ++cursor) {
            if (uprv_strcmp(key, *cursor) == 0) {
                return (int16_t)(cursor - list);
            }
        }
        ++cursor;     /* step over the NULL that ends this run */
    }
    return -1;
}
1132
1133/* count the length of src while copying it to dest; return strlen(src) */
1134static U_INLINE int32_t
1135_copyCount(char *dest, int32_t destCapacity, const char *src) {
1136 const char *anchor;
1137 char c;
1138
1139 anchor=src;
1140 for(;;) {
1141 if((c=*src)==0) {
1142 return (int32_t)(src-anchor);
1143 }
1144 if(destCapacity<=0) {
1145 return (int32_t)((src-anchor)+uprv_strlen(src));
1146 }
1147 ++src;
1148 *dest++=c;
1149 --destCapacity;
1150 }
1151}
1152
73c04bcf
A
1153static const char*
1154uloc_getCurrentCountryID(const char* oldID){
1155 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1156 if (offset >= 0) {
1157 return REPLACEMENT_COUNTRIES[offset];
1158 }
1159 return oldID;
1160}
1161static const char*
1162uloc_getCurrentLanguageID(const char* oldID){
1163 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1164 if (offset >= 0) {
1165 return REPLACEMENT_LANGUAGES[offset];
1166 }
1167 return oldID;
1168}
b75a7d8f
A
1169/*
1170 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1171 * avoid duplicating code to handle the earlier locale ID pieces
1172 * in the functions for the later ones by
1173 * setting the *pEnd pointer to where they stopped parsing
1174 *
1175 * TODO try to use this in Locale
1176 */
1177static int32_t
1178_getLanguage(const char *localeID,
1179 char *language, int32_t languageCapacity,
1180 const char **pEnd) {
1181 int32_t i=0;
1182 int32_t offset;
1183 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1184
1185 /* if it starts with i- or x- then copy that prefix */
1186 if(_isIDPrefix(localeID)) {
1187 if(i<languageCapacity) {
1188 language[i]=(char)uprv_tolower(*localeID);
1189 }
1190 if(i<languageCapacity) {
1191 language[i+1]='-';
1192 }
1193 i+=2;
1194 localeID+=2;
1195 }
1196
1197 /* copy the language as far as possible and count its length */
1198 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1199 if(i<languageCapacity) {
1200 language[i]=(char)uprv_tolower(*localeID);
1201 }
1202 if(i<3) {
1203 lang[i]=(char)uprv_tolower(*localeID);
1204 }
1205 i++;
1206 localeID++;
1207 }
1208
1209 if(i==3) {
1210 /* convert 3 character code to 2 character code if possible *CWB*/
374ca955 1211 offset=_findIndex(LANGUAGES_3, lang);
b75a7d8f 1212 if(offset>=0) {
374ca955 1213 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
b75a7d8f
A
1214 }
1215 }
1216
1217 if(pEnd!=NULL) {
1218 *pEnd=localeID;
1219 }
1220 return i;
1221}
1222
374ca955
A
/*
 * Extract an optional script subtag from the start of localeID.
 *
 * The next subtag is treated as a script only if it is exactly 4 characters
 * long.  It is written with the first character uppercased and the rest
 * lowercased.
 *
 * script/scriptCapacity: output buffer; at most scriptCapacity characters
 *     are written (none for a capacity <= 0) and no terminating zero is added
 * pEnd: if not NULL, receives the position after the script, or localeID
 *     itself when no script was found
 * Returns 4 when a script was found, regardless of how much of it fit into
 * the buffer, so that callers can preflight; returns 0 otherwise.
 */
static int32_t
_getScript(const char *localeID,
           char *script, int32_t scriptCapacity,
           const char **pEnd)
{
    int32_t idLen = 0;

    if (pEnd != NULL) {
        *pEnd = localeID;
    }

    /* measure the second item */
    while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
        idLen++;
    }

    /* If it's exactly 4 characters long, then it's a script and not a country. */
    if (idLen == 4) {
        int32_t i;
        int32_t copyLen = idLen;
        if (pEnd != NULL) {
            *pEnd = localeID+idLen;
        }
        /* limit only what is copied, not the reported length; clamping the
           return value produced truncated or negative sizes */
        if(copyLen > scriptCapacity) {
            copyLen = scriptCapacity;
        }
        if (copyLen >= 1) {
            script[0]=(char)uprv_toupper(localeID[0]);
        }
        for (i = 1; i < copyLen; i++) {
            script[i]=(char)uprv_tolower(localeID[i]);
        }
    }
    else {
        idLen = 0;
    }
    return idLen;
}
1260
1261static int32_t
1262_getCountry(const char *localeID,
1263 char *country, int32_t countryCapacity,
374ca955
A
1264 const char **pEnd)
1265{
b75a7d8f 1266 int32_t i=0;
374ca955 1267 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
b75a7d8f
A
1268 int32_t offset;
1269
1270 /* copy the country as far as possible and count its length */
1271 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1272 if(i<countryCapacity) {
1273 country[i]=(char)uprv_toupper(*localeID);
1274 }
374ca955 1275 if(i<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
b75a7d8f
A
1276 cnty[i]=(char)uprv_toupper(*localeID);
1277 }
1278 i++;
1279 localeID++;
1280 }
1281
1282 /* convert 3 character code to 2 character code if possible *CWB*/
1283 if(i==3) {
374ca955 1284 offset=_findIndex(COUNTRIES_3, cnty);
b75a7d8f 1285 if(offset>=0) {
374ca955 1286 i=_copyCount(country, countryCapacity, COUNTRIES[offset]);
b75a7d8f
A
1287 }
1288 }
1289
1290 if(pEnd!=NULL) {
1291 *pEnd=localeID;
1292 }
1293 return i;
1294}
1295
374ca955
A
1296/**
1297 * @param needSeparator if true, then add leading '_' if any variants
1298 * are added to 'variant'
1299 */
1300static int32_t
1301_getVariantEx(const char *localeID,
1302 char prev,
1303 char *variant, int32_t variantCapacity,
1304 UBool needSeparator) {
b75a7d8f
A
1305 int32_t i=0;
1306
1307 /* get one or more variant tags and separate them with '_' */
1308 if(_isIDSeparator(prev)) {
1309 /* get a variant string after a '-' or '_' */
1310 while(!_isTerminator(*localeID)) {
374ca955
A
1311 if (needSeparator) {
1312 if (i<variantCapacity) {
1313 variant[i] = '_';
1314 }
1315 ++i;
1316 needSeparator = FALSE;
1317 }
b75a7d8f
A
1318 if(i<variantCapacity) {
1319 variant[i]=(char)uprv_toupper(*localeID);
1320 if(variant[i]=='-') {
1321 variant[i]='_';
1322 }
1323 }
1324 i++;
1325 localeID++;
1326 }
1327 }
1328
1329 /* if there is no variant tag after a '-' or '_' then look for '@' */
1330 if(i==0) {
1331 if(prev=='@') {
1332 /* keep localeID */
374ca955 1333 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
b75a7d8f
A
1334 ++localeID; /* point after the '@' */
1335 } else {
1336 return 0;
1337 }
1338 while(!_isTerminator(*localeID)) {
374ca955
A
1339 if (needSeparator) {
1340 if (i<variantCapacity) {
1341 variant[i] = '_';
1342 }
1343 ++i;
1344 needSeparator = FALSE;
1345 }
b75a7d8f
A
1346 if(i<variantCapacity) {
1347 variant[i]=(char)uprv_toupper(*localeID);
1348 if(variant[i]=='-' || variant[i]==',') {
1349 variant[i]='_';
1350 }
1351 }
1352 i++;
1353 localeID++;
1354 }
1355 }
374ca955 1356
b75a7d8f
A
1357 return i;
1358}
1359
374ca955
A
1360static int32_t
1361_getVariant(const char *localeID,
1362 char prev,
1363 char *variant, int32_t variantCapacity) {
1364 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1365}
1366
1367/**
1368 * Delete ALL instances of a variant from the given list of one or
1369 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1370 * @param variants the source string of one or more variants,
1371 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1372 * terminated; if it is, trailing zero will NOT be maintained.
1373 * @param variantsLen length of variants
1374 * @param toDelete variant to delete, without separators, e.g. "EURO"
1375 * or "PREEURO"; not zero terminated
1376 * @param toDeleteLen length of toDelete
1377 * @return number of characters deleted from variants
1378 */
1379static int32_t
1380_deleteVariant(char* variants, int32_t variantsLen,
1381 const char* toDelete, int32_t toDeleteLen) {
1382 int32_t delta = 0; /* number of chars deleted */
1383 for (;;) {
1384 UBool flag = FALSE;
1385 if (variantsLen < toDeleteLen) {
1386 return delta;
1387 }
1388 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1389 (variantsLen == toDeleteLen ||
1390 (flag=(variants[toDeleteLen] == '_')))) {
1391 int32_t d = toDeleteLen + (flag?1:0);
1392 variantsLen -= d;
1393 delta += d;
1394 uprv_memmove(variants, variants+d, variantsLen);
1395 } else {
1396 char* p = _strnchr(variants, variantsLen, '_');
1397 if (p == NULL) {
1398 return delta;
1399 }
1400 ++p;
73c04bcf 1401 variantsLen -= (int32_t)(p - variants);
374ca955
A
1402 variants = p;
1403 }
1404 }
1405}
1406
1407/* Keyword enumeration */
1408
/* State behind a keyword enumeration created by uloc_openKeywordList(). */
typedef struct UKeywordsContext {
    /* start of the keyword list: zero-terminated strings stored back to
       back, ended by an empty string; released by uloc_kw_closeKeywords() */
    char* keywords;
    /* position within keywords of the next keyword to hand out */
    char* current;
} UKeywordsContext;
1413
1414static void U_CALLCONV
1415uloc_kw_closeKeywords(UEnumeration *enumerator) {
1416 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1417 uprv_free(enumerator->context);
1418 uprv_free(enumerator);
1419}
1420
1421static int32_t U_CALLCONV
1422uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) {
1423 char *kw = ((UKeywordsContext *)en->context)->keywords;
1424 int32_t result = 0;
1425 while(*kw) {
1426 result++;
1427 kw += uprv_strlen(kw)+1;
1428 }
1429 return result;
1430}
1431
1432static const char* U_CALLCONV
1433uloc_kw_nextKeyword(UEnumeration* en,
1434 int32_t* resultLength,
1435 UErrorCode* status) {
1436 const char* result = ((UKeywordsContext *)en->context)->current;
1437 int32_t len = 0;
1438 if(*result) {
73c04bcf 1439 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
374ca955
A
1440 ((UKeywordsContext *)en->context)->current += len+1;
1441 } else {
1442 result = NULL;
1443 }
1444 if (resultLength) {
1445 *resultLength = len;
1446 }
1447 return result;
1448}
1449
1450static void U_CALLCONV
1451uloc_kw_resetKeywords(UEnumeration* en,
1452 UErrorCode* status) {
1453 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1454}
1455
1456static const UEnumeration gKeywordsEnum = {
1457 NULL,
1458 NULL,
1459 uloc_kw_closeKeywords,
1460 uloc_kw_countKeywords,
1461 uenum_unextDefault,
1462 uloc_kw_nextKeyword,
1463 uloc_kw_resetKeywords
1464};
1465
1466U_CAPI UEnumeration* U_EXPORT2
1467uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
b75a7d8f 1468{
374ca955
A
1469 UKeywordsContext *myContext = NULL;
1470 UEnumeration *result = NULL;
b75a7d8f 1471
374ca955
A
1472 if(U_FAILURE(*status)) {
1473 return NULL;
1474 }
1475 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1476 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1477 myContext = uprv_malloc(sizeof(UKeywordsContext));
1478 if (myContext == NULL) {
1479 *status = U_MEMORY_ALLOCATION_ERROR;
1480 uprv_free(result);
1481 return NULL;
1482 }
1483 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1484 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1485 myContext->keywords[keywordListSize] = 0;
1486 myContext->current = myContext->keywords;
1487 result->context = myContext;
1488 return result;
1489}
1490
1491U_CAPI UEnumeration* U_EXPORT2
1492uloc_openKeywords(const char* localeID,
1493 UErrorCode* status)
1494{
1495 int32_t i=0;
1496 char keywords[256];
1497 int32_t keywordsCapacity = 256;
1498 if(status==NULL || U_FAILURE(*status)) {
b75a7d8f
A
1499 return 0;
1500 }
1501
1502 if(localeID==NULL) {
1503 localeID=uloc_getDefault();
1504 }
1505
374ca955 1506 /* Skip the language */
b75a7d8f
A
1507 _getLanguage(localeID, NULL, 0, &localeID);
1508 if(_isIDSeparator(*localeID)) {
374ca955
A
1509 const char *scriptID;
1510 /* Skip the script if available */
1511 _getScript(localeID+1, NULL, 0, &scriptID);
1512 if(scriptID != localeID+1) {
1513 /* Found optional script */
1514 localeID = scriptID;
1515 }
1516 /* Skip the Country */
1517 if (_isIDSeparator(*localeID)) {
1518 _getCountry(localeID+1, NULL, 0, &localeID);
1519 if(_isIDSeparator(*localeID)) {
1520 _getVariant(localeID+1, *localeID, NULL, 0);
1521 }
b75a7d8f
A
1522 }
1523 }
1524
374ca955
A
1525 /* keywords are located after '@' */
1526 if((localeID = locale_getKeywordsStart(localeID)) != NULL) {
1527 i=locale_getKeywords(localeID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1528 }
1529
1530 if(i) {
1531 return uloc_openKeywordList(keywords, i, status);
1532 } else {
1533 return NULL;
b75a7d8f 1534 }
b75a7d8f
A
1535}
1536
b75a7d8f 1537
374ca955
A
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of mask is set in options; both arguments are
   parenthesized so that compound expressions such as (A | B) work */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* "i-default", deliberately without a terminating zero */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1546
374ca955
A
1547/**
1548 * Canonicalize the given localeID, to level 1 or to level 2,
1549 * depending on the options. To specify level 1, pass in options=0.
1550 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1551 *
1552 * This is the code underlying uloc_getName and uloc_canonicalize.
1553 */
1554static int32_t
1555_canonicalize(const char* localeID,
1556 char* result,
1557 int32_t resultCapacity,
1558 uint32_t options,
1559 UErrorCode* err) {
1560 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1561 char localeBuffer[ULOC_FULLNAME_CAPACITY];
73c04bcf 1562 const char* origLocaleID = localeID;
374ca955
A
1563 const char* keywordAssign = NULL;
1564 const char* separatorIndicator = NULL;
1565 const char* addKeyword = NULL;
1566 const char* addValue = NULL;
1567 char* name;
1568 char* variant = NULL; /* pointer into name, or NULL */
1569 int32_t sawEuro = 0;
1570
1571 if (U_FAILURE(*err)) {
b75a7d8f
A
1572 return 0;
1573 }
1574
374ca955 1575 if (localeID==NULL) {
b75a7d8f
A
1576 localeID=uloc_getDefault();
1577 }
1578
374ca955
A
1579 /* if we are doing a full canonicalization, then put results in
1580 localeBuffer, if necessary; otherwise send them to result. */
1581 if (OPTION_SET(options, _ULOC_CANONICALIZE) &&
1582 (result == NULL || resultCapacity < sizeof(localeBuffer))) {
1583 name = localeBuffer;
1584 nameCapacity = sizeof(localeBuffer);
1585 } else {
1586 name = result;
1587 nameCapacity = resultCapacity;
1588 }
1589
b75a7d8f 1590 /* get all pieces, one after another, and separate with '_' */
374ca955 1591 len=_getLanguage(localeID, name, nameCapacity, &localeID);
73c04bcf
A
1592
1593 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1594 const char *d = uloc_getDefault();
1595
1596 len = uprv_strlen(d);
1597
1598 if (name != NULL) {
1599 uprv_strncpy(name, d, len);
1600 }
1601 } else if(_isIDSeparator(*localeID)) {
374ca955
A
1602 const char *scriptID;
1603
b75a7d8f 1604 ++fieldCount;
374ca955
A
1605 if(len<nameCapacity) {
1606 name[len]='_';
b75a7d8f 1607 }
374ca955
A
1608 ++len;
1609
1610 scriptSize=_getScript(localeID+1, name+len, nameCapacity-len, &scriptID);
1611 if(scriptSize > 0) {
1612 /* Found optional script */
1613 localeID = scriptID;
b75a7d8f 1614 ++fieldCount;
374ca955
A
1615 len+=scriptSize;
1616 if (_isIDSeparator(*localeID)) {
1617 /* If there is something else, then we add the _ */
1618 if(len<nameCapacity) {
1619 name[len]='_';
1620 }
1621 ++len;
1622 }
1623 }
1624
1625 if (_isIDSeparator(*localeID)) {
1626 len+=_getCountry(localeID+1, name+len, nameCapacity-len, &localeID);
1627 if(_isIDSeparator(*localeID)) {
1628 ++fieldCount;
1629 if(len<nameCapacity) {
1630 name[len]='_';
1631 }
1632 ++len;
1633 variantSize = _getVariant(localeID+1, *localeID, name+len, nameCapacity-len);
1634 if (variantSize > 0) {
1635 variant = name+len;
1636 len += variantSize;
1637 localeID += variantSize + 1; /* skip '_' and variant */
1638 }
b75a7d8f 1639 }
b75a7d8f
A
1640 }
1641 }
1642
374ca955
A
1643 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1644 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *localeID == '.') {
1645 UBool done = FALSE;
b75a7d8f 1646 do {
374ca955
A
1647 char c = *localeID;
1648 switch (c) {
1649 case 0:
1650 case '@':
1651 done = TRUE;
1652 break;
1653 default:
1654 if (len<nameCapacity) {
1655 name[len] = c;
1656 }
1657 ++len;
1658 ++localeID;
1659 break;
1660 }
1661 } while (!done);
1662 }
1663
1664 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1665 After this, localeID either points to '@' or is NULL */
1666 if ((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1667 keywordAssign = uprv_strchr(localeID, '=');
1668 separatorIndicator = uprv_strchr(localeID, ';');
1669 }
1670
1671 /* Copy POSIX-style variant, if any [mr@FOO] */
1672 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1673 localeID != NULL && keywordAssign == NULL) {
1674 for (;;) {
1675 char c = *localeID;
1676 if (c == 0) {
1677 break;
1678 }
1679 if (len<nameCapacity) {
1680 name[len] = c;
1681 }
1682 ++len;
1683 ++localeID;
1684 }
1685 }
1686
1687 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1688 /* Handle @FOO variant if @ is present and not followed by = */
1689 if (localeID!=NULL && keywordAssign==NULL) {
1690 int32_t posixVariantSize;
1691 /* Add missing '_' if needed */
1692 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1693 do {
1694 if(len<nameCapacity) {
1695 name[len]='_';
1696 }
1697 ++len;
1698 ++fieldCount;
1699 } while(fieldCount<2);
1700 }
1701 posixVariantSize = _getVariantEx(localeID+1, '@', name+len, nameCapacity-len,
1702 (UBool)(variantSize > 0));
1703 if (posixVariantSize > 0) {
1704 if (variant == NULL) {
1705 variant = name+len;
1706 }
1707 len += posixVariantSize;
1708 variantSize += posixVariantSize;
b75a7d8f 1709 }
374ca955
A
1710 }
1711
1712 /* Check for EURO variants. */
1713 sawEuro = _deleteVariant(variant, variantSize, "EURO", 4);
1714 len -= sawEuro;
1715 if (sawEuro > 0 && name[len-1] == '_') { /* delete trailing '_' */
1716 --len;
1717 }
1718
1719 /* Look up the ID in the canonicalization map */
1720 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1721 const char* id = CANONICALIZE_MAP[j].id;
73c04bcf 1722 int32_t n = (int32_t)uprv_strlen(id);
374ca955
A
1723 if (len == n && uprv_strncmp(name, id, n) == 0) {
1724 if (n == 0 && localeID != NULL) {
1725 break; /* Don't remap "" if keywords present */
1726 }
1727 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1728 addKeyword = CANONICALIZE_MAP[j].keyword;
1729 addValue = CANONICALIZE_MAP[j].value;
1730 break;
1731 }
1732 }
1733
1734 /* Explicit EURO variant overrides keyword in CANONICALIZE_MAP */
1735 if (sawEuro > 0) {
1736 addKeyword = "currency";
1737 addValue = "EUR";
1738 }
1739 }
1740
1741 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1742 if (localeID!=NULL && keywordAssign!=NULL &&
1743 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1744 if(len<nameCapacity) {
1745 name[len]='@';
1746 }
1747 ++len;
b75a7d8f 1748 ++fieldCount;
374ca955
A
1749 len += _getKeywords(localeID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
1750 addKeyword, addValue, err);
1751 } else if (addKeyword != NULL) {
1752 U_ASSERT(addValue != NULL);
1753 /* inelegant but works -- later make _getKeywords do this? */
1754 len += _copyCount(name+len, nameCapacity-len, "@");
1755 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1756 len += _copyCount(name+len, nameCapacity-len, "=");
1757 len += _copyCount(name+len, nameCapacity-len, addValue);
1758 }
1759 }
1760
1761 if (U_SUCCESS(*err) && name == localeBuffer) {
1762 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1763 }
1764
1765 return u_terminateChars(result, resultCapacity, len, err);
1766}
1767
1768/* ### ID parsing API **************************************************/
1769
1770U_CAPI int32_t U_EXPORT2
1771uloc_getParent(const char* localeID,
1772 char* parent,
1773 int32_t parentCapacity,
1774 UErrorCode* err)
1775{
1776 const char *lastUnderscore;
1777 int32_t i;
1778
1779 if (U_FAILURE(*err))
1780 return 0;
1781
1782 if (localeID == NULL)
1783 localeID = uloc_getDefault();
1784
1785 lastUnderscore=uprv_strrchr(localeID, '_');
1786 if(lastUnderscore!=NULL) {
1787 i=(int32_t)(lastUnderscore-localeID);
1788 } else {
1789 i=0;
b75a7d8f 1790 }
374ca955 1791
73c04bcf 1792 if(i>0 && parent != localeID) {
374ca955
A
1793 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1794 }
1795 return u_terminateChars(parent, parentCapacity, i, err);
b75a7d8f 1796}
374ca955
A
1797
1798U_CAPI int32_t U_EXPORT2
1799uloc_getLanguage(const char* localeID,
1800 char* language,
1801 int32_t languageCapacity,
1802 UErrorCode* err)
1803{
1804 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1805 int32_t i=0;
1806
1807 if (err==NULL || U_FAILURE(*err)) {
1808 return 0;
1809 }
1810
1811 if(localeID==NULL) {
1812 localeID=uloc_getDefault();
1813 }
1814
1815 i=_getLanguage(localeID, language, languageCapacity, NULL);
1816 return u_terminateChars(language, languageCapacity, i, err);
1817}
1818
1819U_CAPI int32_t U_EXPORT2
1820uloc_getScript(const char* localeID,
1821 char* script,
1822 int32_t scriptCapacity,
1823 UErrorCode* err)
1824{
1825 int32_t i=0;
1826
1827 if(err==NULL || U_FAILURE(*err)) {
1828 return 0;
1829 }
1830
1831 if(localeID==NULL) {
1832 localeID=uloc_getDefault();
1833 }
1834
1835 /* skip the language */
1836 _getLanguage(localeID, NULL, 0, &localeID);
1837 if(_isIDSeparator(*localeID)) {
1838 i=_getScript(localeID+1, script, scriptCapacity, NULL);
1839 }
1840 return u_terminateChars(script, scriptCapacity, i, err);
1841}
1842
1843U_CAPI int32_t U_EXPORT2
1844uloc_getCountry(const char* localeID,
1845 char* country,
1846 int32_t countryCapacity,
1847 UErrorCode* err)
1848{
1849 int32_t i=0;
1850
1851 if(err==NULL || U_FAILURE(*err)) {
1852 return 0;
1853 }
1854
1855 if(localeID==NULL) {
1856 localeID=uloc_getDefault();
1857 }
1858
1859 /* Skip the language */
1860 _getLanguage(localeID, NULL, 0, &localeID);
1861 if(_isIDSeparator(*localeID)) {
1862 const char *scriptID;
1863 /* Skip the script if available */
1864 _getScript(localeID+1, NULL, 0, &scriptID);
1865 if(scriptID != localeID+1) {
1866 /* Found optional script */
1867 localeID = scriptID;
1868 }
1869 if(_isIDSeparator(*localeID)) {
1870 i=_getCountry(localeID+1, country, countryCapacity, NULL);
1871 }
1872 }
1873 return u_terminateChars(country, countryCapacity, i, err);
1874}
1875
1876U_CAPI int32_t U_EXPORT2
1877uloc_getVariant(const char* localeID,
1878 char* variant,
1879 int32_t variantCapacity,
1880 UErrorCode* err)
1881{
1882 int32_t i=0;
374ca955
A
1883
1884 if(err==NULL || U_FAILURE(*err)) {
1885 return 0;
1886 }
1887
1888 if(localeID==NULL) {
1889 localeID=uloc_getDefault();
1890 }
1891
1892 /* Skip the language */
1893 _getLanguage(localeID, NULL, 0, &localeID);
1894 if(_isIDSeparator(*localeID)) {
1895 const char *scriptID;
1896 /* Skip the script if available */
1897 _getScript(localeID+1, NULL, 0, &scriptID);
1898 if(scriptID != localeID+1) {
1899 /* Found optional script */
1900 localeID = scriptID;
1901 }
1902 /* Skip the Country */
1903 if (_isIDSeparator(*localeID)) {
1904 _getCountry(localeID+1, NULL, 0, &localeID);
1905 if(_isIDSeparator(*localeID)) {
374ca955
A
1906 i=_getVariant(localeID+1, *localeID, variant, variantCapacity);
1907 }
1908 }
1909 }
1910
1911 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1912 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
1913/*
1914 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1915 i=_getVariant(localeID+1, '@', variant, variantCapacity);
1916 }
1917*/
1918 return u_terminateChars(variant, variantCapacity, i, err);
1919}
1920
1921U_CAPI int32_t U_EXPORT2
1922uloc_getName(const char* localeID,
1923 char* name,
1924 int32_t nameCapacity,
1925 UErrorCode* err)
1926{
1927 return _canonicalize(localeID, name, nameCapacity, 0, err);
1928}
1929
1930U_CAPI int32_t U_EXPORT2
1931uloc_getBaseName(const char* localeID,
1932 char* name,
1933 int32_t nameCapacity,
1934 UErrorCode* err)
1935{
1936 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
1937}
1938
1939U_CAPI int32_t U_EXPORT2
1940uloc_canonicalize(const char* localeID,
1941 char* name,
1942 int32_t nameCapacity,
1943 UErrorCode* err)
1944{
1945 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
1946}
1947
b75a7d8f
A
1948U_CAPI const char* U_EXPORT2
1949uloc_getISO3Language(const char* localeID)
1950{
374ca955
A
1951 int16_t offset;
1952 char lang[ULOC_LANG_CAPACITY];
1953 UErrorCode err = U_ZERO_ERROR;
1954
1955 if (localeID == NULL)
1956 {
1957 localeID = uloc_getDefault();
1958 }
1959 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1960 if (U_FAILURE(err))
1961 return "";
1962 offset = _findIndex(LANGUAGES, lang);
1963 if (offset < 0)
1964 return "";
1965 return LANGUAGES_3[offset];
b75a7d8f
A
1966}
1967
1968U_CAPI const char* U_EXPORT2
1969uloc_getISO3Country(const char* localeID)
1970{
1971 int16_t offset;
374ca955 1972 char cntry[ULOC_LANG_CAPACITY];
b75a7d8f
A
1973 UErrorCode err = U_ZERO_ERROR;
1974
1975 if (localeID == NULL)
1976 {
1977 localeID = uloc_getDefault();
1978 }
374ca955 1979 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
b75a7d8f
A
1980 if (U_FAILURE(err))
1981 return "";
374ca955 1982 offset = _findIndex(COUNTRIES, cntry);
b75a7d8f
A
1983 if (offset < 0)
1984 return "";
1985
374ca955 1986 return COUNTRIES_3[offset];
b75a7d8f
A
1987}
1988
1989U_CAPI uint32_t U_EXPORT2
1990uloc_getLCID(const char* localeID)
1991{
374ca955
A
1992 UErrorCode status = U_ZERO_ERROR;
1993 char langID[ULOC_FULLNAME_CAPACITY];
1994
1995 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
1996 if (U_FAILURE(status)) {
1997 return 0;
b75a7d8f 1998 }
374ca955
A
1999
2000 return uprv_convertToLCID(langID, localeID, &status);
2001}
2002
73c04bcf
A
2003U_CAPI int32_t U_EXPORT2
2004uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2005 UErrorCode *status)
2006{
2007 int32_t length;
2008 const char *posix = uprv_convertToPosix(hostid, status);
2009 if (U_FAILURE(*status) || posix == NULL) {
2010 return 0;
2011 }
2012 length = (int32_t)uprv_strlen(posix);
2013 if (length+1 > localeCapacity) {
2014 *status = U_BUFFER_OVERFLOW_ERROR;
2015 }
2016 else {
2017 uprv_strcpy(locale, posix);
2018 }
2019 return length;
2020}
2021
374ca955
A
2022/* ### Default locale **************************************************/
2023
2024U_CAPI const char* U_EXPORT2
2025uloc_getDefault()
2026{
2027 return locale_get_default();
2028}
2029
2030U_CAPI void U_EXPORT2
2031uloc_setDefault(const char* newDefaultLocale,
2032 UErrorCode* err)
2033{
2034 if (U_FAILURE(*err))
2035 return;
2036 /* the error code isn't currently used for anything by this function*/
b75a7d8f 2037
374ca955
A
2038 /* propagate change to C++ */
2039 locale_set_default(newDefaultLocale);
b75a7d8f
A
2040}
2041
374ca955
A
2042/* ### Display name **************************************************/
2043
b75a7d8f
A
2044/*
2045 * Lookup a resource bundle table item with fallback on the table level.
2046 * Regular resource bundle lookups perform fallback to parent locale bundles
2047 * and eventually the root bundle, but only for top-level items.
2048 * This function takes the name of a top-level table and of an item in that table
2049 * and performs a lookup of both, falling back until a bundle contains a table
2050 * with this item.
2051 *
2052 * Note: Only the opening of entire bundles falls back through the default locale
2053 * before root. Once a bundle is open, item lookups do not go through the
2054 * default locale because that would result in a mix of languages that is
2055 * unpredictable to the programmer and most likely useless.
2056 */
2057static const UChar *
2058_res_getTableStringWithFallback(const char *path, const char *locale,
374ca955
A
2059 const char *tableKey, const char *subTableKey,
2060 const char *itemKey,
b75a7d8f
A
2061 int32_t *pLength,
2062 UErrorCode *pErrorCode)
2063{
73c04bcf
A
2064/* char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/
2065 UResourceBundle *rb=NULL, table, subTable;
2066 const UChar *item=NULL;
b75a7d8f 2067 UErrorCode errorCode;
374ca955 2068 char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};
73c04bcf
A
2069
2070 /*
2071 * open the bundle for the current locale
2072 * this falls back through the locale's chain to root
2073 */
2074 errorCode=U_ZERO_ERROR;
2075 rb=ures_open(path, locale, &errorCode);
2076 if(U_FAILURE(errorCode)) {
2077 /* total failure, not even root could be opened */
2078 *pErrorCode=errorCode;
2079 return NULL;
2080 } else if(errorCode==U_USING_DEFAULT_WARNING ||
2081 (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
2082 ) {
2083 /* set the "strongest" error code (success->fallback->default->failure) */
2084 *pErrorCode=errorCode;
2085 }
2086
2087 for(;;){
b75a7d8f 2088 ures_initStackObject(&table);
73c04bcf
A
2089 ures_initStackObject(&subTable);
2090 ures_getByKeyWithFallback(rb, tableKey, &table, &errorCode);
2091 if (subTableKey != NULL) {
2092 /*
2093 ures_getByKeyWithFallback(&table,subTableKey, &subTable, &errorCode);
2094 item = ures_getStringByKeyWithFallback(&subTable, itemKey, pLength, &errorCode);
2095 if(U_FAILURE(errorCode)){
2096 *pErrorCode = errorCode;
2097 }
2098
2099 break;*/
2100
2101 ures_getByKeyWithFallback(&table,subTableKey, &table, &errorCode);
b75a7d8f 2102 }
374ca955 2103 if(U_SUCCESS(errorCode)){
73c04bcf
A
2104 item = ures_getStringByKeyWithFallback(&table, itemKey, pLength, &errorCode);
2105 if(U_FAILURE(errorCode)){
2106 const char* replacement = NULL;
2107 *pErrorCode = errorCode; /*save the errorCode*/
2108 errorCode = U_ZERO_ERROR;
2109 /* may be a deprecated code */
2110 if(uprv_strcmp(tableKey, "Countries")==0){
2111 replacement = uloc_getCurrentCountryID(itemKey);
2112 }else if(uprv_strcmp(tableKey, "Languages")==0){
2113 replacement = uloc_getCurrentLanguageID(itemKey);
374ca955 2114 }
73c04bcf
A
2115 /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself is replacement is not found*/
2116 if(replacement!=NULL && itemKey != replacement){
2117 item = ures_getStringByKeyWithFallback(&table, replacement, pLength, &errorCode);
2118 if(U_SUCCESS(errorCode)){
2119 *pErrorCode = errorCode;
2120 break;
2121 }
2122 }
2123 }else{
2124 break;
b75a7d8f 2125 }
b75a7d8f 2126 }
73c04bcf
A
2127
2128 if(U_FAILURE(errorCode)){
b75a7d8f 2129
73c04bcf
A
2130 /* still can't figure out ?.. try the fallback mechanism */
2131 int32_t len = 0;
2132 const UChar* fallbackLocale = NULL;
2133 *pErrorCode = errorCode;
2134 errorCode = U_ZERO_ERROR;
b75a7d8f 2135
73c04bcf
A
2136 fallbackLocale = ures_getStringByKeyWithFallback(&table, "Fallback", &len, &errorCode);
2137 if(U_FAILURE(errorCode)){
2138 *pErrorCode = errorCode;
2139 break;
2140 }
2141
2142 u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
2143
2144 /* guard against recursive fallback */
2145 if(uprv_strcmp(explicitFallbackName, locale)==0){
2146 *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
2147 break;
2148 }
b75a7d8f 2149 ures_close(rb);
73c04bcf
A
2150 rb = ures_open(NULL, explicitFallbackName, &errorCode);
2151 if(U_FAILURE(errorCode)){
2152 *pErrorCode = errorCode;
2153 break;
374ca955 2154 }
73c04bcf
A
2155 /* succeeded in opening the fallback bundle .. continue and try to fetch the item */
2156 }else{
2157 break;
374ca955 2158 }
b75a7d8f 2159 }
73c04bcf
A
2160 /* done with the locale string - ready to close table and rb */
2161 ures_close(&subTable);
2162 ures_close(&table);
2163 ures_close(rb);
2164 return item;
b75a7d8f
A
2165}
2166
2167static int32_t
2168_getStringOrCopyKey(const char *path, const char *locale,
374ca955
A
2169 const char *tableKey,
2170 const char* subTableKey,
2171 const char *itemKey,
b75a7d8f
A
2172 const char *substitute,
2173 UChar *dest, int32_t destCapacity,
2174 UErrorCode *pErrorCode) {
374ca955 2175 const UChar *s = NULL;
73c04bcf 2176 int32_t length = 0;
b75a7d8f
A
2177
2178 if(itemKey==NULL) {
2179 /* top-level item: normal resource bundle access */
2180 UResourceBundle *rb;
2181
2182 rb=ures_open(path, locale, pErrorCode);
2183 if(U_SUCCESS(*pErrorCode)) {
2184 s=ures_getStringByKey(rb, tableKey, &length, pErrorCode);
2185 /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
2186 ures_close(rb);
2187 }
2188 } else {
2189 /* second-level item, use special fallback */
2190 s=_res_getTableStringWithFallback(path, locale,
374ca955
A
2191 tableKey,
2192 subTableKey,
2193 itemKey,
b75a7d8f
A
2194 &length,
2195 pErrorCode);
2196 }
2197 if(U_SUCCESS(*pErrorCode)) {
2198 int32_t copyLength=uprv_min(length, destCapacity);
374ca955 2199 if(copyLength>0 && s != NULL) {
b75a7d8f
A
2200 u_memcpy(dest, s, copyLength);
2201 }
2202 } else {
2203 /* no string from a resource bundle: convert the substitute */
2204 length=(int32_t)uprv_strlen(substitute);
2205 u_charsToUChars(substitute, dest, uprv_min(length, destCapacity));
374ca955
A
2206 *pErrorCode=U_USING_DEFAULT_WARNING;
2207 }
2208
2209 return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2210}
2211
2212static int32_t
2213_getDisplayNameForComponent(const char *locale,
2214 const char *displayLocale,
2215 UChar *dest, int32_t destCapacity,
2216 int32_t (*getter)(const char *, char *, int32_t, UErrorCode *),
2217 const char *tag,
2218 UErrorCode *pErrorCode) {
2219 char localeBuffer[ULOC_FULLNAME_CAPACITY*4];
b75a7d8f 2220 int32_t length;
374ca955 2221 UErrorCode localStatus;
b75a7d8f
A
2222
2223 /* argument checking */
2224 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2225 return 0;
2226 }
2227
2228 if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2229 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2230 return 0;
2231 }
2232
374ca955
A
2233 localStatus = U_ZERO_ERROR;
2234 length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus);
2235 if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) {
b75a7d8f
A
2236 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2237 return 0;
2238 }
2239 if(length==0) {
2240 return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
2241 }
2242
b75a7d8f 2243 return _getStringOrCopyKey(NULL, displayLocale,
374ca955
A
2244 tag, NULL, localeBuffer,
2245 localeBuffer,
b75a7d8f
A
2246 dest, destCapacity,
2247 pErrorCode);
2248}
2249
374ca955
A
2250U_CAPI int32_t U_EXPORT2
2251uloc_getDisplayLanguage(const char *locale,
2252 const char *displayLocale,
2253 UChar *dest, int32_t destCapacity,
2254 UErrorCode *pErrorCode) {
2255 return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2256 uloc_getLanguage, _kLanguages, pErrorCode);
2257}
2258
2259U_CAPI int32_t U_EXPORT2
2260uloc_getDisplayScript(const char* locale,
2261 const char* displayLocale,
2262 UChar *dest, int32_t destCapacity,
2263 UErrorCode *pErrorCode)
2264{
2265 return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2266 uloc_getScript, _kScripts, pErrorCode);
2267}
2268
2269U_CAPI int32_t U_EXPORT2
2270uloc_getDisplayCountry(const char *locale,
2271 const char *displayLocale,
2272 UChar *dest, int32_t destCapacity,
2273 UErrorCode *pErrorCode) {
2274 return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2275 uloc_getCountry, _kCountries, pErrorCode);
2276}
2277
2278/*
2279 * TODO separate variant1_variant2_variant3...
2280 * by getting each tag's display string and concatenating them with ", "
2281 * in between - similar to uloc_getDisplayName()
2282 */
2283U_CAPI int32_t U_EXPORT2
2284uloc_getDisplayVariant(const char *locale,
2285 const char *displayLocale,
2286 UChar *dest, int32_t destCapacity,
2287 UErrorCode *pErrorCode) {
2288 return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2289 uloc_getVariant, _kVariants, pErrorCode);
2290}
2291
b75a7d8f
A
2292U_CAPI int32_t U_EXPORT2
2293uloc_getDisplayName(const char *locale,
2294 const char *displayLocale,
2295 UChar *dest, int32_t destCapacity,
374ca955
A
2296 UErrorCode *pErrorCode)
2297{
2298 int32_t length, length2, length3 = 0;
2299 UBool hasLanguage, hasScript, hasCountry, hasVariant, hasKeywords;
2300 UEnumeration* keywordEnum = NULL;
2301 int32_t keywordCount = 0;
2302 const char *keyword = NULL;
2303 int32_t keywordLen = 0;
2304 char keywordValue[256];
2305 int32_t keywordValueLen = 0;
b75a7d8f
A
2306
2307 /* argument checking */
2308 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2309 return 0;
2310 }
2311
2312 if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2313 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2314 return 0;
2315 }
2316
2317 /*
2318 * if there is a language, then write "language (country, variant)"
2319 * otherwise write "country, variant"
2320 */
2321
2322 /* write the language */
2323 length=uloc_getDisplayLanguage(locale, displayLocale,
2324 dest, destCapacity,
2325 pErrorCode);
2326 hasLanguage= length>0;
2327
2328 if(hasLanguage) {
2329 /* append " (" */
2330 if(length<destCapacity) {
2331 dest[length]=0x20;
2332 }
2333 ++length;
2334 if(length<destCapacity) {
2335 dest[length]=0x28;
2336 }
2337 ++length;
2338 }
2339
2340 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2341 /* keep preflighting */
2342 *pErrorCode=U_ZERO_ERROR;
2343 }
2344
374ca955
A
2345 /* append the script */
2346 if(length<destCapacity) {
2347 length2=uloc_getDisplayScript(locale, displayLocale,
2348 dest+length, destCapacity-length,
2349 pErrorCode);
2350 } else {
2351 length2=uloc_getDisplayScript(locale, displayLocale,
2352 NULL, 0,
2353 pErrorCode);
2354 }
2355 hasScript= length2>0;
2356 length+=length2;
2357
2358 if(hasScript) {
2359 /* append ", " */
2360 if(length<destCapacity) {
2361 dest[length]=0x2c;
2362 }
2363 ++length;
2364 if(length<destCapacity) {
2365 dest[length]=0x20;
2366 }
2367 ++length;
2368 }
2369
2370 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2371 /* keep preflighting */
2372 *pErrorCode=U_ZERO_ERROR;
2373 }
2374
b75a7d8f
A
2375 /* append the country */
2376 if(length<destCapacity) {
2377 length2=uloc_getDisplayCountry(locale, displayLocale,
2378 dest+length, destCapacity-length,
2379 pErrorCode);
2380 } else {
2381 length2=uloc_getDisplayCountry(locale, displayLocale,
2382 NULL, 0,
2383 pErrorCode);
2384 }
2385 hasCountry= length2>0;
2386 length+=length2;
2387
2388 if(hasCountry) {
2389 /* append ", " */
2390 if(length<destCapacity) {
2391 dest[length]=0x2c;
2392 }
2393 ++length;
2394 if(length<destCapacity) {
2395 dest[length]=0x20;
2396 }
2397 ++length;
2398 }
2399
2400 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2401 /* keep preflighting */
2402 *pErrorCode=U_ZERO_ERROR;
2403 }
2404
2405 /* append the variant */
2406 if(length<destCapacity) {
2407 length2=uloc_getDisplayVariant(locale, displayLocale,
2408 dest+length, destCapacity-length,
2409 pErrorCode);
2410 } else {
2411 length2=uloc_getDisplayVariant(locale, displayLocale,
2412 NULL, 0,
2413 pErrorCode);
2414 }
2415 hasVariant= length2>0;
2416 length+=length2;
2417
374ca955
A
2418 if(hasVariant) {
2419 /* append ", " */
2420 if(length<destCapacity) {
2421 dest[length]=0x2c;
2422 }
2423 ++length;
2424 if(length<destCapacity) {
2425 dest[length]=0x20;
2426 }
2427 ++length;
b75a7d8f
A
2428 }
2429
374ca955
A
2430 keywordEnum = uloc_openKeywords(locale, pErrorCode);
2431
2432 for(keywordCount = uenum_count(keywordEnum, pErrorCode); keywordCount > 0 ; keywordCount--){
2433 if(U_FAILURE(*pErrorCode)){
2434 break;
2435 }
2436 /* the uenum_next returns NUL terminated string */
2437 keyword = uenum_next(keywordEnum, &keywordLen, pErrorCode);
2438 if(length + length3 < destCapacity) {
2439 length3 += uloc_getDisplayKeyword(keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2440 } else {
2441 length3 += uloc_getDisplayKeyword(keyword, displayLocale, NULL, 0, pErrorCode);
2442 }
2443 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2444 /* keep preflighting */
2445 *pErrorCode=U_ZERO_ERROR;
2446 }
2447 keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, 256, pErrorCode);
2448 if(keywordValueLen) {
2449 if(length + length3 < destCapacity) {
2450 dest[length + length3] = 0x3D;
b75a7d8f 2451 }
374ca955
A
2452 length3++;
2453 if(length + length3 < destCapacity) {
2454 length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2455 } else {
2456 length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, NULL, 0, pErrorCode);
2457 }
2458 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2459 /* keep preflighting */
2460 *pErrorCode=U_ZERO_ERROR;
2461 }
2462 }
2463 if(keywordCount > 1) {
2464 if(length + length3 + 1 < destCapacity && keywordCount) {
2465 dest[length + length3]=0x2c;
2466 dest[length + length3+1]=0x20;
2467 }
2468 length3++; /* ',' */
2469 length3++; /* ' ' */
2470 }
2471 }
2472 uenum_close(keywordEnum);
2473
2474 hasKeywords = length3 > 0;
2475 length += length3;
2476
2477
2478
2479 if ((hasScript && !hasCountry)
2480 || ((hasScript || hasCountry) && !hasVariant && !hasKeywords)
2481 || ((hasScript || hasCountry || hasVariant) && !hasKeywords)
2482 || (hasLanguage && !hasScript && !hasCountry && !hasVariant && !hasKeywords))
2483 {
2484 /* remove ", " or " (" */
2485 length-=2;
2486 }
2487
2488 if (hasLanguage && (hasScript || hasCountry || hasVariant || hasKeywords)) {
2489 /* append ")" */
2490 if(length<destCapacity) {
2491 dest[length]=0x29;
b75a7d8f 2492 }
374ca955 2493 ++length;
b75a7d8f
A
2494 }
2495
2496 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2497 /* keep preflighting */
2498 *pErrorCode=U_ZERO_ERROR;
2499 }
2500
2501 return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2502}
2503
374ca955
A
2504U_CAPI int32_t U_EXPORT2
2505uloc_getDisplayKeyword(const char* keyword,
2506 const char* displayLocale,
2507 UChar* dest,
2508 int32_t destCapacity,
2509 UErrorCode* status){
2510
2511 /* argument checking */
2512 if(status==NULL || U_FAILURE(*status)) {
2513 return 0;
2514 }
2515
2516 if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2517 *status=U_ILLEGAL_ARGUMENT_ERROR;
2518 return 0;
2519 }
2520
2521
2522 /* pass itemKey=NULL to look for a top-level item */
2523 return _getStringOrCopyKey(NULL, displayLocale,
2524 _kKeys, NULL,
2525 keyword,
2526 keyword,
2527 dest, destCapacity,
2528 status);
b75a7d8f
A
2529
2530}
2531
374ca955
A
2532
2533#define UCURRENCY_DISPLAY_NAME_INDEX 1
2534
2535U_CAPI int32_t U_EXPORT2
2536uloc_getDisplayKeywordValue( const char* locale,
2537 const char* keyword,
2538 const char* displayLocale,
2539 UChar* dest,
2540 int32_t destCapacity,
2541 UErrorCode* status){
2542
2543
2544 char keywordValue[ULOC_FULLNAME_CAPACITY*4];
2545 int32_t capacity = ULOC_FULLNAME_CAPACITY*4;
2546 int32_t keywordValueLen =0;
2547
2548 /* argument checking */
2549 if(status==NULL || U_FAILURE(*status)) {
2550 return 0;
2551 }
2552
2553 if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2554 *status=U_ILLEGAL_ARGUMENT_ERROR;
2555 return 0;
2556 }
2557
2558 /* get the keyword value */
2559 keywordValue[0]=0;
2560 keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);
2561
2562 /*
2563 * if the keyword is equal to currency .. then to get the display name
2564 * we need to do the fallback ourselves
2565 */
2566 if(uprv_stricmp(keyword, _kCurrency)==0){
2567
2568 int32_t dispNameLen = 0;
2569 const UChar *dispName = NULL;
2570
2571 UResourceBundle *bundle = ures_open(NULL, displayLocale, status);
2572 UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status);
2573 UResourceBundle *currency = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status);
2574
2575 dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);
2576
2577 /*close the bundles */
2578 ures_close(currency);
2579 ures_close(currencies);
2580 ures_close(bundle);
2581
2582 if(U_FAILURE(*status)){
2583 if(*status == U_MISSING_RESOURCE_ERROR){
2584 /* we just want to write the value over if nothing is available */
2585 *status = U_USING_DEFAULT_WARNING;
2586 }else{
2587 return 0;
2588 }
2589 }
2590
2591 /* now copy the dispName over if not NULL */
2592 if(dispName != NULL){
2593 if(dispNameLen <= destCapacity){
2594 uprv_memcpy(dest, dispName, dispNameLen * U_SIZEOF_UCHAR);
2595 return u_terminateUChars(dest, destCapacity, dispNameLen, status);
2596 }else{
2597 *status = U_BUFFER_OVERFLOW_ERROR;
2598 return dispNameLen;
2599 }
2600 }else{
2601 /* we have not found the display name for the value .. just copy over */
2602 if(keywordValueLen <= destCapacity){
2603 u_charsToUChars(keywordValue, dest, keywordValueLen);
2604 return u_terminateUChars(dest, destCapacity, keywordValueLen, status);
2605 }else{
2606 *status = U_BUFFER_OVERFLOW_ERROR;
2607 return keywordValueLen;
2608 }
2609 }
2610
2611
2612 }else{
2613
2614 return _getStringOrCopyKey(NULL, displayLocale,
2615 _kTypes, keyword,
2616 keywordValue,
2617 keywordValue,
2618 dest, destCapacity,
2619 status);
2620 }
b75a7d8f
A
2621}
2622
374ca955
A
2623/* ### Get available **************************************************/
2624
2625static UBool U_CALLCONV uloc_cleanup(void) {
b75a7d8f
A
2626 char ** temp;
2627
2628 if (_installedLocales) {
2629 temp = _installedLocales;
2630 _installedLocales = NULL;
2631
2632 _installedLocalesCount = 0;
2633
2634 uprv_free(temp);
2635 }
2636 return TRUE;
2637}
2638
2639static void _load_installedLocales()
2640{
2641 UBool localesLoaded;
2642
2643 umtx_lock(NULL);
2644 localesLoaded = _installedLocales != NULL;
2645 umtx_unlock(NULL);
2646
2647 if (localesLoaded == FALSE) {
2648 UResourceBundle *index = NULL;
2649 UResourceBundle installed;
2650 UErrorCode status = U_ZERO_ERROR;
2651 char ** temp;
2652 int32_t i = 0;
2653 int32_t localeCount;
2654
2655 ures_initStackObject(&installed);
2656 index = ures_openDirect(NULL, _kIndexLocaleName, &status);
2657 ures_getByKey(index, _kIndexTag, &installed, &status);
2658
2659 if(U_SUCCESS(status)) {
2660 localeCount = ures_getSize(&installed);
2661 temp = (char **) uprv_malloc(sizeof(char*) * (localeCount+1));
2662
2663 ures_resetIterator(&installed);
2664 while(ures_hasNext(&installed)) {
2665 ures_getNextString(&installed, NULL, (const char **)&temp[i++], &status);
2666 }
2667 temp[i] = NULL;
2668
2669 umtx_lock(NULL);
2670 if (_installedLocales == NULL)
2671 {
2672 _installedLocales = temp;
2673 _installedLocalesCount = localeCount;
2674 temp = NULL;
374ca955 2675 ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
b75a7d8f
A
2676 }
2677 umtx_unlock(NULL);
2678
2679 uprv_free(temp);
2680 ures_close(&installed);
2681 }
2682 ures_close(index);
2683 }
2684}
2685
374ca955
A
2686U_CAPI const char* U_EXPORT2
2687uloc_getAvailable(int32_t offset)
2688{
2689
2690 _load_installedLocales();
2691
2692 if (offset > _installedLocalesCount)
2693 return NULL;
2694 return _installedLocales[offset];
2695}
b75a7d8f 2696
374ca955
A
2697U_CAPI int32_t U_EXPORT2
2698uloc_countAvailable()
2699{
2700 _load_installedLocales();
2701 return _installedLocalesCount;
2702}
b75a7d8f
A
2703
2704/**
2705 * Returns a list of all language codes defined in ISO 639. This is a pointer
2706 * to an array of pointers to arrays of char. All of these pointers are owned
2707 * by ICU-- do not delete them, and do not write through them. The array is
2708 * terminated with a null pointer.
2709 */
2710U_CAPI const char* const* U_EXPORT2
2711uloc_getISOLanguages()
2712{
374ca955 2713 return LANGUAGES;
b75a7d8f
A
2714}
2715
2716/**
2717 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2718 * pointer to an array of pointers to arrays of char. All of these pointers are
2719 * owned by ICU-- do not delete them, and do not write through them. The array is
2720 * terminated with a null pointer.
2721 */
2722U_CAPI const char* const* U_EXPORT2
2723uloc_getISOCountries()
2724{
374ca955
A
2725 return COUNTRIES;
2726}
2727
2728
/* this function to be moved into cstring.c later */

/* decimal separator that sprintf() produces; 0 until first probed */
static char gDecimal = 0;

/*
 * strtod() wrapper that always accepts '.' as the decimal separator, even
 * when the C runtime formats and parses numbers with a different one.
 *
 * start  text to parse.
 * end    if not NULL, receives a pointer into `start` just past the parsed
 *        number (same contract as uprv_strtod).
 *
 * When the runtime separator is not '.', at most the first 29 characters of
 * start are copied to a scratch buffer, the first '.' is replaced by the
 * runtime separator, and the copy is parsed instead.
 */
static double
_uloc_strtod(const char *start, char **end) {
    char scratch[30];
    char *dot;
    char *scratchEnd;
    double value;

    if (gDecimal == 0) {
        char probe[5];
        /* For machines that decide to change the decimal on you,
        and try to be too smart with localization.
        This normally should be just a '.'. */
        sprintf(probe, "%+1.1f", 1.0);
        /* "+1.0": index 2 holds the separator */
        gDecimal = probe[2];
    }

    if (gDecimal == '.') {
        return uprv_strtod(start, end); /* fall through to OS */
    }

    uprv_strncpy(scratch, start, 29);
    scratch[29] = 0;
    dot = uprv_strchr(scratch, '.');
    if (dot == NULL) {
        return uprv_strtod(start, end); /* no decimal point */
    }
    *dot = gDecimal;

    value = uprv_strtod(scratch, &scratchEnd);
    if (end != NULL) {
        /* translate the end position from the copy back into the caller's string */
        *end = (char*)(start + (scratchEnd - scratch)); /* cast away const (to follow uprv_strtod API.) */
    }
    return value;
}
2767
/*
 * One entry of a parsed accept-language list; this is the element type
 * ordered by uloc_acceptLanguageCompare().
 */
typedef struct {
    double q;       /* weight; entries with a larger q sort first */
    char *locale;   /* locale name; used as tie-breaker when q is equal */
#if defined(ULOC_DEBUG_PURIFY)
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
#endif
} _acceptLangItem;
2775
2776static int32_t U_CALLCONV
2777uloc_acceptLanguageCompare(const void *context, const void *a, const void *b)
2778{
2779 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2780 const _acceptLangItem *bb = (const _acceptLangItem*)b;
2781
2782 int32_t rc = 0;
2783 if(bb->q < aa->q) {
2784 rc = -1; /* A > B */
2785 } else if(bb->q > aa->q) {
2786 rc = 1; /* A < B */
2787 } else {
2788 rc = 0; /* A = B */
2789 }
2790
2791 if(rc==0) {
2792 rc = uprv_stricmp(aa->locale, bb->locale);
2793 }
2794
2795#if defined(ULOC_DEBUG)
2796 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2797 aa->locale, aa->q,
2798 bb->locale, bb->q,
2799 rc);*/
2800#endif
2801
2802 return rc;
2803}
2804
2805/*
2806mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2807*/
2808
2809U_CAPI int32_t U_EXPORT2
2810uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2811 const char *httpAcceptLanguage,
2812 UEnumeration* availableLocales,
2813 UErrorCode *status)
2814{
2815 _acceptLangItem *j;
2816 _acceptLangItem smallBuffer[30];
2817 char **strs;
2818 char tmp[ULOC_FULLNAME_CAPACITY +1];
2819 int32_t n = 0;
2820 const char *itemEnd;
2821 const char *paramEnd;
2822 const char *s;
2823 const char *t;
2824 int32_t res;
2825 int32_t i;
73c04bcf 2826 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
374ca955
A
2827 int32_t jSize;
2828
2829 j = smallBuffer;
2830 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2831 if(U_FAILURE(*status)) {
2832 return -1;
2833 }
2834
2835 for(s=httpAcceptLanguage;s&&*s;) {
2836 while(isspace(*s)) /* eat space at the beginning */
2837 s++;
2838 itemEnd=uprv_strchr(s,',');
2839 paramEnd=uprv_strchr(s,';');
2840 if(!itemEnd) {
2841 itemEnd = httpAcceptLanguage+l; /* end of string */
2842 }
2843 if(paramEnd && paramEnd<itemEnd) {
2844 /* semicolon (;) is closer than end (,) */
2845 t = paramEnd+1;
2846 if(*t=='q') {
2847 t++;
2848 }
2849 while(isspace(*t)) {
2850 t++;
2851 }
2852 if(*t=='=') {
2853 t++;
2854 }
2855 while(isspace(*t)) {
2856 t++;
2857 }
2858 j[n].q = _uloc_strtod(t,NULL);
2859 } else {
2860 /* no semicolon - it's 1.0 */
2861 j[n].q = 1.0;
2862 paramEnd = itemEnd;
2863 }
2864#if defined(ULOC_DEBUG_PURIFY)
2865 j[n].dummy=0xDECAFBAD;
2866#endif
2867 /* eat spaces prior to semi */
2868 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2869 ;
73c04bcf 2870 j[n].locale = uprv_strndup(s,(int32_t)((t+1)-s));
374ca955
A
2871 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2872 if(strcmp(j[n].locale,tmp)) {
2873 uprv_free(j[n].locale);
2874 j[n].locale=uprv_strdup(tmp);
2875 }
2876#if defined(ULOC_DEBUG)
2877 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2878#endif
2879 n++;
2880 s = itemEnd;
2881 while(*s==',') { /* eat duplicate commas */
2882 s++;
2883 }
2884 if(n>=jSize) {
2885 if(j==smallBuffer) { /* overflowed the small buffer. */
2886 j = uprv_malloc(sizeof(j[0])*(jSize*2));
2887 if(j!=NULL) {
2888 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2889 }
2890#if defined(ULOC_DEBUG)
2891 fprintf(stderr,"malloced at size %d\n", jSize);
2892#endif
2893 } else {
2894 j = uprv_realloc(j, sizeof(j[0])*jSize*2);
2895#if defined(ULOC_DEBUG)
2896 fprintf(stderr,"re-alloced at size %d\n", jSize);
2897#endif
2898 }
2899 jSize *= 2;
2900 if(j==NULL) {
2901 *status = U_MEMORY_ALLOCATION_ERROR;
2902 return -1;
2903 }
2904 }
2905 }
2906 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2907 if(U_FAILURE(*status)) {
2908 if(j != smallBuffer) {
2909#if defined(ULOC_DEBUG)
2910 fprintf(stderr,"freeing j %p\n", j);
2911#endif
2912 uprv_free(j);
2913 }
2914 return -1;
2915 }
2916 strs = uprv_malloc((size_t)(sizeof(strs[0])*n));
2917 for(i=0;i<n;i++) {
2918#if defined(ULOC_DEBUG)
2919 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2920#endif
2921 strs[i]=j[i].locale;
2922 }
2923 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2924 (const char**)strs, n, availableLocales, status);
2925 for(i=0;i<n;i++) {
2926 uprv_free(strs[i]);
2927 }
2928 uprv_free(strs);
2929 if(j != smallBuffer) {
2930#if defined(ULOC_DEBUG)
2931 fprintf(stderr,"freeing j %p\n", j);
2932#endif
2933 uprv_free(j);
2934 }
2935 return res;
2936}
2937
2938
2939U_CAPI int32_t U_EXPORT2
2940uloc_acceptLanguage(char *result, int32_t resultAvailable,
2941 UAcceptResult *outResult, const char **acceptList,
2942 int32_t acceptListCount,
2943 UEnumeration* availableLocales,
2944 UErrorCode *status)
2945{
2946 int32_t i,j;
2947 int32_t len;
2948 int32_t maxLen=0;
2949 char tmp[ULOC_FULLNAME_CAPACITY+1];
2950 const char *l;
2951 char **fallbackList;
2952 if(U_FAILURE(*status)) {
2953 return -1;
2954 }
2955 fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount));
2956 if(fallbackList==NULL) {
2957 *status = U_MEMORY_ALLOCATION_ERROR;
2958 return -1;
2959 }
2960 for(i=0;i<acceptListCount;i++) {
2961#if defined(ULOC_DEBUG)
2962 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2963#endif
2964 while((l=uenum_next(availableLocales, NULL, status))) {
2965#if defined(ULOC_DEBUG)
2966 fprintf(stderr," %s\n", l);
2967#endif
73c04bcf 2968 len = (int32_t)uprv_strlen(l);
374ca955
A
2969 if(!uprv_strcmp(acceptList[i], l)) {
2970 if(outResult) {
2971 *outResult = ULOC_ACCEPT_VALID;
2972 }
2973#if defined(ULOC_DEBUG)
2974 fprintf(stderr, "MATCH! %s\n", l);
2975#endif
2976 if(len>0) {
2977 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2978 }
2979 for(j=0;j<i;j++) {
2980 uprv_free(fallbackList[j]);
2981 }
2982 uprv_free(fallbackList);
2983 return u_terminateChars(result, resultAvailable, len, status);
2984 }
2985 if(len>maxLen) {
2986 maxLen = len;
2987 }
2988 }
2989 uenum_reset(availableLocales, status);
2990 /* save off parent info */
2991 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2992 fallbackList[i] = uprv_strdup(tmp);
2993 } else {
2994 fallbackList[i]=0;
2995 }
2996 }
2997
2998 for(maxLen--;maxLen>0;maxLen--) {
2999 for(i=0;i<acceptListCount;i++) {
3000 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
3001#if defined(ULOC_DEBUG)
3002 fprintf(stderr,"Try: [%s]", fallbackList[i]);
3003#endif
3004 while((l=uenum_next(availableLocales, NULL, status))) {
3005#if defined(ULOC_DEBUG)
3006 fprintf(stderr," %s\n", l);
3007#endif
73c04bcf 3008 len = (int32_t)uprv_strlen(l);
374ca955
A
3009 if(!uprv_strcmp(fallbackList[i], l)) {
3010 if(outResult) {
3011 *outResult = ULOC_ACCEPT_FALLBACK;
3012 }
3013#if defined(ULOC_DEBUG)
3014 fprintf(stderr, "fallback MATCH! %s\n", l);
3015#endif
3016 if(len>0) {
3017 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
3018 }
73c04bcf
A
3019 for(j=0;j<acceptListCount;j++) {
3020 uprv_free(fallbackList[j]);
374ca955
A
3021 }
3022 uprv_free(fallbackList);
73c04bcf 3023 return u_terminateChars(result, resultAvailable, len, status);
374ca955
A
3024 }
3025 }
3026 uenum_reset(availableLocales, status);
3027
3028 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
3029 uprv_free(fallbackList[i]);
3030 fallbackList[i] = uprv_strdup(tmp);
3031 } else {
3032 uprv_free(fallbackList[i]);
3033 fallbackList[i]=0;
3034 }
3035 }
3036 }
3037 if(outResult) {
3038 *outResult = ULOC_ACCEPT_FAILED;
3039 }
3040 }
3041 for(i=0;i<acceptListCount;i++) {
3042 uprv_free(fallbackList[i]);
3043 }
3044 uprv_free(fallbackList);
3045 return -1;
b75a7d8f 3046}
374ca955
A
3047
3048/*eof*/