]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/uloc.c
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / common / uloc.c
CommitLineData
b75a7d8f
A
/*
**********************************************************************
*   Copyright (C) 1997-2008, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File ULOC.CPP
*
* Modification History:
*
*   Date        Name        Description
*   04/01/97    aliu        Creation.
*   08/21/98    stephen     JDK 1.2 sync
*   12/08/98    rtg         New Locale implementation and C API
*   03/15/99    damiba      overhaul.
*   04/06/99    stephen     changed setDefault() to realloc and copy
*   06/14/99    stephen     Changed calls to ures_open for new params
*   07/21/99    stephen     Modified setDefault() to propagate to C++
*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
*                           brought canonicalization code into line with spec
*****************************************************************************/
22
/*
   POSIX's locale format, from putil.c: [no spaces]

     ll [ _CC ] [ . MM ] [ @ VV]

     l = lang, C = ctry, M = charmap, V = variant
*/
30
b75a7d8f
A
31#include "unicode/utypes.h"
32#include "unicode/ustring.h"
33#include "unicode/uloc.h"
73c04bcf 34#include "unicode/ures.h"
b75a7d8f 35
374ca955 36#include "putilimp.h"
b75a7d8f 37#include "ustr_imp.h"
374ca955 38#include "ulocimp.h"
b75a7d8f
A
39#include "uresimp.h"
40#include "umutex.h"
41#include "cstring.h"
42#include "cmemory.h"
43#include "ucln_cmn.h"
374ca955
A
44#include "locmap.h"
45#include "uarrsort.h"
46#include "uenumimp.h"
47#include "uassert.h"
b75a7d8f 48
374ca955
A
49#include <stdio.h> /* for sprintf */
50
51/* ### Declarations **************************************************/
b75a7d8f
A
52
53/* Locale stuff from locid.cpp */
54U_CFUNC void locale_set_default(const char *id);
55U_CFUNC const char *locale_get_default(void);
374ca955
A
56U_CFUNC int32_t
57locale_getKeywords(const char *localeID,
58 char prev,
59 char *keywords, int32_t keywordCapacity,
60 char *values, int32_t valuesCapacity, int32_t *valLen,
61 UBool valuesToo,
62 UErrorCode *status);
63
/* ### Constants **************************************************/

/* These strings describe the resources we attempt to load from
 the locale ResourceBundle data file.*/
static const char _kLanguages[]       = "Languages";
static const char _kScripts[]         = "Scripts";
static const char _kCountries[]       = "Countries";
static const char _kVariants[]        = "Variants";
static const char _kKeys[]            = "Keys";
static const char _kTypes[]           = "Types";
static const char _kIndexLocaleName[] = "res_index";
static const char _kRootName[]        = "root";
static const char _kIndexTag[]        = "InstalledLocales";
static const char _kCurrency[]        = "currency";
static const char _kCurrencies[]      = "Currencies";

/* File-scope list of installed locale names and its element count;
   both start out empty (NULL / 0). */
static char** _installedLocales = NULL;
static int32_t _installedLocalesCount = 0;
81
374ca955
A
/* ### Data tables **************************************************/

/**
 * Table of language codes, both 2- and 3-letter, with preference
 * given to 2-letter codes where possible.  Includes 3-letter codes
 * that lack a 2-letter equivalent.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with LANGUAGES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Notes
 *
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 * include the revisions up to 2001/7/27 *CWB*
 *
 * The 3 character codes are the terminology codes like RFC 3066.  This
 * is compatible with prior ICU codes
 *
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
 * table but now at the end of the table because 3 character codes are
 * duplicates.  This avoids bad searches going from 3 to 2 character
 * codes.
 *
 * The range qaa-qtz is reserved for local use
 */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",
    "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",
    "ang", "anp", "apa",
    "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",
    "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",
    "bai", "bal", "ban", "bas", "bat", "be",  "bej",
    "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",
    "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",
    "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",
    "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",
    "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",
    "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",
    "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",
    "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",
    "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",
    "fr",  "frm", "fro", "frr", "frs", "fur", "fy",
    "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
    "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu",  "gv",  "gwi",
    "ha",  "hai", "haw", "he",  "hi",  "hil", "him",
    "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",
    "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",
    "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",
    "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
    "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",
    "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",
    "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",
    "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",
    "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",
    "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",
    "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",
    "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",
    "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",
    "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",
    "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",
    "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",
    "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",
    "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",
    "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",
    "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",
    "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",
    "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",
    "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
    "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
    "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",
    "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",
    "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",
    "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",
    "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
    "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",
    "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",
    "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",
    "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",
    "zu",  "zun", "zxx", "zza",
NULL,
    "in",  "iw",  "ji",  "jw",  "sh",                          /* obsolete language codes */
NULL
};
73c04bcf
A
/*
 * Deprecated language codes and their replacements, as parallel arrays:
 * REPLACEMENT_LANGUAGES[i] is the current code for DEPRECATED_LANGUAGES[i].
 * Each array ends with two NULL entries.  Note that "sh", which LANGUAGES
 * lists among its obsolete codes, has no entry here.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
b75a7d8f 185
374ca955
A
/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].  The commented-out lines are
 * copied from LANGUAGES to make eyeballing this baby easier.
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in LANGUAGES.
 *
 * Every entry must be exactly three letters long.  The slot for "kg"
 * (Kongo) holds its ISO 639-2 code "kon"; it previously repeated the
 * 2-letter code by mistake.
 */
static const char * const LANGUAGES_3[] = {
/*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
/*  "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
    "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
/*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
    "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
/*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
    "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
/*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
    "bai", "bal", "ban", "bas", "bat", "bel", "bej",
/*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
    "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
/*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
    "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
/*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
    "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
/*  "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
    "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
/*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
    "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
/*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
    "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
/*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
    "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
/*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
/*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
    "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
/*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
    "enm", "epo", "spa", "est", "eus", "ewo", "fas",
/*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
    "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
/*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gay",    */
    "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
/*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
    "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
/*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",     */
    "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
/*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
    "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
/*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
    "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
/*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
    "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
/*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
    "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
/*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
    "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
/*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",*/
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kon", "kha", "khi",
/*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
    "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
/*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",     */
    "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
/*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
    "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
/*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
    "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
/*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
    "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
/*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
    "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
/*  "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
    "mdf", "mdr", "men", "mlg", "mga", "mah", "mri", "mic", "min",
/*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
    "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
/*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
    "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
/*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
    "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
/*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
    "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
/*  "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",    */
    "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub",
/*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
    "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
/*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
    "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
/*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
/*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
    "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
/*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
/*  "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
    "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
/*  "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",    */
    "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
/*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
    "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
/*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
/*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
/*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
    "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
/*  "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",    */
    "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter",
/*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
    "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
/*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",     */
    "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur",
/*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
/*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
    "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
/*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
    "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
/*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
    "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
/*  "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",    */
    "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd",
/*  "zu",  "zun", "zxx", "zza",                         */
    "zul", "zun", "zxx", "zza",
NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL
};
322
374ca955
A
/**
 * Table of 2-letter country codes.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with COUNTRIES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Notes:
 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 * new codes keeping the old ones for compatibility updated to include
 * 1999/12/03 revisions *CWB*
 *
 * RO(ROM) is now RO(ROU) according to
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 */
static const char * const COUNTRIES[] = {
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",
    "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL,
    "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   /* obsolete country codes */
NULL
};
382
/*
 * Deprecated country codes and their replacements, as parallel arrays:
 * REPLACEMENT_COUNTRIES[i] is the current code for DEPRECATED_COUNTRIES[i].
 * Each array ends with two NULL entries.
 */
static const char* const DEPRECATED_COUNTRIES[] ={
    "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
    "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL  /* replacement country codes */
};
390
374ca955
A
/**
 * Table of 3-letter country codes.
 *
 * This is a lookup table used to convert 3-letter country codes to
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 * For all valid i, COUNTRIES[i] must refer to the same country as
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 * to make eyeballing this baby easier.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in COUNTRIES.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",     */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
/*  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL,
/*  "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   */
    "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
NULL
};
470
374ca955
A
/**
 * One canonicalization rule: an input locale ID, the ID it is rewritten
 * to, and optionally a keyword=value pair that goes with the rewrite.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} CanonicalizationMap;

/**
 * A map to canonicalize locale IDs.  This handles a variety of
 * different semantic kinds of transformations.
 *
 * In every entry, keyword and value are either both NULL or both set.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
    { "C",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
    { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
    { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
    { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
    { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
    { "cel_GAULISH",    "cel__GAULISH", NULL, NULL }, /* registered name */
    { "de_1901",        "de__1901", NULL, NULL }, /* registered name */
    { "de_1906",        "de__1906", NULL, NULL }, /* registered name */
    { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
    { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
    { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
    { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
    { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
    { "en_BOONT",       "en__BOONT", NULL, NULL }, /* registered name */
    { "en_SCOUSE",      "en__SCOUSE", NULL, NULL }, /* registered name */
    { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
    { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
    { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
    { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
    { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
    { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
    { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
    { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
    { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
    { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
    { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
    { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
    { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
    { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
    { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
    { "sl_ROZAJ",       "sl__ROZAJ", NULL, NULL }, /* registered name */
    { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
    { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
    { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
    { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
    { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
    { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
    { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
    { "zh_GAN",         "zh__GAN", NULL, NULL }, /* registered name */
    { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
    { "zh_HAKKA",       "zh__HAKKA", NULL, NULL }, /* registered name */
    { "zh_MIN",         "zh__MIN", NULL, NULL }, /* registered name */
    { "zh_MIN_NAN",     "zh__MINNAN", NULL, NULL }, /* registered name */
    { "zh_WUU",         "zh__WUU", NULL, NULL }, /* registered name */
    { "zh_XIANG",       "zh__XIANG", NULL, NULL }, /* registered name */
    { "zh_YUE",         "zh__YUE", NULL, NULL }, /* registered name */
};
537
/**
 * One variant rule: a variant name and the keyword=value pair
 * associated with it.
 */
typedef struct VariantMap {
    const char *variant;          /* input ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} VariantMap;

/* Variant names paired with the keyword and value listed for each. */
static const VariantMap VARIANT_MAP[] = {
    { "EURO",   "currency", "EUR" },
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    { "STROKE", "collation", "stroke" }  /* Solaris variant */
};
549
/* ### Keywords **************************************************/

/* Size in bytes, terminator included, of the buffer that holds one
   canonicalized keyword name (see KeywordStruct.keyword). */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of keywords collected from a single locale ID
   (sizes the keywordList array in _getKeywords). */
#define ULOC_MAX_NO_KEYWORDS 25
554
/**
 * Locates the start of the keyword section of a locale ID.
 *
 * @param localeID zero-terminated locale ID to scan
 * @return pointer to the first '@' in localeID, or NULL if there is none.
 *         On EBCDIC builds, when the compiled '@' is not found, each byte
 *         value in a fixed list of alternates is searched for in list
 *         order and the first one present is returned.
 */
static const char *
locale_getKeywordsStart(const char *localeID) {
    const char *result = uprv_strchr(localeID, '@');

    if(result != NULL) {
        return result;
    }
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    {
        /* We do this because the @ sign is variant, and the @ sign used on one
        EBCDIC machine won't be compiled the same way on other EBCDIC based
        machines. */
        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
        const uint8_t *charToFind;

        /* the table is terminated by its 0x00 entry */
        for(charToFind = ebcdicSigns; *charToFind != 0; charToFind++) {
            result = uprv_strchr(localeID, *charToFind);
            if(result != NULL) {
                return result;
            }
        }
    }
#endif
    return NULL;
}
578
579/**
580 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
581 * @param keywordName incoming name to be canonicalized
582 * @param status return status (keyword too long)
583 * @return length of the keyword name
584 */
585static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
586{
587 int32_t i;
73c04bcf 588 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
374ca955
A
589
590 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
591 /* keyword name too long for internal buffer */
592 *status = U_INTERNAL_PROGRAM_ERROR;
593 return 0;
594 }
595
596 /* normalize the keyword name */
597 for(i = 0; i < keywordNameLen; i++) {
598 buf[i] = uprv_tolower(keywordName[i]);
599 }
600 buf[i] = 0;
601
602 return keywordNameLen;
603}
604
605typedef struct {
606 char keyword[ULOC_KEYWORD_BUFFER_LEN];
607 int32_t keywordLen;
608 const char *valueStart;
609 int32_t valueLen;
610} KeywordStruct;
611
612static int32_t U_CALLCONV
613compareKeywordStructs(const void *context, const void *left, const void *right) {
614 const char* leftString = ((const KeywordStruct *)left)->keyword;
615 const char* rightString = ((const KeywordStruct *)right)->keyword;
616 return uprv_strcmp(leftString, rightString);
617}
618
619/**
620 * Both addKeyword and addValue must already be in canonical form.
621 * Either both addKeyword and addValue are NULL, or neither is NULL.
622 * If they are not NULL they must be zero terminated.
623 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
624 */
625static int32_t
626_getKeywords(const char *localeID,
627 char prev,
628 char *keywords, int32_t keywordCapacity,
629 char *values, int32_t valuesCapacity, int32_t *valLen,
630 UBool valuesToo,
631 const char* addKeyword,
632 const char* addValue,
633 UErrorCode *status)
634{
635 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
636
637 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
638 int32_t numKeywords = 0;
639 const char* pos = localeID;
640 const char* equalSign = NULL;
641 const char* semicolon = NULL;
642 int32_t i = 0, j, n;
643 int32_t keywordsLen = 0;
644 int32_t valuesLen = 0;
645
646 if(prev == '@') { /* start of keyword definition */
647 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
648 do {
649 UBool duplicate = FALSE;
650 /* skip leading spaces */
651 while(*pos == ' ') {
652 pos++;
653 }
654 if (!*pos) { /* handle trailing "; " */
655 break;
656 }
657 if(numKeywords == maxKeywords) {
658 *status = U_INTERNAL_PROGRAM_ERROR;
659 return 0;
660 }
661 equalSign = uprv_strchr(pos, '=');
662 semicolon = uprv_strchr(pos, ';');
663 /* lack of '=' [foo@currency] is illegal */
664 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
665 if(!equalSign || (semicolon && semicolon<equalSign)) {
666 *status = U_INVALID_FORMAT_ERROR;
667 return 0;
668 }
669 /* need to normalize both keyword and keyword name */
670 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
671 /* keyword name too long for internal buffer */
672 *status = U_INTERNAL_PROGRAM_ERROR;
673 return 0;
674 }
675 for(i = 0, n = 0; i < equalSign - pos; ++i) {
676 if (pos[i] != ' ') {
677 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
678 }
679 }
680 keywordList[numKeywords].keyword[n] = 0;
681 keywordList[numKeywords].keywordLen = n;
682 /* now grab the value part. First we skip the '=' */
683 equalSign++;
684 /* then we leading spaces */
685 while(*equalSign == ' ') {
686 equalSign++;
687 }
688 keywordList[numKeywords].valueStart = equalSign;
689
690 pos = semicolon;
691 i = 0;
692 if(pos) {
693 while(*(pos - i - 1) == ' ') {
694 i++;
695 }
73c04bcf 696 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
374ca955
A
697 pos++;
698 } else {
73c04bcf 699 i = (int32_t)uprv_strlen(equalSign);
374ca955
A
700 while(equalSign[i-1] == ' ') {
701 i--;
702 }
703 keywordList[numKeywords].valueLen = i;
704 }
705 /* If this is a duplicate keyword, then ignore it */
706 for (j=0; j<numKeywords; ++j) {
707 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
708 duplicate = TRUE;
709 break;
710 }
711 }
712 if (!duplicate) {
713 ++numKeywords;
714 }
715 } while(pos);
716
717 /* Handle addKeyword/addValue. */
718 if (addKeyword != NULL) {
719 UBool duplicate = FALSE;
720 U_ASSERT(addValue != NULL);
721 /* Search for duplicate; if found, do nothing. Explicit keyword
722 overrides addKeyword. */
723 for (j=0; j<numKeywords; ++j) {
724 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
725 duplicate = TRUE;
726 break;
727 }
728 }
729 if (!duplicate) {
730 if (numKeywords == maxKeywords) {
731 *status = U_INTERNAL_PROGRAM_ERROR;
732 return 0;
733 }
734 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
73c04bcf 735 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
374ca955 736 keywordList[numKeywords].valueStart = addValue;
73c04bcf 737 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
374ca955
A
738 ++numKeywords;
739 }
740 } else {
741 U_ASSERT(addValue == NULL);
742 }
743
744 /* now we have a list of keywords */
745 /* we need to sort it */
746 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
747
748 /* Now construct the keyword part */
749 for(i = 0; i < numKeywords; i++) {
750 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
751 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
752 if(valuesToo) {
753 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
754 } else {
755 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
756 }
757 }
758 keywordsLen += keywordList[i].keywordLen + 1;
759 if(valuesToo) {
760 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
761 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
762 }
763 keywordsLen += keywordList[i].valueLen;
764
765 if(i < numKeywords - 1) {
766 if(keywordsLen < keywordCapacity) {
767 keywords[keywordsLen] = ';';
768 }
769 keywordsLen++;
770 }
771 }
772 if(values) {
773 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
774 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
775 values[valuesLen + keywordList[i].valueLen] = 0;
776 }
777 valuesLen += keywordList[i].valueLen + 1;
778 }
779 }
780 if(values) {
781 values[valuesLen] = 0;
782 if(valLen) {
783 *valLen = valuesLen;
784 }
785 }
786 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
787 } else {
788 return 0;
789 }
790}
791
792U_CFUNC int32_t
793locale_getKeywords(const char *localeID,
794 char prev,
795 char *keywords, int32_t keywordCapacity,
796 char *values, int32_t valuesCapacity, int32_t *valLen,
797 UBool valuesToo,
798 UErrorCode *status) {
799 return _getKeywords(localeID, prev, keywords, keywordCapacity,
800 values, valuesCapacity, valLen, valuesToo,
801 NULL, NULL, status);
802}
803
804U_CAPI int32_t U_EXPORT2
805uloc_getKeywordValue(const char* localeID,
806 const char* keywordName,
807 char* buffer, int32_t bufferCapacity,
808 UErrorCode* status)
809{
810 const char* nextSeparator = NULL;
374ca955
A
811 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
812 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
813 int32_t i = 0;
814 int32_t result = 0;
815
816 if(status && U_SUCCESS(*status) && localeID) {
817
818 const char* startSearchHere = uprv_strchr(localeID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
819 if(startSearchHere == NULL) {
820 /* no keywords, return at once */
821 return 0;
822 }
823
73c04bcf 824 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
374ca955
A
825 if(U_FAILURE(*status)) {
826 return 0;
827 }
828
829 /* find the first keyword */
830 while(startSearchHere) {
831 startSearchHere++;
832 /* skip leading spaces (allowed?) */
833 while(*startSearchHere == ' ') {
834 startSearchHere++;
835 }
836 nextSeparator = uprv_strchr(startSearchHere, '=');
837 /* need to normalize both keyword and keyword name */
838 if(!nextSeparator) {
839 break;
840 }
841 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
842 /* keyword name too long for internal buffer */
843 *status = U_INTERNAL_PROGRAM_ERROR;
844 return 0;
845 }
846 for(i = 0; i < nextSeparator - startSearchHere; i++) {
847 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
848 }
849 /* trim trailing spaces */
850 while(startSearchHere[i-1] == ' ') {
851 i--;
852 }
853 localeKeywordNameBuffer[i] = 0;
854
855 startSearchHere = uprv_strchr(nextSeparator, ';');
856
857 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
858 nextSeparator++;
859 while(*nextSeparator == ' ') {
860 nextSeparator++;
861 }
862 /* we actually found the keyword. Copy the value */
863 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
864 while(*(startSearchHere-1) == ' ') {
865 startSearchHere--;
866 }
867 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
73c04bcf 868 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
374ca955 869 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
73c04bcf 870 i = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
871 while(nextSeparator[i - 1] == ' ') {
872 i--;
873 }
874 uprv_strncpy(buffer, nextSeparator, i);
875 result = u_terminateChars(buffer, bufferCapacity, i, status);
876 } else {
877 /* give a bigger buffer, please */
878 *status = U_BUFFER_OVERFLOW_ERROR;
879 if(startSearchHere) {
73c04bcf 880 result = (int32_t)(startSearchHere - nextSeparator);
374ca955 881 } else {
73c04bcf 882 result = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
883 }
884 }
885 return result;
886 }
887 }
888 }
889 return 0;
890}
891
892U_CAPI int32_t U_EXPORT2
893uloc_setKeywordValue(const char* keywordName,
894 const char* keywordValue,
895 char* buffer, int32_t bufferCapacity,
896 UErrorCode* status)
897{
898 /* TODO: sorting. removal. */
899 int32_t keywordNameLen;
900 int32_t keywordValueLen;
901 int32_t bufLen;
902 int32_t needLen = 0;
903 int32_t foundValueLen;
904 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
905 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
906 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
907 int32_t i = 0;
908 int32_t rc;
909 char* nextSeparator = NULL;
910 char* nextEqualsign = NULL;
911 char* startSearchHere = NULL;
912 char* keywordStart = NULL;
913 char *insertHere = NULL;
914 if(U_FAILURE(*status)) {
915 return -1;
916 }
73c04bcf
A
917 if(bufferCapacity>1) {
918 bufLen = (int32_t)uprv_strlen(buffer);
919 } else {
920 *status = U_ILLEGAL_ARGUMENT_ERROR;
921 return 0;
922 }
923 if(bufferCapacity<bufLen) {
924 /* The capacity is less than the length?! Is this NULL terminated? */
925 *status = U_ILLEGAL_ARGUMENT_ERROR;
926 return 0;
927 }
374ca955
A
928 if(keywordValue && !*keywordValue) {
929 keywordValue = NULL;
930 }
931 if(keywordValue) {
73c04bcf 932 keywordValueLen = (int32_t)uprv_strlen(keywordValue);
374ca955
A
933 } else {
934 keywordValueLen = 0;
935 }
936 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
937 if(U_FAILURE(*status)) {
938 return 0;
939 }
940 startSearchHere = (char*)locale_getKeywordsStart(buffer);
374ca955
A
941 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
942 if(!keywordValue) { /* no keywords = nothing to remove */
943 return bufLen;
944 }
945
946 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
947 if(startSearchHere) { /* had a single @ */
948 needLen--; /* already had the @ */
949 /* startSearchHere points at the @ */
950 } else {
951 startSearchHere=buffer+bufLen;
952 }
953 if(needLen >= bufferCapacity) {
954 *status = U_BUFFER_OVERFLOW_ERROR;
955 return needLen; /* no change */
956 }
957 *startSearchHere = '@';
958 startSearchHere++;
959 uprv_strcpy(startSearchHere, keywordNameBuffer);
960 startSearchHere += keywordNameLen;
961 *startSearchHere = '=';
962 startSearchHere++;
963 uprv_strcpy(startSearchHere, keywordValue);
964 startSearchHere+=keywordValueLen;
965 return needLen;
966 } /* end shortcut - no @ */
967
968 keywordStart = startSearchHere;
969 /* search for keyword */
970 while(keywordStart) {
971 keywordStart++;
972 /* skip leading spaces (allowed?) */
973 while(*keywordStart == ' ') {
974 keywordStart++;
975 }
976 nextEqualsign = uprv_strchr(keywordStart, '=');
977 /* need to normalize both keyword and keyword name */
978 if(!nextEqualsign) {
979 break;
980 }
981 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
982 /* keyword name too long for internal buffer */
983 *status = U_INTERNAL_PROGRAM_ERROR;
984 return 0;
985 }
986 for(i = 0; i < nextEqualsign - keywordStart; i++) {
987 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
988 }
989 /* trim trailing spaces */
990 while(keywordStart[i-1] == ' ') {
991 i--;
992 }
993 localeKeywordNameBuffer[i] = 0;
994
995 nextSeparator = uprv_strchr(nextEqualsign, ';');
996 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
997 if(rc == 0) {
998 nextEqualsign++;
999 while(*nextEqualsign == ' ') {
1000 nextEqualsign++;
1001 }
1002 /* we actually found the keyword. Change the value */
1003 if (nextSeparator) {
1004 keywordAtEnd = 0;
73c04bcf 1005 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
374ca955
A
1006 } else {
1007 keywordAtEnd = 1;
73c04bcf 1008 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
374ca955
A
1009 }
1010 if(keywordValue) { /* adding a value - not removing */
1011 if(foundValueLen == keywordValueLen) {
1012 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1013 return bufLen; /* no change in size */
1014 } else if(foundValueLen > keywordValueLen) {
1015 int32_t delta = foundValueLen - keywordValueLen;
1016 if(nextSeparator) { /* RH side */
1017 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1018 }
1019 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1020 bufLen -= delta;
1021 buffer[bufLen]=0;
1022 return bufLen;
1023 } else { /* FVL < KVL */
1024 int32_t delta = keywordValueLen - foundValueLen;
1025 if((bufLen+delta) >= bufferCapacity) {
1026 *status = U_BUFFER_OVERFLOW_ERROR;
1027 return bufLen+delta;
1028 }
1029 if(nextSeparator) { /* RH side */
1030 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1031 }
1032 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1033 bufLen += delta;
1034 buffer[bufLen]=0;
1035 return bufLen;
1036 }
1037 } else { /* removing a keyword */
1038 if(keywordAtEnd) {
1039 /* zero out the ';' or '@' just before startSearchhere */
1040 keywordStart[-1] = 0;
73c04bcf 1041 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
374ca955
A
1042 } else {
1043 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1044 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
73c04bcf 1045 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
374ca955
A
1046 }
1047 }
1048 } else if(rc<0){ /* end match keyword */
1049 /* could insert at this location. */
1050 insertHere = keywordStart;
1051 }
1052 keywordStart = nextSeparator;
1053 } /* end loop searching */
1054
1055 if(!keywordValue) {
1056 return bufLen; /* removal of non-extant keyword - no change */
1057 }
1058
1059 /* we know there is at least one keyword. */
1060 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1061 if(needLen >= bufferCapacity) {
1062 *status = U_BUFFER_OVERFLOW_ERROR;
1063 return needLen; /* no change */
1064 }
1065
1066 if(insertHere) {
1067 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1068 keywordStart = insertHere;
1069 } else {
1070 keywordStart = buffer+bufLen;
1071 *keywordStart = ';';
1072 keywordStart++;
1073 }
1074 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1075 keywordStart += keywordNameLen;
1076 *keywordStart = '=';
1077 keywordStart++;
1078 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1079 keywordStart+=keywordValueLen;
1080 if(insertHere) {
1081 *keywordStart = ';';
1082 keywordStart++;
1083 }
1084 buffer[needLen]=0;
1085 return needLen;
1086}
b75a7d8f 1087
374ca955 1088/* ### ID parsing implementation **************************************************/
b75a7d8f
A
1089
/* Character-classification macros for locale ID parsing. Every argument is
   parenthesized so that expressions such as `p + 2` or `c ? x : y` can be
   passed safely; arguments may still be evaluated more than once. */

/* returns TRUE if a is an ID separator, FALSE otherwise */
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')

/* returns TRUE if a is one of the prefix letters x, X, i or I */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1103
374ca955
A
1104static char* _strnchr(const char* str, int32_t len, char c) {
1105 U_ASSERT(str != 0 && len >= 0);
1106 while (len-- != 0) {
1107 char d = *str;
1108 if (d == c) {
1109 return (char*) str;
1110 } else if (d == 0) {
1111 break;
1112 }
1113 ++str;
1114 }
1115 return NULL;
1116}
1117
1118/**
1119 * Lookup 'key' in the array 'list'. The array 'list' should contain
1120 * a NULL entry, followed by more entries, and a second NULL entry.
1121 *
1122 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1123 * COUNTRIES_3.
1124 */
b75a7d8f
A
1125static int16_t _findIndex(const char* const* list, const char* key)
1126{
1127 const char* const* anchor = list;
374ca955
A
1128 int32_t pass = 0;
1129
1130 /* Make two passes through two NULL-terminated arrays at 'list' */
1131 while (pass++ < 2) {
1132 while (*list) {
1133 if (uprv_strcmp(key, *list) == 0) {
1134 return (int16_t)(list - anchor);
1135 }
1136 list++;
b75a7d8f 1137 }
374ca955 1138 ++list; /* skip final NULL *CWB*/
b75a7d8f
A
1139 }
1140 return -1;
1141}
1142
1143/* count the length of src while copying it to dest; return strlen(src) */
1144static U_INLINE int32_t
1145_copyCount(char *dest, int32_t destCapacity, const char *src) {
1146 const char *anchor;
1147 char c;
1148
1149 anchor=src;
1150 for(;;) {
1151 if((c=*src)==0) {
1152 return (int32_t)(src-anchor);
1153 }
1154 if(destCapacity<=0) {
1155 return (int32_t)((src-anchor)+uprv_strlen(src));
1156 }
1157 ++src;
1158 *dest++=c;
1159 --destCapacity;
1160 }
1161}
1162
73c04bcf
A
1163static const char*
1164uloc_getCurrentCountryID(const char* oldID){
1165 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1166 if (offset >= 0) {
1167 return REPLACEMENT_COUNTRIES[offset];
1168 }
1169 return oldID;
1170}
1171static const char*
1172uloc_getCurrentLanguageID(const char* oldID){
1173 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1174 if (offset >= 0) {
1175 return REPLACEMENT_LANGUAGES[offset];
1176 }
1177 return oldID;
1178}
b75a7d8f
A
1179/*
1180 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1181 * avoid duplicating code to handle the earlier locale ID pieces
1182 * in the functions for the later ones by
1183 * setting the *pEnd pointer to where they stopped parsing
1184 *
1185 * TODO try to use this in Locale
1186 */
1187static int32_t
1188_getLanguage(const char *localeID,
1189 char *language, int32_t languageCapacity,
1190 const char **pEnd) {
1191 int32_t i=0;
1192 int32_t offset;
1193 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1194
1195 /* if it starts with i- or x- then copy that prefix */
1196 if(_isIDPrefix(localeID)) {
1197 if(i<languageCapacity) {
1198 language[i]=(char)uprv_tolower(*localeID);
1199 }
1200 if(i<languageCapacity) {
1201 language[i+1]='-';
1202 }
1203 i+=2;
1204 localeID+=2;
1205 }
1206
1207 /* copy the language as far as possible and count its length */
1208 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1209 if(i<languageCapacity) {
1210 language[i]=(char)uprv_tolower(*localeID);
1211 }
1212 if(i<3) {
1213 lang[i]=(char)uprv_tolower(*localeID);
1214 }
1215 i++;
1216 localeID++;
1217 }
1218
1219 if(i==3) {
1220 /* convert 3 character code to 2 character code if possible *CWB*/
374ca955 1221 offset=_findIndex(LANGUAGES_3, lang);
b75a7d8f 1222 if(offset>=0) {
374ca955 1223 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
b75a7d8f
A
1224 }
1225 }
1226
1227 if(pEnd!=NULL) {
1228 *pEnd=localeID;
1229 }
1230 return i;
1231}
1232
374ca955
A
1233static int32_t
1234_getScript(const char *localeID,
1235 char *script, int32_t scriptCapacity,
1236 const char **pEnd)
b75a7d8f 1237{
374ca955 1238 int32_t idLen = 0;
b75a7d8f 1239
374ca955
A
1240 if (pEnd != NULL) {
1241 *pEnd = localeID;
b75a7d8f 1242 }
374ca955
A
1243
1244 /* copy the second item as far as possible and count its length */
1245 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1246 idLen++;
b75a7d8f
A
1247 }
1248
374ca955
A
1249 /* If it's exactly 4 characters long, then it's a script and not a country. */
1250 if (idLen == 4) {
1251 int32_t i;
1252 if (pEnd != NULL) {
1253 *pEnd = localeID+idLen;
1254 }
1255 if(idLen > scriptCapacity) {
1256 idLen = scriptCapacity;
1257 }
1258 if (idLen >= 1) {
1259 script[0]=(char)uprv_toupper(*(localeID++));
1260 }
1261 for (i = 1; i < idLen; i++) {
1262 script[i]=(char)uprv_tolower(*(localeID++));
1263 }
1264 }
1265 else {
1266 idLen = 0;
1267 }
1268 return idLen;
b75a7d8f
A
1269}
1270
1271static int32_t
1272_getCountry(const char *localeID,
1273 char *country, int32_t countryCapacity,
374ca955
A
1274 const char **pEnd)
1275{
b75a7d8f 1276 int32_t i=0;
374ca955 1277 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
b75a7d8f
A
1278 int32_t offset;
1279
1280 /* copy the country as far as possible and count its length */
1281 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1282 if(i<countryCapacity) {
1283 country[i]=(char)uprv_toupper(*localeID);
1284 }
374ca955 1285 if(i<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
b75a7d8f
A
1286 cnty[i]=(char)uprv_toupper(*localeID);
1287 }
1288 i++;
1289 localeID++;
1290 }
1291
1292 /* convert 3 character code to 2 character code if possible *CWB*/
1293 if(i==3) {
374ca955 1294 offset=_findIndex(COUNTRIES_3, cnty);
b75a7d8f 1295 if(offset>=0) {
374ca955 1296 i=_copyCount(country, countryCapacity, COUNTRIES[offset]);
b75a7d8f
A
1297 }
1298 }
1299
1300 if(pEnd!=NULL) {
1301 *pEnd=localeID;
1302 }
1303 return i;
1304}
1305
374ca955
A
1306/**
1307 * @param needSeparator if true, then add leading '_' if any variants
1308 * are added to 'variant'
1309 */
1310static int32_t
1311_getVariantEx(const char *localeID,
1312 char prev,
1313 char *variant, int32_t variantCapacity,
1314 UBool needSeparator) {
b75a7d8f
A
1315 int32_t i=0;
1316
1317 /* get one or more variant tags and separate them with '_' */
1318 if(_isIDSeparator(prev)) {
1319 /* get a variant string after a '-' or '_' */
1320 while(!_isTerminator(*localeID)) {
374ca955
A
1321 if (needSeparator) {
1322 if (i<variantCapacity) {
1323 variant[i] = '_';
1324 }
1325 ++i;
1326 needSeparator = FALSE;
1327 }
b75a7d8f
A
1328 if(i<variantCapacity) {
1329 variant[i]=(char)uprv_toupper(*localeID);
1330 if(variant[i]=='-') {
1331 variant[i]='_';
1332 }
1333 }
1334 i++;
1335 localeID++;
1336 }
1337 }
1338
1339 /* if there is no variant tag after a '-' or '_' then look for '@' */
1340 if(i==0) {
1341 if(prev=='@') {
1342 /* keep localeID */
374ca955 1343 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
b75a7d8f
A
1344 ++localeID; /* point after the '@' */
1345 } else {
1346 return 0;
1347 }
1348 while(!_isTerminator(*localeID)) {
374ca955
A
1349 if (needSeparator) {
1350 if (i<variantCapacity) {
1351 variant[i] = '_';
1352 }
1353 ++i;
1354 needSeparator = FALSE;
1355 }
b75a7d8f
A
1356 if(i<variantCapacity) {
1357 variant[i]=(char)uprv_toupper(*localeID);
1358 if(variant[i]=='-' || variant[i]==',') {
1359 variant[i]='_';
1360 }
1361 }
1362 i++;
1363 localeID++;
1364 }
1365 }
374ca955 1366
b75a7d8f
A
1367 return i;
1368}
1369
374ca955
A
1370static int32_t
1371_getVariant(const char *localeID,
1372 char prev,
1373 char *variant, int32_t variantCapacity) {
1374 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1375}
1376
1377/**
1378 * Delete ALL instances of a variant from the given list of one or
1379 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1380 * @param variants the source string of one or more variants,
1381 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1382 * terminated; if it is, trailing zero will NOT be maintained.
1383 * @param variantsLen length of variants
1384 * @param toDelete variant to delete, without separators, e.g. "EURO"
1385 * or "PREEURO"; not zero terminated
1386 * @param toDeleteLen length of toDelete
1387 * @return number of characters deleted from variants
1388 */
1389static int32_t
1390_deleteVariant(char* variants, int32_t variantsLen,
46f4442e
A
1391 const char* toDelete, int32_t toDeleteLen)
1392{
374ca955
A
1393 int32_t delta = 0; /* number of chars deleted */
1394 for (;;) {
1395 UBool flag = FALSE;
1396 if (variantsLen < toDeleteLen) {
1397 return delta;
1398 }
1399 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1400 (variantsLen == toDeleteLen ||
46f4442e
A
1401 (flag=(variants[toDeleteLen] == '_'))))
1402 {
374ca955
A
1403 int32_t d = toDeleteLen + (flag?1:0);
1404 variantsLen -= d;
1405 delta += d;
46f4442e
A
1406 if (variantsLen > 0) {
1407 uprv_memmove(variants, variants+d, variantsLen);
1408 }
374ca955
A
1409 } else {
1410 char* p = _strnchr(variants, variantsLen, '_');
1411 if (p == NULL) {
1412 return delta;
1413 }
1414 ++p;
73c04bcf 1415 variantsLen -= (int32_t)(p - variants);
374ca955
A
1416 variants = p;
1417 }
1418 }
1419}
1420
1421/* Keyword enumeration */
1422
/* State behind a keyword enumeration created by uloc_openKeywordList(). */
typedef struct UKeywordsContext {
    char* keywords; /* heap copy of the keyword list: NUL-separated names,
                       ending with an extra NUL; freed on close */
    char* current;  /* next keyword to hand out; points into keywords */
} UKeywordsContext;
1427
1428static void U_CALLCONV
1429uloc_kw_closeKeywords(UEnumeration *enumerator) {
1430 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1431 uprv_free(enumerator->context);
1432 uprv_free(enumerator);
1433}
1434
1435static int32_t U_CALLCONV
1436uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) {
1437 char *kw = ((UKeywordsContext *)en->context)->keywords;
1438 int32_t result = 0;
1439 while(*kw) {
1440 result++;
1441 kw += uprv_strlen(kw)+1;
1442 }
1443 return result;
1444}
1445
1446static const char* U_CALLCONV
1447uloc_kw_nextKeyword(UEnumeration* en,
1448 int32_t* resultLength,
1449 UErrorCode* status) {
1450 const char* result = ((UKeywordsContext *)en->context)->current;
1451 int32_t len = 0;
1452 if(*result) {
73c04bcf 1453 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
374ca955
A
1454 ((UKeywordsContext *)en->context)->current += len+1;
1455 } else {
1456 result = NULL;
1457 }
1458 if (resultLength) {
1459 *resultLength = len;
1460 }
1461 return result;
1462}
1463
1464static void U_CALLCONV
1465uloc_kw_resetKeywords(UEnumeration* en,
1466 UErrorCode* status) {
1467 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1468}
1469
1470static const UEnumeration gKeywordsEnum = {
1471 NULL,
1472 NULL,
1473 uloc_kw_closeKeywords,
1474 uloc_kw_countKeywords,
1475 uenum_unextDefault,
1476 uloc_kw_nextKeyword,
1477 uloc_kw_resetKeywords
1478};
1479
1480U_CAPI UEnumeration* U_EXPORT2
1481uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
b75a7d8f 1482{
46f4442e
A
1483 UKeywordsContext *myContext = NULL;
1484 UEnumeration *result = NULL;
b75a7d8f 1485
46f4442e
A
1486 if(U_FAILURE(*status)) {
1487 return NULL;
1488 }
1489 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1490 /* Null pointer test */
1491 if (result == NULL) {
1492 *status = U_MEMORY_ALLOCATION_ERROR;
1493 return NULL;
1494 }
1495 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1496 myContext = uprv_malloc(sizeof(UKeywordsContext));
1497 if (myContext == NULL) {
1498 *status = U_MEMORY_ALLOCATION_ERROR;
1499 uprv_free(result);
1500 return NULL;
1501 }
1502 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1503 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1504 myContext->keywords[keywordListSize] = 0;
1505 myContext->current = myContext->keywords;
1506 result->context = myContext;
1507 return result;
374ca955
A
1508}
1509
1510U_CAPI UEnumeration* U_EXPORT2
1511uloc_openKeywords(const char* localeID,
1512 UErrorCode* status)
1513{
1514 int32_t i=0;
1515 char keywords[256];
1516 int32_t keywordsCapacity = 256;
1517 if(status==NULL || U_FAILURE(*status)) {
b75a7d8f
A
1518 return 0;
1519 }
1520
1521 if(localeID==NULL) {
1522 localeID=uloc_getDefault();
1523 }
1524
374ca955 1525 /* Skip the language */
b75a7d8f
A
1526 _getLanguage(localeID, NULL, 0, &localeID);
1527 if(_isIDSeparator(*localeID)) {
374ca955
A
1528 const char *scriptID;
1529 /* Skip the script if available */
1530 _getScript(localeID+1, NULL, 0, &scriptID);
1531 if(scriptID != localeID+1) {
1532 /* Found optional script */
1533 localeID = scriptID;
1534 }
1535 /* Skip the Country */
1536 if (_isIDSeparator(*localeID)) {
1537 _getCountry(localeID+1, NULL, 0, &localeID);
1538 if(_isIDSeparator(*localeID)) {
1539 _getVariant(localeID+1, *localeID, NULL, 0);
1540 }
b75a7d8f
A
1541 }
1542 }
1543
374ca955
A
1544 /* keywords are located after '@' */
1545 if((localeID = locale_getKeywordsStart(localeID)) != NULL) {
1546 i=locale_getKeywords(localeID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1547 }
1548
1549 if(i) {
1550 return uloc_openKeywordList(keywords, i, status);
1551 } else {
1552 return NULL;
b75a7d8f 1553 }
b75a7d8f
A
1554}
1555
b75a7d8f 1556
374ca955
A
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'. Both arguments are
   parenthesized so that compound expressions such as `a | b` are tested as a
   whole. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The special ID "i-default", stored without a terminating NUL */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1565
374ca955
A
1566/**
1567 * Canonicalize the given localeID, to level 1 or to level 2,
1568 * depending on the options. To specify level 1, pass in options=0.
1569 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1570 *
1571 * This is the code underlying uloc_getName and uloc_canonicalize.
1572 */
1573static int32_t
1574_canonicalize(const char* localeID,
1575 char* result,
1576 int32_t resultCapacity,
1577 uint32_t options,
1578 UErrorCode* err) {
1579 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1580 char localeBuffer[ULOC_FULLNAME_CAPACITY];
46f4442e 1581 const char* origLocaleID;
374ca955
A
1582 const char* keywordAssign = NULL;
1583 const char* separatorIndicator = NULL;
1584 const char* addKeyword = NULL;
1585 const char* addValue = NULL;
1586 char* name;
1587 char* variant = NULL; /* pointer into name, or NULL */
374ca955
A
1588
1589 if (U_FAILURE(*err)) {
b75a7d8f
A
1590 return 0;
1591 }
1592
374ca955 1593 if (localeID==NULL) {
b75a7d8f
A
1594 localeID=uloc_getDefault();
1595 }
46f4442e 1596 origLocaleID=localeID;
b75a7d8f 1597
374ca955
A
1598 /* if we are doing a full canonicalization, then put results in
1599 localeBuffer, if necessary; otherwise send them to result. */
1600 if (OPTION_SET(options, _ULOC_CANONICALIZE) &&
1601 (result == NULL || resultCapacity < sizeof(localeBuffer))) {
1602 name = localeBuffer;
1603 nameCapacity = sizeof(localeBuffer);
1604 } else {
1605 name = result;
1606 nameCapacity = resultCapacity;
1607 }
1608
b75a7d8f 1609 /* get all pieces, one after another, and separate with '_' */
374ca955 1610 len=_getLanguage(localeID, name, nameCapacity, &localeID);
73c04bcf
A
1611
1612 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1613 const char *d = uloc_getDefault();
1614
1615 len = uprv_strlen(d);
1616
1617 if (name != NULL) {
1618 uprv_strncpy(name, d, len);
1619 }
1620 } else if(_isIDSeparator(*localeID)) {
374ca955
A
1621 const char *scriptID;
1622
b75a7d8f 1623 ++fieldCount;
374ca955
A
1624 if(len<nameCapacity) {
1625 name[len]='_';
b75a7d8f 1626 }
374ca955
A
1627 ++len;
1628
1629 scriptSize=_getScript(localeID+1, name+len, nameCapacity-len, &scriptID);
1630 if(scriptSize > 0) {
1631 /* Found optional script */
1632 localeID = scriptID;
b75a7d8f 1633 ++fieldCount;
374ca955
A
1634 len+=scriptSize;
1635 if (_isIDSeparator(*localeID)) {
1636 /* If there is something else, then we add the _ */
1637 if(len<nameCapacity) {
1638 name[len]='_';
1639 }
1640 ++len;
1641 }
1642 }
1643
1644 if (_isIDSeparator(*localeID)) {
1645 len+=_getCountry(localeID+1, name+len, nameCapacity-len, &localeID);
1646 if(_isIDSeparator(*localeID)) {
1647 ++fieldCount;
1648 if(len<nameCapacity) {
1649 name[len]='_';
1650 }
1651 ++len;
1652 variantSize = _getVariant(localeID+1, *localeID, name+len, nameCapacity-len);
1653 if (variantSize > 0) {
1654 variant = name+len;
1655 len += variantSize;
1656 localeID += variantSize + 1; /* skip '_' and variant */
1657 }
b75a7d8f 1658 }
b75a7d8f
A
1659 }
1660 }
1661
374ca955
A
1662 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1663 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *localeID == '.') {
1664 UBool done = FALSE;
b75a7d8f 1665 do {
374ca955
A
1666 char c = *localeID;
1667 switch (c) {
1668 case 0:
1669 case '@':
1670 done = TRUE;
1671 break;
1672 default:
1673 if (len<nameCapacity) {
1674 name[len] = c;
1675 }
1676 ++len;
1677 ++localeID;
1678 break;
1679 }
1680 } while (!done);
1681 }
1682
1683 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1684 After this, localeID either points to '@' or is NULL */
1685 if ((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1686 keywordAssign = uprv_strchr(localeID, '=');
1687 separatorIndicator = uprv_strchr(localeID, ';');
1688 }
1689
1690 /* Copy POSIX-style variant, if any [mr@FOO] */
1691 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1692 localeID != NULL && keywordAssign == NULL) {
1693 for (;;) {
1694 char c = *localeID;
1695 if (c == 0) {
1696 break;
1697 }
1698 if (len<nameCapacity) {
1699 name[len] = c;
1700 }
1701 ++len;
1702 ++localeID;
1703 }
1704 }
1705
1706 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1707 /* Handle @FOO variant if @ is present and not followed by = */
1708 if (localeID!=NULL && keywordAssign==NULL) {
1709 int32_t posixVariantSize;
1710 /* Add missing '_' if needed */
1711 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1712 do {
1713 if(len<nameCapacity) {
1714 name[len]='_';
1715 }
1716 ++len;
1717 ++fieldCount;
1718 } while(fieldCount<2);
1719 }
1720 posixVariantSize = _getVariantEx(localeID+1, '@', name+len, nameCapacity-len,
1721 (UBool)(variantSize > 0));
1722 if (posixVariantSize > 0) {
1723 if (variant == NULL) {
1724 variant = name+len;
1725 }
1726 len += posixVariantSize;
1727 variantSize += posixVariantSize;
b75a7d8f 1728 }
374ca955
A
1729 }
1730
46f4442e
A
1731 /* Handle generic variants first */
1732 if (variant) {
1733 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1734 const char* variantToCompare = VARIANT_MAP[j].variant;
1735 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1736 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1737 len -= variantLen;
1738 if (variantLen > 0) {
1739 if (name[len-1] == '_') { /* delete trailing '_' */
1740 --len;
1741 }
1742 addKeyword = VARIANT_MAP[j].keyword;
1743 addValue = VARIANT_MAP[j].value;
1744 break;
1745 }
1746 }
1747 if (name[len-1] == '_') { /* delete trailing '_' */
1748 --len;
1749 }
374ca955
A
1750 }
1751
1752 /* Look up the ID in the canonicalization map */
1753 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1754 const char* id = CANONICALIZE_MAP[j].id;
73c04bcf 1755 int32_t n = (int32_t)uprv_strlen(id);
374ca955
A
1756 if (len == n && uprv_strncmp(name, id, n) == 0) {
1757 if (n == 0 && localeID != NULL) {
1758 break; /* Don't remap "" if keywords present */
1759 }
1760 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
46f4442e
A
1761 if (CANONICALIZE_MAP[j].keyword) {
1762 addKeyword = CANONICALIZE_MAP[j].keyword;
1763 addValue = CANONICALIZE_MAP[j].value;
1764 }
374ca955
A
1765 break;
1766 }
1767 }
374ca955
A
1768 }
1769
1770 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1771 if (localeID!=NULL && keywordAssign!=NULL &&
1772 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1773 if(len<nameCapacity) {
1774 name[len]='@';
1775 }
1776 ++len;
b75a7d8f 1777 ++fieldCount;
374ca955
A
1778 len += _getKeywords(localeID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
1779 addKeyword, addValue, err);
1780 } else if (addKeyword != NULL) {
1781 U_ASSERT(addValue != NULL);
1782 /* inelegant but works -- later make _getKeywords do this? */
1783 len += _copyCount(name+len, nameCapacity-len, "@");
1784 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1785 len += _copyCount(name+len, nameCapacity-len, "=");
1786 len += _copyCount(name+len, nameCapacity-len, addValue);
1787 }
1788 }
1789
46f4442e 1790 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
374ca955
A
1791 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1792 }
1793
1794 return u_terminateChars(result, resultCapacity, len, err);
1795}
1796
1797/* ### ID parsing API **************************************************/
1798
1799U_CAPI int32_t U_EXPORT2
1800uloc_getParent(const char* localeID,
1801 char* parent,
1802 int32_t parentCapacity,
1803 UErrorCode* err)
1804{
1805 const char *lastUnderscore;
1806 int32_t i;
1807
1808 if (U_FAILURE(*err))
1809 return 0;
1810
1811 if (localeID == NULL)
1812 localeID = uloc_getDefault();
1813
1814 lastUnderscore=uprv_strrchr(localeID, '_');
1815 if(lastUnderscore!=NULL) {
1816 i=(int32_t)(lastUnderscore-localeID);
1817 } else {
1818 i=0;
b75a7d8f 1819 }
374ca955 1820
73c04bcf 1821 if(i>0 && parent != localeID) {
374ca955
A
1822 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1823 }
1824 return u_terminateChars(parent, parentCapacity, i, err);
b75a7d8f 1825}
374ca955
A
1826
1827U_CAPI int32_t U_EXPORT2
1828uloc_getLanguage(const char* localeID,
1829 char* language,
1830 int32_t languageCapacity,
1831 UErrorCode* err)
1832{
1833 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1834 int32_t i=0;
1835
1836 if (err==NULL || U_FAILURE(*err)) {
1837 return 0;
1838 }
1839
1840 if(localeID==NULL) {
1841 localeID=uloc_getDefault();
1842 }
1843
1844 i=_getLanguage(localeID, language, languageCapacity, NULL);
1845 return u_terminateChars(language, languageCapacity, i, err);
1846}
1847
1848U_CAPI int32_t U_EXPORT2
1849uloc_getScript(const char* localeID,
1850 char* script,
1851 int32_t scriptCapacity,
1852 UErrorCode* err)
1853{
1854 int32_t i=0;
1855
1856 if(err==NULL || U_FAILURE(*err)) {
1857 return 0;
1858 }
1859
1860 if(localeID==NULL) {
1861 localeID=uloc_getDefault();
1862 }
1863
1864 /* skip the language */
1865 _getLanguage(localeID, NULL, 0, &localeID);
1866 if(_isIDSeparator(*localeID)) {
1867 i=_getScript(localeID+1, script, scriptCapacity, NULL);
1868 }
1869 return u_terminateChars(script, scriptCapacity, i, err);
1870}
1871
1872U_CAPI int32_t U_EXPORT2
1873uloc_getCountry(const char* localeID,
1874 char* country,
1875 int32_t countryCapacity,
1876 UErrorCode* err)
1877{
1878 int32_t i=0;
1879
1880 if(err==NULL || U_FAILURE(*err)) {
1881 return 0;
1882 }
1883
1884 if(localeID==NULL) {
1885 localeID=uloc_getDefault();
1886 }
1887
1888 /* Skip the language */
1889 _getLanguage(localeID, NULL, 0, &localeID);
1890 if(_isIDSeparator(*localeID)) {
1891 const char *scriptID;
1892 /* Skip the script if available */
1893 _getScript(localeID+1, NULL, 0, &scriptID);
1894 if(scriptID != localeID+1) {
1895 /* Found optional script */
1896 localeID = scriptID;
1897 }
1898 if(_isIDSeparator(*localeID)) {
1899 i=_getCountry(localeID+1, country, countryCapacity, NULL);
1900 }
1901 }
1902 return u_terminateChars(country, countryCapacity, i, err);
1903}
1904
1905U_CAPI int32_t U_EXPORT2
1906uloc_getVariant(const char* localeID,
1907 char* variant,
1908 int32_t variantCapacity,
1909 UErrorCode* err)
1910{
1911 int32_t i=0;
374ca955
A
1912
1913 if(err==NULL || U_FAILURE(*err)) {
1914 return 0;
1915 }
1916
1917 if(localeID==NULL) {
1918 localeID=uloc_getDefault();
1919 }
1920
1921 /* Skip the language */
1922 _getLanguage(localeID, NULL, 0, &localeID);
1923 if(_isIDSeparator(*localeID)) {
1924 const char *scriptID;
1925 /* Skip the script if available */
1926 _getScript(localeID+1, NULL, 0, &scriptID);
1927 if(scriptID != localeID+1) {
1928 /* Found optional script */
1929 localeID = scriptID;
1930 }
1931 /* Skip the Country */
1932 if (_isIDSeparator(*localeID)) {
1933 _getCountry(localeID+1, NULL, 0, &localeID);
1934 if(_isIDSeparator(*localeID)) {
374ca955
A
1935 i=_getVariant(localeID+1, *localeID, variant, variantCapacity);
1936 }
1937 }
1938 }
1939
1940 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1941 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
1942/*
1943 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1944 i=_getVariant(localeID+1, '@', variant, variantCapacity);
1945 }
1946*/
1947 return u_terminateChars(variant, variantCapacity, i, err);
1948}
1949
1950U_CAPI int32_t U_EXPORT2
1951uloc_getName(const char* localeID,
1952 char* name,
1953 int32_t nameCapacity,
1954 UErrorCode* err)
1955{
1956 return _canonicalize(localeID, name, nameCapacity, 0, err);
1957}
1958
1959U_CAPI int32_t U_EXPORT2
1960uloc_getBaseName(const char* localeID,
1961 char* name,
1962 int32_t nameCapacity,
1963 UErrorCode* err)
1964{
1965 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
1966}
1967
1968U_CAPI int32_t U_EXPORT2
1969uloc_canonicalize(const char* localeID,
1970 char* name,
1971 int32_t nameCapacity,
1972 UErrorCode* err)
1973{
1974 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
1975}
1976
b75a7d8f
A
1977U_CAPI const char* U_EXPORT2
1978uloc_getISO3Language(const char* localeID)
1979{
374ca955
A
1980 int16_t offset;
1981 char lang[ULOC_LANG_CAPACITY];
1982 UErrorCode err = U_ZERO_ERROR;
1983
1984 if (localeID == NULL)
1985 {
1986 localeID = uloc_getDefault();
1987 }
1988 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1989 if (U_FAILURE(err))
1990 return "";
1991 offset = _findIndex(LANGUAGES, lang);
1992 if (offset < 0)
1993 return "";
1994 return LANGUAGES_3[offset];
b75a7d8f
A
1995}
1996
1997U_CAPI const char* U_EXPORT2
1998uloc_getISO3Country(const char* localeID)
1999{
2000 int16_t offset;
374ca955 2001 char cntry[ULOC_LANG_CAPACITY];
b75a7d8f
A
2002 UErrorCode err = U_ZERO_ERROR;
2003
2004 if (localeID == NULL)
2005 {
2006 localeID = uloc_getDefault();
2007 }
374ca955 2008 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
b75a7d8f
A
2009 if (U_FAILURE(err))
2010 return "";
374ca955 2011 offset = _findIndex(COUNTRIES, cntry);
b75a7d8f
A
2012 if (offset < 0)
2013 return "";
2014
374ca955 2015 return COUNTRIES_3[offset];
b75a7d8f
A
2016}
2017
2018U_CAPI uint32_t U_EXPORT2
2019uloc_getLCID(const char* localeID)
2020{
374ca955
A
2021 UErrorCode status = U_ZERO_ERROR;
2022 char langID[ULOC_FULLNAME_CAPACITY];
2023
2024 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2025 if (U_FAILURE(status)) {
2026 return 0;
b75a7d8f 2027 }
374ca955
A
2028
2029 return uprv_convertToLCID(langID, localeID, &status);
2030}
2031
73c04bcf
A
2032U_CAPI int32_t U_EXPORT2
2033uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2034 UErrorCode *status)
2035{
2036 int32_t length;
2037 const char *posix = uprv_convertToPosix(hostid, status);
2038 if (U_FAILURE(*status) || posix == NULL) {
2039 return 0;
2040 }
2041 length = (int32_t)uprv_strlen(posix);
2042 if (length+1 > localeCapacity) {
2043 *status = U_BUFFER_OVERFLOW_ERROR;
2044 }
2045 else {
2046 uprv_strcpy(locale, posix);
2047 }
2048 return length;
2049}
2050
374ca955
A
2051/* ### Default locale **************************************************/
2052
2053U_CAPI const char* U_EXPORT2
2054uloc_getDefault()
2055{
2056 return locale_get_default();
2057}
2058
2059U_CAPI void U_EXPORT2
2060uloc_setDefault(const char* newDefaultLocale,
2061 UErrorCode* err)
2062{
2063 if (U_FAILURE(*err))
2064 return;
2065 /* the error code isn't currently used for anything by this function*/
b75a7d8f 2066
374ca955
A
2067 /* propagate change to C++ */
2068 locale_set_default(newDefaultLocale);
b75a7d8f
A
2069}
2070
374ca955
A
2071/* ### Display name **************************************************/
2072
b75a7d8f
A
2073/*
2074 * Lookup a resource bundle table item with fallback on the table level.
2075 * Regular resource bundle lookups perform fallback to parent locale bundles
2076 * and eventually the root bundle, but only for top-level items.
2077 * This function takes the name of a top-level table and of an item in that table
2078 * and performs a lookup of both, falling back until a bundle contains a table
2079 * with this item.
2080 *
2081 * Note: Only the opening of entire bundles falls back through the default locale
2082 * before root. Once a bundle is open, item lookups do not go through the
2083 * default locale because that would result in a mix of languages that is
2084 * unpredictable to the programmer and most likely useless.
2085 */
2086static const UChar *
2087_res_getTableStringWithFallback(const char *path, const char *locale,
374ca955
A
2088 const char *tableKey, const char *subTableKey,
2089 const char *itemKey,
b75a7d8f
A
2090 int32_t *pLength,
2091 UErrorCode *pErrorCode)
2092{
73c04bcf
A
2093/* char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/
2094 UResourceBundle *rb=NULL, table, subTable;
2095 const UChar *item=NULL;
b75a7d8f 2096 UErrorCode errorCode;
374ca955 2097 char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};
73c04bcf
A
2098
2099 /*
2100 * open the bundle for the current locale
2101 * this falls back through the locale's chain to root
2102 */
2103 errorCode=U_ZERO_ERROR;
2104 rb=ures_open(path, locale, &errorCode);
2105 if(U_FAILURE(errorCode)) {
2106 /* total failure, not even root could be opened */
2107 *pErrorCode=errorCode;
2108 return NULL;
2109 } else if(errorCode==U_USING_DEFAULT_WARNING ||
2110 (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
2111 ) {
2112 /* set the "strongest" error code (success->fallback->default->failure) */
2113 *pErrorCode=errorCode;
2114 }
2115
2116 for(;;){
b75a7d8f 2117 ures_initStackObject(&table);
73c04bcf
A
2118 ures_initStackObject(&subTable);
2119 ures_getByKeyWithFallback(rb, tableKey, &table, &errorCode);
2120 if (subTableKey != NULL) {
2121 /*
2122 ures_getByKeyWithFallback(&table,subTableKey, &subTable, &errorCode);
2123 item = ures_getStringByKeyWithFallback(&subTable, itemKey, pLength, &errorCode);
2124 if(U_FAILURE(errorCode)){
2125 *pErrorCode = errorCode;
2126 }
2127
2128 break;*/
2129
2130 ures_getByKeyWithFallback(&table,subTableKey, &table, &errorCode);
b75a7d8f 2131 }
374ca955 2132 if(U_SUCCESS(errorCode)){
73c04bcf
A
2133 item = ures_getStringByKeyWithFallback(&table, itemKey, pLength, &errorCode);
2134 if(U_FAILURE(errorCode)){
2135 const char* replacement = NULL;
2136 *pErrorCode = errorCode; /*save the errorCode*/
2137 errorCode = U_ZERO_ERROR;
2138 /* may be a deprecated code */
2139 if(uprv_strcmp(tableKey, "Countries")==0){
2140 replacement = uloc_getCurrentCountryID(itemKey);
2141 }else if(uprv_strcmp(tableKey, "Languages")==0){
2142 replacement = uloc_getCurrentLanguageID(itemKey);
374ca955 2143 }
73c04bcf
A
2144 /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself is replacement is not found*/
2145 if(replacement!=NULL && itemKey != replacement){
2146 item = ures_getStringByKeyWithFallback(&table, replacement, pLength, &errorCode);
2147 if(U_SUCCESS(errorCode)){
2148 *pErrorCode = errorCode;
2149 break;
2150 }
2151 }
2152 }else{
2153 break;
b75a7d8f 2154 }
b75a7d8f 2155 }
73c04bcf
A
2156
2157 if(U_FAILURE(errorCode)){
b75a7d8f 2158
73c04bcf
A
2159 /* still can't figure out ?.. try the fallback mechanism */
2160 int32_t len = 0;
2161 const UChar* fallbackLocale = NULL;
2162 *pErrorCode = errorCode;
2163 errorCode = U_ZERO_ERROR;
b75a7d8f 2164
73c04bcf
A
2165 fallbackLocale = ures_getStringByKeyWithFallback(&table, "Fallback", &len, &errorCode);
2166 if(U_FAILURE(errorCode)){
2167 *pErrorCode = errorCode;
2168 break;
2169 }
2170
2171 u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
2172
2173 /* guard against recursive fallback */
2174 if(uprv_strcmp(explicitFallbackName, locale)==0){
2175 *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
2176 break;
2177 }
b75a7d8f 2178 ures_close(rb);
73c04bcf
A
2179 rb = ures_open(NULL, explicitFallbackName, &errorCode);
2180 if(U_FAILURE(errorCode)){
2181 *pErrorCode = errorCode;
2182 break;
374ca955 2183 }
73c04bcf
A
2184 /* succeeded in opening the fallback bundle .. continue and try to fetch the item */
2185 }else{
2186 break;
374ca955 2187 }
b75a7d8f 2188 }
73c04bcf
A
2189 /* done with the locale string - ready to close table and rb */
2190 ures_close(&subTable);
2191 ures_close(&table);
2192 ures_close(rb);
2193 return item;
b75a7d8f
A
2194}
2195
2196static int32_t
2197_getStringOrCopyKey(const char *path, const char *locale,
374ca955
A
2198 const char *tableKey,
2199 const char* subTableKey,
2200 const char *itemKey,
b75a7d8f
A
2201 const char *substitute,
2202 UChar *dest, int32_t destCapacity,
2203 UErrorCode *pErrorCode) {
374ca955 2204 const UChar *s = NULL;
73c04bcf 2205 int32_t length = 0;
b75a7d8f
A
2206
2207 if(itemKey==NULL) {
2208 /* top-level item: normal resource bundle access */
2209 UResourceBundle *rb;
2210
2211 rb=ures_open(path, locale, pErrorCode);
2212 if(U_SUCCESS(*pErrorCode)) {
2213 s=ures_getStringByKey(rb, tableKey, &length, pErrorCode);
2214 /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
2215 ures_close(rb);
2216 }
2217 } else {
46f4442e
A
2218 /* Language code should not be a number. If it is, set the error code. */
2219 if (!uprv_strncmp(tableKey, "Languages", 9) && uprv_strtol(itemKey, NULL, 10)) {
2220 *pErrorCode = U_MISSING_RESOURCE_ERROR;
2221 } else {
2222 /* second-level item, use special fallback */
2223 s=_res_getTableStringWithFallback(path, locale,
2224 tableKey,
2225 subTableKey,
2226 itemKey,
2227 &length,
2228 pErrorCode);
2229 }
b75a7d8f
A
2230 }
2231 if(U_SUCCESS(*pErrorCode)) {
2232 int32_t copyLength=uprv_min(length, destCapacity);
374ca955 2233 if(copyLength>0 && s != NULL) {
b75a7d8f
A
2234 u_memcpy(dest, s, copyLength);
2235 }
2236 } else {
2237 /* no string from a resource bundle: convert the substitute */
2238 length=(int32_t)uprv_strlen(substitute);
2239 u_charsToUChars(substitute, dest, uprv_min(length, destCapacity));
374ca955
A
2240 *pErrorCode=U_USING_DEFAULT_WARNING;
2241 }
2242
2243 return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2244}
2245
2246static int32_t
2247_getDisplayNameForComponent(const char *locale,
2248 const char *displayLocale,
2249 UChar *dest, int32_t destCapacity,
2250 int32_t (*getter)(const char *, char *, int32_t, UErrorCode *),
2251 const char *tag,
2252 UErrorCode *pErrorCode) {
2253 char localeBuffer[ULOC_FULLNAME_CAPACITY*4];
b75a7d8f 2254 int32_t length;
374ca955 2255 UErrorCode localStatus;
b75a7d8f
A
2256
2257 /* argument checking */
2258 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2259 return 0;
2260 }
2261
2262 if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2263 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2264 return 0;
2265 }
2266
374ca955
A
2267 localStatus = U_ZERO_ERROR;
2268 length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus);
2269 if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) {
b75a7d8f
A
2270 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2271 return 0;
2272 }
2273 if(length==0) {
2274 return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
2275 }
2276
b75a7d8f 2277 return _getStringOrCopyKey(NULL, displayLocale,
374ca955
A
2278 tag, NULL, localeBuffer,
2279 localeBuffer,
b75a7d8f
A
2280 dest, destCapacity,
2281 pErrorCode);
2282}
2283
374ca955
A
2284U_CAPI int32_t U_EXPORT2
2285uloc_getDisplayLanguage(const char *locale,
2286 const char *displayLocale,
2287 UChar *dest, int32_t destCapacity,
2288 UErrorCode *pErrorCode) {
2289 return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2290 uloc_getLanguage, _kLanguages, pErrorCode);
2291}
2292
2293U_CAPI int32_t U_EXPORT2
2294uloc_getDisplayScript(const char* locale,
2295 const char* displayLocale,
2296 UChar *dest, int32_t destCapacity,
2297 UErrorCode *pErrorCode)
2298{
2299 return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2300 uloc_getScript, _kScripts, pErrorCode);
2301}
2302
2303U_CAPI int32_t U_EXPORT2
2304uloc_getDisplayCountry(const char *locale,
2305 const char *displayLocale,
2306 UChar *dest, int32_t destCapacity,
2307 UErrorCode *pErrorCode) {
2308 return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2309 uloc_getCountry, _kCountries, pErrorCode);
2310}
2311
2312/*
2313 * TODO separate variant1_variant2_variant3...
2314 * by getting each tag's display string and concatenating them with ", "
2315 * in between - similar to uloc_getDisplayName()
2316 */
2317U_CAPI int32_t U_EXPORT2
2318uloc_getDisplayVariant(const char *locale,
2319 const char *displayLocale,
2320 UChar *dest, int32_t destCapacity,
2321 UErrorCode *pErrorCode) {
2322 return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2323 uloc_getVariant, _kVariants, pErrorCode);
2324}
2325
b75a7d8f
A
2326U_CAPI int32_t U_EXPORT2
2327uloc_getDisplayName(const char *locale,
2328 const char *displayLocale,
2329 UChar *dest, int32_t destCapacity,
374ca955
A
2330 UErrorCode *pErrorCode)
2331{
2332 int32_t length, length2, length3 = 0;
2333 UBool hasLanguage, hasScript, hasCountry, hasVariant, hasKeywords;
2334 UEnumeration* keywordEnum = NULL;
2335 int32_t keywordCount = 0;
2336 const char *keyword = NULL;
2337 int32_t keywordLen = 0;
2338 char keywordValue[256];
2339 int32_t keywordValueLen = 0;
b75a7d8f
A
2340
2341 /* argument checking */
2342 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2343 return 0;
2344 }
2345
2346 if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2347 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2348 return 0;
2349 }
2350
2351 /*
2352 * if there is a language, then write "language (country, variant)"
2353 * otherwise write "country, variant"
2354 */
2355
2356 /* write the language */
2357 length=uloc_getDisplayLanguage(locale, displayLocale,
2358 dest, destCapacity,
2359 pErrorCode);
2360 hasLanguage= length>0;
2361
2362 if(hasLanguage) {
2363 /* append " (" */
2364 if(length<destCapacity) {
2365 dest[length]=0x20;
2366 }
2367 ++length;
2368 if(length<destCapacity) {
2369 dest[length]=0x28;
2370 }
2371 ++length;
2372 }
2373
2374 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2375 /* keep preflighting */
2376 *pErrorCode=U_ZERO_ERROR;
2377 }
2378
374ca955
A
2379 /* append the script */
2380 if(length<destCapacity) {
2381 length2=uloc_getDisplayScript(locale, displayLocale,
2382 dest+length, destCapacity-length,
2383 pErrorCode);
2384 } else {
2385 length2=uloc_getDisplayScript(locale, displayLocale,
2386 NULL, 0,
2387 pErrorCode);
2388 }
2389 hasScript= length2>0;
2390 length+=length2;
2391
2392 if(hasScript) {
2393 /* append ", " */
2394 if(length<destCapacity) {
2395 dest[length]=0x2c;
2396 }
2397 ++length;
2398 if(length<destCapacity) {
2399 dest[length]=0x20;
2400 }
2401 ++length;
2402 }
2403
2404 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2405 /* keep preflighting */
2406 *pErrorCode=U_ZERO_ERROR;
2407 }
2408
b75a7d8f
A
2409 /* append the country */
2410 if(length<destCapacity) {
2411 length2=uloc_getDisplayCountry(locale, displayLocale,
2412 dest+length, destCapacity-length,
2413 pErrorCode);
2414 } else {
2415 length2=uloc_getDisplayCountry(locale, displayLocale,
2416 NULL, 0,
2417 pErrorCode);
2418 }
2419 hasCountry= length2>0;
2420 length+=length2;
2421
2422 if(hasCountry) {
2423 /* append ", " */
2424 if(length<destCapacity) {
2425 dest[length]=0x2c;
2426 }
2427 ++length;
2428 if(length<destCapacity) {
2429 dest[length]=0x20;
2430 }
2431 ++length;
2432 }
2433
2434 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2435 /* keep preflighting */
2436 *pErrorCode=U_ZERO_ERROR;
2437 }
2438
2439 /* append the variant */
2440 if(length<destCapacity) {
2441 length2=uloc_getDisplayVariant(locale, displayLocale,
2442 dest+length, destCapacity-length,
2443 pErrorCode);
2444 } else {
2445 length2=uloc_getDisplayVariant(locale, displayLocale,
2446 NULL, 0,
2447 pErrorCode);
2448 }
2449 hasVariant= length2>0;
2450 length+=length2;
2451
374ca955
A
2452 if(hasVariant) {
2453 /* append ", " */
2454 if(length<destCapacity) {
2455 dest[length]=0x2c;
2456 }
2457 ++length;
2458 if(length<destCapacity) {
2459 dest[length]=0x20;
2460 }
2461 ++length;
b75a7d8f
A
2462 }
2463
374ca955
A
2464 keywordEnum = uloc_openKeywords(locale, pErrorCode);
2465
2466 for(keywordCount = uenum_count(keywordEnum, pErrorCode); keywordCount > 0 ; keywordCount--){
2467 if(U_FAILURE(*pErrorCode)){
2468 break;
2469 }
2470 /* the uenum_next returns NUL terminated string */
2471 keyword = uenum_next(keywordEnum, &keywordLen, pErrorCode);
2472 if(length + length3 < destCapacity) {
2473 length3 += uloc_getDisplayKeyword(keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2474 } else {
2475 length3 += uloc_getDisplayKeyword(keyword, displayLocale, NULL, 0, pErrorCode);
2476 }
2477 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2478 /* keep preflighting */
2479 *pErrorCode=U_ZERO_ERROR;
2480 }
2481 keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, 256, pErrorCode);
2482 if(keywordValueLen) {
2483 if(length + length3 < destCapacity) {
2484 dest[length + length3] = 0x3D;
b75a7d8f 2485 }
374ca955
A
2486 length3++;
2487 if(length + length3 < destCapacity) {
2488 length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2489 } else {
2490 length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, NULL, 0, pErrorCode);
2491 }
2492 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2493 /* keep preflighting */
2494 *pErrorCode=U_ZERO_ERROR;
2495 }
2496 }
2497 if(keywordCount > 1) {
2498 if(length + length3 + 1 < destCapacity && keywordCount) {
2499 dest[length + length3]=0x2c;
2500 dest[length + length3+1]=0x20;
2501 }
2502 length3++; /* ',' */
2503 length3++; /* ' ' */
2504 }
2505 }
2506 uenum_close(keywordEnum);
2507
2508 hasKeywords = length3 > 0;
2509 length += length3;
2510
2511
2512
2513 if ((hasScript && !hasCountry)
2514 || ((hasScript || hasCountry) && !hasVariant && !hasKeywords)
2515 || ((hasScript || hasCountry || hasVariant) && !hasKeywords)
2516 || (hasLanguage && !hasScript && !hasCountry && !hasVariant && !hasKeywords))
2517 {
2518 /* remove ", " or " (" */
2519 length-=2;
2520 }
2521
2522 if (hasLanguage && (hasScript || hasCountry || hasVariant || hasKeywords)) {
2523 /* append ")" */
2524 if(length<destCapacity) {
2525 dest[length]=0x29;
b75a7d8f 2526 }
374ca955 2527 ++length;
b75a7d8f
A
2528 }
2529
2530 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2531 /* keep preflighting */
2532 *pErrorCode=U_ZERO_ERROR;
2533 }
2534
2535 return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2536}
2537
374ca955
A
2538U_CAPI int32_t U_EXPORT2
2539uloc_getDisplayKeyword(const char* keyword,
2540 const char* displayLocale,
2541 UChar* dest,
2542 int32_t destCapacity,
2543 UErrorCode* status){
2544
2545 /* argument checking */
2546 if(status==NULL || U_FAILURE(*status)) {
2547 return 0;
2548 }
2549
2550 if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2551 *status=U_ILLEGAL_ARGUMENT_ERROR;
2552 return 0;
2553 }
2554
2555
2556 /* pass itemKey=NULL to look for a top-level item */
2557 return _getStringOrCopyKey(NULL, displayLocale,
2558 _kKeys, NULL,
2559 keyword,
2560 keyword,
2561 dest, destCapacity,
2562 status);
b75a7d8f
A
2563
2564}
2565
374ca955
A
2566
2567#define UCURRENCY_DISPLAY_NAME_INDEX 1
2568
2569U_CAPI int32_t U_EXPORT2
2570uloc_getDisplayKeywordValue( const char* locale,
2571 const char* keyword,
2572 const char* displayLocale,
2573 UChar* dest,
2574 int32_t destCapacity,
2575 UErrorCode* status){
2576
2577
2578 char keywordValue[ULOC_FULLNAME_CAPACITY*4];
2579 int32_t capacity = ULOC_FULLNAME_CAPACITY*4;
2580 int32_t keywordValueLen =0;
2581
2582 /* argument checking */
2583 if(status==NULL || U_FAILURE(*status)) {
2584 return 0;
2585 }
2586
2587 if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2588 *status=U_ILLEGAL_ARGUMENT_ERROR;
2589 return 0;
2590 }
2591
2592 /* get the keyword value */
2593 keywordValue[0]=0;
2594 keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);
2595
2596 /*
2597 * if the keyword is equal to currency .. then to get the display name
2598 * we need to do the fallback ourselves
2599 */
2600 if(uprv_stricmp(keyword, _kCurrency)==0){
2601
2602 int32_t dispNameLen = 0;
2603 const UChar *dispName = NULL;
2604
2605 UResourceBundle *bundle = ures_open(NULL, displayLocale, status);
2606 UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status);
2607 UResourceBundle *currency = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status);
2608
2609 dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);
2610
2611 /*close the bundles */
2612 ures_close(currency);
2613 ures_close(currencies);
2614 ures_close(bundle);
2615
2616 if(U_FAILURE(*status)){
2617 if(*status == U_MISSING_RESOURCE_ERROR){
2618 /* we just want to write the value over if nothing is available */
2619 *status = U_USING_DEFAULT_WARNING;
2620 }else{
2621 return 0;
2622 }
2623 }
2624
2625 /* now copy the dispName over if not NULL */
2626 if(dispName != NULL){
2627 if(dispNameLen <= destCapacity){
2628 uprv_memcpy(dest, dispName, dispNameLen * U_SIZEOF_UCHAR);
2629 return u_terminateUChars(dest, destCapacity, dispNameLen, status);
2630 }else{
2631 *status = U_BUFFER_OVERFLOW_ERROR;
2632 return dispNameLen;
2633 }
2634 }else{
2635 /* we have not found the display name for the value .. just copy over */
2636 if(keywordValueLen <= destCapacity){
2637 u_charsToUChars(keywordValue, dest, keywordValueLen);
2638 return u_terminateUChars(dest, destCapacity, keywordValueLen, status);
2639 }else{
2640 *status = U_BUFFER_OVERFLOW_ERROR;
2641 return keywordValueLen;
2642 }
2643 }
2644
2645
2646 }else{
2647
2648 return _getStringOrCopyKey(NULL, displayLocale,
2649 _kTypes, keyword,
2650 keywordValue,
2651 keywordValue,
2652 dest, destCapacity,
2653 status);
2654 }
b75a7d8f
A
2655}
2656
374ca955
A
2657/* ### Get available **************************************************/
2658
2659static UBool U_CALLCONV uloc_cleanup(void) {
b75a7d8f
A
2660 char ** temp;
2661
2662 if (_installedLocales) {
2663 temp = _installedLocales;
2664 _installedLocales = NULL;
2665
2666 _installedLocalesCount = 0;
2667
2668 uprv_free(temp);
2669 }
2670 return TRUE;
2671}
2672
2673static void _load_installedLocales()
2674{
2675 UBool localesLoaded;
2676
46f4442e 2677 UMTX_CHECK(NULL, _installedLocales != NULL, localesLoaded);
b75a7d8f
A
2678
2679 if (localesLoaded == FALSE) {
2680 UResourceBundle *index = NULL;
2681 UResourceBundle installed;
2682 UErrorCode status = U_ZERO_ERROR;
2683 char ** temp;
2684 int32_t i = 0;
2685 int32_t localeCount;
2686
2687 ures_initStackObject(&installed);
2688 index = ures_openDirect(NULL, _kIndexLocaleName, &status);
2689 ures_getByKey(index, _kIndexTag, &installed, &status);
2690
2691 if(U_SUCCESS(status)) {
2692 localeCount = ures_getSize(&installed);
2693 temp = (char **) uprv_malloc(sizeof(char*) * (localeCount+1));
46f4442e
A
2694 /* Check for null pointer */
2695 if (temp != NULL) {
2696 ures_resetIterator(&installed);
2697 while(ures_hasNext(&installed)) {
2698 ures_getNextString(&installed, NULL, (const char **)&temp[i++], &status);
2699 }
2700 temp[i] = NULL;
2701
2702 umtx_lock(NULL);
2703 if (_installedLocales == NULL)
2704 {
2705 _installedLocalesCount = localeCount;
2706 _installedLocales = temp;
2707 temp = NULL;
2708 ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
2709 }
2710 umtx_unlock(NULL);
2711
2712 uprv_free(temp);
b75a7d8f 2713 }
b75a7d8f 2714 }
46f4442e 2715 ures_close(&installed);
b75a7d8f
A
2716 ures_close(index);
2717 }
2718}
2719
374ca955
A
2720U_CAPI const char* U_EXPORT2
2721uloc_getAvailable(int32_t offset)
2722{
2723
2724 _load_installedLocales();
2725
2726 if (offset > _installedLocalesCount)
2727 return NULL;
2728 return _installedLocales[offset];
2729}
b75a7d8f 2730
374ca955
A
2731U_CAPI int32_t U_EXPORT2
2732uloc_countAvailable()
2733{
2734 _load_installedLocales();
2735 return _installedLocalesCount;
2736}
b75a7d8f
A
2737
2738/**
2739 * Returns a list of all language codes defined in ISO 639. This is a pointer
2740 * to an array of pointers to arrays of char. All of these pointers are owned
2741 * by ICU-- do not delete them, and do not write through them. The array is
2742 * terminated with a null pointer.
2743 */
2744U_CAPI const char* const* U_EXPORT2
2745uloc_getISOLanguages()
2746{
374ca955 2747 return LANGUAGES;
b75a7d8f
A
2748}
2749
/**
 * Returns a list of all 2-letter country codes defined in ISO 3166.  This is a
 * pointer to an array of pointers to arrays of char.  All of these pointers are
 * owned by ICU-- do not delete them, and do not write through them.  The array is
 * terminated with a null pointer.
 */
U_CAPI const char* const* U_EXPORT2
uloc_getISOCountries()
{
    return COUNTRIES;
}
2761
2762
/* this function to be moved into cstring.c later */

/* Decimal separator produced by the C runtime's sprintf; 0 until first probed. */
static char gDecimal = 0;

/*
 * strtod wrapper that accepts '.' as the decimal separator even when the
 * C runtime formats numbers with a different one.
 *
 * start - text to parse
 * end   - if not NULL, receives a pointer just past the parsed text
 *         (expressed relative to start)
 */
static double
_uloc_strtod(const char *start, char **end) {
    char localized[30];
    char *separator;
    char *parsedEnd;
    double value;

    if (gDecimal == 0) {
        char probe[5];
        /* For machines that decide to change the decimal on you,
        and try to be too smart with localization.
        This normally should be just a '.'. */
        sprintf(probe, "%+1.1f", 1.0);
        gDecimal = probe[2];
    }

    if (gDecimal == '.') {
        /* Runtime already uses '.', hand straight to the OS routine. */
        return uprv_strtod(start, end);
    }

    /* Work on a bounded private copy so the separator can be swapped. */
    uprv_strncpy(localized, start, 29);
    localized[29] = 0;

    separator = uprv_strchr(localized, '.');
    if (separator == NULL) {
        /* no decimal point */
        return uprv_strtod(start, end);
    }
    *separator = gDecimal;

    value = uprv_strtod(localized, &parsedEnd);
    if (end != NULL) {
        /* cast away const (to follow uprv_strtod API.) */
        *end = (char*)(start + (parsedEnd - localized));
    }
    return value;
}
2801
/* One parsed entry of an HTTP Accept-Language list. */
typedef struct {
    float q;        /* quality value of the entry; 1.0 when no ";q=" parameter is given */
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
    char *locale;   /* heap-allocated locale ID for the entry */
} _acceptLangItem;
2807
2808static int32_t U_CALLCONV
2809uloc_acceptLanguageCompare(const void *context, const void *a, const void *b)
2810{
2811 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2812 const _acceptLangItem *bb = (const _acceptLangItem*)b;
2813
2814 int32_t rc = 0;
2815 if(bb->q < aa->q) {
2816 rc = -1; /* A > B */
2817 } else if(bb->q > aa->q) {
2818 rc = 1; /* A < B */
2819 } else {
2820 rc = 0; /* A = B */
2821 }
2822
2823 if(rc==0) {
2824 rc = uprv_stricmp(aa->locale, bb->locale);
2825 }
2826
2827#if defined(ULOC_DEBUG)
2828 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2829 aa->locale, aa->q,
2830 bb->locale, bb->q,
2831 rc);*/
2832#endif
2833
2834 return rc;
2835}
2836
46f4442e
A
2837static ULayoutType
2838_uloc_getOrientationHelper(const char* localeId,
2839 const char* key,
2840 UErrorCode *status)
2841{
2842 ULayoutType result = ULOC_LAYOUT_UNKNOWN;
2843
2844 if (!U_FAILURE(*status)) {
2845 int32_t length = 0;
2846 char localeBuffer[ULOC_FULLNAME_CAPACITY];
2847
2848 uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status);
2849
2850 if (!U_FAILURE(*status)) {
2851 const UChar* const value =
2852 _res_getTableStringWithFallback(
2853 NULL,
2854 localeBuffer,
2855 "layout",
2856 NULL,
2857 key,
2858 &length,
2859 status);
2860
2861 if (!U_FAILURE(*status) && length != 0) {
2862 switch(value[0])
2863 {
2864 case 0x0062: /* 'b' */
2865 result = ULOC_LAYOUT_BTT;
2866 break;
2867 case 0x006C: /* 'l' */
2868 result = ULOC_LAYOUT_LTR;
2869 break;
2870 case 0x0072: /* 'r' */
2871 result = ULOC_LAYOUT_RTL;
2872 break;
2873 case 0x0074: /* 't' */
2874 result = ULOC_LAYOUT_TTB;
2875 break;
2876 default:
2877 *status = U_INTERNAL_PROGRAM_ERROR;
2878 break;
2879 }
2880 }
2881 }
2882 }
2883
2884 return result;
2885}
2886
2887U_DRAFT ULayoutType U_EXPORT2
2888uloc_getCharacterOrientation(const char* localeId,
2889 UErrorCode *status)
2890{
2891 return _uloc_getOrientationHelper(localeId, "characters", status);
2892}
2893
2894/**
2895 * Get the layout line orientation for the specified locale.
2896 *
2897 * @param localeID locale name
2898 * @param status Error status
2899 * @return an enum indicating the layout orientation for lines.
2900 * @draft ICU 4.0
2901 */
2902U_DRAFT ULayoutType U_EXPORT2
2903uloc_getLineOrientation(const char* localeId,
2904 UErrorCode *status)
2905{
2906 return _uloc_getOrientationHelper(localeId, "lines", status);
2907}
2908
374ca955
A
2909/*
2910mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2911*/
2912
2913U_CAPI int32_t U_EXPORT2
2914uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2915 const char *httpAcceptLanguage,
2916 UEnumeration* availableLocales,
2917 UErrorCode *status)
2918{
2919 _acceptLangItem *j;
2920 _acceptLangItem smallBuffer[30];
2921 char **strs;
2922 char tmp[ULOC_FULLNAME_CAPACITY +1];
2923 int32_t n = 0;
2924 const char *itemEnd;
2925 const char *paramEnd;
2926 const char *s;
2927 const char *t;
2928 int32_t res;
2929 int32_t i;
73c04bcf 2930 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
374ca955 2931 int32_t jSize;
46f4442e 2932 char *tempstr; /* Use for null pointer check */
374ca955
A
2933
2934 j = smallBuffer;
2935 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2936 if(U_FAILURE(*status)) {
2937 return -1;
2938 }
2939
2940 for(s=httpAcceptLanguage;s&&*s;) {
2941 while(isspace(*s)) /* eat space at the beginning */
2942 s++;
2943 itemEnd=uprv_strchr(s,',');
2944 paramEnd=uprv_strchr(s,';');
2945 if(!itemEnd) {
2946 itemEnd = httpAcceptLanguage+l; /* end of string */
2947 }
2948 if(paramEnd && paramEnd<itemEnd) {
2949 /* semicolon (;) is closer than end (,) */
2950 t = paramEnd+1;
2951 if(*t=='q') {
2952 t++;
2953 }
2954 while(isspace(*t)) {
2955 t++;
2956 }
2957 if(*t=='=') {
2958 t++;
2959 }
2960 while(isspace(*t)) {
2961 t++;
2962 }
46f4442e 2963 j[n].q = (float)_uloc_strtod(t,NULL);
374ca955
A
2964 } else {
2965 /* no semicolon - it's 1.0 */
46f4442e 2966 j[n].q = 1.0f;
374ca955
A
2967 paramEnd = itemEnd;
2968 }
46f4442e 2969 j[n].dummy=0;
374ca955
A
2970 /* eat spaces prior to semi */
2971 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2972 ;
46f4442e
A
2973 /* Check for null pointer from uprv_strndup */
2974 tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2975 if (tempstr == NULL) {
2976 *status = U_MEMORY_ALLOCATION_ERROR;
2977 return -1;
2978 }
2979 j[n].locale = tempstr;
374ca955
A
2980 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2981 if(strcmp(j[n].locale,tmp)) {
2982 uprv_free(j[n].locale);
2983 j[n].locale=uprv_strdup(tmp);
2984 }
2985#if defined(ULOC_DEBUG)
2986 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2987#endif
2988 n++;
2989 s = itemEnd;
2990 while(*s==',') { /* eat duplicate commas */
2991 s++;
2992 }
2993 if(n>=jSize) {
46f4442e
A
2994 if(j==smallBuffer) { /* overflowed the small buffer. */
2995 j = uprv_malloc(sizeof(j[0])*(jSize*2));
2996 if(j!=NULL) {
2997 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2998 }
374ca955 2999#if defined(ULOC_DEBUG)
46f4442e 3000 fprintf(stderr,"malloced at size %d\n", jSize);
374ca955 3001#endif
46f4442e
A
3002 } else {
3003 j = uprv_realloc(j, sizeof(j[0])*jSize*2);
374ca955 3004#if defined(ULOC_DEBUG)
46f4442e 3005 fprintf(stderr,"re-alloced at size %d\n", jSize);
374ca955 3006#endif
46f4442e
A
3007 }
3008 jSize *= 2;
3009 if(j==NULL) {
3010 *status = U_MEMORY_ALLOCATION_ERROR;
3011 return -1;
3012 }
374ca955
A
3013 }
3014 }
3015 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
3016 if(U_FAILURE(*status)) {
46f4442e 3017 if(j != smallBuffer) {
374ca955 3018#if defined(ULOC_DEBUG)
46f4442e 3019 fprintf(stderr,"freeing j %p\n", j);
374ca955 3020#endif
46f4442e
A
3021 uprv_free(j);
3022 }
3023 return -1;
374ca955
A
3024 }
3025 strs = uprv_malloc((size_t)(sizeof(strs[0])*n));
46f4442e
A
3026 /* Check for null pointer */
3027 if (strs == NULL) {
3028 uprv_free(j); /* Free to avoid memory leak */
3029 *status = U_MEMORY_ALLOCATION_ERROR;
3030 return -1;
3031 }
374ca955
A
3032 for(i=0;i<n;i++) {
3033#if defined(ULOC_DEBUG)
3034 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
3035#endif
3036 strs[i]=j[i].locale;
3037 }
3038 res = uloc_acceptLanguage(result, resultAvailable, outResult,
3039 (const char**)strs, n, availableLocales, status);
3040 for(i=0;i<n;i++) {
3041 uprv_free(strs[i]);
3042 }
3043 uprv_free(strs);
3044 if(j != smallBuffer) {
3045#if defined(ULOC_DEBUG)
46f4442e 3046 fprintf(stderr,"freeing j %p\n", j);
374ca955 3047#endif
46f4442e 3048 uprv_free(j);
374ca955
A
3049 }
3050 return res;
3051}
3052
3053
3054U_CAPI int32_t U_EXPORT2
3055uloc_acceptLanguage(char *result, int32_t resultAvailable,
3056 UAcceptResult *outResult, const char **acceptList,
3057 int32_t acceptListCount,
3058 UEnumeration* availableLocales,
3059 UErrorCode *status)
3060{
3061 int32_t i,j;
3062 int32_t len;
3063 int32_t maxLen=0;
3064 char tmp[ULOC_FULLNAME_CAPACITY+1];
3065 const char *l;
3066 char **fallbackList;
3067 if(U_FAILURE(*status)) {
3068 return -1;
3069 }
3070 fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount));
3071 if(fallbackList==NULL) {
46f4442e
A
3072 *status = U_MEMORY_ALLOCATION_ERROR;
3073 return -1;
374ca955
A
3074 }
3075 for(i=0;i<acceptListCount;i++) {
3076#if defined(ULOC_DEBUG)
3077 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
3078#endif
3079 while((l=uenum_next(availableLocales, NULL, status))) {
3080#if defined(ULOC_DEBUG)
3081 fprintf(stderr," %s\n", l);
3082#endif
73c04bcf 3083 len = (int32_t)uprv_strlen(l);
374ca955
A
3084 if(!uprv_strcmp(acceptList[i], l)) {
3085 if(outResult) {
3086 *outResult = ULOC_ACCEPT_VALID;
3087 }
3088#if defined(ULOC_DEBUG)
3089 fprintf(stderr, "MATCH! %s\n", l);
3090#endif
3091 if(len>0) {
3092 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
3093 }
3094 for(j=0;j<i;j++) {
3095 uprv_free(fallbackList[j]);
3096 }
3097 uprv_free(fallbackList);
3098 return u_terminateChars(result, resultAvailable, len, status);
3099 }
3100 if(len>maxLen) {
3101 maxLen = len;
3102 }
3103 }
3104 uenum_reset(availableLocales, status);
3105 /* save off parent info */
3106 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
3107 fallbackList[i] = uprv_strdup(tmp);
3108 } else {
3109 fallbackList[i]=0;
3110 }
3111 }
3112
3113 for(maxLen--;maxLen>0;maxLen--) {
3114 for(i=0;i<acceptListCount;i++) {
3115 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
3116#if defined(ULOC_DEBUG)
3117 fprintf(stderr,"Try: [%s]", fallbackList[i]);
3118#endif
3119 while((l=uenum_next(availableLocales, NULL, status))) {
3120#if defined(ULOC_DEBUG)
3121 fprintf(stderr," %s\n", l);
3122#endif
73c04bcf 3123 len = (int32_t)uprv_strlen(l);
374ca955
A
3124 if(!uprv_strcmp(fallbackList[i], l)) {
3125 if(outResult) {
3126 *outResult = ULOC_ACCEPT_FALLBACK;
3127 }
3128#if defined(ULOC_DEBUG)
3129 fprintf(stderr, "fallback MATCH! %s\n", l);
3130#endif
3131 if(len>0) {
3132 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
3133 }
73c04bcf
A
3134 for(j=0;j<acceptListCount;j++) {
3135 uprv_free(fallbackList[j]);
374ca955
A
3136 }
3137 uprv_free(fallbackList);
73c04bcf 3138 return u_terminateChars(result, resultAvailable, len, status);
374ca955
A
3139 }
3140 }
3141 uenum_reset(availableLocales, status);
3142
3143 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
3144 uprv_free(fallbackList[i]);
3145 fallbackList[i] = uprv_strdup(tmp);
3146 } else {
3147 uprv_free(fallbackList[i]);
3148 fallbackList[i]=0;
3149 }
3150 }
3151 }
3152 if(outResult) {
3153 *outResult = ULOC_ACCEPT_FAILED;
3154 }
3155 }
3156 for(i=0;i<acceptListCount;i++) {
3157 uprv_free(fallbackList[i]);
3158 }
3159 uprv_free(fallbackList);
3160 return -1;
b75a7d8f 3161}
374ca955 3162
46f4442e
A
3163
3164/**
3165 * This function looks for the localeID in the likelySubtags resource.
3166 *
3167 * @param localeID The tag to find.
3168 * @param buffer A buffer to hold the matching entry
3169 * @param bufferLength The length of the output buffer
3170 * @return A pointer to "buffer" if found, or a null pointer if not.
3171 */
3172static const char* U_CALLCONV
3173findLikelySubtags(const char* localeID,
3174 char* buffer,
3175 int32_t bufferLength,
3176 UErrorCode* err) {
3177 const char* result = NULL;
3178
3179 if (!U_FAILURE(*err)) {
3180 int32_t resLen = 0;
3181 const UChar* s = NULL;
3182 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", err);
3183 if (!U_FAILURE(*err)) {
3184 s = ures_getStringByKey(subtags, localeID, &resLen, err);
3185
3186 if (U_FAILURE(*err)) {
3187 /*
3188 * If a resource is missing, it's not really an error, it's
3189 * just that we don't have any data for that particular locale ID.
3190 */
3191 if (*err == U_MISSING_RESOURCE_ERROR) {
3192 *err = U_ZERO_ERROR;
3193 }
3194 }
3195 else if (resLen >= bufferLength) {
3196 /* The buffer should never overflow. */
3197 *err = U_INTERNAL_PROGRAM_ERROR;
3198 }
3199 else {
3200 u_UCharsToChars(s, buffer, resLen + 1);
3201 result = buffer;
3202 }
3203
3204 ures_close(subtags);
3205 }
3206 }
3207
3208 return result;
3209}
3210
3211/**
3212 * Append a tag to a buffer, adding the separator if necessary. The buffer
3213 * must be large enough to contain the resulting tag plus any separator
3214 * necessary. The tag must not be a zero-length string.
3215 *
3216 * @param tag The tag to add.
3217 * @param tagLength The length of the tag.
3218 * @param buffer The output buffer.
3219 * @param bufferLength The length of the output buffer. This is an input/ouput parameter.
3220 **/
3221static void U_CALLCONV
3222appendTag(
3223 const char* tag,
3224 int32_t tagLength,
3225 char* buffer,
3226 int32_t* bufferLength) {
3227
3228 if (*bufferLength > 0) {
3229 buffer[*bufferLength] = '_';
3230 ++(*bufferLength);
3231 }
3232
3233 uprv_memmove(
3234 &buffer[*bufferLength],
3235 tag,
3236 tagLength);
3237
3238 *bufferLength += tagLength;
3239}
3240
/**
 * These are the canonical strings for unknown languages, scripts and regions.
 * They are substituted when a tag has no language, and are matched
 * case-insensitively when parsing so that an "unknown" script or region
 * is not returned.
 **/
static const char* const unknownLanguage = "und";
static const char* const unknownScript = "Zzzz";
static const char* const unknownRegion = "ZZ";
3247
3248/**
3249 * Create a tag string from the supplied parameters. The lang, script and region
3250 * parameters may be NULL pointers. If they are, their corresponding length parameters
3251 * must be less than or equal to 0.
3252 *
3253 * If any of the language, script or region parameters are empty, and the alternateTags
3254 * parameter is not NULL, it will be parsed for potential language, script and region tags
3255 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
3256 * it contains no language tag, the default tag for the unknown language is used.
3257 *
3258 * If the length of the new string exceeds the capacity of the output buffer,
3259 * the function copies as many bytes to the output buffer as it can, and returns
3260 * the error U_BUFFER_OVERFLOW_ERROR.
3261 *
3262 * If an illegal argument is provided, the function returns the error
3263 * U_ILLEGAL_ARGUMENT_ERROR.
3264 *
3265 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
3266 * the tag string fits in the output buffer, but the null terminator doesn't.
3267 *
3268 * @param lang The language tag to use.
3269 * @param langLength The length of the language tag.
3270 * @param script The script tag to use.
3271 * @param scriptLength The length of the script tag.
3272 * @param region The region tag to use.
3273 * @param regionLength The length of the region tag.
3274 * @param trailing Any trailing data to append to the new tag.
3275 * @param trailingLength The length of the trailing data.
3276 * @param alternateTags A string containing any alternate tags.
3277 * @param tag The output buffer.
3278 * @param tagCapacity The capacity of the output buffer.
3279 * @param err A pointer to a UErrorCode for error reporting.
3280 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
3281 **/
3282static int32_t U_CALLCONV
3283createTagStringWithAlternates(
3284 const char* lang,
3285 int32_t langLength,
3286 const char* script,
3287 int32_t scriptLength,
3288 const char* region,
3289 int32_t regionLength,
3290 const char* trailing,
3291 int32_t trailingLength,
3292 const char* alternateTags,
3293 char* tag,
3294 int32_t tagCapacity,
3295 UErrorCode* err) {
3296
3297 if (U_FAILURE(*err)) {
3298 goto error;
3299 }
3300 else if (tag == NULL ||
3301 tagCapacity <= 0 ||
3302 langLength >= ULOC_LANG_CAPACITY ||
3303 scriptLength >= ULOC_SCRIPT_CAPACITY ||
3304 regionLength >= ULOC_COUNTRY_CAPACITY) {
3305 goto error;
3306 }
3307 else {
3308 /**
3309 * ULOC_FULLNAME_CAPACITY will provide enough capacity
3310 * that we can build a string that contains the language,
3311 * script and region code without worrying about overrunning
3312 * the user-supplied buffer.
3313 **/
3314 char tagBuffer[ULOC_FULLNAME_CAPACITY];
3315 int32_t tagLength = 0;
3316 int32_t capacityRemaining = tagCapacity;
3317 UBool regionAppended = FALSE;
3318
3319 if (langLength > 0) {
3320 appendTag(
3321 lang,
3322 langLength,
3323 tagBuffer,
3324 &tagLength);
3325 }
3326 else if (alternateTags == NULL) {
3327 /*
3328 * Append the value for an unknown language, if
3329 * we found no language.
3330 */
3331 appendTag(
3332 unknownLanguage,
3333 uprv_strlen(unknownLanguage),
3334 tagBuffer,
3335 &tagLength);
3336 }
3337 else {
3338 /*
3339 * Parse the alternateTags string for the language.
3340 */
3341 char alternateLang[ULOC_LANG_CAPACITY];
3342 int32_t alternateLangLength = sizeof(alternateLang);
3343
3344 alternateLangLength =
3345 uloc_getLanguage(
3346 alternateTags,
3347 alternateLang,
3348 alternateLangLength,
3349 err);
3350 if(U_FAILURE(*err) ||
3351 alternateLangLength >= ULOC_LANG_CAPACITY) {
3352 goto error;
3353 }
3354 else if (alternateLangLength == 0) {
3355 /*
3356 * Append the value for an unknown language, if
3357 * we found no language.
3358 */
3359 appendTag(
3360 unknownLanguage,
3361 uprv_strlen(unknownLanguage),
3362 tagBuffer,
3363 &tagLength);
3364 }
3365 else {
3366 appendTag(
3367 alternateLang,
3368 alternateLangLength,
3369 tagBuffer,
3370 &tagLength);
3371 }
3372 }
3373
3374 if (scriptLength > 0) {
3375 appendTag(
3376 script,
3377 scriptLength,
3378 tagBuffer,
3379 &tagLength);
3380 }
3381 else if (alternateTags != NULL) {
3382 /*
3383 * Parse the alternateTags string for the script.
3384 */
3385 char alternateScript[ULOC_SCRIPT_CAPACITY];
3386
3387 const int32_t alternateScriptLength =
3388 uloc_getScript(
3389 alternateTags,
3390 alternateScript,
3391 sizeof(alternateScript),
3392 err);
3393
3394 if (U_FAILURE(*err) ||
3395 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
3396 goto error;
3397 }
3398 else if (alternateScriptLength > 0) {
3399 appendTag(
3400 alternateScript,
3401 alternateScriptLength,
3402 tagBuffer,
3403 &tagLength);
3404 }
3405 }
3406
3407 if (regionLength > 0) {
3408 appendTag(
3409 region,
3410 regionLength,
3411 tagBuffer,
3412 &tagLength);
3413
3414 regionAppended = TRUE;
3415 }
3416 else if (alternateTags != NULL) {
3417 /*
3418 * Parse the alternateTags string for the region.
3419 */
3420 char alternateRegion[ULOC_COUNTRY_CAPACITY];
3421
3422 const int32_t alternateRegionLength =
3423 uloc_getCountry(
3424 alternateTags,
3425 alternateRegion,
3426 sizeof(alternateRegion),
3427 err);
3428 if (U_FAILURE(*err) ||
3429 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
3430 goto error;
3431 }
3432 else if (alternateRegionLength > 0) {
3433 appendTag(
3434 alternateRegion,
3435 alternateRegionLength,
3436 tagBuffer,
3437 &tagLength);
3438
3439 regionAppended = TRUE;
3440 }
3441 }
3442
3443 {
3444 const int32_t toCopy =
3445 tagLength >= tagCapacity ? tagCapacity : tagLength;
3446
3447 /**
3448 * Copy the partial tag from our internal buffer to the supplied
3449 * target.
3450 **/
3451 uprv_memcpy(
3452 tag,
3453 tagBuffer,
3454 toCopy);
3455
3456 capacityRemaining -= toCopy;
3457 }
3458
3459 if (trailingLength > 0) {
3460 if (capacityRemaining > 0 && !regionAppended) {
3461 tag[tagLength++] = '_';
3462 --capacityRemaining;
3463 }
3464
3465 if (capacityRemaining > 0) {
3466 /*
3467 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we
3468 * don't know if the user-supplied buffers overlap.
3469 */
3470 const int32_t toCopy =
3471 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
3472
3473 uprv_memmove(
3474 &tag[tagLength],
3475 trailing,
3476 toCopy);
3477 }
3478 }
3479
3480 tagLength += trailingLength;
3481
3482 return u_terminateChars(
3483 tag,
3484 tagCapacity,
3485 tagLength,
3486 err);
3487 }
3488
3489error:
3490
3491 /**
3492 * An overflow indicates the locale ID passed in
3493 * is ill-formed. If we got here, and there was
3494 * no previous error, it's an implicit overflow.
3495 **/
3496 if (*err == U_BUFFER_OVERFLOW_ERROR ||
3497 U_SUCCESS(*err)) {
3498 *err = U_ILLEGAL_ARGUMENT_ERROR;
3499 }
3500
3501 return -1;
3502}
3503
3504/**
3505 * Create a tag string from the supplied parameters. The lang, script and region
3506 * parameters may be NULL pointers. If they are, their corresponding length parameters
3507 * must be less than or equal to 0. If the lang parameter is an empty string, the
3508 * default value for an unknown language is written to the output buffer.
3509 *
3510 * If the length of the new string exceeds the capacity of the output buffer,
3511 * the function copies as many bytes to the output buffer as it can, and returns
3512 * the error U_BUFFER_OVERFLOW_ERROR.
3513 *
3514 * If an illegal argument is provided, the function returns the error
3515 * U_ILLEGAL_ARGUMENT_ERROR.
3516 *
3517 * @param lang The language tag to use.
3518 * @param langLength The length of the language tag.
3519 * @param script The script tag to use.
3520 * @param scriptLength The length of the script tag.
3521 * @param region The region tag to use.
3522 * @param regionLength The length of the region tag.
3523 * @param trailing Any trailing data to append to the new tag.
3524 * @param trailingLength The length of the trailing data.
3525 * @param tag The output buffer.
3526 * @param tagCapacity The capacity of the output buffer.
3527 * @param err A pointer to a UErrorCode for error reporting.
3528 * @return The length of the tag string, which may be greater than tagCapacity.
3529 **/
3530static int32_t U_CALLCONV
3531createTagString(
3532 const char* lang,
3533 int32_t langLength,
3534 const char* script,
3535 int32_t scriptLength,
3536 const char* region,
3537 int32_t regionLength,
3538 const char* trailing,
3539 int32_t trailingLength,
3540 char* tag,
3541 int32_t tagCapacity,
3542 UErrorCode* err)
3543{
3544 return createTagStringWithAlternates(
3545 lang,
3546 langLength,
3547 script,
3548 scriptLength,
3549 region,
3550 regionLength,
3551 trailing,
3552 trailingLength,
3553 NULL,
3554 tag,
3555 tagCapacity,
3556 err);
3557}
3558
3559/**
3560 * Parse the language, script, and region subtags from a tag string, and copy the
3561 * results into the corresponding output parameters. The buffers are null-terminated,
3562 * unless overflow occurs.
3563 *
3564 * The langLength, scriptLength, and regionLength parameters are input/output
3565 * parameters, and must contain the capacity of their corresponding buffers on
3566 * input. On output, they will contain the actual length of the buffers, not
3567 * including the null terminator.
3568 *
3569 * If the length of any of the output subtags exceeds the capacity of the corresponding
3570 * buffer, the function copies as many bytes to the output buffer as it can, and returns
3571 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
3572 * occurs.
3573 *
3574 * If an illegal argument is provided, the function returns the error
3575 * U_ILLEGAL_ARGUMENT_ERROR.
3576 *
3577 * @param localeID The locale ID to parse.
3578 * @param lang The language tag buffer.
3579 * @param langLength The length of the language tag.
3580 * @param script The script tag buffer.
3581 * @param scriptLength The length of the script tag.
3582 * @param region The region tag buffer.
3583 * @param regionLength The length of the region tag.
3584 * @param err A pointer to a UErrorCode for error reporting.
3585 * @return The number of chars of the localeID parameter consumed.
3586 **/
3587static int32_t U_CALLCONV
3588parseTagString(
3589 const char* localeID,
3590 char* lang,
3591 int32_t* langLength,
3592 char* script,
3593 int32_t* scriptLength,
3594 char* region,
3595 int32_t* regionLength,
3596 UErrorCode* err)
3597{
3598 const char* position = localeID;
3599 int32_t subtagLength = 0;
3600
3601 if(U_FAILURE(*err) ||
3602 localeID == NULL ||
3603 lang == NULL ||
3604 langLength == NULL ||
3605 script == NULL ||
3606 scriptLength == NULL ||
3607 region == NULL ||
3608 regionLength == NULL) {
3609 goto error;
3610 }
3611
3612 subtagLength = _getLanguage(position, lang, *langLength, &position);
3613 u_terminateChars(lang, *langLength, subtagLength, err);
3614
3615 /*
3616 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
3617 * to be an error, because it indicates the user-supplied tag is
3618 * not well-formed.
3619 */
3620 if(*err != U_ZERO_ERROR) {
3621 goto error;
3622 }
3623
3624 *langLength = subtagLength;
3625
3626 /*
3627 * If no language was present, use the value of unknownLanguage
3628 * instead. Otherwise, move past any separator.
3629 */
3630 if (*langLength == 0) {
3631 uprv_strcpy(
3632 lang,
3633 unknownLanguage);
3634 *langLength = uprv_strlen(lang);
3635 }
3636 else if (_isIDSeparator(*position)) {
3637 ++position;
3638 }
3639
3640 subtagLength = _getScript(position, script, *scriptLength, &position);
3641 u_terminateChars(script, *scriptLength, subtagLength, err);
3642
3643 if(*err != U_ZERO_ERROR) {
3644 goto error;
3645 }
3646
3647 *scriptLength = subtagLength;
3648
3649 if (*scriptLength > 0) {
3650 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
3651 /**
3652 * If the script part is the "unknown" script, then don't return it.
3653 **/
3654 *scriptLength = 0;
3655 }
3656
3657 /*
3658 * Move past any separator.
3659 */
3660 if (_isIDSeparator(*position)) {
3661 ++position;
3662 }
3663 }
3664
3665 subtagLength = _getCountry(position, region, *regionLength, &position);
3666 u_terminateChars(region, *regionLength, subtagLength, err);
3667
3668 if(*err != U_ZERO_ERROR) {
3669 goto error;
3670 }
3671
3672 *regionLength = subtagLength;
3673
3674 if (*regionLength > 0) {
3675 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
3676 /**
3677 * If the region part is the "unknown" region, then don't return it.
3678 **/
3679 *regionLength = 0;
3680 }
3681 }
3682
3683exit:
3684
3685 return (int32_t)(position - localeID);
3686
3687error:
3688
3689 /**
3690 * If we get here, we have no explicit error, it's the result of an
3691 * illegal argument.
3692 **/
3693 if (!U_FAILURE(*err)) {
3694 *err = U_ILLEGAL_ARGUMENT_ERROR;
3695 }
3696
3697 goto exit;
3698}
3699
3700static int32_t U_CALLCONV
3701createLikelySubtagsString(
3702 const char* lang,
3703 int32_t langLength,
3704 const char* script,
3705 int32_t scriptLength,
3706 const char* region,
3707 int32_t regionLength,
3708 const char* variants,
3709 int32_t variantsLength,
3710 char* tag,
3711 int32_t tagCapacity,
3712 UErrorCode* err)
3713{
3714 /**
3715 * ULOC_FULLNAME_CAPACITY will provide enough capacity
3716 * that we can build a string that contains the language,
3717 * script and region code without worrying about overrunning
3718 * the user-supplied buffer.
3719 **/
3720 char tagBuffer[ULOC_FULLNAME_CAPACITY];
3721 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
3722 int32_t tagBufferLength = 0;
3723
3724 if(U_FAILURE(*err)) {
3725 goto error;
3726 }
3727
3728 /**
3729 * Try the language with the script and region first.
3730 **/
3731 if (scriptLength > 0 && regionLength > 0) {
3732
3733 const char* likelySubtags = NULL;
3734
3735 tagBufferLength = createTagString(
3736 lang,
3737 langLength,
3738 script,
3739 scriptLength,
3740 region,
3741 regionLength,
3742 NULL,
3743 0,
3744 tagBuffer,
3745 sizeof(tagBuffer),
3746 err);
3747 if(U_FAILURE(*err)) {
3748 goto error;
3749 }
3750
3751 likelySubtags =
3752 findLikelySubtags(
3753 tagBuffer,
3754 likelySubtagsBuffer,
3755 sizeof(likelySubtagsBuffer),
3756 err);
3757 if(U_FAILURE(*err)) {
3758 goto error;
3759 }
3760
3761 if (likelySubtags != NULL) {
3762 /* Always use the language tag from the
3763 maximal string, since it may be more
3764 specific than the one provided. */
3765 return createTagStringWithAlternates(
3766 NULL,
3767 0,
3768 NULL,
3769 0,
3770 NULL,
3771 0,
3772 variants,
3773 variantsLength,
3774 likelySubtags,
3775 tag,
3776 tagCapacity,
3777 err);
3778 }
3779 }
3780
3781 /**
3782 * Try the language with just the script.
3783 **/
3784 if (scriptLength > 0) {
3785
3786 const char* likelySubtags = NULL;
3787
3788 tagBufferLength = createTagString(
3789 lang,
3790 langLength,
3791 script,
3792 scriptLength,
3793 NULL,
3794 0,
3795 NULL,
3796 0,
3797 tagBuffer,
3798 sizeof(tagBuffer),
3799 err);
3800 if(U_FAILURE(*err)) {
3801 goto error;
3802 }
3803
3804 likelySubtags =
3805 findLikelySubtags(
3806 tagBuffer,
3807 likelySubtagsBuffer,
3808 sizeof(likelySubtagsBuffer),
3809 err);
3810 if(U_FAILURE(*err)) {
3811 goto error;
3812 }
3813
3814 if (likelySubtags != NULL) {
3815 /* Always use the language tag from the
3816 maximal string, since it may be more
3817 specific than the one provided. */
3818 return createTagStringWithAlternates(
3819 NULL,
3820 0,
3821 NULL,
3822 0,
3823 region,
3824 regionLength,
3825 variants,
3826 variantsLength,
3827 likelySubtags,
3828 tag,
3829 tagCapacity,
3830 err);
3831 }
3832 }
3833
3834 /**
3835 * Try the language with just the region.
3836 **/
3837 if (regionLength > 0) {
3838
3839 const char* likelySubtags = NULL;
3840
3841 createTagString(
3842 lang,
3843 langLength,
3844 NULL,
3845 0,
3846 region,
3847 regionLength,
3848 NULL,
3849 0,
3850 tagBuffer,
3851 sizeof(tagBuffer),
3852 err);
3853 if(U_FAILURE(*err)) {
3854 goto error;
3855 }
3856
3857 likelySubtags =
3858 findLikelySubtags(
3859 tagBuffer,
3860 likelySubtagsBuffer,
3861 sizeof(likelySubtagsBuffer),
3862 err);
3863 if(U_FAILURE(*err)) {
3864 goto error;
3865 }
3866
3867 if (likelySubtags != NULL) {
3868 /* Always use the language tag from the
3869 maximal string, since it may be more
3870 specific than the one provided. */
3871 return createTagStringWithAlternates(
3872 NULL,
3873 0,
3874 script,
3875 scriptLength,
3876 NULL,
3877 0,
3878 variants,
3879 variantsLength,
3880 likelySubtags,
3881 tag,
3882 tagCapacity,
3883 err);
3884 }
3885 }
3886
3887 /**
3888 * Finally, try just the language.
3889 **/
3890 {
3891 const char* likelySubtags = NULL;
3892
3893 createTagString(
3894 lang,
3895 langLength,
3896 NULL,
3897 0,
3898 NULL,
3899 0,
3900 NULL,
3901 0,
3902 tagBuffer,
3903 sizeof(tagBuffer),
3904 err);
3905 if(U_FAILURE(*err)) {
3906 goto error;
3907 }
3908
3909 likelySubtags =
3910 findLikelySubtags(
3911 tagBuffer,
3912 likelySubtagsBuffer,
3913 sizeof(likelySubtagsBuffer),
3914 err);
3915 if(U_FAILURE(*err)) {
3916 goto error;
3917 }
3918
3919 if (likelySubtags != NULL) {
3920 /* Always use the language tag from the
3921 maximal string, since it may be more
3922 specific than the one provided. */
3923 return createTagStringWithAlternates(
3924 NULL,
3925 0,
3926 script,
3927 scriptLength,
3928 region,
3929 regionLength,
3930 variants,
3931 variantsLength,
3932 likelySubtags,
3933 tag,
3934 tagCapacity,
3935 err);
3936 }
3937 }
3938
3939 return u_terminateChars(
3940 tag,
3941 tagCapacity,
3942 0,
3943 err);
3944
3945error:
3946
3947 if (!U_FAILURE(*err)) {
3948 *err = U_ILLEGAL_ARGUMENT_ERROR;
3949 }
3950
3951 return -1;
3952}
3953
3954static int32_t
3955_uloc_addLikelySubtags(const char* localeID,
3956 char* maximizedLocaleID,
3957 int32_t maximizedLocaleIDCapacity,
3958 UErrorCode* err)
3959{
3960 char lang[ULOC_LANG_CAPACITY];
3961 int32_t langLength = sizeof(lang);
3962 char script[ULOC_SCRIPT_CAPACITY];
3963 int32_t scriptLength = sizeof(script);
3964 char region[ULOC_COUNTRY_CAPACITY];
3965 int32_t regionLength = sizeof(region);
3966 const char* trailing = "";
3967 int32_t trailingLength = 0;
3968 int32_t trailingIndex = 0;
3969 int32_t resultLength = 0;
3970
3971 if(U_FAILURE(*err)) {
3972 goto error;
3973 }
3974 else if (localeID == NULL ||
3975 maximizedLocaleID == NULL ||
3976 maximizedLocaleIDCapacity <= 0) {
3977 goto error;
3978 }
3979
3980 trailingIndex = parseTagString(
3981 localeID,
3982 lang,
3983 &langLength,
3984 script,
3985 &scriptLength,
3986 region,
3987 &regionLength,
3988 err);
3989 if(U_FAILURE(*err)) {
3990 /* Overflow indicates an illegal argument error */
3991 if (*err == U_BUFFER_OVERFLOW_ERROR) {
3992 *err = U_ILLEGAL_ARGUMENT_ERROR;
3993 }
3994
3995 goto error;
3996 }
3997
3998 /* Find the length of the trailing portion. */
3999 trailing = &localeID[trailingIndex];
4000 trailingLength = uprv_strlen(trailing);
4001
4002 resultLength =
4003 createLikelySubtagsString(
4004 lang,
4005 langLength,
4006 script,
4007 scriptLength,
4008 region,
4009 regionLength,
4010 trailing,
4011 trailingLength,
4012 maximizedLocaleID,
4013 maximizedLocaleIDCapacity,
4014 err);
4015
4016 if (resultLength == 0) {
4017 const int32_t localIDLength =
4018 uprv_strlen(localeID);
4019
4020 /*
4021 * If we get here, we need to return localeID.
4022 */
4023 uprv_memcpy(
4024 maximizedLocaleID,
4025 localeID,
4026 localIDLength <= maximizedLocaleIDCapacity ?
4027 localIDLength : maximizedLocaleIDCapacity);
4028
4029 resultLength =
4030 u_terminateChars(
4031 maximizedLocaleID,
4032 maximizedLocaleIDCapacity,
4033 localIDLength,
4034 err);
4035 }
4036
4037 return resultLength;
4038
4039error:
4040
4041 if (!U_FAILURE(*err)) {
4042 *err = U_ILLEGAL_ARGUMENT_ERROR;
4043 }
4044
4045 return -1;
4046}
4047
4048static int32_t
4049_uloc_minimizeSubtags(const char* localeID,
4050 char* minimizedLocaleID,
4051 int32_t minimizedLocaleIDCapacity,
4052 UErrorCode* err)
4053{
4054 /**
4055 * ULOC_FULLNAME_CAPACITY will provide enough capacity
4056 * that we can build a string that contains the language,
4057 * script and region code without worrying about overrunning
4058 * the user-supplied buffer.
4059 **/
4060 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
4061 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
4062
4063 char lang[ULOC_LANG_CAPACITY];
4064 int32_t langLength = sizeof(lang);
4065 char script[ULOC_SCRIPT_CAPACITY];
4066 int32_t scriptLength = sizeof(script);
4067 char region[ULOC_COUNTRY_CAPACITY];
4068 int32_t regionLength = sizeof(region);
4069 const char* trailing = "";
4070 int32_t trailingLength = 0;
4071 int32_t trailingIndex = 0;
4072
4073 if(U_FAILURE(*err)) {
4074 goto error;
4075 }
4076 else if (localeID == NULL ||
4077 minimizedLocaleID == NULL ||
4078 minimizedLocaleIDCapacity <= 0) {
4079 goto error;
4080 }
4081
4082 trailingIndex =
4083 parseTagString(
4084 localeID,
4085 lang,
4086 &langLength,
4087 script,
4088 &scriptLength,
4089 region,
4090 &regionLength,
4091 err);
4092 if(U_FAILURE(*err)) {
4093
4094 /* Overflow indicates an illegal argument error */
4095 if (*err == U_BUFFER_OVERFLOW_ERROR) {
4096 *err = U_ILLEGAL_ARGUMENT_ERROR;
4097 }
4098
4099 goto error;
4100 }
4101
4102 /* Find the spot where the variants begin, if any. */
4103 trailing = &localeID[trailingIndex];
4104 trailingLength = uprv_strlen(trailing);
4105
4106 createTagString(
4107 lang,
4108 langLength,
4109 script,
4110 scriptLength,
4111 region,
4112 regionLength,
4113 NULL,
4114 0,
4115 maximizedTagBuffer,
4116 maximizedTagBufferLength,
4117 err);
4118 if(U_FAILURE(*err)) {
4119 goto error;
4120 }
4121
4122 /**
4123 * First, we need to first get the maximization
4124 * from AddLikelySubtags.
4125 **/
4126 maximizedTagBufferLength =
4127 uloc_addLikelySubtags(
4128 maximizedTagBuffer,
4129 maximizedTagBuffer,
4130 maximizedTagBufferLength,
4131 err);
4132
4133 if(U_FAILURE(*err)) {
4134 goto error;
4135 }
4136
4137 /**
4138 * Start first with just the language.
4139 **/
4140 {
4141 char tagBuffer[ULOC_FULLNAME_CAPACITY];
4142
4143 const int32_t tagBufferLength =
4144 createLikelySubtagsString(
4145 lang,
4146 langLength,
4147 NULL,
4148 0,
4149 NULL,
4150 0,
4151 NULL,
4152 0,
4153 tagBuffer,
4154 sizeof(tagBuffer),
4155 err);
4156
4157 if(U_FAILURE(*err)) {
4158 goto error;
4159 }
4160 else if (uprv_strnicmp(
4161 maximizedTagBuffer,
4162 tagBuffer,
4163 tagBufferLength) == 0) {
4164
4165 return createTagString(
4166 lang,
4167 langLength,
4168 NULL,
4169 0,
4170 NULL,
4171 0,
4172 trailing,
4173 trailingLength,
4174 minimizedLocaleID,
4175 minimizedLocaleIDCapacity,
4176 err);
4177 }
4178 }
4179
4180 /**
4181 * Next, try the language and region.
4182 **/
4183 if (regionLength > 0) {
4184
4185 char tagBuffer[ULOC_FULLNAME_CAPACITY];
4186
4187 const int32_t tagBufferLength =
4188 createLikelySubtagsString(
4189 lang,
4190 langLength,
4191 NULL,
4192 0,
4193 region,
4194 regionLength,
4195 NULL,
4196 0,
4197 tagBuffer,
4198 sizeof(tagBuffer),
4199 err);
4200
4201 if(U_FAILURE(*err)) {
4202 goto error;
4203 }
4204 else if (uprv_strnicmp(
4205 maximizedTagBuffer,
4206 tagBuffer,
4207 tagBufferLength) == 0) {
4208
4209 return createTagString(
4210 lang,
4211 langLength,
4212 NULL,
4213 0,
4214 region,
4215 regionLength,
4216 trailing,
4217 trailingLength,
4218 minimizedLocaleID,
4219 minimizedLocaleIDCapacity,
4220 err);
4221 }
4222 }
4223
4224 /**
4225 * Finally, try the language and script. This is our last chance,
4226 * since trying with all three subtags would only yield the
4227 * maximal version that we already have.
4228 **/
4229 if (scriptLength > 0 && regionLength > 0) {
4230 char tagBuffer[ULOC_FULLNAME_CAPACITY];
4231
4232 const int32_t tagBufferLength =
4233 createLikelySubtagsString(
4234 lang,
4235 langLength,
4236 script,
4237 scriptLength,
4238 NULL,
4239 0,
4240 NULL,
4241 0,
4242 tagBuffer,
4243 sizeof(tagBuffer),
4244 err);
4245
4246 if(U_FAILURE(*err)) {
4247 goto error;
4248 }
4249 else if (uprv_strnicmp(
4250 maximizedTagBuffer,
4251 tagBuffer,
4252 tagBufferLength) == 0) {
4253
4254 return createTagString(
4255 lang,
4256 langLength,
4257 script,
4258 scriptLength,
4259 NULL,
4260 0,
4261 trailing,
4262 trailingLength,
4263 minimizedLocaleID,
4264 minimizedLocaleIDCapacity,
4265 err);
4266 }
4267 }
4268
4269 {
4270 /**
4271 * If we got here, return the locale ID parameter.
4272 **/
4273 const int32_t localeIDLength = uprv_strlen(localeID);
4274
4275 uprv_memcpy(
4276 minimizedLocaleID,
4277 localeID,
4278 localeIDLength <= minimizedLocaleIDCapacity ?
4279 localeIDLength : minimizedLocaleIDCapacity);
4280
4281 return u_terminateChars(
4282 minimizedLocaleID,
4283 minimizedLocaleIDCapacity,
4284 localeIDLength,
4285 err);
4286 }
4287
4288error:
4289
4290 if (!U_FAILURE(*err)) {
4291 *err = U_ILLEGAL_ARGUMENT_ERROR;
4292 }
4293
4294 return -1;
4295
4296
4297}
4298
4299static UBool
4300do_canonicalize(const char* localeID,
4301 char* buffer,
4302 int32_t bufferCapacity,
4303 UErrorCode* err)
4304{
4305 uloc_canonicalize(
4306 localeID,
4307 buffer,
4308 bufferCapacity,
4309 err);
4310
4311 if (*err == U_STRING_NOT_TERMINATED_WARNING ||
4312 *err == U_BUFFER_OVERFLOW_ERROR) {
4313 *err = U_ILLEGAL_ARGUMENT_ERROR;
4314
4315 return FALSE;
4316 }
4317 else if (U_FAILURE(*err)) {
4318
4319 return FALSE;
4320 }
4321 else {
4322 return TRUE;
4323 }
4324}
4325
4326U_DRAFT int32_t U_EXPORT2
4327uloc_addLikelySubtags(const char* localeID,
4328 char* maximizedLocaleID,
4329 int32_t maximizedLocaleIDCapacity,
4330 UErrorCode* err)
4331{
4332 char localeBuffer[ULOC_FULLNAME_CAPACITY];
4333
4334 if (!do_canonicalize(
4335 localeID,
4336 localeBuffer,
4337 sizeof(localeBuffer),
4338 err)) {
4339 return -1;
4340 }
4341 else {
4342 return _uloc_addLikelySubtags(
4343 localeBuffer,
4344 maximizedLocaleID,
4345 maximizedLocaleIDCapacity,
4346 err);
4347 }
4348}
4349
4350U_DRAFT int32_t U_EXPORT2
4351uloc_minimizeSubtags(const char* localeID,
4352 char* minimizedLocaleID,
4353 int32_t minimizedLocaleIDCapacity,
4354 UErrorCode* err)
4355{
4356 char localeBuffer[ULOC_FULLNAME_CAPACITY];
4357
4358 if (!do_canonicalize(
4359 localeID,
4360 localeBuffer,
4361 sizeof(localeBuffer),
4362 err)) {
4363 return -1;
4364 }
4365 else {
4366 return _uloc_minimizeSubtags(
4367 localeBuffer,
4368 minimizedLocaleID,
4369 minimizedLocaleIDCapacity,
4370 err);
4371 }
4372}
4373
374ca955 4374/*eof*/