]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/uloc.cpp
ICU-491.11.3.tar.gz
[apple/icu.git] / icuSources / common / uloc.cpp
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
4388f060 3* Copyright (C) 1997-2012, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6*
7* File ULOC.CPP
8*
9* Modification History:
10*
11* Date Name Description
12* 04/01/97 aliu Creation.
13* 08/21/98 stephen JDK 1.2 sync
14* 12/08/98 rtg New Locale implementation and C API
15* 03/15/99 damiba overhaul.
16* 04/06/99 stephen changed setDefault() to realloc and copy
17* 06/14/99 stephen Changed calls to ures_open for new params
18* 07/21/99 stephen Modified setDefault() to propagate to C++
374ca955
A
19* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20* brought canonicalization code into line with spec
b75a7d8f
A
21*****************************************************************************/
22
23/*
24 POSIX's locale format, from putil.c: [no spaces]
25
26 ll [ _CC ] [ . MM ] [ @ VV]
27
28 l = lang, C = ctry, M = charmap, V = variant
29*/
30
b75a7d8f
A
31#include "unicode/utypes.h"
32#include "unicode/ustring.h"
33#include "unicode/uloc.h"
34
374ca955 35#include "putilimp.h"
b75a7d8f 36#include "ustr_imp.h"
374ca955 37#include "ulocimp.h"
b75a7d8f
A
38#include "umutex.h"
39#include "cstring.h"
40#include "cmemory.h"
41#include "ucln_cmn.h"
374ca955
A
42#include "locmap.h"
43#include "uarrsort.h"
44#include "uenumimp.h"
45#include "uassert.h"
b75a7d8f 46
374ca955
A
47#include <stdio.h> /* for sprintf */
48
49/* ### Declarations **************************************************/
b75a7d8f
A
50
51/* Locale stuff from locid.cpp */
52U_CFUNC void locale_set_default(const char *id);
53U_CFUNC const char *locale_get_default(void);
374ca955
A
54U_CFUNC int32_t
55locale_getKeywords(const char *localeID,
56 char prev,
57 char *keywords, int32_t keywordCapacity,
58 char *values, int32_t valuesCapacity, int32_t *valLen,
59 UBool valuesToo,
60 UErrorCode *status);
61
374ca955
A
62/* ### Data tables **************************************************/
63
64/**
65 * Table of language codes, both 2- and 3-letter, with preference
66 * given to 2-letter codes where possible. Includes 3-letter codes
67 * that lack a 2-letter equivalent.
68 *
69 * This list must be in sorted order. This list is returned directly
70 * to the user by some API.
71 *
72 * This list must be kept in sync with LANGUAGES_3, with corresponding
73 * entries matched.
74 *
75 * This table should be terminated with a NULL entry, followed by a
76 * second list, and another NULL entry. The first list is visible to
77 * user code when this array is returned by API. The second list
78 * contains codes we support, but do not expose through user API.
79 *
80 * Notes
81 *
82 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
83 * include the revisions up to 2001/7/27 *CWB*
84 *
85 * The 3 character codes are the terminology codes like RFC 3066. This
86 * is compatible with prior ICU codes
87 *
88 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
89 * table but now at the end of the table because 3 character codes are
90 * duplicates. This avoids bad searches going from 3 to 2 character
91 * codes.
92 *
93 * The range qaa-qtz is reserved for local use
94 */
95static const char * const LANGUAGES[] = {
96 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa",
4388f060 97 "afh", "agq", "ain", "ak", "akk", "ale", "alg", "alt", "am", "an",
73c04bcf 98 "ang", "anp", "apa",
4388f060 99 "ar", "arc", "arn", "arp", "art", "arw", "as", "asa", "ast",
b75a7d8f 100 "ath", "aus", "av", "awa", "ay", "az", "ba", "bad",
374ca955 101 "bai", "bal", "ban", "bas", "bat", "be", "bej",
4388f060
A
102 "bem", "ber", "bez", "bg", "bh", "bho", "bi", "bik", "bin",
103 "bla", "bm", "bn", "bnt", "bo", "br", "bra", "brx", "bs",
374ca955 104 "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau",
4388f060 105 "cch", "ce", "ceb", "cel", "cgg", "ch", "chb", "chg", "chk", "chm",
b75a7d8f 106 "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop",
374ca955 107 "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus",
4388f060
A
108 "cv", "cy", "da", "dak", "dar", "dav", "day", "de", "del", "den",
109 "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum", "dv", "dyo", "dyu",
110 "dz", "ebu", "ee", "efi", "egy", "eka", "el", "elx", "en",
374ca955 111 "enm", "eo", "es", "et", "eu", "ewo", "fa",
73c04bcf
A
112 "fan", "fat", "ff", "fi", "fil", "fiu", "fj", "fo", "fon",
113 "fr", "frm", "fro", "frr", "frs", "fur", "fy",
4388f060 114 "ga", "gaa", "gan", "gay", "gba", "gd", "gem", "gez", "gil",
73c04bcf 115 "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",
4388f060
A
116 "grc", "gsw", "gu", "guz", "gv", "gwi",
117 "ha", "hai", "hak", "haw", "he", "hi", "hil", "him",
118 "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu", "hup", "hy", "hz",
b75a7d8f 119 "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik",
374ca955 120 "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it",
4388f060
A
121 "iu", "ja", "jbo", "jmc", "jpr", "jrb", "jv", "ka", "kaa", "kab",
122 "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kde", "kea", "kfo", "kg", "kha", "khi",
123 "kho", "khq", "ki", "kj", "kk", "kl", "kln", "km", "kmb", "kn",
124 "ko", "kok", "kos", "kpe", "kr", "krc", "krl", "kro", "kru", "ks", "ksb", "ksf",
125 "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", "lag",
374ca955 126 "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol",
4388f060 127 "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", "luy",
b75a7d8f 128 "lv", "mad", "mag", "mai", "mak", "man", "map", "mas",
4388f060 129 "mdf", "mdr", "men", "mer", "mfe", "mg", "mga", "mgh", "mh", "mi", "mic", "min",
b75a7d8f 130 "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno",
4388f060
A
131 "mo", "moh", "mos", "mr", "ms", "mt", "mua", "mul", "mun",
132 "mus", "mwl", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nan", "nap", "naq",
b75a7d8f 133 "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic",
4388f060 134 "niu", "nl", "nmg", "nn", "no", "nog", "non", "nqo", "nr", "nso", "nub", "nus",
374ca955 135 "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj",
b75a7d8f
A
136 "om", "or", "os", "osa", "ota", "oto", "pa", "paa",
137 "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
138 "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu",
4388f060
A
139 "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rof", "rom",
140 "ru", "rup", "rw", "rwk", "sa", "sad", "sah", "sai", "sal", "sam", "saq",
141 "sas", "sat", "sbp", "sc", "scn", "sco", "sd", "se", "seh", "sel", "sem", "ses",
142 "sg", "sga", "sgn", "shi", "shn", "si", "sid", "sio", "sit",
b75a7d8f
A
143 "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
144 "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
73c04bcf 145 "srn", "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux",
4388f060 146 "sv", "sw", "swc", "syc", "syr", "ta", "tai", "te", "tem", "teo", "ter",
b75a7d8f 147 "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl",
729e4ab9 148 "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv",
4388f060
A
149 "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", "twq",
150 "ty", "tyv", "tzm", "udm", "ug", "uga", "uk", "umb", "und", "ur",
151 "uz", "vai", "ve", "vi", "vo", "vot", "vun", "wa", "wak",
152 "wal", "war", "was", "wen", "wo", "wuu", "xal", "xh", "xog", "yao", "yap", "yav",
153 "yi", "yo", "ypk", "yue", "za", "zap", "zbl", "zen", "zh", "znd",
46f4442e 154 "zu", "zun", "zxx", "zza",
b75a7d8f
A
155NULL,
156 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
157NULL
158};
73c04bcf
A
159static const char* const DEPRECATED_LANGUAGES[]={
160 "in", "iw", "ji", "jw", NULL, NULL
161};
162static const char* const REPLACEMENT_LANGUAGES[]={
163 "id", "he", "yi", "jv", NULL, NULL
164};
b75a7d8f 165
374ca955
A
/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].  The commented-out lines are
 * copied from LANGUAGES to make the pairing easy to check by eye.
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 * Every entry in this table is therefore exactly three letters long;
 * a 2-letter string here (as "kg" used to be, instead of "kon") is a
 * data error.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in LANGUAGES.
 */
static const char * const LANGUAGES_3[] = {
/*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",  */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
/*  "afh", "agq", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",  */
    "afh", "agq", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
/*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "asa", "ast",  */
    "ara", "arc", "arn", "arp", "art", "arw", "asm", "asa", "ast",
/*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",  */
    "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
/*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",  */
    "bai", "bal", "ban", "bas", "bat", "bel", "bej",
/*  "bem", "ber", "bez", "bg",  "bh",  "bho", "bi",  "bik", "bin",  */
    "bem", "ber", "bez", "bul", "bih", "bho", "bis", "bik", "bin",
/*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "brx", "bs",  */
    "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "brx", "bos",
/*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",  */
    "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
/*  "cch", "ce",  "ceb", "cel", "cgg", "ch",  "chb", "chg", "chk", "chm",  */
    "cch", "che", "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
/*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",  */
    "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
/*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",  */
    "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
/*  "cv",  "cy",  "da",  "dak", "dar", "dav", "day", "de",  "del", "den",  */
    "chv", "cym", "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
/*  "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyo", "dyu",  */
    "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum", "div", "dyo", "dyu",
/*  "dz",  "ebu", "ee",  "efi", "egy", "eka", "el",  "elx", "en",  */
    "dzo", "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
/*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",  */
    "enm", "epo", "spa", "est", "eus", "ewo", "fas",
/*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",  */
    "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
/*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gan", "gay",  */
    "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gan", "gay",
/*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",  */
    "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
/*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guz", "gv",  */
    "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guz", "glv",
/*  "gwi", "ha",  "hai", "hak", "haw", "he",  "hi",  "hil", "him",  */
    "gwi", "hau", "hai", "hak", "haw", "heb", "hin", "hil", "him",
/*  "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",  "hup", "hy",  "hz",  */
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun", "hup", "hye", "her",
/*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",  */
    "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
/*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",  */
    "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
/*  "iu",  "ja",  "jbo", "jmc", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",  */
    "iku", "jpn", "jbo", "jmc", "jpr", "jrb", "jav", "kat", "kaa", "kab",
/*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kde", "kea", "kfo", "kg",  "kha", "khi",  */
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kde", "kea", "kfo", "kon", "kha", "khi",
/*  "kho", "khq", "ki",  "kj",  "kk",  "kl",  "kln", "km",  "kmb", "kn",  */
    "kho", "khq", "kik", "kua", "kaz", "kal", "kln", "khm", "kmb", "kan",
/*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",  "ksb", "ksf",  */
    "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
/*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad", "lag",  */
    "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad", "lag",
/*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",  */
    "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
/*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus", "luy",  */
    "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus", "luy",
/*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",  */
    "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
/*  "mdf", "mdr", "men", "mer", "mfe", "mg",  "mga", "mgh", "mh",  "mi",  "mic", "min",  */
    "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga", "mgh", "mah", "mri", "mic", "min",
/*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",  */
    "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
/*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mua", "mul", "mun",  */
    "mol", "moh", "mos", "mar", "msa", "mlt", "mua", "mul", "mun",
/*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nan", "nap", "naq",  */
    "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nan", "nap", "naq",
/*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",  */
    "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
/*  "niu", "nl",  "nmg", "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub", "nus",  */
    "niu", "nld", "nmg", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub", "nus",
/*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",  */
    "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
/*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",  */
    "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
/*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",  */
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
/*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",  */
    "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
/*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rof", "rom",  */
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof", "rom",
/*  "ru",  "rup", "rw",  "rwk", "sa",  "sad", "sah", "sai", "sal", "sam", "saq",  */
    "rus", "rup", "kin", "rwk", "san", "sad", "sah", "sai", "sal", "sam", "saq",
/*  "sas", "sat", "sbp", "sc",  "scn", "sco", "sd",  "se",  "seh", "sel", "sem", "ses",  */
    "sas", "sat", "sbp", "srd", "scn", "sco", "snd", "sme", "seh", "sel", "sem", "ses",
/*  "sg",  "sga", "sgn", "shi", "shn", "si",  "sid", "sio", "sit",  */
    "sag", "sga", "sgn", "shi", "shn", "sin", "sid", "sio", "sit",
/*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",  */
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
/*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",  */
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
/*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",  */
    "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
/*  "sv",  "sw",  "swc", "syc", "syr", "ta",  "tai", "te",  "tem", "teo", "ter",  */
    "swe", "swa", "swc", "syc", "syr", "tam", "tai", "tel", "tem", "teo", "ter",
/*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",  */
    "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
/*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",  "trv",  */
    "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
/*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",  "twq",  */
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi", "twq",
/*  "ty",  "tyv", "tzm", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  */
    "tah", "tyv", "tzm", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
/*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "vun", "wa",  "wak",  */
    "uzb", "vai", "ven", "vie", "vol", "vot", "vun", "wln", "wak",
/*  "wal", "war", "was", "wen", "wo",  "wuu", "xal", "xh",  "xog", "yao", "yap", "yav",  */
    "wal", "war", "was", "wen", "wol", "wuu", "xal", "xho", "xog", "yao", "yap", "yav",
/*  "yi",  "yo",  "ypk", "yue", "za",  "zap", "zbl", "zen", "zh",  "znd",  */
    "yid", "yor", "ypk", "yue", "zha", "zap", "zbl", "zen", "zho", "znd",
/*  "zu",  "zun", "zxx", "zza",  */
    "zul", "zun", "zxx", "zza",
NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",  */
    "ind", "heb", "yid", "jaw", "srp",
NULL
};
302
374ca955
A
303/**
304 * Table of 2-letter country codes.
305 *
306 * This list must be in sorted order. This list is returned directly
307 * to the user by some API.
308 *
309 * This list must be kept in sync with COUNTRIES_3, with corresponding
310 * entries matched.
311 *
312 * This table should be terminated with a NULL entry, followed by a
313 * second list, and another NULL entry. The first list is visible to
314 * user code when this array is returned by API. The second list
315 * contains codes we support, but do not expose through user API.
316 *
317 * Notes:
318 *
319 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
320 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
321 * new codes keeping the old ones for compatibility updated to include
322 * 1999/12/03 revisions *CWB*
323 *
324 * RO(ROM) is now RO(ROU) according to
325 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
326 */
327static const char * const COUNTRIES[] = {
b75a7d8f 328 "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN",
73c04bcf 329 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
b75a7d8f 330 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
46f4442e 331 "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV",
b75a7d8f
A
332 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
333 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
334 "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK",
335 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
336 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
73c04bcf 337 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
b75a7d8f
A
338 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
339 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
73c04bcf
A
340 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
341 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
b75a7d8f
A
342 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
343 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
46f4442e 344 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
b75a7d8f
A
345 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
346 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
347 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
348 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
349 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
46f4442e 350 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
b75a7d8f
A
351 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
352 "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV",
353 "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
354 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
355 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
356 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
46f4442e 357 "WS", "YE", "YT", "ZA", "ZM", "ZW",
b75a7d8f 358NULL,
46f4442e 359 "FX", "CS", "RO", "TP", "YU", "ZR", /* obsolete country codes */
b75a7d8f
A
360NULL
361};
362
73c04bcf 363static const char* const DEPRECATED_COUNTRIES[] ={
46f4442e 364 "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
73c04bcf
A
365};
366static const char* const REPLACEMENT_COUNTRIES[] = {
46f4442e
A
367/* "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
368 "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL /* replacement country codes */
73c04bcf
A
369};
370
374ca955
A
371/**
372 * Table of 3-letter country codes.
373 *
374 * This is a lookup table used to convert 3-letter country codes to
375 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
376 * For all valid i, COUNTRIES[i] must refer to the same country as
377 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
378 * to make eyeballing this baby easier.
379 *
380 * This table should be terminated with a NULL entry, followed by a
381 * second list, and another NULL entry. The two lists correspond to
382 * the two lists in COUNTRIES.
383 */
384static const char * const COUNTRIES_3[] = {
b75a7d8f
A
385/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", */
386 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
73c04bcf
A
387/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
388 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
b75a7d8f
A
389/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
390 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
46f4442e
A
391/* "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV", */
392 "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
b75a7d8f
A
393/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
394 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
395/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
396 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
397/* "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", */
398 "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
399/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
400 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
401/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
402 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
46f4442e 403/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
73c04bcf 404 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
b75a7d8f
A
405/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
406 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
407/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
408 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
46f4442e
A
409/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
410 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
411/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
73c04bcf 412 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
b75a7d8f
A
413/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
414 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
415/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
416 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
46f4442e
A
417/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
418 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
b75a7d8f
A
419/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
420 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
421/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
422 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
423/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
424 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
425/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
426 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
427/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
428 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
46f4442e
A
429/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
430 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
b75a7d8f
A
431/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
432 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
433/* "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", */
434 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
435/* "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */
436 "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
437/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
438 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
439/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
440 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
441/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
442 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
46f4442e
A
443/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
444 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
b75a7d8f 445NULL,
46f4442e
A
446/* "FX", "CS", "RO", "TP", "YU", "ZR", */
447 "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
b75a7d8f
A
448NULL
449};
450
374ca955
A
451typedef struct CanonicalizationMap {
452 const char *id; /* input ID */
453 const char *canonicalID; /* canonicalized output ID */
454 const char *keyword; /* keyword, or NULL if none */
455 const char *value; /* keyword value, or NULL if kw==NULL */
456} CanonicalizationMap;
457
458/**
459 * A map to canonicalize locale IDs. This handles a variety of
460 * different semantic kinds of transformations.
461 */
462static const CanonicalizationMap CANONICALIZE_MAP[] = {
463 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */
729e4ab9 464 { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */
73c04bcf 465 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
374ca955
A
466 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
467 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
468 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
469 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
46f4442e 470 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
374ca955
A
471 { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
472 { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
473 { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
474 { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
374ca955
A
475 { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
476 { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
46f4442e 477 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
374ca955
A
478 { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
479 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
480 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
481 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
482 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
483 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
484 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
485 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
46f4442e 486 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
374ca955 487 { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
46f4442e 488 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
374ca955
A
489 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */
490 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
491 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
492 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
46f4442e
A
493 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
494 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */
495 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
496 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
73c04bcf 497 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
374ca955
A
498 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
499 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
500 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
46f4442e 501 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */
4388f060 502 { "zh_GAN", "gan", NULL, NULL }, /* registered name */
374ca955 503 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
4388f060
A
504 { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */
505 { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */
506 { "zh_WUU", "wuu", NULL, NULL }, /* registered name */
507 { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */
508 { "zh_YUE", "yue", NULL, NULL }, /* registered name */
46f4442e
A
509};
510
511typedef struct VariantMap {
512 const char *variant; /* input ID */
513 const char *keyword; /* keyword, or NULL if none */
514 const char *value; /* keyword value, or NULL if kw==NULL */
515} VariantMap;
516
517static const VariantMap VARIANT_MAP[] = {
518 { "EURO", "currency", "EUR" },
519 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
520 { "STROKE", "collation", "stroke" } /* Solaris variant */
374ca955
A
521};
522
729e4ab9
A
523/* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id has a BCP47 extension and does not have '@':
 * true when id is non-NULL, contains no '@', and getShortestSubtagLength()
 * reports a one-character subtag for it.
 * The argument is evaluated more than once, so it must not have side effects.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP47 id to Unicode id by calling uloc_forLanguageTag() into
 * buffer (capacity length).  finalID is pointed at buffer on success; if the
 * conversion fails or produces nothing, finalID is pointed at the original id.
 * NOTE: this expands to a bare if/else statement, so invoke it only as a
 * complete statement inside its own braces.
 */
#define _ConvertBCP47(finalID, id, buffer, length,err) \
        if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
            finalID=id; \
        } else { \
            finalID=buffer; \
        }
533/* Gets the size of the shortest subtag in the given localeID. */
534static int32_t getShortestSubtagLength(const char *localeID) {
535 int32_t localeIDLength = uprv_strlen(localeID);
536 int32_t length = localeIDLength;
537 int32_t tmpLength = 0;
538 int32_t i;
539 UBool reset = TRUE;
540
541 for (i = 0; i < localeIDLength; i++) {
542 if (localeID[i] != '_' && localeID[i] != '-') {
543 if (reset) {
544 tmpLength = 0;
545 reset = FALSE;
546 }
547 tmpLength++;
548 } else {
549 if (tmpLength != 0 && tmpLength < length) {
550 length = tmpLength;
551 }
552 reset = TRUE;
553 }
554 }
555
556 return length;
557}
558
374ca955
A
559/* ### Keywords **************************************************/
560
561#define ULOC_KEYWORD_BUFFER_LEN 25
562#define ULOC_MAX_NO_KEYWORDS 25
563
729e4ab9 564U_CAPI const char * U_EXPORT2
374ca955 565locale_getKeywordsStart(const char *localeID) {
374ca955 566 const char *result = NULL;
374ca955
A
567 if((result = uprv_strchr(localeID, '@')) != NULL) {
568 return result;
73c04bcf
A
569 }
570#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
571 else {
572 /* We do this because the @ sign is variant, and the @ sign used on one
573 EBCDIC machine won't be compiled the same way on other EBCDIC based
574 machines. */
575 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
374ca955
A
576 const uint8_t *charToFind = ebcdicSigns;
577 while(*charToFind) {
578 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
579 return result;
580 }
581 charToFind++;
582 }
583 }
73c04bcf 584#endif
374ca955
A
585 return NULL;
586}
587
588/**
589 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
590 * @param keywordName incoming name to be canonicalized
591 * @param status return status (keyword too long)
592 * @return length of the keyword name
593 */
594static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
595{
596 int32_t i;
73c04bcf 597 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
374ca955
A
598
599 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
600 /* keyword name too long for internal buffer */
601 *status = U_INTERNAL_PROGRAM_ERROR;
602 return 0;
603 }
604
605 /* normalize the keyword name */
606 for(i = 0; i < keywordNameLen; i++) {
607 buf[i] = uprv_tolower(keywordName[i]);
608 }
609 buf[i] = 0;
610
611 return keywordNameLen;
612}
613
614typedef struct {
615 char keyword[ULOC_KEYWORD_BUFFER_LEN];
616 int32_t keywordLen;
617 const char *valueStart;
618 int32_t valueLen;
619} KeywordStruct;
620
621static int32_t U_CALLCONV
4388f060 622compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
374ca955
A
623 const char* leftString = ((const KeywordStruct *)left)->keyword;
624 const char* rightString = ((const KeywordStruct *)right)->keyword;
625 return uprv_strcmp(leftString, rightString);
626}
627
628/**
629 * Both addKeyword and addValue must already be in canonical form.
630 * Either both addKeyword and addValue are NULL, or neither is NULL.
631 * If they are not NULL they must be zero terminated.
632 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
633 */
634static int32_t
635_getKeywords(const char *localeID,
636 char prev,
637 char *keywords, int32_t keywordCapacity,
638 char *values, int32_t valuesCapacity, int32_t *valLen,
639 UBool valuesToo,
640 const char* addKeyword,
641 const char* addValue,
642 UErrorCode *status)
643{
644 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
645
646 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
647 int32_t numKeywords = 0;
648 const char* pos = localeID;
649 const char* equalSign = NULL;
650 const char* semicolon = NULL;
651 int32_t i = 0, j, n;
652 int32_t keywordsLen = 0;
653 int32_t valuesLen = 0;
654
655 if(prev == '@') { /* start of keyword definition */
656 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
657 do {
658 UBool duplicate = FALSE;
659 /* skip leading spaces */
660 while(*pos == ' ') {
661 pos++;
662 }
663 if (!*pos) { /* handle trailing "; " */
664 break;
665 }
666 if(numKeywords == maxKeywords) {
667 *status = U_INTERNAL_PROGRAM_ERROR;
668 return 0;
669 }
670 equalSign = uprv_strchr(pos, '=');
671 semicolon = uprv_strchr(pos, ';');
672 /* lack of '=' [foo@currency] is illegal */
673 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
674 if(!equalSign || (semicolon && semicolon<equalSign)) {
675 *status = U_INVALID_FORMAT_ERROR;
676 return 0;
677 }
678 /* need to normalize both keyword and keyword name */
679 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
680 /* keyword name too long for internal buffer */
681 *status = U_INTERNAL_PROGRAM_ERROR;
682 return 0;
683 }
684 for(i = 0, n = 0; i < equalSign - pos; ++i) {
685 if (pos[i] != ' ') {
686 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
687 }
688 }
689 keywordList[numKeywords].keyword[n] = 0;
690 keywordList[numKeywords].keywordLen = n;
691 /* now grab the value part. First we skip the '=' */
692 equalSign++;
693 /* then we leading spaces */
694 while(*equalSign == ' ') {
695 equalSign++;
696 }
697 keywordList[numKeywords].valueStart = equalSign;
698
699 pos = semicolon;
700 i = 0;
701 if(pos) {
702 while(*(pos - i - 1) == ' ') {
703 i++;
704 }
73c04bcf 705 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
374ca955
A
706 pos++;
707 } else {
73c04bcf 708 i = (int32_t)uprv_strlen(equalSign);
4388f060 709 while(i && equalSign[i-1] == ' ') {
374ca955
A
710 i--;
711 }
712 keywordList[numKeywords].valueLen = i;
713 }
714 /* If this is a duplicate keyword, then ignore it */
715 for (j=0; j<numKeywords; ++j) {
716 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
717 duplicate = TRUE;
718 break;
719 }
720 }
721 if (!duplicate) {
722 ++numKeywords;
723 }
724 } while(pos);
725
726 /* Handle addKeyword/addValue. */
727 if (addKeyword != NULL) {
728 UBool duplicate = FALSE;
729 U_ASSERT(addValue != NULL);
730 /* Search for duplicate; if found, do nothing. Explicit keyword
731 overrides addKeyword. */
732 for (j=0; j<numKeywords; ++j) {
733 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
734 duplicate = TRUE;
735 break;
736 }
737 }
738 if (!duplicate) {
739 if (numKeywords == maxKeywords) {
740 *status = U_INTERNAL_PROGRAM_ERROR;
741 return 0;
742 }
743 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
73c04bcf 744 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
374ca955 745 keywordList[numKeywords].valueStart = addValue;
73c04bcf 746 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
374ca955
A
747 ++numKeywords;
748 }
749 } else {
750 U_ASSERT(addValue == NULL);
751 }
752
753 /* now we have a list of keywords */
754 /* we need to sort it */
755 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
756
757 /* Now construct the keyword part */
758 for(i = 0; i < numKeywords; i++) {
759 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
760 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
761 if(valuesToo) {
762 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
763 } else {
764 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
765 }
766 }
767 keywordsLen += keywordList[i].keywordLen + 1;
768 if(valuesToo) {
769 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
770 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
771 }
772 keywordsLen += keywordList[i].valueLen;
773
774 if(i < numKeywords - 1) {
775 if(keywordsLen < keywordCapacity) {
776 keywords[keywordsLen] = ';';
777 }
778 keywordsLen++;
779 }
780 }
781 if(values) {
782 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
783 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
784 values[valuesLen + keywordList[i].valueLen] = 0;
785 }
786 valuesLen += keywordList[i].valueLen + 1;
787 }
788 }
789 if(values) {
790 values[valuesLen] = 0;
791 if(valLen) {
792 *valLen = valuesLen;
793 }
794 }
795 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
796 } else {
797 return 0;
798 }
799}
800
801U_CFUNC int32_t
802locale_getKeywords(const char *localeID,
803 char prev,
804 char *keywords, int32_t keywordCapacity,
805 char *values, int32_t valuesCapacity, int32_t *valLen,
806 UBool valuesToo,
807 UErrorCode *status) {
808 return _getKeywords(localeID, prev, keywords, keywordCapacity,
809 values, valuesCapacity, valLen, valuesToo,
810 NULL, NULL, status);
811}
812
813U_CAPI int32_t U_EXPORT2
814uloc_getKeywordValue(const char* localeID,
815 const char* keywordName,
816 char* buffer, int32_t bufferCapacity,
817 UErrorCode* status)
818{
729e4ab9 819 const char* startSearchHere = NULL;
374ca955 820 const char* nextSeparator = NULL;
374ca955
A
821 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
822 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
823 int32_t i = 0;
824 int32_t result = 0;
825
826 if(status && U_SUCCESS(*status) && localeID) {
729e4ab9
A
827 char tempBuffer[ULOC_FULLNAME_CAPACITY];
828 const char* tmpLocaleID;
829
830 if (_hasBCP47Extension(localeID)) {
831 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
832 } else {
833 tmpLocaleID=localeID;
834 }
374ca955 835
729e4ab9 836 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
374ca955
A
837 if(startSearchHere == NULL) {
838 /* no keywords, return at once */
839 return 0;
840 }
841
73c04bcf 842 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
374ca955
A
843 if(U_FAILURE(*status)) {
844 return 0;
845 }
846
847 /* find the first keyword */
848 while(startSearchHere) {
849 startSearchHere++;
850 /* skip leading spaces (allowed?) */
851 while(*startSearchHere == ' ') {
852 startSearchHere++;
853 }
854 nextSeparator = uprv_strchr(startSearchHere, '=');
855 /* need to normalize both keyword and keyword name */
856 if(!nextSeparator) {
857 break;
858 }
859 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
860 /* keyword name too long for internal buffer */
861 *status = U_INTERNAL_PROGRAM_ERROR;
862 return 0;
863 }
864 for(i = 0; i < nextSeparator - startSearchHere; i++) {
865 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
866 }
867 /* trim trailing spaces */
868 while(startSearchHere[i-1] == ' ') {
869 i--;
4388f060 870 U_ASSERT(i>=0);
374ca955
A
871 }
872 localeKeywordNameBuffer[i] = 0;
873
874 startSearchHere = uprv_strchr(nextSeparator, ';');
875
876 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
877 nextSeparator++;
878 while(*nextSeparator == ' ') {
879 nextSeparator++;
880 }
881 /* we actually found the keyword. Copy the value */
882 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
883 while(*(startSearchHere-1) == ' ') {
884 startSearchHere--;
885 }
886 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
73c04bcf 887 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
374ca955 888 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
73c04bcf 889 i = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
890 while(nextSeparator[i - 1] == ' ') {
891 i--;
892 }
893 uprv_strncpy(buffer, nextSeparator, i);
894 result = u_terminateChars(buffer, bufferCapacity, i, status);
895 } else {
896 /* give a bigger buffer, please */
897 *status = U_BUFFER_OVERFLOW_ERROR;
898 if(startSearchHere) {
73c04bcf 899 result = (int32_t)(startSearchHere - nextSeparator);
374ca955 900 } else {
73c04bcf 901 result = (int32_t)uprv_strlen(nextSeparator);
374ca955
A
902 }
903 }
904 return result;
905 }
906 }
907 }
908 return 0;
909}
910
911U_CAPI int32_t U_EXPORT2
912uloc_setKeywordValue(const char* keywordName,
913 const char* keywordValue,
914 char* buffer, int32_t bufferCapacity,
915 UErrorCode* status)
916{
917 /* TODO: sorting. removal. */
918 int32_t keywordNameLen;
919 int32_t keywordValueLen;
920 int32_t bufLen;
921 int32_t needLen = 0;
922 int32_t foundValueLen;
923 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
924 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
925 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
926 int32_t i = 0;
927 int32_t rc;
928 char* nextSeparator = NULL;
929 char* nextEqualsign = NULL;
930 char* startSearchHere = NULL;
931 char* keywordStart = NULL;
932 char *insertHere = NULL;
933 if(U_FAILURE(*status)) {
934 return -1;
935 }
73c04bcf
A
936 if(bufferCapacity>1) {
937 bufLen = (int32_t)uprv_strlen(buffer);
938 } else {
939 *status = U_ILLEGAL_ARGUMENT_ERROR;
940 return 0;
941 }
942 if(bufferCapacity<bufLen) {
943 /* The capacity is less than the length?! Is this NULL terminated? */
944 *status = U_ILLEGAL_ARGUMENT_ERROR;
945 return 0;
946 }
374ca955
A
947 if(keywordValue && !*keywordValue) {
948 keywordValue = NULL;
949 }
950 if(keywordValue) {
73c04bcf 951 keywordValueLen = (int32_t)uprv_strlen(keywordValue);
374ca955
A
952 } else {
953 keywordValueLen = 0;
954 }
955 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
956 if(U_FAILURE(*status)) {
957 return 0;
958 }
959 startSearchHere = (char*)locale_getKeywordsStart(buffer);
374ca955
A
960 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
961 if(!keywordValue) { /* no keywords = nothing to remove */
962 return bufLen;
963 }
964
965 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
966 if(startSearchHere) { /* had a single @ */
967 needLen--; /* already had the @ */
968 /* startSearchHere points at the @ */
969 } else {
970 startSearchHere=buffer+bufLen;
971 }
972 if(needLen >= bufferCapacity) {
973 *status = U_BUFFER_OVERFLOW_ERROR;
974 return needLen; /* no change */
975 }
976 *startSearchHere = '@';
977 startSearchHere++;
978 uprv_strcpy(startSearchHere, keywordNameBuffer);
979 startSearchHere += keywordNameLen;
980 *startSearchHere = '=';
981 startSearchHere++;
982 uprv_strcpy(startSearchHere, keywordValue);
983 startSearchHere+=keywordValueLen;
984 return needLen;
985 } /* end shortcut - no @ */
986
987 keywordStart = startSearchHere;
988 /* search for keyword */
989 while(keywordStart) {
990 keywordStart++;
991 /* skip leading spaces (allowed?) */
992 while(*keywordStart == ' ') {
993 keywordStart++;
994 }
995 nextEqualsign = uprv_strchr(keywordStart, '=');
996 /* need to normalize both keyword and keyword name */
997 if(!nextEqualsign) {
998 break;
999 }
1000 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
1001 /* keyword name too long for internal buffer */
1002 *status = U_INTERNAL_PROGRAM_ERROR;
1003 return 0;
1004 }
1005 for(i = 0; i < nextEqualsign - keywordStart; i++) {
1006 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
1007 }
1008 /* trim trailing spaces */
1009 while(keywordStart[i-1] == ' ') {
1010 i--;
1011 }
4388f060 1012 U_ASSERT(i>=0);
374ca955
A
1013 localeKeywordNameBuffer[i] = 0;
1014
1015 nextSeparator = uprv_strchr(nextEqualsign, ';');
1016 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1017 if(rc == 0) {
1018 nextEqualsign++;
1019 while(*nextEqualsign == ' ') {
1020 nextEqualsign++;
1021 }
1022 /* we actually found the keyword. Change the value */
1023 if (nextSeparator) {
1024 keywordAtEnd = 0;
73c04bcf 1025 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
374ca955
A
1026 } else {
1027 keywordAtEnd = 1;
73c04bcf 1028 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
374ca955
A
1029 }
1030 if(keywordValue) { /* adding a value - not removing */
1031 if(foundValueLen == keywordValueLen) {
1032 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1033 return bufLen; /* no change in size */
1034 } else if(foundValueLen > keywordValueLen) {
1035 int32_t delta = foundValueLen - keywordValueLen;
1036 if(nextSeparator) { /* RH side */
1037 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1038 }
1039 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1040 bufLen -= delta;
1041 buffer[bufLen]=0;
1042 return bufLen;
1043 } else { /* FVL < KVL */
1044 int32_t delta = keywordValueLen - foundValueLen;
1045 if((bufLen+delta) >= bufferCapacity) {
1046 *status = U_BUFFER_OVERFLOW_ERROR;
1047 return bufLen+delta;
1048 }
1049 if(nextSeparator) { /* RH side */
1050 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1051 }
1052 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1053 bufLen += delta;
1054 buffer[bufLen]=0;
1055 return bufLen;
1056 }
1057 } else { /* removing a keyword */
1058 if(keywordAtEnd) {
1059 /* zero out the ';' or '@' just before startSearchhere */
1060 keywordStart[-1] = 0;
73c04bcf 1061 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
374ca955
A
1062 } else {
1063 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1064 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
73c04bcf 1065 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
374ca955
A
1066 }
1067 }
1068 } else if(rc<0){ /* end match keyword */
1069 /* could insert at this location. */
1070 insertHere = keywordStart;
1071 }
1072 keywordStart = nextSeparator;
1073 } /* end loop searching */
1074
1075 if(!keywordValue) {
1076 return bufLen; /* removal of non-extant keyword - no change */
1077 }
1078
1079 /* we know there is at least one keyword. */
1080 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1081 if(needLen >= bufferCapacity) {
1082 *status = U_BUFFER_OVERFLOW_ERROR;
1083 return needLen; /* no change */
1084 }
1085
1086 if(insertHere) {
1087 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1088 keywordStart = insertHere;
1089 } else {
1090 keywordStart = buffer+bufLen;
1091 *keywordStart = ';';
1092 keywordStart++;
1093 }
1094 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1095 keywordStart += keywordNameLen;
1096 *keywordStart = '=';
1097 keywordStart++;
1098 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1099 keywordStart+=keywordValueLen;
1100 if(insertHere) {
1101 *keywordStart = ';';
1102 keywordStart++;
1103 }
1104 buffer[needLen]=0;
1105 return needLen;
1106}
b75a7d8f 1107
374ca955 1108/* ### ID parsing implementation **************************************************/
b75a7d8f 1109
/* TRUE if 'a' is one of the letters that can start a special prefix:
 * x, X, i or I. The argument is evaluated more than once. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1120
374ca955
A
1121static char* _strnchr(const char* str, int32_t len, char c) {
1122 U_ASSERT(str != 0 && len >= 0);
1123 while (len-- != 0) {
1124 char d = *str;
1125 if (d == c) {
1126 return (char*) str;
1127 } else if (d == 0) {
1128 break;
1129 }
1130 ++str;
1131 }
1132 return NULL;
1133}
1134
1135/**
1136 * Lookup 'key' in the array 'list'. The array 'list' should contain
1137 * a NULL entry, followed by more entries, and a second NULL entry.
1138 *
1139 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1140 * COUNTRIES_3.
1141 */
b75a7d8f
A
1142static int16_t _findIndex(const char* const* list, const char* key)
1143{
1144 const char* const* anchor = list;
374ca955
A
1145 int32_t pass = 0;
1146
1147 /* Make two passes through two NULL-terminated arrays at 'list' */
1148 while (pass++ < 2) {
1149 while (*list) {
1150 if (uprv_strcmp(key, *list) == 0) {
1151 return (int16_t)(list - anchor);
1152 }
1153 list++;
b75a7d8f 1154 }
374ca955 1155 ++list; /* skip final NULL *CWB*/
b75a7d8f
A
1156 }
1157 return -1;
1158}
1159
1160/* count the length of src while copying it to dest; return strlen(src) */
4388f060 1161static inline int32_t
b75a7d8f
A
1162_copyCount(char *dest, int32_t destCapacity, const char *src) {
1163 const char *anchor;
1164 char c;
1165
1166 anchor=src;
1167 for(;;) {
1168 if((c=*src)==0) {
1169 return (int32_t)(src-anchor);
1170 }
1171 if(destCapacity<=0) {
1172 return (int32_t)((src-anchor)+uprv_strlen(src));
1173 }
1174 ++src;
1175 *dest++=c;
1176 --destCapacity;
1177 }
1178}
1179
729e4ab9 1180U_CFUNC const char*
73c04bcf
A
1181uloc_getCurrentCountryID(const char* oldID){
1182 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1183 if (offset >= 0) {
1184 return REPLACEMENT_COUNTRIES[offset];
1185 }
1186 return oldID;
1187}
729e4ab9 1188U_CFUNC const char*
73c04bcf
A
1189uloc_getCurrentLanguageID(const char* oldID){
1190 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1191 if (offset >= 0) {
1192 return REPLACEMENT_LANGUAGES[offset];
1193 }
1194 return oldID;
1195}
b75a7d8f
A
1196/*
1197 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1198 * avoid duplicating code to handle the earlier locale ID pieces
1199 * in the functions for the later ones by
1200 * setting the *pEnd pointer to where they stopped parsing
1201 *
1202 * TODO try to use this in Locale
1203 */
729e4ab9
A
1204U_CFUNC int32_t
1205ulocimp_getLanguage(const char *localeID,
1206 char *language, int32_t languageCapacity,
1207 const char **pEnd) {
b75a7d8f
A
1208 int32_t i=0;
1209 int32_t offset;
1210 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1211
1212 /* if it starts with i- or x- then copy that prefix */
1213 if(_isIDPrefix(localeID)) {
1214 if(i<languageCapacity) {
1215 language[i]=(char)uprv_tolower(*localeID);
1216 }
1217 if(i<languageCapacity) {
1218 language[i+1]='-';
1219 }
1220 i+=2;
1221 localeID+=2;
1222 }
1223
1224 /* copy the language as far as possible and count its length */
1225 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1226 if(i<languageCapacity) {
1227 language[i]=(char)uprv_tolower(*localeID);
1228 }
1229 if(i<3) {
4388f060 1230 U_ASSERT(i>=0);
b75a7d8f
A
1231 lang[i]=(char)uprv_tolower(*localeID);
1232 }
1233 i++;
1234 localeID++;
1235 }
1236
1237 if(i==3) {
1238 /* convert 3 character code to 2 character code if possible *CWB*/
374ca955 1239 offset=_findIndex(LANGUAGES_3, lang);
b75a7d8f 1240 if(offset>=0) {
374ca955 1241 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
b75a7d8f
A
1242 }
1243 }
1244
1245 if(pEnd!=NULL) {
1246 *pEnd=localeID;
1247 }
1248 return i;
1249}
1250
729e4ab9
A
1251U_CFUNC int32_t
1252ulocimp_getScript(const char *localeID,
1253 char *script, int32_t scriptCapacity,
1254 const char **pEnd)
b75a7d8f 1255{
374ca955 1256 int32_t idLen = 0;
b75a7d8f 1257
374ca955
A
1258 if (pEnd != NULL) {
1259 *pEnd = localeID;
b75a7d8f 1260 }
374ca955
A
1261
1262 /* copy the second item as far as possible and count its length */
4388f060
A
1263 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1264 && uprv_isASCIILetter(localeID[idLen])) {
374ca955 1265 idLen++;
b75a7d8f
A
1266 }
1267
374ca955
A
1268 /* If it's exactly 4 characters long, then it's a script and not a country. */
1269 if (idLen == 4) {
1270 int32_t i;
1271 if (pEnd != NULL) {
1272 *pEnd = localeID+idLen;
1273 }
1274 if(idLen > scriptCapacity) {
1275 idLen = scriptCapacity;
1276 }
1277 if (idLen >= 1) {
1278 script[0]=(char)uprv_toupper(*(localeID++));
1279 }
1280 for (i = 1; i < idLen; i++) {
1281 script[i]=(char)uprv_tolower(*(localeID++));
1282 }
1283 }
1284 else {
1285 idLen = 0;
1286 }
1287 return idLen;
b75a7d8f
A
1288}
1289
729e4ab9
A
1290U_CFUNC int32_t
1291ulocimp_getCountry(const char *localeID,
1292 char *country, int32_t countryCapacity,
1293 const char **pEnd)
374ca955 1294{
729e4ab9 1295 int32_t idLen=0;
374ca955 1296 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
b75a7d8f
A
1297 int32_t offset;
1298
1299 /* copy the country as far as possible and count its length */
729e4ab9
A
1300 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1301 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1302 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
b75a7d8f 1303 }
729e4ab9 1304 idLen++;
b75a7d8f
A
1305 }
1306
729e4ab9
A
1307 /* the country should be either length 2 or 3 */
1308 if (idLen == 2 || idLen == 3) {
1309 UBool gotCountry = FALSE;
1310 /* convert 3 character code to 2 character code if possible *CWB*/
1311 if(idLen==3) {
1312 offset=_findIndex(COUNTRIES_3, cnty);
1313 if(offset>=0) {
1314 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1315 gotCountry = TRUE;
1316 }
1317 }
1318 if (!gotCountry) {
1319 int32_t i = 0;
1320 for (i = 0; i < idLen; i++) {
1321 if (i < countryCapacity) {
1322 country[i]=(char)uprv_toupper(localeID[i]);
1323 }
1324 }
b75a7d8f 1325 }
729e4ab9
A
1326 localeID+=idLen;
1327 } else {
1328 idLen = 0;
b75a7d8f
A
1329 }
1330
1331 if(pEnd!=NULL) {
1332 *pEnd=localeID;
1333 }
729e4ab9
A
1334
1335 return idLen;
b75a7d8f
A
1336}
1337
374ca955
A
1338/**
1339 * @param needSeparator if true, then add leading '_' if any variants
1340 * are added to 'variant'
1341 */
1342static int32_t
1343_getVariantEx(const char *localeID,
1344 char prev,
1345 char *variant, int32_t variantCapacity,
1346 UBool needSeparator) {
b75a7d8f
A
1347 int32_t i=0;
1348
1349 /* get one or more variant tags and separate them with '_' */
1350 if(_isIDSeparator(prev)) {
1351 /* get a variant string after a '-' or '_' */
1352 while(!_isTerminator(*localeID)) {
374ca955
A
1353 if (needSeparator) {
1354 if (i<variantCapacity) {
1355 variant[i] = '_';
1356 }
1357 ++i;
1358 needSeparator = FALSE;
1359 }
b75a7d8f
A
1360 if(i<variantCapacity) {
1361 variant[i]=(char)uprv_toupper(*localeID);
1362 if(variant[i]=='-') {
1363 variant[i]='_';
1364 }
1365 }
1366 i++;
1367 localeID++;
1368 }
1369 }
1370
1371 /* if there is no variant tag after a '-' or '_' then look for '@' */
1372 if(i==0) {
1373 if(prev=='@') {
1374 /* keep localeID */
374ca955 1375 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
b75a7d8f
A
1376 ++localeID; /* point after the '@' */
1377 } else {
1378 return 0;
1379 }
1380 while(!_isTerminator(*localeID)) {
374ca955
A
1381 if (needSeparator) {
1382 if (i<variantCapacity) {
1383 variant[i] = '_';
1384 }
1385 ++i;
1386 needSeparator = FALSE;
1387 }
b75a7d8f
A
1388 if(i<variantCapacity) {
1389 variant[i]=(char)uprv_toupper(*localeID);
1390 if(variant[i]=='-' || variant[i]==',') {
1391 variant[i]='_';
1392 }
1393 }
1394 i++;
1395 localeID++;
1396 }
1397 }
374ca955 1398
b75a7d8f
A
1399 return i;
1400}
1401
374ca955
A
1402static int32_t
1403_getVariant(const char *localeID,
1404 char prev,
1405 char *variant, int32_t variantCapacity) {
1406 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1407}
1408
1409/**
1410 * Delete ALL instances of a variant from the given list of one or
1411 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1412 * @param variants the source string of one or more variants,
1413 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1414 * terminated; if it is, trailing zero will NOT be maintained.
1415 * @param variantsLen length of variants
1416 * @param toDelete variant to delete, without separators, e.g. "EURO"
1417 * or "PREEURO"; not zero terminated
1418 * @param toDeleteLen length of toDelete
1419 * @return number of characters deleted from variants
1420 */
1421static int32_t
1422_deleteVariant(char* variants, int32_t variantsLen,
46f4442e
A
1423 const char* toDelete, int32_t toDeleteLen)
1424{
374ca955
A
1425 int32_t delta = 0; /* number of chars deleted */
1426 for (;;) {
1427 UBool flag = FALSE;
1428 if (variantsLen < toDeleteLen) {
1429 return delta;
1430 }
1431 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1432 (variantsLen == toDeleteLen ||
46f4442e
A
1433 (flag=(variants[toDeleteLen] == '_'))))
1434 {
374ca955
A
1435 int32_t d = toDeleteLen + (flag?1:0);
1436 variantsLen -= d;
1437 delta += d;
46f4442e
A
1438 if (variantsLen > 0) {
1439 uprv_memmove(variants, variants+d, variantsLen);
1440 }
374ca955
A
1441 } else {
1442 char* p = _strnchr(variants, variantsLen, '_');
1443 if (p == NULL) {
1444 return delta;
1445 }
1446 ++p;
73c04bcf 1447 variantsLen -= (int32_t)(p - variants);
374ca955
A
1448 variants = p;
1449 }
1450 }
1451}
1452
1453/* Keyword enumeration */
1454
/* State behind a keyword enumeration created by uloc_openKeywordList(). */
typedef struct UKeywordsContext {
    /* heap copy of the keyword list: NUL-separated names ending in an empty string */
    char* keywords;
    /* iteration cursor; points at the next name inside 'keywords' */
    char* current;
} UKeywordsContext;
1459
1460static void U_CALLCONV
1461uloc_kw_closeKeywords(UEnumeration *enumerator) {
1462 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1463 uprv_free(enumerator->context);
1464 uprv_free(enumerator);
1465}
1466
1467static int32_t U_CALLCONV
4388f060 1468uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
374ca955
A
1469 char *kw = ((UKeywordsContext *)en->context)->keywords;
1470 int32_t result = 0;
1471 while(*kw) {
1472 result++;
1473 kw += uprv_strlen(kw)+1;
1474 }
1475 return result;
1476}
1477
1478static const char* U_CALLCONV
1479uloc_kw_nextKeyword(UEnumeration* en,
1480 int32_t* resultLength,
4388f060 1481 UErrorCode* /*status*/) {
374ca955
A
1482 const char* result = ((UKeywordsContext *)en->context)->current;
1483 int32_t len = 0;
1484 if(*result) {
73c04bcf 1485 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
374ca955
A
1486 ((UKeywordsContext *)en->context)->current += len+1;
1487 } else {
1488 result = NULL;
1489 }
1490 if (resultLength) {
1491 *resultLength = len;
1492 }
1493 return result;
1494}
1495
1496static void U_CALLCONV
1497uloc_kw_resetKeywords(UEnumeration* en,
4388f060 1498 UErrorCode* /*status*/) {
374ca955
A
1499 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1500}
1501
1502static const UEnumeration gKeywordsEnum = {
1503 NULL,
1504 NULL,
1505 uloc_kw_closeKeywords,
1506 uloc_kw_countKeywords,
1507 uenum_unextDefault,
1508 uloc_kw_nextKeyword,
1509 uloc_kw_resetKeywords
1510};
1511
1512U_CAPI UEnumeration* U_EXPORT2
1513uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
b75a7d8f 1514{
46f4442e
A
1515 UKeywordsContext *myContext = NULL;
1516 UEnumeration *result = NULL;
b75a7d8f 1517
46f4442e
A
1518 if(U_FAILURE(*status)) {
1519 return NULL;
1520 }
1521 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1522 /* Null pointer test */
1523 if (result == NULL) {
1524 *status = U_MEMORY_ALLOCATION_ERROR;
1525 return NULL;
1526 }
1527 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
4388f060 1528 myContext = reinterpret_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
46f4442e
A
1529 if (myContext == NULL) {
1530 *status = U_MEMORY_ALLOCATION_ERROR;
1531 uprv_free(result);
1532 return NULL;
1533 }
1534 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1535 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1536 myContext->keywords[keywordListSize] = 0;
1537 myContext->current = myContext->keywords;
1538 result->context = myContext;
1539 return result;
374ca955
A
1540}
1541
1542U_CAPI UEnumeration* U_EXPORT2
1543uloc_openKeywords(const char* localeID,
1544 UErrorCode* status)
1545{
1546 int32_t i=0;
1547 char keywords[256];
1548 int32_t keywordsCapacity = 256;
729e4ab9
A
1549 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1550 const char* tmpLocaleID;
1551
374ca955 1552 if(status==NULL || U_FAILURE(*status)) {
b75a7d8f
A
1553 return 0;
1554 }
1555
729e4ab9
A
1556 if (_hasBCP47Extension(localeID)) {
1557 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1558 } else {
1559 if (localeID==NULL) {
1560 localeID=uloc_getDefault();
1561 }
1562 tmpLocaleID=localeID;
b75a7d8f
A
1563 }
1564
374ca955 1565 /* Skip the language */
729e4ab9
A
1566 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1567 if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1568 const char *scriptID;
1569 /* Skip the script if available */
729e4ab9
A
1570 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1571 if(scriptID != tmpLocaleID+1) {
374ca955 1572 /* Found optional script */
729e4ab9 1573 tmpLocaleID = scriptID;
374ca955
A
1574 }
1575 /* Skip the Country */
729e4ab9
A
1576 if (_isIDSeparator(*tmpLocaleID)) {
1577 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1578 if(_isIDSeparator(*tmpLocaleID)) {
1579 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
374ca955 1580 }
b75a7d8f
A
1581 }
1582 }
1583
374ca955 1584 /* keywords are located after '@' */
729e4ab9
A
1585 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1586 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
374ca955
A
1587 }
1588
1589 if(i) {
1590 return uloc_openKeywordList(keywords, i, status);
1591 } else {
1592 return NULL;
b75a7d8f 1593 }
b75a7d8f
A
1594}
1595
b75a7d8f 1596
374ca955
A
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE when any bit of 'mask' is set in 'options'. Both arguments are
 * parenthesized so that expression arguments are masked as a whole. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The text "i-default", stored without a terminating NUL. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1605
374ca955
A
1606/**
1607 * Canonicalize the given localeID, to level 1 or to level 2,
1608 * depending on the options. To specify level 1, pass in options=0.
1609 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1610 *
1611 * This is the code underlying uloc_getName and uloc_canonicalize.
 *
 * @param localeID       the ID to process; when it carries no BCP47 extension,
 *                       NULL selects uloc_getDefault()
 * @param result         output buffer; when it is NULL or smaller than the
 *                       internal localeBuffer, the name is assembled in
 *                       localeBuffer and copied back at the end
 * @param resultCapacity capacity of result
 * @param options        0, or a combination of _ULOC_CANONICALIZE and
 *                       _ULOC_STRIP_KEYWORDS
 * @param err            error code; if it already indicates failure on entry,
 *                       the function returns 0 without doing anything
 * @return the value of u_terminateChars(result, resultCapacity, len, err),
 *         where len is the length of the assembled name
1612 */
1613static int32_t
1614_canonicalize(const char* localeID,
1615 char* result,
1616 int32_t resultCapacity,
1617 uint32_t options,
1618 UErrorCode* err) {
1619 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1620 char localeBuffer[ULOC_FULLNAME_CAPACITY];
729e4ab9 1621 char tempBuffer[ULOC_FULLNAME_CAPACITY];
46f4442e 1622 const char* origLocaleID;
729e4ab9 1623 const char* tmpLocaleID;
374ca955
A
1624 const char* keywordAssign = NULL;
1625 const char* separatorIndicator = NULL;
1626 const char* addKeyword = NULL;
1627 const char* addValue = NULL;
1628 char* name;
1629 char* variant = NULL; /* pointer into name, or NULL */
374ca955
A
1630
/* Do nothing when the caller's error code already signals a failure. */
1631 if (U_FAILURE(*err)) {
b75a7d8f
A
1632 return 0;
1633 }
1634
729e4ab9
A
/* IDs with a BCP47 extension are handed to _ConvertBCP47 together with
   tempBuffer (defined outside this section; presumably it leaves the ID to
   parse in tmpLocaleID -- confirm). Otherwise the ID is parsed as given, with
   NULL meaning the default locale. */
1635 if (_hasBCP47Extension(localeID)) {
1636 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1637 } else {
1638 if (localeID==NULL) {
1639 localeID=uloc_getDefault();
1640 }
1641 tmpLocaleID=localeID;
b75a7d8f 1642 }
729e4ab9
A
1643
/* Remember where the ID starts; tmpLocaleID is advanced while parsing. */
1644 origLocaleID=tmpLocaleID;
b75a7d8f 1645
374ca955
A
1646 /* if we are doing a full canonicalization, then put results in
1647 localeBuffer, if necessary; otherwise send them to result. */
729e4ab9 1648 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
4388f060 1649 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
374ca955 1650 name = localeBuffer;
4388f060 1651 nameCapacity = (int32_t)sizeof(localeBuffer);
374ca955
A
1652 } else {
1653 name = result;
1654 nameCapacity = resultCapacity;
1655 }
1656
b75a7d8f 1657 /* get all pieces, one after another, and separate with '_' */
729e4ab9 1658 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
73c04bcf
A
1659
/* An ID that starts with "i-default" is replaced by the default locale ID. */
1660 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1661 const char *d = uloc_getDefault();
1662
729e4ab9 1663 len = (int32_t)uprv_strlen(d);
73c04bcf
A
1664
1665 if (name != NULL) {
1666 uprv_strncpy(name, d, len);
1667 }
729e4ab9 1668 } else if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1669 const char *scriptID;
1670
b75a7d8f 1671 ++fieldCount;
374ca955
A
/* Throughout: len keeps counting even when the buffer is full, so that the
   full required length is known; writes happen only while len<nameCapacity. */
1672 if(len<nameCapacity) {
1673 name[len]='_';
b75a7d8f 1674 }
374ca955
A
1675 ++len;
1676
4388f060
A
1677 scriptSize=ulocimp_getScript(tmpLocaleID+1,
1678 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
374ca955
A
1679 if(scriptSize > 0) {
1680 /* Found optional script */
729e4ab9 1681 tmpLocaleID = scriptID;
b75a7d8f 1682 ++fieldCount;
374ca955 1683 len+=scriptSize;
729e4ab9 1684 if (_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1685 /* If there is something else, then we add the _ */
1686 if(len<nameCapacity) {
1687 name[len]='_';
1688 }
1689 ++len;
1690 }
1691 }
1692
729e4ab9
A
1693 if (_isIDSeparator(*tmpLocaleID)) {
1694 const char *cntryID;
4388f060
A
1695 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1696 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
729e4ab9
A
1697 if (cntrySize > 0) {
1698 /* Found optional country */
1699 tmpLocaleID = cntryID;
1700 len+=cntrySize;
1701 }
1702 if(_isIDSeparator(*tmpLocaleID)) {
1703 /* If there is something else, then we add the _ if we found country before.*/
1704 if (cntrySize > 0) {
1705 ++fieldCount;
1706 if(len<nameCapacity) {
1707 name[len]='_';
1708 }
1709 ++len;
374ca955 1710 }
729e4ab9 1711
4388f060
A
1712 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1713 (len<nameCapacity ? name+len : NULL), nameCapacity-len);
374ca955 1714 if (variantSize > 0) {
4388f060 1715 variant = len<nameCapacity ? name+len : NULL;
374ca955 1716 len += variantSize;
729e4ab9 1717 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
374ca955 1718 }
b75a7d8f 1719 }
b75a7d8f
A
1720 }
1721 }
1722
374ca955 1723 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
729e4ab9 1724 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
374ca955 1725 UBool done = FALSE;
b75a7d8f 1726 do {
729e4ab9 1727 char c = *tmpLocaleID;
374ca955
A
1728 switch (c) {
1729 case 0:
1730 case '@':
1731 done = TRUE;
1732 break;
1733 default:
1734 if (len<nameCapacity) {
1735 name[len] = c;
1736 }
1737 ++len;
729e4ab9 1738 ++tmpLocaleID;
374ca955
A
1739 break;
1740 }
1741 } while (!done);
1742 }
1743
1744 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
729e4ab9
A
1745 After this, tmpLocaleID either points to '@' or is NULL */
1746 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1747 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1748 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
374ca955
A
1749 }
1750
1751 /* Copy POSIX-style variant, if any [mr@FOO] */
1752 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
729e4ab9 1753 tmpLocaleID != NULL && keywordAssign == NULL) {
374ca955 1754 for (;;) {
729e4ab9 1755 char c = *tmpLocaleID;
374ca955
A
1756 if (c == 0) {
1757 break;
1758 }
1759 if (len<nameCapacity) {
1760 name[len] = c;
1761 }
1762 ++len;
729e4ab9 1763 ++tmpLocaleID;
374ca955
A
1764 }
1765 }
1766
/* Everything in this branch applies to level-2 canonicalization only. */
1767 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1768 /* Handle @FOO variant if @ is present and not followed by = */
729e4ab9 1769 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
374ca955
A
1770 int32_t posixVariantSize;
1771 /* Add missing '_' if needed */
1772 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1773 do {
1774 if(len<nameCapacity) {
1775 name[len]='_';
1776 }
1777 ++len;
1778 ++fieldCount;
1779 } while(fieldCount<2);
1780 }
729e4ab9 1781 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
374ca955
A
1782 (UBool)(variantSize > 0));
1783 if (posixVariantSize > 0) {
1784 if (variant == NULL) {
1785 variant = name+len;
1786 }
1787 len += posixVariantSize;
1788 variantSize += posixVariantSize;
b75a7d8f 1789 }
374ca955
A
1790 }
1791
46f4442e
A
1792 /* Handle generic variants first */
1793 if (variant) {
/* The first VARIANT_MAP entry that _deleteVariant removes from the variant
   supplies the keyword/value pair appended further below. */
1794 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1795 const char* variantToCompare = VARIANT_MAP[j].variant;
1796 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1797 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1798 len -= variantLen;
1799 if (variantLen > 0) {
b25be066 1800 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
46f4442e
A
1801 --len;
1802 }
1803 addKeyword = VARIANT_MAP[j].keyword;
1804 addValue = VARIANT_MAP[j].value;
1805 break;
1806 }
1807 }
b25be066 1808 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
46f4442e
A
1809 --len;
1810 }
374ca955
A
1811 }
1812
1813 /* Look up the ID in the canonicalization map */
1814 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1815 const char* id = CANONICALIZE_MAP[j].id;
73c04bcf 1816 int32_t n = (int32_t)uprv_strlen(id);
374ca955 1817 if (len == n && uprv_strncmp(name, id, n) == 0) {
729e4ab9 1818 if (n == 0 && tmpLocaleID != NULL) {
374ca955
A
1819 break; /* Don't remap "" if keywords present */
1820 }
1821 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
46f4442e
A
1822 if (CANONICALIZE_MAP[j].keyword) {
1823 addKeyword = CANONICALIZE_MAP[j].keyword;
1824 addValue = CANONICALIZE_MAP[j].value;
1825 }
374ca955
A
1826 break;
1827 }
1828 }
374ca955
A
1829 }
1830
/* Unless keywords are to be stripped: append the ID's own keywords (passing
   along any keyword/value produced by the maps above), or else append just
   that produced keyword/value pair as "@keyword=value". */
1831 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
729e4ab9 1832 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
374ca955
A
1833 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1834 if(len<nameCapacity) {
1835 name[len]='@';
1836 }
1837 ++len;
b75a7d8f 1838 ++fieldCount;
4388f060
A
1839 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1840 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
374ca955
A
1841 } else if (addKeyword != NULL) {
1842 U_ASSERT(addValue != NULL);
1843 /* inelegant but works -- later make _getKeywords do this? */
1844 len += _copyCount(name+len, nameCapacity-len, "@");
1845 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1846 len += _copyCount(name+len, nameCapacity-len, "=");
1847 len += _copyCount(name+len, nameCapacity-len, addValue);
1848 }
1849 }
1850
/* When the name was assembled in localeBuffer, copy as much of it as fits
   into the caller's buffer. */
46f4442e 1851 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
374ca955
A
1852 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1853 }
1854
1855 return u_terminateChars(result, resultCapacity, len, err);
1856}
1857
1858/* ### ID parsing API **************************************************/
1859
1860U_CAPI int32_t U_EXPORT2
1861uloc_getParent(const char* localeID,
1862 char* parent,
1863 int32_t parentCapacity,
1864 UErrorCode* err)
1865{
1866 const char *lastUnderscore;
1867 int32_t i;
1868
1869 if (U_FAILURE(*err))
1870 return 0;
1871
1872 if (localeID == NULL)
1873 localeID = uloc_getDefault();
1874
1875 lastUnderscore=uprv_strrchr(localeID, '_');
1876 if(lastUnderscore!=NULL) {
1877 i=(int32_t)(lastUnderscore-localeID);
1878 } else {
1879 i=0;
b75a7d8f 1880 }
374ca955 1881
73c04bcf 1882 if(i>0 && parent != localeID) {
374ca955
A
1883 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1884 }
1885 return u_terminateChars(parent, parentCapacity, i, err);
b75a7d8f 1886}
374ca955
A
1887
1888U_CAPI int32_t U_EXPORT2
1889uloc_getLanguage(const char* localeID,
1890 char* language,
1891 int32_t languageCapacity,
1892 UErrorCode* err)
1893{
1894 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1895 int32_t i=0;
1896
1897 if (err==NULL || U_FAILURE(*err)) {
1898 return 0;
1899 }
1900
1901 if(localeID==NULL) {
1902 localeID=uloc_getDefault();
1903 }
1904
729e4ab9 1905 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
374ca955
A
1906 return u_terminateChars(language, languageCapacity, i, err);
1907}
1908
1909U_CAPI int32_t U_EXPORT2
1910uloc_getScript(const char* localeID,
1911 char* script,
1912 int32_t scriptCapacity,
1913 UErrorCode* err)
1914{
1915 int32_t i=0;
1916
1917 if(err==NULL || U_FAILURE(*err)) {
1918 return 0;
1919 }
1920
1921 if(localeID==NULL) {
1922 localeID=uloc_getDefault();
1923 }
1924
1925 /* skip the language */
729e4ab9 1926 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955 1927 if(_isIDSeparator(*localeID)) {
729e4ab9 1928 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
374ca955
A
1929 }
1930 return u_terminateChars(script, scriptCapacity, i, err);
1931}
1932
1933U_CAPI int32_t U_EXPORT2
1934uloc_getCountry(const char* localeID,
1935 char* country,
1936 int32_t countryCapacity,
1937 UErrorCode* err)
1938{
1939 int32_t i=0;
1940
1941 if(err==NULL || U_FAILURE(*err)) {
1942 return 0;
1943 }
1944
1945 if(localeID==NULL) {
1946 localeID=uloc_getDefault();
1947 }
1948
1949 /* Skip the language */
729e4ab9 1950 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955
A
1951 if(_isIDSeparator(*localeID)) {
1952 const char *scriptID;
1953 /* Skip the script if available */
729e4ab9 1954 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
374ca955
A
1955 if(scriptID != localeID+1) {
1956 /* Found optional script */
1957 localeID = scriptID;
1958 }
1959 if(_isIDSeparator(*localeID)) {
729e4ab9 1960 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
374ca955
A
1961 }
1962 }
1963 return u_terminateChars(country, countryCapacity, i, err);
1964}
1965
1966U_CAPI int32_t U_EXPORT2
1967uloc_getVariant(const char* localeID,
1968 char* variant,
1969 int32_t variantCapacity,
1970 UErrorCode* err)
1971{
729e4ab9
A
1972 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1973 const char* tmpLocaleID;
374ca955 1974 int32_t i=0;
374ca955
A
1975
1976 if(err==NULL || U_FAILURE(*err)) {
1977 return 0;
1978 }
1979
729e4ab9
A
1980 if (_hasBCP47Extension(localeID)) {
1981 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1982 } else {
1983 if (localeID==NULL) {
1984 localeID=uloc_getDefault();
1985 }
1986 tmpLocaleID=localeID;
374ca955
A
1987 }
1988
1989 /* Skip the language */
729e4ab9
A
1990 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1991 if(_isIDSeparator(*tmpLocaleID)) {
374ca955
A
1992 const char *scriptID;
1993 /* Skip the script if available */
729e4ab9
A
1994 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1995 if(scriptID != tmpLocaleID+1) {
374ca955 1996 /* Found optional script */
729e4ab9 1997 tmpLocaleID = scriptID;
374ca955
A
1998 }
1999 /* Skip the Country */
729e4ab9
A
2000 if (_isIDSeparator(*tmpLocaleID)) {
2001 const char *cntryID;
2002 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
2003 if (cntryID != tmpLocaleID+1) {
2004 /* Found optional country */
2005 tmpLocaleID = cntryID;
2006 }
2007 if(_isIDSeparator(*tmpLocaleID)) {
2008 /* If there was no country ID, skip a possible extra IDSeparator */
2009 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2010 tmpLocaleID++;
2011 }
2012 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
374ca955
A
2013 }
2014 }
2015 }
2016
2017 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2018 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2019/*
2020 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2021 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2022 }
2023*/
2024 return u_terminateChars(variant, variantCapacity, i, err);
2025}
2026
/**
 * Level-1 entry point: forwards all arguments to _canonicalize with options=0,
 * i.e. with neither _ULOC_CANONICALIZE nor _ULOC_STRIP_KEYWORDS set, and
 * returns its result unchanged.
 */
2027U_CAPI int32_t U_EXPORT2
2028uloc_getName(const char* localeID,
2029 char* name,
2030 int32_t nameCapacity,
2031 UErrorCode* err)
2032{
2033 return _canonicalize(localeID, name, nameCapacity, 0, err);
2034}
2035
/**
 * Like uloc_getName, but forwards to _canonicalize with _ULOC_STRIP_KEYWORDS,
 * so the keyword-appending step of _canonicalize is skipped. Returns the
 * result of _canonicalize unchanged.
 */
2036U_CAPI int32_t U_EXPORT2
2037uloc_getBaseName(const char* localeID,
2038 char* name,
2039 int32_t nameCapacity,
2040 UErrorCode* err)
2041{
2042 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2043}
2044
/**
 * Level-2 entry point: forwards all arguments to _canonicalize with
 * options=_ULOC_CANONICALIZE (keywords are kept) and returns its result
 * unchanged.
 */
2045U_CAPI int32_t U_EXPORT2
2046uloc_canonicalize(const char* localeID,
2047 char* name,
2048 int32_t nameCapacity,
2049 UErrorCode* err)
2050{
2051 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2052}
2053
b75a7d8f
A
2054U_CAPI const char* U_EXPORT2
2055uloc_getISO3Language(const char* localeID)
2056{
374ca955
A
2057 int16_t offset;
2058 char lang[ULOC_LANG_CAPACITY];
2059 UErrorCode err = U_ZERO_ERROR;
2060
2061 if (localeID == NULL)
2062 {
2063 localeID = uloc_getDefault();
2064 }
2065 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2066 if (U_FAILURE(err))
2067 return "";
2068 offset = _findIndex(LANGUAGES, lang);
2069 if (offset < 0)
2070 return "";
2071 return LANGUAGES_3[offset];
b75a7d8f
A
2072}
2073
2074U_CAPI const char* U_EXPORT2
2075uloc_getISO3Country(const char* localeID)
2076{
2077 int16_t offset;
374ca955 2078 char cntry[ULOC_LANG_CAPACITY];
b75a7d8f
A
2079 UErrorCode err = U_ZERO_ERROR;
2080
2081 if (localeID == NULL)
2082 {
2083 localeID = uloc_getDefault();
2084 }
374ca955 2085 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
b75a7d8f
A
2086 if (U_FAILURE(err))
2087 return "";
374ca955 2088 offset = _findIndex(COUNTRIES, cntry);
b75a7d8f
A
2089 if (offset < 0)
2090 return "";
2091
374ca955 2092 return COUNTRIES_3[offset];
b75a7d8f
A
2093}
2094
2095U_CAPI uint32_t U_EXPORT2
2096uloc_getLCID(const char* localeID)
2097{
374ca955
A
2098 UErrorCode status = U_ZERO_ERROR;
2099 char langID[ULOC_FULLNAME_CAPACITY];
2100
2101 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2102 if (U_FAILURE(status)) {
2103 return 0;
b75a7d8f 2104 }
374ca955
A
2105
2106 return uprv_convertToLCID(langID, localeID, &status);
2107}
2108
73c04bcf
A
2109U_CAPI int32_t U_EXPORT2
2110uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2111 UErrorCode *status)
2112{
2113 int32_t length;
2114 const char *posix = uprv_convertToPosix(hostid, status);
2115 if (U_FAILURE(*status) || posix == NULL) {
2116 return 0;
2117 }
2118 length = (int32_t)uprv_strlen(posix);
2119 if (length+1 > localeCapacity) {
2120 *status = U_BUFFER_OVERFLOW_ERROR;
2121 }
2122 else {
2123 uprv_strcpy(locale, posix);
2124 }
2125 return length;
2126}
2127
374ca955
A
2128/* ### Default locale **************************************************/
2129
/* C API accessor for the default locale ID: returns whatever
   locale_get_default() (declared near the top of this file) returns. */
2130U_CAPI const char* U_EXPORT2
2131uloc_getDefault()
2132{
2133 return locale_get_default();
2134}
2135
2136U_CAPI void U_EXPORT2
2137uloc_setDefault(const char* newDefaultLocale,
2138 UErrorCode* err)
2139{
2140 if (U_FAILURE(*err))
2141 return;
2142 /* the error code isn't currently used for anything by this function*/
b75a7d8f 2143
374ca955
A
2144 /* propagate change to C++ */
2145 locale_set_default(newDefaultLocale);
b75a7d8f
A
2146}
2147
729e4ab9
A
2148/**
2149 * Returns a list of all language codes defined in ISO 639. This is a pointer
2150 * to an array of pointers to arrays of char. All of these pointers are owned
2151 * by ICU-- do not delete them, and do not write through them. The array is
2152 * terminated with a null pointer.
 *
 * The returned pointer is the file-level LANGUAGES table itself, not a copy.
2153 */
2154U_CAPI const char* const* U_EXPORT2
2155uloc_getISOLanguages()
2156{
2157 return LANGUAGES;
2158}
374ca955 2159
729e4ab9
A
2160/**
2161 * Returns a list of all 2-letter country codes defined in ISO 3166. This is a
2162 * pointer to an array of pointers to arrays of char. All of these pointers are
2163 * owned by ICU-- do not delete them, and do not write through them. The array is
2164 * terminated with a null pointer.
 *
 * The returned pointer is the file-level COUNTRIES table itself, not a copy.
b75a7d8f 2165 */
729e4ab9
A
2166U_CAPI const char* const* U_EXPORT2
2167uloc_getISOCountries()
b75a7d8f 2168{
729e4ab9
A
2169 return COUNTRIES;
2170}
73c04bcf 2171
b75a7d8f 2172
729e4ab9
A
2173/* this function to be moved into cstring.c later */
2174static char gDecimal = 0;
b75a7d8f 2175
729e4ab9
A
2176static /* U_CAPI */
2177double
2178/* U_EXPORT2 */
2179_uloc_strtod(const char *start, char **end) {
2180 char *decimal;
2181 char *myEnd;
2182 char buf[30];
2183 double rv;
2184 if (!gDecimal) {
2185 char rep[5];
2186 /* For machines that decide to change the decimal on you,
2187 and try to be too smart with localization.
2188 This normally should be just a '.'. */
2189 sprintf(rep, "%+1.1f", 1.0);
2190 gDecimal = rep[2];
b75a7d8f 2191 }
b75a7d8f 2192
729e4ab9
A
2193 if(gDecimal == '.') {
2194 return uprv_strtod(start, end); /* fall through to OS */
b75a7d8f 2195 } else {
729e4ab9
A
2196 uprv_strncpy(buf, start, 29);
2197 buf[29]=0;
2198 decimal = uprv_strchr(buf, '.');
2199 if(decimal) {
2200 *decimal = gDecimal;
46f4442e 2201 } else {
729e4ab9 2202 return uprv_strtod(start, end); /* no decimal point */
46f4442e 2203 }
729e4ab9
A
2204 rv = uprv_strtod(buf, &myEnd);
2205 if(end) {
2206 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
b75a7d8f 2207 }
729e4ab9 2208 return rv;
374ca955 2209 }
374ca955
A
2210}
2211
729e4ab9
A
/* One entry of a parsed Accept-Language list, as built and sorted by
   uloc_acceptLanguageFromHTTP. */
2212typedef struct {
/* weight of the entry: the parsed ";q=" value, or 1.0 when none is given */
2213 float q;
2214 int32_t dummy; /* to avoid uninitialized memory copy from qsort */
/* locale ID string allocated with uprv_strndup/uprv_strdup; released with uprv_free */
2215 char *locale;
2216} _acceptLangItem;
b75a7d8f 2217
729e4ab9 2218static int32_t U_CALLCONV
4388f060 2219uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
729e4ab9
A
2220{
2221 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2222 const _acceptLangItem *bb = (const _acceptLangItem*)b;
b75a7d8f 2223
729e4ab9
A
2224 int32_t rc = 0;
2225 if(bb->q < aa->q) {
2226 rc = -1; /* A > B */
2227 } else if(bb->q > aa->q) {
2228 rc = 1; /* A < B */
2229 } else {
2230 rc = 0; /* A = B */
b75a7d8f
A
2231 }
2232
729e4ab9
A
2233 if(rc==0) {
2234 rc = uprv_stricmp(aa->locale, bb->locale);
b75a7d8f
A
2235 }
2236
729e4ab9
A
2237#if defined(ULOC_DEBUG)
2238 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2239 aa->locale, aa->q,
2240 bb->locale, bb->q,
2241 rc);*/
2242#endif
374ca955 2243
729e4ab9 2244 return rc;
374ca955
A
2245}
2246
729e4ab9
A
2247/*
2248mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2249*/
374ca955 2250
b75a7d8f 2251U_CAPI int32_t U_EXPORT2
729e4ab9
A
2252uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2253 const char *httpAcceptLanguage,
2254 UEnumeration* availableLocales,
2255 UErrorCode *status)
374ca955 2256{
729e4ab9
A
2257 _acceptLangItem *j;
2258 _acceptLangItem smallBuffer[30];
2259 char **strs;
2260 char tmp[ULOC_FULLNAME_CAPACITY +1];
2261 int32_t n = 0;
2262 const char *itemEnd;
2263 const char *paramEnd;
2264 const char *s;
2265 const char *t;
2266 int32_t res;
2267 int32_t i;
2268 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2269 int32_t jSize;
2270 char *tempstr; /* Use for null pointer check */
b75a7d8f 2271
729e4ab9
A
2272 j = smallBuffer;
2273 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2274 if(U_FAILURE(*status)) {
2275 return -1;
b75a7d8f
A
2276 }
2277
729e4ab9
A
2278 for(s=httpAcceptLanguage;s&&*s;) {
2279 while(isspace(*s)) /* eat space at the beginning */
2280 s++;
2281 itemEnd=uprv_strchr(s,',');
2282 paramEnd=uprv_strchr(s,';');
2283 if(!itemEnd) {
2284 itemEnd = httpAcceptLanguage+l; /* end of string */
b75a7d8f 2285 }
729e4ab9
A
2286 if(paramEnd && paramEnd<itemEnd) {
2287 /* semicolon (;) is closer than end (,) */
2288 t = paramEnd+1;
2289 if(*t=='q') {
2290 t++;
2291 }
2292 while(isspace(*t)) {
2293 t++;
2294 }
2295 if(*t=='=') {
2296 t++;
2297 }
2298 while(isspace(*t)) {
2299 t++;
2300 }
2301 j[n].q = (float)_uloc_strtod(t,NULL);
2302 } else {
2303 /* no semicolon - it's 1.0 */
2304 j[n].q = 1.0f;
2305 paramEnd = itemEnd;
374ca955 2306 }
46f4442e 2307 j[n].dummy=0;
374ca955
A
2308 /* eat spaces prior to semi */
2309 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2310 ;
46f4442e
A
2311 /* Check for null pointer from uprv_strndup */
2312 tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2313 if (tempstr == NULL) {
2314 *status = U_MEMORY_ALLOCATION_ERROR;
2315 return -1;
2316 }
2317 j[n].locale = tempstr;
374ca955
A
2318 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2319 if(strcmp(j[n].locale,tmp)) {
2320 uprv_free(j[n].locale);
2321 j[n].locale=uprv_strdup(tmp);
2322 }
2323#if defined(ULOC_DEBUG)
2324 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2325#endif
2326 n++;
2327 s = itemEnd;
2328 while(*s==',') { /* eat duplicate commas */
2329 s++;
2330 }
2331 if(n>=jSize) {
46f4442e 2332 if(j==smallBuffer) { /* overflowed the small buffer. */
4388f060 2333 j = reinterpret_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
46f4442e
A
2334 if(j!=NULL) {
2335 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2336 }
374ca955 2337#if defined(ULOC_DEBUG)
46f4442e 2338 fprintf(stderr,"malloced at size %d\n", jSize);
374ca955 2339#endif
46f4442e 2340 } else {
4388f060 2341 j = reinterpret_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
374ca955 2342#if defined(ULOC_DEBUG)
46f4442e 2343 fprintf(stderr,"re-alloced at size %d\n", jSize);
374ca955 2344#endif
46f4442e
A
2345 }
2346 jSize *= 2;
2347 if(j==NULL) {
2348 *status = U_MEMORY_ALLOCATION_ERROR;
2349 return -1;
2350 }
374ca955
A
2351 }
2352 }
2353 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2354 if(U_FAILURE(*status)) {
46f4442e 2355 if(j != smallBuffer) {
374ca955 2356#if defined(ULOC_DEBUG)
46f4442e 2357 fprintf(stderr,"freeing j %p\n", j);
374ca955 2358#endif
46f4442e
A
2359 uprv_free(j);
2360 }
2361 return -1;
374ca955 2362 }
4388f060 2363 strs = reinterpret_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
46f4442e
A
2364 /* Check for null pointer */
2365 if (strs == NULL) {
2366 uprv_free(j); /* Free to avoid memory leak */
2367 *status = U_MEMORY_ALLOCATION_ERROR;
2368 return -1;
2369 }
374ca955
A
2370 for(i=0;i<n;i++) {
2371#if defined(ULOC_DEBUG)
2372 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2373#endif
2374 strs[i]=j[i].locale;
2375 }
2376 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2377 (const char**)strs, n, availableLocales, status);
2378 for(i=0;i<n;i++) {
2379 uprv_free(strs[i]);
2380 }
2381 uprv_free(strs);
2382 if(j != smallBuffer) {
2383#if defined(ULOC_DEBUG)
46f4442e 2384 fprintf(stderr,"freeing j %p\n", j);
374ca955 2385#endif
46f4442e 2386 uprv_free(j);
374ca955
A
2387 }
2388 return res;
2389}
2390
2391
2392U_CAPI int32_t U_EXPORT2
2393uloc_acceptLanguage(char *result, int32_t resultAvailable,
2394 UAcceptResult *outResult, const char **acceptList,
2395 int32_t acceptListCount,
2396 UEnumeration* availableLocales,
2397 UErrorCode *status)
2398{
2399 int32_t i,j;
2400 int32_t len;
2401 int32_t maxLen=0;
2402 char tmp[ULOC_FULLNAME_CAPACITY+1];
2403 const char *l;
2404 char **fallbackList;
2405 if(U_FAILURE(*status)) {
2406 return -1;
2407 }
4388f060 2408 fallbackList = reinterpret_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
374ca955 2409 if(fallbackList==NULL) {
46f4442e
A
2410 *status = U_MEMORY_ALLOCATION_ERROR;
2411 return -1;
374ca955
A
2412 }
2413 for(i=0;i<acceptListCount;i++) {
2414#if defined(ULOC_DEBUG)
2415 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2416#endif
2417 while((l=uenum_next(availableLocales, NULL, status))) {
2418#if defined(ULOC_DEBUG)
2419 fprintf(stderr," %s\n", l);
2420#endif
73c04bcf 2421 len = (int32_t)uprv_strlen(l);
374ca955
A
2422 if(!uprv_strcmp(acceptList[i], l)) {
2423 if(outResult) {
2424 *outResult = ULOC_ACCEPT_VALID;
2425 }
2426#if defined(ULOC_DEBUG)
2427 fprintf(stderr, "MATCH! %s\n", l);
2428#endif
2429 if(len>0) {
2430 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2431 }
2432 for(j=0;j<i;j++) {
2433 uprv_free(fallbackList[j]);
2434 }
2435 uprv_free(fallbackList);
2436 return u_terminateChars(result, resultAvailable, len, status);
2437 }
2438 if(len>maxLen) {
2439 maxLen = len;
2440 }
2441 }
2442 uenum_reset(availableLocales, status);
2443 /* save off parent info */
2444 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2445 fallbackList[i] = uprv_strdup(tmp);
2446 } else {
2447 fallbackList[i]=0;
2448 }
2449 }
2450
2451 for(maxLen--;maxLen>0;maxLen--) {
2452 for(i=0;i<acceptListCount;i++) {
2453 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2454#if defined(ULOC_DEBUG)
2455 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2456#endif
2457 while((l=uenum_next(availableLocales, NULL, status))) {
2458#if defined(ULOC_DEBUG)
2459 fprintf(stderr," %s\n", l);
2460#endif
73c04bcf 2461 len = (int32_t)uprv_strlen(l);
374ca955
A
2462 if(!uprv_strcmp(fallbackList[i], l)) {
2463 if(outResult) {
2464 *outResult = ULOC_ACCEPT_FALLBACK;
2465 }
2466#if defined(ULOC_DEBUG)
2467 fprintf(stderr, "fallback MATCH! %s\n", l);
2468#endif
2469 if(len>0) {
2470 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2471 }
73c04bcf
A
2472 for(j=0;j<acceptListCount;j++) {
2473 uprv_free(fallbackList[j]);
374ca955
A
2474 }
2475 uprv_free(fallbackList);
73c04bcf 2476 return u_terminateChars(result, resultAvailable, len, status);
374ca955
A
2477 }
2478 }
2479 uenum_reset(availableLocales, status);
2480
2481 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2482 uprv_free(fallbackList[i]);
2483 fallbackList[i] = uprv_strdup(tmp);
2484 } else {
2485 uprv_free(fallbackList[i]);
2486 fallbackList[i]=0;
2487 }
2488 }
2489 }
2490 if(outResult) {
2491 *outResult = ULOC_ACCEPT_FAILED;
2492 }
2493 }
2494 for(i=0;i<acceptListCount;i++) {
2495 uprv_free(fallbackList[i]);
2496 }
2497 uprv_free(fallbackList);
2498 return -1;
b75a7d8f 2499}
374ca955
A
2500
2501/*eof*/