]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/common/uloc.cpp
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / common / uloc.cpp
... / ...
CommitLineData
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 1997-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8*
9* File ULOC.CPP
10*
11* Modification History:
12*
13* Date Name Description
14* 04/01/97 aliu Creation.
15* 08/21/98 stephen JDK 1.2 sync
16* 12/08/98 rtg New Locale implementation and C API
17* 03/15/99 damiba overhaul.
18* 04/06/99 stephen changed setDefault() to realloc and copy
19* 06/14/99 stephen Changed calls to ures_open for new params
20* 07/21/99 stephen Modified setDefault() to propagate to C++
21* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
22* brought canonicalization code into line with spec
23*****************************************************************************/
24
25/*
26 POSIX's locale format, from putil.c: [no spaces]
27
28 ll [ _CC ] [ . MM ] [ @ VV]
29
30 l = lang, C = ctry, M = charmap, V = variant
31*/
32
33#include "unicode/utypes.h"
34#include "unicode/ustring.h"
35#include "unicode/uloc.h"
36
37#include "putilimp.h"
38#include "ustr_imp.h"
39#include "ulocimp.h"
40#include "umutex.h"
41#include "cstring.h"
42#include "cmemory.h"
43#include "locmap.h"
44#include "uarrsort.h"
45#include "uenumimp.h"
46#include "uassert.h"
47#include "charstr.h"
48
49#include <stdio.h> /* for sprintf */
50
51U_NAMESPACE_USE
52
53/* ### Declarations **************************************************/
54
55/* Locale stuff from locid.cpp */
56U_CFUNC void locale_set_default(const char *id);
57U_CFUNC const char *locale_get_default(void);
58U_CFUNC int32_t
59locale_getKeywords(const char *localeID,
60 char prev,
61 char *keywords, int32_t keywordCapacity,
62 char *values, int32_t valuesCapacity, int32_t *valLen,
63 UBool valuesToo,
64 UErrorCode *status);
65
66/* ### Data tables **************************************************/
67
68/**
69 * Table of language codes, both 2- and 3-letter, with preference
70 * given to 2-letter codes where possible. Includes 3-letter codes
71 * that lack a 2-letter equivalent.
72 *
73 * This list must be in sorted order. This list is returned directly
74 * to the user by some API.
75 *
76 * This list must be kept in sync with LANGUAGES_3, with corresponding
77 * entries matched.
78 *
79 * This table should be terminated with a NULL entry, followed by a
80 * second list, and another NULL entry. The first list is visible to
81 * user code when this array is returned by API. The second list
82 * contains codes we support, but do not expose through user API.
83 *
84 * Notes
85 *
86 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
87 * include the revisions up to 2001/7/27 *CWB*
88 *
89 * The 3 character codes are the terminology codes like RFC 3066. This
90 * is compatible with prior ICU codes
91 *
92 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
93 * table but now at the end of the table because 3 character codes are
94 * duplicates. This avoids bad searches going from 3 to 2 character
95 * codes.
96 *
97 * The range qaa-qtz is reserved for local use
98 */
99/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
100/* ISO639 table version is 20150505 */
101/* Subsequent hand addition of selected languages */
/* Primary language-code table: a public list, a NULL, a private list of withdrawn codes, and a final NULL. */
static const char * const LANGUAGES[] = {
    "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
    "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
    "aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
    "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
    "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
    "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
    "ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
    "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
    "cs", "csb", "cu", "cv", "cy",
    "da", "dak", "dar", "dav", "de", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz", "dzg",
    "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
    "en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
    "ext",
    "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
    "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
    "gur", "guz", "gv", "gwi",
    "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
    "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
    "hup", "hy", "hz",
    "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
    "ilo", "inh", "io", "is", "it", "iu", "izh",
    "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
    "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
    "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
    "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
    "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
    "kv", "kw", "ky",
    "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
    "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
    "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
    "lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
    "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
    "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj",
    "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my", "mye", "myv", "mzn",
    "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
    "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
    "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
    "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
    "oc", "oj", "om", "or", "os", "osa", "ota",
    "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
    "pon", "prg", "pro", "ps", "pt",
    "qu", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
    "rof", "rom", "rtm", "ru", "rue", "rug", "rup",
    "rw", "rwk",
    "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
    "se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
    "sgs", "shi", "shn", "shu", "si", "sid", "sk",
    "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
    "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
    "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
    "sv", "sw", "swb", "swc", "syc", "syr", "szl",
    "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
    "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
    "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
    "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
    "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
    "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
    "xal", "xh", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
    "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
    "zun", "zxx", "zza",
NULL, /* terminates the first (public) list */
    "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
NULL /* terminates the second (private) list */
};
191
/*
 * Withdrawn 2-letter language codes and, in REPLACEMENT_LANGUAGES, the code
 * listed at the same index for each of them ("in" -> "id", "iw" -> "he", ...).
 * NOTE(review): the arrays appear to be index-aligned and the two trailing
 * NULLs mirror the two-list layout of LANGUAGES; confirm against the lookup
 * code, which is not visible here.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
198
199/**
200 * Table of 3-letter language codes.
201 *
202 * This is a lookup table used to convert 3-letter language codes to
203 * their 2-letter equivalent, where possible. It must be kept in sync
204 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
205 * same language as LANGUAGES_3[i]. The commented-out lines are
206 * copied from LANGUAGES to make eyeballing this baby easier.
207 *
208 * Where a 3-letter language code has no 2-letter equivalent, the
209 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
210 *
211 * This table should be terminated with a NULL entry, followed by a
212 * second list, and another NULL entry. The two lists correspond to
213 * the two lists in LANGUAGES.
214 */
215/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
216/* ISO639 table version is 20150505 */
217/* Subsequent hand addition of selected languages */
/* 3-letter counterparts of LANGUAGES, entry for entry, using the same two-list, double-NULL layout. */
static const char * const LANGUAGES_3[] = {
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
NULL, /* terminates the first list */
/* "in", "iw", "ji", "jw", "sh", */
    "ind", "heb", "yid", "jaw", "srp",
NULL /* terminates the second list */
};
308
309/**
310 * Table of 2-letter country codes.
311 *
312 * This list must be in sorted order. This list is returned directly
313 * to the user by some API.
314 *
315 * This list must be kept in sync with COUNTRIES_3, with corresponding
316 * entries matched.
317 *
318 * This table should be terminated with a NULL entry, followed by a
319 * second list, and another NULL entry. The first list is visible to
320 * user code when this array is returned by API. The second list
321 * contains codes we support, but do not expose through user API.
322 *
323 * Notes:
324 *
325 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
326 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
327 * new codes keeping the old ones for compatibility updated to include
328 * 1999/12/03 revisions *CWB*
329 *
330 * RO(ROM) is now RO(ROU) according to
331 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
332 */
/* 2-letter country codes: a public list, a NULL, a private list of obsolete codes, and a final NULL. */
static const char * const COUNTRIES[] = {
    "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM",
    "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
    "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
    "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
    "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
    "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR",
    "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
    "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
    "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
    "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
    "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
    "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
    "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
    "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
    "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
    "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
    "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
    "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
    "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
    "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
    "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
    "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
    "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
    "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
    "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
    "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ",
    "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
    "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
    "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
    "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
NULL, /* terminates the first (public) list */
    "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
NULL /* terminates the second (private) list */
};
368
/*
 * Deprecated 2-letter country codes and, in REPLACEMENT_COUNTRIES, the code
 * listed at the same index for each of them (the commented-out row inside
 * REPLACEMENT_COUNTRIES repeats the deprecated codes to show the pairing).
 * NOTE(review): the lookup code that consumes these arrays is not visible
 * here; confirm that it relies on the index alignment and the double NULL.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */
};
376
377/**
378 * Table of 3-letter country codes.
379 *
380 * This is a lookup table used to convert 3-letter country codes to
381 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
382 * For all valid i, COUNTRIES[i] must refer to the same country as
383 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
384 * to make eyeballing this baby easier.
385 *
386 * This table should be terminated with a NULL entry, followed by a
387 * second list, and another NULL entry. The two lists correspond to
388 * the two lists in COUNTRIES.
389 */
/* 3-letter counterparts of COUNTRIES, entry for entry; each commented row repeats the matching COUNTRIES row. */
static const char * const COUNTRIES_3[] = {
/* "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
    "ASC", "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR", */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CPT", "CRI",
/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
/* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
    "DMA", "DOM", "DZA", "EA ", "ECU", "EST", "EGY", "ESH", "ERI", /* no valid 3-letter code for EA */
/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
    "IC ", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", /* no valid 3-letter code for IC */
/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/* "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ", */
    "SXM", "SYR", "SWZ", "TAA", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
    "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL, /* terminates the first list */
/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
NULL /* terminates the second list */
};
456
/* One row of a locale-ID canonicalization table: an input ID and the ID it maps to. */
typedef struct CanonicalizationMap {
    const char *id; /* input ID */
    const char *canonicalID; /* canonicalized output ID */
} CanonicalizationMap;
461
462/**
463 * A map to canonicalize locale IDs. This handles a variety of
464 * different semantic kinds of transformations.
465 */
/*
 * Rows pair a special-case input ID with its canonical replacement.
 * NOTE(review): the lookup code is not visible here; whether matching is
 * case-sensitive or order-dependent should be confirmed there.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "", "en_US_POSIX" }, /* .NET name */ // open ICU 64 deleted, we restore
    { "c", "en_US_POSIX" }, /* POSIX name */ // open ICU 64 deleted, we restore
    { "posix", "en_US_POSIX" }, /* POSIX name (alias of C) */ // open ICU 64 deleted, we restore
    { "art_LOJBAN", "jbo" }, /* registered name */
    { "hy__AREVELA", "hy" }, /* Registered IANA variant */
    { "hy__AREVMDA", "hyw" }, /* Registered IANA variant */
    { "zh_GAN", "gan" }, /* registered name */
    { "zh_GUOYU", "zh" }, /* registered name */
    { "zh_HAKKA", "hak" }, /* registered name */
    { "zh_MIN_NAN", "nan" }, /* registered name */
    { "zh_WUU", "wuu" }, /* registered name */
    { "zh_XIANG", "hsn" }, /* registered name */
    { "zh_YUE", "yue" }, /* registered name */
};
481
482/* ### BCP47 Conversion *******************************************/
/*
 * Test whether the locale id looks like a BCP47 tag carrying an extension:
 * it is non-NULL, contains no '@', and getShortestSubtagLength() reports a
 * subtag length of exactly 1 (a singleton such as the "u" in "en-u-ca-...").
 * The macro uses only its own argument; it previously referenced a variable
 * named localeID from the caller's scope. The argument is evaluated more
 * than once, so pass a side-effect-free expression.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP47 id to a Unicode locale id.
 *
 * finalID  lvalue that receives either buffer (conversion succeeded) or the
 *          untouched id (conversion failed or produced nothing).
 * id       input locale id / language tag.
 * buffer   caller-provided output buffer for uloc_forLanguageTag().
 * length   capacity of buffer.
 * err      UErrorCode*; an unterminated-result warning is promoted to
 *          U_BUFFER_OVERFLOW_ERROR, because the converted id would not be
 *          usable as a C string.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and is
 * safe inside unbraced if/else; invoke it with a trailing semicolon.
 * Arguments are evaluated more than once.
 */
#define _ConvertBCP47(finalID, id, buffer, length, err) \
    do { \
        if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 || \
                U_FAILURE(*(err)) || *(err) == U_STRING_NOT_TERMINATED_WARNING) { \
            (finalID) = (id); \
            if (*(err) == U_STRING_NOT_TERMINATED_WARNING) { *(err) = U_BUFFER_OVERFLOW_ERROR; } \
        } else { \
            (finalID) = (buffer); \
        } \
    } while (0)
494/* Gets the size of the shortest subtag in the given localeID. */
495static int32_t getShortestSubtagLength(const char *localeID) {
496 int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
497 int32_t length = localeIDLength;
498 int32_t tmpLength = 0;
499 int32_t i;
500 UBool reset = TRUE;
501
502 for (i = 0; i < localeIDLength; i++) {
503 if (localeID[i] != '_' && localeID[i] != '-') {
504 if (reset) {
505 tmpLength = 0;
506 reset = FALSE;
507 }
508 tmpLength++;
509 } else {
510 if (tmpLength != 0 && tmpLength < length) {
511 length = tmpLength;
512 }
513 reset = TRUE;
514 }
515 }
516
517 return length;
518}
519
520/* ### Keywords **************************************************/
/* True for the characters '0' through '9'. Evaluates its argument twice. */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
/* True for ASCII letters (per uprv_isASCIILetter) and digits. Evaluates its argument more than once. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size in bytes, including the terminating NUL, of the buffers that hold one keyword name. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Largest number of distinct keywords _getKeywords() collects from one locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
528
529U_CAPI const char * U_EXPORT2
530locale_getKeywordsStart(const char *localeID) {
531 const char *result = NULL;
532 if((result = uprv_strchr(localeID, '@')) != NULL) {
533 return result;
534 }
535#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
536 else {
537 /* We do this because the @ sign is variant, and the @ sign used on one
538 EBCDIC machine won't be compiled the same way on other EBCDIC based
539 machines. */
540 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
541 const uint8_t *charToFind = ebcdicSigns;
542 while(*charToFind) {
543 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
544 return result;
545 }
546 charToFind++;
547 }
548 }
549#endif
550 return NULL;
551}
552
553/**
554 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
555 * @param keywordName incoming name to be canonicalized
556 * @param status return status (keyword too long)
557 * @return length of the keyword name
558 */
559static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
560{
561 int32_t keywordNameLen = 0;
562
563 for (; *keywordName != 0; keywordName++) {
564 if (!UPRV_ISALPHANUM(*keywordName)) {
565 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
566 return 0;
567 }
568 if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
569 buf[keywordNameLen++] = uprv_tolower(*keywordName);
570 } else {
571 /* keyword name too long for internal buffer */
572 *status = U_INTERNAL_PROGRAM_ERROR;
573 return 0;
574 }
575 }
576 if (keywordNameLen == 0) {
577 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
578 return 0;
579 }
580 buf[keywordNameLen] = 0; /* terminate */
581
582 return keywordNameLen;
583}
584
/* One keyword=value pair as collected by _getKeywords(). */
typedef struct {
    char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* keyword name, lowercased with spaces removed, NUL-terminated */
    int32_t keywordLen;                    /* length of keyword, excluding the NUL */
    const char *valueStart;                /* first non-space character of the value, inside the caller's locale ID */
    int32_t valueLen;                      /* length of the value with trailing spaces trimmed; the value is NOT NUL-terminated */
} KeywordStruct;
591
592static int32_t U_CALLCONV
593compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
594 const char* leftString = ((const KeywordStruct *)left)->keyword;
595 const char* rightString = ((const KeywordStruct *)right)->keyword;
596 return uprv_strcmp(leftString, rightString);
597}
598
599static int32_t
600_getKeywords(const char *localeID,
601 char prev,
602 char *keywords, int32_t keywordCapacity,
603 char *values, int32_t valuesCapacity, int32_t *valLen,
604 UBool valuesToo,
605 UErrorCode *status)
606{
607 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
608
609 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
610 int32_t numKeywords = 0;
611 const char* pos = localeID;
612 const char* equalSign = NULL;
613 const char* semicolon = NULL;
614 int32_t i = 0, j, n;
615 int32_t keywordsLen = 0;
616 int32_t valuesLen = 0;
617
618 if(prev == '@') { /* start of keyword definition */
619 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
620 do {
621 UBool duplicate = FALSE;
622 /* skip leading spaces */
623 while(*pos == ' ') {
624 pos++;
625 }
626 if (!*pos) { /* handle trailing "; " */
627 break;
628 }
629 if(numKeywords == maxKeywords) {
630 *status = U_INTERNAL_PROGRAM_ERROR;
631 return 0;
632 }
633 equalSign = uprv_strchr(pos, '=');
634 semicolon = uprv_strchr(pos, ';');
635 /* lack of '=' [foo@currency] is illegal */
636 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
637 if(!equalSign || (semicolon && semicolon<equalSign)) {
638 *status = U_INVALID_FORMAT_ERROR;
639 return 0;
640 }
641 /* need to normalize both keyword and keyword name */
642 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
643 /* keyword name too long for internal buffer */
644 *status = U_INTERNAL_PROGRAM_ERROR;
645 return 0;
646 }
647 for(i = 0, n = 0; i < equalSign - pos; ++i) {
648 if (pos[i] != ' ') {
649 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
650 }
651 }
652
653 /* zero-length keyword is an error. */
654 if (n == 0) {
655 *status = U_INVALID_FORMAT_ERROR;
656 return 0;
657 }
658
659 keywordList[numKeywords].keyword[n] = 0;
660 keywordList[numKeywords].keywordLen = n;
661 /* now grab the value part. First we skip the '=' */
662 equalSign++;
663 /* then we leading spaces */
664 while(*equalSign == ' ') {
665 equalSign++;
666 }
667
668 /* Premature end or zero-length value */
669 if (!*equalSign || equalSign == semicolon) {
670 *status = U_INVALID_FORMAT_ERROR;
671 return 0;
672 }
673
674 keywordList[numKeywords].valueStart = equalSign;
675
676 pos = semicolon;
677 i = 0;
678 if(pos) {
679 while(*(pos - i - 1) == ' ') {
680 i++;
681 }
682 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
683 pos++;
684 } else {
685 i = (int32_t)uprv_strlen(equalSign);
686 while(i && equalSign[i-1] == ' ') {
687 i--;
688 }
689 keywordList[numKeywords].valueLen = i;
690 }
691 /* If this is a duplicate keyword, then ignore it */
692 for (j=0; j<numKeywords; ++j) {
693 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
694 duplicate = TRUE;
695 break;
696 }
697 }
698 if (!duplicate) {
699 ++numKeywords;
700 }
701 } while(pos);
702
703 /* now we have a list of keywords */
704 /* we need to sort it */
705 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
706
707 /* Now construct the keyword part */
708 for(i = 0; i < numKeywords; i++) {
709 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
710 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
711 if(valuesToo) {
712 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
713 } else {
714 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
715 }
716 }
717 keywordsLen += keywordList[i].keywordLen + 1;
718 if(valuesToo) {
719 if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
720 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
721 }
722 keywordsLen += keywordList[i].valueLen;
723
724 if(i < numKeywords - 1) {
725 if(keywordsLen < keywordCapacity) {
726 keywords[keywordsLen] = ';';
727 }
728 keywordsLen++;
729 }
730 }
731 if(values) {
732 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
733 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
734 values[valuesLen + keywordList[i].valueLen] = 0;
735 }
736 valuesLen += keywordList[i].valueLen + 1;
737 }
738 }
739 if(values) {
740 values[valuesLen] = 0;
741 if(valLen) {
742 *valLen = valuesLen;
743 }
744 }
745 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
746 } else {
747 return 0;
748 }
749}
750
751U_CFUNC int32_t
752locale_getKeywords(const char *localeID,
753 char prev,
754 char *keywords, int32_t keywordCapacity,
755 char *values, int32_t valuesCapacity, int32_t *valLen,
756 UBool valuesToo,
757 UErrorCode *status) {
758 return _getKeywords(localeID, prev, keywords, keywordCapacity,
759 values, valuesCapacity, valLen, valuesToo,
760 status);
761}
762
763U_CAPI int32_t U_EXPORT2
764uloc_getKeywordValue(const char* localeID,
765 const char* keywordName,
766 char* buffer, int32_t bufferCapacity,
767 UErrorCode* status)
768{
769 const char* startSearchHere = NULL;
770 const char* nextSeparator = NULL;
771 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
772 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
773 int32_t result = 0;
774
775 if(status && U_SUCCESS(*status) && localeID) {
776 char tempBuffer[ULOC_FULLNAME_CAPACITY];
777 const char* tmpLocaleID;
778
779 if (keywordName == NULL || keywordName[0] == 0) {
780 *status = U_ILLEGAL_ARGUMENT_ERROR;
781 return 0;
782 }
783
784 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
785 if(U_FAILURE(*status)) {
786 return 0;
787 }
788
789 if (_hasBCP47Extension(localeID)) {
790 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
791 } else {
792 tmpLocaleID=localeID;
793 }
794
795 startSearchHere = locale_getKeywordsStart(tmpLocaleID);
796 if(startSearchHere == NULL) {
797 /* no keywords, return at once */
798 return 0;
799 }
800
801 /* find the first keyword */
802 while(startSearchHere) {
803 const char* keyValueTail;
804 int32_t keyValueLen;
805
806 startSearchHere++; /* skip @ or ; */
807 nextSeparator = uprv_strchr(startSearchHere, '=');
808 if(!nextSeparator) {
809 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
810 return 0;
811 }
812 /* strip leading & trailing spaces (TC decided to tolerate these) */
813 while(*startSearchHere == ' ') {
814 startSearchHere++;
815 }
816 keyValueTail = nextSeparator;
817 while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
818 keyValueTail--;
819 }
820 /* now keyValueTail points to first char after the keyName */
821 /* copy & normalize keyName from locale */
822 if (startSearchHere == keyValueTail) {
823 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
824 return 0;
825 }
826 keyValueLen = 0;
827 while (startSearchHere < keyValueTail) {
828 if (!UPRV_ISALPHANUM(*startSearchHere)) {
829 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
830 return 0;
831 }
832 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
833 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
834 } else {
835 /* keyword name too long for internal buffer */
836 *status = U_INTERNAL_PROGRAM_ERROR;
837 return 0;
838 }
839 }
840 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
841
842 startSearchHere = uprv_strchr(nextSeparator, ';');
843
844 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
845 /* current entry matches the keyword. */
846 nextSeparator++; /* skip '=' */
847 /* First strip leading & trailing spaces (TC decided to tolerate these) */
848 while(*nextSeparator == ' ') {
849 nextSeparator++;
850 }
851 keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
852 while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
853 keyValueTail--;
854 }
855 /* Now copy the value, but check well-formedness */
856 if (nextSeparator == keyValueTail) {
857 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
858 return 0;
859 }
860 keyValueLen = 0;
861 while (nextSeparator < keyValueTail) {
862 if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
863 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
864 return 0;
865 }
866 if (keyValueLen < bufferCapacity) {
867 /* Should we lowercase value to return here? Tests expect as-is. */
868 buffer[keyValueLen++] = *nextSeparator++;
869 } else { /* keep advancing so we return correct length in case of overflow */
870 keyValueLen++;
871 nextSeparator++;
872 }
873 }
874 result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
875 return result;
876 }
877 }
878 }
879 return 0;
880}
881
882U_CAPI int32_t U_EXPORT2
883uloc_setKeywordValue(const char* keywordName,
884 const char* keywordValue,
885 char* buffer, int32_t bufferCapacity,
886 UErrorCode* status)
887{
888 /* TODO: sorting. removal. */
889 int32_t keywordNameLen;
890 int32_t keywordValueLen;
891 int32_t bufLen;
892 int32_t needLen = 0;
893 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
894 char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
895 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
896 int32_t rc;
897 char* nextSeparator = NULL;
898 char* nextEqualsign = NULL;
899 char* startSearchHere = NULL;
900 char* keywordStart = NULL;
901 CharString updatedKeysAndValues;
902 int32_t updatedKeysAndValuesLen;
903 UBool handledInputKeyAndValue = FALSE;
904 char keyValuePrefix = '@';
905
906 if(U_FAILURE(*status)) {
907 return -1;
908 }
909 if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
910 *status = U_ILLEGAL_ARGUMENT_ERROR;
911 return 0;
912 }
913 bufLen = (int32_t)uprv_strlen(buffer);
914 if(bufferCapacity<bufLen) {
915 /* The capacity is less than the length?! Is this NULL terminated? */
916 *status = U_ILLEGAL_ARGUMENT_ERROR;
917 return 0;
918 }
919 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
920 if(U_FAILURE(*status)) {
921 return 0;
922 }
923
924 keywordValueLen = 0;
925 if(keywordValue) {
926 while (*keywordValue != 0) {
927 if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
928 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
929 return 0;
930 }
931 if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
932 /* Should we force lowercase in value to set? */
933 keywordValueBuffer[keywordValueLen++] = *keywordValue++;
934 } else {
935 /* keywordValue too long for internal buffer */
936 *status = U_INTERNAL_PROGRAM_ERROR;
937 return 0;
938 }
939 }
940 }
941 keywordValueBuffer[keywordValueLen] = 0; /* terminate */
942
943 startSearchHere = (char*)locale_getKeywordsStart(buffer);
944 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
945 if(keywordValueLen == 0) { /* no keywords = nothing to remove */
946 return bufLen;
947 }
948
949 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
950 if(startSearchHere) { /* had a single @ */
951 needLen--; /* already had the @ */
952 /* startSearchHere points at the @ */
953 } else {
954 startSearchHere=buffer+bufLen;
955 }
956 if(needLen >= bufferCapacity) {
957 *status = U_BUFFER_OVERFLOW_ERROR;
958 return needLen; /* no change */
959 }
960 *startSearchHere++ = '@';
961 uprv_strcpy(startSearchHere, keywordNameBuffer);
962 startSearchHere += keywordNameLen;
963 *startSearchHere++ = '=';
964 uprv_strcpy(startSearchHere, keywordValueBuffer);
965 return needLen;
966 } /* end shortcut - no @ */
967
968 keywordStart = startSearchHere;
969 /* search for keyword */
970 while(keywordStart) {
971 const char* keyValueTail;
972 int32_t keyValueLen;
973
974 keywordStart++; /* skip @ or ; */
975 nextEqualsign = uprv_strchr(keywordStart, '=');
976 if (!nextEqualsign) {
977 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
978 return 0;
979 }
980 /* strip leading & trailing spaces (TC decided to tolerate these) */
981 while(*keywordStart == ' ') {
982 keywordStart++;
983 }
984 keyValueTail = nextEqualsign;
985 while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
986 keyValueTail--;
987 }
988 /* now keyValueTail points to first char after the keyName */
989 /* copy & normalize keyName from locale */
990 if (keywordStart == keyValueTail) {
991 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
992 return 0;
993 }
994 keyValueLen = 0;
995 while (keywordStart < keyValueTail) {
996 if (!UPRV_ISALPHANUM(*keywordStart)) {
997 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
998 return 0;
999 }
1000 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
1001 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
1002 } else {
1003 /* keyword name too long for internal buffer */
1004 *status = U_INTERNAL_PROGRAM_ERROR;
1005 return 0;
1006 }
1007 }
1008 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
1009
1010 nextSeparator = uprv_strchr(nextEqualsign, ';');
1011
1012 /* start processing the value part */
1013 nextEqualsign++; /* skip '=' */
1014 /* First strip leading & trailing spaces (TC decided to tolerate these) */
1015 while(*nextEqualsign == ' ') {
1016 nextEqualsign++;
1017 }
1018 keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
1019 while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
1020 keyValueTail--;
1021 }
1022 if (nextEqualsign == keyValueTail) {
1023 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1024 return 0;
1025 }
1026
1027 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1028 if(rc == 0) {
1029 /* Current entry matches the input keyword. Update the entry */
1030 if(keywordValueLen > 0) { /* updating a value */
1031 updatedKeysAndValues.append(keyValuePrefix, *status);
1032 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1033 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1034 updatedKeysAndValues.append('=', *status);
1035 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1036 } /* else removing this entry, don't emit anything */
1037 handledInputKeyAndValue = TRUE;
1038 } else {
1039 /* input keyword sorts earlier than current entry, add before current entry */
1040 if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1041 /* insert new entry at this location */
1042 updatedKeysAndValues.append(keyValuePrefix, *status);
1043 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1044 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1045 updatedKeysAndValues.append('=', *status);
1046 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1047 handledInputKeyAndValue = TRUE;
1048 }
1049 /* copy the current entry */
1050 updatedKeysAndValues.append(keyValuePrefix, *status);
1051 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1052 updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1053 updatedKeysAndValues.append('=', *status);
1054 updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
1055 }
1056 if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1057 /* append new entry at the end, it sorts later than existing entries */
1058 updatedKeysAndValues.append(keyValuePrefix, *status);
1059 /* skip keyValuePrefix update, no subsequent key-value pair */
1060 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1061 updatedKeysAndValues.append('=', *status);
1062 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1063 handledInputKeyAndValue = TRUE;
1064 }
1065 keywordStart = nextSeparator;
1066 } /* end loop searching */
1067
1068 /* Any error from updatedKeysAndValues.append above would be internal and not due to
1069 * problems with the passed-in locale. So if we did encounter problems with the
1070 * passed-in locale above, those errors took precedence and overrode any error
1071 * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1072 * are errors here they are from updatedKeysAndValues.append; they do cause an
1073 * error return but the passed-in locale is unmodified and the original bufLen is
1074 * returned.
1075 */
1076 if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1077 /* if input key/value specified removal of a keyword not present in locale, or
1078 * there was an error in CharString.append, leave original locale alone. */
1079 return bufLen;
1080 }
1081
1082 updatedKeysAndValuesLen = updatedKeysAndValues.length();
1083 /* needLen = length of the part before '@' + length of updated key-value part including '@' */
1084 needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
1085 if(needLen >= bufferCapacity) {
1086 *status = U_BUFFER_OVERFLOW_ERROR;
1087 return needLen; /* no change */
1088 }
1089 if (updatedKeysAndValuesLen > 0) {
1090 uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
1091 }
1092 buffer[needLen]=0;
1093 return needLen;
1094}
1095
1096/* ### ID parsing implementation **************************************************/
1097
/* TRUE if 'a' is a letter that can start one of the special prefixes
 * ('x' or 'i', either case). The argument is expanded several times, so it
 * must not have side effects; it is parenthesized so that any expression
 * can be passed safely. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
 'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1108
1109/**
1110 * Lookup 'key' in the array 'list'. The array 'list' should contain
1111 * a NULL entry, followed by more entries, and a second NULL entry.
1112 *
1113 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1114 * COUNTRIES_3.
1115 */
1116static int16_t _findIndex(const char* const* list, const char* key)
1117{
1118 const char* const* anchor = list;
1119 int32_t pass = 0;
1120
1121 /* Make two passes through two NULL-terminated arrays at 'list' */
1122 while (pass++ < 2) {
1123 while (*list) {
1124 if (uprv_strcmp(key, *list) == 0) {
1125 return (int16_t)(list - anchor);
1126 }
1127 list++;
1128 }
1129 ++list; /* skip final NULL *CWB*/
1130 }
1131 return -1;
1132}
1133
1134/* count the length of src while copying it to dest; return strlen(src) */
1135static inline int32_t
1136_copyCount(char *dest, int32_t destCapacity, const char *src) {
1137 const char *anchor;
1138 char c;
1139
1140 anchor=src;
1141 for(;;) {
1142 if((c=*src)==0) {
1143 return (int32_t)(src-anchor);
1144 }
1145 if(destCapacity<=0) {
1146 return (int32_t)((src-anchor)+uprv_strlen(src));
1147 }
1148 ++src;
1149 *dest++=c;
1150 --destCapacity;
1151 }
1152}
1153
1154U_CFUNC const char*
1155uloc_getCurrentCountryID(const char* oldID){
1156 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1157 if (offset >= 0) {
1158 return REPLACEMENT_COUNTRIES[offset];
1159 }
1160 return oldID;
1161}
1162U_CFUNC const char*
1163uloc_getCurrentLanguageID(const char* oldID){
1164 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1165 if (offset >= 0) {
1166 return REPLACEMENT_LANGUAGES[offset];
1167 }
1168 return oldID;
1169}
1170/*
1171 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1172 * avoid duplicating code to handle the earlier locale ID pieces
1173 * in the functions for the later ones by
1174 * setting the *pEnd pointer to where they stopped parsing
1175 *
1176 * TODO try to use this in Locale
1177 */
1178U_CFUNC int32_t
1179ulocimp_getLanguage(const char *localeID,
1180 char *language, int32_t languageCapacity,
1181 const char **pEnd) {
1182 int32_t i=0;
1183 int32_t offset;
1184 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1185
1186 /* if it starts with i- or x- then copy that prefix */
1187 if(_isIDPrefix(localeID)) {
1188 if(i<languageCapacity) {
1189 language[i]=(char)uprv_tolower(*localeID);
1190 }
1191 if(i<languageCapacity) {
1192 language[i+1]='-';
1193 }
1194 i+=2;
1195 localeID+=2;
1196 }
1197
1198 /* copy the language as far as possible and count its length */
1199 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1200 if(i<languageCapacity) {
1201 language[i]=(char)uprv_tolower(*localeID);
1202 }
1203 if(i<3) {
1204 U_ASSERT(i>=0);
1205 lang[i]=(char)uprv_tolower(*localeID);
1206 }
1207 i++;
1208 localeID++;
1209 }
1210
1211 if(i==3) {
1212 /* convert 3 character code to 2 character code if possible *CWB*/
1213 offset=_findIndex(LANGUAGES_3, lang);
1214 if(offset>=0) {
1215 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1216 }
1217 }
1218
1219 if(pEnd!=NULL) {
1220 *pEnd=localeID;
1221 }
1222 return i;
1223}
1224
1225U_CFUNC int32_t
1226ulocimp_getScript(const char *localeID,
1227 char *script, int32_t scriptCapacity,
1228 const char **pEnd)
1229{
1230 int32_t idLen = 0;
1231
1232 if (pEnd != NULL) {
1233 *pEnd = localeID;
1234 }
1235
1236 /* copy the second item as far as possible and count its length */
1237 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1238 && uprv_isASCIILetter(localeID[idLen])) {
1239 idLen++;
1240 }
1241
1242 /* If it's exactly 4 characters long, then it's a script and not a country. */
1243 if (idLen == 4) {
1244 int32_t i;
1245 if (pEnd != NULL) {
1246 *pEnd = localeID+idLen;
1247 }
1248 if(idLen > scriptCapacity) {
1249 idLen = scriptCapacity;
1250 }
1251 if (idLen >= 1) {
1252 script[0]=(char)uprv_toupper(*(localeID++));
1253 }
1254 for (i = 1; i < idLen; i++) {
1255 script[i]=(char)uprv_tolower(*(localeID++));
1256 }
1257 }
1258 else {
1259 idLen = 0;
1260 }
1261 return idLen;
1262}
1263
1264U_CFUNC int32_t
1265ulocimp_getCountry(const char *localeID,
1266 char *country, int32_t countryCapacity,
1267 const char **pEnd)
1268{
1269 int32_t idLen=0;
1270 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1271 int32_t offset;
1272
1273 /* copy the country as far as possible and count its length */
1274 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1275 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1276 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1277 }
1278 idLen++;
1279 }
1280
1281 /* the country should be either length 2 or 3 */
1282 if (idLen == 2 || idLen == 3) {
1283 UBool gotCountry = FALSE;
1284 /* convert 3 character code to 2 character code if possible *CWB*/
1285 if(idLen==3) {
1286 offset=_findIndex(COUNTRIES_3, cnty);
1287 if(offset>=0) {
1288 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1289 gotCountry = TRUE;
1290 }
1291 }
1292 if (!gotCountry) {
1293 int32_t i = 0;
1294 for (i = 0; i < idLen; i++) {
1295 if (i < countryCapacity) {
1296 country[i]=(char)uprv_toupper(localeID[i]);
1297 }
1298 }
1299 }
1300 localeID+=idLen;
1301 } else {
1302 idLen = 0;
1303 }
1304
1305 if(pEnd!=NULL) {
1306 *pEnd=localeID;
1307 }
1308
1309 return idLen;
1310}
1311
1312/**
1313 * @param needSeparator if true, then add leading '_' if any variants
1314 * are added to 'variant'
1315 */
1316static int32_t
1317_getVariantEx(const char *localeID,
1318 char prev,
1319 char *variant, int32_t variantCapacity,
1320 UBool needSeparator) {
1321 int32_t i=0;
1322
1323 /* get one or more variant tags and separate them with '_' */
1324 if(_isIDSeparator(prev)) {
1325 /* get a variant string after a '-' or '_' */
1326 while(!_isTerminator(*localeID)) {
1327 if (needSeparator) {
1328 if (i<variantCapacity) {
1329 variant[i] = '_';
1330 }
1331 ++i;
1332 needSeparator = FALSE;
1333 }
1334 if(i<variantCapacity) {
1335 variant[i]=(char)uprv_toupper(*localeID);
1336 if(variant[i]=='-') {
1337 variant[i]='_';
1338 }
1339 }
1340 i++;
1341 localeID++;
1342 }
1343 }
1344
1345 /* if there is no variant tag after a '-' or '_' then look for '@' */
1346 if(i==0) {
1347 if(prev=='@') {
1348 /* keep localeID */
1349 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1350 ++localeID; /* point after the '@' */
1351 } else {
1352 return 0;
1353 }
1354 while(!_isTerminator(*localeID)) {
1355 if (needSeparator) {
1356 if (i<variantCapacity) {
1357 variant[i] = '_';
1358 }
1359 ++i;
1360 needSeparator = FALSE;
1361 }
1362 if(i<variantCapacity) {
1363 variant[i]=(char)uprv_toupper(*localeID);
1364 if(variant[i]=='-' || variant[i]==',') {
1365 variant[i]='_';
1366 }
1367 }
1368 i++;
1369 localeID++;
1370 }
1371 }
1372
1373 return i;
1374}
1375
1376static int32_t
1377_getVariant(const char *localeID,
1378 char prev,
1379 char *variant, int32_t variantCapacity) {
1380 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1381}
1382
/* Keyword enumeration */

/*
 * Per-enumeration state stored in UEnumeration::context by
 * uloc_openKeywordList().
 */
typedef struct UKeywordsContext {
    /* Heap copy of the keyword list: NUL-separated keywords ending with an
       empty string. Released by uloc_kw_closeKeywords(). */
    char* keywords;
    /* Next keyword to hand out; always points into 'keywords'. */
    char* current;
} UKeywordsContext;
1389
1390U_CDECL_BEGIN
1391
1392static void U_CALLCONV
1393uloc_kw_closeKeywords(UEnumeration *enumerator) {
1394 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1395 uprv_free(enumerator->context);
1396 uprv_free(enumerator);
1397}
1398
1399static int32_t U_CALLCONV
1400uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1401 char *kw = ((UKeywordsContext *)en->context)->keywords;
1402 int32_t result = 0;
1403 while(*kw) {
1404 result++;
1405 kw += uprv_strlen(kw)+1;
1406 }
1407 return result;
1408}
1409
1410static const char * U_CALLCONV
1411uloc_kw_nextKeyword(UEnumeration* en,
1412 int32_t* resultLength,
1413 UErrorCode* /*status*/) {
1414 const char* result = ((UKeywordsContext *)en->context)->current;
1415 int32_t len = 0;
1416 if(*result) {
1417 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1418 ((UKeywordsContext *)en->context)->current += len+1;
1419 } else {
1420 result = NULL;
1421 }
1422 if (resultLength) {
1423 *resultLength = len;
1424 }
1425 return result;
1426}
1427
1428static void U_CALLCONV
1429uloc_kw_resetKeywords(UEnumeration* en,
1430 UErrorCode* /*status*/) {
1431 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1432}
1433
1434U_CDECL_END
1435
1436
1437static const UEnumeration gKeywordsEnum = {
1438 NULL,
1439 NULL,
1440 uloc_kw_closeKeywords,
1441 uloc_kw_countKeywords,
1442 uenum_unextDefault,
1443 uloc_kw_nextKeyword,
1444 uloc_kw_resetKeywords
1445};
1446
1447U_CAPI UEnumeration* U_EXPORT2
1448uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1449{
1450 UKeywordsContext *myContext = NULL;
1451 UEnumeration *result = NULL;
1452
1453 if(U_FAILURE(*status)) {
1454 return NULL;
1455 }
1456 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1457 /* Null pointer test */
1458 if (result == NULL) {
1459 *status = U_MEMORY_ALLOCATION_ERROR;
1460 return NULL;
1461 }
1462 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1463 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
1464 if (myContext == NULL) {
1465 *status = U_MEMORY_ALLOCATION_ERROR;
1466 uprv_free(result);
1467 return NULL;
1468 }
1469 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1470 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1471 myContext->keywords[keywordListSize] = 0;
1472 myContext->current = myContext->keywords;
1473 result->context = myContext;
1474 return result;
1475}
1476
1477U_CAPI UEnumeration* U_EXPORT2
1478uloc_openKeywords(const char* localeID,
1479 UErrorCode* status)
1480{
1481 int32_t i=0;
1482 char keywords[256];
1483 int32_t keywordsCapacity = 256;
1484 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1485 const char* tmpLocaleID;
1486
1487 if(status==NULL || U_FAILURE(*status)) {
1488 return 0;
1489 }
1490
1491 if (_hasBCP47Extension(localeID)) {
1492 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1493 } else {
1494 if (localeID==NULL) {
1495 localeID=uloc_getDefault();
1496 }
1497 tmpLocaleID=localeID;
1498 }
1499
1500 /* Skip the language */
1501 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1502 if(_isIDSeparator(*tmpLocaleID)) {
1503 const char *scriptID;
1504 /* Skip the script if available */
1505 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1506 if(scriptID != tmpLocaleID+1) {
1507 /* Found optional script */
1508 tmpLocaleID = scriptID;
1509 }
1510 /* Skip the Country */
1511 if (_isIDSeparator(*tmpLocaleID)) {
1512 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1513 if(_isIDSeparator(*tmpLocaleID)) {
1514 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1515 }
1516 }
1517 }
1518
1519 /* keywords are located after '@' */
1520 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1521 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1522 }
1523
1524 if(i) {
1525 return uloc_openKeywordList(keywords, i, status);
1526 } else {
1527 return NULL;
1528 }
1529}
1530
1531
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE 0x1

/* TRUE if any bit of 'mask' is set in 'options'. Both arguments are
 * parenthesized so that expressions such as (a | b) can be passed. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The "i-default" tag. The array has no terminating NUL, so it has to be
 * compared with a length-limited function together with I_DEFAULT_LENGTH. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1540
1541/**
1542 * Canonicalize the given localeID, to level 1 or to level 2,
1543 * depending on the options. To specify level 1, pass in options=0.
1544 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1545 *
1546 * This is the code underlying uloc_getName and uloc_canonicalize.
1547 */
1548static int32_t
1549_canonicalize(const char* localeID,
1550 char* result,
1551 int32_t resultCapacity,
1552 uint32_t options,
1553 UErrorCode* err) {
1554 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1555 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1556 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1557 const char* origLocaleID;
1558 const char* tmpLocaleID;
1559 const char* keywordAssign = NULL;
1560 const char* separatorIndicator = NULL;
1561 char* name;
1562 char* variant = NULL; /* pointer into name, or NULL */
1563
1564 if (U_FAILURE(*err)) {
1565 return 0;
1566 }
1567
1568 if (_hasBCP47Extension(localeID)) {
1569 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1570 } else {
1571 if (localeID==NULL) {
1572 localeID=uloc_getDefault();
1573 }
1574 tmpLocaleID=localeID;
1575 }
1576
1577 origLocaleID=tmpLocaleID;
1578
1579 /* if we are doing a full canonicalization, then put results in
1580 localeBuffer, if necessary; otherwise send them to result. */
1581 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1582 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
1583 name = localeBuffer;
1584 nameCapacity = (int32_t)sizeof(localeBuffer);
1585 } else {
1586 name = result;
1587 nameCapacity = resultCapacity;
1588 }
1589
1590 /* get all pieces, one after another, and separate with '_' */
1591 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1592
1593 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1594 const char *d = uloc_getDefault();
1595
1596 len = (int32_t)uprv_strlen(d);
1597
1598 if (name != NULL) {
1599 uprv_memcpy(name, d, len);
1600 }
1601 } else if(_isIDSeparator(*tmpLocaleID)) {
1602 const char *scriptID;
1603
1604 ++fieldCount;
1605 if(len<nameCapacity) {
1606 name[len]='_';
1607 }
1608 ++len;
1609
1610 scriptSize=ulocimp_getScript(tmpLocaleID+1,
1611 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1612 if(scriptSize > 0) {
1613 /* Found optional script */
1614 tmpLocaleID = scriptID;
1615 ++fieldCount;
1616 len+=scriptSize;
1617 if (_isIDSeparator(*tmpLocaleID)) {
1618 /* If there is something else, then we add the _ */
1619 if(len<nameCapacity) {
1620 name[len]='_';
1621 }
1622 ++len;
1623 }
1624 }
1625
1626 if (_isIDSeparator(*tmpLocaleID)) {
1627 const char *cntryID;
1628 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1629 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1630 if (cntrySize > 0) {
1631 /* Found optional country */
1632 tmpLocaleID = cntryID;
1633 len+=cntrySize;
1634 }
1635 if(_isIDSeparator(*tmpLocaleID)) {
1636 /* If there is something else, then we add the _ if we found country before. */
1637 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
1638 ++fieldCount;
1639 if(len<nameCapacity) {
1640 name[len]='_';
1641 }
1642 ++len;
1643 }
1644
1645 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1646 (len<nameCapacity ? name+len : NULL), nameCapacity-len);
1647 if (variantSize > 0) {
1648 variant = len<nameCapacity ? name+len : NULL;
1649 len += variantSize;
1650 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1651 }
1652 }
1653 }
1654 }
1655
1656 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1657 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1658 UBool done = FALSE;
1659 do {
1660 char c = *tmpLocaleID;
1661 switch (c) {
1662 case 0:
1663 case '@':
1664 done = TRUE;
1665 break;
1666 default:
1667 if (len<nameCapacity) {
1668 name[len] = c;
1669 }
1670 ++len;
1671 ++tmpLocaleID;
1672 break;
1673 }
1674 } while (!done);
1675 }
1676
1677 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1678 After this, tmpLocaleID either points to '@' or is NULL */
1679 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1680 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1681 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1682 }
1683
1684 /* Copy POSIX-style variant, if any [mr@FOO] */
1685 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1686 tmpLocaleID != NULL && keywordAssign == NULL) {
1687 for (;;) {
1688 char c = *tmpLocaleID;
1689 if (c == 0) {
1690 break;
1691 }
1692 if (len<nameCapacity) {
1693 name[len] = c;
1694 }
1695 ++len;
1696 ++tmpLocaleID;
1697 }
1698 }
1699
1700 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1701 /* Handle @FOO variant if @ is present and not followed by = */
1702 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1703 int32_t posixVariantSize;
1704 /* Add missing '_' if needed */
1705 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1706 do {
1707 if(len<nameCapacity) {
1708 name[len]='_';
1709 }
1710 ++len;
1711 ++fieldCount;
1712 } while(fieldCount<2);
1713 }
1714 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1715 (UBool)(variantSize > 0));
1716 if (posixVariantSize > 0) {
1717 if (variant == NULL) {
1718 variant = name+len;
1719 }
1720 len += posixVariantSize;
1721 variantSize += posixVariantSize;
1722 }
1723 }
1724
1725 /* Look up the ID in the canonicalization map */
1726 for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1727 const char* id = CANONICALIZE_MAP[j].id;
1728 int32_t n = (int32_t)uprv_strlen(id);
1729 if (len == n && uprv_strncmp(name, id, n) == 0) {
1730 if (n == 0 && tmpLocaleID != NULL) {
1731 break; /* Don't remap "" if keywords present */
1732 }
1733 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1734 break;
1735 }
1736 }
1737 }
1738
1739 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1740 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1741 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1742 if(len<nameCapacity) {
1743 name[len]='@';
1744 }
1745 ++len;
1746 ++fieldCount;
1747 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1748 NULL, 0, NULL, TRUE, err);
1749 }
1750 }
1751
1752 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1753 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1754 }
1755
1756 return u_terminateChars(result, resultCapacity, len, err);
1757}
1758
1759/* ### ID parsing API **************************************************/
1760
1761U_CAPI int32_t U_EXPORT2
1762uloc_getParent(const char* localeID,
1763 char* parent,
1764 int32_t parentCapacity,
1765 UErrorCode* err)
1766{
1767 const char *lastUnderscore;
1768 int32_t i;
1769
1770 if (U_FAILURE(*err))
1771 return 0;
1772
1773 if (localeID == NULL)
1774 localeID = uloc_getDefault();
1775
1776 lastUnderscore=uprv_strrchr(localeID, '_');
1777 if(lastUnderscore!=NULL) {
1778 i=(int32_t)(lastUnderscore-localeID);
1779 } else {
1780 i=0;
1781 }
1782
1783 if(i>0 && parent != localeID) {
1784 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1785 }
1786
1787 return u_terminateChars(parent, parentCapacity, i, err);
1788}
1789
1790U_CAPI int32_t U_EXPORT2
1791uloc_getLanguage(const char* localeID,
1792 char* language,
1793 int32_t languageCapacity,
1794 UErrorCode* err)
1795{
1796 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1797 int32_t i=0;
1798
1799 if (err==NULL || U_FAILURE(*err)) {
1800 return 0;
1801 }
1802
1803 if(localeID==NULL) {
1804 localeID=uloc_getDefault();
1805 }
1806
1807 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1808 return u_terminateChars(language, languageCapacity, i, err);
1809}
1810
1811U_CAPI int32_t U_EXPORT2
1812uloc_getScript(const char* localeID,
1813 char* script,
1814 int32_t scriptCapacity,
1815 UErrorCode* err)
1816{
1817 int32_t i=0;
1818
1819 if(err==NULL || U_FAILURE(*err)) {
1820 return 0;
1821 }
1822
1823 if(localeID==NULL) {
1824 localeID=uloc_getDefault();
1825 }
1826
1827 /* skip the language */
1828 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1829 if(_isIDSeparator(*localeID)) {
1830 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
1831 }
1832 return u_terminateChars(script, scriptCapacity, i, err);
1833}
1834
1835U_CAPI int32_t U_EXPORT2
1836uloc_getCountry(const char* localeID,
1837 char* country,
1838 int32_t countryCapacity,
1839 UErrorCode* err)
1840{
1841 int32_t i=0;
1842
1843 if(err==NULL || U_FAILURE(*err)) {
1844 return 0;
1845 }
1846
1847 if(localeID==NULL) {
1848 localeID=uloc_getDefault();
1849 }
1850
1851 /* Skip the language */
1852 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1853 if(_isIDSeparator(*localeID)) {
1854 const char *scriptID;
1855 /* Skip the script if available */
1856 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
1857 if(scriptID != localeID+1) {
1858 /* Found optional script */
1859 localeID = scriptID;
1860 }
1861 if(_isIDSeparator(*localeID)) {
1862 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
1863 }
1864 }
1865 return u_terminateChars(country, countryCapacity, i, err);
1866}
1867
1868U_CAPI int32_t U_EXPORT2
1869uloc_getVariant(const char* localeID,
1870 char* variant,
1871 int32_t variantCapacity,
1872 UErrorCode* err)
1873{
1874 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1875 const char* tmpLocaleID;
1876 int32_t i=0;
1877
1878 if(err==NULL || U_FAILURE(*err)) {
1879 return 0;
1880 }
1881
1882 if (_hasBCP47Extension(localeID)) {
1883 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1884 } else {
1885 if (localeID==NULL) {
1886 localeID=uloc_getDefault();
1887 }
1888 tmpLocaleID=localeID;
1889 }
1890
1891 /* Skip the language */
1892 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1893 if(_isIDSeparator(*tmpLocaleID)) {
1894 const char *scriptID;
1895 /* Skip the script if available */
1896 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1897 if(scriptID != tmpLocaleID+1) {
1898 /* Found optional script */
1899 tmpLocaleID = scriptID;
1900 }
1901 /* Skip the Country */
1902 if (_isIDSeparator(*tmpLocaleID)) {
1903 const char *cntryID;
1904 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
1905 if (cntryID != tmpLocaleID+1) {
1906 /* Found optional country */
1907 tmpLocaleID = cntryID;
1908 }
1909 if(_isIDSeparator(*tmpLocaleID)) {
1910 /* If there was no country ID, skip a possible extra IDSeparator */
1911 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
1912 tmpLocaleID++;
1913 }
1914 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
1915 }
1916 }
1917 }
1918
1919 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1920 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
1921/*
1922 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1923 i=_getVariant(localeID+1, '@', variant, variantCapacity);
1924 }
1925*/
1926 return u_terminateChars(variant, variantCapacity, i, err);
1927}
1928
1929U_CAPI int32_t U_EXPORT2
1930uloc_getName(const char* localeID,
1931 char* name,
1932 int32_t nameCapacity,
1933 UErrorCode* err)
1934{
1935 return _canonicalize(localeID, name, nameCapacity, 0, err);
1936}
1937
1938U_CAPI int32_t U_EXPORT2
1939uloc_getBaseName(const char* localeID,
1940 char* name,
1941 int32_t nameCapacity,
1942 UErrorCode* err)
1943{
1944 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
1945}
1946
1947U_CAPI int32_t U_EXPORT2
1948uloc_canonicalize(const char* localeID,
1949 char* name,
1950 int32_t nameCapacity,
1951 UErrorCode* err)
1952{
1953 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
1954}
1955
1956U_CAPI const char* U_EXPORT2
1957uloc_getISO3Language(const char* localeID)
1958{
1959 int16_t offset;
1960 char lang[ULOC_LANG_CAPACITY];
1961 UErrorCode err = U_ZERO_ERROR;
1962
1963 if (localeID == NULL)
1964 {
1965 localeID = uloc_getDefault();
1966 }
1967 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1968 if (U_FAILURE(err))
1969 return "";
1970 offset = _findIndex(LANGUAGES, lang);
1971 if (offset < 0)
1972 return "";
1973 return LANGUAGES_3[offset];
1974}
1975
1976U_CAPI const char* U_EXPORT2
1977uloc_getISO3Country(const char* localeID)
1978{
1979 int16_t offset;
1980 char cntry[ULOC_LANG_CAPACITY];
1981 UErrorCode err = U_ZERO_ERROR;
1982
1983 if (localeID == NULL)
1984 {
1985 localeID = uloc_getDefault();
1986 }
1987 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
1988 if (U_FAILURE(err))
1989 return "";
1990 offset = _findIndex(COUNTRIES, cntry);
1991 if (offset < 0)
1992 return "";
1993
1994 return COUNTRIES_3[offset];
1995}
1996
1997U_CAPI uint32_t U_EXPORT2
1998uloc_getLCID(const char* localeID)
1999{
2000 UErrorCode status = U_ZERO_ERROR;
2001 char langID[ULOC_FULLNAME_CAPACITY];
2002 uint32_t lcid = 0;
2003
2004 /* Check for incomplete id. */
2005 if (!localeID || uprv_strlen(localeID) < 2) {
2006 return 0;
2007 }
2008
2009 // First, attempt Windows platform lookup if available, but fall
2010 // through to catch any special cases (ICU vs Windows name differences).
2011 lcid = uprv_convertToLCIDPlatform(localeID, &status);
2012 if (U_FAILURE(status)) {
2013 return 0;
2014 }
2015 if (lcid > 0) {
2016 // Windows found an LCID, return that
2017 return lcid;
2018 }
2019
2020 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2021 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
2022 return 0;
2023 }
2024
2025 if (uprv_strchr(localeID, '@')) {
2026 // uprv_convertToLCID does not support keywords other than collation.
2027 // Remove all keywords except collation.
2028 int32_t len;
2029 char collVal[ULOC_KEYWORDS_CAPACITY];
2030 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2031
2032 len = uloc_getKeywordValue(localeID, "collation", collVal,
2033 UPRV_LENGTHOF(collVal) - 1, &status);
2034
2035 if (U_SUCCESS(status) && len > 0) {
2036 collVal[len] = 0;
2037
2038 len = uloc_getBaseName(localeID, tmpLocaleID,
2039 UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
2040
2041 if (U_SUCCESS(status) && len > 0) {
2042 tmpLocaleID[len] = 0;
2043
2044 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2045 UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
2046
2047 if (U_SUCCESS(status) && len > 0) {
2048 tmpLocaleID[len] = 0;
2049 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2050 }
2051 }
2052 }
2053
2054 // fall through - all keywords are simply ignored
2055 status = U_ZERO_ERROR;
2056 }
2057
2058 return uprv_convertToLCID(langID, localeID, &status);
2059}
2060
2061U_CAPI int32_t U_EXPORT2
2062uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2063 UErrorCode *status)
2064{
2065 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2066}
2067
2068/* ### Default locale **************************************************/
2069
2070U_CAPI const char* U_EXPORT2
2071uloc_getDefault()
2072{
2073 return locale_get_default();
2074}
2075
2076U_CAPI void U_EXPORT2
2077uloc_setDefault(const char* newDefaultLocale,
2078 UErrorCode* err)
2079{
2080 if (U_FAILURE(*err))
2081 return;
2082 /* the error code isn't currently used for anything by this function*/
2083
2084 /* propagate change to C++ */
2085 locale_set_default(newDefaultLocale);
2086}
2087
2088/**
2089 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2090 * to an array of pointers to arrays of char. All of these pointers are owned
2091 * by ICU-- do not delete them, and do not write through them. The array is
2092 * terminated with a null pointer.
2093 */
2094U_CAPI const char* const* U_EXPORT2
2095uloc_getISOLanguages()
2096{
2097 return LANGUAGES;
2098}
2099
2100/**
2101 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2102 * pointer to an array of pointers to arrays of char. All of these pointers are
2103 * owned by ICU-- do not delete them, and do not write through them. The array is
2104 * terminated with a null pointer.
2105 */
2106U_CAPI const char* const* U_EXPORT2
2107uloc_getISOCountries()
2108{
2109 return COUNTRIES;
2110}
2111
2112
2113/* this function to be moved into cstring.c later */
2114static char gDecimal = 0;
2115
2116static /* U_CAPI */
2117double
2118/* U_EXPORT2 */
2119_uloc_strtod(const char *start, char **end) {
2120 char *decimal;
2121 char *myEnd;
2122 char buf[30];
2123 double rv;
2124 if (!gDecimal) {
2125 char rep[5];
2126 /* For machines that decide to change the decimal on you,
2127 and try to be too smart with localization.
2128 This normally should be just a '.'. */
2129 sprintf(rep, "%+1.1f", 1.0);
2130 gDecimal = rep[2];
2131 }
2132
2133 if(gDecimal == '.') {
2134 return uprv_strtod(start, end); /* fall through to OS */
2135 } else {
2136 uprv_strncpy(buf, start, 29);
2137 buf[29]=0;
2138 decimal = uprv_strchr(buf, '.');
2139 if(decimal) {
2140 *decimal = gDecimal;
2141 } else {
2142 return uprv_strtod(start, end); /* no decimal point */
2143 }
2144 rv = uprv_strtod(buf, &myEnd);
2145 if(end) {
2146 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2147 }
2148 return rv;
2149 }
2150}
2151
2152typedef struct {
2153 float q;
2154 int32_t dummy; /* to avoid uninitialized memory copy from qsort */
2155 char locale[ULOC_FULLNAME_CAPACITY+1];
2156} _acceptLangItem;
2157
2158static int32_t U_CALLCONV
2159uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
2160{
2161 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2162 const _acceptLangItem *bb = (const _acceptLangItem*)b;
2163
2164 int32_t rc = 0;
2165 if(bb->q < aa->q) {
2166 rc = -1; /* A > B */
2167 } else if(bb->q > aa->q) {
2168 rc = 1; /* A < B */
2169 } else {
2170 rc = 0; /* A = B */
2171 }
2172
2173 if(rc==0) {
2174 rc = uprv_stricmp(aa->locale, bb->locale);
2175 }
2176
2177#if defined(ULOC_DEBUG)
2178 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2179 aa->locale, aa->q,
2180 bb->locale, bb->q,
2181 rc);*/
2182#endif
2183
2184 return rc;
2185}
2186
/*
Example of an HTTP Accept-Language header value (entries with optional q-values):
mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
*/
2190
2191U_CAPI int32_t U_EXPORT2
2192uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2193 const char *httpAcceptLanguage,
2194 UEnumeration* availableLocales,
2195 UErrorCode *status)
2196{
2197 MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
2198 char tmp[ULOC_FULLNAME_CAPACITY +1];
2199 int32_t n = 0;
2200 const char *itemEnd;
2201 const char *paramEnd;
2202 const char *s;
2203 const char *t;
2204 int32_t res;
2205 int32_t i;
2206 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2207
2208 if(U_FAILURE(*status)) {
2209 return -1;
2210 }
2211
2212 for(s=httpAcceptLanguage;s&&*s;) {
2213 while(isspace(*s)) /* eat space at the beginning */
2214 s++;
2215 itemEnd=uprv_strchr(s,',');
2216 paramEnd=uprv_strchr(s,';');
2217 if(!itemEnd) {
2218 itemEnd = httpAcceptLanguage+l; /* end of string */
2219 }
2220 if(paramEnd && paramEnd<itemEnd) {
2221 /* semicolon (;) is closer than end (,) */
2222 t = paramEnd+1;
2223 if(*t=='q') {
2224 t++;
2225 }
2226 while(isspace(*t)) {
2227 t++;
2228 }
2229 if(*t=='=') {
2230 t++;
2231 }
2232 while(isspace(*t)) {
2233 t++;
2234 }
2235 items[n].q = (float)_uloc_strtod(t,NULL);
2236 } else {
2237 /* no semicolon - it's 1.0 */
2238 items[n].q = 1.0f;
2239 paramEnd = itemEnd;
2240 }
2241 items[n].dummy=0;
2242 /* eat spaces prior to semi */
2243 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2244 ;
2245 int32_t slen = static_cast<int32_t>(((t+1)-s));
2246 if(slen > ULOC_FULLNAME_CAPACITY) {
2247 *status = U_BUFFER_OVERFLOW_ERROR;
2248 return -1; // too big
2249 }
2250 uprv_strncpy(items[n].locale, s, slen);
2251 items[n].locale[slen]=0; // terminate
2252 int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
2253 if(U_FAILURE(*status)) return -1;
2254 if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {
2255 // canonicalization had an effect- copy back
2256 uprv_strncpy(items[n].locale, tmp, clen);
2257 items[n].locale[clen] = 0; // terminate
2258 }
2259#if defined(ULOC_DEBUG)
2260 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2261#endif
2262 n++;
2263 s = itemEnd;
2264 while(*s==',') { /* eat duplicate commas */
2265 s++;
2266 }
2267 if(n>=items.getCapacity()) { // If we need more items
2268 if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
2269 *status = U_MEMORY_ALLOCATION_ERROR;
2270 return -1;
2271 }
2272#if defined(ULOC_DEBUG)
2273 fprintf(stderr,"malloced at size %d\n", items.getCapacity());
2274#endif
2275 }
2276 }
2277 uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2278 if (U_FAILURE(*status)) {
2279 return -1;
2280 }
2281 LocalMemory<const char*> strs(NULL);
2282 if (strs.allocateInsteadAndReset(n) == NULL) {
2283 *status = U_MEMORY_ALLOCATION_ERROR;
2284 return -1;
2285 }
2286 for(i=0;i<n;i++) {
2287#if defined(ULOC_DEBUG)
2288 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2289#endif
2290 strs[i]=items[i].locale;
2291 }
2292 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2293 strs.getAlias(), n, availableLocales, status);
2294 return res;
2295}
2296
2297
2298U_CAPI int32_t U_EXPORT2
2299uloc_acceptLanguage(char *result, int32_t resultAvailable,
2300 UAcceptResult *outResult, const char **acceptList,
2301 int32_t acceptListCount,
2302 UEnumeration* availableLocales,
2303 UErrorCode *status)
2304{
2305 int32_t i,j;
2306 int32_t len;
2307 int32_t maxLen=0;
2308 char tmp[ULOC_FULLNAME_CAPACITY+1];
2309 const char *l;
2310 char **fallbackList;
2311 if(U_FAILURE(*status)) {
2312 return -1;
2313 }
2314 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
2315 if(fallbackList==NULL) {
2316 *status = U_MEMORY_ALLOCATION_ERROR;
2317 return -1;
2318 }
2319 for(i=0;i<acceptListCount;i++) {
2320#if defined(ULOC_DEBUG)
2321 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2322#endif
2323 while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
2324#if defined(ULOC_DEBUG)
2325 fprintf(stderr," %s\n", l);
2326#endif
2327 len = (int32_t)uprv_strlen(l);
2328 if(!uprv_strcmp(acceptList[i], l)) {
2329 if(outResult) {
2330 *outResult = ULOC_ACCEPT_VALID;
2331 }
2332#if defined(ULOC_DEBUG)
2333 fprintf(stderr, "MATCH! %s\n", l);
2334#endif
2335 if(len>0) {
2336 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2337 }
2338 for(j=0;j<i;j++) {
2339 uprv_free(fallbackList[j]);
2340 }
2341 uprv_free(fallbackList);
2342 return u_terminateChars(result, resultAvailable, len, status);
2343 }
2344 if(len>maxLen) {
2345 maxLen = len;
2346 }
2347 }
2348 uenum_reset(availableLocales, status);
2349 /* save off parent info */
2350 if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
2351 fallbackList[i] = uprv_strdup(tmp);
2352 } else {
2353 fallbackList[i]=0;
2354 }
2355 }
2356
2357 for(maxLen--;maxLen>0;maxLen--) {
2358 for(i=0;i<acceptListCount;i++) {
2359 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2360#if defined(ULOC_DEBUG)
2361 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2362#endif
2363 while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
2364#if defined(ULOC_DEBUG)
2365 fprintf(stderr," %s\n", l);
2366#endif
2367 len = (int32_t)uprv_strlen(l);
2368 if(!uprv_strcmp(fallbackList[i], l)) {
2369 if(outResult) {
2370 *outResult = ULOC_ACCEPT_FALLBACK;
2371 }
2372#if defined(ULOC_DEBUG)
2373 fprintf(stderr, "fallback MATCH! %s\n", l);
2374#endif
2375 if(len>0) {
2376 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2377 }
2378 for(j=0;j<acceptListCount;j++) {
2379 uprv_free(fallbackList[j]);
2380 }
2381 uprv_free(fallbackList);
2382 return u_terminateChars(result, resultAvailable, len, status);
2383 }
2384 }
2385 uenum_reset(availableLocales, status);
2386
2387 if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
2388 uprv_free(fallbackList[i]);
2389 fallbackList[i] = uprv_strdup(tmp);
2390 } else {
2391 uprv_free(fallbackList[i]);
2392 fallbackList[i]=0;
2393 }
2394 }
2395 }
2396 if(outResult) {
2397 *outResult = ULOC_ACCEPT_FAILED;
2398 }
2399 }
2400 for(i=0;i<acceptListCount;i++) {
2401 uprv_free(fallbackList[i]);
2402 }
2403 uprv_free(fallbackList);
2404 return -1;
2405}
2406
2407U_CAPI const char* U_EXPORT2
2408uloc_toUnicodeLocaleKey(const char* keyword)
2409{
2410 const char* bcpKey = ulocimp_toBcpKey(keyword);
2411 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2412 // unknown keyword, but syntax is fine..
2413 return keyword;
2414 }
2415 return bcpKey;
2416}
2417
2418U_CAPI const char* U_EXPORT2
2419uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2420{
2421 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2422 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2423 // unknown keyword, but syntax is fine..
2424 return value;
2425 }
2426 return bcpType;
2427}
2428
2429static UBool
2430isWellFormedLegacyKey(const char* legacyKey)
2431{
2432 const char* p = legacyKey;
2433 while (*p) {
2434 if (!UPRV_ISALPHANUM(*p)) {
2435 return FALSE;
2436 }
2437 p++;
2438 }
2439 return TRUE;
2440}
2441
2442static UBool
2443isWellFormedLegacyType(const char* legacyType)
2444{
2445 const char* p = legacyType;
2446 int32_t alphaNumLen = 0;
2447 while (*p) {
2448 if (*p == '_' || *p == '/' || *p == '-') {
2449 if (alphaNumLen == 0) {
2450 return FALSE;
2451 }
2452 alphaNumLen = 0;
2453 } else if (UPRV_ISALPHANUM(*p)) {
2454 alphaNumLen++;
2455 } else {
2456 return FALSE;
2457 }
2458 p++;
2459 }
2460 return (alphaNumLen != 0);
2461}
2462
2463U_CAPI const char* U_EXPORT2
2464uloc_toLegacyKey(const char* keyword)
2465{
2466 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2467 if (legacyKey == NULL) {
2468 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2469 //
2470 // Note:
2471 // LDML/CLDR provides some definition of keyword syntax in
2472 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2473 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2474 // Keys can only consist of [0-9a-zA-Z].
2475 if (isWellFormedLegacyKey(keyword)) {
2476 return keyword;
2477 }
2478 }
2479 return legacyKey;
2480}
2481
2482U_CAPI const char* U_EXPORT2
2483uloc_toLegacyType(const char* keyword, const char* value)
2484{
2485 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2486 if (legacyType == NULL) {
2487 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2488 //
2489 // Note:
2490 // LDML/CLDR provides some definition of keyword syntax in
2491 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2492 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2493 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2494 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
2495 if (isWellFormedLegacyType(value)) {
2496 return value;
2497 }
2498 }
2499 return legacyType;
2500}
2501
2502/*eof*/