git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	**********************************************************************
	5	* Copyright (C) 1997-2016, International Business Machines
	6	* Corporation and others. All Rights Reserved.
	7	**********************************************************************
	8	*
	9	* File ULOC.CPP
	10	*
	11	* Modification History:
	12	*
	13	* Date Name Description
	14	* 04/01/97 aliu Creation.
	15	* 08/21/98 stephen JDK 1.2 sync
	16	* 12/08/98 rtg New Locale implementation and C API
	17	* 03/15/99 damiba overhaul.
	18	* 04/06/99 stephen changed setDefault() to realloc and copy
	19	* 06/14/99 stephen Changed calls to ures_open for new params
	20	* 07/21/99 stephen Modified setDefault() to propagate to C++
	21	* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
	22	* brought canonicalization code into line with spec
	23	*****************************************************************************/
	24
	25	/*
	26	POSIX's locale format, from putil.c: [no spaces]
	27
	28	ll [ _CC ] [ . MM ] [ @ VV]
	29
	30	l = lang, C = ctry, M = charmap, V = variant
	31	*/
	32
	33	#include "unicode/utypes.h"
	34	#include "unicode/ustring.h"
	35	#include "unicode/uloc.h"
	36
	37	#include "putilimp.h"
	38	#include "ustr_imp.h"
	39	#include "ulocimp.h"
	40	#include "umutex.h"
	41	#include "cstring.h"
	42	#include "cmemory.h"
	43	#include "locmap.h"
	44	#include "uarrsort.h"
	45	#include "uenumimp.h"
	46	#include "uassert.h"
	47	#include "charstr.h"
	48
	49	#include <stdio.h> /* for sprintf */
	50
	51	U_NAMESPACE_USE
	52
	53	/* ### Declarations **************************************************/
	54
	55	/* Locale stuff from locid.cpp */
	56	U_CFUNC void locale_set_default(const char *id);
	57	U_CFUNC const char *locale_get_default(void);
	58	U_CFUNC int32_t
	59	locale_getKeywords(const char *localeID,
	60	char prev,
	61	char *keywords, int32_t keywordCapacity,
	62	char values, int32_t valuesCapacity, int32_t valLen,
	63	UBool valuesToo,
	64	UErrorCode *status);
	65
	66	/* ### Data tables **************************************************/
	67
	68	/**
	69	* Table of language codes, both 2- and 3-letter, with preference
	70	* given to 2-letter codes where possible. Includes 3-letter codes
	71	* that lack a 2-letter equivalent.
	72	*
	73	* This list must be in sorted order. This list is returned directly
	74	* to the user by some API.
	75	*
	76	* This list must be kept in sync with LANGUAGES_3, with corresponding
	77	* entries matched.
	78	*
	79	* This table should be terminated with a NULL entry, followed by a
	80	* second list, and another NULL entry. The first list is visible to
	81	* user code when this array is returned by API. The second list
	82	* contains codes we support, but do not expose through user API.
	83	*
	84	* Notes
	85	*
	86	* Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
	87	* include the revisions up to 2001/7/27 CWB
	88	*
	89	* The 3 character codes are the terminology codes like RFC 3066. This
	90	* is compatible with prior ICU codes
	91	*
	92	* "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
	93	* table but now at the end of the table because 3 character codes are
	94	* duplicates. This avoids bad searches going from 3 to 2 character
	95	* codes.
	96	*
	97	* The range qaa-qtz is reserved for local use
	98	*/
	99	/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
	100	/* ISO639 table version is 20150505 */
	101	/* Subsequent hand addition of selected languages */
	102	static const char * const LANGUAGES[] = {
	103	"aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
	104	"af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
	105	"aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
	106	"arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
	107	"asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
	108	"ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
	109	"be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
	110	"bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
	111	"bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
	112	"brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
	113	"ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
	114	"ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
	115	"chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
	116	"cs", "csb", "cu", "cv", "cy",
	117	"da", "dak", "dar", "dav", "de", "del", "den", "dgr",
	118	"din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
	119	"dyo", "dyu", "dz", "dzg",
	120	"ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
	121	"en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
	122	"ext",
	123	"fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
	124	"fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
	125	"frs", "fur", "fy",
	126	"ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
	127	"gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
	128	"gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
	129	"gur", "guz", "gv", "gwi",
	130	"ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
	131	"hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
	132	"hup", "hy", "hz",
	133	"ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
	134	"ilo", "inh", "io", "is", "it", "iu", "izh",
	135	"ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
	136	"jv",
	137	"ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
	138	"kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
	139	"kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
	140	"kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
	141	"kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
	142	"kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
	143	"kv", "kw", "ky",
	144	"la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
	145	"lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
	146	"lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
	147	"lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
	148	"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
	149	"mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
	150	"mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
	151	"ml", "mn", "mnc", "mni", "mo",
	152	"moh", "mos", "mr", "mrj",
	153	"ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
	154	"my", "mye", "myv", "mzn",
	155	"na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
	156	"new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
	157	"nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
	158	"nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
	159	"oc", "oj", "om", "or", "os", "osa", "ota",
	160	"pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
	161	"pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
	162	"pon", "prg", "pro", "ps", "pt",
	163	"qu", "quc", "qug",
	164	"raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
	165	"rof", "rom", "rtm", "ru", "rue", "rug", "rup",
	166	"rw", "rwk",
	167	"sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
	168	"sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
	169	"se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
	170	"sgs", "shi", "shn", "shu", "si", "sid", "sk",
	171	"sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
	172	"sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
	173	"ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
	174	"sv", "sw", "swb", "swc", "syc", "syr", "szl",
	175	"ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
	176	"th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
	177	"tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
	178	"tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
	179	"tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
	180	"udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
	181	"vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
	182	"vot", "vro", "vun",
	183	"wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
	184	"xal", "xh", "xmf", "xog",
	185	"yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
	186	"za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
	187	"zun", "zxx", "zza",
	188	NULL, /* first terminator: ends the primary (sorted) list */
	189	"in", "iw", "ji", "jw", "sh", /* obsolete language codes */
	190	NULL /* second terminator: ends the obsolete-code list */
	191	};
	192
	193	static const char* const DEPRECATED_LANGUAGES[]={
	194	"in", "iw", "ji", "jw", NULL, NULL /* deprecated 2-letter codes; positions line up with REPLACEMENT_LANGUAGES */
	195	};
	196	static const char* const REPLACEMENT_LANGUAGES[]={
	197	"id", "he", "yi", "jv", NULL, NULL /* current code at the same index as each deprecated code above */
	198	};
	199
	200	/**
	201	* Table of 3-letter language codes.
	202	*
	203	* This is a lookup table used to convert 3-letter language codes to
	204	* their 2-letter equivalent, where possible. It must be kept in sync
	205	* with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
	206	* same language as LANGUAGES_3[i]. The commented-out lines are
	207	* copied from LANGUAGES to make eyeballing this baby easier.
	208	*
	209	* Where a 3-letter language code has no 2-letter equivalent, the
	210	* 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
	211	*
	212	* This table should be terminated with a NULL entry, followed by a
	213	* second list, and another NULL entry. The two lists correspond to
	214	* the two lists in LANGUAGES.
	215	*/
	216	/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
	217	/* ISO639 table version is 20150505 */
	218	/* Subsequent hand addition of selected languages */
	219	static const char * const LANGUAGES_3[] = {
	220	"aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
	221	"afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
	222	"aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
	223	"arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
	224	"asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
	225	"bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
	226	"bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
	227	"bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
	228	"bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
	229	"brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
	230	"cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
	231	"cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
	232	"chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
	233	"ces", "csb", "chu", "chv", "cym",
	234	"dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
	235	"din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
	236	"dyo", "dyu", "dzo", "dzg",
	237	"ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
	238	"eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
	239	"ext",
	240	"fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
	241	"fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
	242	"frs", "fur", "fry",
	243	"gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
	244	"gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
	245	"gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
	246	"gur", "guz", "glv", "gwi",
	247	"hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
	248	"hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
	249	"hup", "hye", "her",
	250	"ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
	251	"ilo", "inh", "ido", "isl", "ita", "iku", "izh",
	252	"jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
	253	"jav",
	254	"kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
	255	"kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
	256	"kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
	257	"kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
	258	"kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
	259	"kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
	260	"kom", "cor", "kir",
	261	"lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
	262	"lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
	263	"lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
	264	"lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
	265	"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
	266	"mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
	267	"mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
	268	"mal", "mon", "mnc", "mni", "mol",
	269	"moh", "mos", "mar", "mrj",
	270	"msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
	271	"mya", "mye", "myv", "mzn",
	272	"nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
	273	"new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
	274	"nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
	275	"nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
	276	"oci", "oji", "orm", "ori", "oss", "osa", "ota",
	277	"pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
	278	"pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
	279	"pon", "prg", "pro", "pus", "por",
	280	"que", "quc", "qug",
	281	"raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
	282	"rof", "rom", "rtm", "rus", "rue", "rug", "rup",
	283	"kin", "rwk",
	284	"san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
	285	"sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
	286	"sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
	287	"sgs", "shi", "shn", "shu", "sin", "sid", "slk",
	288	"slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
	289	"sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
	290	"ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
	291	"swe", "swa", "swb", "swc", "syc", "syr", "szl",
	292	"tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
	293	"tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
	294	"tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
	295	"tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
	296	"tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
	297	"udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
	298	"vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
	299	"vot", "vro", "vun",
	300	"wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
	301	"xal", "xho", "xmf", "xog",
	302	"yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
	303	"zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
	304	"zun", "zxx", "zza",
	305	NULL, /* first terminator: ends the entries paired with the primary LANGUAGES list */
	306	/* "in", "iw", "ji", "jw", "sh", */
	307	"ind", "heb", "yid", "jaw", "srp",
	308	NULL /* second terminator: ends the entries paired with the obsolete codes */
	309	};
	310
	311	/**
	312	* Table of 2-letter country codes.
	313	*
	314	* This list must be in sorted order. This list is returned directly
	315	* to the user by some API.
	316	*
	317	* This list must be kept in sync with COUNTRIES_3, with corresponding
	318	* entries matched.
	319	*
	320	* This table should be terminated with a NULL entry, followed by a
	321	* second list, and another NULL entry. The first list is visible to
	322	* user code when this array is returned by API. The second list
	323	* contains codes we support, but do not expose through user API.
	324	*
	325	* Notes:
	326	*
	327	* ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
	328	* http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
	329	* new codes keeping the old ones for compatibility updated to include
	330	* 1999/12/03 revisions CWB
	331	*
	332	* RO(ROM) is now RO(ROU) according to
	333	* http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
	334	*/
	335	static const char * const COUNTRIES[] = {
	336	"AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM",
	337	"AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
	338	"BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
	339	"BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
	340	"BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
	341	"CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR",
	342	"CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
	343	"DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
	344	"ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
	345	"GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
	346	"GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
	347	"GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
	348	"IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
	349	"IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
	350	"KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
	351	"LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
	352	"LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
	353	"ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
	354	"MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
	355	"NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
	356	"NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
	357	"PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
	358	"PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
	359	"SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
	360	"SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
	361	"SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ",
	362	"TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
	363	"TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
	364	"VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
	365	"WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
	366	NULL, /* first terminator: ends the primary (sorted) list */
	367	"AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
	368	NULL /* second terminator: ends the obsolete-code list */
	369	};
	370
	371	static const char* const DEPRECATED_COUNTRIES[] = {
	372	"AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
	373	};
	374	static const char* const REPLACEMENT_COUNTRIES[] = {
	375	/* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
	376	"CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */
	377	}; /* entry i here is the replacement for DEPRECATED_COUNTRIES[i], per the aligned comment row above */
	378
	379	/**
	380	* Table of 3-letter country codes.
	381	*
	382	* This is a lookup table used to convert 3-letter country codes to
	383	* their 2-letter equivalent. It must be kept in sync with COUNTRIES.
	384	* For all valid i, COUNTRIES[i] must refer to the same country as
	385	* COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
	386	* to make eyeballing this baby easier.
	387	*
	388	* This table should be terminated with a NULL entry, followed by a
	389	* second list, and another NULL entry. The two lists correspond to
	390	* the two lists in COUNTRIES.
	391	*/
	392	static const char * const COUNTRIES_3[] = {
	393	/* "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
	394	"ASC", "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
	395	/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
	396	"AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
	397	/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
	398	"BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
	399	/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
	400	"BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
	401	/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
	402	"BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
	403	/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR", */
	404	"CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CPT", "CRI",
	405	/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
	406	"CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
	407	/* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
	408	"DMA", "DOM", "DZA", "EA ", "ECU", "EST", "EGY", "ESH", "ERI", /* no valid 3-letter code for EA */
	409	/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
	410	"ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
	411	/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
	412	"GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
	413	/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
	414	"GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
	415	/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
	416	"GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
	417	/* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
	418	"IC ", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", /* no valid 3-letter code for IC */
	419	/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
	420	"ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
	421	/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
	422	"COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
	423	/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
	424	"LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
	425	/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
	426	"LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
	427	/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
	428	"MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
	429	/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
	430	"MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
	431	/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
	432	"NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
	433	/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
	434	"NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
	435	/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
	436	"PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
	437	/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
	438	"PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
	439	/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
	440	"SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
	441	/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
	442	"SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
	443	/* "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ", */
	444	"SXM", "SYR", "SWZ", "TAA", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
	445	/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
	446	"TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
	447	/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
	448	"TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
	449	/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
	450	"VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
	451	/* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
	452	"WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
	453	NULL, /* first terminator: ends the entries paired with the primary COUNTRIES list */
	454	/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
	455	"ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
	456	NULL /* second terminator: ends the entries paired with the obsolete codes */
	457	};
	458
	/*
	 * One row of a canonicalization table: an input locale ID and the ID it is
	 * rewritten to.  Both members point at NUL-terminated strings (the table that
	 * uses this type initialises them from string literals), so they must be
	 * pointers rather than single characters.
	 */
	typedef struct CanonicalizationMap {
	    const char *id;          /* input ID */
	    const char *canonicalID; /* canonicalized output ID */
	} CanonicalizationMap;
	463
	464	/**
	465	* A map to canonicalize locale IDs. This handles a variety of
	466	* different semantic kinds of transformations.
	467	*/
	468	static const CanonicalizationMap CANONICALIZE_MAP[] = {
		/* Each row is { input ID, canonicalized output ID }; the trailing comment on a row names the kind of input. */
	469	{ "", "en_US_POSIX" }, /* .NET name */ // open ICU 64 deleted, we restore
	470	{ "c", "en_US_POSIX" }, /* POSIX name */ // open ICU 64 deleted, we restore
	471	{ "posix", "en_US_POSIX" }, /* POSIX name (alias of C) */ // open ICU 64 deleted, we restore
	472	{ "art_LOJBAN", "jbo" }, /* registered name */
	473	{ "hy__AREVELA", "hy" }, /* Registered IANA variant */
	474	{ "hy__AREVMDA", "hyw" }, /* Registered IANA variant */
	475	{ "zh_GAN", "gan" }, /* registered name */
	476	{ "zh_GUOYU", "zh" }, /* registered name */
	477	{ "zh_HAKKA", "hak" }, /* registered name */
	478	{ "zh_MIN_NAN", "nan" }, /* registered name */
	479	{ "zh_WUU", "wuu" }, /* registered name */
	480	{ "zh_XIANG", "hsn" }, /* registered name */
	481	{ "zh_YUE", "yue" }, /* registered name */
	482	};
	483
	484	/* ### BCP47 Conversion *******************************************/
	/*
	 * Test if the locale id has a BCP47 extension and does not have '@':
	 * true when `id` is non-NULL, contains no '@', and its shortest
	 * '_'/'-'-delimited subtag is exactly one character long.
	 *
	 * The macro inspects its own argument throughout; it no longer reaches for a
	 * variable that happens to be called `localeID` at the expansion site.
	 * `id` is evaluated more than once, so pass a side-effect-free expression.
	 */
	#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
	/*
	 * Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails.
	 *
	 * finalID  lvalue that receives either `buffer` (conversion succeeded) or `id`
	 * id       the BCP47 language tag to convert
	 * buffer   scratch buffer handed to uloc_forLanguageTag()
	 * length   capacity of `buffer`
	 * err      UErrorCode* ; a "not terminated" warning is promoted to
	 *          U_BUFFER_OVERFLOW_ERROR because the buffer cannot be used as a
	 *          C string in that case
	 */
	#define _ConvertBCP47(finalID, id, buffer, length, err) UPRV_BLOCK_MACRO_BEGIN { \
	    if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \
	            U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
	        finalID=id; \
	        if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
	    } else { \
	        finalID=buffer; \
	    } \
	} UPRV_BLOCK_MACRO_END
	497	/* Gets the size of the shortest subtag in the given localeID. */
	498	static int32_t getShortestSubtagLength(const char *localeID) {
	499	int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
	500	int32_t length = localeIDLength;
	501	int32_t tmpLength = 0;
	502	int32_t i;
	503	UBool reset = TRUE;
	504
	505	for (i = 0; i < localeIDLength; i++) {
	506	if (localeID[i] != '_' && localeID[i] != '-') {
	507	if (reset) {
	508	tmpLength = 0;
	509	reset = FALSE;
	510	}
	511	tmpLength++;
	512	} else {
	513	if (tmpLength != 0 && tmpLength < length) {
	514	length = tmpLength;
	515	}
	516	reset = TRUE;
	517	}
	518	}
	519
	520	return length;
	521	}
	522
	523	/* ### Keywords **************************************************/
	/* True for the ASCII digits '0'..'9'.  The argument is evaluated twice. */
	#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
	/* True for an ASCII letter (per uprv_isASCIILetter) or an ASCII digit. */
	#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
	/* Punctuation/symbols allowed in legacy key values */
	#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

	/* Size, terminator included, of the fixed buffers that hold one keyword name. */
	#define ULOC_KEYWORD_BUFFER_LEN 25
	/* Upper bound on the number of keywords collected from one locale ID. */
	#define ULOC_MAX_NO_KEYWORDS 25
	531
	532	U_CAPI const char * U_EXPORT2
	533	locale_getKeywordsStart(const char *localeID) {
	534	const char *result = NULL;
	535	if((result = uprv_strchr(localeID, '@')) != NULL) {
	536	return result;
	537	}
	538	#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
	539	else {
	540	/* We do this because the @ sign is variant, and the @ sign used on one
	541	EBCDIC machine won't be compiled the same way on other EBCDIC based
	542	machines. */
	543	static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
	544	const uint8_t *charToFind = ebcdicSigns;
	545	while(*charToFind) {
	546	if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
	547	return result;
	548	}
	549	charToFind++;
	550	}
	551	}
	552	#endif
	553	return NULL;
	554	}
	555
	556	/**
	557	* @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
	558	* @param keywordName incoming name to be canonicalized
	559	* @param status return status (keyword too long)
	560	* @return length of the keyword name
	561	*/
	562	static int32_t locale_canonKeywordName(char buf, const char keywordName, UErrorCode *status)
	563	{
	564	int32_t keywordNameLen = 0;
	565
	566	for (; *keywordName != 0; keywordName++) {
	567	if (!UPRV_ISALPHANUM(*keywordName)) {
	568	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed keyword name */
	569	return 0;
	570	}
	571	if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
	572	buf[keywordNameLen++] = uprv_tolower(*keywordName);
	573	} else {
	574	/* keyword name too long for internal buffer */
	575	*status = U_INTERNAL_PROGRAM_ERROR;
	576	return 0;
	577	}
	578	}
	579	if (keywordNameLen == 0) {
	580	status = U_ILLEGAL_ARGUMENT_ERROR; / empty keyword name */
	581	return 0;
	582	}
	583	buf[keywordNameLen] = 0; /* terminate */
	584
	585	return keywordNameLen;
	586	}
	587
	588	typedef struct {
	589	char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* NUL-terminated keyword name as stored by _getKeywords(): lowercased, spaces removed */
	590	int32_t keywordLen; /* number of characters in keyword, terminator not counted */
	591	const char *valueStart; /* first character of the value inside the locale ID being parsed (not a copy) */
	592	int32_t valueLen; /* length of the value with trailing spaces excluded; the text at valueStart is not terminated at this length */
	593	} KeywordStruct;
	594
	595	static int32_t U_CALLCONV
	596	compareKeywordStructs(const void * /context/, const void left, const void right) {
	597	const char* leftString = ((const KeywordStruct *)left)->keyword;
	598	const char* rightString = ((const KeywordStruct *)right)->keyword;
	599	return uprv_strcmp(leftString, rightString);
	600	}
	601
	602	static int32_t
	603	_getKeywords(const char *localeID,
	604	char prev,
	605	char *keywords, int32_t keywordCapacity,
	606	char values, int32_t valuesCapacity, int32_t valLen,
	607	UBool valuesToo,
	608	UErrorCode *status)
	609	{
	610	KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
	611
	612	int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
	613	int32_t numKeywords = 0;
	614	const char* pos = localeID;
	615	const char* equalSign = NULL;
	616	const char* semicolon = NULL;
	617	int32_t i = 0, j, n;
	618	int32_t keywordsLen = 0;
	619	int32_t valuesLen = 0;
	620
	621	if(prev == '@') { /* start of keyword definition */
	622	/* we will grab pairs, trim spaces, lowercase keywords, sort and return */
	623	do {
	624	UBool duplicate = FALSE;
	625	/* skip leading spaces */
	626	while(*pos == ' ') {
	627	pos++;
	628	}
	629	if (!pos) { / handle trailing "; " */
	630	break;
	631	}
	632	if(numKeywords == maxKeywords) {
	633	*status = U_INTERNAL_PROGRAM_ERROR;
	634	return 0;
	635	}
	636	equalSign = uprv_strchr(pos, '=');
	637	semicolon = uprv_strchr(pos, ';');
	638	/* lack of '=' [foo@currency] is illegal */
	639	/* ';' before '=' [foo@currency;collation=pinyin] is illegal */
	640	if(!equalSign \|\| (semicolon && semicolon<equalSign)) {
	641	*status = U_INVALID_FORMAT_ERROR;
	642	return 0;
	643	}
	644	/* need to normalize both keyword and keyword name */
	645	if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
	646	/* keyword name too long for internal buffer */
	647	*status = U_INTERNAL_PROGRAM_ERROR;
	648	return 0;
	649	}
	650	for(i = 0, n = 0; i < equalSign - pos; ++i) {
	651	if (pos[i] != ' ') {
	652	keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
	653	}
	654	}
	655
	656	/* zero-length keyword is an error. */
	657	if (n == 0) {
	658	*status = U_INVALID_FORMAT_ERROR;
	659	return 0;
	660	}
	661
	662	keywordList[numKeywords].keyword[n] = 0;
	663	keywordList[numKeywords].keywordLen = n;
	664	/* now grab the value part. First we skip the '=' */
	665	equalSign++;
	666	/* then we leading spaces */
	667	while(*equalSign == ' ') {
	668	equalSign++;
	669	}
	670
	671	/* Premature end or zero-length value */
	672	if (!*equalSign \|\| equalSign == semicolon) {
	673	*status = U_INVALID_FORMAT_ERROR;
	674	return 0;
	675	}
	676
	677	keywordList[numKeywords].valueStart = equalSign;
	678
	679	pos = semicolon;
	680	i = 0;
	681	if(pos) {
	682	while(*(pos - i - 1) == ' ') {
	683	i++;
	684	}
	685	keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
	686	pos++;
	687	} else {
	688	i = (int32_t)uprv_strlen(equalSign);
	689	while(i && equalSign[i-1] == ' ') {
	690	i--;
	691	}
	692	keywordList[numKeywords].valueLen = i;
	693	}
	694	/* If this is a duplicate keyword, then ignore it */
	695	for (j=0; j<numKeywords; ++j) {
	696	if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
	697	duplicate = TRUE;
	698	break;
	699	}
	700	}
	701	if (!duplicate) {
	702	++numKeywords;
	703	}
	704	} while(pos);
	705
	706	/* now we have a list of keywords */
	707	/* we need to sort it */
	708	uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
	709
	710	/* Now construct the keyword part */
	711	for(i = 0; i < numKeywords; i++) {
	712	if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
	713	uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
	714	if(valuesToo) {
	715	keywords[keywordsLen + keywordList[i].keywordLen] = '=';
	716	} else {
	717	keywords[keywordsLen + keywordList[i].keywordLen] = 0;
	718	}
	719	}
	720	keywordsLen += keywordList[i].keywordLen + 1;
	721	if(valuesToo) {
	722	if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
	723	uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
	724	}
	725	keywordsLen += keywordList[i].valueLen;
	726
	727	if(i < numKeywords - 1) {
	728	if(keywordsLen < keywordCapacity) {
	729	keywords[keywordsLen] = ';';
	730	}
	731	keywordsLen++;
	732	}
	733	}
	734	if(values) {
	735	if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
	736	uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
	737	values[valuesLen + keywordList[i].valueLen] = 0;
	738	}
	739	valuesLen += keywordList[i].valueLen + 1;
	740	}
	741	}
	742	if(values) {
	743	values[valuesLen] = 0;
	744	if(valLen) {
	745	*valLen = valuesLen;
	746	}
	747	}
	748	return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
	749	} else {
	750	return 0;
	751	}
	752	}
	753
	754	U_CFUNC int32_t
	755	locale_getKeywords(const char *localeID,
	756	char prev,
	757	char *keywords, int32_t keywordCapacity,
	758	char values, int32_t valuesCapacity, int32_t valLen,
	759	UBool valuesToo,
	760	UErrorCode *status) {
	761	return _getKeywords(localeID, prev, keywords, keywordCapacity,
	762	values, valuesCapacity, valLen, valuesToo,
	763	status);
	764	}
	765
	766	U_CAPI int32_t U_EXPORT2
	767	uloc_getKeywordValue(const char* localeID,
	768	const char* keywordName,
	769	char* buffer, int32_t bufferCapacity,
	770	UErrorCode* status)
	771	{
	772	if (buffer != nullptr) {
	773	buffer[0] = '\0';
	774	}
	775	const char* startSearchHere = NULL;
	776	const char* nextSeparator = NULL;
	777	char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
	778	char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
	779	int32_t result = 0;
	780
	781	if(status && U_SUCCESS(*status) && localeID) {
	782	char tempBuffer[ULOC_FULLNAME_CAPACITY];
	783	const char* tmpLocaleID;
	784
	785	if (keywordName == NULL \|\| keywordName[0] == 0) {
	786	*status = U_ILLEGAL_ARGUMENT_ERROR;
	787	return 0;
	788	}
	789
	790	locale_canonKeywordName(keywordNameBuffer, keywordName, status);
	791	if(U_FAILURE(*status)) {
	792	return 0;
	793	}
	794
	795	if (_hasBCP47Extension(localeID)) {
	796	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
	797	} else {
	798	tmpLocaleID=localeID;
	799	}
	800
	801	startSearchHere = locale_getKeywordsStart(tmpLocaleID);
	802	if(startSearchHere == NULL) {
	803	/* no keywords, return at once */
	804	return 0;
	805	}
	806
	807	/* find the first keyword */
	808	while(startSearchHere) {
	809	const char* keyValueTail;
	810	int32_t keyValueLen;
	811
	812	startSearchHere++; /* skip @ or ; */
	813	nextSeparator = uprv_strchr(startSearchHere, '=');
	814	if(!nextSeparator) {
	815	status = U_ILLEGAL_ARGUMENT_ERROR; / key must have =value */
	816	return 0;
	817	}
	818	/* strip leading & trailing spaces (TC decided to tolerate these) */
	819	while(*startSearchHere == ' ') {
	820	startSearchHere++;
	821	}
	822	keyValueTail = nextSeparator;
	823	while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
	824	keyValueTail--;
	825	}
	826	/* now keyValueTail points to first char after the keyName */
	827	/* copy & normalize keyName from locale */
	828	if (startSearchHere == keyValueTail) {
	829	status = U_ILLEGAL_ARGUMENT_ERROR; / empty keyword name in passed-in locale */
	830	return 0;
	831	}
	832	keyValueLen = 0;
	833	while (startSearchHere < keyValueTail) {
	834	if (!UPRV_ISALPHANUM(*startSearchHere)) {
	835	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed keyword name */
	836	return 0;
	837	}
	838	if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
	839	localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
	840	} else {
	841	/* keyword name too long for internal buffer */
	842	*status = U_INTERNAL_PROGRAM_ERROR;
	843	return 0;
	844	}
	845	}
	846	localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
	847
	848	startSearchHere = uprv_strchr(nextSeparator, ';');
	849
	850	if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
	851	/* current entry matches the keyword. */
	852	nextSeparator++; /* skip '=' */
	853	/* First strip leading & trailing spaces (TC decided to tolerate these) */
	854	while(*nextSeparator == ' ') {
	855	nextSeparator++;
	856	}
	857	keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
	858	while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
	859	keyValueTail--;
	860	}
	861	/* Now copy the value, but check well-formedness */
	862	if (nextSeparator == keyValueTail) {
	863	status = U_ILLEGAL_ARGUMENT_ERROR; / empty key value name in passed-in locale */
	864	return 0;
	865	}
	866	keyValueLen = 0;
	867	while (nextSeparator < keyValueTail) {
	868	if (!UPRV_ISALPHANUM(nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(nextSeparator)) {
	869	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed key value */
	870	return 0;
	871	}
	872	if (keyValueLen < bufferCapacity) {
	873	/* Should we lowercase value to return here? Tests expect as-is. */
	874	buffer[keyValueLen++] = *nextSeparator++;
	875	} else { /* keep advancing so we return correct length in case of overflow */
	876	keyValueLen++;
	877	nextSeparator++;
	878	}
	879	}
	880	result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
	881	return result;
	882	}
	883	}
	884	}
	885	return 0;
	886	}
	887
	888	U_CAPI int32_t U_EXPORT2
	889	uloc_setKeywordValue(const char* keywordName,
	890	const char* keywordValue,
	891	char* buffer, int32_t bufferCapacity,
	892	UErrorCode* status)
	893	{
	894	/* TODO: sorting. removal. */
	895	int32_t keywordNameLen;
	896	int32_t keywordValueLen;
	897	int32_t bufLen;
	898	int32_t needLen = 0;
	899	char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
	900	char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
	901	char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
	902	int32_t rc;
	903	char* nextSeparator = NULL;
	904	char* nextEqualsign = NULL;
	905	char* startSearchHere = NULL;
	906	char* keywordStart = NULL;
	907	CharString updatedKeysAndValues;
	908	int32_t updatedKeysAndValuesLen;
	909	UBool handledInputKeyAndValue = FALSE;
	910	char keyValuePrefix = '@';
	911
	912	if(U_FAILURE(*status)) {
	913	return -1;
	914	}
	915	if (keywordName == NULL \|\| keywordName[0] == 0 \|\| bufferCapacity <= 1) {
	916	*status = U_ILLEGAL_ARGUMENT_ERROR;
	917	return 0;
	918	}
	919	bufLen = (int32_t)uprv_strlen(buffer);
	920	if(bufferCapacity<bufLen) {
	921	/* The capacity is less than the length?! Is this NULL terminated? */
	922	*status = U_ILLEGAL_ARGUMENT_ERROR;
	923	return 0;
	924	}
	925	keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
	926	if(U_FAILURE(*status)) {
	927	return 0;
	928	}
	929
	930	keywordValueLen = 0;
	931	if(keywordValue) {
	932	while (*keywordValue != 0) {
	933	if (!UPRV_ISALPHANUM(keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(keywordValue)) {
	934	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed key value */
	935	return 0;
	936	}
	937	if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
	938	/* Should we force lowercase in value to set? */
	939	keywordValueBuffer[keywordValueLen++] = *keywordValue++;
	940	} else {
	941	/* keywordValue too long for internal buffer */
	942	*status = U_INTERNAL_PROGRAM_ERROR;
	943	return 0;
	944	}
	945	}
	946	}
	947	keywordValueBuffer[keywordValueLen] = 0; /* terminate */
	948
	949	startSearchHere = (char*)locale_getKeywordsStart(buffer);
	950	if(startSearchHere == NULL \|\| (startSearchHere[1]==0)) {
	951	if(keywordValueLen == 0) { /* no keywords = nothing to remove */
	952	return bufLen;
	953	}
	954
	955	needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
	956	if(startSearchHere) { /* had a single @ */
	957	needLen--; /* already had the @ */
	958	/* startSearchHere points at the @ */
	959	} else {
	960	startSearchHere=buffer+bufLen;
	961	}
	962	if(needLen >= bufferCapacity) {
	963	*status = U_BUFFER_OVERFLOW_ERROR;
	964	return needLen; /* no change */
	965	}
	966	*startSearchHere++ = '@';
	967	uprv_strcpy(startSearchHere, keywordNameBuffer);
	968	startSearchHere += keywordNameLen;
	969	*startSearchHere++ = '=';
	970	uprv_strcpy(startSearchHere, keywordValueBuffer);
	971	return needLen;
	972	} /* end shortcut - no @ */
	973
	974	keywordStart = startSearchHere;
	975	/* search for keyword */
	976	while(keywordStart) {
	977	const char* keyValueTail;
	978	int32_t keyValueLen;
	979
	980	keywordStart++; /* skip @ or ; */
	981	nextEqualsign = uprv_strchr(keywordStart, '=');
	982	if (!nextEqualsign) {
	983	status = U_ILLEGAL_ARGUMENT_ERROR; / key must have =value */
	984	return 0;
	985	}
	986	/* strip leading & trailing spaces (TC decided to tolerate these) */
	987	while(*keywordStart == ' ') {
	988	keywordStart++;
	989	}
	990	keyValueTail = nextEqualsign;
	991	while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
	992	keyValueTail--;
	993	}
	994	/* now keyValueTail points to first char after the keyName */
	995	/* copy & normalize keyName from locale */
	996	if (keywordStart == keyValueTail) {
	997	status = U_ILLEGAL_ARGUMENT_ERROR; / empty keyword name in passed-in locale */
	998	return 0;
	999	}
	1000	keyValueLen = 0;
	1001	while (keywordStart < keyValueTail) {
	1002	if (!UPRV_ISALPHANUM(*keywordStart)) {
	1003	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed keyword name */
	1004	return 0;
	1005	}
	1006	if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
	1007	localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
	1008	} else {
	1009	/* keyword name too long for internal buffer */
	1010	*status = U_INTERNAL_PROGRAM_ERROR;
	1011	return 0;
	1012	}
	1013	}
	1014	localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
	1015
	1016	nextSeparator = uprv_strchr(nextEqualsign, ';');
	1017
	1018	/* start processing the value part */
	1019	nextEqualsign++; /* skip '=' */
	1020	/* First strip leading & trailing spaces (TC decided to tolerate these) */
	1021	while(*nextEqualsign == ' ') {
	1022	nextEqualsign++;
	1023	}
	1024	keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
	1025	while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
	1026	keyValueTail--;
	1027	}
	1028	if (nextEqualsign == keyValueTail) {
	1029	status = U_ILLEGAL_ARGUMENT_ERROR; / empty key value in passed-in locale */
	1030	return 0;
	1031	}
	1032
	1033	rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
	1034	if(rc == 0) {
	1035	/* Current entry matches the input keyword. Update the entry */
	1036	if(keywordValueLen > 0) { /* updating a value */
	1037	updatedKeysAndValues.append(keyValuePrefix, *status);
	1038	keyValuePrefix = ';'; /* for any subsequent key-value pair */
	1039	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
	1040	updatedKeysAndValues.append('=', *status);
	1041	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
	1042	} /* else removing this entry, don't emit anything */
	1043	handledInputKeyAndValue = TRUE;
	1044	} else {
	1045	/* input keyword sorts earlier than current entry, add before current entry */
	1046	if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
	1047	/* insert new entry at this location */
	1048	updatedKeysAndValues.append(keyValuePrefix, *status);
	1049	keyValuePrefix = ';'; /* for any subsequent key-value pair */
	1050	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
	1051	updatedKeysAndValues.append('=', *status);
	1052	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
	1053	handledInputKeyAndValue = TRUE;
	1054	}
	1055	/* copy the current entry */
	1056	updatedKeysAndValues.append(keyValuePrefix, *status);
	1057	keyValuePrefix = ';'; /* for any subsequent key-value pair */
	1058	updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
	1059	updatedKeysAndValues.append('=', *status);
	1060	updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
	1061	}
	1062	if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
	1063	/* append new entry at the end, it sorts later than existing entries */
	1064	updatedKeysAndValues.append(keyValuePrefix, *status);
	1065	/* skip keyValuePrefix update, no subsequent key-value pair */
	1066	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
	1067	updatedKeysAndValues.append('=', *status);
	1068	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
	1069	handledInputKeyAndValue = TRUE;
	1070	}
	1071	keywordStart = nextSeparator;
	1072	} /* end loop searching */
	1073
	1074	/* Any error from updatedKeysAndValues.append above would be internal and not due to
	1075	* problems with the passed-in locale. So if we did encounter problems with the
	1076	* passed-in locale above, those errors took precedence and overrode any error
	1077	* status from updatedKeysAndValues.append, and also caused a return of 0. If there
	1078	* are errors here they are from updatedKeysAndValues.append; they do cause an
	1079	* error return but the passed-in locale is unmodified and the original bufLen is
	1080	* returned.
	1081	*/
	1082	if (!handledInputKeyAndValue \|\| U_FAILURE(*status)) {
	1083	/* if input key/value specified removal of a keyword not present in locale, or
	1084	* there was an error in CharString.append, leave original locale alone. */
	1085	return bufLen;
	1086	}
	1087
	1088	updatedKeysAndValuesLen = updatedKeysAndValues.length();
	1089	/* needLen = length of the part before '@' + length of updated key-value part including '@' */
	1090	needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
	1091	if(needLen >= bufferCapacity) {
	1092	*status = U_BUFFER_OVERFLOW_ERROR;
	1093	return needLen; /* no change */
	1094	}
	1095	if (updatedKeysAndValuesLen > 0) {
	1096	uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
	1097	}
	1098	buffer[needLen]=0;
	1099	return needLen;
	1100	}
	1101
	1102	/* ### ID parsing implementation **************************************************/
	1103
	1104	#define _isPrefixLetter(a) ((a=='x')\|\|(a=='X')\|\|(a=='i')\|\|(a=='I'))
	1105
	1106	/*returns TRUE if one of the special prefixes is here (s=string)
	1107	'x-' or 'i-' */
	1108	#define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
	1109
	1110	/* Dot terminates it because of POSIX form where dot precedes the codepage
	1111	* except for variant
	1112	*/
	1113	#define _isTerminator(a) ((a==0)\|\|(a=='.')\|\|(a=='@'))
	1114
	1115	/**
	1116	* Lookup 'key' in the array 'list'. The array 'list' should contain
	1117	* a NULL entry, followed by more entries, and a second NULL entry.
	1118	*
	1119	* The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
	1120	* COUNTRIES_3.
	1121	*/
	1122	static int16_t _findIndex(const char* const* list, const char* key)
	1123	{
	1124	const char* const* anchor = list;
	1125	int32_t pass = 0;
	1126
	1127	/* Make two passes through two NULL-terminated arrays at 'list' */
	1128	while (pass++ < 2) {
	1129	while (*list) {
	1130	if (uprv_strcmp(key, *list) == 0) {
	1131	return (int16_t)(list - anchor);
	1132	}
	1133	list++;
	1134	}
	1135	++list; /* skip final NULL CWB/
	1136	}
	1137	return -1;
	1138	}
	1139
	1140	/* count the length of src while copying it to dest; return strlen(src) */
	1141	static inline int32_t
	1142	_copyCount(char dest, int32_t destCapacity, const char src) {
	1143	const char *anchor;
	1144	char c;
	1145
	1146	anchor=src;
	1147	for(;;) {
	1148	if((c=*src)==0) {
	1149	return (int32_t)(src-anchor);
	1150	}
	1151	if(destCapacity<=0) {
	1152	return (int32_t)((src-anchor)+uprv_strlen(src));
	1153	}
	1154	++src;
	1155	*dest++=c;
	1156	--destCapacity;
	1157	}
	1158	}
	1159
	1160	U_CFUNC const char*
	1161	uloc_getCurrentCountryID(const char* oldID){
	1162	int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
	1163	if (offset >= 0) {
	1164	return REPLACEMENT_COUNTRIES[offset];
	1165	}
	1166	return oldID;
	1167	}
	1168	U_CFUNC const char*
	1169	uloc_getCurrentLanguageID(const char* oldID){
	1170	int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
	1171	if (offset >= 0) {
	1172	return REPLACEMENT_LANGUAGES[offset];
	1173	}
	1174	return oldID;
	1175	}
	1176	/*
	1177	* the internal functions _getLanguage(), _getCountry(), _getVariant()
	1178	* avoid duplicating code to handle the earlier locale ID pieces
	1179	* in the functions for the later ones by
	1180	* setting the *pEnd pointer to where they stopped parsing
	1181	*
	1182	* TODO try to use this in Locale
	1183	*/
	1184	U_CFUNC int32_t
	1185	ulocimp_getLanguage(const char *localeID,
	1186	char *language, int32_t languageCapacity,
	1187	const char **pEnd) {
	1188	int32_t i=0;
	1189	int32_t offset;
	1190	char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
	1191
	1192	/* if it starts with i- or x- then copy that prefix */
	1193	if(_isIDPrefix(localeID)) {
	1194	if(i<languageCapacity) {
	1195	language[i]=(char)uprv_tolower(*localeID);
	1196	}
	1197	if(i<languageCapacity) {
	1198	language[i+1]='-';
	1199	}
	1200	i+=2;
	1201	localeID+=2;
	1202	}
	1203
	1204	/* copy the language as far as possible and count its length */
	1205	while(!_isTerminator(localeID) && !_isIDSeparator(localeID)) {
	1206	if(i<languageCapacity) {
	1207	language[i]=(char)uprv_tolower(*localeID);
	1208	}
	1209	if(i<3) {
	1210	U_ASSERT(i>=0);
	1211	lang[i]=(char)uprv_tolower(*localeID);
	1212	}
	1213	i++;
	1214	localeID++;
	1215	}
	1216
	1217	if(i==3) {
	1218	/* convert 3 character code to 2 character code if possible CWB/
	1219	offset=_findIndex(LANGUAGES_3, lang);
	1220	if(offset>=0) {
	1221	i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
	1222	}
	1223	}
	1224
	1225	if(pEnd!=NULL) {
	1226	*pEnd=localeID;
	1227	}
	1228	return i;
	1229	}
	1230
	1231	U_CFUNC int32_t
	1232	ulocimp_getScript(const char *localeID,
	1233	char *script, int32_t scriptCapacity,
	1234	const char **pEnd)
	1235	{
	1236	int32_t idLen = 0;
	1237
	1238	if (pEnd != NULL) {
	1239	*pEnd = localeID;
	1240	}
	1241
	1242	/* copy the second item as far as possible and count its length */
	1243	while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
	1244	&& uprv_isASCIILetter(localeID[idLen])) {
	1245	idLen++;
	1246	}
	1247
	1248	/* If it's exactly 4 characters long, then it's a script and not a country. */
	1249	if (idLen == 4) {
	1250	int32_t i;
	1251	if (pEnd != NULL) {
	1252	*pEnd = localeID+idLen;
	1253	}
	1254	if(idLen > scriptCapacity) {
	1255	idLen = scriptCapacity;
	1256	}
	1257	if (idLen >= 1) {
	1258	script[0]=(char)uprv_toupper(*(localeID++));
	1259	}
	1260	for (i = 1; i < idLen; i++) {
	1261	script[i]=(char)uprv_tolower(*(localeID++));
	1262	}
	1263	}
	1264	else {
	1265	idLen = 0;
	1266	}
	1267	return idLen;
	1268	}
	1269
	1270	U_CFUNC int32_t
	1271	ulocimp_getCountry(const char *localeID,
	1272	char *country, int32_t countryCapacity,
	1273	const char **pEnd)
	1274	{
	1275	int32_t idLen=0;
	1276	char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
	1277	int32_t offset;
	1278
	1279	/* copy the country as far as possible and count its length */
	1280	while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
	1281	if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /CWB/
	1282	cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
	1283	}
	1284	idLen++;
	1285	}
	1286
	1287	/* the country should be either length 2 or 3 */
	1288	if (idLen == 2 \|\| idLen == 3) {
	1289	UBool gotCountry = FALSE;
	1290	/* convert 3 character code to 2 character code if possible CWB/
	1291	if(idLen==3) {
	1292	offset=_findIndex(COUNTRIES_3, cnty);
	1293	if(offset>=0) {
	1294	idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
	1295	gotCountry = TRUE;
	1296	}
	1297	}
	1298	if (!gotCountry) {
	1299	int32_t i = 0;
	1300	for (i = 0; i < idLen; i++) {
	1301	if (i < countryCapacity) {
	1302	country[i]=(char)uprv_toupper(localeID[i]);
	1303	}
	1304	}
	1305	}
	1306	localeID+=idLen;
	1307	} else {
	1308	idLen = 0;
	1309	}
	1310
	1311	if(pEnd!=NULL) {
	1312	*pEnd=localeID;
	1313	}
	1314
	1315	return idLen;
	1316	}
	1317
	1318	/**
	1319	* @param needSeparator if true, then add leading '_' if any variants
	1320	* are added to 'variant'
	1321	*/
	1322	static int32_t
	1323	_getVariantEx(const char *localeID,
	1324	char prev,
	1325	char *variant, int32_t variantCapacity,
	1326	UBool needSeparator) {
	1327	int32_t i=0;
	1328
	1329	/* get one or more variant tags and separate them with '_' */
	1330	if(_isIDSeparator(prev)) {
	1331	/* get a variant string after a '-' or '_' */
	1332	while(!_isTerminator(*localeID)) {
	1333	if (needSeparator) {
	1334	if (i<variantCapacity) {
	1335	variant[i] = '_';
	1336	}
	1337	++i;
	1338	needSeparator = FALSE;
	1339	}
	1340	if(i<variantCapacity) {
	1341	variant[i]=(char)uprv_toupper(*localeID);
	1342	if(variant[i]=='-') {
	1343	variant[i]='_';
	1344	}
	1345	}
	1346	i++;
	1347	localeID++;
	1348	}
	1349	}
	1350
	1351	/* if there is no variant tag after a '-' or '_' then look for '@' */
	1352	if(i==0) {
	1353	if(prev=='@') {
	1354	/* keep localeID */
	1355	} else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
	1356	++localeID; /* point after the '@' */
	1357	} else {
	1358	return 0;
	1359	}
	1360	while(!_isTerminator(*localeID)) {
	1361	if (needSeparator) {
	1362	if (i<variantCapacity) {
	1363	variant[i] = '_';
	1364	}
	1365	++i;
	1366	needSeparator = FALSE;
	1367	}
	1368	if(i<variantCapacity) {
	1369	variant[i]=(char)uprv_toupper(*localeID);
	1370	if(variant[i]=='-' \|\| variant[i]==',') {
	1371	variant[i]='_';
	1372	}
	1373	}
	1374	i++;
	1375	localeID++;
	1376	}
	1377	}
	1378
	1379	return i;
	1380	}
	1381
	1382	static int32_t
	1383	_getVariant(const char *localeID,
	1384	char prev,
	1385	char *variant, int32_t variantCapacity) {
	1386	return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
	1387	}
	1388
	1389	/* Keyword enumeration */
	1390
	1391	typedef struct UKeywordsContext {
	1392	char* keywords;
	1393	char* current;
	1394	} UKeywordsContext;
	1395
	1396	U_CDECL_BEGIN
	1397
	1398	static void U_CALLCONV
	1399	uloc_kw_closeKeywords(UEnumeration *enumerator) {
	1400	uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
	1401	uprv_free(enumerator->context);
	1402	uprv_free(enumerator);
	1403	}
	1404
	1405	static int32_t U_CALLCONV
	1406	uloc_kw_countKeywords(UEnumeration en, UErrorCode /status/) {
	1407	char kw = ((UKeywordsContext )en->context)->keywords;
	1408	int32_t result = 0;
	1409	while(*kw) {
	1410	result++;
	1411	kw += uprv_strlen(kw)+1;
	1412	}
	1413	return result;
	1414	}
	1415
	1416	static const char * U_CALLCONV
	1417	uloc_kw_nextKeyword(UEnumeration* en,
	1418	int32_t* resultLength,
	1419	UErrorCode* /status/) {
	1420	const char* result = ((UKeywordsContext *)en->context)->current;
	1421	int32_t len = 0;
	1422	if(*result) {
	1423	len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
	1424	((UKeywordsContext *)en->context)->current += len+1;
	1425	} else {
	1426	result = NULL;
	1427	}
	1428	if (resultLength) {
	1429	*resultLength = len;
	1430	}
	1431	return result;
	1432	}
	1433
	1434	static void U_CALLCONV
	1435	uloc_kw_resetKeywords(UEnumeration* en,
	1436	UErrorCode* /status/) {
	1437	((UKeywordsContext )en->context)->current = ((UKeywordsContext )en->context)->keywords;
	1438	}
	1439
	1440	U_CDECL_END
	1441
	1442
	1443	static const UEnumeration gKeywordsEnum = {
	1444	NULL,
	1445	NULL,
	1446	uloc_kw_closeKeywords,
	1447	uloc_kw_countKeywords,
	1448	uenum_unextDefault,
	1449	uloc_kw_nextKeyword,
	1450	uloc_kw_resetKeywords
	1451	};
	1452
	1453	U_CAPI UEnumeration* U_EXPORT2
	1454	uloc_openKeywordList(const char keywordList, int32_t keywordListSize, UErrorCode status)
	1455	{
	1456	LocalMemory<UKeywordsContext> myContext;
	1457	LocalMemory<UEnumeration> result;
	1458
	1459	if (U_FAILURE(*status)) {
	1460	return nullptr;
	1461	}
	1462	myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
	1463	result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
	1464	if (myContext.isNull() \|\| result.isNull()) {
	1465	*status = U_MEMORY_ALLOCATION_ERROR;
	1466	return nullptr;
	1467	}
	1468	uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));
	1469	myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1));
	1470	if (myContext->keywords == nullptr) {
	1471	*status = U_MEMORY_ALLOCATION_ERROR;
	1472	return nullptr;
	1473	}
	1474	uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
	1475	myContext->keywords[keywordListSize] = 0;
	1476	myContext->current = myContext->keywords;
	1477	result->context = myContext.orphan();
	1478	return result.orphan();
	1479	}
	1480
	1481	U_CAPI UEnumeration* U_EXPORT2
	1482	uloc_openKeywords(const char* localeID,
	1483	UErrorCode* status)
	1484	{
	1485	int32_t i=0;
	1486	char keywords[256];
	1487	int32_t keywordsCapacity = 256;
	1488	char tempBuffer[ULOC_FULLNAME_CAPACITY];
	1489	const char* tmpLocaleID;
	1490
	1491	if(status==NULL \|\| U_FAILURE(*status)) {
	1492	return 0;
	1493	}
	1494
	1495	if (_hasBCP47Extension(localeID)) {
	1496	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
	1497	} else {
	1498	if (localeID==NULL) {
	1499	localeID=uloc_getDefault();
	1500	}
	1501	tmpLocaleID=localeID;
	1502	}
	1503
	1504	/* Skip the language */
	1505	ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
	1506	if(_isIDSeparator(*tmpLocaleID)) {
	1507	const char *scriptID;
	1508	/* Skip the script if available */
	1509	ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
	1510	if(scriptID != tmpLocaleID+1) {
	1511	/* Found optional script */
	1512	tmpLocaleID = scriptID;
	1513	}
	1514	/* Skip the Country */
	1515	if (_isIDSeparator(*tmpLocaleID)) {
	1516	ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
	1517	if(_isIDSeparator(*tmpLocaleID)) {
	1518	_getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
	1519	}
	1520	}
	1521	}
	1522
	1523	/* keywords are located after '@' */
	1524	if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
	1525	i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
	1526	}
	1527
	1528	if(i) {
	1529	return uloc_openKeywordList(keywords, i, status);
	1530	} else {
	1531	return NULL;
	1532	}
	1533	}
	1534
	1535
	1536	/* bit-flags for 'options' parameter of _canonicalize */
	1537	#define _ULOC_STRIP_KEYWORDS 0x2
	1538	#define _ULOC_CANONICALIZE 0x1
	1539
	1540	#define OPTION_SET(options, mask) ((options & mask) != 0)
	1541
	1542	static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
	1543	#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
	1544
	1545	/**
	1546	* Canonicalize the given localeID, to level 1 or to level 2,
	1547	* depending on the options. To specify level 1, pass in options=0.
	1548	* To specify level 2, pass in options=_ULOC_CANONICALIZE.
	1549	*
	1550	* This is the code underlying uloc_getName and uloc_canonicalize.
	1551	*/
	1552	static int32_t
	1553	_canonicalize(const char* localeID,
	1554	char* result,
	1555	int32_t resultCapacity,
	1556	uint32_t options,
	1557	UErrorCode* err) {
	1558	int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
	1559	char localeBuffer[ULOC_FULLNAME_CAPACITY];
	1560	char tempBuffer[ULOC_FULLNAME_CAPACITY];
	1561	const char* origLocaleID;
	1562	const char* tmpLocaleID;
	1563	const char* keywordAssign = NULL;
	1564	const char* separatorIndicator = NULL;
	1565	char* name;
	1566	char* variant = NULL; /* pointer into name, or NULL */
	1567
	1568	if (U_FAILURE(*err)) {
	1569	return 0;
	1570	}
	1571
	1572	if (_hasBCP47Extension(localeID)) {
	1573	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
	1574	} else {
	1575	if (localeID==NULL) {
	1576	localeID=uloc_getDefault();
	1577	}
	1578	tmpLocaleID=localeID;
	1579	}
	1580
	1581	origLocaleID=tmpLocaleID;
	1582
	1583	/* if we are doing a full canonicalization, then put results in
	1584	localeBuffer, if necessary; otherwise send them to result. */
	1585	if (/OPTION_SET(options, _ULOC_CANONICALIZE) &&/
	1586	(result == NULL \|\| resultCapacity < (int32_t)sizeof(localeBuffer))) {
	1587	name = localeBuffer;
	1588	nameCapacity = (int32_t)sizeof(localeBuffer);
	1589	} else {
	1590	name = result;
	1591	nameCapacity = resultCapacity;
	1592	}
	1593
	1594	/* get all pieces, one after another, and separate with '_' */
	1595	len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
	1596
	1597	if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
	1598	const char *d = uloc_getDefault();
	1599
	1600	len = (int32_t)uprv_strlen(d);
	1601
	1602	if (name != NULL) {
	1603	uprv_memcpy(name, d, len);
	1604	}
	1605	} else if(_isIDSeparator(*tmpLocaleID)) {
	1606	const char *scriptID;
	1607
	1608	++fieldCount;
	1609	if(len<nameCapacity) {
	1610	name[len]='_';
	1611	}
	1612	++len;
	1613
	1614	scriptSize=ulocimp_getScript(tmpLocaleID+1,
	1615	(len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
	1616	if(scriptSize > 0) {
	1617	/* Found optional script */
	1618	tmpLocaleID = scriptID;
	1619	++fieldCount;
	1620	len+=scriptSize;
	1621	if (_isIDSeparator(*tmpLocaleID)) {
	1622	/* If there is something else, then we add the _ */
	1623	if(len<nameCapacity) {
	1624	name[len]='_';
	1625	}
	1626	++len;
	1627	}
	1628	}
	1629
	1630	if (_isIDSeparator(*tmpLocaleID)) {
	1631	const char *cntryID;
	1632	int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
	1633	(len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
	1634	if (cntrySize > 0) {
	1635	/* Found optional country */
	1636	tmpLocaleID = cntryID;
	1637	len+=cntrySize;
	1638	}
	1639	if(_isIDSeparator(*tmpLocaleID)) {
	1640	/* If there is something else, then we add the _ if we found country before. */
	1641	if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
	1642	++fieldCount;
	1643	if(len<nameCapacity) {
	1644	name[len]='_';
	1645	}
	1646	++len;
	1647	}
	1648
	1649	variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
	1650	(len<nameCapacity ? name+len : NULL), nameCapacity-len);
	1651	if (variantSize > 0) {
	1652	variant = len<nameCapacity ? name+len : NULL;
	1653	len += variantSize;
	1654	tmpLocaleID += variantSize + 1; /* skip '_' and variant */
	1655	}
	1656	}
	1657	}
	1658	}
	1659
	1660	/* Copy POSIX-style charset specifier, if any [mr.utf8] */
	1661	if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
	1662	UBool done = FALSE;
	1663	do {
	1664	char c = *tmpLocaleID;
	1665	switch (c) {
	1666	case 0:
	1667	case '@':
	1668	done = TRUE;
	1669	break;
	1670	default:
	1671	if (len<nameCapacity) {
	1672	name[len] = c;
	1673	}
	1674	++len;
	1675	++tmpLocaleID;
	1676	break;
	1677	}
	1678	} while (!done);
	1679	}
	1680
	1681	/* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
	1682	After this, tmpLocaleID either points to '@' or is NULL */
	1683	if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
	1684	keywordAssign = uprv_strchr(tmpLocaleID, '=');
	1685	separatorIndicator = uprv_strchr(tmpLocaleID, ';');
	1686	}
	1687
	1688	/* Copy POSIX-style variant, if any [mr@FOO] */
	1689	if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
	1690	tmpLocaleID != NULL && keywordAssign == NULL) {
	1691	for (;;) {
	1692	char c = *tmpLocaleID;
	1693	if (c == 0) {
	1694	break;
	1695	}
	1696	if (len<nameCapacity) {
	1697	name[len] = c;
	1698	}
	1699	++len;
	1700	++tmpLocaleID;
	1701	}
	1702	}
	1703
	1704	if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
	1705	/* Handle @FOO variant if @ is present and not followed by = */
	1706	if (tmpLocaleID!=NULL && keywordAssign==NULL) {
	1707	int32_t posixVariantSize;
	1708	/* Add missing '_' if needed */
	1709	if (fieldCount < 2 \|\| (fieldCount < 3 && scriptSize > 0)) {
	1710	do {
	1711	if(len<nameCapacity) {
	1712	name[len]='_';
	1713	}
	1714	++len;
	1715	++fieldCount;
	1716	} while(fieldCount<2);
	1717	}
	1718	posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
	1719	(UBool)(variantSize > 0));
	1720	if (posixVariantSize > 0) {
	1721	if (variant == NULL) {
	1722	variant = name+len;
	1723	}
	1724	len += posixVariantSize;
	1725	variantSize += posixVariantSize;
	1726	}
	1727	}
	1728
	1729	/* Look up the ID in the canonicalization map */
	1730	for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
	1731	const char* id = CANONICALIZE_MAP[j].id;
	1732	int32_t n = (int32_t)uprv_strlen(id);
	1733	if (len == n && uprv_strncmp(name, id, n) == 0) {
	1734	if (n == 0 && tmpLocaleID != NULL) {
	1735	break; /* Don't remap "" if keywords present */
	1736	}
	1737	len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
	1738	break;
	1739	}
	1740	}
	1741	}
	1742
	1743	if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
	1744	if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
	1745	(!separatorIndicator \|\| separatorIndicator > keywordAssign)) {
	1746	if(len<nameCapacity) {
	1747	name[len]='@';
	1748	}
	1749	++len;
	1750	++fieldCount;
	1751	len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
	1752	NULL, 0, NULL, TRUE, err);
	1753	}
	1754	}
	1755
	1756	if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
	1757	uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
	1758	}
	1759
	1760	return u_terminateChars(result, resultCapacity, len, err);
	1761	}
	1762
	1763	/* ### ID parsing API **************************************************/
	1764
	1765	U_CAPI int32_t U_EXPORT2
	1766	uloc_getParent(const char* localeID,
	1767	char* parent,
	1768	int32_t parentCapacity,
	1769	UErrorCode* err)
	1770	{
	1771	const char *lastUnderscore;
	1772	int32_t i;
	1773
	1774	if (U_FAILURE(*err))
	1775	return 0;
	1776
	1777	if (localeID == NULL)
	1778	localeID = uloc_getDefault();
	1779
	1780	lastUnderscore=uprv_strrchr(localeID, '_');
	1781	if(lastUnderscore!=NULL) {
	1782	i=(int32_t)(lastUnderscore-localeID);
	1783	} else {
	1784	i=0;
	1785	}
	1786
	1787	if(i>0 && parent != localeID) {
	1788	uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
	1789	}
	1790
	1791	return u_terminateChars(parent, parentCapacity, i, err);
	1792	}
	1793
	1794	U_CAPI int32_t U_EXPORT2
	1795	uloc_getLanguage(const char* localeID,
	1796	char* language,
	1797	int32_t languageCapacity,
	1798	UErrorCode* err)
	1799	{
	1800	/* uloc_getLanguage will return a 2 character iso-639 code if one exists. CWB/
	1801	int32_t i=0;
	1802
	1803	if (err==NULL \|\| U_FAILURE(*err)) {
	1804	return 0;
	1805	}
	1806
	1807	if(localeID==NULL) {
	1808	localeID=uloc_getDefault();
	1809	}
	1810
	1811	i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
	1812	return u_terminateChars(language, languageCapacity, i, err);
	1813	}
	1814
	1815	U_CAPI int32_t U_EXPORT2
	1816	uloc_getScript(const char* localeID,
	1817	char* script,
	1818	int32_t scriptCapacity,
	1819	UErrorCode* err)
	1820	{
	1821	int32_t i=0;
	1822
	1823	if(err==NULL \|\| U_FAILURE(*err)) {
	1824	return 0;
	1825	}
	1826
	1827	if(localeID==NULL) {
	1828	localeID=uloc_getDefault();
	1829	}
	1830
	1831	/* skip the language */
	1832	ulocimp_getLanguage(localeID, NULL, 0, &localeID);
	1833	if(_isIDSeparator(*localeID)) {
	1834	i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
	1835	}
	1836	return u_terminateChars(script, scriptCapacity, i, err);
	1837	}
	1838
	1839	U_CAPI int32_t U_EXPORT2
	1840	uloc_getCountry(const char* localeID,
	1841	char* country,
	1842	int32_t countryCapacity,
	1843	UErrorCode* err)
	1844	{
	1845	int32_t i=0;
	1846
	1847	if(err==NULL \|\| U_FAILURE(*err)) {
	1848	return 0;
	1849	}
	1850
	1851	if(localeID==NULL) {
	1852	localeID=uloc_getDefault();
	1853	}
	1854
	1855	/* Skip the language */
	1856	ulocimp_getLanguage(localeID, NULL, 0, &localeID);
	1857	if(_isIDSeparator(*localeID)) {
	1858	const char *scriptID;
	1859	/* Skip the script if available */
	1860	ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
	1861	if(scriptID != localeID+1) {
	1862	/* Found optional script */
	1863	localeID = scriptID;
	1864	}
	1865	if(_isIDSeparator(*localeID)) {
	1866	i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
	1867	}
	1868	}
	1869	return u_terminateChars(country, countryCapacity, i, err);
	1870	}
	1871
	1872	U_CAPI int32_t U_EXPORT2
	1873	uloc_getVariant(const char* localeID,
	1874	char* variant,
	1875	int32_t variantCapacity,
	1876	UErrorCode* err)
	1877	{
	1878	char tempBuffer[ULOC_FULLNAME_CAPACITY];
	1879	const char* tmpLocaleID;
	1880	int32_t i=0;
	1881
	1882	if(err==NULL \|\| U_FAILURE(*err)) {
	1883	return 0;
	1884	}
	1885
	1886	if (_hasBCP47Extension(localeID)) {
	1887	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
	1888	} else {
	1889	if (localeID==NULL) {
	1890	localeID=uloc_getDefault();
	1891	}
	1892	tmpLocaleID=localeID;
	1893	}
	1894
	1895	/* Skip the language */
	1896	ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
	1897	if(_isIDSeparator(*tmpLocaleID)) {
	1898	const char *scriptID;
	1899	/* Skip the script if available */
	1900	ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
	1901	if(scriptID != tmpLocaleID+1) {
	1902	/* Found optional script */
	1903	tmpLocaleID = scriptID;
	1904	}
	1905	/* Skip the Country */
	1906	if (_isIDSeparator(*tmpLocaleID)) {
	1907	const char *cntryID;
	1908	ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
	1909	if (cntryID != tmpLocaleID+1) {
	1910	/* Found optional country */
	1911	tmpLocaleID = cntryID;
	1912	}
	1913	if(_isIDSeparator(*tmpLocaleID)) {
	1914	/* If there was no country ID, skip a possible extra IDSeparator */
	1915	if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
	1916	tmpLocaleID++;
	1917	}
	1918	i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
	1919	}
	1920	}
	1921	}
	1922
	1923	/* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
	1924	/* if we do not have a variant tag yet then try a POSIX variant after '@' */
	1925	/*
	1926	if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
	1927	i=_getVariant(localeID+1, '@', variant, variantCapacity);
	1928	}
	1929	*/
	1930	return u_terminateChars(variant, variantCapacity, i, err);
	1931	}
	1932
	1933	U_CAPI int32_t U_EXPORT2
	1934	uloc_getName(const char* localeID,
	1935	char* name,
	1936	int32_t nameCapacity,
	1937	UErrorCode* err)
	1938	{
	1939	return _canonicalize(localeID, name, nameCapacity, 0, err);
	1940	}
	1941
	1942	U_CAPI int32_t U_EXPORT2
	1943	uloc_getBaseName(const char* localeID,
	1944	char* name,
	1945	int32_t nameCapacity,
	1946	UErrorCode* err)
	1947	{
	1948	return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
	1949	}
	1950
	1951	U_CAPI int32_t U_EXPORT2
	1952	uloc_canonicalize(const char* localeID,
	1953	char* name,
	1954	int32_t nameCapacity,
	1955	UErrorCode* err)
	1956	{
	1957	return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
	1958	}
	1959
	1960	U_CAPI const char* U_EXPORT2
	1961	uloc_getISO3Language(const char* localeID)
	1962	{
	1963	int16_t offset;
	1964	char lang[ULOC_LANG_CAPACITY];
	1965	UErrorCode err = U_ZERO_ERROR;
	1966
	1967	if (localeID == NULL)
	1968	{
	1969	localeID = uloc_getDefault();
	1970	}
	1971	uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
	1972	if (U_FAILURE(err))
	1973	return "";
	1974	offset = _findIndex(LANGUAGES, lang);
	1975	if (offset < 0)
	1976	return "";
	1977	return LANGUAGES_3[offset];
	1978	}
	1979
	1980	U_CAPI const char* U_EXPORT2
	1981	uloc_getISO3Country(const char* localeID)
	1982	{
	1983	int16_t offset;
	1984	char cntry[ULOC_LANG_CAPACITY];
	1985	UErrorCode err = U_ZERO_ERROR;
	1986
	1987	if (localeID == NULL)
	1988	{
	1989	localeID = uloc_getDefault();
	1990	}
	1991	uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
	1992	if (U_FAILURE(err))
	1993	return "";
	1994	offset = _findIndex(COUNTRIES, cntry);
	1995	if (offset < 0)
	1996	return "";
	1997
	1998	return COUNTRIES_3[offset];
	1999	}
	2000
	2001	U_CAPI uint32_t U_EXPORT2
	2002	uloc_getLCID(const char* localeID)
	2003	{
	2004	UErrorCode status = U_ZERO_ERROR;
	2005	char langID[ULOC_FULLNAME_CAPACITY];
	2006	uint32_t lcid = 0;
	2007
	2008	/* Check for incomplete id. */
	2009	if (!localeID \|\| uprv_strlen(localeID) < 2) {
	2010	return 0;
	2011	}
	2012
	2013	// First, attempt Windows platform lookup if available, but fall
	2014	// through to catch any special cases (ICU vs Windows name differences).
	2015	lcid = uprv_convertToLCIDPlatform(localeID, &status);
	2016	if (U_FAILURE(status)) {
	2017	return 0;
	2018	}
	2019	if (lcid > 0) {
	2020	// Windows found an LCID, return that
	2021	return lcid;
	2022	}
	2023
	2024	uloc_getLanguage(localeID, langID, sizeof(langID), &status);
	2025	if (U_FAILURE(status) \|\| status == U_STRING_NOT_TERMINATED_WARNING) {
	2026	return 0;
	2027	}
	2028
	2029	if (uprv_strchr(localeID, '@')) {
	2030	// uprv_convertToLCID does not support keywords other than collation.
	2031	// Remove all keywords except collation.
	2032	int32_t len;
	2033	char collVal[ULOC_KEYWORDS_CAPACITY];
	2034	char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
	2035
	2036	len = uloc_getKeywordValue(localeID, "collation", collVal,
	2037	UPRV_LENGTHOF(collVal) - 1, &status);
	2038
	2039	if (U_SUCCESS(status) && len > 0) {
	2040	collVal[len] = 0;
	2041
	2042	len = uloc_getBaseName(localeID, tmpLocaleID,
	2043	UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
	2044
	2045	if (U_SUCCESS(status) && len > 0) {
	2046	tmpLocaleID[len] = 0;
	2047
	2048	len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
	2049	UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
	2050
	2051	if (U_SUCCESS(status) && len > 0) {
	2052	tmpLocaleID[len] = 0;
	2053	return uprv_convertToLCID(langID, tmpLocaleID, &status);
	2054	}
	2055	}
	2056	}
	2057
	2058	// fall through - all keywords are simply ignored
	2059	status = U_ZERO_ERROR;
	2060	}
	2061
	2062	return uprv_convertToLCID(langID, localeID, &status);
	2063	}
	2064
	2065	U_CAPI int32_t U_EXPORT2
	2066	uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
	2067	UErrorCode *status)
	2068	{
	2069	return uprv_convertToPosix(hostid, locale, localeCapacity, status);
	2070	}
	2071
	2072	/* ### Default locale **************************************************/
	2073
	2074	U_CAPI const char* U_EXPORT2
	2075	uloc_getDefault()
	2076	{
	2077	return locale_get_default();
	2078	}
	2079
	2080	U_CAPI void U_EXPORT2
	2081	uloc_setDefault(const char* newDefaultLocale,
	2082	UErrorCode* err)
	2083	{
	2084	if (U_FAILURE(*err))
	2085	return;
	2086	/* the error code isn't currently used for anything by this function*/
	2087
	2088	/* propagate change to C++ */
	2089	locale_set_default(newDefaultLocale);
	2090	}
	2091
	2092	/**
	2093	* Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
	2094	* to an array of pointers to arrays of char. All of these pointers are owned
	2095	* by ICU-- do not delete them, and do not write through them. The array is
	2096	* terminated with a null pointer.
	2097	*/
	2098	U_CAPI const char* const* U_EXPORT2
	2099	uloc_getISOLanguages()
	2100	{
	2101	return LANGUAGES;
	2102	}
	2103
	2104	/**
	2105	* Returns a list of all 2-letter country codes defined in ISO 639. This is a
	2106	* pointer to an array of pointers to arrays of char. All of these pointers are
	2107	* owned by ICU-- do not delete them, and do not write through them. The array is
	2108	* terminated with a null pointer.
	2109	*/
	2110	U_CAPI const char* const* U_EXPORT2
	2111	uloc_getISOCountries()
	2112	{
	2113	return COUNTRIES;
	2114	}
	2115
	2116
	2117	/* this function to be moved into cstring.c later */
	2118	static char gDecimal = 0;
	2119
	2120	static /* U_CAPI */
	2121	double
	2122	/* U_EXPORT2 */
	2123	_uloc_strtod(const char start, char *end) {
	2124	char *decimal;
	2125	char *myEnd;
	2126	char buf[30];
	2127	double rv;
	2128	if (!gDecimal) {
	2129	char rep[5];
	2130	/* For machines that decide to change the decimal on you,
	2131	and try to be too smart with localization.
	2132	This normally should be just a '.'. */
	2133	sprintf(rep, "%+1.1f", 1.0);
	2134	gDecimal = rep[2];
	2135	}
	2136
	2137	if(gDecimal == '.') {
	2138	return uprv_strtod(start, end); /* fall through to OS */
	2139	} else {
	2140	uprv_strncpy(buf, start, 29);
	2141	buf[29]=0;
	2142	decimal = uprv_strchr(buf, '.');
	2143	if(decimal) {
	2144	*decimal = gDecimal;
	2145	} else {
	2146	return uprv_strtod(start, end); /* no decimal point */
	2147	}
	2148	rv = uprv_strtod(buf, &myEnd);
	2149	if(end) {
	2150	end = (char)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
	2151	}
	2152	return rv;
	2153	}
	2154	}
	2155
	2156	typedef struct {
	2157	float q;
	2158	int32_t dummy; /* to avoid uninitialized memory copy from qsort */
	2159	char locale[ULOC_FULLNAME_CAPACITY+1];
	2160	} _acceptLangItem;
	2161
	2162	static int32_t U_CALLCONV
	2163	uloc_acceptLanguageCompare(const void * /context/, const void a, const void b)
	2164	{
	2165	const _acceptLangItem aa = (const _acceptLangItem)a;
	2166	const _acceptLangItem bb = (const _acceptLangItem)b;
	2167
	2168	int32_t rc = 0;
	2169	if(bb->q < aa->q) {
	2170	rc = -1; /* A > B */
	2171	} else if(bb->q > aa->q) {
	2172	rc = 1; /* A < B */
	2173	} else {
	2174	rc = 0; /* A = B */
	2175	}
	2176
	2177	if(rc==0) {
	2178	rc = uprv_stricmp(aa->locale, bb->locale);
	2179	}
	2180
	2181	#if defined(ULOC_DEBUG)
	2182	/* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
	2183	aa->locale, aa->q,
	2184	bb->locale, bb->q,
	2185	rc);*/
	2186	#endif
	2187
	2188	return rc;
	2189	}
	2190
	2191	/*
	2192	mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
	2193	*/
	2194
	2195	U_CAPI int32_t U_EXPORT2
	2196	uloc_acceptLanguageFromHTTP(char result, int32_t resultAvailable, UAcceptResult outResult,
	2197	const char *httpAcceptLanguage,
	2198	UEnumeration* availableLocales,
	2199	UErrorCode *status)
	2200	{
	2201	MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
	2202	char tmp[ULOC_FULLNAME_CAPACITY +1];
	2203	int32_t n = 0;
	2204	const char *itemEnd;
	2205	const char *paramEnd;
	2206	const char *s;
	2207	const char *t;
	2208	int32_t res;
	2209	int32_t i;
	2210	int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
	2211
	2212	if(U_FAILURE(*status)) {
	2213	return -1;
	2214	}
	2215
	2216	for(s=httpAcceptLanguage;s&&*s;) {
	2217	while(isspace(s)) / eat space at the beginning */
	2218	s++;
	2219	itemEnd=uprv_strchr(s,',');
	2220	paramEnd=uprv_strchr(s,';');
	2221	if(!itemEnd) {
	2222	itemEnd = httpAcceptLanguage+l; /* end of string */
	2223	}
	2224	if(paramEnd && paramEnd<itemEnd) {
	2225	/* semicolon (;) is closer than end (,) */
	2226	t = paramEnd+1;
	2227	if(*t=='q') {
	2228	t++;
	2229	}
	2230	while(isspace(*t)) {
	2231	t++;
	2232	}
	2233	if(*t=='=') {
	2234	t++;
	2235	}
	2236	while(isspace(*t)) {
	2237	t++;
	2238	}
	2239	items[n].q = (float)_uloc_strtod(t,NULL);
	2240	} else {
	2241	/* no semicolon - it's 1.0 */
	2242	items[n].q = 1.0f;
	2243	paramEnd = itemEnd;
	2244	}
	2245	items[n].dummy=0;
	2246	/* eat spaces prior to semi */
	2247	for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
	2248	;
	2249	int32_t slen = static_cast<int32_t>(((t+1)-s));
	2250	if(slen > ULOC_FULLNAME_CAPACITY) {
	2251	*status = U_BUFFER_OVERFLOW_ERROR;
	2252	return -1; // too big
	2253	}
	2254	uprv_strncpy(items[n].locale, s, slen);
	2255	items[n].locale[slen]=0; // terminate
	2256	int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
	2257	if(U_FAILURE(*status)) return -1;
	2258	if((clen!=slen) \|\| (uprv_strncmp(items[n].locale, tmp, slen))) {
	2259	// canonicalization had an effect- copy back
	2260	uprv_strncpy(items[n].locale, tmp, clen);
	2261	items[n].locale[clen] = 0; // terminate
	2262	}
	2263	#if defined(ULOC_DEBUG)
	2264	/fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);/
	2265	#endif
	2266	n++;
	2267	s = itemEnd;
	2268	while(s==',') { / eat duplicate commas */
	2269	s++;
	2270	}
	2271	if(n>=items.getCapacity()) { // If we need more items
	2272	if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
	2273	*status = U_MEMORY_ALLOCATION_ERROR;
	2274	return -1;
	2275	}
	2276	#if defined(ULOC_DEBUG)
	2277	fprintf(stderr,"malloced at size %d\n", items.getCapacity());
	2278	#endif
	2279	}
	2280	}
	2281	uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
	2282	if (U_FAILURE(*status)) {
	2283	return -1;
	2284	}
	2285	LocalMemory<const char*> strs(NULL);
	2286	if (strs.allocateInsteadAndReset(n) == NULL) {
	2287	*status = U_MEMORY_ALLOCATION_ERROR;
	2288	return -1;
	2289	}
	2290	for(i=0;i<n;i++) {
	2291	#if defined(ULOC_DEBUG)
	2292	/fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);/
	2293	#endif
	2294	strs[i]=items[i].locale;
	2295	}
	2296	res = uloc_acceptLanguage(result, resultAvailable, outResult,
	2297	strs.getAlias(), n, availableLocales, status);
	2298	return res;
	2299	}
	2300
	2301
	2302	U_CAPI int32_t U_EXPORT2
	2303	uloc_acceptLanguage(char *result, int32_t resultAvailable,
	2304	UAcceptResult outResult, const char *acceptList,
	2305	int32_t acceptListCount,
	2306	UEnumeration* availableLocales,
	2307	UErrorCode *status)
	2308	{
	2309	int32_t i,j;
	2310	int32_t len;
	2311	int32_t maxLen=0;
	2312	char tmp[ULOC_FULLNAME_CAPACITY+1];
	2313	const char *l;
	2314	char **fallbackList;
	2315	if(U_FAILURE(*status)) {
	2316	return -1;
	2317	}
	2318	fallbackList = static_cast<char *>(uprv_malloc((size_t)(sizeof(fallbackList[0])acceptListCount)));
	2319	if(fallbackList==NULL) {
	2320	*status = U_MEMORY_ALLOCATION_ERROR;
	2321	return -1;
	2322	}
	2323	for(i=0;i<acceptListCount;i++) {
	2324	#if defined(ULOC_DEBUG)
	2325	fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
	2326	#endif
	2327	while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
	2328	#if defined(ULOC_DEBUG)
	2329	fprintf(stderr," %s\n", l);
	2330	#endif
	2331	len = (int32_t)uprv_strlen(l);
	2332	if(!uprv_strcmp(acceptList[i], l)) {
	2333	if(outResult) {
	2334	*outResult = ULOC_ACCEPT_VALID;
	2335	}
	2336	#if defined(ULOC_DEBUG)
	2337	fprintf(stderr, "MATCH! %s\n", l);
	2338	#endif
	2339	if(len>0) {
	2340	uprv_strncpy(result, l, uprv_min(len, resultAvailable));
	2341	}
	2342	for(j=0;j<i;j++) {
	2343	uprv_free(fallbackList[j]);
	2344	}
	2345	uprv_free(fallbackList);
	2346	return u_terminateChars(result, resultAvailable, len, status);
	2347	}
	2348	if(len>maxLen) {
	2349	maxLen = len;
	2350	}
	2351	}
	2352	uenum_reset(availableLocales, status);
	2353	/* save off parent info */
	2354	if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
	2355	fallbackList[i] = uprv_strdup(tmp);
	2356	} else {
	2357	fallbackList[i]=0;
	2358	}
	2359	}
	2360
	2361	for(maxLen--;maxLen>0;maxLen--) {
	2362	for(i=0;i<acceptListCount;i++) {
	2363	if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
	2364	#if defined(ULOC_DEBUG)
	2365	fprintf(stderr,"Try: [%s]", fallbackList[i]);
	2366	#endif
	2367	while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
	2368	#if defined(ULOC_DEBUG)
	2369	fprintf(stderr," %s\n", l);
	2370	#endif
	2371	len = (int32_t)uprv_strlen(l);
	2372	if(!uprv_strcmp(fallbackList[i], l)) {
	2373	if(outResult) {
	2374	*outResult = ULOC_ACCEPT_FALLBACK;
	2375	}
	2376	#if defined(ULOC_DEBUG)
	2377	fprintf(stderr, "fallback MATCH! %s\n", l);
	2378	#endif
	2379	if(len>0) {
	2380	uprv_strncpy(result, l, uprv_min(len, resultAvailable));
	2381	}
	2382	for(j=0;j<acceptListCount;j++) {
	2383	uprv_free(fallbackList[j]);
	2384	}
	2385	uprv_free(fallbackList);
	2386	return u_terminateChars(result, resultAvailable, len, status);
	2387	}
	2388	}
	2389	uenum_reset(availableLocales, status);
	2390
	2391	if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
	2392	uprv_free(fallbackList[i]);
	2393	fallbackList[i] = uprv_strdup(tmp);
	2394	} else {
	2395	uprv_free(fallbackList[i]);
	2396	fallbackList[i]=0;
	2397	}
	2398	}
	2399	}
	2400	if(outResult) {
	2401	*outResult = ULOC_ACCEPT_FAILED;
	2402	}
	2403	}
	2404	for(i=0;i<acceptListCount;i++) {
	2405	uprv_free(fallbackList[i]);
	2406	}
	2407	uprv_free(fallbackList);
	2408	return -1;
	2409	}
	2410
	2411	U_CAPI const char* U_EXPORT2
	2412	uloc_toUnicodeLocaleKey(const char* keyword)
	2413	{
	2414	const char* bcpKey = ulocimp_toBcpKey(keyword);
	2415	if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
	2416	// unknown keyword, but syntax is fine..
	2417	return keyword;
	2418	}
	2419	return bcpKey;
	2420	}
	2421
	2422	U_CAPI const char* U_EXPORT2
	2423	uloc_toUnicodeLocaleType(const char* keyword, const char* value)
	2424	{
	2425	const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
	2426	if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
	2427	// unknown keyword, but syntax is fine..
	2428	return value;
	2429	}
	2430	return bcpType;
	2431	}
	2432
	2433	static UBool
	2434	isWellFormedLegacyKey(const char* legacyKey)
	2435	{
	2436	const char* p = legacyKey;
	2437	while (*p) {
	2438	if (!UPRV_ISALPHANUM(*p)) {
	2439	return FALSE;
	2440	}
	2441	p++;
	2442	}
	2443	return TRUE;
	2444	}
	2445
	2446	static UBool
	2447	isWellFormedLegacyType(const char* legacyType)
	2448	{
	2449	const char* p = legacyType;
	2450	int32_t alphaNumLen = 0;
	2451	while (*p) {
	2452	if (p == '_' \|\| p == '/' \|\| *p == '-') {
	2453	if (alphaNumLen == 0) {
	2454	return FALSE;
	2455	}
	2456	alphaNumLen = 0;
	2457	} else if (UPRV_ISALPHANUM(*p)) {
	2458	alphaNumLen++;
	2459	} else {
	2460	return FALSE;
	2461	}
	2462	p++;
	2463	}
	2464	return (alphaNumLen != 0);
	2465	}
	2466
	2467	U_CAPI const char* U_EXPORT2
	2468	uloc_toLegacyKey(const char* keyword)
	2469	{
	2470	const char* legacyKey = ulocimp_toLegacyKey(keyword);
	2471	if (legacyKey == NULL) {
	2472	// Checks if the specified locale key is well-formed with the legacy locale syntax.
	2473	//
	2474	// Note:
	2475	// LDML/CLDR provides some definition of keyword syntax in
	2476	// * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
	2477	// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
	2478	// Keys can only consist of [0-9a-zA-Z].
	2479	if (isWellFormedLegacyKey(keyword)) {
	2480	return keyword;
	2481	}
	2482	}
	2483	return legacyKey;
	2484	}
	2485
	2486	U_CAPI const char* U_EXPORT2
	2487	uloc_toLegacyType(const char* keyword, const char* value)
	2488	{
	2489	const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
	2490	if (legacyType == NULL) {
	2491	// Checks if the specified locale type is well-formed with the legacy locale syntax.
	2492	//
	2493	// Note:
	2494	// LDML/CLDR provides some definition of keyword syntax in
	2495	// * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
	2496	// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
	2497	// Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
	2498	// we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
	2499	if (isWellFormedLegacyType(value)) {
	2500	return value;
	2501	}
	2502	}
	2503	return legacyType;
	2504	}
	2505
	/*eof*/