git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	**********************************************************************
	5	* Copyright (C) 1997-2016, International Business Machines
	6	* Corporation and others. All Rights Reserved.
	7	**********************************************************************
	8	*
	9	* File ULOC.CPP
	10	*
	11	* Modification History:
	12	*
	13	* Date Name Description
	14	* 04/01/97 aliu Creation.
	15	* 08/21/98 stephen JDK 1.2 sync
	16	* 12/08/98 rtg New Locale implementation and C API
	17	* 03/15/99 damiba overhaul.
	18	* 04/06/99 stephen changed setDefault() to realloc and copy
	19	* 06/14/99 stephen Changed calls to ures_open for new params
	20	* 07/21/99 stephen Modified setDefault() to propagate to C++
	21	* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
	22	* brought canonicalization code into line with spec
	23	*****************************************************************************/
	24
	25	/*
	26	POSIX's locale format, from putil.c: [no spaces]
	27
	28	ll [ _CC ] [ . MM ] [ @ VV]
	29
	30	l = lang, C = ctry, M = charmap, V = variant
	31	*/
	32
	33	#include "unicode/utypes.h"
	34	#include "unicode/ustring.h"
	35	#include "unicode/uloc.h"
	36
	37	#include "putilimp.h"
	38	#include "ustr_imp.h"
	39	#include "ulocimp.h"
	40	#include "umutex.h"
	41	#include "cstring.h"
	42	#include "cmemory.h"
	43	#include "locmap.h"
	44	#include "uarrsort.h"
	45	#include "uenumimp.h"
	46	#include "uassert.h"
	47	#include "charstr.h"
	48
	49	#include <stdio.h> /* for sprintf */
	50
	51	U_NAMESPACE_USE
	52
	53	/* ### Declarations **************************************************/
	54
	55	/* Locale stuff from locid.cpp */
	56	U_CFUNC void locale_set_default(const char *id);
	57	U_CFUNC const char *locale_get_default(void);
	58	U_CFUNC int32_t
	59	locale_getKeywords(const char *localeID,
	60	char prev,
	61	char *keywords, int32_t keywordCapacity,
	62	char values, int32_t valuesCapacity, int32_t valLen,
	63	UBool valuesToo,
	64	UErrorCode *status);
	65
	66	/* ### Data tables **************************************************/
	67
	68	/**
	69	* Table of language codes, both 2- and 3-letter, with preference
	70	* given to 2-letter codes where possible. Includes 3-letter codes
	71	* that lack a 2-letter equivalent.
	72	*
	73	* This list must be in sorted order. This list is returned directly
	74	* to the user by some API.
	75	*
	76	* This list must be kept in sync with LANGUAGES_3, with corresponding
	77	* entries matched.
	78	*
	79	* This table should be terminated with a NULL entry, followed by a
	80	* second list, and another NULL entry. The first list is visible to
	81	* user code when this array is returned by API. The second list
	82	* contains codes we support, but do not expose through user API.
	83	*
	84	* Notes
	85	*
	86	* Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
	87	* include the revisions up to 2001/7/27 CWB
	88	*
	89	* The 3 character codes are the terminology codes like RFC 3066. This
	90	* is compatible with prior ICU codes
	91	*
	92	* "in", "iw", "ji", "jw" and "sh" have been withdrawn. They are still in
	93	* the table, but now at the end of it, because their 3-character codes
	94	* are duplicates. This avoids bad searches going from 3- to 2-character
	95	* codes.
	96	*
	97	* The range qaa-qtz is reserved for local use
	98	*/
	99	/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
	100	/* ISO639 table version is 20150505 */
	101	/* Subsequent hand addition of selected languages */
	/*
	 * Layout: the main list, a NULL, the obsolete codes, a final NULL.
	 * Rows are laid out to line up with the corresponding rows of LANGUAGES_3.
	 */
	102	static const char * const LANGUAGES[] = {
	103	"aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
	104	"af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
	105	"aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
	106	"arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
	107	"asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
	108	"ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
	109	"be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
	110	"bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
	111	"bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
	112	"brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
	113	"ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
	114	"ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
	115	"chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
	116	"cs", "csb", "cu", "cv", "cy",
	117	"da", "dak", "dar", "dav", "de", "del", "den", "dgr",
	118	"din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
	119	"dyo", "dyu", "dz", "dzg",
	120	"ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
	121	"en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
	122	"ext",
	123	"fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
	124	"fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
	125	"frs", "fur", "fy",
	126	"ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
	127	"gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
	128	"gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
	129	"gur", "guz", "gv", "gwi",
	130	"ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
	131	"hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
	132	"hup", "hy", "hz",
	133	"ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
	134	"ilo", "inh", "io", "is", "it", "iu", "izh",
	135	"ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
	136	"jv",
	137	"ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
	138	"kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
	139	"kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
	140	"kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
	141	"kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
	142	"kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
	143	"kv", "kw", "ky",
	144	"la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
	145	"lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
	146	"lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
	147	"lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
	148	"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
	149	"mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
	150	"mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
	151	"ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj",
	152	"ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
	153	"my", "mye", "myv", "mzn",
	154	"na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
	155	"new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
	156	"nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
	157	"nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
	158	"oc", "oj", "om", "or", "os", "osa", "ota",
	159	"pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
	160	"pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
	161	"pon", "prg", "pro", "ps", "pt",
	162	"qu", "quc", "qug",
	163	"raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
	164	"rof", "rom", "rtm", "ru", "rue", "rug", "rup",
	165	"rw", "rwk",
	166	"sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
	167	"sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
	168	"se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
	169	"sgs", "shi", "shn", "shu", "si", "sid", "sk",
	170	"sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
	171	"sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
	172	"ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
	173	"sv", "sw", "swb", "swc", "syc", "syr", "szl",
	174	"ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
	175	"th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
	176	"tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
	177	"tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
	178	"tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
	179	"udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
	180	"vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
	181	"vot", "vro", "vun",
	182	"wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
	183	"xal", "xh", "xmf", "xog",
	184	"yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
	185	"za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
	186	"zun", "zxx", "zza",
	187	NULL, /* terminates the first list */
	188	"in", "iw", "ji", "jw", "sh", /* obsolete language codes */
	189	NULL /* terminates the second (obsolete) list and the table */
	190	};
	191
	/*
	 * Parallel arrays: the entry at index i of REPLACEMENT_LANGUAGES sits at
	 * the same position as the deprecated code at index i of
	 * DEPRECATED_LANGUAGES ("in"/"id", "iw"/"he", "ji"/"yi", "jw"/"jv").
	 * Both end with two NULL entries.
	 * NOTE(review): the lookup code that consumes these is not in this
	 * section; presumably it maps a deprecated code to its replacement by index.
	 */
	192	static const char* const DEPRECATED_LANGUAGES[]={
	193	"in", "iw", "ji", "jw", NULL, NULL
	194	};
	195	static const char* const REPLACEMENT_LANGUAGES[]={
	196	"id", "he", "yi", "jv", NULL, NULL
	197	};
	198
	199	/**
	200	* Table of 3-letter language codes.
	201	*
	202	* This is a lookup table used to convert 3-letter language codes to
	203	* their 2-letter equivalent, where possible. It must be kept in sync
	204	* with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
	205	* same language as LANGUAGES_3[i]. The commented-out lines are
	206	* copied from LANGUAGES to make eyeballing this baby easier.
	207	*
	208	* Where a 3-letter language code has no 2-letter equivalent, the
	209	* 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
	210	*
	211	* This table should be terminated with a NULL entry, followed by a
	212	* second list, and another NULL entry. The two lists correspond to
	213	* the two lists in LANGUAGES.
	214	*/
	215	/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
	216	/* ISO639 table version is 20150505 */
	217	/* Subsequent hand addition of selected languages */
	/*
	 * Layout: the main list, a NULL, the 3-letter codes for the obsolete
	 * 2-letter codes, a final NULL. Rows are laid out to line up with the
	 * corresponding rows of LANGUAGES.
	 */
	218	static const char * const LANGUAGES_3[] = {
	219	"aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
	220	"afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
	221	"aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
	222	"arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
	223	"asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
	224	"bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
	225	"bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
	226	"bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
	227	"bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
	228	"brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
	229	"cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
	230	"cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
	231	"chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
	232	"ces", "csb", "chu", "chv", "cym",
	233	"dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
	234	"din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
	235	"dyo", "dyu", "dzo", "dzg",
	236	"ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
	237	"eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
	238	"ext",
	239	"fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
	240	"fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
	241	"frs", "fur", "fry",
	242	"gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
	243	"gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
	244	"gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
	245	"gur", "guz", "glv", "gwi",
	246	"hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
	247	"hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
	248	"hup", "hye", "her",
	249	"ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
	250	"ilo", "inh", "ido", "isl", "ita", "iku", "izh",
	251	"jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
	252	"jav",
	253	"kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
	254	"kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
	255	"kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
	256	"kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
	257	"kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
	258	"kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
	259	"kom", "cor", "kir",
	260	"lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
	261	"lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
	262	"lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
	263	"lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
	264	"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
	265	"mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
	266	"mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
	267	"mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
	268	"msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
	269	"mya", "mye", "myv", "mzn",
	270	"nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
	271	"new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
	272	"nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
	273	"nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
	274	"oci", "oji", "orm", "ori", "oss", "osa", "ota",
	275	"pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
	276	"pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
	277	"pon", "prg", "pro", "pus", "por",
	278	"que", "quc", "qug",
	279	"raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
	280	"rof", "rom", "rtm", "rus", "rue", "rug", "rup",
	281	"kin", "rwk",
	282	"san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
	283	"sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
	284	"sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
	285	"sgs", "shi", "shn", "shu", "sin", "sid", "slk",
	286	"slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
	287	"sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
	288	"ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
	289	"swe", "swa", "swb", "swc", "syc", "syr", "szl",
	290	"tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
	291	"tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
	292	"tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
	293	"tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
	294	"tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
	295	"udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
	296	"vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
	297	"vot", "vro", "vun",
	298	"wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
	299	"xal", "xho", "xmf", "xog",
	300	"yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
	301	"zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
	302	"zun", "zxx", "zza",
	303	NULL, /* terminates the first list */
	304	/* "in", "iw", "ji", "jw", "sh", */
	305	"ind", "heb", "yid", "jaw", "srp",
	306	NULL /* terminates the second list and the table */
	307	};
	308
	309	/**
	310	* Table of 2-letter country codes.
	311	*
	312	* This list must be in sorted order. This list is returned directly
	313	* to the user by some API.
	314	*
	315	* This list must be kept in sync with COUNTRIES_3, with corresponding
	316	* entries matched.
	317	*
	318	* This table should be terminated with a NULL entry, followed by a
	319	* second list, and another NULL entry. The first list is visible to
	320	* user code when this array is returned by API. The second list
	321	* contains codes we support, but do not expose through user API.
	322	*
	323	* Notes:
	324	*
	325	* ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
	326	* http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
	327	* new codes keeping the old ones for compatibility updated to include
	328	* 1999/12/03 revisions CWB
	329	*
	330	* RO(ROM) is now RO(ROU) according to
	331	* http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
	332	*/
	/*
	 * Layout: the main list, a NULL, the obsolete codes, a final NULL.
	 * The row layout is repeated in the comments inside COUNTRIES_3.
	 */
	333	static const char * const COUNTRIES[] = {
	334	"AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM",
	335	"AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
	336	"BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
	337	"BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
	338	"BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
	339	"CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR",
	340	"CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
	341	"DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
	342	"ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
	343	"GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
	344	"GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
	345	"GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
	346	"IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
	347	"IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
	348	"KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
	349	"LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
	350	"LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
	351	"ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
	352	"MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
	353	"NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
	354	"NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
	355	"PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
	356	"PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
	357	"SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
	358	"SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
	359	"SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ",
	360	"TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
	361	"TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
	362	"VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
	363	"WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
	364	NULL, /* terminates the first list */
	365	"AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
	366	NULL /* terminates the second (obsolete) list and the table */
	367	};
	368
	/*
	 * Parallel arrays: REPLACEMENT_COUNTRIES[i] sits at the same position as
	 * DEPRECATED_COUNTRIES[i]; the comment inside REPLACEMENT_COUNTRIES repeats
	 * the deprecated codes so the pairing can be checked by eye.
	 * Both end with two NULL entries.
	 * NOTE(review): the lookup code that consumes these is not in this
	 * section; presumably it maps a deprecated code to its replacement by index.
	 */
	369	static const char* const DEPRECATED_COUNTRIES[] = {
	370	"AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
	371	};
	372	static const char* const REPLACEMENT_COUNTRIES[] = {
	373	/* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
	374	"CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */
	375	};
	376
	377	/**
	378	* Table of 3-letter country codes.
	379	*
	380	* This is a lookup table used to convert 3-letter country codes to
	381	* their 2-letter equivalent. It must be kept in sync with COUNTRIES.
	382	* For all valid i, COUNTRIES[i] must refer to the same country as
	383	* COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
	384	* to make eyeballing this baby easier.
	385	*
	386	* This table should be terminated with a NULL entry, followed by a
	387	* second list, and another NULL entry. The two lists correspond to
	388	* the two lists in COUNTRIES.
	389	*/
	/*
	 * Layout: the main list, a NULL, the 3-letter codes for the obsolete
	 * 2-letter codes, a final NULL. Each data row is preceded by a comment
	 * holding the matching row of COUNTRIES.
	 */
	390	static const char * const COUNTRIES_3[] = {
	391	/* "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
	392	"ASC", "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
	393	/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
	394	"AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
	395	/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
	396	"BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
	397	/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
	398	"BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
	399	/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
	400	"BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
	401	/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR", */
	402	"CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CPT", "CRI",
	403	/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
	404	"CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
	405	/* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
	406	"DMA", "DOM", "DZA", "EA ", "ECU", "EST", "EGY", "ESH", "ERI", /* no valid 3-letter code for EA */
	407	/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
	408	"ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
	409	/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
	410	"GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
	411	/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
	412	"GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
	413	/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
	414	"GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
	415	/* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
	416	"IC ", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", /* no valid 3-letter code for IC */
	417	/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
	418	"ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
	419	/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
	420	"COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
	421	/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
	422	"LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
	423	/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
	424	"LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
	425	/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
	426	"MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
	427	/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
	428	"MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
	429	/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
	430	"NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
	431	/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
	432	"NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
	433	/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
	434	"PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
	435	/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
	436	"PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
	437	/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
	438	"SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
	439	/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
	440	"SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
	441	/* "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ", */
	442	"SXM", "SYR", "SWZ", "TAA", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
	443	/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
	444	"TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
	445	/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
	446	"TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
	447	/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
	448	"VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
	449	/* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
	450	"WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
	451	NULL, /* terminates the first list */
	452	/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
	453	"ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
	454	NULL /* terminates the second list and the table */
	455	};
	456
	/**
	 * One entry of the locale-ID canonicalization table: pairs an input
	 * locale ID with the ID it is canonicalized to. Both members point at
	 * NUL-terminated strings (the table initializes them with string literals).
	 */
	typedef struct CanonicalizationMap {
	    const char *id;          /* input ID */
	    const char *canonicalID; /* canonicalized output ID */
	} CanonicalizationMap;
	461
	462	/**
	463	* A map to canonicalize locale IDs. This handles a variety of
	464	* different semantic kinds of transformations.
	465	*/
	466	static const CanonicalizationMap CANONICALIZE_MAP[] = {
	467	{ "", "en_US_POSIX" }, /* .NET name */ // open ICU 64 deleted, we restore
	468	{ "c", "en_US_POSIX" }, /* POSIX name */ // open ICU 64 deleted, we restore
	469	{ "posix", "en_US_POSIX" }, /* POSIX name (alias of C) */ // open ICU 64 deleted, we restore
	470	{ "art_LOJBAN", "jbo" }, /* registered name */
	471	{ "hy__AREVELA", "hy" }, /* Registered IANA variant */
	472	{ "hy__AREVMDA", "hyw" }, /* Registered IANA variant */
	473	{ "zh_GAN", "gan" }, /* registered name */
	474	{ "zh_GUOYU", "zh" }, /* registered name */
	475	{ "zh_HAKKA", "hak" }, /* registered name */
	476	{ "zh_MIN_NAN", "nan" }, /* registered name */
	477	{ "zh_WUU", "wuu" }, /* registered name */
	478	{ "zh_XIANG", "hsn" }, /* registered name */
	479	{ "zh_YUE", "yue" }, /* registered name */
	480	};
	481
	482	/* ### BCP47 Conversion *******************************************/
	/*
	 * Test whether the locale id looks like a BCP47 tag carrying an extension:
	 * it is non-NULL, contains no '@', and its shortest subtag (as measured by
	 * getShortestSubtagLength) is exactly one character long.
	 * The macro uses only its own argument; it no longer depends on the caller
	 * having a variable that happens to be named localeID.
	 */
	#define _hasBCP47Extension(id) \
	    ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)

	/*
	 * Converts the BCP47 id to a Unicode locale id.
	 * `err` is a pointer to a UErrorCode. On success finalID is set to buffer.
	 * If the conversion fails, or the result is not NUL-terminated (which is
	 * promoted to U_BUFFER_OVERFLOW_ERROR), finalID is set to the original id.
	 * Expands to an if/else statement, so use it only inside a braced block.
	 */
	#define _ConvertBCP47(finalID, id, buffer, length, err) \
	    if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 || \
	        U_FAILURE(*(err)) || *(err) == U_STRING_NOT_TERMINATED_WARNING) { \
	        finalID = (id); \
	        if (*(err) == U_STRING_NOT_TERMINATED_WARNING) { *(err) = U_BUFFER_OVERFLOW_ERROR; } \
	    } else { \
	        finalID = (buffer); \
	    }
	494	/* Gets the size of the shortest subtag in the given localeID. */
	495	static int32_t getShortestSubtagLength(const char *localeID) {
	496	int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
	497	int32_t length = localeIDLength;
	498	int32_t tmpLength = 0;
	499	int32_t i;
	500	UBool reset = TRUE;
	501
	502	for (i = 0; i < localeIDLength; i++) {
	503	if (localeID[i] != '_' && localeID[i] != '-') {
	504	if (reset) {
	505	tmpLength = 0;
	506	reset = FALSE;
	507	}
	508	tmpLength++;
	509	} else {
	510	if (tmpLength != 0 && tmpLength < length) {
	511	length = tmpLength;
	512	}
	513	reset = TRUE;
	514	}
	515	}
	516
	517	return length;
	518	}
	519
	520	/* ### Keywords **************************************************/
	/* Nonzero if c is an ASCII decimal digit. */
	#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
	/* Nonzero if c is an ASCII letter (per uprv_isASCIILetter) or an ASCII decimal digit. */
	#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c))
	/* Punctuation/symbols allowed in legacy key values */
	#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

	/* Size of the buffer holding one canonicalized keyword name, terminating NUL included. */
	#define ULOC_KEYWORD_BUFFER_LEN 25
	/* Capacity of the keyword list built while parsing one locale ID. */
	#define ULOC_MAX_NO_KEYWORDS 25
	528
	529	U_CAPI const char * U_EXPORT2
	530	locale_getKeywordsStart(const char *localeID) {
	531	const char *result = NULL;
	532	if((result = uprv_strchr(localeID, '@')) != NULL) {
	533	return result;
	534	}
	535	#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
	536	else {
	537	/* We do this because the @ sign is variant, and the @ sign used on one
	538	EBCDIC machine won't be compiled the same way on other EBCDIC based
	539	machines. */
	540	static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
	541	const uint8_t *charToFind = ebcdicSigns;
	542	while(*charToFind) {
	543	if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
	544	return result;
	545	}
	546	charToFind++;
	547	}
	548	}
	549	#endif
	550	return NULL;
	551	}
	552
	553	/**
	554	* @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
	555	* @param keywordName incoming name to be canonicalized
	556	* @param status return status (keyword too long)
	557	* @return length of the keyword name
	558	*/
	559	static int32_t locale_canonKeywordName(char buf, const char keywordName, UErrorCode *status)
	560	{
	561	int32_t keywordNameLen = 0;
	562
	563	for (; *keywordName != 0; keywordName++) {
	564	if (!UPRV_ISALPHANUM(*keywordName)) {
	565	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed keyword name */
	566	return 0;
	567	}
	568	if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
	569	buf[keywordNameLen++] = uprv_tolower(*keywordName);
	570	} else {
	571	/* keyword name too long for internal buffer */
	572	*status = U_INTERNAL_PROGRAM_ERROR;
	573	return 0;
	574	}
	575	}
	576	if (keywordNameLen == 0) {
	577	status = U_ILLEGAL_ARGUMENT_ERROR; / empty keyword name */
	578	return 0;
	579	}
	580	buf[keywordNameLen] = 0; /* terminate */
	581
	582	return keywordNameLen;
	583	}
	584
	/* One keyword/value pair collected by _getKeywords while parsing a locale ID. */
	585	typedef struct {
	586	char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* keyword name: lowercased, spaces removed, NUL-terminated */
	587	int32_t keywordLen; /* number of characters stored in keyword, excluding the NUL */
	588	const char *valueStart; /* points into the parsed locale ID at the first non-space character of the value */
	589	int32_t valueLen; /* length of the value with trailing spaces excluded; the value is not NUL-terminated at this length */
	590	} KeywordStruct;
	591
	592	static int32_t U_CALLCONV
	593	compareKeywordStructs(const void * /context/, const void left, const void right) {
	594	const char* leftString = ((const KeywordStruct *)left)->keyword;
	595	const char* rightString = ((const KeywordStruct *)right)->keyword;
	596	return uprv_strcmp(leftString, rightString);
	597	}
	598
	599	static int32_t
	600	_getKeywords(const char *localeID,
	601	char prev,
	602	char *keywords, int32_t keywordCapacity,
	603	char values, int32_t valuesCapacity, int32_t valLen,
	604	UBool valuesToo,
	605	UErrorCode *status)
	606	{
	607	KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
	608
	609	int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
	610	int32_t numKeywords = 0;
	611	const char* pos = localeID;
	612	const char* equalSign = NULL;
	613	const char* semicolon = NULL;
	614	int32_t i = 0, j, n;
	615	int32_t keywordsLen = 0;
	616	int32_t valuesLen = 0;
	617
	618	if(prev == '@') { /* start of keyword definition */
	619	/* we will grab pairs, trim spaces, lowercase keywords, sort and return */
	620	do {
	621	UBool duplicate = FALSE;
	622	/* skip leading spaces */
	623	while(*pos == ' ') {
	624	pos++;
	625	}
	626	if (!pos) { / handle trailing "; " */
	627	break;
	628	}
	629	if(numKeywords == maxKeywords) {
	630	*status = U_INTERNAL_PROGRAM_ERROR;
	631	return 0;
	632	}
	633	equalSign = uprv_strchr(pos, '=');
	634	semicolon = uprv_strchr(pos, ';');
	635	/* lack of '=' [foo@currency] is illegal */
	636	/* ';' before '=' [foo@currency;collation=pinyin] is illegal */
	637	if(!equalSign \|\| (semicolon && semicolon<equalSign)) {
	638	*status = U_INVALID_FORMAT_ERROR;
	639	return 0;
	640	}
	641	/* need to normalize both keyword and keyword name */
	642	if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
	643	/* keyword name too long for internal buffer */
	644	*status = U_INTERNAL_PROGRAM_ERROR;
	645	return 0;
	646	}
	647	for(i = 0, n = 0; i < equalSign - pos; ++i) {
	648	if (pos[i] != ' ') {
	649	keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
	650	}
	651	}
	652
	653	/* zero-length keyword is an error. */
	654	if (n == 0) {
	655	*status = U_INVALID_FORMAT_ERROR;
	656	return 0;
	657	}
	658
	659	keywordList[numKeywords].keyword[n] = 0;
	660	keywordList[numKeywords].keywordLen = n;
	661	/* now grab the value part. First we skip the '=' */
	662	equalSign++;
	663	/* then we leading spaces */
	664	while(*equalSign == ' ') {
	665	equalSign++;
	666	}
	667
	668	/* Premature end or zero-length value */
	669	if (!*equalSign \|\| equalSign == semicolon) {
	670	*status = U_INVALID_FORMAT_ERROR;
	671	return 0;
	672	}
	673
	674	keywordList[numKeywords].valueStart = equalSign;
	675
	676	pos = semicolon;
	677	i = 0;
	678	if(pos) {
	679	while(*(pos - i - 1) == ' ') {
	680	i++;
	681	}
	682	keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
	683	pos++;
	684	} else {
	685	i = (int32_t)uprv_strlen(equalSign);
	686	while(i && equalSign[i-1] == ' ') {
	687	i--;
	688	}
	689	keywordList[numKeywords].valueLen = i;
	690	}
	691	/* If this is a duplicate keyword, then ignore it */
	692	for (j=0; j<numKeywords; ++j) {
	693	if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
	694	duplicate = TRUE;
	695	break;
	696	}
	697	}
	698	if (!duplicate) {
	699	++numKeywords;
	700	}
	701	} while(pos);
	702
	703	/* now we have a list of keywords */
	704	/* we need to sort it */
	705	uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
	706
	707	/* Now construct the keyword part */
	708	for(i = 0; i < numKeywords; i++) {
	709	if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
	710	uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
	711	if(valuesToo) {
	712	keywords[keywordsLen + keywordList[i].keywordLen] = '=';
	713	} else {
	714	keywords[keywordsLen + keywordList[i].keywordLen] = 0;
	715	}
	716	}
	717	keywordsLen += keywordList[i].keywordLen + 1;
	718	if(valuesToo) {
	719	if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
	720	uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
	721	}
	722	keywordsLen += keywordList[i].valueLen;
	723
	724	if(i < numKeywords - 1) {
	725	if(keywordsLen < keywordCapacity) {
	726	keywords[keywordsLen] = ';';
	727	}
	728	keywordsLen++;
	729	}
	730	}
	731	if(values) {
	732	if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
	733	uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
	734	values[valuesLen + keywordList[i].valueLen] = 0;
	735	}
	736	valuesLen += keywordList[i].valueLen + 1;
	737	}
	738	}
	739	if(values) {
	740	values[valuesLen] = 0;
	741	if(valLen) {
	742	*valLen = valuesLen;
	743	}
	744	}
	745	return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
	746	} else {
	747	return 0;
	748	}
	749	}
	750
	751	U_CFUNC int32_t
	752	locale_getKeywords(const char *localeID,
	753	char prev,
	754	char *keywords, int32_t keywordCapacity,
	755	char values, int32_t valuesCapacity, int32_t valLen,
	756	UBool valuesToo,
	757	UErrorCode *status) {
	758	return _getKeywords(localeID, prev, keywords, keywordCapacity,
	759	values, valuesCapacity, valLen, valuesToo,
	760	status);
	761	}
	762
	763	U_CAPI int32_t U_EXPORT2
	764	uloc_getKeywordValue(const char* localeID,
	765	const char* keywordName,
	766	char* buffer, int32_t bufferCapacity,
	767	UErrorCode* status)
	768	{
	769	const char* startSearchHere = NULL;
	770	const char* nextSeparator = NULL;
	771	char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
	772	char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
	773	int32_t result = 0;
	774
	775	if(status && U_SUCCESS(*status) && localeID) {
	776	char tempBuffer[ULOC_FULLNAME_CAPACITY];
	777	const char* tmpLocaleID;
	778
	779	if (keywordName == NULL \|\| keywordName[0] == 0) {
	780	*status = U_ILLEGAL_ARGUMENT_ERROR;
	781	return 0;
	782	}
	783
	784	locale_canonKeywordName(keywordNameBuffer, keywordName, status);
	785	if(U_FAILURE(*status)) {
	786	return 0;
	787	}
	788
	789	if (_hasBCP47Extension(localeID)) {
	790	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
	791	} else {
	792	tmpLocaleID=localeID;
	793	}
	794
	795	startSearchHere = locale_getKeywordsStart(tmpLocaleID);
	796	if(startSearchHere == NULL) {
	797	/* no keywords, return at once */
	798	return 0;
	799	}
	800
	801	/* find the first keyword */
	802	while(startSearchHere) {
	803	const char* keyValueTail;
	804	int32_t keyValueLen;
	805
	806	startSearchHere++; /* skip @ or ; */
	807	nextSeparator = uprv_strchr(startSearchHere, '=');
	808	if(!nextSeparator) {
	809	status = U_ILLEGAL_ARGUMENT_ERROR; / key must have =value */
	810	return 0;
	811	}
	812	/* strip leading & trailing spaces (TC decided to tolerate these) */
	813	while(*startSearchHere == ' ') {
	814	startSearchHere++;
	815	}
	816	keyValueTail = nextSeparator;
	817	while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
	818	keyValueTail--;
	819	}
	820	/* now keyValueTail points to first char after the keyName */
	821	/* copy & normalize keyName from locale */
	822	if (startSearchHere == keyValueTail) {
	823	status = U_ILLEGAL_ARGUMENT_ERROR; / empty keyword name in passed-in locale */
	824	return 0;
	825	}
	826	keyValueLen = 0;
	827	while (startSearchHere < keyValueTail) {
	828	if (!UPRV_ISALPHANUM(*startSearchHere)) {
	829	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed keyword name */
	830	return 0;
	831	}
	832	if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
	833	localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
	834	} else {
	835	/* keyword name too long for internal buffer */
	836	*status = U_INTERNAL_PROGRAM_ERROR;
	837	return 0;
	838	}
	839	}
	840	localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
	841
	842	startSearchHere = uprv_strchr(nextSeparator, ';');
	843
	844	if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
	845	/* current entry matches the keyword. */
	846	nextSeparator++; /* skip '=' */
	847	/* First strip leading & trailing spaces (TC decided to tolerate these) */
	848	while(*nextSeparator == ' ') {
	849	nextSeparator++;
	850	}
	851	keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
	852	while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
	853	keyValueTail--;
	854	}
	855	/* Now copy the value, but check well-formedness */
	856	if (nextSeparator == keyValueTail) {
	857	status = U_ILLEGAL_ARGUMENT_ERROR; / empty key value name in passed-in locale */
	858	return 0;
	859	}
	860	keyValueLen = 0;
	861	while (nextSeparator < keyValueTail) {
	862	if (!UPRV_ISALPHANUM(nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(nextSeparator)) {
	863	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed key value */
	864	return 0;
	865	}
	866	if (keyValueLen < bufferCapacity) {
	867	/* Should we lowercase value to return here? Tests expect as-is. */
	868	buffer[keyValueLen++] = *nextSeparator++;
	869	} else { /* keep advancing so we return correct length in case of overflow */
	870	keyValueLen++;
	871	nextSeparator++;
	872	}
	873	}
	874	result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
	875	return result;
	876	}
	877	}
	878	}
	879	return 0;
	880	}
	881
	882	U_CAPI int32_t U_EXPORT2
	883	uloc_setKeywordValue(const char* keywordName,
	884	const char* keywordValue,
	885	char* buffer, int32_t bufferCapacity,
	886	UErrorCode* status)
	887	{
	888	/* TODO: sorting. removal. */
	889	int32_t keywordNameLen;
	890	int32_t keywordValueLen;
	891	int32_t bufLen;
	892	int32_t needLen = 0;
	893	char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
	894	char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
	895	char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
	896	int32_t rc;
	897	char* nextSeparator = NULL;
	898	char* nextEqualsign = NULL;
	899	char* startSearchHere = NULL;
	900	char* keywordStart = NULL;
	901	CharString updatedKeysAndValues;
	902	int32_t updatedKeysAndValuesLen;
	903	UBool handledInputKeyAndValue = FALSE;
	904	char keyValuePrefix = '@';
	905
	906	if(U_FAILURE(*status)) {
	907	return -1;
	908	}
	909	if (keywordName == NULL \|\| keywordName[0] == 0 \|\| bufferCapacity <= 1) {
	910	*status = U_ILLEGAL_ARGUMENT_ERROR;
	911	return 0;
	912	}
	913	bufLen = (int32_t)uprv_strlen(buffer);
	914	if(bufferCapacity<bufLen) {
	915	/* The capacity is less than the length?! Is this NULL terminated? */
	916	*status = U_ILLEGAL_ARGUMENT_ERROR;
	917	return 0;
	918	}
	919	keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
	920	if(U_FAILURE(*status)) {
	921	return 0;
	922	}
	923
	924	keywordValueLen = 0;
	925	if(keywordValue) {
	926	while (*keywordValue != 0) {
	927	if (!UPRV_ISALPHANUM(keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(keywordValue)) {
	928	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed key value */
	929	return 0;
	930	}
	931	if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
	932	/* Should we force lowercase in value to set? */
	933	keywordValueBuffer[keywordValueLen++] = *keywordValue++;
	934	} else {
	935	/* keywordValue too long for internal buffer */
	936	*status = U_INTERNAL_PROGRAM_ERROR;
	937	return 0;
	938	}
	939	}
	940	}
	941	keywordValueBuffer[keywordValueLen] = 0; /* terminate */
	942
	943	startSearchHere = (char*)locale_getKeywordsStart(buffer);
	944	if(startSearchHere == NULL \|\| (startSearchHere[1]==0)) {
	945	if(keywordValueLen == 0) { /* no keywords = nothing to remove */
	946	return bufLen;
	947	}
	948
	949	needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
	950	if(startSearchHere) { /* had a single @ */
	951	needLen--; /* already had the @ */
	952	/* startSearchHere points at the @ */
	953	} else {
	954	startSearchHere=buffer+bufLen;
	955	}
	956	if(needLen >= bufferCapacity) {
	957	*status = U_BUFFER_OVERFLOW_ERROR;
	958	return needLen; /* no change */
	959	}
	960	*startSearchHere++ = '@';
	961	uprv_strcpy(startSearchHere, keywordNameBuffer);
	962	startSearchHere += keywordNameLen;
	963	*startSearchHere++ = '=';
	964	uprv_strcpy(startSearchHere, keywordValueBuffer);
	965	return needLen;
	966	} /* end shortcut - no @ */
	967
	968	keywordStart = startSearchHere;
	969	/* search for keyword */
	970	while(keywordStart) {
	971	const char* keyValueTail;
	972	int32_t keyValueLen;
	973
	974	keywordStart++; /* skip @ or ; */
	975	nextEqualsign = uprv_strchr(keywordStart, '=');
	976	if (!nextEqualsign) {
	977	status = U_ILLEGAL_ARGUMENT_ERROR; / key must have =value */
	978	return 0;
	979	}
	980	/* strip leading & trailing spaces (TC decided to tolerate these) */
	981	while(*keywordStart == ' ') {
	982	keywordStart++;
	983	}
	984	keyValueTail = nextEqualsign;
	985	while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
	986	keyValueTail--;
	987	}
	988	/* now keyValueTail points to first char after the keyName */
	989	/* copy & normalize keyName from locale */
	990	if (keywordStart == keyValueTail) {
	991	status = U_ILLEGAL_ARGUMENT_ERROR; / empty keyword name in passed-in locale */
	992	return 0;
	993	}
	994	keyValueLen = 0;
	995	while (keywordStart < keyValueTail) {
	996	if (!UPRV_ISALPHANUM(*keywordStart)) {
	997	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed keyword name */
	998	return 0;
	999	}
	1000	if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
	1001	localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
	1002	} else {
	1003	/* keyword name too long for internal buffer */
	1004	*status = U_INTERNAL_PROGRAM_ERROR;
	1005	return 0;
	1006	}
	1007	}
	1008	localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
	1009
	1010	nextSeparator = uprv_strchr(nextEqualsign, ';');
	1011
	1012	/* start processing the value part */
	1013	nextEqualsign++; /* skip '=' */
	1014	/* First strip leading & trailing spaces (TC decided to tolerate these) */
	1015	while(*nextEqualsign == ' ') {
	1016	nextEqualsign++;
	1017	}
	1018	keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
	1019	while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
	1020	keyValueTail--;
	1021	}
	1022	if (nextEqualsign == keyValueTail) {
	1023	status = U_ILLEGAL_ARGUMENT_ERROR; / empty key value in passed-in locale */
	1024	return 0;
	1025	}
	1026
	1027	rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
	1028	if(rc == 0) {
	1029	/* Current entry matches the input keyword. Update the entry */
	1030	if(keywordValueLen > 0) { /* updating a value */
	1031	updatedKeysAndValues.append(keyValuePrefix, *status);
	1032	keyValuePrefix = ';'; /* for any subsequent key-value pair */
	1033	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
	1034	updatedKeysAndValues.append('=', *status);
	1035	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
	1036	} /* else removing this entry, don't emit anything */
	1037	handledInputKeyAndValue = TRUE;
	1038	} else {
	1039	/* input keyword sorts earlier than current entry, add before current entry */
	1040	if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
	1041	/* insert new entry at this location */
	1042	updatedKeysAndValues.append(keyValuePrefix, *status);
	1043	keyValuePrefix = ';'; /* for any subsequent key-value pair */
	1044	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
	1045	updatedKeysAndValues.append('=', *status);
	1046	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
	1047	handledInputKeyAndValue = TRUE;
	1048	}
	1049	/* copy the current entry */
	1050	updatedKeysAndValues.append(keyValuePrefix, *status);
	1051	keyValuePrefix = ';'; /* for any subsequent key-value pair */
	1052	updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
	1053	updatedKeysAndValues.append('=', *status);
	1054	updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
	1055	}
	1056	if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
	1057	/* append new entry at the end, it sorts later than existing entries */
	1058	updatedKeysAndValues.append(keyValuePrefix, *status);
	1059	/* skip keyValuePrefix update, no subsequent key-value pair */
	1060	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
	1061	updatedKeysAndValues.append('=', *status);
	1062	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
	1063	handledInputKeyAndValue = TRUE;
	1064	}
	1065	keywordStart = nextSeparator;
	1066	} /* end loop searching */
	1067
	1068	/* Any error from updatedKeysAndValues.append above would be internal and not due to
	1069	* problems with the passed-in locale. So if we did encounter problems with the
	1070	* passed-in locale above, those errors took precedence and overrode any error
	1071	* status from updatedKeysAndValues.append, and also caused a return of 0. If there
	1072	* are errors here they are from updatedKeysAndValues.append; they do cause an
	1073	* error return but the passed-in locale is unmodified and the original bufLen is
	1074	* returned.
	1075	*/
	1076	if (!handledInputKeyAndValue \|\| U_FAILURE(*status)) {
	1077	/* if input key/value specified removal of a keyword not present in locale, or
	1078	* there was an error in CharString.append, leave original locale alone. */
	1079	return bufLen;
	1080	}
	1081
	1082	updatedKeysAndValuesLen = updatedKeysAndValues.length();
	1083	/* needLen = length of the part before '@' + length of updated key-value part including '@' */
	1084	needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
	1085	if(needLen >= bufferCapacity) {
	1086	*status = U_BUFFER_OVERFLOW_ERROR;
	1087	return needLen; /* no change */
	1088	}
	1089	if (updatedKeysAndValuesLen > 0) {
	1090	uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
	1091	}
	1092	buffer[needLen]=0;
	1093	return needLen;
	1094	}
	1095
	1096	/* ### ID parsing implementation **************************************************/
	1097
	/* TRUE if 'a' is one of the letters that can start a special prefix. */
	#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

	/*returns TRUE if one of the special prefixes is here (s=string)
	'x-' or 'i-' */
	#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

	/* Dot terminates it because of POSIX form where dot precedes the codepage
	 * except for variant
	 */
	#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
	1108
	/**
	 * Lookup 'key' in the array 'list'. The array 'list' should contain
	 * a NULL entry, followed by more entries, and a second NULL entry.
	 *
	 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
	 * COUNTRIES_3.
	 *
	 * @return the index of the first entry equal to 'key', counted from the
	 *         start of 'list' (so the first NULL occupies one index), or -1 if
	 *         'key' is in neither section.
	 */
	static int16_t _findIndex(const char* const* list, const char* key)
	{
	    const char* const* anchor = list;
	    int32_t pass = 0;

	    /* Make two passes through two NULL-terminated arrays at 'list' */
	    while (pass++ < 2) {
	        while (*list) {
	            if (uprv_strcmp(key, *list) == 0) {
	                return (int16_t)(list - anchor);
	            }
	            list++;
	        }
	        ++list; /* skip final NULL *CWB*/
	    }
	    return -1;
	}
	1133
	/*
	 * Count the length of src while copying it to dest; return strlen(src).
	 * At most destCapacity characters are stored and no terminating NUL is
	 * written; once dest is full the remaining length is only counted.
	 */
	static inline int32_t
	_copyCount(char *dest, int32_t destCapacity, const char *src) {
	    const char *anchor;
	    char c;

	    anchor=src;
	    for(;;) {
	        if((c=*src)==0) {
	            return (int32_t)(src-anchor);
	        }
	        if(destCapacity<=0) {
	            /* out of room: report the full source length without copying more */
	            return (int32_t)((src-anchor)+uprv_strlen(src));
	        }
	        ++src;
	        *dest++=c;
	        --destCapacity;
	    }
	}
	1153
	1154	U_CFUNC const char*
	1155	uloc_getCurrentCountryID(const char* oldID){
	1156	int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
	1157	if (offset >= 0) {
	1158	return REPLACEMENT_COUNTRIES[offset];
	1159	}
	1160	return oldID;
	1161	}
	1162	U_CFUNC const char*
	1163	uloc_getCurrentLanguageID(const char* oldID){
	1164	int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
	1165	if (offset >= 0) {
	1166	return REPLACEMENT_LANGUAGES[offset];
	1167	}
	1168	return oldID;
	1169	}
	1170	/*
	1171	* the internal functions _getLanguage(), _getCountry(), _getVariant()
	1172	* avoid duplicating code to handle the earlier locale ID pieces
	1173	* in the functions for the later ones by
	1174	* setting the *pEnd pointer to where they stopped parsing
	1175	*
	1176	* TODO try to use this in Locale
	1177	*/
	1178	U_CFUNC int32_t
	1179	ulocimp_getLanguage(const char *localeID,
	1180	char *language, int32_t languageCapacity,
	1181	const char **pEnd) {
	1182	int32_t i=0;
	1183	int32_t offset;
	1184	char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
	1185
	1186	/* if it starts with i- or x- then copy that prefix */
	1187	if(_isIDPrefix(localeID)) {
	1188	if(i<languageCapacity) {
	1189	language[i]=(char)uprv_tolower(*localeID);
	1190	}
	1191	if(i<languageCapacity) {
	1192	language[i+1]='-';
	1193	}
	1194	i+=2;
	1195	localeID+=2;
	1196	}
	1197
	1198	/* copy the language as far as possible and count its length */
	1199	while(!_isTerminator(localeID) && !_isIDSeparator(localeID)) {
	1200	if(i<languageCapacity) {
	1201	language[i]=(char)uprv_tolower(*localeID);
	1202	}
	1203	if(i<3) {
	1204	U_ASSERT(i>=0);
	1205	lang[i]=(char)uprv_tolower(*localeID);
	1206	}
	1207	i++;
	1208	localeID++;
	1209	}
	1210
	1211	if(i==3) {
	1212	/* convert 3 character code to 2 character code if possible CWB/
	1213	offset=_findIndex(LANGUAGES_3, lang);
	1214	if(offset>=0) {
	1215	i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
	1216	}
	1217	}
	1218
	1219	if(pEnd!=NULL) {
	1220	*pEnd=localeID;
	1221	}
	1222	return i;
	1223	}
	1224
	1225	U_CFUNC int32_t
	1226	ulocimp_getScript(const char *localeID,
	1227	char *script, int32_t scriptCapacity,
	1228	const char **pEnd)
	1229	{
	1230	int32_t idLen = 0;
	1231
	1232	if (pEnd != NULL) {
	1233	*pEnd = localeID;
	1234	}
	1235
	1236	/* copy the second item as far as possible and count its length */
	1237	while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
	1238	&& uprv_isASCIILetter(localeID[idLen])) {
	1239	idLen++;
	1240	}
	1241
	1242	/* If it's exactly 4 characters long, then it's a script and not a country. */
	1243	if (idLen == 4) {
	1244	int32_t i;
	1245	if (pEnd != NULL) {
	1246	*pEnd = localeID+idLen;
	1247	}
	1248	if(idLen > scriptCapacity) {
	1249	idLen = scriptCapacity;
	1250	}
	1251	if (idLen >= 1) {
	1252	script[0]=(char)uprv_toupper(*(localeID++));
	1253	}
	1254	for (i = 1; i < idLen; i++) {
	1255	script[i]=(char)uprv_tolower(*(localeID++));
	1256	}
	1257	}
	1258	else {
	1259	idLen = 0;
	1260	}
	1261	return idLen;
	1262	}
	1263
	1264	U_CFUNC int32_t
	1265	ulocimp_getCountry(const char *localeID,
	1266	char *country, int32_t countryCapacity,
	1267	const char **pEnd)
	1268	{
	1269	int32_t idLen=0;
	1270	char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
	1271	int32_t offset;
	1272
	1273	/* copy the country as far as possible and count its length */
	1274	while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
	1275	if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /CWB/
	1276	cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
	1277	}
	1278	idLen++;
	1279	}
	1280
	1281	/* the country should be either length 2 or 3 */
	1282	if (idLen == 2 \|\| idLen == 3) {
	1283	UBool gotCountry = FALSE;
	1284	/* convert 3 character code to 2 character code if possible CWB/
	1285	if(idLen==3) {
	1286	offset=_findIndex(COUNTRIES_3, cnty);
	1287	if(offset>=0) {
	1288	idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
	1289	gotCountry = TRUE;
	1290	}
	1291	}
	1292	if (!gotCountry) {
	1293	int32_t i = 0;
	1294	for (i = 0; i < idLen; i++) {
	1295	if (i < countryCapacity) {
	1296	country[i]=(char)uprv_toupper(localeID[i]);
	1297	}
	1298	}
	1299	}
	1300	localeID+=idLen;
	1301	} else {
	1302	idLen = 0;
	1303	}
	1304
	1305	if(pEnd!=NULL) {
	1306	*pEnd=localeID;
	1307	}
	1308
	1309	return idLen;
	1310	}
	1311
	1312	/**
	1313	* @param needSeparator if true, then add leading '_' if any variants
	1314	* are added to 'variant'
	1315	*/
	1316	static int32_t
	1317	_getVariantEx(const char *localeID,
	1318	char prev,
	1319	char *variant, int32_t variantCapacity,
	1320	UBool needSeparator) {
	1321	int32_t i=0;
	1322
	1323	/* get one or more variant tags and separate them with '_' */
	1324	if(_isIDSeparator(prev)) {
	1325	/* get a variant string after a '-' or '_' */
	1326	while(!_isTerminator(*localeID)) {
	1327	if (needSeparator) {
	1328	if (i<variantCapacity) {
	1329	variant[i] = '_';
	1330	}
	1331	++i;
	1332	needSeparator = FALSE;
	1333	}
	1334	if(i<variantCapacity) {
	1335	variant[i]=(char)uprv_toupper(*localeID);
	1336	if(variant[i]=='-') {
	1337	variant[i]='_';
	1338	}
	1339	}
	1340	i++;
	1341	localeID++;
	1342	}
	1343	}
	1344
	1345	/* if there is no variant tag after a '-' or '_' then look for '@' */
	1346	if(i==0) {
	1347	if(prev=='@') {
	1348	/* keep localeID */
	1349	} else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
	1350	++localeID; /* point after the '@' */
	1351	} else {
	1352	return 0;
	1353	}
	1354	while(!_isTerminator(*localeID)) {
	1355	if (needSeparator) {
	1356	if (i<variantCapacity) {
	1357	variant[i] = '_';
	1358	}
	1359	++i;
	1360	needSeparator = FALSE;
	1361	}
	1362	if(i<variantCapacity) {
	1363	variant[i]=(char)uprv_toupper(*localeID);
	1364	if(variant[i]=='-' \|\| variant[i]==',') {
	1365	variant[i]='_';
	1366	}
	1367	}
	1368	i++;
	1369	localeID++;
	1370	}
	1371	}
	1372
	1373	return i;
	1374	}
	1375
	1376	static int32_t
	1377	_getVariant(const char *localeID,
	1378	char prev,
	1379	char *variant, int32_t variantCapacity) {
	1380	return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
	1381	}
	1382
	1383	/* Keyword enumeration */
	1384
	/* Per-enumeration state shared by the uloc_kw_* callbacks below. */
	typedef struct UKeywordsContext {
	    char* keywords;   /* start of the keyword list; freed by uloc_kw_closeKeywords() */
	    char* current;    /* read cursor into the same list; rewound by uloc_kw_resetKeywords() */
	} UKeywordsContext;
	1389
	1390	U_CDECL_BEGIN
	1391
	1392	static void U_CALLCONV
	1393	uloc_kw_closeKeywords(UEnumeration *enumerator) {
	1394	uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
	1395	uprv_free(enumerator->context);
	1396	uprv_free(enumerator);
	1397	}
	1398
	1399	static int32_t U_CALLCONV
	1400	uloc_kw_countKeywords(UEnumeration en, UErrorCode /status/) {
	1401	char kw = ((UKeywordsContext )en->context)->keywords;
	1402	int32_t result = 0;
	1403	while(*kw) {
	1404	result++;
	1405	kw += uprv_strlen(kw)+1;
	1406	}
	1407	return result;
	1408	}
	1409
	1410	static const char * U_CALLCONV
	1411	uloc_kw_nextKeyword(UEnumeration* en,
	1412	int32_t* resultLength,
	1413	UErrorCode* /status/) {
	1414	const char* result = ((UKeywordsContext *)en->context)->current;
	1415	int32_t len = 0;
	1416	if(*result) {
	1417	len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
	1418	((UKeywordsContext *)en->context)->current += len+1;
	1419	} else {
	1420	result = NULL;
	1421	}
	1422	if (resultLength) {
	1423	*resultLength = len;
	1424	}
	1425	return result;
	1426	}
	1427
	1428	static void U_CALLCONV
	1429	uloc_kw_resetKeywords(UEnumeration* en,
	1430	UErrorCode* /status/) {
	1431	((UKeywordsContext )en->context)->current = ((UKeywordsContext )en->context)->keywords;
	1432	}
	1433
	1434	U_CDECL_END
	1435
	1436
	1437	static const UEnumeration gKeywordsEnum = {
	1438	NULL,
	1439	NULL,
	1440	uloc_kw_closeKeywords,
	1441	uloc_kw_countKeywords,
	1442	uenum_unextDefault,
	1443	uloc_kw_nextKeyword,
	1444	uloc_kw_resetKeywords
	1445	};
	1446
	1447	U_CAPI UEnumeration* U_EXPORT2
	1448	uloc_openKeywordList(const char keywordList, int32_t keywordListSize, UErrorCode status)
	1449	{
	1450	UKeywordsContext *myContext = NULL;
	1451	UEnumeration *result = NULL;
	1452
	1453	if(U_FAILURE(*status)) {
	1454	return NULL;
	1455	}
	1456	result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
	1457	/* Null pointer test */
	1458	if (result == NULL) {
	1459	*status = U_MEMORY_ALLOCATION_ERROR;
	1460	return NULL;
	1461	}
	1462	uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
	1463	myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
	1464	if (myContext == NULL) {
	1465	*status = U_MEMORY_ALLOCATION_ERROR;
	1466	uprv_free(result);
	1467	return NULL;
	1468	}
	1469	myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
	1470	uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
	1471	myContext->keywords[keywordListSize] = 0;
	1472	myContext->current = myContext->keywords;
	1473	result->context = myContext;
	1474	return result;
	1475	}
	1476
	1477	U_CAPI UEnumeration* U_EXPORT2
	1478	uloc_openKeywords(const char* localeID,
	1479	UErrorCode* status)
	1480	{
	1481	int32_t i=0;
	1482	char keywords[256];
	1483	int32_t keywordsCapacity = 256;
	1484	char tempBuffer[ULOC_FULLNAME_CAPACITY];
	1485	const char* tmpLocaleID;
	1486
	1487	if(status==NULL \|\| U_FAILURE(*status)) {
	1488	return 0;
	1489	}
	1490
	1491	if (_hasBCP47Extension(localeID)) {
	1492	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
	1493	} else {
	1494	if (localeID==NULL) {
	1495	localeID=uloc_getDefault();
	1496	}
	1497	tmpLocaleID=localeID;
	1498	}
	1499
	1500	/* Skip the language */
	1501	ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
	1502	if(_isIDSeparator(*tmpLocaleID)) {
	1503	const char *scriptID;
	1504	/* Skip the script if available */
	1505	ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
	1506	if(scriptID != tmpLocaleID+1) {
	1507	/* Found optional script */
	1508	tmpLocaleID = scriptID;
	1509	}
	1510	/* Skip the Country */
	1511	if (_isIDSeparator(*tmpLocaleID)) {
	1512	ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
	1513	if(_isIDSeparator(*tmpLocaleID)) {
	1514	_getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
	1515	}
	1516	}
	1517	}
	1518
	1519	/* keywords are located after '@' */
	1520	if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
	1521	i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
	1522	}
	1523
	1524	if(i) {
	1525	return uloc_openKeywordList(keywords, i, status);
	1526	} else {
	1527	return NULL;
	1528	}
	1529	}
	1530
	1531
	/* bit-flags for 'options' parameter of _canonicalize */
	#define _ULOC_STRIP_KEYWORDS 0x2
	#define _ULOC_CANONICALIZE   0x1

	/* TRUE if any bit of 'mask' is set in 'options'. Arguments are
	 * parenthesized so that compound expressions such as a|b group correctly. */
	#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

	/* Deliberately not NUL-terminated: it is compared with uprv_strncmp over
	 * exactly I_DEFAULT_LENGTH characters. */
	static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
	#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
	1540
	1541	/**
	1542	* Canonicalize the given localeID, to level 1 or to level 2,
	1543	* depending on the options. To specify level 1, pass in options=0.
	1544	* To specify level 2, pass in options=_ULOC_CANONICALIZE.
	1545	*
	1546	* This is the code underlying uloc_getName and uloc_canonicalize.
	1547	*/
	1548	static int32_t
	1549	_canonicalize(const char* localeID,
	1550	char* result,
	1551	int32_t resultCapacity,
	1552	uint32_t options,
	1553	UErrorCode* err) {
	1554	int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
	1555	char localeBuffer[ULOC_FULLNAME_CAPACITY];
	1556	char tempBuffer[ULOC_FULLNAME_CAPACITY];
	1557	const char* origLocaleID;
	1558	const char* tmpLocaleID;
	1559	const char* keywordAssign = NULL;
	1560	const char* separatorIndicator = NULL;
	1561	char* name;
	1562	char* variant = NULL; /* pointer into name, or NULL */
	1563
	1564	if (U_FAILURE(*err)) {
	1565	return 0;
	1566	}
	1567
	1568	if (_hasBCP47Extension(localeID)) {
	1569	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
	1570	} else {
	1571	if (localeID==NULL) {
	1572	localeID=uloc_getDefault();
	1573	}
	1574	tmpLocaleID=localeID;
	1575	}
	1576
	1577	origLocaleID=tmpLocaleID;
	1578
	1579	/* if we are doing a full canonicalization, then put results in
	1580	localeBuffer, if necessary; otherwise send them to result. */
	1581	if (/OPTION_SET(options, _ULOC_CANONICALIZE) &&/
	1582	(result == NULL \|\| resultCapacity < (int32_t)sizeof(localeBuffer))) {
	1583	name = localeBuffer;
	1584	nameCapacity = (int32_t)sizeof(localeBuffer);
	1585	} else {
	1586	name = result;
	1587	nameCapacity = resultCapacity;
	1588	}
	1589
	1590	/* get all pieces, one after another, and separate with '_' */
	1591	len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
	1592
	1593	if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
	1594	const char *d = uloc_getDefault();
	1595
	1596	len = (int32_t)uprv_strlen(d);
	1597
	1598	if (name != NULL) {
	1599	uprv_memcpy(name, d, len);
	1600	}
	1601	} else if(_isIDSeparator(*tmpLocaleID)) {
	1602	const char *scriptID;
	1603
	1604	++fieldCount;
	1605	if(len<nameCapacity) {
	1606	name[len]='_';
	1607	}
	1608	++len;
	1609
	1610	scriptSize=ulocimp_getScript(tmpLocaleID+1,
	1611	(len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
	1612	if(scriptSize > 0) {
	1613	/* Found optional script */
	1614	tmpLocaleID = scriptID;
	1615	++fieldCount;
	1616	len+=scriptSize;
	1617	if (_isIDSeparator(*tmpLocaleID)) {
	1618	/* If there is something else, then we add the _ */
	1619	if(len<nameCapacity) {
	1620	name[len]='_';
	1621	}
	1622	++len;
	1623	}
	1624	}
	1625
	1626	if (_isIDSeparator(*tmpLocaleID)) {
	1627	const char *cntryID;
	1628	int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
	1629	(len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
	1630	if (cntrySize > 0) {
	1631	/* Found optional country */
	1632	tmpLocaleID = cntryID;
	1633	len+=cntrySize;
	1634	}
	1635	if(_isIDSeparator(*tmpLocaleID)) {
	1636	/* If there is something else, then we add the _ if we found country before. */
	1637	if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
	1638	++fieldCount;
	1639	if(len<nameCapacity) {
	1640	name[len]='_';
	1641	}
	1642	++len;
	1643	}
	1644
	1645	variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
	1646	(len<nameCapacity ? name+len : NULL), nameCapacity-len);
	1647	if (variantSize > 0) {
	1648	variant = len<nameCapacity ? name+len : NULL;
	1649	len += variantSize;
	1650	tmpLocaleID += variantSize + 1; /* skip '_' and variant */
	1651	}
	1652	}
	1653	}
	1654	}
	1655
	1656	/* Copy POSIX-style charset specifier, if any [mr.utf8] */
	1657	if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
	1658	UBool done = FALSE;
	1659	do {
	1660	char c = *tmpLocaleID;
	1661	switch (c) {
	1662	case 0:
	1663	case '@':
	1664	done = TRUE;
	1665	break;
	1666	default:
	1667	if (len<nameCapacity) {
	1668	name[len] = c;
	1669	}
	1670	++len;
	1671	++tmpLocaleID;
	1672	break;
	1673	}
	1674	} while (!done);
	1675	}
	1676
	1677	/* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
	1678	After this, tmpLocaleID either points to '@' or is NULL */
	1679	if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
	1680	keywordAssign = uprv_strchr(tmpLocaleID, '=');
	1681	separatorIndicator = uprv_strchr(tmpLocaleID, ';');
	1682	}
	1683
	1684	/* Copy POSIX-style variant, if any [mr@FOO] */
	1685	if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
	1686	tmpLocaleID != NULL && keywordAssign == NULL) {
	1687	for (;;) {
	1688	char c = *tmpLocaleID;
	1689	if (c == 0) {
	1690	break;
	1691	}
	1692	if (len<nameCapacity) {
	1693	name[len] = c;
	1694	}
	1695	++len;
	1696	++tmpLocaleID;
	1697	}
	1698	}
	1699
	1700	if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
	1701	/* Handle @FOO variant if @ is present and not followed by = */
	1702	if (tmpLocaleID!=NULL && keywordAssign==NULL) {
	1703	int32_t posixVariantSize;
	1704	/* Add missing '_' if needed */
	1705	if (fieldCount < 2 \|\| (fieldCount < 3 && scriptSize > 0)) {
	1706	do {
	1707	if(len<nameCapacity) {
	1708	name[len]='_';
	1709	}
	1710	++len;
	1711	++fieldCount;
	1712	} while(fieldCount<2);
	1713	}
	1714	posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
	1715	(UBool)(variantSize > 0));
	1716	if (posixVariantSize > 0) {
	1717	if (variant == NULL) {
	1718	variant = name+len;
	1719	}
	1720	len += posixVariantSize;
	1721	variantSize += posixVariantSize;
	1722	}
	1723	}
	1724
	1725	/* Look up the ID in the canonicalization map */
	1726	for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
	1727	const char* id = CANONICALIZE_MAP[j].id;
	1728	int32_t n = (int32_t)uprv_strlen(id);
	1729	if (len == n && uprv_strncmp(name, id, n) == 0) {
	1730	if (n == 0 && tmpLocaleID != NULL) {
	1731	break; /* Don't remap "" if keywords present */
	1732	}
	1733	len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
	1734	break;
	1735	}
	1736	}
	1737	}
	1738
	1739	if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
	1740	if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
	1741	(!separatorIndicator \|\| separatorIndicator > keywordAssign)) {
	1742	if(len<nameCapacity) {
	1743	name[len]='@';
	1744	}
	1745	++len;
	1746	++fieldCount;
	1747	len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
	1748	NULL, 0, NULL, TRUE, err);
	1749	}
	1750	}
	1751
	1752	if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
	1753	uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
	1754	}
	1755
	1756	return u_terminateChars(result, resultCapacity, len, err);
	1757	}
	1758
	1759	/* ### ID parsing API **************************************************/
	1760
	1761	U_CAPI int32_t U_EXPORT2
	1762	uloc_getParent(const char* localeID,
	1763	char* parent,
	1764	int32_t parentCapacity,
	1765	UErrorCode* err)
	1766	{
	1767	const char *lastUnderscore;
	1768	int32_t i;
	1769
	1770	if (U_FAILURE(*err))
	1771	return 0;
	1772
	1773	if (localeID == NULL)
	1774	localeID = uloc_getDefault();
	1775
	1776	lastUnderscore=uprv_strrchr(localeID, '_');
	1777	if(lastUnderscore!=NULL) {
	1778	i=(int32_t)(lastUnderscore-localeID);
	1779	} else {
	1780	i=0;
	1781	}
	1782
	1783	if(i>0 && parent != localeID) {
	1784	uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
	1785	}
	1786
	1787	return u_terminateChars(parent, parentCapacity, i, err);
	1788	}
	1789
	1790	U_CAPI int32_t U_EXPORT2
	1791	uloc_getLanguage(const char* localeID,
	1792	char* language,
	1793	int32_t languageCapacity,
	1794	UErrorCode* err)
	1795	{
	1796	/* uloc_getLanguage will return a 2 character iso-639 code if one exists. CWB/
	1797	int32_t i=0;
	1798
	1799	if (err==NULL \|\| U_FAILURE(*err)) {
	1800	return 0;
	1801	}
	1802
	1803	if(localeID==NULL) {
	1804	localeID=uloc_getDefault();
	1805	}
	1806
	1807	i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
	1808	return u_terminateChars(language, languageCapacity, i, err);
	1809	}
	1810
	1811	U_CAPI int32_t U_EXPORT2
	1812	uloc_getScript(const char* localeID,
	1813	char* script,
	1814	int32_t scriptCapacity,
	1815	UErrorCode* err)
	1816	{
	1817	int32_t i=0;
	1818
	1819	if(err==NULL \|\| U_FAILURE(*err)) {
	1820	return 0;
	1821	}
	1822
	1823	if(localeID==NULL) {
	1824	localeID=uloc_getDefault();
	1825	}
	1826
	1827	/* skip the language */
	1828	ulocimp_getLanguage(localeID, NULL, 0, &localeID);
	1829	if(_isIDSeparator(*localeID)) {
	1830	i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
	1831	}
	1832	return u_terminateChars(script, scriptCapacity, i, err);
	1833	}
	1834
	1835	U_CAPI int32_t U_EXPORT2
	1836	uloc_getCountry(const char* localeID,
	1837	char* country,
	1838	int32_t countryCapacity,
	1839	UErrorCode* err)
	1840	{
	1841	int32_t i=0;
	1842
	1843	if(err==NULL \|\| U_FAILURE(*err)) {
	1844	return 0;
	1845	}
	1846
	1847	if(localeID==NULL) {
	1848	localeID=uloc_getDefault();
	1849	}
	1850
	1851	/* Skip the language */
	1852	ulocimp_getLanguage(localeID, NULL, 0, &localeID);
	1853	if(_isIDSeparator(*localeID)) {
	1854	const char *scriptID;
	1855	/* Skip the script if available */
	1856	ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
	1857	if(scriptID != localeID+1) {
	1858	/* Found optional script */
	1859	localeID = scriptID;
	1860	}
	1861	if(_isIDSeparator(*localeID)) {
	1862	i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
	1863	}
	1864	}
	1865	return u_terminateChars(country, countryCapacity, i, err);
	1866	}
	1867
	1868	U_CAPI int32_t U_EXPORT2
	1869	uloc_getVariant(const char* localeID,
	1870	char* variant,
	1871	int32_t variantCapacity,
	1872	UErrorCode* err)
	1873	{
	1874	char tempBuffer[ULOC_FULLNAME_CAPACITY];
	1875	const char* tmpLocaleID;
	1876	int32_t i=0;
	1877
	1878	if(err==NULL \|\| U_FAILURE(*err)) {
	1879	return 0;
	1880	}
	1881
	1882	if (_hasBCP47Extension(localeID)) {
	1883	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
	1884	} else {
	1885	if (localeID==NULL) {
	1886	localeID=uloc_getDefault();
	1887	}
	1888	tmpLocaleID=localeID;
	1889	}
	1890
	1891	/* Skip the language */
	1892	ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
	1893	if(_isIDSeparator(*tmpLocaleID)) {
	1894	const char *scriptID;
	1895	/* Skip the script if available */
	1896	ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
	1897	if(scriptID != tmpLocaleID+1) {
	1898	/* Found optional script */
	1899	tmpLocaleID = scriptID;
	1900	}
	1901	/* Skip the Country */
	1902	if (_isIDSeparator(*tmpLocaleID)) {
	1903	const char *cntryID;
	1904	ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
	1905	if (cntryID != tmpLocaleID+1) {
	1906	/* Found optional country */
	1907	tmpLocaleID = cntryID;
	1908	}
	1909	if(_isIDSeparator(*tmpLocaleID)) {
	1910	/* If there was no country ID, skip a possible extra IDSeparator */
	1911	if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
	1912	tmpLocaleID++;
	1913	}
	1914	i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
	1915	}
	1916	}
	1917	}
	1918
	1919	/* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
	1920	/* if we do not have a variant tag yet then try a POSIX variant after '@' */
	1921	/*
	1922	if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
	1923	i=_getVariant(localeID+1, '@', variant, variantCapacity);
	1924	}
	1925	*/
	1926	return u_terminateChars(variant, variantCapacity, i, err);
	1927	}
	1928
	1929	U_CAPI int32_t U_EXPORT2
	1930	uloc_getName(const char* localeID,
	1931	char* name,
	1932	int32_t nameCapacity,
	1933	UErrorCode* err)
	1934	{
	1935	return _canonicalize(localeID, name, nameCapacity, 0, err);
	1936	}
	1937
	1938	U_CAPI int32_t U_EXPORT2
	1939	uloc_getBaseName(const char* localeID,
	1940	char* name,
	1941	int32_t nameCapacity,
	1942	UErrorCode* err)
	1943	{
	1944	return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
	1945	}
	1946
	1947	U_CAPI int32_t U_EXPORT2
	1948	uloc_canonicalize(const char* localeID,
	1949	char* name,
	1950	int32_t nameCapacity,
	1951	UErrorCode* err)
	1952	{
	1953	return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
	1954	}
	1955
	1956	U_CAPI const char* U_EXPORT2
	1957	uloc_getISO3Language(const char* localeID)
	1958	{
	1959	int16_t offset;
	1960	char lang[ULOC_LANG_CAPACITY];
	1961	UErrorCode err = U_ZERO_ERROR;
	1962
	1963	if (localeID == NULL)
	1964	{
	1965	localeID = uloc_getDefault();
	1966	}
	1967	uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
	1968	if (U_FAILURE(err))
	1969	return "";
	1970	offset = _findIndex(LANGUAGES, lang);
	1971	if (offset < 0)
	1972	return "";
	1973	return LANGUAGES_3[offset];
	1974	}
	1975
	1976	U_CAPI const char* U_EXPORT2
	1977	uloc_getISO3Country(const char* localeID)
	1978	{
	1979	int16_t offset;
	1980	char cntry[ULOC_LANG_CAPACITY];
	1981	UErrorCode err = U_ZERO_ERROR;
	1982
	1983	if (localeID == NULL)
	1984	{
	1985	localeID = uloc_getDefault();
	1986	}
	1987	uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
	1988	if (U_FAILURE(err))
	1989	return "";
	1990	offset = _findIndex(COUNTRIES, cntry);
	1991	if (offset < 0)
	1992	return "";
	1993
	1994	return COUNTRIES_3[offset];
	1995	}
	1996
	1997	U_CAPI uint32_t U_EXPORT2
	1998	uloc_getLCID(const char* localeID)
	1999	{
	2000	UErrorCode status = U_ZERO_ERROR;
	2001	char langID[ULOC_FULLNAME_CAPACITY];
	2002	uint32_t lcid = 0;
	2003
	2004	/* Check for incomplete id. */
	2005	if (!localeID \|\| uprv_strlen(localeID) < 2) {
	2006	return 0;
	2007	}
	2008
	2009	// First, attempt Windows platform lookup if available, but fall
	2010	// through to catch any special cases (ICU vs Windows name differences).
	2011	lcid = uprv_convertToLCIDPlatform(localeID, &status);
	2012	if (U_FAILURE(status)) {
	2013	return 0;
	2014	}
	2015	if (lcid > 0) {
	2016	// Windows found an LCID, return that
	2017	return lcid;
	2018	}
	2019
	2020	uloc_getLanguage(localeID, langID, sizeof(langID), &status);
	2021	if (U_FAILURE(status) \|\| status == U_STRING_NOT_TERMINATED_WARNING) {
	2022	return 0;
	2023	}
	2024
	2025	if (uprv_strchr(localeID, '@')) {
	2026	// uprv_convertToLCID does not support keywords other than collation.
	2027	// Remove all keywords except collation.
	2028	int32_t len;
	2029	char collVal[ULOC_KEYWORDS_CAPACITY];
	2030	char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
	2031
	2032	len = uloc_getKeywordValue(localeID, "collation", collVal,
	2033	UPRV_LENGTHOF(collVal) - 1, &status);
	2034
	2035	if (U_SUCCESS(status) && len > 0) {
	2036	collVal[len] = 0;
	2037
	2038	len = uloc_getBaseName(localeID, tmpLocaleID,
	2039	UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
	2040
	2041	if (U_SUCCESS(status) && len > 0) {
	2042	tmpLocaleID[len] = 0;
	2043
	2044	len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
	2045	UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
	2046
	2047	if (U_SUCCESS(status) && len > 0) {
	2048	tmpLocaleID[len] = 0;
	2049	return uprv_convertToLCID(langID, tmpLocaleID, &status);
	2050	}
	2051	}
	2052	}
	2053
	2054	// fall through - all keywords are simply ignored
	2055	status = U_ZERO_ERROR;
	2056	}
	2057
	2058	return uprv_convertToLCID(langID, localeID, &status);
	2059	}
	2060
	2061	U_CAPI int32_t U_EXPORT2
	2062	uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
	2063	UErrorCode *status)
	2064	{
	2065	return uprv_convertToPosix(hostid, locale, localeCapacity, status);
	2066	}
	2067
	2068	/* ### Default locale **************************************************/
	2069
	2070	U_CAPI const char* U_EXPORT2
	2071	uloc_getDefault()
	2072	{
	2073	return locale_get_default();
	2074	}
	2075
	2076	U_CAPI void U_EXPORT2
	2077	uloc_setDefault(const char* newDefaultLocale,
	2078	UErrorCode* err)
	2079	{
	2080	if (U_FAILURE(*err))
	2081	return;
	2082	/* the error code isn't currently used for anything by this function*/
	2083
	2084	/* propagate change to C++ */
	2085	locale_set_default(newDefaultLocale);
	2086	}
	2087
	2088	/**
	2089	* Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
	2090	* to an array of pointers to arrays of char. All of these pointers are owned
	2091	* by ICU-- do not delete them, and do not write through them. The array is
	2092	* terminated with a null pointer.
	2093	*/
	2094	U_CAPI const char* const* U_EXPORT2
	2095	uloc_getISOLanguages()
	2096	{
	2097	return LANGUAGES;
	2098	}
	2099
	2100	/**
	2101	* Returns a list of all 2-letter country codes defined in ISO 639. This is a
	2102	* pointer to an array of pointers to arrays of char. All of these pointers are
	2103	* owned by ICU-- do not delete them, and do not write through them. The array is
	2104	* terminated with a null pointer.
	2105	*/
	2106	U_CAPI const char* const* U_EXPORT2
	2107	uloc_getISOCountries()
	2108	{
	2109	return COUNTRIES;
	2110	}
	2111
	2112
	2113	/* this function to be moved into cstring.c later */
	2114	static char gDecimal = 0;
	2115
	2116	static /* U_CAPI */
	2117	double
	2118	/* U_EXPORT2 */
	2119	_uloc_strtod(const char start, char *end) {
	2120	char *decimal;
	2121	char *myEnd;
	2122	char buf[30];
	2123	double rv;
	2124	if (!gDecimal) {
	2125	char rep[5];
	2126	/* For machines that decide to change the decimal on you,
	2127	and try to be too smart with localization.
	2128	This normally should be just a '.'. */
	2129	sprintf(rep, "%+1.1f", 1.0);
	2130	gDecimal = rep[2];
	2131	}
	2132
	2133	if(gDecimal == '.') {
	2134	return uprv_strtod(start, end); /* fall through to OS */
	2135	} else {
	2136	uprv_strncpy(buf, start, 29);
	2137	buf[29]=0;
	2138	decimal = uprv_strchr(buf, '.');
	2139	if(decimal) {
	2140	*decimal = gDecimal;
	2141	} else {
	2142	return uprv_strtod(start, end); /* no decimal point */
	2143	}
	2144	rv = uprv_strtod(buf, &myEnd);
	2145	if(end) {
	2146	end = (char)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
	2147	}
	2148	return rv;
	2149	}
	2150	}
	2151
	2152	typedef struct {
	2153	float q;
	2154	int32_t dummy; /* to avoid uninitialized memory copy from qsort */
	2155	char locale[ULOC_FULLNAME_CAPACITY+1];
	2156	} _acceptLangItem;
	2157
	2158	static int32_t U_CALLCONV
	2159	uloc_acceptLanguageCompare(const void * /context/, const void a, const void b)
	2160	{
	2161	const _acceptLangItem aa = (const _acceptLangItem)a;
	2162	const _acceptLangItem bb = (const _acceptLangItem)b;
	2163
	2164	int32_t rc = 0;
	2165	if(bb->q < aa->q) {
	2166	rc = -1; /* A > B */
	2167	} else if(bb->q > aa->q) {
	2168	rc = 1; /* A < B */
	2169	} else {
	2170	rc = 0; /* A = B */
	2171	}
	2172
	2173	if(rc==0) {
	2174	rc = uprv_stricmp(aa->locale, bb->locale);
	2175	}
	2176
	2177	#if defined(ULOC_DEBUG)
	2178	/* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
	2179	aa->locale, aa->q,
	2180	bb->locale, bb->q,
	2181	rc);*/
	2182	#endif
	2183
	2184	return rc;
	2185	}
	2186
	2187	/*
	2188	mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
	2189	*/
	2190
	2191	U_CAPI int32_t U_EXPORT2
	2192	uloc_acceptLanguageFromHTTP(char result, int32_t resultAvailable, UAcceptResult outResult,
	2193	const char *httpAcceptLanguage,
	2194	UEnumeration* availableLocales,
	2195	UErrorCode *status)
	2196	{
	2197	MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
	2198	char tmp[ULOC_FULLNAME_CAPACITY +1];
	2199	int32_t n = 0;
	2200	const char *itemEnd;
	2201	const char *paramEnd;
	2202	const char *s;
	2203	const char *t;
	2204	int32_t res;
	2205	int32_t i;
	2206	int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
	2207
	2208	if(U_FAILURE(*status)) {
	2209	return -1;
	2210	}
	2211
	2212	for(s=httpAcceptLanguage;s&&*s;) {
	2213	while(isspace(s)) / eat space at the beginning */
	2214	s++;
	2215	itemEnd=uprv_strchr(s,',');
	2216	paramEnd=uprv_strchr(s,';');
	2217	if(!itemEnd) {
	2218	itemEnd = httpAcceptLanguage+l; /* end of string */
	2219	}
	2220	if(paramEnd && paramEnd<itemEnd) {
	2221	/* semicolon (;) is closer than end (,) */
	2222	t = paramEnd+1;
	2223	if(*t=='q') {
	2224	t++;
	2225	}
	2226	while(isspace(*t)) {
	2227	t++;
	2228	}
	2229	if(*t=='=') {
	2230	t++;
	2231	}
	2232	while(isspace(*t)) {
	2233	t++;
	2234	}
	2235	items[n].q = (float)_uloc_strtod(t,NULL);
	2236	} else {
	2237	/* no semicolon - it's 1.0 */
	2238	items[n].q = 1.0f;
	2239	paramEnd = itemEnd;
	2240	}
	2241	items[n].dummy=0;
	2242	/* eat spaces prior to semi */
	2243	for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
	2244	;
	2245	int32_t slen = static_cast<int32_t>(((t+1)-s));
	2246	if(slen > ULOC_FULLNAME_CAPACITY) {
	2247	*status = U_BUFFER_OVERFLOW_ERROR;
	2248	return -1; // too big
	2249	}
	2250	uprv_strncpy(items[n].locale, s, slen);
	2251	items[n].locale[slen]=0; // terminate
	2252	int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
	2253	if(U_FAILURE(*status)) return -1;
	2254	if((clen!=slen) \|\| (uprv_strncmp(items[n].locale, tmp, slen))) {
	2255	// canonicalization had an effect- copy back
	2256	uprv_strncpy(items[n].locale, tmp, clen);
	2257	items[n].locale[clen] = 0; // terminate
	2258	}
	2259	#if defined(ULOC_DEBUG)
	2260	/fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);/
	2261	#endif
	2262	n++;
	2263	s = itemEnd;
	2264	while(s==',') { / eat duplicate commas */
	2265	s++;
	2266	}
	2267	if(n>=items.getCapacity()) { // If we need more items
	2268	if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
	2269	*status = U_MEMORY_ALLOCATION_ERROR;
	2270	return -1;
	2271	}
	2272	#if defined(ULOC_DEBUG)
	2273	fprintf(stderr,"malloced at size %d\n", items.getCapacity());
	2274	#endif
	2275	}
	2276	}
	2277	uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
	2278	if (U_FAILURE(*status)) {
	2279	return -1;
	2280	}
	2281	LocalMemory<const char*> strs(NULL);
	2282	if (strs.allocateInsteadAndReset(n) == NULL) {
	2283	*status = U_MEMORY_ALLOCATION_ERROR;
	2284	return -1;
	2285	}
	2286	for(i=0;i<n;i++) {
	2287	#if defined(ULOC_DEBUG)
	2288	/fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);/
	2289	#endif
	2290	strs[i]=items[i].locale;
	2291	}
	2292	res = uloc_acceptLanguage(result, resultAvailable, outResult,
	2293	strs.getAlias(), n, availableLocales, status);
	2294	return res;
	2295	}
	2296
	2297
	2298	U_CAPI int32_t U_EXPORT2
	2299	uloc_acceptLanguage(char *result, int32_t resultAvailable,
	2300	UAcceptResult outResult, const char *acceptList,
	2301	int32_t acceptListCount,
	2302	UEnumeration* availableLocales,
	2303	UErrorCode *status)
	2304	{
	2305	int32_t i,j;
	2306	int32_t len;
	2307	int32_t maxLen=0;
	2308	char tmp[ULOC_FULLNAME_CAPACITY+1];
	2309	const char *l;
	2310	char **fallbackList;
	2311	if(U_FAILURE(*status)) {
	2312	return -1;
	2313	}
	2314	fallbackList = static_cast<char *>(uprv_malloc((size_t)(sizeof(fallbackList[0])acceptListCount)));
	2315	if(fallbackList==NULL) {
	2316	*status = U_MEMORY_ALLOCATION_ERROR;
	2317	return -1;
	2318	}
	2319	for(i=0;i<acceptListCount;i++) {
	2320	#if defined(ULOC_DEBUG)
	2321	fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
	2322	#endif
	2323	while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
	2324	#if defined(ULOC_DEBUG)
	2325	fprintf(stderr," %s\n", l);
	2326	#endif
	2327	len = (int32_t)uprv_strlen(l);
	2328	if(!uprv_strcmp(acceptList[i], l)) {
	2329	if(outResult) {
	2330	*outResult = ULOC_ACCEPT_VALID;
	2331	}
	2332	#if defined(ULOC_DEBUG)
	2333	fprintf(stderr, "MATCH! %s\n", l);
	2334	#endif
	2335	if(len>0) {
	2336	uprv_strncpy(result, l, uprv_min(len, resultAvailable));
	2337	}
	2338	for(j=0;j<i;j++) {
	2339	uprv_free(fallbackList[j]);
	2340	}
	2341	uprv_free(fallbackList);
	2342	return u_terminateChars(result, resultAvailable, len, status);
	2343	}
	2344	if(len>maxLen) {
	2345	maxLen = len;
	2346	}
	2347	}
	2348	uenum_reset(availableLocales, status);
	2349	/* save off parent info */
	2350	if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
	2351	fallbackList[i] = uprv_strdup(tmp);
	2352	} else {
	2353	fallbackList[i]=0;
	2354	}
	2355	}
	2356
	2357	for(maxLen--;maxLen>0;maxLen--) {
	2358	for(i=0;i<acceptListCount;i++) {
	2359	if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
	2360	#if defined(ULOC_DEBUG)
	2361	fprintf(stderr,"Try: [%s]", fallbackList[i]);
	2362	#endif
	2363	while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
	2364	#if defined(ULOC_DEBUG)
	2365	fprintf(stderr," %s\n", l);
	2366	#endif
	2367	len = (int32_t)uprv_strlen(l);
	2368	if(!uprv_strcmp(fallbackList[i], l)) {
	2369	if(outResult) {
	2370	*outResult = ULOC_ACCEPT_FALLBACK;
	2371	}
	2372	#if defined(ULOC_DEBUG)
	2373	fprintf(stderr, "fallback MATCH! %s\n", l);
	2374	#endif
	2375	if(len>0) {
	2376	uprv_strncpy(result, l, uprv_min(len, resultAvailable));
	2377	}
	2378	for(j=0;j<acceptListCount;j++) {
	2379	uprv_free(fallbackList[j]);
	2380	}
	2381	uprv_free(fallbackList);
	2382	return u_terminateChars(result, resultAvailable, len, status);
	2383	}
	2384	}
	2385	uenum_reset(availableLocales, status);
	2386
	2387	if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
	2388	uprv_free(fallbackList[i]);
	2389	fallbackList[i] = uprv_strdup(tmp);
	2390	} else {
	2391	uprv_free(fallbackList[i]);
	2392	fallbackList[i]=0;
	2393	}
	2394	}
	2395	}
	2396	if(outResult) {
	2397	*outResult = ULOC_ACCEPT_FAILED;
	2398	}
	2399	}
	2400	for(i=0;i<acceptListCount;i++) {
	2401	uprv_free(fallbackList[i]);
	2402	}
	2403	uprv_free(fallbackList);
	2404	return -1;
	2405	}
	2406
	2407	U_CAPI const char* U_EXPORT2
	2408	uloc_toUnicodeLocaleKey(const char* keyword)
	2409	{
	2410	const char* bcpKey = ulocimp_toBcpKey(keyword);
	2411	if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
	2412	// unknown keyword, but syntax is fine..
	2413	return keyword;
	2414	}
	2415	return bcpKey;
	2416	}
	2417
	2418	U_CAPI const char* U_EXPORT2
	2419	uloc_toUnicodeLocaleType(const char* keyword, const char* value)
	2420	{
	2421	const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
	2422	if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
	2423	// unknown keyword, but syntax is fine..
	2424	return value;
	2425	}
	2426	return bcpType;
	2427	}
	2428
	2429	static UBool
	2430	isWellFormedLegacyKey(const char* legacyKey)
	2431	{
	2432	const char* p = legacyKey;
	2433	while (*p) {
	2434	if (!UPRV_ISALPHANUM(*p)) {
	2435	return FALSE;
	2436	}
	2437	p++;
	2438	}
	2439	return TRUE;
	2440	}
	2441
	2442	static UBool
	2443	isWellFormedLegacyType(const char* legacyType)
	2444	{
	2445	const char* p = legacyType;
	2446	int32_t alphaNumLen = 0;
	2447	while (*p) {
	2448	if (p == '_' \|\| p == '/' \|\| *p == '-') {
	2449	if (alphaNumLen == 0) {
	2450	return FALSE;
	2451	}
	2452	alphaNumLen = 0;
	2453	} else if (UPRV_ISALPHANUM(*p)) {
	2454	alphaNumLen++;
	2455	} else {
	2456	return FALSE;
	2457	}
	2458	p++;
	2459	}
	2460	return (alphaNumLen != 0);
	2461	}
	2462
	2463	U_CAPI const char* U_EXPORT2
	2464	uloc_toLegacyKey(const char* keyword)
	2465	{
	2466	const char* legacyKey = ulocimp_toLegacyKey(keyword);
	2467	if (legacyKey == NULL) {
	2468	// Checks if the specified locale key is well-formed with the legacy locale syntax.
	2469	//
	2470	// Note:
	2471	// LDML/CLDR provides some definition of keyword syntax in
	2472	// * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
	2473	// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
	2474	// Keys can only consist of [0-9a-zA-Z].
	2475	if (isWellFormedLegacyKey(keyword)) {
	2476	return keyword;
	2477	}
	2478	}
	2479	return legacyKey;
	2480	}
	2481
	2482	U_CAPI const char* U_EXPORT2
	2483	uloc_toLegacyType(const char* keyword, const char* value)
	2484	{
	2485	const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
	2486	if (legacyType == NULL) {
	2487	// Checks if the specified locale type is well-formed with the legacy locale syntax.
	2488	//
	2489	// Note:
	2490	// LDML/CLDR provides some definition of keyword syntax in
	2491	// * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
	2492	// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
	2493	// Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
	2494	// we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
	2495	if (isWellFormedLegacyType(value)) {
	2496	return value;
	2497	}
	2498	}
	2499	return legacyType;
	2500	}
	2501
	/*eof*/