git.saurik.com Git - apple/icu.git/blame - icuSources/common/uloc.cpp

Commit	Line	Data
f3c0d7a5 A	1	// © 2016 and later: Unicode, Inc. and others.
f3c0d7a5 A	2	// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f A	3	/*
b75a7d8f A	4	**********************************************************************
2ca993e8	5	* Copyright (C) 1997-2016, International Business Machines
b75a7d8f A	6	* Corporation and others. All Rights Reserved.
	7	**********************************************************************
	8	*
	9	* File ULOC.CPP
	10	*
	11	* Modification History:
	12	*
	13	* Date Name Description
	14	* 04/01/97 aliu Creation.
	15	* 08/21/98 stephen JDK 1.2 sync
	16	* 12/08/98 rtg New Locale implementation and C API
	17	* 03/15/99 damiba overhaul.
	18	* 04/06/99 stephen changed setDefault() to realloc and copy
	19	* 06/14/99 stephen Changed calls to ures_open for new params
	20	* 07/21/99 stephen Modified setDefault() to propagate to C++
374ca955 A	21	* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
374ca955 A	22	* brought canonicalization code into line with spec
b75a7d8f A	23	*****************************************************************************/
	24
	25	/*
	26	POSIX's locale format, from putil.c: [no spaces]
	27
	28	ll [ _CC ] [ . MM ] [ @ VV]
	29
	30	l = lang, C = ctry, M = charmap, V = variant
	31	*/
	32
b75a7d8f A	33	#include "unicode/utypes.h"
	34	#include "unicode/ustring.h"
	35	#include "unicode/uloc.h"
	36
374ca955	37	#include "putilimp.h"
b75a7d8f	38	#include "ustr_imp.h"
374ca955	39	#include "ulocimp.h"
b75a7d8f A	40	#include "umutex.h"
	41	#include "cstring.h"
	42	#include "cmemory.h"
374ca955 A	43	#include "locmap.h"
	44	#include "uarrsort.h"
	45	#include "uenumimp.h"
	46	#include "uassert.h"
f3c0d7a5	47	#include "charstr.h"
b75a7d8f	48
374ca955 A	49	#include <stdio.h> /* for sprintf */
374ca955 A	50
f3c0d7a5 A	51	U_NAMESPACE_USE
f3c0d7a5 A	52
374ca955	53	/* ### Declarations **************************************************/
b75a7d8f A	54
	55	/* Locale stuff from locid.cpp */
	56	U_CFUNC void locale_set_default(const char *id);
	57	U_CFUNC const char *locale_get_default(void);
374ca955 A	58	U_CFUNC int32_t
	59	locale_getKeywords(const char *localeID,
	60	char prev,
	61	char *keywords, int32_t keywordCapacity,
	62	char values, int32_t valuesCapacity, int32_t valLen,
	63	UBool valuesToo,
	64	UErrorCode *status);
	65
374ca955 A	66	/* ### Data tables **************************************************/
	67
	68	/**
	69	* Table of language codes, both 2- and 3-letter, with preference
	70	* given to 2-letter codes where possible. Includes 3-letter codes
	71	* that lack a 2-letter equivalent.
	72	*
	73	* This list must be in sorted order. This list is returned directly
	74	* to the user by some API.
	75	*
	76	* This list must be kept in sync with LANGUAGES_3, with corresponding
	77	* entries matched.
	78	*
	79	* This table should be terminated with a NULL entry, followed by a
	80	* second list, and another NULL entry. The first list is visible to
	81	* user code when this array is returned by API. The second list
	82	* contains codes we support, but do not expose through user API.
	83	*
	84	* Notes
	85	*
	86	* Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
	87	* include the revisions up to 2001/7/27 CWB
	88	*
	89	* The 3 character codes are the terminology codes like RFC 3066. This
	90	* is compatible with prior ICU codes
	91	*
	92	* "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
	93	* table but now at the end of the table because 3 character codes are
	94	* duplicates. This avoids bad searches going from 3 to 2 character
	95	* codes.
	96	*
	97	* The range qaa-qtz is reserved for local use
	98	*/
51004dcb	99	/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
2ca993e8	100	/* ISO639 table version is 20150505 */
0f5d89e8	101	/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES[] = {
    /* Primary list: sorted, exposed to user code. Parallel to LANGUAGES_3. */
    "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
    "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
    "aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
    "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
    "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
    "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
    "ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
    "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
    "cs", "csb", "cu", "cv", "cy",
    "da", "dak", "dar", "dav", "de", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz", "dzg",
    "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
    "en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
    "ext",
    "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
    "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
    "gur", "guz", "gv", "gwi",
    "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
    "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
    "hup", "hy", "hz",
    "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
    "ilo", "inh", "io", "is", "it", "iu", "izh",
    "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
    "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
    "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
    "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
    "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
    "kv", "kw", "ky",
    "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
    "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
    "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
    "lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
    "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
    "ml", "mn", "mnc", "mni", "mo",
    "moh", "mos", "mr", "mrj",
    "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my", "mye", "myv", "mzn",
    "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
    "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
    "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
    "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
    "oc", "oj", "om", "or", "os", "osa", "ota",
    "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
    "pon", "prg", "pro", "ps", "pt",
    "qu", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
    "rof", "rom", "rtm", "ru", "rue", "rug", "rup",
    "rw", "rwk",
    "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
    "se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
    "sgs", "shi", "shn", "shu", "si", "sid", "sk",
    "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
    "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
    "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
    "sv", "sw", "swb", "swc", "syc", "syr", "szl",
    "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
    "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
    "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
    "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
    "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
    "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
    "xal", "xh", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
    "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
    "zun", "zxx", "zza",
NULL,
    /* Secondary list: supported but not exposed through user API. */
    "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
NULL
};
51004dcb	192
/*
 * Parallel arrays: DEPRECATED_LANGUAGES[i] is a withdrawn language code and
 * REPLACEMENT_LANGUAGES[i] is the code listed at the same position as its
 * replacement. Both arrays end with two NULL entries.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
b75a7d8f	199
374ca955 A	200	/**
	201	* Table of 3-letter language codes.
	202	*
	203	* This is a lookup table used to convert 3-letter language codes to
	204	* their 2-letter equivalent, where possible. It must be kept in sync
	205	* with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
	206	* same language as LANGUAGES_3[i]. The commented-out lines are
	207	* copied from LANGUAGES to make eyeballing this baby easier.
	208	*
	209	* Where a 3-letter language code has no 2-letter equivalent, the
	210	* 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
	211	*
	212	* This table should be terminated with a NULL entry, followed by a
	213	* second list, and another NULL entry. The two lists correspond to
	214	* the two lists in LANGUAGES.
	215	*/
51004dcb	216	/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
2ca993e8	217	/* ISO639 table version is 20150505 */
0f5d89e8	218	/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES_3[] = {
    /* Primary list: entry i is the 3-letter code for LANGUAGES[i]. */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "mol",
    "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL
};
	310
374ca955 A	311	/**
	312	* Table of 2-letter country codes.
	313	*
	314	* This list must be in sorted order. This list is returned directly
	315	* to the user by some API.
	316	*
	317	* This list must be kept in sync with COUNTRIES_3, with corresponding
	318	* entries matched.
	319	*
	320	* This table should be terminated with a NULL entry, followed by a
	321	* second list, and another NULL entry. The first list is visible to
	322	* user code when this array is returned by API. The second list
	323	* contains codes we support, but do not expose through user API.
	324	*
	325	* Notes:
	326	*
	327	* ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
	328	* http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
	329	* new codes keeping the old ones for compatibility updated to include
	330	* 1999/12/03 revisions CWB
	331	*
	332	* RO(ROM) is now RO(ROU) according to
	333	* http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
	334	*/
	335	static const char * const COUNTRIES[] = {
2ca993e8	336	"AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM",
73c04bcf	337	"AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
b75a7d8f	338	"BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
51004dcb	339	"BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
b75a7d8f	340	"BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
2ca993e8 A	341	"CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR",
	342	"CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
	343	"DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
b75a7d8f	344	"ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
73c04bcf	345	"GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
b75a7d8f A	346	"GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
b75a7d8f A	347	"GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
2ca993e8	348	"IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
73c04bcf	349	"IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
b75a7d8f A	350	"KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
b75a7d8f A	351	"LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
46f4442e	352	"LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
b75a7d8f A	353	"ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
	354	"MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
	355	"NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
	356	"NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
	357	"PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
46f4442e	358	"PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
b75a7d8f	359	"SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
51004dcb	360	"SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
2ca993e8	361	"SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ",
b75a7d8f A	362	"TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
	363	"TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
	364	"VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
2ca993e8	365	"WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
b75a7d8f	366	NULL,
51004dcb	367	"AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
b75a7d8f A	368	NULL
	369	};
	370
/*
 * Parallel arrays: DEPRECATED_COUNTRIES[i] is a deprecated region code and
 * REPLACEMENT_COUNTRIES[i] is the code listed at the same position as its
 * replacement. Both arrays end with two NULL entries.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */
};
f3c0d7a5	378
374ca955 A	379	/**
	380	* Table of 3-letter country codes.
	381	*
	382	* This is a lookup table used to convert 3-letter country codes to
	383	* their 2-letter equivalent. It must be kept in sync with COUNTRIES.
	384	* For all valid i, COUNTRIES[i] must refer to the same country as
	385	* COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
	386	* to make eyeballing this baby easier.
	387	*
	388	* This table should be terminated with a NULL entry, followed by a
	389	* second list, and another NULL entry. The two lists correspond to
	390	* the two lists in COUNTRIES.
	391	*/
	392	static const char * const COUNTRIES_3[] = {
2ca993e8 A	393	/* "AC", "AD", "AE", "AF", "AG", "AI", "AL", "AM", */
2ca993e8 A	394	"ASC", "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
73c04bcf A	395	/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */
73c04bcf A	396	"AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
b75a7d8f A	397	/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */
b75a7d8f A	398	"BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
51004dcb A	399	/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */
51004dcb A	400	"BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
b75a7d8f A	401	/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */
b75a7d8f A	402	"BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
2ca993e8 A	403	/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CP", "CR", */
	404	"CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CPT", "CRI",
	405	/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
	406	"CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
	407	/* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
	408	"DMA", "DOM", "DZA", "EA ", "ECU", "EST", "EGY", "ESH", "ERI", /* no valid 3-letter code for EA */
b75a7d8f A	409	/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
b75a7d8f A	410	"ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
46f4442e	411	/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
73c04bcf	412	"GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
b75a7d8f A	413	/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */
	414	"GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
	415	/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
	416	"GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
2ca993e8 A	417	/* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
2ca993e8 A	418	"IC ", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", /* no valid 3-letter code for IC */
46f4442e	419	/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
73c04bcf	420	"ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
b75a7d8f A	421	/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
	422	"COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
	423	/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */
	424	"LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
46f4442e A	425	/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */
46f4442e A	426	"LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
b75a7d8f A	427	/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */
	428	"MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
	429	/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */
	430	"MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
	431	/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */
	432	"NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
	433	/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */
	434	"NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
	435	/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */
	436	"PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
46f4442e A	437	/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */
46f4442e A	438	"PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
b75a7d8f A	439	/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */
b75a7d8f A	440	"SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
51004dcb A	441	/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */
51004dcb A	442	"SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
2ca993e8 A	443	/* "SX", "SY", "SZ", "TA", "TC", "TD", "TF", "TG", "TH", "TJ", */
2ca993e8 A	444	"SXM", "SYR", "SWZ", "TAA", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
b75a7d8f A	445	/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */
	446	"TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
	447	/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */
	448	"TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
	449	/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
	450	"VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
2ca993e8 A	451	/* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
2ca993e8 A	452	"WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
b75a7d8f	453	NULL,
51004dcb A	454	/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
51004dcb A	455	"ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
b75a7d8f A	456	NULL
	457	};
	458
/** One canonicalization rule: an input locale ID and the ID it maps to. */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
} CanonicalizationMap;

/**
 * A map to canonicalize locale IDs. This handles a variety of
 * different semantic kinds of transformations.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "",               "en_US_POSIX" }, /* .NET name */ // open ICU 64 deleted, we restore
    { "c",              "en_US_POSIX" }, /* POSIX name */ // open ICU 64 deleted, we restore
    { "posix",          "en_US_POSIX" }, /* POSIX name (alias of C) */ // open ICU 64 deleted, we restore
    { "art_LOJBAN",     "jbo" }, /* registered name */
    { "hy__AREVELA",    "hy" }, /* Registered IANA variant */
    { "hy__AREVMDA",    "hyw" }, /* Registered IANA variant */
    { "zh_GAN",         "gan" }, /* registered name */
    { "zh_GUOYU",       "zh" }, /* registered name */
    { "zh_HAKKA",       "hak" }, /* registered name */
    { "zh_MIN_NAN",     "nan" }, /* registered name */
    { "zh_WUU",         "wuu" }, /* registered name */
    { "zh_XIANG",       "hsn" }, /* registered name */
    { "zh_YUE",         "yue" }, /* registered name */
};
374ca955 A	483
729e4ab9 A	484	/* ### BCP47 Conversion *******************************************/
	485	/* Test if the locale id has BCP47 u extension and does not have '@' */
	486	#define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
	487	/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
340931cb A	488	#define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \
	489	if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 \|\| \
	490	U_FAILURE(err) \|\| err == U_STRING_NOT_TERMINATED_WARNING) { \
	491	finalID=id; \
	492	if (err == U_STRING_NOT_TERMINATED_WARNING) { err = U_BUFFER_OVERFLOW_ERROR; } \
	493	} else { \
	494	finalID=buffer; \
	495	} \
	496	} UPRV_BLOCK_MACRO_END
729e4ab9 A	497	/* Gets the size of the shortest subtag in the given localeID. */
729e4ab9 A	498	static int32_t getShortestSubtagLength(const char *localeID) {
0f5d89e8	499	int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
729e4ab9 A	500	int32_t length = localeIDLength;
	501	int32_t tmpLength = 0;
	502	int32_t i;
	503	UBool reset = TRUE;
	504
	505	for (i = 0; i < localeIDLength; i++) {
	506	if (localeID[i] != '_' && localeID[i] != '-') {
	507	if (reset) {
	508	tmpLength = 0;
	509	reset = FALSE;
	510	}
	511	tmpLength++;
	512	} else {
	513	if (tmpLength != 0 && tmpLength < length) {
	514	length = tmpLength;
	515	}
	516	reset = TRUE;
	517	}
	518	}
	519
	520	return length;
	521	}
	522
374ca955	523	/* ### Keywords **************************************************/
/* ASCII digit test; arguments are parenthesized but evaluated twice. */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
/* ASCII letter or digit. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size of the per-keyword name buffer, including the terminating NUL. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of distinct keywords handled in one locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
	531
729e4ab9	532	U_CAPI const char * U_EXPORT2
374ca955	533	locale_getKeywordsStart(const char *localeID) {
374ca955	534	const char *result = NULL;
374ca955 A	535	if((result = uprv_strchr(localeID, '@')) != NULL) {
374ca955 A	536	return result;
73c04bcf A	537	}
	538	#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
	539	else {
	540	/* We do this because the @ sign is variant, and the @ sign used on one
	541	EBCDIC machine won't be compiled the same way on other EBCDIC based
	542	machines. */
	543	static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
374ca955 A	544	const uint8_t *charToFind = ebcdicSigns;
	545	while(*charToFind) {
	546	if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
	547	return result;
	548	}
	549	charToFind++;
	550	}
	551	}
73c04bcf	552	#endif
374ca955 A	553	return NULL;
	554	}
	555
	556	/**
	557	* @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
	558	* @param keywordName incoming name to be canonicalized
	559	* @param status return status (keyword too long)
	560	* @return length of the keyword name
	561	*/
	562	static int32_t locale_canonKeywordName(char buf, const char keywordName, UErrorCode *status)
	563	{
f3c0d7a5 A	564	int32_t keywordNameLen = 0;
	565
	566	for (; *keywordName != 0; keywordName++) {
	567	if (!UPRV_ISALPHANUM(*keywordName)) {
	568	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed keyword name */
	569	return 0;
	570	}
	571	if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
	572	buf[keywordNameLen++] = uprv_tolower(*keywordName);
	573	} else {
	574	/* keyword name too long for internal buffer */
	575	*status = U_INTERNAL_PROGRAM_ERROR;
	576	return 0;
	577	}
374ca955	578	}
f3c0d7a5 A	579	if (keywordNameLen == 0) {
	580	status = U_ILLEGAL_ARGUMENT_ERROR; / empty keyword name */
	581	return 0;
374ca955	582	}
f3c0d7a5 A	583	buf[keywordNameLen] = 0; /* terminate */
f3c0d7a5 A	584
374ca955 A	585	return keywordNameLen;
	586	}
	587
	588	typedef struct {
	589	char keyword[ULOC_KEYWORD_BUFFER_LEN];
	590	int32_t keywordLen;
	591	const char *valueStart;
	592	int32_t valueLen;
	593	} KeywordStruct;
	594
	595	static int32_t U_CALLCONV
4388f060	596	compareKeywordStructs(const void * /context/, const void left, const void right) {
374ca955 A	597	const char* leftString = ((const KeywordStruct *)left)->keyword;
	598	const char* rightString = ((const KeywordStruct *)right)->keyword;
	599	return uprv_strcmp(leftString, rightString);
	600	}
	601
374ca955 A	602	static int32_t
	603	_getKeywords(const char *localeID,
	604	char prev,
	605	char *keywords, int32_t keywordCapacity,
	606	char values, int32_t valuesCapacity, int32_t valLen,
	607	UBool valuesToo,
374ca955 A	608	UErrorCode *status)
	609	{
	610	KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
f3c0d7a5	611
374ca955 A	612	int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
	613	int32_t numKeywords = 0;
	614	const char* pos = localeID;
	615	const char* equalSign = NULL;
	616	const char* semicolon = NULL;
	617	int32_t i = 0, j, n;
	618	int32_t keywordsLen = 0;
	619	int32_t valuesLen = 0;
	620
	621	if(prev == '@') { /* start of keyword definition */
	622	/* we will grab pairs, trim spaces, lowercase keywords, sort and return */
	623	do {
	624	UBool duplicate = FALSE;
	625	/* skip leading spaces */
	626	while(*pos == ' ') {
	627	pos++;
	628	}
	629	if (!pos) { / handle trailing "; " */
	630	break;
	631	}
	632	if(numKeywords == maxKeywords) {
	633	*status = U_INTERNAL_PROGRAM_ERROR;
	634	return 0;
	635	}
	636	equalSign = uprv_strchr(pos, '=');
	637	semicolon = uprv_strchr(pos, ';');
	638	/* lack of '=' [foo@currency] is illegal */
	639	/* ';' before '=' [foo@currency;collation=pinyin] is illegal */
	640	if(!equalSign \|\| (semicolon && semicolon<equalSign)) {
	641	*status = U_INVALID_FORMAT_ERROR;
	642	return 0;
	643	}
	644	/* need to normalize both keyword and keyword name */
	645	if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
	646	/* keyword name too long for internal buffer */
	647	*status = U_INTERNAL_PROGRAM_ERROR;
	648	return 0;
	649	}
	650	for(i = 0, n = 0; i < equalSign - pos; ++i) {
	651	if (pos[i] != ' ') {
	652	keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
	653	}
	654	}
57a6839d A	655
	656	/* zero-length keyword is an error. */
	657	if (n == 0) {
	658	*status = U_INVALID_FORMAT_ERROR;
	659	return 0;
	660	}
	661
374ca955 A	662	keywordList[numKeywords].keyword[n] = 0;
	663	keywordList[numKeywords].keywordLen = n;
	664	/* now grab the value part. First we skip the '=' */
	665	equalSign++;
	666	/* then we leading spaces */
	667	while(*equalSign == ' ') {
	668	equalSign++;
	669	}
57a6839d A	670
57a6839d A	671	/* Premature end or zero-length value */
2ca993e8	672	if (!*equalSign \|\| equalSign == semicolon) {
57a6839d A	673	*status = U_INVALID_FORMAT_ERROR;
	674	return 0;
	675	}
	676
374ca955	677	keywordList[numKeywords].valueStart = equalSign;
57a6839d	678
374ca955 A	679	pos = semicolon;
	680	i = 0;
	681	if(pos) {
	682	while(*(pos - i - 1) == ' ') {
	683	i++;
	684	}
73c04bcf	685	keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
374ca955 A	686	pos++;
374ca955 A	687	} else {
73c04bcf	688	i = (int32_t)uprv_strlen(equalSign);
4388f060	689	while(i && equalSign[i-1] == ' ') {
374ca955 A	690	i--;
	691	}
	692	keywordList[numKeywords].valueLen = i;
	693	}
	694	/* If this is a duplicate keyword, then ignore it */
	695	for (j=0; j<numKeywords; ++j) {
	696	if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
	697	duplicate = TRUE;
	698	break;
	699	}
	700	}
	701	if (!duplicate) {
	702	++numKeywords;
	703	}
	704	} while(pos);
	705
374ca955 A	706	/* now we have a list of keywords */
	707	/* we need to sort it */
	708	uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
f3c0d7a5	709
374ca955 A	710	/* Now construct the keyword part */
	711	for(i = 0; i < numKeywords; i++) {
	712	if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
	713	uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
	714	if(valuesToo) {
	715	keywords[keywordsLen + keywordList[i].keywordLen] = '=';
	716	} else {
	717	keywords[keywordsLen + keywordList[i].keywordLen] = 0;
	718	}
	719	}
	720	keywordsLen += keywordList[i].keywordLen + 1;
	721	if(valuesToo) {
3d1f044b	722	if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
374ca955 A	723	uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
	724	}
	725	keywordsLen += keywordList[i].valueLen;
f3c0d7a5	726
374ca955	727	if(i < numKeywords - 1) {
f3c0d7a5	728	if(keywordsLen < keywordCapacity) {
374ca955 A	729	keywords[keywordsLen] = ';';
	730	}
	731	keywordsLen++;
	732	}
	733	}
	734	if(values) {
	735	if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
	736	uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
	737	values[valuesLen + keywordList[i].valueLen] = 0;
	738	}
	739	valuesLen += keywordList[i].valueLen + 1;
	740	}
	741	}
	742	if(values) {
	743	values[valuesLen] = 0;
	744	if(valLen) {
	745	*valLen = valuesLen;
	746	}
	747	}
f3c0d7a5	748	return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
374ca955 A	749	} else {
	750	return 0;
	751	}
	752	}
	753
	754	U_CFUNC int32_t
	755	locale_getKeywords(const char *localeID,
	756	char prev,
	757	char *keywords, int32_t keywordCapacity,
	758	char values, int32_t valuesCapacity, int32_t valLen,
	759	UBool valuesToo,
	760	UErrorCode *status) {
	761	return _getKeywords(localeID, prev, keywords, keywordCapacity,
	762	values, valuesCapacity, valLen, valuesToo,
3d1f044b	763	status);
374ca955 A	764	}
	765
	766	U_CAPI int32_t U_EXPORT2
	767	uloc_getKeywordValue(const char* localeID,
	768	const char* keywordName,
	769	char* buffer, int32_t bufferCapacity,
	770	UErrorCode* status)
f3c0d7a5	771	{
340931cb A	772	if (buffer != nullptr) {
	773	buffer[0] = '\0';
	774	}
729e4ab9	775	const char* startSearchHere = NULL;
374ca955	776	const char* nextSeparator = NULL;
374ca955 A	777	char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
374ca955 A	778	char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
374ca955 A	779	int32_t result = 0;
	780
	781	if(status && U_SUCCESS(*status) && localeID) {
729e4ab9 A	782	char tempBuffer[ULOC_FULLNAME_CAPACITY];
	783	const char* tmpLocaleID;
	784
f3c0d7a5 A	785	if (keywordName == NULL \|\| keywordName[0] == 0) {
	786	*status = U_ILLEGAL_ARGUMENT_ERROR;
	787	return 0;
	788	}
	789
	790	locale_canonKeywordName(keywordNameBuffer, keywordName, status);
	791	if(U_FAILURE(*status)) {
	792	return 0;
	793	}
	794
729e4ab9 A	795	if (_hasBCP47Extension(localeID)) {
	796	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
	797	} else {
	798	tmpLocaleID=localeID;
	799	}
f3c0d7a5 A	800
f3c0d7a5 A	801	startSearchHere = locale_getKeywordsStart(tmpLocaleID);
374ca955 A	802	if(startSearchHere == NULL) {
	803	/* no keywords, return at once */
	804	return 0;
	805	}
	806
374ca955 A	807	/* find the first keyword */
374ca955 A	808	while(startSearchHere) {
f3c0d7a5 A	809	const char* keyValueTail;
	810	int32_t keyValueLen;
	811
	812	startSearchHere++; /* skip @ or ; */
	813	nextSeparator = uprv_strchr(startSearchHere, '=');
	814	if(!nextSeparator) {
	815	status = U_ILLEGAL_ARGUMENT_ERROR; / key must have =value */
	816	return 0;
	817	}
	818	/* strip leading & trailing spaces (TC decided to tolerate these) */
374ca955 A	819	while(*startSearchHere == ' ') {
	820	startSearchHere++;
	821	}
f3c0d7a5 A	822	keyValueTail = nextSeparator;
	823	while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
	824	keyValueTail--;
	825	}
	826	/* now keyValueTail points to first char after the keyName */
	827	/* copy & normalize keyName from locale */
	828	if (startSearchHere == keyValueTail) {
	829	status = U_ILLEGAL_ARGUMENT_ERROR; / empty keyword name in passed-in locale */
	830	return 0;
374ca955	831	}
f3c0d7a5 A	832	keyValueLen = 0;
	833	while (startSearchHere < keyValueTail) {
	834	if (!UPRV_ISALPHANUM(*startSearchHere)) {
	835	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed keyword name */
	836	return 0;
	837	}
	838	if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
	839	localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
	840	} else {
374ca955 A	841	/* keyword name too long for internal buffer */
	842	*status = U_INTERNAL_PROGRAM_ERROR;
	843	return 0;
f3c0d7a5	844	}
374ca955	845	}
f3c0d7a5 A	846	localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
f3c0d7a5 A	847
374ca955	848	startSearchHere = uprv_strchr(nextSeparator, ';');
f3c0d7a5	849
374ca955	850	if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
f3c0d7a5 A	851	/* current entry matches the keyword. */
	852	nextSeparator++; /* skip '=' */
	853	/* First strip leading & trailing spaces (TC decided to tolerate these) */
374ca955	854	while(*nextSeparator == ' ') {
f3c0d7a5	855	nextSeparator++;
374ca955	856	}
f3c0d7a5 A	857	keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
	858	while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
	859	keyValueTail--;
	860	}
	861	/* Now copy the value, but check well-formedness */
	862	if (nextSeparator == keyValueTail) {
	863	status = U_ILLEGAL_ARGUMENT_ERROR; / empty key value name in passed-in locale */
	864	return 0;
374ca955	865	}
f3c0d7a5 A	866	keyValueLen = 0;
	867	while (nextSeparator < keyValueTail) {
	868	if (!UPRV_ISALPHANUM(nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(nextSeparator)) {
	869	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed key value */
	870	return 0;
	871	}
	872	if (keyValueLen < bufferCapacity) {
	873	/* Should we lowercase value to return here? Tests expect as-is. */
	874	buffer[keyValueLen++] = *nextSeparator++;
	875	} else { /* keep advancing so we return correct length in case of overflow */
	876	keyValueLen++;
	877	nextSeparator++;
	878	}
	879	}
	880	result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
374ca955 A	881	return result;
	882	}
	883	}
	884	}
	885	return 0;
	886	}
	887
	888	U_CAPI int32_t U_EXPORT2
	889	uloc_setKeywordValue(const char* keywordName,
	890	const char* keywordValue,
	891	char* buffer, int32_t bufferCapacity,
	892	UErrorCode* status)
	893	{
	894	/* TODO: sorting. removal. */
	895	int32_t keywordNameLen;
	896	int32_t keywordValueLen;
	897	int32_t bufLen;
	898	int32_t needLen = 0;
374ca955	899	char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
f3c0d7a5	900	char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
374ca955	901	char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
374ca955 A	902	int32_t rc;
	903	char* nextSeparator = NULL;
	904	char* nextEqualsign = NULL;
	905	char* startSearchHere = NULL;
	906	char* keywordStart = NULL;
f3c0d7a5 A	907	CharString updatedKeysAndValues;
	908	int32_t updatedKeysAndValuesLen;
	909	UBool handledInputKeyAndValue = FALSE;
	910	char keyValuePrefix = '@';
	911
	912	if(U_FAILURE(*status)) {
	913	return -1;
374ca955	914	}
f3c0d7a5	915	if (keywordName == NULL \|\| keywordName[0] == 0 \|\| bufferCapacity <= 1) {
73c04bcf A	916	*status = U_ILLEGAL_ARGUMENT_ERROR;
	917	return 0;
	918	}
f3c0d7a5	919	bufLen = (int32_t)uprv_strlen(buffer);
73c04bcf A	920	if(bufferCapacity<bufLen) {
	921	/* The capacity is less than the length?! Is this NULL terminated? */
	922	*status = U_ILLEGAL_ARGUMENT_ERROR;
	923	return 0;
	924	}
374ca955 A	925	keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
	926	if(U_FAILURE(*status)) {
	927	return 0;
	928	}
f3c0d7a5 A	929
	930	keywordValueLen = 0;
	931	if(keywordValue) {
	932	while (*keywordValue != 0) {
	933	if (!UPRV_ISALPHANUM(keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(keywordValue)) {
	934	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed key value */
	935	return 0;
	936	}
	937	if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
	938	/* Should we force lowercase in value to set? */
	939	keywordValueBuffer[keywordValueLen++] = *keywordValue++;
	940	} else {
	941	/* keywordValue too long for internal buffer */
	942	*status = U_INTERNAL_PROGRAM_ERROR;
	943	return 0;
	944	}
	945	}
	946	}
	947	keywordValueBuffer[keywordValueLen] = 0; /* terminate */
	948
374ca955	949	startSearchHere = (char*)locale_getKeywordsStart(buffer);
374ca955	950	if(startSearchHere == NULL \|\| (startSearchHere[1]==0)) {
f3c0d7a5 A	951	if(keywordValueLen == 0) { /* no keywords = nothing to remove */
f3c0d7a5 A	952	return bufLen;
374ca955 A	953	}
	954
	955	needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
f3c0d7a5	956	if(startSearchHere) { /* had a single @ */
374ca955 A	957	needLen--; /* already had the @ */
	958	/* startSearchHere points at the @ */
	959	} else {
	960	startSearchHere=buffer+bufLen;
	961	}
	962	if(needLen >= bufferCapacity) {
	963	*status = U_BUFFER_OVERFLOW_ERROR;
	964	return needLen; /* no change */
	965	}
f3c0d7a5	966	*startSearchHere++ = '@';
374ca955 A	967	uprv_strcpy(startSearchHere, keywordNameBuffer);
374ca955 A	968	startSearchHere += keywordNameLen;
f3c0d7a5 A	969	*startSearchHere++ = '=';
f3c0d7a5 A	970	uprv_strcpy(startSearchHere, keywordValueBuffer);
374ca955 A	971	return needLen;
374ca955 A	972	} /* end shortcut - no @ */
f3c0d7a5	973
374ca955 A	974	keywordStart = startSearchHere;
	975	/* search for keyword */
	976	while(keywordStart) {
f3c0d7a5 A	977	const char* keyValueTail;
	978	int32_t keyValueLen;
	979
	980	keywordStart++; /* skip @ or ; */
	981	nextEqualsign = uprv_strchr(keywordStart, '=');
	982	if (!nextEqualsign) {
	983	status = U_ILLEGAL_ARGUMENT_ERROR; / key must have =value */
	984	return 0;
	985	}
	986	/* strip leading & trailing spaces (TC decided to tolerate these) */
374ca955 A	987	while(*keywordStart == ' ') {
	988	keywordStart++;
	989	}
f3c0d7a5 A	990	keyValueTail = nextEqualsign;
	991	while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
	992	keyValueTail--;
374ca955	993	}
f3c0d7a5 A	994	/* now keyValueTail points to first char after the keyName */
	995	/* copy & normalize keyName from locale */
	996	if (keywordStart == keyValueTail) {
	997	status = U_ILLEGAL_ARGUMENT_ERROR; / empty keyword name in passed-in locale */
374ca955 A	998	return 0;
374ca955 A	999	}
f3c0d7a5 A	1000	keyValueLen = 0;
	1001	while (keywordStart < keyValueTail) {
	1002	if (!UPRV_ISALPHANUM(*keywordStart)) {
	1003	status = U_ILLEGAL_ARGUMENT_ERROR; / malformed keyword name */
	1004	return 0;
	1005	}
	1006	if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
	1007	localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
	1008	} else {
	1009	/* keyword name too long for internal buffer */
	1010	*status = U_INTERNAL_PROGRAM_ERROR;
	1011	return 0;
	1012	}
374ca955	1013	}
f3c0d7a5	1014	localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
374ca955 A	1015
374ca955 A	1016	nextSeparator = uprv_strchr(nextEqualsign, ';');
f3c0d7a5 A	1017
	1018	/* start processing the value part */
	1019	nextEqualsign++; /* skip '=' */
	1020	/* First strip leading & trailing spaces (TC decided to tolerate these) */
	1021	while(*nextEqualsign == ' ') {
	1022	nextEqualsign++;
	1023	}
	1024	keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
	1025	while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
	1026	keyValueTail--;
	1027	}
	1028	if (nextEqualsign == keyValueTail) {
	1029	status = U_ILLEGAL_ARGUMENT_ERROR; / empty key value in passed-in locale */
	1030	return 0;
	1031	}
	1032
374ca955 A	1033	rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
374ca955 A	1034	if(rc == 0) {
f3c0d7a5 A	1035	/* Current entry matches the input keyword. Update the entry */
	1036	if(keywordValueLen > 0) { /* updating a value */
	1037	updatedKeysAndValues.append(keyValuePrefix, *status);
	1038	keyValuePrefix = ';'; /* for any subsequent key-value pair */
	1039	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
	1040	updatedKeysAndValues.append('=', *status);
	1041	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
	1042	} /* else removing this entry, don't emit anything */
	1043	handledInputKeyAndValue = TRUE;
	1044	} else {
	1045	/* input keyword sorts earlier than current entry, add before current entry */
	1046	if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
	1047	/* insert new entry at this location */
	1048	updatedKeysAndValues.append(keyValuePrefix, *status);
	1049	keyValuePrefix = ';'; /* for any subsequent key-value pair */
	1050	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
	1051	updatedKeysAndValues.append('=', *status);
	1052	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
	1053	handledInputKeyAndValue = TRUE;
374ca955	1054	}
f3c0d7a5 A	1055	/* copy the current entry */
	1056	updatedKeysAndValues.append(keyValuePrefix, *status);
	1057	keyValuePrefix = ';'; /* for any subsequent key-value pair */
	1058	updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
	1059	updatedKeysAndValues.append('=', *status);
3d1f044b	1060	updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
f3c0d7a5 A	1061	}
	1062	if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
	1063	/* append new entry at the end, it sorts later than existing entries */
	1064	updatedKeysAndValues.append(keyValuePrefix, *status);
	1065	/* skip keyValuePrefix update, no subsequent key-value pair */
	1066	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
	1067	updatedKeysAndValues.append('=', *status);
	1068	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
	1069	handledInputKeyAndValue = TRUE;
374ca955 A	1070	}
	1071	keywordStart = nextSeparator;
	1072	} /* end loop searching */
374ca955	1073
f3c0d7a5 A	1074	/* Any error from updatedKeysAndValues.append above would be internal and not due to
	1075	* problems with the passed-in locale. So if we did encounter problems with the
	1076	* passed-in locale above, those errors took precedence and overrode any error
	1077	* status from updatedKeysAndValues.append, and also caused a return of 0. If there
	1078	* are errors here they are from updatedKeysAndValues.append; they do cause an
	1079	* error return but the passed-in locale is unmodified and the original bufLen is
	1080	* returned.
	1081	*/
	1082	if (!handledInputKeyAndValue \|\| U_FAILURE(*status)) {
	1083	/* if input key/value specified removal of a keyword not present in locale, or
	1084	* there was an error in CharString.append, leave original locale alone. */
	1085	return bufLen;
	1086	}
	1087
	1088	updatedKeysAndValuesLen = updatedKeysAndValues.length();
	1089	/* needLen = length of the part before '@' + length of updated key-value part including '@' */
	1090	needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
374ca955 A	1091	if(needLen >= bufferCapacity) {
	1092	*status = U_BUFFER_OVERFLOW_ERROR;
	1093	return needLen; /* no change */
	1094	}
f3c0d7a5 A	1095	if (updatedKeysAndValuesLen > 0) {
f3c0d7a5 A	1096	uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
374ca955 A	1097	}
	1098	buffer[needLen]=0;
	1099	return needLen;
	1100	}
b75a7d8f	1101
374ca955	1102	/* ### ID parsing implementation **************************************************/
b75a7d8f	1103
/* TRUE if 'a' is one of the letters x, X, i or I.
 * The argument is parenthesized so that any expression can be passed, but
 * it is still evaluated more than once: do not pass expressions with side
 * effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
	1114
374ca955 A	1115	/**
	1116	* Lookup 'key' in the array 'list'. The array 'list' should contain
	1117	* a NULL entry, followed by more entries, and a second NULL entry.
	1118	*
	1119	* The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
	1120	* COUNTRIES_3.
	1121	*/
b75a7d8f A	1122	static int16_t _findIndex(const char* const* list, const char* key)
	1123	{
	1124	const char* const* anchor = list;
374ca955 A	1125	int32_t pass = 0;
	1126
	1127	/* Make two passes through two NULL-terminated arrays at 'list' */
	1128	while (pass++ < 2) {
	1129	while (*list) {
	1130	if (uprv_strcmp(key, *list) == 0) {
	1131	return (int16_t)(list - anchor);
	1132	}
	1133	list++;
b75a7d8f	1134	}
374ca955	1135	++list; /* skip final NULL CWB/
b75a7d8f A	1136	}
	1137	return -1;
	1138	}
	1139
	1140	/* count the length of src while copying it to dest; return strlen(src) */
4388f060	1141	static inline int32_t
b75a7d8f A	1142	_copyCount(char dest, int32_t destCapacity, const char src) {
	1143	const char *anchor;
	1144	char c;
	1145
	1146	anchor=src;
	1147	for(;;) {
	1148	if((c=*src)==0) {
	1149	return (int32_t)(src-anchor);
	1150	}
	1151	if(destCapacity<=0) {
	1152	return (int32_t)((src-anchor)+uprv_strlen(src));
	1153	}
	1154	++src;
	1155	*dest++=c;
	1156	--destCapacity;
	1157	}
	1158	}
	1159
f3c0d7a5	1160	U_CFUNC const char*
73c04bcf A	1161	uloc_getCurrentCountryID(const char* oldID){
	1162	int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
	1163	if (offset >= 0) {
	1164	return REPLACEMENT_COUNTRIES[offset];
	1165	}
	1166	return oldID;
	1167	}
f3c0d7a5	1168	U_CFUNC const char*
73c04bcf A	1169	uloc_getCurrentLanguageID(const char* oldID){
	1170	int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
	1171	if (offset >= 0) {
	1172	return REPLACEMENT_LANGUAGES[offset];
	1173	}
f3c0d7a5	1174	return oldID;
73c04bcf	1175	}
b75a7d8f A	1176	/*
	1177	* the internal functions _getLanguage(), _getCountry(), _getVariant()
	1178	* avoid duplicating code to handle the earlier locale ID pieces
	1179	* in the functions for the later ones by
	1180	* setting the *pEnd pointer to where they stopped parsing
	1181	*
	1182	* TODO try to use this in Locale
	1183	*/
729e4ab9 A	1184	U_CFUNC int32_t
	1185	ulocimp_getLanguage(const char *localeID,
	1186	char *language, int32_t languageCapacity,
	1187	const char **pEnd) {
b75a7d8f A	1188	int32_t i=0;
	1189	int32_t offset;
	1190	char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
	1191
	1192	/* if it starts with i- or x- then copy that prefix */
	1193	if(_isIDPrefix(localeID)) {
	1194	if(i<languageCapacity) {
	1195	language[i]=(char)uprv_tolower(*localeID);
	1196	}
	1197	if(i<languageCapacity) {
	1198	language[i+1]='-';
	1199	}
	1200	i+=2;
	1201	localeID+=2;
	1202	}
f3c0d7a5	1203
b75a7d8f A	1204	/* copy the language as far as possible and count its length */
	1205	while(!_isTerminator(localeID) && !_isIDSeparator(localeID)) {
	1206	if(i<languageCapacity) {
	1207	language[i]=(char)uprv_tolower(*localeID);
	1208	}
	1209	if(i<3) {
4388f060	1210	U_ASSERT(i>=0);
b75a7d8f A	1211	lang[i]=(char)uprv_tolower(*localeID);
	1212	}
	1213	i++;
	1214	localeID++;
	1215	}
	1216
	1217	if(i==3) {
	1218	/* convert 3 character code to 2 character code if possible CWB/
374ca955	1219	offset=_findIndex(LANGUAGES_3, lang);
b75a7d8f	1220	if(offset>=0) {
374ca955	1221	i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
b75a7d8f A	1222	}
	1223	}
	1224
	1225	if(pEnd!=NULL) {
	1226	*pEnd=localeID;
	1227	}
	1228	return i;
	1229	}
	1230
729e4ab9 A	1231	U_CFUNC int32_t
	1232	ulocimp_getScript(const char *localeID,
	1233	char *script, int32_t scriptCapacity,
	1234	const char **pEnd)
b75a7d8f	1235	{
374ca955	1236	int32_t idLen = 0;
b75a7d8f	1237
374ca955 A	1238	if (pEnd != NULL) {
374ca955 A	1239	*pEnd = localeID;
b75a7d8f	1240	}
374ca955 A	1241
374ca955 A	1242	/* copy the second item as far as possible and count its length */
4388f060 A	1243	while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
4388f060 A	1244	&& uprv_isASCIILetter(localeID[idLen])) {
374ca955	1245	idLen++;
b75a7d8f A	1246	}
b75a7d8f A	1247
374ca955 A	1248	/* If it's exactly 4 characters long, then it's a script and not a country. */
	1249	if (idLen == 4) {
	1250	int32_t i;
	1251	if (pEnd != NULL) {
	1252	*pEnd = localeID+idLen;
	1253	}
	1254	if(idLen > scriptCapacity) {
	1255	idLen = scriptCapacity;
	1256	}
	1257	if (idLen >= 1) {
	1258	script[0]=(char)uprv_toupper(*(localeID++));
	1259	}
	1260	for (i = 1; i < idLen; i++) {
	1261	script[i]=(char)uprv_tolower(*(localeID++));
	1262	}
	1263	}
	1264	else {
	1265	idLen = 0;
	1266	}
	1267	return idLen;
b75a7d8f A	1268	}
b75a7d8f A	1269
729e4ab9 A	1270	U_CFUNC int32_t
	1271	ulocimp_getCountry(const char *localeID,
	1272	char *country, int32_t countryCapacity,
	1273	const char **pEnd)
374ca955	1274	{
729e4ab9	1275	int32_t idLen=0;
374ca955	1276	char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
b75a7d8f A	1277	int32_t offset;
	1278
	1279	/* copy the country as far as possible and count its length */
729e4ab9 A	1280	while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
	1281	if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /CWB/
	1282	cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
b75a7d8f	1283	}
729e4ab9	1284	idLen++;
b75a7d8f A	1285	}
b75a7d8f A	1286
729e4ab9 A	1287	/* the country should be either length 2 or 3 */
	1288	if (idLen == 2 \|\| idLen == 3) {
	1289	UBool gotCountry = FALSE;
	1290	/* convert 3 character code to 2 character code if possible CWB/
	1291	if(idLen==3) {
	1292	offset=_findIndex(COUNTRIES_3, cnty);
	1293	if(offset>=0) {
	1294	idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
	1295	gotCountry = TRUE;
	1296	}
	1297	}
	1298	if (!gotCountry) {
	1299	int32_t i = 0;
	1300	for (i = 0; i < idLen; i++) {
	1301	if (i < countryCapacity) {
	1302	country[i]=(char)uprv_toupper(localeID[i]);
	1303	}
	1304	}
b75a7d8f	1305	}
729e4ab9 A	1306	localeID+=idLen;
	1307	} else {
	1308	idLen = 0;
b75a7d8f A	1309	}
	1310
	1311	if(pEnd!=NULL) {
	1312	*pEnd=localeID;
	1313	}
729e4ab9 A	1314
729e4ab9 A	1315	return idLen;
b75a7d8f A	1316	}
b75a7d8f A	1317
374ca955 A	1318	/**
	1319	* @param needSeparator if true, then add leading '_' if any variants
	1320	* are added to 'variant'
	1321	*/
	1322	static int32_t
	1323	_getVariantEx(const char *localeID,
	1324	char prev,
	1325	char *variant, int32_t variantCapacity,
	1326	UBool needSeparator) {
b75a7d8f A	1327	int32_t i=0;
	1328
	1329	/* get one or more variant tags and separate them with '_' */
	1330	if(_isIDSeparator(prev)) {
	1331	/* get a variant string after a '-' or '_' */
	1332	while(!_isTerminator(*localeID)) {
374ca955 A	1333	if (needSeparator) {
	1334	if (i<variantCapacity) {
	1335	variant[i] = '_';
	1336	}
	1337	++i;
	1338	needSeparator = FALSE;
	1339	}
b75a7d8f A	1340	if(i<variantCapacity) {
	1341	variant[i]=(char)uprv_toupper(*localeID);
	1342	if(variant[i]=='-') {
	1343	variant[i]='_';
	1344	}
	1345	}
	1346	i++;
	1347	localeID++;
	1348	}
	1349	}
	1350
	1351	/* if there is no variant tag after a '-' or '_' then look for '@' */
	1352	if(i==0) {
	1353	if(prev=='@') {
	1354	/* keep localeID */
374ca955	1355	} else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
b75a7d8f A	1356	++localeID; /* point after the '@' */
	1357	} else {
	1358	return 0;
	1359	}
	1360	while(!_isTerminator(*localeID)) {
374ca955 A	1361	if (needSeparator) {
	1362	if (i<variantCapacity) {
	1363	variant[i] = '_';
	1364	}
	1365	++i;
	1366	needSeparator = FALSE;
	1367	}
b75a7d8f A	1368	if(i<variantCapacity) {
	1369	variant[i]=(char)uprv_toupper(*localeID);
	1370	if(variant[i]=='-' \|\| variant[i]==',') {
	1371	variant[i]='_';
	1372	}
	1373	}
	1374	i++;
	1375	localeID++;
	1376	}
	1377	}
f3c0d7a5	1378
b75a7d8f A	1379	return i;
	1380	}
	1381
374ca955 A	1382	static int32_t
	1383	_getVariant(const char *localeID,
	1384	char prev,
	1385	char *variant, int32_t variantCapacity) {
	1386	return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
	1387	}
	1388
374ca955 A	1389	/* Keyword enumeration */
	1390
	1391	typedef struct UKeywordsContext {
	1392	char* keywords;
	1393	char* current;
	1394	} UKeywordsContext;
	1395
f3c0d7a5 A	1396	U_CDECL_BEGIN
f3c0d7a5 A	1397
374ca955 A	1398	static void U_CALLCONV
	1399	uloc_kw_closeKeywords(UEnumeration *enumerator) {
	1400	uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
	1401	uprv_free(enumerator->context);
	1402	uprv_free(enumerator);
	1403	}
	1404
	1405	static int32_t U_CALLCONV
4388f060	1406	uloc_kw_countKeywords(UEnumeration en, UErrorCode /status/) {
374ca955 A	1407	char kw = ((UKeywordsContext )en->context)->keywords;
	1408	int32_t result = 0;
	1409	while(*kw) {
	1410	result++;
	1411	kw += uprv_strlen(kw)+1;
	1412	}
	1413	return result;
	1414	}
	1415
f3c0d7a5	1416	static const char * U_CALLCONV
374ca955 A	1417	uloc_kw_nextKeyword(UEnumeration* en,
374ca955 A	1418	int32_t* resultLength,
4388f060	1419	UErrorCode* /status/) {
374ca955 A	1420	const char* result = ((UKeywordsContext *)en->context)->current;
	1421	int32_t len = 0;
	1422	if(*result) {
73c04bcf	1423	len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
374ca955 A	1424	((UKeywordsContext *)en->context)->current += len+1;
	1425	} else {
	1426	result = NULL;
	1427	}
	1428	if (resultLength) {
	1429	*resultLength = len;
	1430	}
	1431	return result;
	1432	}
	1433
f3c0d7a5 A	1434	static void U_CALLCONV
f3c0d7a5 A	1435	uloc_kw_resetKeywords(UEnumeration* en,
4388f060	1436	UErrorCode* /status/) {
374ca955 A	1437	((UKeywordsContext )en->context)->current = ((UKeywordsContext )en->context)->keywords;
	1438	}
	1439
f3c0d7a5 A	1440	U_CDECL_END
	1441
	1442
374ca955 A	1443	static const UEnumeration gKeywordsEnum = { /* template that uloc_openKeywordList() copies into every enumeration it creates */
	1444	NULL, /* NOTE(review): first of two pointer slots left empty in the template; member name not visible here - confirm against the UEnumeration declaration */
	1445	NULL, /* presumably the 'context' member, which uloc_openKeywordList() assigns after the copy - confirm */
	1446	uloc_kw_closeKeywords, /* frees the keyword buffer, the context and the enumeration */
	1447	uloc_kw_countKeywords, /* counts the NUL-terminated entries in the keyword buffer */
	1448	uenum_unextDefault, /* defined outside this section; behavior not visible here */
	1449	uloc_kw_nextKeyword, /* returns the entry at the cursor and advances the cursor */
	1450	uloc_kw_resetKeywords /* rewinds the cursor to the start of the keyword buffer */
	1451	};
	1452
	1453	U_CAPI UEnumeration* U_EXPORT2
	1454	uloc_openKeywordList(const char keywordList, int32_t keywordListSize, UErrorCode status)
b75a7d8f	1455	{
340931cb A	1456	LocalMemory<UKeywordsContext> myContext;
340931cb A	1457	LocalMemory<UEnumeration> result;
b75a7d8f	1458
340931cb A	1459	if (U_FAILURE(*status)) {
340931cb A	1460	return nullptr;
46f4442e	1461	}
340931cb A	1462	myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
	1463	result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
	1464	if (myContext.isNull() \|\| result.isNull()) {
46f4442e	1465	*status = U_MEMORY_ALLOCATION_ERROR;
340931cb	1466	return nullptr;
46f4442e	1467	}
340931cb A	1468	uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));
	1469	myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1));
	1470	if (myContext->keywords == nullptr) {
46f4442e	1471	*status = U_MEMORY_ALLOCATION_ERROR;
340931cb	1472	return nullptr;
46f4442e	1473	}
46f4442e A	1474	uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
	1475	myContext->keywords[keywordListSize] = 0;
	1476	myContext->current = myContext->keywords;
340931cb A	1477	result->context = myContext.orphan();
340931cb A	1478	return result.orphan();
374ca955 A	1479	}
	1480
	1481	U_CAPI UEnumeration* U_EXPORT2
	1482	uloc_openKeywords(const char* localeID,
f3c0d7a5	1483	UErrorCode* status)
374ca955 A	1484	{
	1485	int32_t i=0;
	1486	char keywords[256];
	1487	int32_t keywordsCapacity = 256;
729e4ab9 A	1488	char tempBuffer[ULOC_FULLNAME_CAPACITY];
	1489	const char* tmpLocaleID;
	1490
374ca955	1491	if(status==NULL \|\| U_FAILURE(*status)) {
b75a7d8f A	1492	return 0;
b75a7d8f A	1493	}
f3c0d7a5	1494
729e4ab9 A	1495	if (_hasBCP47Extension(localeID)) {
	1496	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
	1497	} else {
	1498	if (localeID==NULL) {
	1499	localeID=uloc_getDefault();
	1500	}
	1501	tmpLocaleID=localeID;
b75a7d8f A	1502	}
b75a7d8f A	1503
374ca955	1504	/* Skip the language */
729e4ab9 A	1505	ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
729e4ab9 A	1506	if(_isIDSeparator(*tmpLocaleID)) {
374ca955 A	1507	const char *scriptID;
374ca955 A	1508	/* Skip the script if available */
729e4ab9 A	1509	ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
729e4ab9 A	1510	if(scriptID != tmpLocaleID+1) {
374ca955	1511	/* Found optional script */
729e4ab9	1512	tmpLocaleID = scriptID;
374ca955 A	1513	}
374ca955 A	1514	/* Skip the Country */
729e4ab9 A	1515	if (_isIDSeparator(*tmpLocaleID)) {
	1516	ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
	1517	if(_isIDSeparator(*tmpLocaleID)) {
	1518	_getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
374ca955	1519	}
b75a7d8f A	1520	}
	1521	}
	1522
374ca955	1523	/* keywords are located after '@' */
729e4ab9 A	1524	if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
729e4ab9 A	1525	i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
374ca955 A	1526	}
	1527
	1528	if(i) {
	1529	return uloc_openKeywordList(keywords, i, status);
	1530	} else {
	1531	return NULL;
b75a7d8f	1532	}
b75a7d8f A	1533	}
b75a7d8f A	1534
b75a7d8f	1535
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'. Both arguments are
 * parenthesized so that compound expressions are combined as a whole. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The text "i-default"; deliberately not NUL-terminated, so it must be
 * compared with an explicit length (I_DEFAULT_LENGTH). */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
73c04bcf	1544
374ca955 A	1545	/**
	1546	* Canonicalize the given localeID, to level 1 or to level 2,
	1547	* depending on the options. To specify level 1, pass in options=0.
	1548	* To specify level 2, pass in options=_ULOC_CANONICALIZE.
	1549	*
	1550	* This is the code underlying uloc_getName and uloc_canonicalize.
	1551	*/
	1552	static int32_t
	1553	_canonicalize(const char* localeID,
	1554	char* result,
	1555	int32_t resultCapacity,
	1556	uint32_t options,
	1557	UErrorCode* err) {
	1558	int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
	1559	char localeBuffer[ULOC_FULLNAME_CAPACITY];
729e4ab9	1560	char tempBuffer[ULOC_FULLNAME_CAPACITY];
46f4442e	1561	const char* origLocaleID;
729e4ab9	1562	const char* tmpLocaleID;
374ca955 A	1563	const char* keywordAssign = NULL;
374ca955 A	1564	const char* separatorIndicator = NULL;
374ca955 A	1565	char* name;
374ca955 A	1566	char* variant = NULL; /* pointer into name, or NULL */
374ca955 A	1567
374ca955 A	1568	if (U_FAILURE(*err)) {
b75a7d8f A	1569	return 0;
b75a7d8f A	1570	}
f3c0d7a5	1571
729e4ab9 A	1572	if (_hasBCP47Extension(localeID)) {
	1573	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
	1574	} else {
	1575	if (localeID==NULL) {
	1576	localeID=uloc_getDefault();
	1577	}
	1578	tmpLocaleID=localeID;
b75a7d8f	1579	}
729e4ab9 A	1580
729e4ab9 A	1581	origLocaleID=tmpLocaleID;
b75a7d8f	1582
374ca955 A	1583	/* if we are doing a full canonicalization, then put results in
374ca955 A	1584	localeBuffer, if necessary; otherwise send them to result. */
729e4ab9	1585	if (/OPTION_SET(options, _ULOC_CANONICALIZE) &&/
4388f060	1586	(result == NULL \|\| resultCapacity < (int32_t)sizeof(localeBuffer))) {
374ca955	1587	name = localeBuffer;
4388f060	1588	nameCapacity = (int32_t)sizeof(localeBuffer);
374ca955 A	1589	} else {
	1590	name = result;
	1591	nameCapacity = resultCapacity;
	1592	}
	1593
b75a7d8f	1594	/* get all pieces, one after another, and separate with '_' */
729e4ab9	1595	len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
73c04bcf A	1596
	1597	if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
	1598	const char *d = uloc_getDefault();
f3c0d7a5	1599
729e4ab9	1600	len = (int32_t)uprv_strlen(d);
73c04bcf A	1601
73c04bcf A	1602	if (name != NULL) {
3d1f044b	1603	uprv_memcpy(name, d, len);
73c04bcf	1604	}
729e4ab9	1605	} else if(_isIDSeparator(*tmpLocaleID)) {
374ca955 A	1606	const char *scriptID;
374ca955 A	1607
b75a7d8f	1608	++fieldCount;
374ca955 A	1609	if(len<nameCapacity) {
374ca955 A	1610	name[len]='_';
b75a7d8f	1611	}
374ca955 A	1612	++len;
374ca955 A	1613
4388f060 A	1614	scriptSize=ulocimp_getScript(tmpLocaleID+1,
4388f060 A	1615	(len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
374ca955 A	1616	if(scriptSize > 0) {
374ca955 A	1617	/* Found optional script */
729e4ab9	1618	tmpLocaleID = scriptID;
b75a7d8f	1619	++fieldCount;
374ca955	1620	len+=scriptSize;
729e4ab9	1621	if (_isIDSeparator(*tmpLocaleID)) {
374ca955 A	1622	/* If there is something else, then we add the _ */
	1623	if(len<nameCapacity) {
	1624	name[len]='_';
	1625	}
	1626	++len;
	1627	}
	1628	}
	1629
729e4ab9 A	1630	if (_isIDSeparator(*tmpLocaleID)) {
729e4ab9 A	1631	const char *cntryID;
4388f060 A	1632	int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
4388f060 A	1633	(len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
729e4ab9 A	1634	if (cntrySize > 0) {
	1635	/* Found optional country */
	1636	tmpLocaleID = cntryID;
	1637	len+=cntrySize;
	1638	}
	1639	if(_isIDSeparator(*tmpLocaleID)) {
51004dcb A	1640	/* If there is something else, then we add the _ if we found country before. */
51004dcb A	1641	if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
729e4ab9 A	1642	++fieldCount;
	1643	if(len<nameCapacity) {
	1644	name[len]='_';
	1645	}
	1646	++len;
374ca955	1647	}
729e4ab9	1648
4388f060 A	1649	variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
4388f060 A	1650	(len<nameCapacity ? name+len : NULL), nameCapacity-len);
374ca955	1651	if (variantSize > 0) {
4388f060	1652	variant = len<nameCapacity ? name+len : NULL;
374ca955	1653	len += variantSize;
729e4ab9	1654	tmpLocaleID += variantSize + 1; /* skip '_' and variant */
374ca955	1655	}
b75a7d8f	1656	}
b75a7d8f A	1657	}
	1658	}
	1659
374ca955	1660	/* Copy POSIX-style charset specifier, if any [mr.utf8] */
729e4ab9	1661	if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
374ca955	1662	UBool done = FALSE;
b75a7d8f	1663	do {
729e4ab9	1664	char c = *tmpLocaleID;
374ca955 A	1665	switch (c) {
	1666	case 0:
	1667	case '@':
	1668	done = TRUE;
	1669	break;
	1670	default:
	1671	if (len<nameCapacity) {
	1672	name[len] = c;
	1673	}
	1674	++len;
729e4ab9	1675	++tmpLocaleID;
374ca955 A	1676	break;
	1677	}
	1678	} while (!done);
	1679	}
	1680
	1681	/* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
729e4ab9 A	1682	After this, tmpLocaleID either points to '@' or is NULL */
	1683	if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
	1684	keywordAssign = uprv_strchr(tmpLocaleID, '=');
	1685	separatorIndicator = uprv_strchr(tmpLocaleID, ';');
374ca955 A	1686	}
	1687
	1688	/* Copy POSIX-style variant, if any [mr@FOO] */
	1689	if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
729e4ab9	1690	tmpLocaleID != NULL && keywordAssign == NULL) {
374ca955	1691	for (;;) {
729e4ab9	1692	char c = *tmpLocaleID;
374ca955 A	1693	if (c == 0) {
	1694	break;
	1695	}
	1696	if (len<nameCapacity) {
	1697	name[len] = c;
	1698	}
	1699	++len;
729e4ab9	1700	++tmpLocaleID;
374ca955 A	1701	}
	1702	}
	1703
	1704	if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
	1705	/* Handle @FOO variant if @ is present and not followed by = */
729e4ab9	1706	if (tmpLocaleID!=NULL && keywordAssign==NULL) {
374ca955 A	1707	int32_t posixVariantSize;
	1708	/* Add missing '_' if needed */
	1709	if (fieldCount < 2 \|\| (fieldCount < 3 && scriptSize > 0)) {
	1710	do {
	1711	if(len<nameCapacity) {
	1712	name[len]='_';
	1713	}
	1714	++len;
	1715	++fieldCount;
	1716	} while(fieldCount<2);
	1717	}
729e4ab9	1718	posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
374ca955 A	1719	(UBool)(variantSize > 0));
	1720	if (posixVariantSize > 0) {
	1721	if (variant == NULL) {
	1722	variant = name+len;
	1723	}
	1724	len += posixVariantSize;
	1725	variantSize += posixVariantSize;
b75a7d8f	1726	}
374ca955 A	1727	}
374ca955 A	1728
374ca955	1729	/* Look up the ID in the canonicalization map */
2ca993e8	1730	for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
374ca955	1731	const char* id = CANONICALIZE_MAP[j].id;
73c04bcf	1732	int32_t n = (int32_t)uprv_strlen(id);
374ca955	1733	if (len == n && uprv_strncmp(name, id, n) == 0) {
729e4ab9	1734	if (n == 0 && tmpLocaleID != NULL) {
374ca955 A	1735	break; /* Don't remap "" if keywords present */
	1736	}
	1737	len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
374ca955 A	1738	break;
	1739	}
	1740	}
374ca955 A	1741	}
	1742
	1743	if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
729e4ab9	1744	if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
374ca955 A	1745	(!separatorIndicator \|\| separatorIndicator > keywordAssign)) {
	1746	if(len<nameCapacity) {
	1747	name[len]='@';
	1748	}
	1749	++len;
b75a7d8f	1750	++fieldCount;
4388f060	1751	len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
3d1f044b	1752	NULL, 0, NULL, TRUE, err);
374ca955 A	1753	}
	1754	}
	1755
46f4442e	1756	if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
374ca955 A	1757	uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
	1758	}
	1759
	1760	return u_terminateChars(result, resultCapacity, len, err);
	1761	}
	1762
	1763	/* ### ID parsing API **************************************************/
	1764
	1765	U_CAPI int32_t U_EXPORT2
	1766	uloc_getParent(const char* localeID,
	1767	char* parent,
	1768	int32_t parentCapacity,
	1769	UErrorCode* err)
	1770	{
	1771	const char *lastUnderscore;
	1772	int32_t i;
f3c0d7a5	1773
374ca955 A	1774	if (U_FAILURE(*err))
374ca955 A	1775	return 0;
f3c0d7a5	1776
374ca955 A	1777	if (localeID == NULL)
	1778	localeID = uloc_getDefault();
	1779
	1780	lastUnderscore=uprv_strrchr(localeID, '_');
	1781	if(lastUnderscore!=NULL) {
	1782	i=(int32_t)(lastUnderscore-localeID);
	1783	} else {
	1784	i=0;
b75a7d8f	1785	}
374ca955	1786
73c04bcf	1787	if(i>0 && parent != localeID) {
374ca955 A	1788	uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
374ca955 A	1789	}
3d1f044b	1790
374ca955	1791	return u_terminateChars(parent, parentCapacity, i, err);
b75a7d8f	1792	}
374ca955 A	1793
	1794	U_CAPI int32_t U_EXPORT2
	1795	uloc_getLanguage(const char* localeID,
	1796	char* language,
	1797	int32_t languageCapacity,
	1798	UErrorCode* err)
	1799	{
	1800	/* uloc_getLanguage will return a 2 character iso-639 code if one exists. CWB/
	1801	int32_t i=0;
	1802
	1803	if (err==NULL \|\| U_FAILURE(*err)) {
	1804	return 0;
	1805	}
f3c0d7a5	1806
374ca955 A	1807	if(localeID==NULL) {
	1808	localeID=uloc_getDefault();
	1809	}
	1810
729e4ab9	1811	i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
374ca955 A	1812	return u_terminateChars(language, languageCapacity, i, err);
	1813	}
	1814
	1815	U_CAPI int32_t U_EXPORT2
	1816	uloc_getScript(const char* localeID,
	1817	char* script,
	1818	int32_t scriptCapacity,
	1819	UErrorCode* err)
	1820	{
	1821	int32_t i=0;
	1822
	1823	if(err==NULL \|\| U_FAILURE(*err)) {
	1824	return 0;
	1825	}
	1826
	1827	if(localeID==NULL) {
	1828	localeID=uloc_getDefault();
	1829	}
	1830
	1831	/* skip the language */
729e4ab9	1832	ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955	1833	if(_isIDSeparator(*localeID)) {
729e4ab9	1834	i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
374ca955 A	1835	}
	1836	return u_terminateChars(script, scriptCapacity, i, err);
	1837	}
	1838
	1839	U_CAPI int32_t U_EXPORT2
	1840	uloc_getCountry(const char* localeID,
	1841	char* country,
	1842	int32_t countryCapacity,
f3c0d7a5	1843	UErrorCode* err)
374ca955 A	1844	{
	1845	int32_t i=0;
	1846
	1847	if(err==NULL \|\| U_FAILURE(*err)) {
	1848	return 0;
	1849	}
	1850
	1851	if(localeID==NULL) {
	1852	localeID=uloc_getDefault();
	1853	}
	1854
	1855	/* Skip the language */
729e4ab9	1856	ulocimp_getLanguage(localeID, NULL, 0, &localeID);
374ca955 A	1857	if(_isIDSeparator(*localeID)) {
	1858	const char *scriptID;
	1859	/* Skip the script if available */
729e4ab9	1860	ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
374ca955 A	1861	if(scriptID != localeID+1) {
	1862	/* Found optional script */
	1863	localeID = scriptID;
	1864	}
	1865	if(_isIDSeparator(*localeID)) {
729e4ab9	1866	i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
374ca955 A	1867	}
	1868	}
	1869	return u_terminateChars(country, countryCapacity, i, err);
	1870	}
	1871
	1872	U_CAPI int32_t U_EXPORT2
	1873	uloc_getVariant(const char* localeID,
	1874	char* variant,
	1875	int32_t variantCapacity,
f3c0d7a5	1876	UErrorCode* err)
374ca955	1877	{
729e4ab9 A	1878	char tempBuffer[ULOC_FULLNAME_CAPACITY];
729e4ab9 A	1879	const char* tmpLocaleID;
374ca955	1880	int32_t i=0;
f3c0d7a5	1881
374ca955 A	1882	if(err==NULL \|\| U_FAILURE(*err)) {
	1883	return 0;
	1884	}
f3c0d7a5	1885
729e4ab9 A	1886	if (_hasBCP47Extension(localeID)) {
	1887	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
	1888	} else {
	1889	if (localeID==NULL) {
	1890	localeID=uloc_getDefault();
	1891	}
	1892	tmpLocaleID=localeID;
374ca955	1893	}
f3c0d7a5	1894
374ca955	1895	/* Skip the language */
729e4ab9 A	1896	ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
729e4ab9 A	1897	if(_isIDSeparator(*tmpLocaleID)) {
374ca955 A	1898	const char *scriptID;
374ca955 A	1899	/* Skip the script if available */
729e4ab9 A	1900	ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
729e4ab9 A	1901	if(scriptID != tmpLocaleID+1) {
374ca955	1902	/* Found optional script */
729e4ab9	1903	tmpLocaleID = scriptID;
374ca955 A	1904	}
374ca955 A	1905	/* Skip the Country */
729e4ab9 A	1906	if (_isIDSeparator(*tmpLocaleID)) {
	1907	const char *cntryID;
	1908	ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
	1909	if (cntryID != tmpLocaleID+1) {
	1910	/* Found optional country */
	1911	tmpLocaleID = cntryID;
	1912	}
	1913	if(_isIDSeparator(*tmpLocaleID)) {
	1914	/* If there was no country ID, skip a possible extra IDSeparator */
	1915	if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
	1916	tmpLocaleID++;
	1917	}
	1918	i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
374ca955 A	1919	}
	1920	}
	1921	}
f3c0d7a5	1922
374ca955 A	1923	/* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
	1924	/* if we do not have a variant tag yet then try a POSIX variant after '@' */
	1925	/*
	1926	if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
	1927	i=_getVariant(localeID+1, '@', variant, variantCapacity);
	1928	}
	1929	*/
	1930	return u_terminateChars(variant, variantCapacity, i, err);
	1931	}
	1932
	1933	U_CAPI int32_t U_EXPORT2
	1934	uloc_getName(const char* localeID,
	1935	char* name,
	1936	int32_t nameCapacity,
f3c0d7a5	1937	UErrorCode* err)
374ca955 A	1938	{
	1939	return _canonicalize(localeID, name, nameCapacity, 0, err);
	1940	}
	1941
	1942	U_CAPI int32_t U_EXPORT2
	1943	uloc_getBaseName(const char* localeID,
	1944	char* name,
	1945	int32_t nameCapacity,
f3c0d7a5	1946	UErrorCode* err)
374ca955 A	1947	{
	1948	return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
	1949	}
	1950
	1951	U_CAPI int32_t U_EXPORT2
	1952	uloc_canonicalize(const char* localeID,
	1953	char* name,
	1954	int32_t nameCapacity,
f3c0d7a5	1955	UErrorCode* err)
374ca955 A	1956	{
	1957	return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
	1958	}
f3c0d7a5	1959
b75a7d8f	1960	U_CAPI const char* U_EXPORT2
f3c0d7a5	1961	uloc_getISO3Language(const char* localeID)
b75a7d8f	1962	{
374ca955 A	1963	int16_t offset;
	1964	char lang[ULOC_LANG_CAPACITY];
	1965	UErrorCode err = U_ZERO_ERROR;
f3c0d7a5	1966
374ca955 A	1967	if (localeID == NULL)
	1968	{
	1969	localeID = uloc_getDefault();
	1970	}
	1971	uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
	1972	if (U_FAILURE(err))
	1973	return "";
	1974	offset = _findIndex(LANGUAGES, lang);
	1975	if (offset < 0)
	1976	return "";
	1977	return LANGUAGES_3[offset];
b75a7d8f A	1978	}
	1979
	1980	U_CAPI const char* U_EXPORT2
f3c0d7a5	1981	uloc_getISO3Country(const char* localeID)
b75a7d8f A	1982	{
b75a7d8f A	1983	int16_t offset;
374ca955	1984	char cntry[ULOC_LANG_CAPACITY];
b75a7d8f	1985	UErrorCode err = U_ZERO_ERROR;
f3c0d7a5	1986
b75a7d8f A	1987	if (localeID == NULL)
	1988	{
	1989	localeID = uloc_getDefault();
	1990	}
374ca955	1991	uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
b75a7d8f A	1992	if (U_FAILURE(err))
b75a7d8f A	1993	return "";
374ca955	1994	offset = _findIndex(COUNTRIES, cntry);
b75a7d8f A	1995	if (offset < 0)
b75a7d8f A	1996	return "";
f3c0d7a5	1997
374ca955	1998	return COUNTRIES_3[offset];
b75a7d8f A	1999	}
	2000
	2001	U_CAPI uint32_t U_EXPORT2
f3c0d7a5	2002	uloc_getLCID(const char* localeID)
b75a7d8f	2003	{
374ca955 A	2004	UErrorCode status = U_ZERO_ERROR;
374ca955 A	2005	char langID[ULOC_FULLNAME_CAPACITY];
f3c0d7a5 A	2006	uint32_t lcid = 0;
	2007
	2008	/* Check for incomplete id. */
	2009	if (!localeID \|\| uprv_strlen(localeID) < 2) {
	2010	return 0;
	2011	}
	2012
3d1f044b A	2013	// First, attempt Windows platform lookup if available, but fall
	2014	// through to catch any special cases (ICU vs Windows name differences).
	2015	lcid = uprv_convertToLCIDPlatform(localeID, &status);
	2016	if (U_FAILURE(status)) {
	2017	return 0;
	2018	}
	2019	if (lcid > 0) {
f3c0d7a5 A	2020	// Windows found an LCID, return that
	2021	return lcid;
	2022	}
374ca955 A	2023
374ca955 A	2024	uloc_getLanguage(localeID, langID, sizeof(langID), &status);
3d1f044b	2025	if (U_FAILURE(status) \|\| status == U_STRING_NOT_TERMINATED_WARNING) {
374ca955	2026	return 0;
b75a7d8f	2027	}
374ca955	2028
57a6839d A	2029	if (uprv_strchr(localeID, '@')) {
	2030	// uprv_convertToLCID does not support keywords other than collation.
	2031	// Remove all keywords except collation.
	2032	int32_t len;
	2033	char collVal[ULOC_KEYWORDS_CAPACITY];
	2034	char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
	2035
	2036	len = uloc_getKeywordValue(localeID, "collation", collVal,
2ca993e8	2037	UPRV_LENGTHOF(collVal) - 1, &status);
57a6839d A	2038
	2039	if (U_SUCCESS(status) && len > 0) {
	2040	collVal[len] = 0;
	2041
	2042	len = uloc_getBaseName(localeID, tmpLocaleID,
2ca993e8	2043	UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
57a6839d	2044
2ca993e8	2045	if (U_SUCCESS(status) && len > 0) {
57a6839d A	2046	tmpLocaleID[len] = 0;
	2047
	2048	len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2ca993e8	2049	UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
57a6839d	2050
2ca993e8	2051	if (U_SUCCESS(status) && len > 0) {
57a6839d A	2052	tmpLocaleID[len] = 0;
	2053	return uprv_convertToLCID(langID, tmpLocaleID, &status);
	2054	}
	2055	}
	2056	}
	2057
	2058	// fall through - all keywords are simply ignored
	2059	status = U_ZERO_ERROR;
	2060	}
	2061
374ca955 A	2062	return uprv_convertToLCID(langID, localeID, &status);
	2063	}
	2064
73c04bcf A	2065	U_CAPI int32_t U_EXPORT2
	2066	uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
	2067	UErrorCode *status)
	2068	{
57a6839d	2069	return uprv_convertToPosix(hostid, locale, localeCapacity, status);
73c04bcf A	2070	}
73c04bcf A	2071
374ca955 A	2072	/* ### Default locale **************************************************/
	2073
	2074	U_CAPI const char* U_EXPORT2
	2075	uloc_getDefault()
	2076	{
	2077	return locale_get_default();
	2078	}
	2079
	2080	U_CAPI void U_EXPORT2
	2081	uloc_setDefault(const char* newDefaultLocale,
f3c0d7a5	2082	UErrorCode* err)
374ca955 A	2083	{
	2084	if (U_FAILURE(*err))
	2085	return;
	2086	/* the error code isn't currently used for anything by this function*/
f3c0d7a5	2087
374ca955 A	2088	/* propagate change to C++ */
374ca955 A	2089	locale_set_default(newDefaultLocale);
b75a7d8f A	2090	}
b75a7d8f A	2091
729e4ab9	2092	/**
51004dcb	2093	* Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
729e4ab9 A	2094	* to an array of pointers to arrays of char. All of these pointers are owned
	2095	* by ICU-- do not delete them, and do not write through them. The array is
	2096	* terminated with a null pointer.
	2097	*/
	2098	U_CAPI const char* const* U_EXPORT2
f3c0d7a5	2099	uloc_getISOLanguages()
729e4ab9 A	2100	{
	2101	return LANGUAGES;
	2102	}
374ca955	2103
729e4ab9 A	2104	/**
	2105	* Returns a list of all 2-letter country codes defined in ISO 639. This is a
	2106	* pointer to an array of pointers to arrays of char. All of these pointers are
	2107	* owned by ICU-- do not delete them, and do not write through them. The array is
	2108	* terminated with a null pointer.
b75a7d8f	2109	*/
729e4ab9	2110	U_CAPI const char* const* U_EXPORT2
f3c0d7a5	2111	uloc_getISOCountries()
b75a7d8f	2112	{
729e4ab9 A	2113	return COUNTRIES;
729e4ab9 A	2114	}
73c04bcf	2115
b75a7d8f	2116
729e4ab9 A	2117	/* this function to be moved into cstring.c later */
729e4ab9 A	2118	static char gDecimal = 0;
b75a7d8f	2119
729e4ab9 A	2120	static /* U_CAPI */
	2121	double
	2122	/* U_EXPORT2 */
	2123	_uloc_strtod(const char start, char *end) {
	2124	char *decimal;
	2125	char *myEnd;
	2126	char buf[30];
	2127	double rv;
	2128	if (!gDecimal) {
	2129	char rep[5];
	2130	/* For machines that decide to change the decimal on you,
	2131	and try to be too smart with localization.
	2132	This normally should be just a '.'. */
	2133	sprintf(rep, "%+1.1f", 1.0);
	2134	gDecimal = rep[2];
b75a7d8f	2135	}
b75a7d8f	2136
729e4ab9 A	2137	if(gDecimal == '.') {
729e4ab9 A	2138	return uprv_strtod(start, end); /* fall through to OS */
b75a7d8f	2139	} else {
729e4ab9 A	2140	uprv_strncpy(buf, start, 29);
	2141	buf[29]=0;
	2142	decimal = uprv_strchr(buf, '.');
	2143	if(decimal) {
	2144	*decimal = gDecimal;
46f4442e	2145	} else {
729e4ab9	2146	return uprv_strtod(start, end); /* no decimal point */
46f4442e	2147	}
729e4ab9 A	2148	rv = uprv_strtod(buf, &myEnd);
	2149	if(end) {
	2150	end = (char)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
b75a7d8f	2151	}
729e4ab9	2152	return rv;
374ca955	2153	}
374ca955 A	2154	}
374ca955 A	2155
f3c0d7a5	2156	typedef struct {
729e4ab9 A	2157	float q;
729e4ab9 A	2158	int32_t dummy; /* to avoid uninitialized memory copy from qsort */
f3c0d7a5	2159	char locale[ULOC_FULLNAME_CAPACITY+1];
729e4ab9	2160	} _acceptLangItem;
b75a7d8f	2161
729e4ab9	2162	static int32_t U_CALLCONV
4388f060	2163	uloc_acceptLanguageCompare(const void * /context/, const void a, const void b)
729e4ab9 A	2164	{
	2165	const _acceptLangItem aa = (const _acceptLangItem)a;
	2166	const _acceptLangItem bb = (const _acceptLangItem)b;
b75a7d8f	2167
729e4ab9 A	2168	int32_t rc = 0;
	2169	if(bb->q < aa->q) {
	2170	rc = -1; /* A > B */
	2171	} else if(bb->q > aa->q) {
	2172	rc = 1; /* A < B */
	2173	} else {
	2174	rc = 0; /* A = B */
b75a7d8f A	2175	}
b75a7d8f A	2176
729e4ab9 A	2177	if(rc==0) {
729e4ab9 A	2178	rc = uprv_stricmp(aa->locale, bb->locale);
b75a7d8f A	2179	}
b75a7d8f A	2180
729e4ab9	2181	#if defined(ULOC_DEBUG)
f3c0d7a5 A	2182	/* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
f3c0d7a5 A	2183	aa->locale, aa->q,
729e4ab9 A	2184	bb->locale, bb->q,
	2185	rc);*/
	2186	#endif
374ca955	2187
729e4ab9	2188	return rc;
374ca955 A	2189	}
374ca955 A	2190
f3c0d7a5	2191	/*
729e4ab9 A	2192	mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
729e4ab9 A	2193	*/
374ca955	2194
b75a7d8f	2195	U_CAPI int32_t U_EXPORT2
729e4ab9 A	2196	uloc_acceptLanguageFromHTTP(char result, int32_t resultAvailable, UAcceptResult outResult,
	2197	const char *httpAcceptLanguage,
	2198	UEnumeration* availableLocales,
	2199	UErrorCode *status)
374ca955	2200	{
f3c0d7a5	2201	MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
729e4ab9 A	2202	char tmp[ULOC_FULLNAME_CAPACITY +1];
	2203	int32_t n = 0;
	2204	const char *itemEnd;
	2205	const char *paramEnd;
	2206	const char *s;
	2207	const char *t;
	2208	int32_t res;
	2209	int32_t i;
	2210	int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
b75a7d8f	2211
729e4ab9 A	2212	if(U_FAILURE(*status)) {
729e4ab9 A	2213	return -1;
b75a7d8f A	2214	}
b75a7d8f A	2215
729e4ab9 A	2216	for(s=httpAcceptLanguage;s&&*s;) {
	2217	while(isspace(s)) / eat space at the beginning */
	2218	s++;
	2219	itemEnd=uprv_strchr(s,',');
	2220	paramEnd=uprv_strchr(s,';');
	2221	if(!itemEnd) {
	2222	itemEnd = httpAcceptLanguage+l; /* end of string */
b75a7d8f	2223	}
f3c0d7a5	2224	if(paramEnd && paramEnd<itemEnd) {
729e4ab9 A	2225	/* semicolon (;) is closer than end (,) */
	2226	t = paramEnd+1;
	2227	if(*t=='q') {
	2228	t++;
	2229	}
	2230	while(isspace(*t)) {
	2231	t++;
	2232	}
	2233	if(*t=='=') {
	2234	t++;
	2235	}
	2236	while(isspace(*t)) {
	2237	t++;
	2238	}
f3c0d7a5	2239	items[n].q = (float)_uloc_strtod(t,NULL);
729e4ab9 A	2240	} else {
729e4ab9 A	2241	/* no semicolon - it's 1.0 */
f3c0d7a5	2242	items[n].q = 1.0f;
729e4ab9	2243	paramEnd = itemEnd;
374ca955	2244	}
f3c0d7a5	2245	items[n].dummy=0;
374ca955 A	2246	/* eat spaces prior to semi */
	2247	for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
	2248	;
3d1f044b	2249	int32_t slen = static_cast<int32_t>(((t+1)-s));
f3c0d7a5 A	2250	if(slen > ULOC_FULLNAME_CAPACITY) {
	2251	*status = U_BUFFER_OVERFLOW_ERROR;
	2252	return -1; // too big
	2253	}
	2254	uprv_strncpy(items[n].locale, s, slen);
	2255	items[n].locale[slen]=0; // terminate
	2256	int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
	2257	if(U_FAILURE(*status)) return -1;
	2258	if((clen!=slen) \|\| (uprv_strncmp(items[n].locale, tmp, slen))) {
	2259	// canonicalization had an effect- copy back
	2260	uprv_strncpy(items[n].locale, tmp, clen);
	2261	items[n].locale[clen] = 0; // terminate
374ca955 A	2262	}
	2263	#if defined(ULOC_DEBUG)
	2264	/fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);/
	2265	#endif
	2266	n++;
	2267	s = itemEnd;
	2268	while(s==',') { / eat duplicate commas */
	2269	s++;
	2270	}
f3c0d7a5 A	2271	if(n>=items.getCapacity()) { // If we need more items
	2272	if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
	2273	*status = U_MEMORY_ALLOCATION_ERROR;
	2274	return -1;
	2275	}
374ca955	2276	#if defined(ULOC_DEBUG)
f3c0d7a5	2277	fprintf(stderr,"malloced at size %d\n", items.getCapacity());
374ca955	2278	#endif
374ca955 A	2279	}
374ca955 A	2280	}
f3c0d7a5 A	2281	uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
f3c0d7a5 A	2282	if (U_FAILURE(*status)) {
46f4442e	2283	return -1;
374ca955	2284	}
f3c0d7a5 A	2285	LocalMemory<const char*> strs(NULL);
f3c0d7a5 A	2286	if (strs.allocateInsteadAndReset(n) == NULL) {
46f4442e A	2287	*status = U_MEMORY_ALLOCATION_ERROR;
	2288	return -1;
	2289	}
374ca955 A	2290	for(i=0;i<n;i++) {
	2291	#if defined(ULOC_DEBUG)
	2292	/fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);/
	2293	#endif
f3c0d7a5	2294	strs[i]=items[i].locale;
374ca955	2295	}
f3c0d7a5 A	2296	res = uloc_acceptLanguage(result, resultAvailable, outResult,
f3c0d7a5 A	2297	strs.getAlias(), n, availableLocales, status);
374ca955 A	2298	return res;
	2299	}
	2300
	2301
	2302	U_CAPI int32_t U_EXPORT2
f3c0d7a5	2303	uloc_acceptLanguage(char *result, int32_t resultAvailable,
374ca955 A	2304	UAcceptResult outResult, const char *acceptList,
	2305	int32_t acceptListCount,
	2306	UEnumeration* availableLocales,
	2307	UErrorCode *status)
	2308	{
	2309	int32_t i,j;
	2310	int32_t len;
	2311	int32_t maxLen=0;
	2312	char tmp[ULOC_FULLNAME_CAPACITY+1];
	2313	const char *l;
	2314	char **fallbackList;
	2315	if(U_FAILURE(*status)) {
	2316	return -1;
	2317	}
51004dcb	2318	fallbackList = static_cast<char *>(uprv_malloc((size_t)(sizeof(fallbackList[0])acceptListCount)));
374ca955	2319	if(fallbackList==NULL) {
46f4442e A	2320	*status = U_MEMORY_ALLOCATION_ERROR;
46f4442e A	2321	return -1;
374ca955 A	2322	}
	2323	for(i=0;i<acceptListCount;i++) {
	2324	#if defined(ULOC_DEBUG)
	2325	fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
	2326	#endif
0f5d89e8	2327	while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
374ca955 A	2328	#if defined(ULOC_DEBUG)
	2329	fprintf(stderr," %s\n", l);
	2330	#endif
73c04bcf	2331	len = (int32_t)uprv_strlen(l);
374ca955	2332	if(!uprv_strcmp(acceptList[i], l)) {
f3c0d7a5	2333	if(outResult) {
374ca955 A	2334	*outResult = ULOC_ACCEPT_VALID;
	2335	}
	2336	#if defined(ULOC_DEBUG)
	2337	fprintf(stderr, "MATCH! %s\n", l);
	2338	#endif
	2339	if(len>0) {
	2340	uprv_strncpy(result, l, uprv_min(len, resultAvailable));
	2341	}
	2342	for(j=0;j<i;j++) {
	2343	uprv_free(fallbackList[j]);
	2344	}
	2345	uprv_free(fallbackList);
f3c0d7a5	2346	return u_terminateChars(result, resultAvailable, len, status);
374ca955 A	2347	}
	2348	if(len>maxLen) {
	2349	maxLen = len;
	2350	}
	2351	}
f3c0d7a5	2352	uenum_reset(availableLocales, status);
374ca955	2353	/* save off parent info */
2ca993e8	2354	if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
374ca955 A	2355	fallbackList[i] = uprv_strdup(tmp);
	2356	} else {
	2357	fallbackList[i]=0;
	2358	}
	2359	}
	2360
	2361	for(maxLen--;maxLen>0;maxLen--) {
	2362	for(i=0;i<acceptListCount;i++) {
	2363	if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
	2364	#if defined(ULOC_DEBUG)
	2365	fprintf(stderr,"Try: [%s]", fallbackList[i]);
	2366	#endif
0f5d89e8	2367	while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
374ca955 A	2368	#if defined(ULOC_DEBUG)
	2369	fprintf(stderr," %s\n", l);
	2370	#endif
73c04bcf	2371	len = (int32_t)uprv_strlen(l);
374ca955	2372	if(!uprv_strcmp(fallbackList[i], l)) {
f3c0d7a5	2373	if(outResult) {
374ca955 A	2374	*outResult = ULOC_ACCEPT_FALLBACK;
	2375	}
	2376	#if defined(ULOC_DEBUG)
	2377	fprintf(stderr, "fallback MATCH! %s\n", l);
	2378	#endif
	2379	if(len>0) {
	2380	uprv_strncpy(result, l, uprv_min(len, resultAvailable));
	2381	}
73c04bcf A	2382	for(j=0;j<acceptListCount;j++) {
73c04bcf A	2383	uprv_free(fallbackList[j]);
374ca955 A	2384	}
374ca955 A	2385	uprv_free(fallbackList);
73c04bcf	2386	return u_terminateChars(result, resultAvailable, len, status);
374ca955 A	2387	}
374ca955 A	2388	}
f3c0d7a5	2389	uenum_reset(availableLocales, status);
374ca955	2390
2ca993e8	2391	if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
374ca955 A	2392	uprv_free(fallbackList[i]);
	2393	fallbackList[i] = uprv_strdup(tmp);
	2394	} else {
	2395	uprv_free(fallbackList[i]);
	2396	fallbackList[i]=0;
	2397	}
	2398	}
	2399	}
f3c0d7a5	2400	if(outResult) {
374ca955 A	2401	*outResult = ULOC_ACCEPT_FAILED;
	2402	}
	2403	}
	2404	for(i=0;i<acceptListCount;i++) {
	2405	uprv_free(fallbackList[i]);
	2406	}
	2407	uprv_free(fallbackList);
	2408	return -1;
b75a7d8f	2409	}
374ca955	2410
b331163b A	2411	U_CAPI const char* U_EXPORT2
	2412	uloc_toUnicodeLocaleKey(const char* keyword)
	2413	{
	2414	const char* bcpKey = ulocimp_toBcpKey(keyword);
	2415	if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
	2416	// unknown keyword, but syntax is fine..
	2417	return keyword;
	2418	}
	2419	return bcpKey;
	2420	}
	2421
	2422	U_CAPI const char* U_EXPORT2
	2423	uloc_toUnicodeLocaleType(const char* keyword, const char* value)
	2424	{
	2425	const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
	2426	if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
	2427	// unknown keyword, but syntax is fine..
	2428	return value;
	2429	}
	2430	return bcpType;
	2431	}
	2432
b331163b A	2433	static UBool
	2434	isWellFormedLegacyKey(const char* legacyKey)
	2435	{
	2436	const char* p = legacyKey;
	2437	while (*p) {
	2438	if (!UPRV_ISALPHANUM(*p)) {
	2439	return FALSE;
	2440	}
	2441	p++;
	2442	}
	2443	return TRUE;
	2444	}
	2445
	2446	static UBool
	2447	isWellFormedLegacyType(const char* legacyType)
	2448	{
	2449	const char* p = legacyType;
	2450	int32_t alphaNumLen = 0;
	2451	while (*p) {
	2452	if (p == '_' \|\| p == '/' \|\| *p == '-') {
	2453	if (alphaNumLen == 0) {
	2454	return FALSE;
	2455	}
	2456	alphaNumLen = 0;
	2457	} else if (UPRV_ISALPHANUM(*p)) {
	2458	alphaNumLen++;
	2459	} else {
	2460	return FALSE;
	2461	}
	2462	p++;
	2463	}
	2464	return (alphaNumLen != 0);
	2465	}
	2466
	2467	U_CAPI const char* U_EXPORT2
	2468	uloc_toLegacyKey(const char* keyword)
	2469	{
	2470	const char* legacyKey = ulocimp_toLegacyKey(keyword);
	2471	if (legacyKey == NULL) {
	2472	// Checks if the specified locale key is well-formed with the legacy locale syntax.
	2473	//
	2474	// Note:
f3c0d7a5 A	2475	// LDML/CLDR provides some definition of keyword syntax in
	2476	// * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
	2477	// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
	2478	// Keys can only consist of [0-9a-zA-Z].
b331163b A	2479	if (isWellFormedLegacyKey(keyword)) {
	2480	return keyword;
	2481	}
	2482	}
	2483	return legacyKey;
	2484	}
	2485
	2486	U_CAPI const char* U_EXPORT2
	2487	uloc_toLegacyType(const char* keyword, const char* value)
	2488	{
	2489	const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
	2490	if (legacyType == NULL) {
	2491	// Checks if the specified locale type is well-formed with the legacy locale syntax.
	2492	//
	2493	// Note:
f3c0d7a5 A	2494	// LDML/CLDR provides some definition of keyword syntax in
	2495	// * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
	2496	// * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
	2497	// Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
	2498	// we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
b331163b A	2499	if (isWellFormedLegacyType(value)) {
	2500	return value;
	2501	}
	2502	}
	2503	return legacyType;
	2504	}
	2505
374ca955	2506	/*eof*/