git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	*******************************************************************************
	3	*
	4	* Copyright (C) 1997-2016, International Business Machines
	5	* Corporation and others. All Rights Reserved.
	6	*
	7	*******************************************************************************
	8	* file name: loclikely.cpp
	9	* encoding: US-ASCII
	10	* tab size: 8 (not used)
	11	* indentation:4
	12	*
	13	* created on: 2010feb25
	14	* created by: Markus W. Scherer
	15	*
	16	* Code for likely and minimized locale subtags, separated out from other .cpp files
	17	* that then do not depend on resource bundle code and likely-subtags data.
	18	*/
	19
	20	#include "unicode/utypes.h"
	21	#include "unicode/locid.h"
	22	#include "unicode/putil.h"
	23	#include "unicode/uloc.h"
	24	#include "unicode/ures.h"
	25	#include "unicode/uscript.h"
	26	#include "cmemory.h"
	27	#include "cstring.h"
	28	#include "ulocimp.h"
	29	#include "ustr_imp.h"
	30
	31	/**
	32	* This function looks for the localeID in the likelySubtags resource.
	33	*
	34	* @param localeID The tag to find.
	35	* @param buffer A buffer to hold the matching entry
	36	* @param bufferLength The length of the output buffer
	37	* @return A pointer to "buffer" if found, or a null pointer if not.
	38	*/
	39	static const char* U_CALLCONV
	40	findLikelySubtags(const char* localeID,
	41	char* buffer,
	42	int32_t bufferLength,
	43	UErrorCode* err) {
	44	const char* result = NULL;
	45
	46	if (!U_FAILURE(*err)) {
	47	int32_t resLen = 0;
	48	const UChar* s = NULL;
	49	UErrorCode tmpErr = U_ZERO_ERROR;
	50	UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
	51	if (U_SUCCESS(tmpErr)) {
	52	s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
	53
	54	if (U_FAILURE(tmpErr)) {
	55	/*
	56	* If a resource is missing, it's not really an error, it's
	57	* just that we don't have any data for that particular locale ID.
	58	*/
	59	if (tmpErr != U_MISSING_RESOURCE_ERROR) {
	60	*err = tmpErr;
	61	}
	62	}
	63	else if (resLen >= bufferLength) {
	64	/* The buffer should never overflow. */
	65	*err = U_INTERNAL_PROGRAM_ERROR;
	66	}
	67	else {
	68	u_UCharsToChars(s, buffer, resLen + 1);
	69	result = buffer;
	70	}
	71
	72	ures_close(subtags);
	73	} else {
	74	*err = tmpErr;
	75	}
	76	}
	77
	78	return result;
	79	}
	80
	81	/**
	82	* Append a tag to a buffer, adding the separator if necessary. The buffer
	83	* must be large enough to contain the resulting tag plus any separator
	84	* necessary. The tag must not be a zero-length string.
	85	*
	86	* @param tag The tag to add.
	87	* @param tagLength The length of the tag.
	88	* @param buffer The output buffer.
	89	* @param bufferLength The length of the output buffer. This is an input/ouput parameter.
	90	**/
	91	static void U_CALLCONV
	92	appendTag(
	93	const char* tag,
	94	int32_t tagLength,
	95	char* buffer,
	96	int32_t* bufferLength) {
	97
	98	if (*bufferLength > 0) {
	99	buffer[*bufferLength] = '_';
	100	++(*bufferLength);
	101	}
	102
	103	uprv_memmove(
	104	&buffer[*bufferLength],
	105	tag,
	106	tagLength);
	107
	108	*bufferLength += tagLength;
	109	}
	110
	111	/**
	112	* These are the canonical strings for unknown languages, scripts and regions.
	113	**/
	114	static const char* const unknownLanguage = "und";
	115	static const char* const unknownScript = "Zzzz";
	116	static const char* const unknownRegion = "ZZ";
	117
	118	/**
	119	* Create a tag string from the supplied parameters. The lang, script and region
	120	* parameters may be NULL pointers. If they are, their corresponding length parameters
	121	* must be less than or equal to 0.
	122	*
	123	* If any of the language, script or region parameters are empty, and the alternateTags
	124	* parameter is not NULL, it will be parsed for potential language, script and region tags
	125	* to be used when constructing the new tag. If the alternateTags parameter is NULL, or
	126	* it contains no language tag, the default tag for the unknown language is used.
	127	*
	128	* If the length of the new string exceeds the capacity of the output buffer,
	129	* the function copies as many bytes to the output buffer as it can, and returns
	130	* the error U_BUFFER_OVERFLOW_ERROR.
	131	*
	132	* If an illegal argument is provided, the function returns the error
	133	* U_ILLEGAL_ARGUMENT_ERROR.
	134	*
	135	* Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
	136	* the tag string fits in the output buffer, but the null terminator doesn't.
	137	*
	138	* @param lang The language tag to use.
	139	* @param langLength The length of the language tag.
	140	* @param script The script tag to use.
	141	* @param scriptLength The length of the script tag.
	142	* @param region The region tag to use.
	143	* @param regionLength The length of the region tag.
	144	* @param trailing Any trailing data to append to the new tag.
	145	* @param trailingLength The length of the trailing data.
	146	* @param alternateTags A string containing any alternate tags.
	147	* @param tag The output buffer.
	148	* @param tagCapacity The capacity of the output buffer.
	149	* @param err A pointer to a UErrorCode for error reporting.
	150	* @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
	151	**/
	152	static int32_t U_CALLCONV
	153	createTagStringWithAlternates(
	154	const char* lang,
	155	int32_t langLength,
	156	const char* script,
	157	int32_t scriptLength,
	158	const char* region,
	159	int32_t regionLength,
	160	const char* trailing,
	161	int32_t trailingLength,
	162	const char* alternateTags,
	163	char* tag,
	164	int32_t tagCapacity,
	165	UErrorCode* err) {
	166
	167	if (U_FAILURE(*err)) {
	168	goto error;
	169	}
	170	else if (tag == NULL \|\|
	171	tagCapacity <= 0 \|\|
	172	langLength >= ULOC_LANG_CAPACITY \|\|
	173	scriptLength >= ULOC_SCRIPT_CAPACITY \|\|
	174	regionLength >= ULOC_COUNTRY_CAPACITY) {
	175	goto error;
	176	}
	177	else {
	178	/**
	179	* ULOC_FULLNAME_CAPACITY will provide enough capacity
	180	* that we can build a string that contains the language,
	181	* script and region code without worrying about overrunning
	182	* the user-supplied buffer.
	183	**/
	184	char tagBuffer[ULOC_FULLNAME_CAPACITY];
	185	int32_t tagLength = 0;
	186	int32_t capacityRemaining = tagCapacity;
	187	UBool regionAppended = FALSE;
	188
	189	if (langLength > 0) {
	190	appendTag(
	191	lang,
	192	langLength,
	193	tagBuffer,
	194	&tagLength);
	195	}
	196	else if (alternateTags == NULL) {
	197	/*
	198	* Append the value for an unknown language, if
	199	* we found no language.
	200	*/
	201	appendTag(
	202	unknownLanguage,
	203	(int32_t)uprv_strlen(unknownLanguage),
	204	tagBuffer,
	205	&tagLength);
	206	}
	207	else {
	208	/*
	209	* Parse the alternateTags string for the language.
	210	*/
	211	char alternateLang[ULOC_LANG_CAPACITY];
	212	int32_t alternateLangLength = sizeof(alternateLang);
	213
	214	alternateLangLength =
	215	uloc_getLanguage(
	216	alternateTags,
	217	alternateLang,
	218	alternateLangLength,
	219	err);
	220	if(U_FAILURE(*err) \|\|
	221	alternateLangLength >= ULOC_LANG_CAPACITY) {
	222	goto error;
	223	}
	224	else if (alternateLangLength == 0) {
	225	/*
	226	* Append the value for an unknown language, if
	227	* we found no language.
	228	*/
	229	appendTag(
	230	unknownLanguage,
	231	(int32_t)uprv_strlen(unknownLanguage),
	232	tagBuffer,
	233	&tagLength);
	234	}
	235	else {
	236	appendTag(
	237	alternateLang,
	238	alternateLangLength,
	239	tagBuffer,
	240	&tagLength);
	241	}
	242	}
	243
	244	if (scriptLength > 0) {
	245	appendTag(
	246	script,
	247	scriptLength,
	248	tagBuffer,
	249	&tagLength);
	250	}
	251	else if (alternateTags != NULL) {
	252	/*
	253	* Parse the alternateTags string for the script.
	254	*/
	255	char alternateScript[ULOC_SCRIPT_CAPACITY];
	256
	257	const int32_t alternateScriptLength =
	258	uloc_getScript(
	259	alternateTags,
	260	alternateScript,
	261	sizeof(alternateScript),
	262	err);
	263
	264	if (U_FAILURE(*err) \|\|
	265	alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
	266	goto error;
	267	}
	268	else if (alternateScriptLength > 0) {
	269	appendTag(
	270	alternateScript,
	271	alternateScriptLength,
	272	tagBuffer,
	273	&tagLength);
	274	}
	275	}
	276
	277	if (regionLength > 0) {
	278	appendTag(
	279	region,
	280	regionLength,
	281	tagBuffer,
	282	&tagLength);
	283
	284	regionAppended = TRUE;
	285	}
	286	else if (alternateTags != NULL) {
	287	/*
	288	* Parse the alternateTags string for the region.
	289	*/
	290	char alternateRegion[ULOC_COUNTRY_CAPACITY];
	291
	292	const int32_t alternateRegionLength =
	293	uloc_getCountry(
	294	alternateTags,
	295	alternateRegion,
	296	sizeof(alternateRegion),
	297	err);
	298	if (U_FAILURE(*err) \|\|
	299	alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
	300	goto error;
	301	}
	302	else if (alternateRegionLength > 0) {
	303	appendTag(
	304	alternateRegion,
	305	alternateRegionLength,
	306	tagBuffer,
	307	&tagLength);
	308
	309	regionAppended = TRUE;
	310	}
	311	}
	312
	313	{
	314	const int32_t toCopy =
	315	tagLength >= tagCapacity ? tagCapacity : tagLength;
	316
	317	/**
	318	* Copy the partial tag from our internal buffer to the supplied
	319	* target.
	320	**/
	321	uprv_memcpy(
	322	tag,
	323	tagBuffer,
	324	toCopy);
	325
	326	capacityRemaining -= toCopy;
	327	}
	328
	329	if (trailingLength > 0) {
	330	if (*trailing != '@' && capacityRemaining > 0) {
	331	tag[tagLength++] = '_';
	332	--capacityRemaining;
	333	if (capacityRemaining > 0 && !regionAppended) {
	334	/* extra separator is required */
	335	tag[tagLength++] = '_';
	336	--capacityRemaining;
	337	}
	338	}
	339
	340	if (capacityRemaining > 0) {
	341	/*
	342	* Copy the trailing data into the supplied buffer. Use uprv_memmove, since we
	343	* don't know if the user-supplied buffers overlap.
	344	*/
	345	const int32_t toCopy =
	346	trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
	347
	348	uprv_memmove(
	349	&tag[tagLength],
	350	trailing,
	351	toCopy);
	352	}
	353	}
	354
	355	tagLength += trailingLength;
	356
	357	return u_terminateChars(
	358	tag,
	359	tagCapacity,
	360	tagLength,
	361	err);
	362	}
	363
	364	error:
	365
	366	/**
	367	* An overflow indicates the locale ID passed in
	368	* is ill-formed. If we got here, and there was
	369	* no previous error, it's an implicit overflow.
	370	**/
	371	if (*err == U_BUFFER_OVERFLOW_ERROR \|\|
	372	U_SUCCESS(*err)) {
	373	*err = U_ILLEGAL_ARGUMENT_ERROR;
	374	}
	375
	376	return -1;
	377	}
	378
	379	/**
	380	* Create a tag string from the supplied parameters. The lang, script and region
	381	* parameters may be NULL pointers. If they are, their corresponding length parameters
	382	* must be less than or equal to 0. If the lang parameter is an empty string, the
	383	* default value for an unknown language is written to the output buffer.
	384	*
	385	* If the length of the new string exceeds the capacity of the output buffer,
	386	* the function copies as many bytes to the output buffer as it can, and returns
	387	* the error U_BUFFER_OVERFLOW_ERROR.
	388	*
	389	* If an illegal argument is provided, the function returns the error
	390	* U_ILLEGAL_ARGUMENT_ERROR.
	391	*
	392	* @param lang The language tag to use.
	393	* @param langLength The length of the language tag.
	394	* @param script The script tag to use.
	395	* @param scriptLength The length of the script tag.
	396	* @param region The region tag to use.
	397	* @param regionLength The length of the region tag.
	398	* @param trailing Any trailing data to append to the new tag.
	399	* @param trailingLength The length of the trailing data.
	400	* @param tag The output buffer.
	401	* @param tagCapacity The capacity of the output buffer.
	402	* @param err A pointer to a UErrorCode for error reporting.
	403	* @return The length of the tag string, which may be greater than tagCapacity.
	404	**/
	405	static int32_t U_CALLCONV
	406	createTagString(
	407	const char* lang,
	408	int32_t langLength,
	409	const char* script,
	410	int32_t scriptLength,
	411	const char* region,
	412	int32_t regionLength,
	413	const char* trailing,
	414	int32_t trailingLength,
	415	char* tag,
	416	int32_t tagCapacity,
	417	UErrorCode* err)
	418	{
	419	return createTagStringWithAlternates(
	420	lang,
	421	langLength,
	422	script,
	423	scriptLength,
	424	region,
	425	regionLength,
	426	trailing,
	427	trailingLength,
	428	NULL,
	429	tag,
	430	tagCapacity,
	431	err);
	432	}
	433
	434	/**
	435	* Parse the language, script, and region subtags from a tag string, and copy the
	436	* results into the corresponding output parameters. The buffers are null-terminated,
	437	* unless overflow occurs.
	438	*
	439	* The langLength, scriptLength, and regionLength parameters are input/output
	440	* parameters, and must contain the capacity of their corresponding buffers on
	441	* input. On output, they will contain the actual length of the buffers, not
	442	* including the null terminator.
	443	*
	444	* If the length of any of the output subtags exceeds the capacity of the corresponding
	445	* buffer, the function copies as many bytes to the output buffer as it can, and returns
	446	* the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
	447	* occurs.
	448	*
	449	* If an illegal argument is provided, the function returns the error
	450	* U_ILLEGAL_ARGUMENT_ERROR.
	451	*
	452	* @param localeID The locale ID to parse.
	453	* @param lang The language tag buffer.
	454	* @param langLength The length of the language tag.
	455	* @param script The script tag buffer.
	456	* @param scriptLength The length of the script tag.
	457	* @param region The region tag buffer.
	458	* @param regionLength The length of the region tag.
	459	* @param err A pointer to a UErrorCode for error reporting.
	460	* @return The number of chars of the localeID parameter consumed.
	461	**/
	462	static int32_t U_CALLCONV
	463	parseTagString(
	464	const char* localeID,
	465	char* lang,
	466	int32_t* langLength,
	467	char* script,
	468	int32_t* scriptLength,
	469	char* region,
	470	int32_t* regionLength,
	471	UErrorCode* err)
	472	{
	473	const char* position = localeID;
	474	int32_t subtagLength = 0;
	475
	476	if(U_FAILURE(*err) \|\|
	477	localeID == NULL \|\|
	478	lang == NULL \|\|
	479	langLength == NULL \|\|
	480	script == NULL \|\|
	481	scriptLength == NULL \|\|
	482	region == NULL \|\|
	483	regionLength == NULL) {
	484	goto error;
	485	}
	486
	487	subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
	488	u_terminateChars(lang, *langLength, subtagLength, err);
	489
	490	/*
	491	* Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
	492	* to be an error, because it indicates the user-supplied tag is
	493	* not well-formed.
	494	*/
	495	if(U_FAILURE(*err)) {
	496	goto error;
	497	}
	498
	499	*langLength = subtagLength;
	500
	501	/*
	502	* If no language was present, use the value of unknownLanguage
	503	* instead. Otherwise, move past any separator.
	504	*/
	505	if (*langLength == 0) {
	506	uprv_strcpy(
	507	lang,
	508	unknownLanguage);
	509	*langLength = (int32_t)uprv_strlen(lang);
	510	}
	511	else if (_isIDSeparator(*position)) {
	512	++position;
	513	}
	514
	515	subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
	516	u_terminateChars(script, *scriptLength, subtagLength, err);
	517
	518	if(U_FAILURE(*err)) {
	519	goto error;
	520	}
	521
	522	*scriptLength = subtagLength;
	523
	524	if (*scriptLength > 0) {
	525	if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
	526	/**
	527	* If the script part is the "unknown" script, then don't return it.
	528	**/
	529	*scriptLength = 0;
	530	}
	531
	532	/*
	533	* Move past any separator.
	534	*/
	535	if (_isIDSeparator(*position)) {
	536	++position;
	537	}
	538	}
	539
	540	subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
	541	u_terminateChars(region, *regionLength, subtagLength, err);
	542
	543	if(U_FAILURE(*err)) {
	544	goto error;
	545	}
	546
	547	*regionLength = subtagLength;
	548
	549	if (*regionLength > 0) {
	550	if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
	551	/**
	552	* If the region part is the "unknown" region, then don't return it.
	553	**/
	554	*regionLength = 0;
	555	}
	556	} else if (position != 0 && position != '@') {
	557	/* back up over consumed trailing separator */
	558	--position;
	559	}
	560
	561	exit:
	562
	563	return (int32_t)(position - localeID);
	564
	565	error:
	566
	567	/**
	568	* If we get here, we have no explicit error, it's the result of an
	569	* illegal argument.
	570	**/
	571	if (!U_FAILURE(*err)) {
	572	*err = U_ILLEGAL_ARGUMENT_ERROR;
	573	}
	574
	575	goto exit;
	576	}
	577
	578	static int32_t U_CALLCONV
	579	createLikelySubtagsString(
	580	const char* lang,
	581	int32_t langLength,
	582	const char* script,
	583	int32_t scriptLength,
	584	const char* region,
	585	int32_t regionLength,
	586	const char* variants,
	587	int32_t variantsLength,
	588	char* tag,
	589	int32_t tagCapacity,
	590	UErrorCode* err)
	591	{
	592	/**
	593	* ULOC_FULLNAME_CAPACITY will provide enough capacity
	594	* that we can build a string that contains the language,
	595	* script and region code without worrying about overrunning
	596	* the user-supplied buffer.
	597	**/
	598	char tagBuffer[ULOC_FULLNAME_CAPACITY];
	599	char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
	600
	601	if(U_FAILURE(*err)) {
	602	goto error;
	603	}
	604
	605	/**
	606	* Try the language with the script and region first.
	607	**/
	608	if (scriptLength > 0 && regionLength > 0) {
	609
	610	const char* likelySubtags = NULL;
	611
	612	createTagString(
	613	lang,
	614	langLength,
	615	script,
	616	scriptLength,
	617	region,
	618	regionLength,
	619	NULL,
	620	0,
	621	tagBuffer,
	622	sizeof(tagBuffer),
	623	err);
	624	if(U_FAILURE(*err)) {
	625	goto error;
	626	}
	627
	628	likelySubtags =
	629	findLikelySubtags(
	630	tagBuffer,
	631	likelySubtagsBuffer,
	632	sizeof(likelySubtagsBuffer),
	633	err);
	634	if(U_FAILURE(*err)) {
	635	goto error;
	636	}
	637
	638	if (likelySubtags != NULL) {
	639	/* Always use the language tag from the
	640	maximal string, since it may be more
	641	specific than the one provided. */
	642	return createTagStringWithAlternates(
	643	NULL,
	644	0,
	645	NULL,
	646	0,
	647	NULL,
	648	0,
	649	variants,
	650	variantsLength,
	651	likelySubtags,
	652	tag,
	653	tagCapacity,
	654	err);
	655	}
	656	}
	657
	658	/**
	659	* Try the language with just the script.
	660	**/
	661	if (scriptLength > 0) {
	662
	663	const char* likelySubtags = NULL;
	664
	665	createTagString(
	666	lang,
	667	langLength,
	668	script,
	669	scriptLength,
	670	NULL,
	671	0,
	672	NULL,
	673	0,
	674	tagBuffer,
	675	sizeof(tagBuffer),
	676	err);
	677	if(U_FAILURE(*err)) {
	678	goto error;
	679	}
	680
	681	likelySubtags =
	682	findLikelySubtags(
	683	tagBuffer,
	684	likelySubtagsBuffer,
	685	sizeof(likelySubtagsBuffer),
	686	err);
	687	if(U_FAILURE(*err)) {
	688	goto error;
	689	}
	690
	691	if (likelySubtags != NULL) {
	692	/* Always use the language tag from the
	693	maximal string, since it may be more
	694	specific than the one provided. */
	695	return createTagStringWithAlternates(
	696	NULL,
	697	0,
	698	NULL,
	699	0,
	700	region,
	701	regionLength,
	702	variants,
	703	variantsLength,
	704	likelySubtags,
	705	tag,
	706	tagCapacity,
	707	err);
	708	}
	709	}
	710
	711	/**
	712	* Try the language with just the region.
	713	**/
	714	if (regionLength > 0) {
	715
	716	const char* likelySubtags = NULL;
	717
	718	createTagString(
	719	lang,
	720	langLength,
	721	NULL,
	722	0,
	723	region,
	724	regionLength,
	725	NULL,
	726	0,
	727	tagBuffer,
	728	sizeof(tagBuffer),
	729	err);
	730	if(U_FAILURE(*err)) {
	731	goto error;
	732	}
	733
	734	likelySubtags =
	735	findLikelySubtags(
	736	tagBuffer,
	737	likelySubtagsBuffer,
	738	sizeof(likelySubtagsBuffer),
	739	err);
	740	if(U_FAILURE(*err)) {
	741	goto error;
	742	}
	743
	744	if (likelySubtags != NULL) {
	745	/* Always use the language tag from the
	746	maximal string, since it may be more
	747	specific than the one provided. */
	748	return createTagStringWithAlternates(
	749	NULL,
	750	0,
	751	script,
	752	scriptLength,
	753	NULL,
	754	0,
	755	variants,
	756	variantsLength,
	757	likelySubtags,
	758	tag,
	759	tagCapacity,
	760	err);
	761	}
	762	}
	763
	764	/**
	765	* Finally, try just the language.
	766	**/
	767	{
	768	const char* likelySubtags = NULL;
	769
	770	createTagString(
	771	lang,
	772	langLength,
	773	NULL,
	774	0,
	775	NULL,
	776	0,
	777	NULL,
	778	0,
	779	tagBuffer,
	780	sizeof(tagBuffer),
	781	err);
	782	if(U_FAILURE(*err)) {
	783	goto error;
	784	}
	785
	786	likelySubtags =
	787	findLikelySubtags(
	788	tagBuffer,
	789	likelySubtagsBuffer,
	790	sizeof(likelySubtagsBuffer),
	791	err);
	792	if(U_FAILURE(*err)) {
	793	goto error;
	794	}
	795
	796	if (likelySubtags != NULL) {
	797	/* Always use the language tag from the
	798	maximal string, since it may be more
	799	specific than the one provided. */
	800	return createTagStringWithAlternates(
	801	NULL,
	802	0,
	803	script,
	804	scriptLength,
	805	region,
	806	regionLength,
	807	variants,
	808	variantsLength,
	809	likelySubtags,
	810	tag,
	811	tagCapacity,
	812	err);
	813	}
	814	}
	815
	816	return u_terminateChars(
	817	tag,
	818	tagCapacity,
	819	0,
	820	err);
	821
	822	error:
	823
	824	if (!U_FAILURE(*err)) {
	825	*err = U_ILLEGAL_ARGUMENT_ERROR;
	826	}
	827
	828	return -1;
	829	}
	830
	831	#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
	832	{ int32_t count = 0; \
	833	int32_t i; \
	834	for (i = 0; i < trailingLength; i++) { \
	835	if (trailing[i] == '-' \|\| trailing[i] == '_') { \
	836	count = 0; \
	837	if (count > 8) { \
	838	goto error; \
	839	} \
	840	} else if (trailing[i] == '@') { \
	841	break; \
	842	} else if (count > 8) { \
	843	goto error; \
	844	} else { \
	845	count++; \
	846	} \
	847	} \
	848	}
	849
	850	static int32_t
	851	_uloc_addLikelySubtags(const char* localeID,
	852	char* maximizedLocaleID,
	853	int32_t maximizedLocaleIDCapacity,
	854	UErrorCode* err)
	855	{
	856	char lang[ULOC_LANG_CAPACITY];
	857	int32_t langLength = sizeof(lang);
	858	char script[ULOC_SCRIPT_CAPACITY];
	859	int32_t scriptLength = sizeof(script);
	860	char region[ULOC_COUNTRY_CAPACITY];
	861	int32_t regionLength = sizeof(region);
	862	const char* trailing = "";
	863	int32_t trailingLength = 0;
	864	int32_t trailingIndex = 0;
	865	int32_t resultLength = 0;
	866
	867	if(U_FAILURE(*err)) {
	868	goto error;
	869	}
	870	else if (localeID == NULL \|\|
	871	maximizedLocaleID == NULL \|\|
	872	maximizedLocaleIDCapacity <= 0) {
	873	goto error;
	874	}
	875
	876	trailingIndex = parseTagString(
	877	localeID,
	878	lang,
	879	&langLength,
	880	script,
	881	&scriptLength,
	882	region,
	883	&regionLength,
	884	err);
	885	if(U_FAILURE(*err)) {
	886	/* Overflow indicates an illegal argument error */
	887	if (*err == U_BUFFER_OVERFLOW_ERROR) {
	888	*err = U_ILLEGAL_ARGUMENT_ERROR;
	889	}
	890
	891	goto error;
	892	}
	893
	894	/* Find the length of the trailing portion. */
	895	while (_isIDSeparator(localeID[trailingIndex])) {
	896	trailingIndex++;
	897	}
	898	trailing = &localeID[trailingIndex];
	899	trailingLength = (int32_t)uprv_strlen(trailing);
	900
	901	CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
	902
	903	resultLength =
	904	createLikelySubtagsString(
	905	lang,
	906	langLength,
	907	script,
	908	scriptLength,
	909	region,
	910	regionLength,
	911	trailing,
	912	trailingLength,
	913	maximizedLocaleID,
	914	maximizedLocaleIDCapacity,
	915	err);
	916
	917	if (resultLength == 0) {
	918	const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
	919
	920	/*
	921	* If we get here, we need to return localeID.
	922	*/
	923	uprv_memcpy(
	924	maximizedLocaleID,
	925	localeID,
	926	localIDLength <= maximizedLocaleIDCapacity ?
	927	localIDLength : maximizedLocaleIDCapacity);
	928
	929	resultLength =
	930	u_terminateChars(
	931	maximizedLocaleID,
	932	maximizedLocaleIDCapacity,
	933	localIDLength,
	934	err);
	935	}
	936
	937	return resultLength;
	938
	939	error:
	940
	941	if (!U_FAILURE(*err)) {
	942	*err = U_ILLEGAL_ARGUMENT_ERROR;
	943	}
	944
	945	return -1;
	946	}
	947
	948	static int32_t
	949	_uloc_minimizeSubtags(const char* localeID,
	950	char* minimizedLocaleID,
	951	int32_t minimizedLocaleIDCapacity,
	952	UErrorCode* err)
	953	{
	954	/**
	955	* ULOC_FULLNAME_CAPACITY will provide enough capacity
	956	* that we can build a string that contains the language,
	957	* script and region code without worrying about overrunning
	958	* the user-supplied buffer.
	959	**/
	960	char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
	961	int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
	962
	963	char lang[ULOC_LANG_CAPACITY];
	964	int32_t langLength = sizeof(lang);
	965	char script[ULOC_SCRIPT_CAPACITY];
	966	int32_t scriptLength = sizeof(script);
	967	char region[ULOC_COUNTRY_CAPACITY];
	968	int32_t regionLength = sizeof(region);
	969	const char* trailing = "";
	970	int32_t trailingLength = 0;
	971	int32_t trailingIndex = 0;
	972
	973	if(U_FAILURE(*err)) {
	974	goto error;
	975	}
	976	else if (localeID == NULL \|\|
	977	minimizedLocaleID == NULL \|\|
	978	minimizedLocaleIDCapacity <= 0) {
	979	goto error;
	980	}
	981
	982	trailingIndex =
	983	parseTagString(
	984	localeID,
	985	lang,
	986	&langLength,
	987	script,
	988	&scriptLength,
	989	region,
	990	&regionLength,
	991	err);
	992	if(U_FAILURE(*err)) {
	993
	994	/* Overflow indicates an illegal argument error */
	995	if (*err == U_BUFFER_OVERFLOW_ERROR) {
	996	*err = U_ILLEGAL_ARGUMENT_ERROR;
	997	}
	998
	999	goto error;
	1000	}
	1001
	1002	/* Find the spot where the variants or the keywords begin, if any. */
	1003	while (_isIDSeparator(localeID[trailingIndex])) {
	1004	trailingIndex++;
	1005	}
	1006	trailing = &localeID[trailingIndex];
	1007	trailingLength = (int32_t)uprv_strlen(trailing);
	1008
	1009	CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
	1010
	1011	createTagString(
	1012	lang,
	1013	langLength,
	1014	script,
	1015	scriptLength,
	1016	region,
	1017	regionLength,
	1018	NULL,
	1019	0,
	1020	maximizedTagBuffer,
	1021	maximizedTagBufferLength,
	1022	err);
	1023	if(U_FAILURE(*err)) {
	1024	goto error;
	1025	}
	1026
	1027	/**
	1028	* First, we need to first get the maximization
	1029	* from AddLikelySubtags.
	1030	**/
	1031	maximizedTagBufferLength =
	1032	uloc_addLikelySubtags(
	1033	maximizedTagBuffer,
	1034	maximizedTagBuffer,
	1035	maximizedTagBufferLength,
	1036	err);
	1037
	1038	if(U_FAILURE(*err)) {
	1039	goto error;
	1040	}
	1041
	1042	/**
	1043	* Start first with just the language.
	1044	**/
	1045	{
	1046	char tagBuffer[ULOC_FULLNAME_CAPACITY];
	1047
	1048	const int32_t tagBufferLength =
	1049	createLikelySubtagsString(
	1050	lang,
	1051	langLength,
	1052	NULL,
	1053	0,
	1054	NULL,
	1055	0,
	1056	NULL,
	1057	0,
	1058	tagBuffer,
	1059	sizeof(tagBuffer),
	1060	err);
	1061
	1062	if(U_FAILURE(*err)) {
	1063	goto error;
	1064	}
	1065	else if (uprv_strnicmp(
	1066	maximizedTagBuffer,
	1067	tagBuffer,
	1068	tagBufferLength) == 0) {
	1069
	1070	return createTagString(
	1071	lang,
	1072	langLength,
	1073	NULL,
	1074	0,
	1075	NULL,
	1076	0,
	1077	trailing,
	1078	trailingLength,
	1079	minimizedLocaleID,
	1080	minimizedLocaleIDCapacity,
	1081	err);
	1082	}
	1083	}
	1084
	1085	/**
	1086	* Next, try the language and region.
	1087	**/
	1088	if (regionLength > 0) {
	1089
	1090	char tagBuffer[ULOC_FULLNAME_CAPACITY];
	1091
	1092	const int32_t tagBufferLength =
	1093	createLikelySubtagsString(
	1094	lang,
	1095	langLength,
	1096	NULL,
	1097	0,
	1098	region,
	1099	regionLength,
	1100	NULL,
	1101	0,
	1102	tagBuffer,
	1103	sizeof(tagBuffer),
	1104	err);
	1105
	1106	if(U_FAILURE(*err)) {
	1107	goto error;
	1108	}
	1109	else if (uprv_strnicmp(
	1110	maximizedTagBuffer,
	1111	tagBuffer,
	1112	tagBufferLength) == 0) {
	1113
	1114	return createTagString(
	1115	lang,
	1116	langLength,
	1117	NULL,
	1118	0,
	1119	region,
	1120	regionLength,
	1121	trailing,
	1122	trailingLength,
	1123	minimizedLocaleID,
	1124	minimizedLocaleIDCapacity,
	1125	err);
	1126	}
	1127	}
	1128
	1129	/**
	1130	* Finally, try the language and script. This is our last chance,
	1131	* since trying with all three subtags would only yield the
	1132	* maximal version that we already have.
	1133	**/
	1134	if (scriptLength > 0 && regionLength > 0) {
	1135	char tagBuffer[ULOC_FULLNAME_CAPACITY];
	1136
	1137	const int32_t tagBufferLength =
	1138	createLikelySubtagsString(
	1139	lang,
	1140	langLength,
	1141	script,
	1142	scriptLength,
	1143	NULL,
	1144	0,
	1145	NULL,
	1146	0,
	1147	tagBuffer,
	1148	sizeof(tagBuffer),
	1149	err);
	1150
	1151	if(U_FAILURE(*err)) {
	1152	goto error;
	1153	}
	1154	else if (uprv_strnicmp(
	1155	maximizedTagBuffer,
	1156	tagBuffer,
	1157	tagBufferLength) == 0) {
	1158
	1159	return createTagString(
	1160	lang,
	1161	langLength,
	1162	script,
	1163	scriptLength,
	1164	NULL,
	1165	0,
	1166	trailing,
	1167	trailingLength,
	1168	minimizedLocaleID,
	1169	minimizedLocaleIDCapacity,
	1170	err);
	1171	}
	1172	}
	1173
	1174	{
	1175	/**
	1176	* If we got here, return the locale ID parameter.
	1177	**/
	1178	const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
	1179
	1180	uprv_memcpy(
	1181	minimizedLocaleID,
	1182	localeID,
	1183	localeIDLength <= minimizedLocaleIDCapacity ?
	1184	localeIDLength : minimizedLocaleIDCapacity);
	1185
	1186	return u_terminateChars(
	1187	minimizedLocaleID,
	1188	minimizedLocaleIDCapacity,
	1189	localeIDLength,
	1190	err);
	1191	}
	1192
	1193	error:
	1194
	1195	if (!U_FAILURE(*err)) {
	1196	*err = U_ILLEGAL_ARGUMENT_ERROR;
	1197	}
	1198
	1199	return -1;
	1200
	1201
	1202	}
	1203
	1204	static UBool
	1205	do_canonicalize(const char* localeID,
	1206	char* buffer,
	1207	int32_t bufferCapacity,
	1208	UErrorCode* err)
	1209	{
	1210	uloc_canonicalize(
	1211	localeID,
	1212	buffer,
	1213	bufferCapacity,
	1214	err);
	1215
	1216	if (*err == U_STRING_NOT_TERMINATED_WARNING \|\|
	1217	*err == U_BUFFER_OVERFLOW_ERROR) {
	1218	*err = U_ILLEGAL_ARGUMENT_ERROR;
	1219
	1220	return FALSE;
	1221	}
	1222	else if (U_FAILURE(*err)) {
	1223
	1224	return FALSE;
	1225	}
	1226	else {
	1227	return TRUE;
	1228	}
	1229	}
	1230
	1231	U_CAPI int32_t U_EXPORT2
	1232	uloc_addLikelySubtags(const char* localeID,
	1233	char* maximizedLocaleID,
	1234	int32_t maximizedLocaleIDCapacity,
	1235	UErrorCode* err)
	1236	{
	1237	char localeBuffer[ULOC_FULLNAME_CAPACITY];
	1238
	1239	if (!do_canonicalize(
	1240	localeID,
	1241	localeBuffer,
	1242	sizeof(localeBuffer),
	1243	err)) {
	1244	return -1;
	1245	}
	1246	else {
	1247	return _uloc_addLikelySubtags(
	1248	localeBuffer,
	1249	maximizedLocaleID,
	1250	maximizedLocaleIDCapacity,
	1251	err);
	1252	}
	1253	}
	1254
	1255	U_CAPI int32_t U_EXPORT2
	1256	uloc_minimizeSubtags(const char* localeID,
	1257	char* minimizedLocaleID,
	1258	int32_t minimizedLocaleIDCapacity,
	1259	UErrorCode* err)
	1260	{
	1261	char localeBuffer[ULOC_FULLNAME_CAPACITY];
	1262
	1263	if (!do_canonicalize(
	1264	localeID,
	1265	localeBuffer,
	1266	sizeof(localeBuffer),
	1267	err)) {
	1268	return -1;
	1269	}
	1270	else {
	1271	return _uloc_minimizeSubtags(
	1272	localeBuffer,
	1273	minimizedLocaleID,
	1274	minimizedLocaleIDCapacity,
	1275	err);
	1276	}
	1277	}
	1278
	1279	// Pairs of (language subtag, + or -) for finding out fast if common languages
	1280	// are LTR (minus) or RTL (plus).
	1281	static const char* LANG_DIR_STRING =
	1282	"root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
	1283
	1284	// Implemented here because this calls uloc_addLikelySubtags().
	1285	U_CAPI UBool U_EXPORT2
	1286	uloc_isRightToLeft(const char *locale) {
	1287	UErrorCode errorCode = U_ZERO_ERROR;
	1288	char script[8];
	1289	int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
	1290	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING \|\|
	1291	scriptLength == 0) {
	1292	// Fastpath: We know the likely scripts and their writing direction
	1293	// for some common languages.
	1294	errorCode = U_ZERO_ERROR;
	1295	char lang[8];
	1296	int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
	1297	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING \|\|
	1298	langLength == 0) {
	1299	return FALSE;
	1300	}
	1301	const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
	1302	if (langPtr != NULL) {
	1303	switch (langPtr[langLength]) {
	1304	case '-': return FALSE;
	1305	case '+': return TRUE;
	1306	default: break; // partial match of a longer code
	1307	}
	1308	}
	1309	// Otherwise, find the likely script.
	1310	errorCode = U_ZERO_ERROR;
	1311	char likely[ULOC_FULLNAME_CAPACITY];
	1312	(void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
	1313	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING) {
	1314	return FALSE;
	1315	}
	1316	scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
	1317	if (U_FAILURE(errorCode) \|\| errorCode == U_STRING_NOT_TERMINATED_WARNING \|\|
	1318	scriptLength == 0) {
	1319	return FALSE;
	1320	}
	1321	}
	1322	UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
	1323	return uscript_isRightToLeft(scriptCode);
	1324	}
	1325
	1326	U_NAMESPACE_BEGIN
	1327
	1328	UBool
	1329	Locale::isRightToLeft() const {
	1330	return uloc_isRightToLeft(getBaseName());
	1331	}
	1332
	1333	// The following must at least allow for rg key value (6) plus terminator (1).
	1334	#define ULOC_RG_BUFLEN 8
	1335
	1336	U_CAPI int32_t U_EXPORT2
	1337	ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
	1338	char region, int32_t regionCapacity, UErrorCode status) {
	1339	if (U_FAILURE(*status)) {
	1340	return 0;
	1341	}
	1342	char rgBuf[ULOC_RG_BUFLEN];
	1343	UErrorCode rgStatus = U_ZERO_ERROR;
	1344
	1345	// First check for rg keyword value
	1346	int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
	1347	if (U_FAILURE(rgStatus) \|\| rgLen != 6) {
	1348	rgLen = 0;
	1349	} else {
	1350	// rgBuf guaranteed to be zero terminated here, with text len 6
	1351	char *rgPtr = rgBuf;
	1352	for (; *rgPtr!= 0; rgPtr++) {
	1353	rgPtr = uprv_toupper(rgPtr);
	1354	}
	1355	rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
	1356	}
	1357
	1358	if (rgLen == 0) {
	1359	// No valid rg keyword value, try for unicode_region_subtag
	1360	rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
	1361	if (U_FAILURE(*status)) {
	1362	rgLen = 0;
	1363	} else if (rgLen == 0 && inferRegion) {
	1364	// no unicode_region_subtag but inferRegion TRUE, try likely subtags
	1365	char locBuf[ULOC_FULLNAME_CAPACITY];
	1366	rgStatus = U_ZERO_ERROR;
	1367	(void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus);
	1368	if (U_SUCCESS(rgStatus)) {
	1369	rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status);
	1370	if (U_FAILURE(*status)) {
	1371	rgLen = 0;
	1372	}
	1373	}
	1374	}
	1375	}
	1376
	1377	rgBuf[rgLen] = 0;
	1378	uprv_strncpy(region, rgBuf, regionCapacity);
	1379	return u_terminateChars(region, regionCapacity, rgLen, status);
	1380	}
	1381
	1382	U_NAMESPACE_END