git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	*******************************************************************************
	3	*
	4	* Copyright (C) 2004-2014, International Business Machines
	5	* Corporation and others. All Rights Reserved.
	6	*
	7	*******************************************************************************
	8	* file name: ucase.cpp
	9	* encoding: US-ASCII
	10	* tab size: 8 (not used)
	11	* indentation:4
	12	*
	13	* created on: 2004aug30
	14	* created by: Markus W. Scherer
	15	*
	16	* Low-level Unicode character/string case mapping code.
	17	* Much code moved here (and modified) from uchar.c.
	18	*/
	19
	20	#include "unicode/utypes.h"
	21	#include "unicode/unistr.h"
	22	#include "unicode/uset.h"
	23	#include "unicode/udata.h" /* UDataInfo */
	24	#include "unicode/utf16.h"
	25	#include "ucmndata.h" /* DataHeader */
	26	#include "udatamem.h"
	27	#include "umutex.h"
	28	#include "uassert.h"
	29	#include "cmemory.h"
	30	#include "utrie2.h"
	31	#include "ucase.h"
	32
	33	struct UCaseProps {
	34	UDataMemory *mem;
	35	const int32_t *indexes;
	36	const uint16_t *exceptions;
	37	const uint16_t *unfold;
	38
	39	UTrie2 trie;
	40	uint8_t formatVersion[4];
	41	};
	42
	43	/* ucase_props_data.h is machine-generated by gencase --csource */
	44	#define INCLUDED_FROM_UCASE_CPP
	45	#include "ucase_props_data.h"
	46
	47	/* UCaseProps singleton ----------------------------------------------------- */
	48
	49	U_CAPI const UCaseProps * U_EXPORT2
	50	ucase_getSingleton() {
	51	return &ucase_props_singleton;
	52	}
	53
	54	/* set of property starts for UnicodeSet ------------------------------------ */
	55
	56	static UBool U_CALLCONV
	57	_enumPropertyStartsRange(const void context, UChar32 start, UChar32 /end/, uint32_t /value*/) {
	58	/* add the start code point to the USet */
	59	const USetAdder sa=(const USetAdder )context;
	60	sa->add(sa->set, start);
	61	return TRUE;
	62	}
	63
	64	U_CFUNC void U_EXPORT2
	65	ucase_addPropertyStarts(const UCaseProps csp, const USetAdder sa, UErrorCode *pErrorCode) {
	66	if(U_FAILURE(*pErrorCode)) {
	67	return;
	68	}
	69
	70	/* add the start code point of each same-value range of the trie */
	71	utrie2_enum(&csp->trie, NULL, _enumPropertyStartsRange, sa);
	72
	73	/* add code points with hardcoded properties, plus the ones following them */
	74
	75	/* (none right now, see comment below) */
	76
	77	/*
	78	* Omit code points with hardcoded specialcasing properties
	79	* because we do not build property UnicodeSets for them right now.
	80	*/
	81	}
	82
	83	/* data access primitives --------------------------------------------------- */
	84
	85	#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
	86
	87	#define PROPS_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
	88
	89	/* number of bits in an 8-bit integer value */
	90	static const uint8_t flagsOffset[256]={
	91	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
	92	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	93	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	94	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	95	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	96	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	97	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	98	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	99	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	100	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	101	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	102	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	103	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	104	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	105	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	106	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
	107	};
	108
	109	#define HAS_SLOT(flags, idx) ((flags)&(1<<(idx)))
	110	#define SLOT_OFFSET(flags, idx) flagsOffset[(flags)&((1<<(idx))-1)]
	111
	112	/*
	113	* Get the value of an optional-value slot where HAS_SLOT(excWord, idx).
	114	*
	115	* @param excWord (in) initial exceptions word
	116	* @param idx (in) desired slot index
	117	* @param pExc16 (in/out) const uint16_t * after excWord=*pExc16++;
	118	* moved to the last uint16_t of the value, use +1 for beginning of next slot
	119	* @param value (out) int32_t or uint32_t output if hasSlot, otherwise not modified
	120	*/
	121	#define GET_SLOT_VALUE(excWord, idx, pExc16, value) \
	122	if(((excWord)&UCASE_EXC_DOUBLE_SLOTS)==0) { \
	123	(pExc16)+=SLOT_OFFSET(excWord, idx); \
	124	(value)=*pExc16; \
	125	} else { \
	126	(pExc16)+=2*SLOT_OFFSET(excWord, idx); \
	127	(value)=*pExc16++; \
	128	(value)=((value)<<16)\|*pExc16; \
	129	}
	130
	131	/* simple case mappings ----------------------------------------------------- */
	132
	133	U_CAPI UChar32 U_EXPORT2
	134	ucase_tolower(const UCaseProps *csp, UChar32 c) {
	135	uint16_t props=UTRIE2_GET16(&csp->trie, c);
	136	if(!PROPS_HAS_EXCEPTION(props)) {
	137	if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
	138	c+=UCASE_GET_DELTA(props);
	139	}
	140	} else {
	141	const uint16_t *pe=GET_EXCEPTIONS(csp, props);
	142	uint16_t excWord=*pe++;
	143	if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
	144	GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c);
	145	}
	146	}
	147	return c;
	148	}
	149
	150	U_CAPI UChar32 U_EXPORT2
	151	ucase_toupper(const UCaseProps *csp, UChar32 c) {
	152	uint16_t props=UTRIE2_GET16(&csp->trie, c);
	153	if(!PROPS_HAS_EXCEPTION(props)) {
	154	if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
	155	c+=UCASE_GET_DELTA(props);
	156	}
	157	} else {
	158	const uint16_t *pe=GET_EXCEPTIONS(csp, props);
	159	uint16_t excWord=*pe++;
	160	if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
	161	GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c);
	162	}
	163	}
	164	return c;
	165	}
	166
	167	U_CAPI UChar32 U_EXPORT2
	168	ucase_totitle(const UCaseProps *csp, UChar32 c) {
	169	uint16_t props=UTRIE2_GET16(&csp->trie, c);
	170	if(!PROPS_HAS_EXCEPTION(props)) {
	171	if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
	172	c+=UCASE_GET_DELTA(props);
	173	}
	174	} else {
	175	const uint16_t *pe=GET_EXCEPTIONS(csp, props);
	176	uint16_t excWord=*pe++;
	177	int32_t idx;
	178	if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
	179	idx=UCASE_EXC_TITLE;
	180	} else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
	181	idx=UCASE_EXC_UPPER;
	182	} else {
	183	return c;
	184	}
	185	GET_SLOT_VALUE(excWord, idx, pe, c);
	186	}
	187	return c;
	188	}
	189
	190	static const UChar iDot[2] = { 0x69, 0x307 };
	191	static const UChar jDot[2] = { 0x6a, 0x307 };
	192	static const UChar iOgonekDot[3] = { 0x12f, 0x307 };
	193	static const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 };
	194	static const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 };
	195	static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 };
	196
	197
	198	U_CFUNC void U_EXPORT2
	199	ucase_addCaseClosure(const UCaseProps csp, UChar32 c, const USetAdder sa) {
	200	uint16_t props;
	201
	202	/*
	203	* Hardcode the case closure of i and its relatives and ignore the
	204	* data file data for these characters.
	205	* The Turkic dotless i and dotted I with their case mapping conditions
	206	* and case folding option make the related characters behave specially.
	207	* This code matches their closure behavior to their case folding behavior.
	208	*/
	209
	210	switch(c) {
	211	case 0x49:
	212	/* regular i and I are in one equivalence class */
	213	sa->add(sa->set, 0x69);
	214	return;
	215	case 0x69:
	216	sa->add(sa->set, 0x49);
	217	return;
	218	case 0x130:
	219	/* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
	220	sa->addString(sa->set, iDot, 2);
	221	return;
	222	case 0x131:
	223	/* dotless i is in a class by itself */
	224	return;
	225	default:
	226	/* otherwise use the data file data */
	227	break;
	228	}
	229
	230	props=UTRIE2_GET16(&csp->trie, c);
	231	if(!PROPS_HAS_EXCEPTION(props)) {
	232	if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
	233	/* add the one simple case mapping, no matter what type it is */
	234	int32_t delta=UCASE_GET_DELTA(props);
	235	if(delta!=0) {
	236	sa->add(sa->set, c+delta);
	237	}
	238	}
	239	} else {
	240	/*
	241	* c has exceptions, so there may be multiple simple and/or
	242	* full case mappings. Add them all.
	243	*/
	244	const uint16_t pe0, pe=GET_EXCEPTIONS(csp, props);
	245	const UChar *closure;
	246	uint16_t excWord=*pe++;
	247	int32_t idx, closureLength, fullLength, length;
	248
	249	pe0=pe;
	250
	251	/* add all simple case mappings */
	252	for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
	253	if(HAS_SLOT(excWord, idx)) {
	254	pe=pe0;
	255	GET_SLOT_VALUE(excWord, idx, pe, c);
	256	sa->add(sa->set, c);
	257	}
	258	}
	259
	260	/* get the closure string pointer & length */
	261	if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
	262	pe=pe0;
	263	GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
	264	closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
	265	closure=(const UChar )pe+1; / behind this slot, unless there are full case mappings */
	266	} else {
	267	closureLength=0;
	268	closure=NULL;
	269	}
	270
	271	/* add the full case folding */
	272	if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
	273	pe=pe0;
	274	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
	275
	276	/* start of full case mapping strings */
	277	++pe;
	278
	279	fullLength&=0xffff; /* bits 16 and higher are reserved */
	280
	281	/* skip the lowercase result string */
	282	pe+=fullLength&UCASE_FULL_LOWER;
	283	fullLength>>=4;
	284
	285	/* add the full case folding string */
	286	length=fullLength&0xf;
	287	if(length!=0) {
	288	sa->addString(sa->set, (const UChar *)pe, length);
	289	pe+=length;
	290	}
	291
	292	/* skip the uppercase and titlecase strings */
	293	fullLength>>=4;
	294	pe+=fullLength&0xf;
	295	fullLength>>=4;
	296	pe+=fullLength;
	297
	298	closure=(const UChar )pe; / behind full case mappings */
	299	}
	300
	301	/* add each code point in the closure string */
	302	for(idx=0; idx<closureLength;) {
	303	U16_NEXT_UNSAFE(closure, idx, c);
	304	sa->add(sa->set, c);
	305	}
	306	}
	307	}
	308
	309	/*
	310	* compare s, which has a length, with t, which has a maximum length or is NUL-terminated
	311	* must be length>0 and max>0 and length<=max
	312	*/
	313	static inline int32_t
	314	strcmpMax(const UChar s, int32_t length, const UChar t, int32_t max) {
	315	int32_t c1, c2;
	316
	317	max-=length; /* we require length<=max, so no need to decrement max in the loop */
	318	do {
	319	c1=*s++;
	320	c2=*t++;
	321	if(c2==0) {
	322	return 1; /* reached the end of t but not of s */
	323	}
	324	c1-=c2;
	325	if(c1!=0) {
	326	return c1; /* return difference result */
	327	}
	328	} while(--length>0);
	329	/* ends with length==0 */
	330
	331	if(max==0 \|\| *t==0) {
	332	return 0; /* equal to length of both strings */
	333	} else {
	334	return -max; /* return lengh difference */
	335	}
	336	}
	337
	338	U_CFUNC UBool U_EXPORT2
	339	ucase_addStringCaseClosure(const UCaseProps csp, const UChar s, int32_t length, const USetAdder *sa) {
	340	int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;
	341
	342	if(csp->unfold==NULL \|\| s==NULL) {
	343	return FALSE; /* no reverse case folding data, or no string */
	344	}
	345	if(length<=1) {
	346	/* the string is too short to find any match */
	347	/*
	348	* more precise would be:
	349	* if(!u_strHasMoreChar32Than(s, length, 1))
	350	* but this does not make much practical difference because
	351	* a single supplementary code point would just not be found
	352	*/
	353	return FALSE;
	354	}
	355
	356	const uint16_t *unfold=csp->unfold;
	357	unfoldRows=unfold[UCASE_UNFOLD_ROWS];
	358	unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH];
	359	unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH];
	360	unfold+=unfoldRowWidth;
	361
	362	if(length>unfoldStringWidth) {
	363	/* the string is too long to find any match */
	364	return FALSE;
	365	}
	366
	367	/* do a binary search for the string */
	368	start=0;
	369	limit=unfoldRows;
	370	while(start<limit) {
	371	i=(start+limit)/2;
	372	const UChar p=reinterpret_cast<const UChar >(unfold+(i*unfoldRowWidth));
	373	result=strcmpMax(s, length, p, unfoldStringWidth);
	374
	375	if(result==0) {
	376	/* found the string: add each code point, and its case closure */
	377	UChar32 c;
	378
	379	for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) {
	380	U16_NEXT_UNSAFE(p, i, c);
	381	sa->add(sa->set, c);
	382	ucase_addCaseClosure(csp, c, sa);
	383	}
	384	return TRUE;
	385	} else if(result<0) {
	386	limit=i;
	387	} else /* result>0 */ {
	388	start=i+1;
	389	}
	390	}
	391
	392	return FALSE; /* string not found */
	393	}
	394
	395	U_NAMESPACE_BEGIN
	396
	397	FullCaseFoldingIterator::FullCaseFoldingIterator()
	398	: unfold(reinterpret_cast<const UChar *>(ucase_props_singleton.unfold)),
	399	unfoldRows(unfold[UCASE_UNFOLD_ROWS]),
	400	unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]),
	401	unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]),
	402	currentRow(0),
	403	rowCpIndex(unfoldStringWidth) {
	404	unfold+=unfoldRowWidth;
	405	}
	406
	407	UChar32
	408	FullCaseFoldingIterator::next(UnicodeString &full) {
	409	// Advance past the last-delivered code point.
	410	const UChar p=unfold+(currentRowunfoldRowWidth);
	411	if(rowCpIndex>=unfoldRowWidth \|\| p[rowCpIndex]==0) {
	412	++currentRow;
	413	p+=unfoldRowWidth;
	414	rowCpIndex=unfoldStringWidth;
	415	}
	416	if(currentRow>=unfoldRows) { return U_SENTINEL; }
	417	// Set "full" to the NUL-terminated string in the first unfold column.
	418	int32_t length=unfoldStringWidth;
	419	while(length>0 && p[length-1]==0) { --length; }
	420	full.setTo(FALSE, p, length);
	421	// Return the code point.
	422	UChar32 c;
	423	U16_NEXT_UNSAFE(p, rowCpIndex, c);
	424	return c;
	425	}
	426
	427	U_NAMESPACE_END
	428
	429	/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
	430	U_CAPI int32_t U_EXPORT2
	431	ucase_getType(const UCaseProps *csp, UChar32 c) {
	432	uint16_t props=UTRIE2_GET16(&csp->trie, c);
	433	return UCASE_GET_TYPE(props);
	434	}
	435
	436	/** @return same as ucase_getType() and set bit 2 if c is case-ignorable */
	437	U_CAPI int32_t U_EXPORT2
	438	ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c) {
	439	uint16_t props=UTRIE2_GET16(&csp->trie, c);
	440	return UCASE_GET_TYPE_AND_IGNORABLE(props);
	441	}
	442
	443	/** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */
	444	static inline int32_t
	445	getDotType(const UCaseProps *csp, UChar32 c) {
	446	uint16_t props=UTRIE2_GET16(&csp->trie, c);
	447	if(!PROPS_HAS_EXCEPTION(props)) {
	448	return props&UCASE_DOT_MASK;
	449	} else {
	450	const uint16_t *pe=GET_EXCEPTIONS(csp, props);
	451	return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK;
	452	}
	453	}
	454
	455	U_CAPI UBool U_EXPORT2
	456	ucase_isSoftDotted(const UCaseProps *csp, UChar32 c) {
	457	return (UBool)(getDotType(csp, c)==UCASE_SOFT_DOTTED);
	458	}
	459
	460	U_CAPI UBool U_EXPORT2
	461	ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c) {
	462	uint16_t props=UTRIE2_GET16(&csp->trie, c);
	463	return (UBool)((props&UCASE_SENSITIVE)!=0);
	464	}
	465
	466	/* string casing ------------------------------------------------------------ */
	467
	468	/*
	469	* These internal functions form the core of string case mappings.
	470	* They map single code points to result code points or strings and take
	471	* all necessary conditions (context, locale ID, options) into account.
	472	*
	473	* They do not iterate over the source or write to the destination
	474	* so that the same functions are useful for non-standard string storage,
	475	* such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
	476	* For the same reason, the "surrounding text" context is passed in as a
	477	* UCaseContextIterator which does not make any assumptions about
	478	* the underlying storage.
	479	*
	480	* This section contains helper functions that check for conditions
	481	* in the input text surrounding the current code point
	482	* according to SpecialCasing.txt.
	483	*
	484	* Each helper function gets the index
	485	* - after the current code point if it looks at following text
	486	* - before the current code point if it looks at preceding text
	487	*
	488	* Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
	489	*
	490	* Final_Sigma
	491	* C is preceded by a sequence consisting of
	492	* a cased letter and a case-ignorable sequence,
	493	* and C is not followed by a sequence consisting of
	494	* an ignorable sequence and then a cased letter.
	495	*
	496	* More_Above
	497	* C is followed by one or more characters of combining class 230 (ABOVE)
	498	* in the combining character sequence.
	499	*
	500	* After_Soft_Dotted
	501	* The last preceding character with combining class of zero before C
	502	* was Soft_Dotted,
	503	* and there is no intervening combining character class 230 (ABOVE).
	504	*
	505	* Before_Dot
	506	* C is followed by combining dot above (U+0307).
	507	* Any sequence of characters with a combining class that is neither 0 nor 230
	508	* may intervene between the current character and the combining dot above.
	509	*
	510	* The erratum from 2002-10-31 adds the condition
	511	*
	512	* After_I
	513	* The last preceding base character was an uppercase I, and there is no
	514	* intervening combining character class 230 (ABOVE).
	515	*
	516	* (See Jitterbug 2344 and the comments on After_I below.)
	517	*
	518	* Helper definitions in Unicode 3.2 UAX 21:
	519	*
	520	* D1. A character C is defined to be cased
	521	* if it meets any of the following criteria:
	522	*
	523	* - The general category of C is Titlecase Letter (Lt)
	524	* - In [CoreProps], C has one of the properties Uppercase, or Lowercase
	525	* - Given D = NFD(C), then it is not the case that:
	526	* D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
	527	* (This third criterion does not add any characters to the list
	528	* for Unicode 3.2. Ignored.)
	529	*
	530	* D2. A character C is defined to be case-ignorable
	531	* if it meets either of the following criteria:
	532	*
	533	* - The general category of C is
	534	* Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
	535	* Letter Modifier (Lm), or Symbol Modifier (Sk)
	536	* - C is one of the following characters
	537	* U+0027 APOSTROPHE
	538	* U+00AD SOFT HYPHEN (SHY)
	539	* U+2019 RIGHT SINGLE QUOTATION MARK
	540	* (the preferred character for apostrophe)
	541	*
	542	* D3. A case-ignorable sequence is a sequence of
	543	* zero or more case-ignorable characters.
	544	*/
	545
	546	#define is_a(c) ((c)=='a' \|\| (c)=='A')
	547	#define is_d(c) ((c)=='d' \|\| (c)=='D')
	548	#define is_e(c) ((c)=='e' \|\| (c)=='E')
	549	#define is_i(c) ((c)=='i' \|\| (c)=='I')
	550	#define is_l(c) ((c)=='l' \|\| (c)=='L')
	551	#define is_n(c) ((c)=='n' \|\| (c)=='N')
	552	#define is_r(c) ((c)=='r' \|\| (c)=='R')
	553	#define is_t(c) ((c)=='t' \|\| (c)=='T')
	554	#define is_u(c) ((c)=='u' \|\| (c)=='U')
	555	#define is_z(c) ((c)=='z' \|\| (c)=='Z')
	556
	557	/* separator? */
	558	#define is_sep(c) ((c)=='_' \|\| (c)=='-' \|\| (c)==0)
	559
	560	/**
	561	* Requires non-NULL locale ID but otherwise does the equivalent of
	562	* checking for language codes as if uloc_getLanguage() were called:
	563	* Accepts both 2- and 3-letter codes and accepts case variants.
	564	*/
	565	U_CFUNC int32_t
	566	ucase_getCaseLocale(const char locale, int32_t locCache) {
	567	int32_t result;
	568	char c;
	569
	570	if(locCache!=NULL && (result=*locCache)!=UCASE_LOC_UNKNOWN) {
	571	return result;
	572	}
	573
	574	result=UCASE_LOC_ROOT;
	575
	576	/*
	577	* This function used to use uloc_getLanguage(), but the current code
	578	* removes the dependency of this low-level code on uloc implementation code
	579	* and is faster because not the whole locale ID has to be
	580	* examined and copied/transformed.
	581	*
	582	* Because this code does not want to depend on uloc, the caller must
	583	* pass in a non-NULL locale, i.e., may need to call uloc_getDefault().
	584	*/
	585	c=*locale++;
	586	if(is_t(c)) {
	587	/* tr or tur? */
	588	c=*locale++;
	589	if(is_u(c)) {
	590	c=*locale++;
	591	}
	592	if(is_r(c)) {
	593	c=*locale;
	594	if(is_sep(c)) {
	595	result=UCASE_LOC_TURKISH;
	596	}
	597	}
	598	} else if(is_a(c)) {
	599	/* az or aze? */
	600	c=*locale++;
	601	if(is_z(c)) {
	602	c=*locale++;
	603	if(is_e(c)) {
	604	c=*locale;
	605	}
	606	if(is_sep(c)) {
	607	result=UCASE_LOC_TURKISH;
	608	}
	609	}
	610	} else if(is_l(c)) {
	611	/* lt or lit? */
	612	c=*locale++;
	613	if(is_i(c)) {
	614	c=*locale++;
	615	}
	616	if(is_t(c)) {
	617	c=*locale;
	618	if(is_sep(c)) {
	619	result=UCASE_LOC_LITHUANIAN;
	620	}
	621	}
	622	} else if(is_n(c)) {
	623	/* nl or nld? */
	624	c=*locale++;
	625	if(is_l(c)) {
	626	c=*locale++;
	627	if(is_d(c)) {
	628	c=*locale;
	629	}
	630	if(is_sep(c)) {
	631	result=UCASE_LOC_DUTCH;
	632	}
	633	}
	634	}
	635
	636	if(locCache!=NULL) {
	637	*locCache=result;
	638	}
	639	return result;
	640	}
	641
	642	/*
	643	* Is followed by
	644	* {case-ignorable}* cased
	645	* ?
	646	* (dir determines looking forward/backward)
	647	* If a character is case-ignorable, it is skipped regardless of whether
	648	* it is also cased or not.
	649	*/
	650	static UBool
	651	isFollowedByCasedLetter(const UCaseProps csp, UCaseContextIterator iter, void *context, int8_t dir) {
	652	UChar32 c;
	653
	654	if(iter==NULL) {
	655	return FALSE;
	656	}
	657
	658	for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) {
	659	int32_t type=ucase_getTypeOrIgnorable(csp, c);
	660	if(type&4) {
	661	/* case-ignorable, continue with the loop */
	662	} else if(type!=UCASE_NONE) {
	663	return TRUE; /* followed by cased letter */
	664	} else {
	665	return FALSE; /* uncased and not case-ignorable */
	666	}
	667	}
	668
	669	return FALSE; /* not followed by cased letter */
	670	}
	671
	672	/* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
	673	static UBool
	674	isPrecededBySoftDotted(const UCaseProps csp, UCaseContextIterator iter, void *context) {
	675	UChar32 c;
	676	int32_t dotType;
	677	int8_t dir;
	678
	679	if(iter==NULL) {
	680	return FALSE;
	681	}
	682
	683	for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
	684	dotType=getDotType(csp, c);
	685	if(dotType==UCASE_SOFT_DOTTED) {
	686	return TRUE; /* preceded by TYPE_i */
	687	} else if(dotType!=UCASE_OTHER_ACCENT) {
	688	return FALSE; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
	689	}
	690	}
	691
	692	return FALSE; /* not preceded by TYPE_i */
	693	}
	694
	695	/*
	696	* See Jitterbug 2344:
	697	* The condition After_I for Turkic-lowercasing of U+0307 combining dot above
	698	* is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
	699	* we made those releases compatible with Unicode 3.2 which had not fixed
	700	* a related bug in SpecialCasing.txt.
	701	*
	702	* From the Jitterbug 2344 text:
	703	* ... this bug is listed as a Unicode erratum
	704	* from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
	705	* <quote>
	706	* There are two errors in SpecialCasing.txt.
	707	* 1. Missing semicolons on two lines. ... [irrelevant for ICU]
	708	* 2. An incorrect context definition. Correct as follows:
	709	* < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
	710	* < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
	711	* ---
	712	* > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
	713	* > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
	714	* where the context After_I is defined as:
	715	* The last preceding base character was an uppercase I, and there is no
	716	* intervening combining character class 230 (ABOVE).
	717	* </quote>
	718	*
	719	* Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
	720	*
	721	* # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
	722	* # This matches the behavior of the canonically equivalent I-dot_above
	723	*
	724	* See also the description in this place in older versions of uchar.c (revision 1.100).
	725	*
	726	* Markus W. Scherer 2003-feb-15
	727	*/
	728
	729	/* Is preceded by base character 'I' with no intervening cc=230 ? */
	730	static UBool
	731	isPrecededBy_I(const UCaseProps csp, UCaseContextIterator iter, void *context) {
	732	UChar32 c;
	733	int32_t dotType;
	734	int8_t dir;
	735
	736	if(iter==NULL) {
	737	return FALSE;
	738	}
	739
	740	for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
	741	if(c==0x49) {
	742	return TRUE; /* preceded by I */
	743	}
	744	dotType=getDotType(csp, c);
	745	if(dotType!=UCASE_OTHER_ACCENT) {
	746	return FALSE; /* preceded by different base character (not I), or intervening cc==230 */
	747	}
	748	}
	749
	750	return FALSE; /* not preceded by I */
	751	}
	752
	753	/* Is followed by one or more cc==230 ? */
	754	static UBool
	755	isFollowedByMoreAbove(const UCaseProps csp, UCaseContextIterator iter, void *context) {
	756	UChar32 c;
	757	int32_t dotType;
	758	int8_t dir;
	759
	760	if(iter==NULL) {
	761	return FALSE;
	762	}
	763
	764	for(dir=1; (c=iter(context, dir))>=0; dir=0) {
	765	dotType=getDotType(csp, c);
	766	if(dotType==UCASE_ABOVE) {
	767	return TRUE; /* at least one cc==230 following */
	768	} else if(dotType!=UCASE_OTHER_ACCENT) {
	769	return FALSE; /* next base character, no more cc==230 following */
	770	}
	771	}
	772
	773	return FALSE; /* no more cc==230 following */
	774	}
	775
	776	/* Is followed by a dot above (without cc==230 in between) ? */
	777	static UBool
	778	isFollowedByDotAbove(const UCaseProps csp, UCaseContextIterator iter, void *context) {
	779	UChar32 c;
	780	int32_t dotType;
	781	int8_t dir;
	782
	783	if(iter==NULL) {
	784	return FALSE;
	785	}
	786
	787	for(dir=1; (c=iter(context, dir))>=0; dir=0) {
	788	if(c==0x307) {
	789	return TRUE;
	790	}
	791	dotType=getDotType(csp, c);
	792	if(dotType!=UCASE_OTHER_ACCENT) {
	793	return FALSE; /* next base character or cc==230 in between */
	794	}
	795	}
	796
	797	return FALSE; /* no dot above following */
	798	}
	799
	800	U_CAPI int32_t U_EXPORT2
	801	ucase_toFullLower(const UCaseProps *csp, UChar32 c,
	802	UCaseContextIterator iter, void context,
	803	const UChar **pString,
	804	const char locale, int32_t locCache)
	805	{
	806	UChar32 result=c;
	807	uint16_t props=UTRIE2_GET16(&csp->trie, c);
	808	if(!PROPS_HAS_EXCEPTION(props)) {
	809	if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
	810	result=c+UCASE_GET_DELTA(props);
	811	}
	812	} else {
	813	const uint16_t pe=GET_EXCEPTIONS(csp, props), pe2;
	814	uint16_t excWord=*pe++;
	815	int32_t full;
	816
	817	pe2=pe;
	818
	819	if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
	820	/* use hardcoded conditions and mappings */
	821	int32_t loc=ucase_getCaseLocale(locale, locCache);
	822
	823	/*
	824	* Test for conditional mappings first
	825	* (otherwise the unconditional default mappings are always taken),
	826	* then test for characters that have unconditional mappings in SpecialCasing.txt,
	827	* then get the UnicodeData.txt mappings.
	828	*/
	829	if( loc==UCASE_LOC_LITHUANIAN &&
	830	/* base characters, find accents above */
	831	(((c==0x49 \|\| c==0x4a \|\| c==0x12e) &&
	832	isFollowedByMoreAbove(csp, iter, context)) \|\|
	833	/* precomposed with accent above, no need to find one */
	834	(c==0xcc \|\| c==0xcd \|\| c==0x128))
	835	) {
	836	/*
	837	# Lithuanian
	838
	839	# Lithuanian retains the dot in a lowercase i when followed by accents.
	840
	841	# Introduce an explicit dot above when lowercasing capital I's and J's
	842	# whenever there are more accents above.
	843	# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
	844
	845	0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
	846	004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
	847	012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
	848	00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
	849	00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
	850	0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
	851	*/
	852	switch(c) {
	853	case 0x49: /* LATIN CAPITAL LETTER I */
	854	*pString=iDot;
	855	return 2;
	856	case 0x4a: /* LATIN CAPITAL LETTER J */
	857	*pString=jDot;
	858	return 2;
	859	case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
	860	*pString=iOgonekDot;
	861	return 2;
	862	case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
	863	*pString=iDotGrave;
	864	return 3;
	865	case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
	866	*pString=iDotAcute;
	867	return 3;
	868	case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
	869	*pString=iDotTilde;
	870	return 3;
	871	default:
	872	return 0; /* will not occur */
	873	}
	874	/* # Turkish and Azeri */
	875	} else if(loc==UCASE_LOC_TURKISH && c==0x130) {
	876	/*
	877	# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
	878	# The following rules handle those cases.
	879
	880	0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
	881	0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
	882	*/
	883	return 0x69;
	884	} else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(csp, iter, context)) {
	885	/*
	886	# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
	887	# This matches the behavior of the canonically equivalent I-dot_above
	888
	889	0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
	890	0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
	891	*/
	892	return 0; /* remove the dot (continue without output) */
	893	} else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(csp, iter, context)) {
	894	/*
	895	# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
	896
	897	0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
	898	0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
	899	*/
	900	return 0x131;
	901	} else if(c==0x130) {
	902	/*
	903	# Preserve canonical equivalence for I with dot. Turkic is handled below.
	904
	905	0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
	906	*/
	907	*pString=iDot;
	908	return 2;
	909	} else if( c==0x3a3 &&
	910	!isFollowedByCasedLetter(csp, iter, context, 1) &&
	911	isFollowedByCasedLetter(csp, iter, context, -1) /* -1=preceded */
	912	) {
	913	/* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
	914	/*
	915	# Special case for final form of sigma
	916
	917	03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
	918	*/
	919	return 0x3c2; /* greek small final sigma */
	920	} else {
	921	/* no known conditional special case mapping, use a normal mapping */
	922	}
	923	} else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
	924	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
	925	full&=UCASE_FULL_LOWER;
	926	if(full!=0) {
	927	/* set the output pointer to the lowercase mapping */
	928	pString=reinterpret_cast<const UChar >(pe+1);
	929
	930	/* return the string length */
	931	return full;
	932	}
	933	}
	934
	935	if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
	936	GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe2, result);
	937	}
	938	}
	939
	940	return (result==c) ? ~result : result;
	941	}
	942
	943	/* internal */
	944	static int32_t
	945	toUpperOrTitle(const UCaseProps *csp, UChar32 c,
	946	UCaseContextIterator iter, void context,
	947	const UChar **pString,
	948	const char locale, int32_t locCache,
	949	UBool upperNotTitle) {
	950	UChar32 result=c;
	951	uint16_t props=UTRIE2_GET16(&csp->trie, c);
	952	if(!PROPS_HAS_EXCEPTION(props)) {
	953	if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
	954	result=c+UCASE_GET_DELTA(props);
	955	}
	956	} else {
	957	const uint16_t pe=GET_EXCEPTIONS(csp, props), pe2;
	958	uint16_t excWord=*pe++;
	959	int32_t full, idx;
	960
	961	pe2=pe;
	962
	963	if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
	964	/* use hardcoded conditions and mappings */
	965	int32_t loc=ucase_getCaseLocale(locale, locCache);
	966
	967	if(loc==UCASE_LOC_TURKISH && c==0x69) {
	968	/*
	969	# Turkish and Azeri
	970
	971	# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
	972	# The following rules handle those cases.
	973
	974	# When uppercasing, i turns into a dotted capital I
	975
	976	0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
	977	0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
	978	*/
	979	return 0x130;
	980	} else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(csp, iter, context)) {
	981	/*
	982	# Lithuanian
	983
	984	# Lithuanian retains the dot in a lowercase i when followed by accents.
	985
	986	# Remove DOT ABOVE after "i" with upper or titlecase
	987
	988	0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
	989	*/
	990	return 0; /* remove the dot (continue without output) */
	991	} else {
	992	/* no known conditional special case mapping, use a normal mapping */
	993	}
	994	} else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
	995	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
	996
	997	/* start of full case mapping strings */
	998	++pe;
	999
	1000	/* skip the lowercase and case-folding result strings */
	1001	pe+=full&UCASE_FULL_LOWER;
	1002	full>>=4;
	1003	pe+=full&0xf;
	1004	full>>=4;
	1005
	1006	if(upperNotTitle) {
	1007	full&=0xf;
	1008	} else {
	1009	/* skip the uppercase result string */
	1010	pe+=full&0xf;
	1011	full=(full>>4)&0xf;
	1012	}
	1013
	1014	if(full!=0) {
	1015	/* set the output pointer to the result string */
	1016	pString=reinterpret_cast<const UChar >(pe);
	1017
	1018	/* return the string length */
	1019	return full;
	1020	}
	1021	}
	1022
	1023	if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
	1024	idx=UCASE_EXC_TITLE;
	1025	} else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
	1026	/* here, titlecase is same as uppercase */
	1027	idx=UCASE_EXC_UPPER;
	1028	} else {
	1029	return ~c;
	1030	}
	1031	GET_SLOT_VALUE(excWord, idx, pe2, result);
	1032	}
	1033
	1034	return (result==c) ? ~result : result;
	1035	}
	1036
	1037	U_CAPI int32_t U_EXPORT2
	1038	ucase_toFullUpper(const UCaseProps *csp, UChar32 c,
	1039	UCaseContextIterator iter, void context,
	1040	const UChar **pString,
	1041	const char locale, int32_t locCache) {
	1042	return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, TRUE);
	1043	}
	1044
	1045	U_CAPI int32_t U_EXPORT2
	1046	ucase_toFullTitle(const UCaseProps *csp, UChar32 c,
	1047	UCaseContextIterator iter, void context,
	1048	const UChar **pString,
	1049	const char locale, int32_t locCache) {
	1050	return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, FALSE);
	1051	}
	1052
	1053	/* case folding ------------------------------------------------------------- */
	1054
	1055	/*
	1056	* Case folding is similar to lowercasing.
	1057	* The result may be a simple mapping, i.e., a single code point, or
	1058	* a full mapping, i.e., a string.
	1059	* If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
	1060	* then only the lowercase mapping is stored.
	1061	*
	1062	* Some special cases are hardcoded because their conditions cannot be
	1063	* parsed and processed from CaseFolding.txt.
	1064	*
	1065	* Unicode 3.2 CaseFolding.txt specifies for its status field:
	1066
	1067	# C: common case folding, common mappings shared by both simple and full mappings.
	1068	# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
	1069	# S: simple case folding, mappings to single characters where different from F.
	1070	# T: special case for uppercase I and dotted uppercase I
	1071	# - For non-Turkic languages, this mapping is normally not used.
	1072	# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
	1073	#
	1074	# Usage:
	1075	# A. To do a simple case folding, use the mappings with status C + S.
	1076	# B. To do a full case folding, use the mappings with status C + F.
	1077	#
	1078	# The mappings with status T can be used or omitted depending on the desired case-folding
	1079	# behavior. (The default option is to exclude them.)
	1080
	1081	* Unicode 3.2 has 'T' mappings as follows:
	1082
	1083	0049; T; 0131; # LATIN CAPITAL LETTER I
	1084	0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
	1085
	1086	* while the default mappings for these code points are:
	1087
	1088	0049; C; 0069; # LATIN CAPITAL LETTER I
	1089	0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
	1090
	1091	* U+0130 has no simple case folding (simple-case-folds to itself).
	1092	*/
	1093
	1094	/* return the simple case folding mapping for c */
	1095	U_CAPI UChar32 U_EXPORT2
	1096	ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options) {
	1097	uint16_t props=UTRIE2_GET16(&csp->trie, c);
	1098	if(!PROPS_HAS_EXCEPTION(props)) {
	1099	if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
	1100	c+=UCASE_GET_DELTA(props);
	1101	}
	1102	} else {
	1103	const uint16_t *pe=GET_EXCEPTIONS(csp, props);
	1104	uint16_t excWord=*pe++;
	1105	int32_t idx;
	1106	if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
	1107	/* special case folding mappings, hardcoded */
	1108	if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
	1109	/* default mappings */
	1110	if(c==0x49) {
	1111	/* 0049; C; 0069; # LATIN CAPITAL LETTER I */
	1112	return 0x69;
	1113	} else if(c==0x130) {
	1114	/* no simple case folding for U+0130 */
	1115	return c;
	1116	}
	1117	} else {
	1118	/* Turkic mappings */
	1119	if(c==0x49) {
	1120	/* 0049; T; 0131; # LATIN CAPITAL LETTER I */
	1121	return 0x131;
	1122	} else if(c==0x130) {
	1123	/* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
	1124	return 0x69;
	1125	}
	1126	}
	1127	}
	1128	if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
	1129	idx=UCASE_EXC_FOLD;
	1130	} else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
	1131	idx=UCASE_EXC_LOWER;
	1132	} else {
	1133	return c;
	1134	}
	1135	GET_SLOT_VALUE(excWord, idx, pe, c);
	1136	}
	1137	return c;
	1138	}
	1139
	1140	/*
	1141	* Issue for canonical caseless match (UAX #21):
	1142	* Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
	1143	* canonical equivalence, unlike default-option casefolding.
	1144	* For example, I-grave and I + grave fold to strings that are not canonically
	1145	* equivalent.
	1146	* For more details, see the comment in unorm_compare() in unorm.cpp
	1147	* and the intermediate prototype changes for Jitterbug 2021.
	1148	* (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
	1149	*
	1150	* This did not get fixed because it appears that it is not possible to fix
	1151	* it for uppercase and lowercase characters (I-grave vs. i-grave)
	1152	* together in a way that they still fold to common result strings.
	1153	*/
	1154
	1155	U_CAPI int32_t U_EXPORT2
	1156	ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
	1157	const UChar **pString,
	1158	uint32_t options)
	1159	{
	1160	UChar32 result=c;
	1161	uint16_t props=UTRIE2_GET16(&csp->trie, c);
	1162	if(!PROPS_HAS_EXCEPTION(props)) {
	1163	if(UCASE_GET_TYPE(props)>=UCASE_UPPER) {
	1164	result=c+UCASE_GET_DELTA(props);
	1165	}
	1166	} else {
	1167	const uint16_t pe=GET_EXCEPTIONS(csp, props), pe2;
	1168	uint16_t excWord=*pe++;
	1169	int32_t full, idx;
	1170
	1171	pe2=pe;
	1172
	1173	if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
	1174	/* use hardcoded conditions and mappings */
	1175	if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
	1176	/* default mappings */
	1177	if(c==0x49) {
	1178	/* 0049; C; 0069; # LATIN CAPITAL LETTER I */
	1179	return 0x69;
	1180	} else if(c==0x130) {
	1181	/* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
	1182	*pString=iDot;
	1183	return 2;
	1184	}
	1185	} else {
	1186	/* Turkic mappings */
	1187	if(c==0x49) {
	1188	/* 0049; T; 0131; # LATIN CAPITAL LETTER I */
	1189	return 0x131;
	1190	} else if(c==0x130) {
	1191	/* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
	1192	return 0x69;
	1193	}
	1194	}
	1195	} else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
	1196	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
	1197
	1198	/* start of full case mapping strings */
	1199	++pe;
	1200
	1201	/* skip the lowercase result string */
	1202	pe+=full&UCASE_FULL_LOWER;
	1203	full=(full>>4)&0xf;
	1204
	1205	if(full!=0) {
	1206	/* set the output pointer to the result string */
	1207	pString=reinterpret_cast<const UChar >(pe);
	1208
	1209	/* return the string length */
	1210	return full;
	1211	}
	1212	}
	1213
	1214	if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
	1215	idx=UCASE_EXC_FOLD;
	1216	} else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
	1217	idx=UCASE_EXC_LOWER;
	1218	} else {
	1219	return ~c;
	1220	}
	1221	GET_SLOT_VALUE(excWord, idx, pe2, result);
	1222	}
	1223
	1224	return (result==c) ? ~result : result;
	1225	}
	1226
	1227	/* case mapping properties API ---------------------------------------------- */
	1228
	/* Address of the built-in case properties; parenthesized so the expansion is safe in any expression. */
	#define GET_CASE_PROPS() (&ucase_props_singleton)
	1230
	1231	/* public API (see uchar.h) */
	1232
	1233	U_CAPI UBool U_EXPORT2
	1234	u_isULowercase(UChar32 c) {
	1235	return (UBool)(UCASE_LOWER==ucase_getType(GET_CASE_PROPS(), c));
	1236	}
	1237
	1238	U_CAPI UBool U_EXPORT2
	1239	u_isUUppercase(UChar32 c) {
	1240	return (UBool)(UCASE_UPPER==ucase_getType(GET_CASE_PROPS(), c));
	1241	}
	1242
	1243	/* Transforms the Unicode character to its lower case equivalent.*/
	1244	U_CAPI UChar32 U_EXPORT2
	1245	u_tolower(UChar32 c) {
	1246	return ucase_tolower(GET_CASE_PROPS(), c);
	1247	}
	1248
	1249	/* Transforms the Unicode character to its upper case equivalent.*/
	1250	U_CAPI UChar32 U_EXPORT2
	1251	u_toupper(UChar32 c) {
	1252	return ucase_toupper(GET_CASE_PROPS(), c);
	1253	}
	1254
	1255	/* Transforms the Unicode character to its title case equivalent.*/
	1256	U_CAPI UChar32 U_EXPORT2
	1257	u_totitle(UChar32 c) {
	1258	return ucase_totitle(GET_CASE_PROPS(), c);
	1259	}
	1260
	1261	/* return the simple case folding mapping for c */
	1262	U_CAPI UChar32 U_EXPORT2
	1263	u_foldCase(UChar32 c, uint32_t options) {
	1264	return ucase_fold(GET_CASE_PROPS(), c, options);
	1265	}
	1266
	1267	U_CFUNC int32_t U_EXPORT2
	1268	ucase_hasBinaryProperty(UChar32 c, UProperty which) {
	1269	/* case mapping properties */
	1270	const UChar *resultString;
	1271	int32_t locCache;
	1272	const UCaseProps *csp=GET_CASE_PROPS();
	1273	if(csp==NULL) {
	1274	return FALSE;
	1275	}
	1276	switch(which) {
	1277	case UCHAR_LOWERCASE:
	1278	return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
	1279	case UCHAR_UPPERCASE:
	1280	return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
	1281	case UCHAR_SOFT_DOTTED:
	1282	return ucase_isSoftDotted(csp, c);
	1283	case UCHAR_CASE_SENSITIVE:
	1284	return ucase_isCaseSensitive(csp, c);
	1285	case UCHAR_CASED:
	1286	return (UBool)(UCASE_NONE!=ucase_getType(csp, c));
	1287	case UCHAR_CASE_IGNORABLE:
	1288	return (UBool)(ucase_getTypeOrIgnorable(csp, c)>>2);
	1289	/*
	1290	* Note: The following Changes_When_Xyz are defined as testing whether
	1291	* the NFD form of the input changes when Xyz-case-mapped.
	1292	* However, this simpler implementation of these properties,
	1293	* ignoring NFD, passes the tests.
	1294	* The implementation needs to be changed if the tests start failing.
	1295	* When that happens, optimizations should be used to work with the
	1296	* per-single-code point ucase_toFullXyz() functions unless
	1297	* the NFD form has more than one code point,
	1298	* and the property starts set needs to be the union of the
	1299	* start sets for normalization and case mappings.
	1300	*/
	1301	case UCHAR_CHANGES_WHEN_LOWERCASED:
	1302	locCache=UCASE_LOC_ROOT;
	1303	return (UBool)(ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
	1304	case UCHAR_CHANGES_WHEN_UPPERCASED:
	1305	locCache=UCASE_LOC_ROOT;
	1306	return (UBool)(ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
	1307	case UCHAR_CHANGES_WHEN_TITLECASED:
	1308	locCache=UCASE_LOC_ROOT;
	1309	return (UBool)(ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
	1310	/* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
	1311	case UCHAR_CHANGES_WHEN_CASEMAPPED:
	1312	locCache=UCASE_LOC_ROOT;
	1313	return (UBool)(
	1314	ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 \|\|
	1315	ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 \|\|
	1316	ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0);
	1317	default:
	1318	return FALSE;
	1319	}
	1320	}