git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucase.cpp

Commit	Line	Data
f3c0d7a5 A	1	// © 2016 and later: Unicode, Inc. and others.
f3c0d7a5 A	2	// License & terms of use: http://www.unicode.org/copyright.html
374ca955 A	3	/*
	4	*******************************************************************************
	5	*
b331163b	6	* Copyright (C) 2004-2014, International Business Machines
374ca955 A	7	* Corporation and others. All Rights Reserved.
	8	*
	9	*******************************************************************************
4388f060	10	* file name: ucase.cpp
f3c0d7a5	11	* encoding: UTF-8
374ca955 A	12	* tab size: 8 (not used)
	13	* indentation:4
	14	*
	15	* created on: 2004aug30
	16	* created by: Markus W. Scherer
	17	*
	18	* Low-level Unicode character/string case mapping code.
	19	* Much code moved here (and modified) from uchar.c.
	20	*/
	21
	22	#include "unicode/utypes.h"
4388f060	23	#include "unicode/unistr.h"
374ca955 A	24	#include "unicode/uset.h"
374ca955 A	25	#include "unicode/udata.h" /* UDataInfo */
4388f060	26	#include "unicode/utf16.h"
374ca955 A	27	#include "ucmndata.h" /* DataHeader */
	28	#include "udatamem.h"
	29	#include "umutex.h"
	30	#include "uassert.h"
	31	#include "cmemory.h"
729e4ab9	32	#include "utrie2.h"
374ca955	33	#include "ucase.h"
374ca955 A	34
/*
 * Bundle of pointers into the case-properties data that the lookups in this
 * file read through ucase_props_singleton (presumably defined in the
 * generated ucase_props_data.h included below -- confirm there).
 * NOTE(review): the field order is presumably relied on by that generated
 * initializer; do not reorder without checking.
 */
	35	struct UCaseProps {
/* mem and indexes are not referenced in the visible part of this file */
	36	UDataMemory *mem;
	37	const int32_t *indexes;
/* base of the exceptions array; GET_EXCEPTIONS() offsets into it by props>>UCASE_EXC_SHIFT */
	38	const uint16_t *exceptions;
/* reverse case-folding table: a header (row count, row width, string width) followed by the rows; may be NULL */
4388f060	39	const uint16_t *unfold;
374ca955	40
/* code point -> 16-bit properties word, read with UTRIE2_GET16() */
729e4ab9	41	UTrie2 trie;
/* not referenced in the visible part of this file */
374ca955 A	42	uint8_t formatVersion[4];
	43	};
	44
4388f060 A	45	/* ucase_props_data.h is machine-generated by gencase --csource */
	46	#define INCLUDED_FROM_UCASE_CPP
	47	#include "ucase_props_data.h"
73c04bcf	48
374ca955 A	49	/* set of property starts for UnicodeSet ------------------------------------ */
	50
	51	static UBool U_CALLCONV
4388f060	52	_enumPropertyStartsRange(const void context, UChar32 start, UChar32 /end/, uint32_t /value*/) {
374ca955	53	/* add the start code point to the USet */
73c04bcf	54	const USetAdder sa=(const USetAdder )context;
374ca955 A	55	sa->add(sa->set, start);
	56	return TRUE;
	57	}
	58
46f4442e	59	U_CFUNC void U_EXPORT2
f3c0d7a5	60	ucase_addPropertyStarts(const USetAdder sa, UErrorCode pErrorCode) {
374ca955 A	61	if(U_FAILURE(*pErrorCode)) {
	62	return;
	63	}
	64
	65	/* add the start code point of each same-value range of the trie */
f3c0d7a5	66	utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
374ca955 A	67
	68	/* add code points with hardcoded properties, plus the ones following them */
	69
	70	/* (none right now, see comment below) */
	71
	72	/*
	73	* Omit code points with hardcoded specialcasing properties
	74	* because we do not build property UnicodeSets for them right now.
	75	*/
	76	}
	77
	78	/* data access primitives --------------------------------------------------- */
	79
0f5d89e8 A	80	U_CFUNC const UTrie2 * U_EXPORT2
	81	ucase_getTrie() {
	82	return &ucase_props_singleton.trie;
	83	}
374ca955	84
0f5d89e8	85	#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
374ca955 A	86
	87	/* number of bits in an 8-bit integer value */
	88	static const uint8_t flagsOffset[256]={
	89	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
	90	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	91	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	92	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	93	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	94	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	95	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	96	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	97	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	98	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	99	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	100	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	101	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	102	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	103	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	104	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
	105	};
	106
729e4ab9 A	107	#define HAS_SLOT(flags, idx) ((flags)&(1<<(idx)))
729e4ab9 A	108	#define SLOT_OFFSET(flags, idx) flagsOffset[(flags)&((1<<(idx))-1)]
374ca955 A	109
/*
 * Get the value of an optional-value slot where HAS_SLOT(excWord, idx).
 *
 * Slots are stored in index order right after the exceptions word.
 * If UCASE_EXC_DOUBLE_SLOTS is not set in excWord, each present slot is one
 * uint16_t; otherwise each present slot is two uint16_t, high half first.
 *
 * @param excWord (in) initial exceptions word
 * @param idx (in) desired slot index
 * @param pExc16 (in/out) const uint16_t * after excWord=*pExc16++;
 *               moved to the last uint16_t of the value, use +1 for beginning of next slot
 * @param value (out) int32_t or uint32_t output if hasSlot, otherwise not modified
 */
#define GET_SLOT_VALUE(excWord, idx, pExc16, value) UPRV_BLOCK_MACRO_BEGIN { \
    if(((excWord)&UCASE_EXC_DOUBLE_SLOTS)==0) { \
        (pExc16)+=SLOT_OFFSET(excWord, idx); \
        (value)=*pExc16; \
    } else { \
        (pExc16)+=2*SLOT_OFFSET(excWord, idx); \
        (value)=*pExc16++; \
        (value)=((value)<<16)|*pExc16; \
    } \
} UPRV_BLOCK_MACRO_END
374ca955 A	129
	130	/* simple case mappings ----------------------------------------------------- */
	131
	132	U_CAPI UChar32 U_EXPORT2
f3c0d7a5 A	133	ucase_tolower(UChar32 c) {
f3c0d7a5 A	134	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
0f5d89e8 A	135	if(!UCASE_HAS_EXCEPTION(props)) {
0f5d89e8 A	136	if(UCASE_IS_UPPER_OR_TITLE(props)) {
73c04bcf	137	c+=UCASE_GET_DELTA(props);
374ca955 A	138	}
374ca955 A	139	} else {
f3c0d7a5	140	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
374ca955	141	uint16_t excWord=*pe++;
0f5d89e8 A	142	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
	143	int32_t delta;
	144	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
	145	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
	146	}
374ca955 A	147	if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
	148	GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c);
	149	}
	150	}
	151	return c;
	152	}
	153
	154	U_CAPI UChar32 U_EXPORT2
f3c0d7a5 A	155	ucase_toupper(UChar32 c) {
f3c0d7a5 A	156	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
0f5d89e8	157	if(!UCASE_HAS_EXCEPTION(props)) {
73c04bcf A	158	if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
73c04bcf A	159	c+=UCASE_GET_DELTA(props);
374ca955 A	160	}
374ca955 A	161	} else {
f3c0d7a5	162	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
374ca955	163	uint16_t excWord=*pe++;
0f5d89e8 A	164	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
	165	int32_t delta;
	166	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
	167	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
	168	}
374ca955 A	169	if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
	170	GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c);
	171	}
	172	}
	173	return c;
	174	}
	175
	176	U_CAPI UChar32 U_EXPORT2
f3c0d7a5 A	177	ucase_totitle(UChar32 c) {
f3c0d7a5 A	178	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
0f5d89e8	179	if(!UCASE_HAS_EXCEPTION(props)) {
73c04bcf A	180	if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
73c04bcf A	181	c+=UCASE_GET_DELTA(props);
374ca955 A	182	}
374ca955 A	183	} else {
f3c0d7a5	184	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
374ca955	185	uint16_t excWord=*pe++;
0f5d89e8 A	186	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
	187	int32_t delta;
	188	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
	189	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
	190	}
729e4ab9	191	int32_t idx;
374ca955	192	if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
729e4ab9	193	idx=UCASE_EXC_TITLE;
374ca955	194	} else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
729e4ab9	195	idx=UCASE_EXC_UPPER;
374ca955 A	196	} else {
	197	return c;
	198	}
729e4ab9	199	GET_SLOT_VALUE(excWord, idx, pe, c);
374ca955 A	200	}
	201	return c;
	202	}
	203
/*
 * UTF-16 strings for i/j and relatives followed by U+0307 (and, for the last
 * three, a further combining mark). Only iDot is used in the visible part of
 * this file (ucase_addCaseClosure, case 0x130).
 * NOTE(review): iOgonekDot is declared with 3 elements but has only two
 * initializers, so its third element is zero-initialized. Confirm whether
 * [2] was intended before relying on the array size.
 */
46f4442e A	204	static const UChar iDot[2] = { 0x69, 0x307 };
	205	static const UChar jDot[2] = { 0x6a, 0x307 };
	206	static const UChar iOgonekDot[3] = { 0x12f, 0x307 };
	207	static const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 };
	208	static const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 };
	209	static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 };
	210
	211
	212	U_CFUNC void U_EXPORT2
f3c0d7a5	213	ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
73c04bcf A	214	uint16_t props;
	215
	216	/*
	217	* Hardcode the case closure of i and its relatives and ignore the
	218	* data file data for these characters.
	219	* The Turkic dotless i and dotted I with their case mapping conditions
	220	* and case folding option make the related characters behave specially.
	221	* This code matches their closure behavior to their case folding behavior.
	222	*/
73c04bcf A	223
	224	switch(c) {
	225	case 0x49:
	226	/* regular i and I are in one equivalence class */
	227	sa->add(sa->set, 0x69);
	228	return;
	229	case 0x69:
	230	sa->add(sa->set, 0x49);
	231	return;
	232	case 0x130:
	233	/* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
	234	sa->addString(sa->set, iDot, 2);
	235	return;
	236	case 0x131:
	237	/* dotless i is in a class by itself */
	238	return;
	239	default:
	240	/* otherwise use the data file data */
	241	break;
	242	}
	243
f3c0d7a5	244	props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
0f5d89e8	245	if(!UCASE_HAS_EXCEPTION(props)) {
73c04bcf A	246	if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
	247	/* add the one simple case mapping, no matter what type it is */
	248	int32_t delta=UCASE_GET_DELTA(props);
	249	if(delta!=0) {
	250	sa->add(sa->set, c+delta);
	251	}
	252	}
	253	} else {
	254	/*
	255	* c has exceptions, so there may be multiple simple and/or
	256	* full case mappings. Add them all.
	257	*/
f3c0d7a5	258	const uint16_t pe0, pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
73c04bcf A	259	const UChar *closure;
73c04bcf A	260	uint16_t excWord=*pe++;
729e4ab9	261	int32_t idx, closureLength, fullLength, length;
73c04bcf A	262
	263	pe0=pe;
	264
	265	/* add all simple case mappings */
729e4ab9 A	266	for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
729e4ab9 A	267	if(HAS_SLOT(excWord, idx)) {
73c04bcf	268	pe=pe0;
729e4ab9	269	GET_SLOT_VALUE(excWord, idx, pe, c);
73c04bcf A	270	sa->add(sa->set, c);
	271	}
	272	}
0f5d89e8 A	273	if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
	274	pe=pe0;
	275	int32_t delta;
	276	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
	277	sa->add(sa->set, (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta);
	278	}
73c04bcf A	279
	280	/* get the closure string pointer & length */
	281	if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
	282	pe=pe0;
	283	GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
	284	closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
	285	closure=(const UChar )pe+1; / behind this slot, unless there are full case mappings */
	286	} else {
	287	closureLength=0;
	288	closure=NULL;
	289	}
	290
	291	/* add the full case folding */
	292	if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
	293	pe=pe0;
	294	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
	295
	296	/* start of full case mapping strings */
	297	++pe;
	298
	299	fullLength&=0xffff; /* bits 16 and higher are reserved */
	300
	301	/* skip the lowercase result string */
	302	pe+=fullLength&UCASE_FULL_LOWER;
	303	fullLength>>=4;
	304
	305	/* add the full case folding string */
	306	length=fullLength&0xf;
	307	if(length!=0) {
	308	sa->addString(sa->set, (const UChar *)pe, length);
	309	pe+=length;
	310	}
	311
	312	/* skip the uppercase and titlecase strings */
	313	fullLength>>=4;
	314	pe+=fullLength&0xf;
	315	fullLength>>=4;
	316	pe+=fullLength;
	317
	318	closure=(const UChar )pe; / behind full case mappings */
	319	}
	320
	321	/* add each code point in the closure string */
729e4ab9 A	322	for(idx=0; idx<closureLength;) {
729e4ab9 A	323	U16_NEXT_UNSAFE(closure, idx, c);
73c04bcf A	324	sa->add(sa->set, c);
	325	}
	326	}
	327	}
	328
	329	/*
	330	* compare s, which has a length, with t, which has a maximum length or is NUL-terminated
	331	* must be length>0 and max>0 and length<=max
	332	*/
4388f060	333	static inline int32_t
73c04bcf A	334	strcmpMax(const UChar s, int32_t length, const UChar t, int32_t max) {
	335	int32_t c1, c2;
	336
	337	max-=length; /* we require length<=max, so no need to decrement max in the loop */
	338	do {
	339	c1=*s++;
	340	c2=*t++;
	341	if(c2==0) {
	342	return 1; /* reached the end of t but not of s */
	343	}
	344	c1-=c2;
	345	if(c1!=0) {
	346	return c1; /* return difference result */
	347	}
	348	} while(--length>0);
	349	/* ends with length==0 */
	350
	351	if(max==0 \|\| *t==0) {
	352	return 0; /* equal to length of both strings */
	353	} else {
	354	return -max; /* return lengh difference */
	355	}
	356	}
	357
46f4442e	358	U_CFUNC UBool U_EXPORT2
f3c0d7a5	359	ucase_addStringCaseClosure(const UChar s, int32_t length, const USetAdder sa) {
73c04bcf A	360	int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;
73c04bcf A	361
f3c0d7a5	362	if(ucase_props_singleton.unfold==NULL \|\| s==NULL) {
73c04bcf A	363	return FALSE; /* no reverse case folding data, or no string */
	364	}
	365	if(length<=1) {
	366	/* the string is too short to find any match */
	367	/*
	368	* more precise would be:
	369	* if(!u_strHasMoreChar32Than(s, length, 1))
	370	* but this does not make much practical difference because
	371	* a single supplementary code point would just not be found
	372	*/
	373	return FALSE;
	374	}
	375
f3c0d7a5	376	const uint16_t *unfold=ucase_props_singleton.unfold;
73c04bcf A	377	unfoldRows=unfold[UCASE_UNFOLD_ROWS];
	378	unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH];
	379	unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH];
	380	unfold+=unfoldRowWidth;
	381
	382	if(length>unfoldStringWidth) {
	383	/* the string is too long to find any match */
	384	return FALSE;
	385	}
	386
	387	/* do a binary search for the string */
	388	start=0;
	389	limit=unfoldRows;
	390	while(start<limit) {
	391	i=(start+limit)/2;
4388f060	392	const UChar p=reinterpret_cast<const UChar >(unfold+(i*unfoldRowWidth));
73c04bcf A	393	result=strcmpMax(s, length, p, unfoldStringWidth);
	394
	395	if(result==0) {
	396	/* found the string: add each code point, and its case closure */
	397	UChar32 c;
	398
	399	for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) {
	400	U16_NEXT_UNSAFE(p, i, c);
	401	sa->add(sa->set, c);
f3c0d7a5	402	ucase_addCaseClosure(c, sa);
73c04bcf A	403	}
	404	return TRUE;
	405	} else if(result<0) {
	406	limit=i;
	407	} else /* result>0 */ {
	408	start=i+1;
	409	}
	410	}
	411
	412	return FALSE; /* string not found */
	413	}
	414
4388f060 A	415	U_NAMESPACE_BEGIN
	416
// Sets up iteration over the rows of the reverse case-folding (unfold) table.
// The table dimensions are read from the header row, then unfold is advanced
// past that header so that row 0 is the first data row.
// NOTE(review): the later initializers read through unfold, so this relies on
// unfold being declared before them in the class -- confirm in ucase.h.
// NOTE(review): unlike ucase_addStringCaseClosure(), there is no NULL check
// on ucase_props_singleton.unfold here.
	417	FullCaseFoldingIterator::FullCaseFoldingIterator()
	418	: unfold(reinterpret_cast<const UChar *>(ucase_props_singleton.unfold)),
	419	unfoldRows(unfold[UCASE_UNFOLD_ROWS]),
	420	unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]),
	421	unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]),
	422	currentRow(0),
// the code points of a row start right after its string column
	423	rowCpIndex(unfoldStringWidth) {
	424	unfold+=unfoldRowWidth;
	425	}
	426
	427	UChar32
	428	FullCaseFoldingIterator::next(UnicodeString &full) {
	429	// Advance past the last-delivered code point.
	430	const UChar p=unfold+(currentRowunfoldRowWidth);
	431	if(rowCpIndex>=unfoldRowWidth \|\| p[rowCpIndex]==0) {
	432	++currentRow;
	433	p+=unfoldRowWidth;
	434	rowCpIndex=unfoldStringWidth;
	435	}
	436	if(currentRow>=unfoldRows) { return U_SENTINEL; }
	437	// Set "full" to the NUL-terminated string in the first unfold column.
	438	int32_t length=unfoldStringWidth;
	439	while(length>0 && p[length-1]==0) { --length; }
	440	full.setTo(FALSE, p, length);
	441	// Return the code point.
	442	UChar32 c;
	443	U16_NEXT_UNSAFE(p, rowCpIndex, c);
	444	return c;
	445	}
	446
0f5d89e8 A	447	namespace LatinCase {
	448
	449	const int8_t TO_LOWER_NORMAL[LIMIT] = {
	450	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	451	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	452	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	453	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	454
	455	0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
	456	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
	457	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	458	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	459
	460	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	461	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	462	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	463	0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	464
	465	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
	466	32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
	467	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	468	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	469
	470	1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
	471	1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
	472	1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
	473	EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
	474
	475	0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
	476	1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
	477	1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
	478	1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
	479	};
	480
	481	const int8_t TO_LOWER_TR_LT[LIMIT] = {
	482	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	483	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	484	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	485	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	486
	487	0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32,
	488	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
	489	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	490	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	491
	492	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	493	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	494	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	495	0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	496
	497	32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32,
	498	32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
	499	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	500	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	501
	502	1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
	503	1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
	504	1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0,
	505	EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
	506
	507	0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
	508	1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
	509	1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
	510	1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
511	};
512
513	const int8_t TO_UPPER_NORMAL[LIMIT] = {
514	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518
519	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
521	0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
522	-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
523
524	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527	0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528
529	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
531	-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
532	-32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
533
534	0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
535	0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
536	0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
537	0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
538
539	-1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
540	0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
541	0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
542	0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
543	};
544
545	const int8_t TO_UPPER_TR[LIMIT] = {
546	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
547	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
548	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
549	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
550
551	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
552	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
553	0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32,
554	-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,
555
556	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
557	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
558	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559	0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560
561	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
563	-32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
564	-32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,
565
566	0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
567	0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
568	0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
569	0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,
570
571	-1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
572	0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
573	0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
574	0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
575	};
576
577	} // namespace LatinCase
578
4388f060 A	579	U_NAMESPACE_END
4388f060 A	580
374ca955 A	581	/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
374ca955 A	582	U_CAPI int32_t U_EXPORT2
f3c0d7a5 A	583	ucase_getType(UChar32 c) {
f3c0d7a5 A	584	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
73c04bcf	585	return UCASE_GET_TYPE(props);
374ca955 A	586	}
374ca955 A	587
729e4ab9	588	/** @return same as ucase_getType() and set bit 2 if c is case-ignorable */
374ca955	589	U_CAPI int32_t U_EXPORT2
f3c0d7a5 A	590	ucase_getTypeOrIgnorable(UChar32 c) {
f3c0d7a5 A	591	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
4388f060	592	return UCASE_GET_TYPE_AND_IGNORABLE(props);
374ca955 A	593	}
	594
	595	/** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */
4388f060	596	static inline int32_t
f3c0d7a5 A	597	getDotType(UChar32 c) {
f3c0d7a5 A	598	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
0f5d89e8	599	if(!UCASE_HAS_EXCEPTION(props)) {
374ca955 A	600	return props&UCASE_DOT_MASK;
374ca955 A	601	} else {
f3c0d7a5	602	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
374ca955 A	603	return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK;
	604	}
	605	}
	606
	607	U_CAPI UBool U_EXPORT2
f3c0d7a5 A	608	ucase_isSoftDotted(UChar32 c) {
f3c0d7a5 A	609	return (UBool)(getDotType(c)==UCASE_SOFT_DOTTED);
374ca955 A	610	}
	611
	612	U_CAPI UBool U_EXPORT2
f3c0d7a5 A	613	ucase_isCaseSensitive(UChar32 c) {
f3c0d7a5 A	614	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
0f5d89e8 A	615	if(!UCASE_HAS_EXCEPTION(props)) {
	616	return (UBool)((props&UCASE_SENSITIVE)!=0);
	617	} else {
	618	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
	619	return (UBool)((*pe&UCASE_EXC_SENSITIVE)!=0);
	620	}
374ca955 A	621	}
374ca955 A	622
374ca955 A	623	/* string casing ------------------------------------------------------------ */
	624
	625	/*
	626	* These internal functions form the core of string case mappings.
	627	* They map single code points to result code points or strings and take
	628	* all necessary conditions (context, locale ID, options) into account.
	629	*
	630	* They do not iterate over the source or write to the destination
	631	* so that the same functions are useful for non-standard string storage,
	632	* such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
	633	* For the same reason, the "surrounding text" context is passed in as a
	634	* UCaseContextIterator which does not make any assumptions about
	635	* the underlying storage.
	636	*
	637	* This section contains helper functions that check for conditions
	638	* in the input text surrounding the current code point
	639	* according to SpecialCasing.txt.
	640	*
	641	* Each helper function gets the index
	642	* - after the current code point if it looks at following text
	643	* - before the current code point if it looks at preceding text
	644	*
	645	* Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
	646	*
	647	* Final_Sigma
	648	* C is preceded by a sequence consisting of
	649	* a cased letter and a case-ignorable sequence,
	650	* and C is not followed by a sequence consisting of
	651	* an ignorable sequence and then a cased letter.
	652	*
	653	* More_Above
	654	* C is followed by one or more characters of combining class 230 (ABOVE)
	655	* in the combining character sequence.
	656	*
	657	* After_Soft_Dotted
	658	* The last preceding character with combining class of zero before C
	659	* was Soft_Dotted,
	660	* and there is no intervening combining character class 230 (ABOVE).
	661	*
	662	* Before_Dot
	663	* C is followed by combining dot above (U+0307).
	664	* Any sequence of characters with a combining class that is neither 0 nor 230
	665	* may intervene between the current character and the combining dot above.
	666	*
	667	* The erratum from 2002-10-31 adds the condition
	668	*
	669	* After_I
	670	* The last preceding base character was an uppercase I, and there is no
	671	* intervening combining character class 230 (ABOVE).
	672	*
	673	* (See Jitterbug 2344 and the comments on After_I below.)
	674	*
	675	* Helper definitions in Unicode 3.2 UAX 21:
	676	*
	677	* D1. A character C is defined to be cased
	678	* if it meets any of the following criteria:
	679	*
	680	* - The general category of C is Titlecase Letter (Lt)
	681	* - In [CoreProps], C has one of the properties Uppercase, or Lowercase
	682	* - Given D = NFD(C), then it is not the case that:
	683	* D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
	684	* (This third criterion does not add any characters to the list
	685	* for Unicode 3.2. Ignored.)
	686	*
687	* D2. A character C is defined to be case-ignorable
688	* if it meets either of the following criteria:
689	*
690	* - The general category of C is
691	* Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
692	* Letter Modifier (Lm), or Symbol Modifier (Sk)
693	* - C is one of the following characters
694	* U+0027 APOSTROPHE
695	* U+00AD SOFT HYPHEN (SHY)
696	* U+2019 RIGHT SINGLE QUOTATION MARK
697	* (the preferred character for apostrophe)
698	*
699	* D3. A case-ignorable sequence is a sequence of
700	* zero or more case-ignorable characters.
701	*/
702
/* case-insensitive tests for single letters of a locale ID */
#define is_d(c) ((c)=='d' || (c)=='D')
#define is_e(c) ((c)=='e' || (c)=='E')
#define is_i(c) ((c)=='i' || (c)=='I')
#define is_l(c) ((c)=='l' || (c)=='L')
#define is_r(c) ((c)=='r' || (c)=='R')
#define is_t(c) ((c)=='t' || (c)=='T')
#define is_u(c) ((c)=='u' || (c)=='U')
#define is_z(c) ((c)=='z' || (c)=='Z')

/* separator? (underscore, hyphen, or the terminating NUL) */
#define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0)
	714
73c04bcf	715	/**
374ca955 A	716	* Requires non-NULL locale ID but otherwise does the equivalent of
	717	* checking for language codes as if uloc_getLanguage() were called:
	718	* Accepts both 2- and 3-letter codes and accepts case variants.
	719	*/
73c04bcf	720	U_CFUNC int32_t
f3c0d7a5	721	ucase_getCaseLocale(const char *locale) {
374ca955 A	722	/*
	723	* This function used to use uloc_getLanguage(), but the current code
	724	* removes the dependency of this low-level code on uloc implementation code
	725	* and is faster because not the whole locale ID has to be
	726	* examined and copied/transformed.
	727	*
	728	* Because this code does not want to depend on uloc, the caller must
	729	* pass in a non-NULL locale, i.e., may need to call uloc_getDefault().
	730	*/
f3c0d7a5 A	731	char c=*locale++;
	732	// Fastpath for English "en" which is often used for default (=root locale) case mappings,
	733	// and for Chinese "zh": Very common but no special case mapping behavior.
	734	// Then check lowercase vs. uppercase to reduce the number of comparisons
	735	// for other locales without special behavior.
	736	if(c=='e') {
	737	/* el or ell? */
374ca955	738	c=*locale++;
f3c0d7a5	739	if(is_l(c)) {
374ca955	740	c=*locale++;
f3c0d7a5 A	741	if(is_l(c)) {
	742	c=*locale;
	743	}
374ca955	744	if(is_sep(c)) {
f3c0d7a5	745	return UCASE_LOC_GREEK;
374ca955 A	746	}
374ca955 A	747	}
f3c0d7a5 A	748	// en, es, ... -> root
	749	} else if(c=='z') {
	750	return UCASE_LOC_ROOT;
	751	#if U_CHARSET_FAMILY==U_ASCII_FAMILY
	752	} else if(c>='a') { // ASCII a-z = 0x61..0x7a, after A-Z
	753	#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
	754	} else if(c<='z') { // EBCDIC a-z = 0x81..0xa9 with two gaps, before A-Z
	755	#else
	756	# error Unknown charset family!
	757	#endif
	758	// lowercase c
	759	if(c=='t') {
	760	/* tr or tur? */
374ca955	761	c=*locale++;
f3c0d7a5 A	762	if(is_u(c)) {
	763	c=*locale++;
	764	}
	765	if(is_r(c)) {
374ca955	766	c=*locale;
f3c0d7a5 A	767	if(is_sep(c)) {
	768	return UCASE_LOC_TURKISH;
	769	}
374ca955	770	}
f3c0d7a5 A	771	} else if(c=='a') {
	772	/* az or aze? */
	773	c=*locale++;
	774	if(is_z(c)) {
	775	c=*locale++;
	776	if(is_e(c)) {
	777	c=*locale;
	778	}
	779	if(is_sep(c)) {
	780	return UCASE_LOC_TURKISH;
	781	}
374ca955	782	}
f3c0d7a5 A	783	} else if(c=='l') {
f3c0d7a5 A	784	/* lt or lit? */
374ca955	785	c=*locale++;
f3c0d7a5 A	786	if(is_i(c)) {
	787	c=*locale++;
	788	}
	789	if(is_t(c)) {
	790	c=*locale;
	791	if(is_sep(c)) {
	792	return UCASE_LOC_LITHUANIAN;
	793	}
	794	}
	795	} else if(c=='n') {
	796	/* nl or nld? */
	797	c=*locale++;
	798	if(is_l(c)) {
	799	c=*locale++;
	800	if(is_d(c)) {
	801	c=*locale;
	802	}
	803	if(is_sep(c)) {
	804	return UCASE_LOC_DUTCH;
	805	}
46f4442e A	806	}
46f4442e A	807	}
f3c0d7a5 A	808	} else {
	809	// uppercase c
	810	// Same code as for lowercase c but also check for 'E'.
	811	if(c=='T') {
	812	/* tr or tur? */
46f4442e	813	c=*locale++;
f3c0d7a5 A	814	if(is_u(c)) {
	815	c=*locale++;
	816	}
	817	if(is_r(c)) {
46f4442e	818	c=*locale;
f3c0d7a5 A	819	if(is_sep(c)) {
	820	return UCASE_LOC_TURKISH;
	821	}
46f4442e	822	}
f3c0d7a5 A	823	} else if(c=='A') {
	824	/* az or aze? */
	825	c=*locale++;
	826	if(is_z(c)) {
	827	c=*locale++;
	828	if(is_e(c)) {
	829	c=*locale;
	830	}
	831	if(is_sep(c)) {
	832	return UCASE_LOC_TURKISH;
	833	}
	834	}
	835	} else if(c=='L') {
	836	/* lt or lit? */
	837	c=*locale++;
	838	if(is_i(c)) {
	839	c=*locale++;
	840	}
	841	if(is_t(c)) {
	842	c=*locale;
	843	if(is_sep(c)) {
	844	return UCASE_LOC_LITHUANIAN;
	845	}
	846	}
	847	} else if(c=='E') {
	848	/* el or ell? */
	849	c=*locale++;
	850	if(is_l(c)) {
	851	c=*locale++;
	852	if(is_l(c)) {
	853	c=*locale;
	854	}
	855	if(is_sep(c)) {
	856	return UCASE_LOC_GREEK;
	857	}
	858	}
	859	} else if(c=='N') {
	860	/* nl or nld? */
	861	c=*locale++;
	862	if(is_l(c)) {
	863	c=*locale++;
	864	if(is_d(c)) {
	865	c=*locale;
	866	}
	867	if(is_sep(c)) {
	868	return UCASE_LOC_DUTCH;
	869	}
374ca955 A	870	}
	871	}
	872	}
f3c0d7a5	873	return UCASE_LOC_ROOT;
374ca955 A	874	}
374ca955 A	875
729e4ab9 A	876	/*
	877	* Is followed by
	878	* {case-ignorable}* cased
	879	* ?
	880	* (dir determines looking forward/backward)
	881	* If a character is case-ignorable, it is skipped regardless of whether
	882	* it is also cased or not.
	883	*/
374ca955	884	static UBool
f3c0d7a5	885	isFollowedByCasedLetter(UCaseContextIterator iter, void context, int8_t dir) {
374ca955	886	UChar32 c;
374ca955 A	887
	888	if(iter==NULL) {
	889	return FALSE;
	890	}
	891
	892	for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) {
f3c0d7a5	893	int32_t type=ucase_getTypeOrIgnorable(c);
729e4ab9	894	if(type&4) {
374ca955	895	/* case-ignorable, continue with the loop */
729e4ab9 A	896	} else if(type!=UCASE_NONE) {
729e4ab9 A	897	return TRUE; /* followed by cased letter */
374ca955	898	} else {
729e4ab9	899	return FALSE; /* uncased and not case-ignorable */
374ca955 A	900	}
	901	}
	902
	903	return FALSE; /* not followed by cased letter */
	904	}
	905
	906	/* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
	907	static UBool
f3c0d7a5	908	isPrecededBySoftDotted(UCaseContextIterator iter, void context) {
374ca955 A	909	UChar32 c;
	910	int32_t dotType;
	911	int8_t dir;
	912
	913	if(iter==NULL) {
	914	return FALSE;
	915	}
	916
	917	for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
f3c0d7a5	918	dotType=getDotType(c);
374ca955 A	919	if(dotType==UCASE_SOFT_DOTTED) {
	920	return TRUE; /* preceded by TYPE_i */
	921	} else if(dotType!=UCASE_OTHER_ACCENT) {
	922	return FALSE; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
	923	}
	924	}
	925
	926	return FALSE; /* not preceded by TYPE_i */
	927	}
	928
	929	/*
	930	* See Jitterbug 2344:
	931	* The condition After_I for Turkic-lowercasing of U+0307 combining dot above
	932	* is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
	933	* we made those releases compatible with Unicode 3.2 which had not fixed
	934	* a related bug in SpecialCasing.txt.
	935	*
	936	* From the Jitterbug 2344 text:
	937	* ... this bug is listed as a Unicode erratum
	938	* from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
	939	* <quote>
	940	* There are two errors in SpecialCasing.txt.
	941	* 1. Missing semicolons on two lines. ... [irrelevant for ICU]
	942	* 2. An incorrect context definition. Correct as follows:
	943	* < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
	944	* < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
	945	* ---
	946	* > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
	947	* > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
	948	* where the context After_I is defined as:
	949	* The last preceding base character was an uppercase I, and there is no
	950	* intervening combining character class 230 (ABOVE).
	951	* </quote>
	952	*
	953	* Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
	954	*
	955	* # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
	956	* # This matches the behavior of the canonically equivalent I-dot_above
	957	*
	958	* See also the description in this place in older versions of uchar.c (revision 1.100).
	959	*
	960	* Markus W. Scherer 2003-feb-15
	961	*/
	962
	963	/* Is preceded by base character 'I' with no intervening cc=230 ? */
	964	static UBool
f3c0d7a5	965	isPrecededBy_I(UCaseContextIterator iter, void context) {
374ca955 A	966	UChar32 c;
	967	int32_t dotType;
	968	int8_t dir;
	969
	970	if(iter==NULL) {
	971	return FALSE;
	972	}
	973
	974	for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
	975	if(c==0x49) {
	976	return TRUE; /* preceded by I */
	977	}
f3c0d7a5	978	dotType=getDotType(c);
374ca955 A	979	if(dotType!=UCASE_OTHER_ACCENT) {
	980	return FALSE; /* preceded by different base character (not I), or intervening cc==230 */
	981	}
	982	}
	983
	984	return FALSE; /* not preceded by I */
	985	}
	986
	987	/* Is followed by one or more cc==230 ? */
	988	static UBool
f3c0d7a5	989	isFollowedByMoreAbove(UCaseContextIterator iter, void context) {
374ca955 A	990	UChar32 c;
	991	int32_t dotType;
	992	int8_t dir;
	993
	994	if(iter==NULL) {
	995	return FALSE;
	996	}
	997
	998	for(dir=1; (c=iter(context, dir))>=0; dir=0) {
f3c0d7a5	999	dotType=getDotType(c);
374ca955 A	1000	if(dotType==UCASE_ABOVE) {
	1001	return TRUE; /* at least one cc==230 following */
	1002	} else if(dotType!=UCASE_OTHER_ACCENT) {
	1003	return FALSE; /* next base character, no more cc==230 following */
	1004	}
	1005	}
	1006
	1007	return FALSE; /* no more cc==230 following */
	1008	}
	1009
	1010	/* Is followed by a dot above (without cc==230 in between) ? */
	1011	static UBool
f3c0d7a5	1012	isFollowedByDotAbove(UCaseContextIterator iter, void context) {
374ca955 A	1013	UChar32 c;
	1014	int32_t dotType;
	1015	int8_t dir;
	1016
	1017	if(iter==NULL) {
	1018	return FALSE;
	1019	}
	1020
	1021	for(dir=1; (c=iter(context, dir))>=0; dir=0) {
	1022	if(c==0x307) {
	1023	return TRUE;
	1024	}
f3c0d7a5	1025	dotType=getDotType(c);
374ca955 A	1026	if(dotType!=UCASE_OTHER_ACCENT) {
	1027	return FALSE; /* next base character or cc==230 in between */
	1028	}
	1029	}
	1030
	1031	return FALSE; /* no dot above following */
	1032	}
	1033
	1034	U_CAPI int32_t U_EXPORT2
f3c0d7a5	1035	ucase_toFullLower(UChar32 c,
374ca955 A	1036	UCaseContextIterator iter, void context,
374ca955 A	1037	const UChar **pString,
f3c0d7a5 A	1038	int32_t loc) {
	1039	// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
	1040	U_ASSERT(c >= 0);
729e4ab9	1041	UChar32 result=c;
f3c0d7a5	1042	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
0f5d89e8 A	1043	if(!UCASE_HAS_EXCEPTION(props)) {
0f5d89e8 A	1044	if(UCASE_IS_UPPER_OR_TITLE(props)) {
73c04bcf	1045	result=c+UCASE_GET_DELTA(props);
374ca955 A	1046	}
374ca955 A	1047	} else {
f3c0d7a5	1048	const uint16_t pe=GET_EXCEPTIONS(&ucase_props_singleton, props), pe2;
374ca955 A	1049	uint16_t excWord=*pe++;
	1050	int32_t full;
	1051
	1052	pe2=pe;
	1053
	1054	if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
	1055	/* use hardcoded conditions and mappings */
374ca955 A	1056
	1057	/*
	1058	* Test for conditional mappings first
	1059	* (otherwise the unconditional default mappings are always taken),
	1060	* then test for characters that have unconditional mappings in SpecialCasing.txt,
	1061	* then get the UnicodeData.txt mappings.
	1062	*/
46f4442e	1063	if( loc==UCASE_LOC_LITHUANIAN &&
374ca955 A	1064	/* base characters, find accents above */
374ca955 A	1065	(((c==0x49 \|\| c==0x4a \|\| c==0x12e) &&
f3c0d7a5	1066	isFollowedByMoreAbove(iter, context)) \|\|
374ca955 A	1067	/* precomposed with accent above, no need to find one */
	1068	(c==0xcc \|\| c==0xcd \|\| c==0x128))
	1069	) {
	1070	/*
	1071	# Lithuanian
	1072
	1073	# Lithuanian retains the dot in a lowercase i when followed by accents.
	1074
	1075	# Introduce an explicit dot above when lowercasing capital I's and J's
	1076	# whenever there are more accents above.
	1077	# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
	1078
	1079	0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
	1080	004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
	1081	012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
	1082	00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
	1083	00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
	1084	0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
	1085	*/
	1086	switch(c) {
	1087	case 0x49: /* LATIN CAPITAL LETTER I */
	1088	*pString=iDot;
	1089	return 2;
	1090	case 0x4a: /* LATIN CAPITAL LETTER J */
	1091	*pString=jDot;
	1092	return 2;
	1093	case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
	1094	*pString=iOgonekDot;
	1095	return 2;
	1096	case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
	1097	*pString=iDotGrave;
	1098	return 3;
	1099	case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
	1100	*pString=iDotAcute;
	1101	return 3;
	1102	case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
	1103	*pString=iDotTilde;
	1104	return 3;
	1105	default:
	1106	return 0; /* will not occur */
	1107	}
	1108	/* # Turkish and Azeri */
46f4442e	1109	} else if(loc==UCASE_LOC_TURKISH && c==0x130) {
374ca955 A	1110	/*
	1111	# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
	1112	# The following rules handle those cases.
	1113
	1114	0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
	1115	0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
	1116	*/
	1117	return 0x69;
f3c0d7a5	1118	} else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(iter, context)) {
374ca955 A	1119	/*
	1120	# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
	1121	# This matches the behavior of the canonically equivalent I-dot_above
	1122
	1123	0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
	1124	0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
	1125	*/
0f5d89e8	1126	*pString=nullptr;
374ca955	1127	return 0; /* remove the dot (continue without output) */
f3c0d7a5	1128	} else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
374ca955 A	1129	/*
	1130	# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
	1131
	1132	0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
	1133	0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
	1134	*/
	1135	return 0x131;
	1136	} else if(c==0x130) {
	1137	/*
	1138	# Preserve canonical equivalence for I with dot. Turkic is handled below.
	1139
	1140	0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
	1141	*/
	1142	*pString=iDot;
	1143	return 2;
	1144	} else if( c==0x3a3 &&
f3c0d7a5 A	1145	!isFollowedByCasedLetter(iter, context, 1) &&
f3c0d7a5 A	1146	isFollowedByCasedLetter(iter, context, -1) /* -1=preceded */
374ca955 A	1147	) {
	1148	/* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
	1149	/*
	1150	# Special case for final form of sigma
	1151
	1152	03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
	1153	*/
	1154	return 0x3c2; /* greek small final sigma */
	1155	} else {
	1156	/* no known conditional special case mapping, use a normal mapping */
	1157	}
	1158	} else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
	1159	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
	1160	full&=UCASE_FULL_LOWER;
	1161	if(full!=0) {
	1162	/* set the output pointer to the lowercase mapping */
4388f060	1163	pString=reinterpret_cast<const UChar >(pe+1);
374ca955 A	1164
	1165	/* return the string length */
	1166	return full;
	1167	}
	1168	}
	1169
0f5d89e8 A	1170	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
	1171	int32_t delta;
	1172	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
	1173	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
	1174	}
374ca955 A	1175	if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
	1176	GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe2, result);
	1177	}
	1178	}
	1179
	1180	return (result==c) ? ~result : result;
	1181	}
	1182
	1183	/* internal */
	1184	static int32_t
f3c0d7a5	1185	toUpperOrTitle(UChar32 c,
374ca955 A	1186	UCaseContextIterator iter, void context,
374ca955 A	1187	const UChar **pString,
f3c0d7a5	1188	int32_t loc,
374ca955	1189	UBool upperNotTitle) {
f3c0d7a5 A	1190	// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
f3c0d7a5 A	1191	U_ASSERT(c >= 0);
729e4ab9	1192	UChar32 result=c;
f3c0d7a5	1193	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
0f5d89e8	1194	if(!UCASE_HAS_EXCEPTION(props)) {
73c04bcf A	1195	if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
73c04bcf A	1196	result=c+UCASE_GET_DELTA(props);
374ca955 A	1197	}
374ca955 A	1198	} else {
f3c0d7a5	1199	const uint16_t pe=GET_EXCEPTIONS(&ucase_props_singleton, props), pe2;
374ca955	1200	uint16_t excWord=*pe++;
729e4ab9	1201	int32_t full, idx;
374ca955 A	1202
	1203	pe2=pe;
	1204
	1205	if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
	1206	/* use hardcoded conditions and mappings */
46f4442e	1207	if(loc==UCASE_LOC_TURKISH && c==0x69) {
374ca955 A	1208	/*
	1209	# Turkish and Azeri
	1210
	1211	# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
	1212	# The following rules handle those cases.
	1213
	1214	# When uppercasing, i turns into a dotted capital I
	1215
	1216	0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
	1217	0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
	1218	*/
	1219	return 0x130;
f3c0d7a5	1220	} else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter, context)) {
374ca955 A	1221	/*
	1222	# Lithuanian
	1223
	1224	# Lithuanian retains the dot in a lowercase i when followed by accents.
	1225
	1226	# Remove DOT ABOVE after "i" with upper or titlecase
	1227
	1228	0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
	1229	*/
0f5d89e8	1230	*pString=nullptr;
374ca955 A	1231	return 0; /* remove the dot (continue without output) */
	1232	} else {
	1233	/* no known conditional special case mapping, use a normal mapping */
	1234	}
	1235	} else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
	1236	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
	1237
	1238	/* start of full case mapping strings */
	1239	++pe;
	1240
	1241	/* skip the lowercase and case-folding result strings */
	1242	pe+=full&UCASE_FULL_LOWER;
	1243	full>>=4;
	1244	pe+=full&0xf;
	1245	full>>=4;
	1246
	1247	if(upperNotTitle) {
	1248	full&=0xf;
	1249	} else {
	1250	/* skip the uppercase result string */
	1251	pe+=full&0xf;
	1252	full=(full>>4)&0xf;
	1253	}
	1254
	1255	if(full!=0) {
	1256	/* set the output pointer to the result string */
4388f060	1257	pString=reinterpret_cast<const UChar >(pe);
374ca955 A	1258
	1259	/* return the string length */
	1260	return full;
	1261	}
	1262	}
	1263
0f5d89e8 A	1264	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
	1265	int32_t delta;
	1266	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
	1267	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
	1268	}
374ca955	1269	if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
729e4ab9	1270	idx=UCASE_EXC_TITLE;
374ca955 A	1271	} else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
374ca955 A	1272	/* here, titlecase is same as uppercase */
729e4ab9	1273	idx=UCASE_EXC_UPPER;
374ca955 A	1274	} else {
	1275	return ~c;
	1276	}
729e4ab9	1277	GET_SLOT_VALUE(excWord, idx, pe2, result);
374ca955 A	1278	}
	1279
	1280	return (result==c) ? ~result : result;
	1281	}
	1282
	1283	U_CAPI int32_t U_EXPORT2
f3c0d7a5	1284	ucase_toFullUpper(UChar32 c,
374ca955 A	1285	UCaseContextIterator iter, void context,
374ca955 A	1286	const UChar **pString,
f3c0d7a5 A	1287	int32_t caseLocale) {
f3c0d7a5 A	1288	return toUpperOrTitle(c, iter, context, pString, caseLocale, TRUE);
374ca955 A	1289	}
	1290
	1291	U_CAPI int32_t U_EXPORT2
f3c0d7a5	1292	ucase_toFullTitle(UChar32 c,
374ca955 A	1293	UCaseContextIterator iter, void context,
374ca955 A	1294	const UChar **pString,
f3c0d7a5 A	1295	int32_t caseLocale) {
f3c0d7a5 A	1296	return toUpperOrTitle(c, iter, context, pString, caseLocale, FALSE);
374ca955 A	1297	}
	1298
	1299	/* case folding ------------------------------------------------------------- */
	1300
	1301	/*
	1302	* Case folding is similar to lowercasing.
	1303	* The result may be a simple mapping, i.e., a single code point, or
	1304	* a full mapping, i.e., a string.
	1305	* If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
	1306	* then only the lowercase mapping is stored.
	1307	*
	1308	* Some special cases are hardcoded because their conditions cannot be
	1309	* parsed and processed from CaseFolding.txt.
	1310	*
	1311	* Unicode 3.2 CaseFolding.txt specifies for its status field:
	1312
	1313	# C: common case folding, common mappings shared by both simple and full mappings.
	1314	# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
	1315	# S: simple case folding, mappings to single characters where different from F.
	1316	# T: special case for uppercase I and dotted uppercase I
	1317	# - For non-Turkic languages, this mapping is normally not used.
	1318	# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
	1319	#
	1320	# Usage:
	1321	# A. To do a simple case folding, use the mappings with status C + S.
	1322	# B. To do a full case folding, use the mappings with status C + F.
	1323	#
	1324	# The mappings with status T can be used or omitted depending on the desired case-folding
	1325	# behavior. (The default option is to exclude them.)
	1326
	1327	* Unicode 3.2 has 'T' mappings as follows:
	1328
	1329	0049; T; 0131; # LATIN CAPITAL LETTER I
	1330	0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
	1331
	1332	* while the default mappings for these code points are:
	1333
	1334	0049; C; 0069; # LATIN CAPITAL LETTER I
	1335	0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
	1336
73c04bcf	1337	* U+0130 has no simple case folding (simple-case-folds to itself).
374ca955 A	1338	*/
	1339
	1340	/* return the simple case folding mapping for c */
	1341	U_CAPI UChar32 U_EXPORT2
f3c0d7a5 A	1342	ucase_fold(UChar32 c, uint32_t options) {
f3c0d7a5 A	1343	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
0f5d89e8 A	1344	if(!UCASE_HAS_EXCEPTION(props)) {
0f5d89e8 A	1345	if(UCASE_IS_UPPER_OR_TITLE(props)) {
73c04bcf	1346	c+=UCASE_GET_DELTA(props);
374ca955 A	1347	}
374ca955 A	1348	} else {
f3c0d7a5	1349	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
374ca955	1350	uint16_t excWord=*pe++;
729e4ab9	1351	int32_t idx;
374ca955 A	1352	if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
	1353	/* special case folding mappings, hardcoded */
	1354	if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
	1355	/* default mappings */
	1356	if(c==0x49) {
	1357	/* 0049; C; 0069; # LATIN CAPITAL LETTER I */
	1358	return 0x69;
	1359	} else if(c==0x130) {
73c04bcf A	1360	/* no simple case folding for U+0130 */
73c04bcf A	1361	return c;
374ca955 A	1362	}
	1363	} else {
	1364	/* Turkic mappings */
	1365	if(c==0x49) {
	1366	/* 0049; T; 0131; # LATIN CAPITAL LETTER I */
	1367	return 0x131;
	1368	} else if(c==0x130) {
	1369	/* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
	1370	return 0x69;
	1371	}
	1372	}
	1373	}
0f5d89e8 A	1374	if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
	1375	return c;
	1376	}
	1377	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
	1378	int32_t delta;
	1379	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
	1380	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
	1381	}
374ca955	1382	if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
729e4ab9	1383	idx=UCASE_EXC_FOLD;
374ca955	1384	} else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
729e4ab9	1385	idx=UCASE_EXC_LOWER;
374ca955 A	1386	} else {
	1387	return c;
	1388	}
729e4ab9	1389	GET_SLOT_VALUE(excWord, idx, pe, c);
374ca955 A	1390	}
	1391	return c;
	1392	}
	1393
	1394	/*
	1395	* Issue for canonical caseless match (UAX #21):
	1396	* Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
	1397	* canonical equivalence, unlike default-option casefolding.
	1398	* For example, I-grave and I + grave fold to strings that are not canonically
	1399	* equivalent.
	1400	* For more details, see the comment in unorm_compare() in unorm.cpp
	1401	* and the intermediate prototype changes for Jitterbug 2021.
	1402	* (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
	1403	*
	1404	* This did not get fixed because it appears that it is not possible to fix
	1405	* it for uppercase and lowercase characters (I-grave vs. i-grave)
	1406	* together in a way that they still fold to common result strings.
	1407	*/
	1408
	1409	U_CAPI int32_t U_EXPORT2
f3c0d7a5	1410	ucase_toFullFolding(UChar32 c,
374ca955	1411	const UChar **pString,
f3c0d7a5 A	1412	uint32_t options) {
	1413	// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
	1414	U_ASSERT(c >= 0);
729e4ab9	1415	UChar32 result=c;
f3c0d7a5	1416	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
0f5d89e8 A	1417	if(!UCASE_HAS_EXCEPTION(props)) {
0f5d89e8 A	1418	if(UCASE_IS_UPPER_OR_TITLE(props)) {
73c04bcf	1419	result=c+UCASE_GET_DELTA(props);
374ca955 A	1420	}
374ca955 A	1421	} else {
f3c0d7a5	1422	const uint16_t pe=GET_EXCEPTIONS(&ucase_props_singleton, props), pe2;
374ca955	1423	uint16_t excWord=*pe++;
729e4ab9	1424	int32_t full, idx;
374ca955 A	1425
	1426	pe2=pe;
	1427
	1428	if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
	1429	/* use hardcoded conditions and mappings */
	1430	if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
	1431	/* default mappings */
	1432	if(c==0x49) {
	1433	/* 0049; C; 0069; # LATIN CAPITAL LETTER I */
	1434	return 0x69;
	1435	} else if(c==0x130) {
	1436	/* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
	1437	*pString=iDot;
	1438	return 2;
	1439	}
	1440	} else {
	1441	/* Turkic mappings */
	1442	if(c==0x49) {
	1443	/* 0049; T; 0131; # LATIN CAPITAL LETTER I */
	1444	return 0x131;
	1445	} else if(c==0x130) {
	1446	/* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
	1447	return 0x69;
	1448	}
	1449	}
	1450	} else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
	1451	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
	1452
	1453	/* start of full case mapping strings */
	1454	++pe;
	1455
	1456	/* skip the lowercase result string */
	1457	pe+=full&UCASE_FULL_LOWER;
	1458	full=(full>>4)&0xf;
	1459
	1460	if(full!=0) {
	1461	/* set the output pointer to the result string */
4388f060	1462	pString=reinterpret_cast<const UChar >(pe);
374ca955 A	1463
	1464	/* return the string length */
	1465	return full;
	1466	}
	1467	}
	1468
0f5d89e8 A	1469	if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
	1470	return ~c;
	1471	}
	1472	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
	1473	int32_t delta;
	1474	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
	1475	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
	1476	}
374ca955	1477	if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
729e4ab9	1478	idx=UCASE_EXC_FOLD;
374ca955	1479	} else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
729e4ab9	1480	idx=UCASE_EXC_LOWER;
374ca955 A	1481	} else {
	1482	return ~c;
	1483	}
729e4ab9	1484	GET_SLOT_VALUE(excWord, idx, pe2, result);
374ca955 A	1485	}
	1486
	1487	return (result==c) ? ~result : result;
	1488	}
73c04bcf A	1489
	1490	/* case mapping properties API ---------------------------------------------- */
	1491
73c04bcf A	1492	/* public API (see uchar.h) */
	1493
	1494	U_CAPI UBool U_EXPORT2
	1495	u_isULowercase(UChar32 c) {
f3c0d7a5	1496	return (UBool)(UCASE_LOWER==ucase_getType(c));
73c04bcf A	1497	}
	1498
	1499	U_CAPI UBool U_EXPORT2
	1500	u_isUUppercase(UChar32 c) {
f3c0d7a5	1501	return (UBool)(UCASE_UPPER==ucase_getType(c));
73c04bcf A	1502	}
	1503
	1504	/* Transforms the Unicode character to its lower case equivalent.*/
	1505	U_CAPI UChar32 U_EXPORT2
	1506	u_tolower(UChar32 c) {
f3c0d7a5	1507	return ucase_tolower(c);
73c04bcf A	1508	}
	1509
	1510	/* Transforms the Unicode character to its upper case equivalent.*/
	1511	U_CAPI UChar32 U_EXPORT2
	1512	u_toupper(UChar32 c) {
f3c0d7a5	1513	return ucase_toupper(c);
73c04bcf A	1514	}
	1515
	1516	/* Transforms the Unicode character to its title case equivalent.*/
	1517	U_CAPI UChar32 U_EXPORT2
	1518	u_totitle(UChar32 c) {
f3c0d7a5	1519	return ucase_totitle(c);
73c04bcf A	1520	}
	1521
	1522	/* return the simple case folding mapping for c */
	1523	U_CAPI UChar32 U_EXPORT2
	1524	u_foldCase(UChar32 c, uint32_t options) {
f3c0d7a5	1525	return ucase_fold(c, options);
73c04bcf A	1526	}
	1527
	1528	U_CFUNC int32_t U_EXPORT2
	1529	ucase_hasBinaryProperty(UChar32 c, UProperty which) {
	1530	/* case mapping properties */
729e4ab9	1531	const UChar *resultString;
73c04bcf A	1532	switch(which) {
73c04bcf A	1533	case UCHAR_LOWERCASE:
f3c0d7a5	1534	return (UBool)(UCASE_LOWER==ucase_getType(c));
73c04bcf	1535	case UCHAR_UPPERCASE:
f3c0d7a5	1536	return (UBool)(UCASE_UPPER==ucase_getType(c));
73c04bcf	1537	case UCHAR_SOFT_DOTTED:
f3c0d7a5	1538	return ucase_isSoftDotted(c);
73c04bcf	1539	case UCHAR_CASE_SENSITIVE:
f3c0d7a5	1540	return ucase_isCaseSensitive(c);
729e4ab9	1541	case UCHAR_CASED:
f3c0d7a5	1542	return (UBool)(UCASE_NONE!=ucase_getType(c));
729e4ab9	1543	case UCHAR_CASE_IGNORABLE:
f3c0d7a5	1544	return (UBool)(ucase_getTypeOrIgnorable(c)>>2);
729e4ab9 A	1545	/*
	1546	* Note: The following Changes_When_Xyz are defined as testing whether
	1547	* the NFD form of the input changes when Xyz-case-mapped.
	1548	* However, this simpler implementation of these properties,
	1549	* ignoring NFD, passes the tests.
	1550	* The implementation needs to be changed if the tests start failing.
	1551	* When that happens, optimizations should be used to work with the
	1552	* per-single-code point ucase_toFullXyz() functions unless
	1553	* the NFD form has more than one code point,
	1554	* and the property starts set needs to be the union of the
	1555	* start sets for normalization and case mappings.
	1556	*/
	1557	case UCHAR_CHANGES_WHEN_LOWERCASED:
f3c0d7a5	1558	return (UBool)(ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
729e4ab9	1559	case UCHAR_CHANGES_WHEN_UPPERCASED:
f3c0d7a5	1560	return (UBool)(ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
729e4ab9	1561	case UCHAR_CHANGES_WHEN_TITLECASED:
f3c0d7a5	1562	return (UBool)(ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
729e4ab9 A	1563	/* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
729e4ab9 A	1564	case UCHAR_CHANGES_WHEN_CASEMAPPED:
729e4ab9	1565	return (UBool)(
f3c0d7a5 A	1566	ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 \|\|
	1567	ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 \|\|
	1568	ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
73c04bcf A	1569	default:
	1570	return FALSE;
	1571	}
	1572	}