+/*
+ * Report the case closure of code point c through the set adder sa.
+ *
+ * Everything is delivered via sa->add() (single code points) and
+ * sa->addString() (strings); the function itself returns nothing.
+ * For most code points the members come from the csp data: the simple
+ * case mappings, the full case folding string, and the closure string.
+ */
+U_CAPI void U_EXPORT2
+ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) {
+    /* <0069 0307>: the string that U+0130 is grouped with below */
+    static const UChar dottedLowerI[2]= { 0x69, 0x307 };
+
+    const uint16_t *slots, *cursor;
+    const UChar *closure;
+    uint16_t props, excWord;
+    int32_t slot, pos, delta, closureLength, fullLength, foldLength;
+
+    /*
+     * The closure of i and its relatives is hardcoded here and the data
+     * file data for these characters is ignored.
+     * The Turkic dotless i and dotted I, with their case mapping conditions
+     * and case folding option, make the related characters behave specially;
+     * this keeps their closure behavior in line with their case folding behavior.
+     */
+    if(c==0x49) {
+        /* regular I and i form one equivalence class */
+        sa->add(sa->set, 0x69);
+        return;
+    } else if(c==0x69) {
+        sa->add(sa->set, 0x49);
+        return;
+    } else if(c==0x130) {
+        /* dotted I goes with <0069 0307> (for canonical equivalence with <0049 0307>) */
+        sa->addString(sa->set, dottedLowerI, 2);
+        return;
+    } else if(c==0x131) {
+        /* dotless i is in a class by itself: nothing to add */
+        return;
+    }
+
+    /* every other code point is driven by the data file data */
+    GET_PROPS(csp, c, props);
+
+    if(!PROPS_HAS_EXCEPTION(props)) {
+        /* no exception data: at most one simple mapping, whatever its type */
+        if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
+            delta=UCASE_GET_DELTA(props);
+            if(delta!=0) {
+                sa->add(sa->set, c+delta);
+            }
+        }
+        return;
+    }
+
+    /*
+     * c has exception data, so there may be several simple and/or full
+     * case mappings. Add all of them.
+     * The first unit is the exception word; the slots start right behind it.
+     */
+    slots=GET_EXCEPTIONS(csp, props);
+    excWord=*slots++;
+
+    /* simple case mappings: one optional slot each, lower through title */
+    for(slot=UCASE_EXC_LOWER; slot<=UCASE_EXC_TITLE; ++slot) {
+        if(HAS_SLOT(excWord, slot)) {
+            cursor=slots;
+            GET_SLOT_VALUE(excWord, slot, cursor, c); /* overwrites c with the mapping */
+            sa->add(sa->set, c);
+        }
+    }
+
+    /* closure string: remember where it is and how long, add it last */
+    closureLength=0;
+    closure=NULL;
+    if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
+        cursor=slots;
+        GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, cursor, closureLength);
+        closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* the higher bits are reserved */
+        /* right behind this slot, unless full case mappings come first */
+        closure=(const UChar *)cursor+1;
+    }
+
+    /* full case folding string */
+    if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
+        cursor=slots;
+        GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, cursor, fullLength);
+
+        /* the full case mapping strings start behind the slot */
+        ++cursor;
+
+        /* four 4-bit string lengths; bits 16 and higher are reserved */
+        fullLength&=0xffff;
+
+        /* step over the lowercase result string */
+        cursor+=fullLength&UCASE_FULL_LOWER;
+
+        /* the next nibble is the length of the case folding string */
+        foldLength=(fullLength>>4)&0xf;
+        if(foldLength!=0) {
+            sa->addString(sa->set, (const UChar *)cursor, foldLength);
+        }
+        cursor+=foldLength;
+
+        /* step over the uppercase and titlecase strings */
+        cursor+=(fullLength>>8)&0xf;
+        cursor+=fullLength>>12;
+
+        /* the closure string sits behind the full case mappings */
+        closure=(const UChar *)cursor;
+    }
+
+    /* finally add each code point of the closure string */
+    pos=0;
+    while(pos<closureLength) {
+        U16_NEXT_UNSAFE(closure, pos, c);
+        sa->add(sa->set, c);
+    }
+}
+
+/*
+ * Compare s, which has an explicit length, with t, which is either
+ * NUL-terminated or occupies at most max units.
+ * Callers must pass length>0, max>0 and length<=max.
+ * Returns 0 if equal, a positive value if s sorts after t (or t ends first),
+ * and a negative value if s sorts before t (or s ends first).
+ */
+static U_INLINE int32_t
+strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
+    int32_t sUnit, tUnit;
+    /* units t may still hold after the first length units; length<=max is required */
+    int32_t spare=max-length;
+
+    for(;;) {
+        sUnit=*s++;
+        tUnit=*t++;
+        if(tUnit==0) {
+            /* t ended while s still has units */
+            return 1;
+        }
+        if(sUnit!=tUnit) {
+            /* first differing unit decides the order */
+            return sUnit-tUnit;
+        }
+        if(--length<=0) {
+            /* all of s has been consumed */
+            break;
+        }
+    }
+
+    if(spare==0 || *t==0) {
+        /* t ends here as well: same length, same contents */
+        return 0;
+    }
+    /* t continues past the end of s: report the length difference */
+    return -spare;
+}
+
+/*
+ * Look up the string s[0..length[ in the csp->unfold table and, if it is
+ * found, add each code point stored in the matching row, together with
+ * that code point's own case closure, through sa.
+ * Returns TRUE if s was found, FALSE otherwise (including when there is
+ * no unfold data, s is NULL, or the length rules out a match).
+ */
+U_CAPI UBool U_EXPORT2
+ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa) {
+    const UChar *table, *row;
+    int32_t rowCount, rowWidth, stringWidth;
+    int32_t lo, hi, mid, order, pos;
+    UChar32 c;
+
+    /*
+     * Nothing to do without reverse case folding data or without a string,
+     * and a string of at most one unit is too short to find any match.
+     * More precise for the length test would be
+     *   if(!u_strHasMoreChar32Than(s, length, 1))
+     * but that makes little practical difference: a single supplementary
+     * code point would simply not be found.
+     */
+    if(csp->unfold==NULL || s==NULL || length<=1) {
+        return FALSE;
+    }
+
+    /* the table starts with its own dimensions */
+    table=csp->unfold;
+    rowCount=table[UCASE_UNFOLD_ROWS];
+    rowWidth=table[UCASE_UNFOLD_ROW_WIDTH];
+    stringWidth=table[UCASE_UNFOLD_STRING_WIDTH];
+
+    if(length>stringWidth) {
+        /* longer than any string column entry: cannot match */
+        return FALSE;
+    }
+
+    /* the searchable rows begin one row width into the table */
+    table+=rowWidth;
+
+    /* binary search over the rows (presumably stored in strcmpMax() order) */
+    lo=0;
+    hi=rowCount;
+    while(lo<hi) {
+        mid=(lo+hi)/2;
+        row=table+(mid*rowWidth);
+        order=strcmpMax(s, length, row, stringWidth);
+
+        if(order<0) {
+            hi=mid;
+            continue;
+        }
+        if(order>0) {
+            lo=mid+1;
+            continue;
+        }
+
+        /*
+         * Match: the code points follow the string column, up to the end
+         * of the row or a NUL. Add each one and its case closure.
+         */
+        pos=stringWidth;
+        while(pos<rowWidth && row[pos]!=0) {
+            U16_NEXT_UNSAFE(row, pos, c);
+            sa->add(sa->set, c);
+            ucase_addCaseClosure(csp, c, sa);
+        }
+        return TRUE;
+    }
+
+    /* the string is not in the table */
+    return FALSE;
+}
+