icuSources/common/ucase.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 *******************************************************************************
   5 *
   6 *   Copyright (C) 2004-2014, International Business Machines
   7 *   Corporation and others.  All Rights Reserved.
   8 *
   9 *******************************************************************************
  10 *   file name:  ucase.cpp
  11 *   encoding:   UTF-8
  12 *   tab size:   8 (not used)
  13 *   indentation:4
  14 *
  15 *   created on: 2004aug30
  16 *   created by: Markus W. Scherer
  17 *
  18 *   Low-level Unicode character/string case mapping code.
  19 *   Much code moved here (and modified) from uchar.c.
  20 */
  21
  22 #include "unicode/utypes.h"
  23 #include "unicode/unistr.h"
  24 #include "unicode/uset.h"
  25 #include "unicode/udata.h" /* UDataInfo */
  26 #include "unicode/utf16.h"
  27 #include "ucmndata.h" /* DataHeader */
  28 #include "udatamem.h"
  29 #include "umutex.h"
  30 #include "uassert.h"
  31 #include "cmemory.h"
  32 #include "utrie2.h"
  33 #include "ucase.h"
  34
/**
 * Bundle of pointers and the trie that make up the case-property data
 * consulted by the functions in this file (through ucase_props_singleton).
 * The field order must stay as is: the singleton is initialized by the
 * machine-generated ucase_props_data.h included right after this struct.
 */
struct UCaseProps {
    UDataMemory *mem;           /* NOTE(review): not used in the visible code; presumably the backing data item, if any - confirm */
    const int32_t *indexes;     /* NOTE(review): not used in the visible code; presumably the data file's index array - confirm */
    const uint16_t *exceptions; /* base of the exception words; GET_EXCEPTIONS() offsets into it */
    const uint16_t *unfold;     /* reverse case folding table; may be NULL (see ucase_addStringCaseClosure) */

    UTrie2 trie;                /* maps a code point to its 16-bit properties word (read with UTRIE2_GET16) */
    uint8_t formatVersion[4];   /* NOTE(review): presumably the data format version - confirm against the data header */
};
  44
  45 /* ucase_props_data.h is machine-generated by gencase --csource */
  46 #define INCLUDED_FROM_UCASE_CPP
  47 #include "ucase_props_data.h"
  48
  49 /* set of property starts for UnicodeSet ------------------------------------ */
  50
  51 static UBool U_CALLCONV
  52 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {
  53     /* add the start code point to the USet */
  54     const USetAdder *sa=(const USetAdder *)context;
  55     sa->add(sa->set, start);
  56     return TRUE;
  57 }
  58
  59 U_CFUNC void U_EXPORT2
  60 ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
  61     if(U_FAILURE(*pErrorCode)) {
  62         return;
  63     }
  64
  65     /* add the start code point of each same-value range of the trie */
  66     utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa);
  67
  68     /* add code points with hardcoded properties, plus the ones following them */
  69
  70     /* (none right now, see comment below) */
  71
  72     /*
  73      * Omit code points with hardcoded specialcasing properties
  74      * because we do not build property UnicodeSets for them right now.
  75      */
  76 }
  77
  78 /* data access primitives --------------------------------------------------- */
  79
  80 U_CFUNC const UTrie2 * U_EXPORT2
  81 ucase_getTrie() {
  82     return &ucase_props_singleton.trie;
  83 }
  84
/*
 * Pointer to the first exception word for a properties word that has
 * UCASE_HAS_EXCEPTION(props): the bits of props above UCASE_EXC_SHIFT are
 * used as an index into csp->exceptions.
 */
#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
  86
/*
 * Number of bits set in an 8-bit integer value: flagsOffset[b] is the
 * population count of byte b.
 * SLOT_OFFSET() indexes this table with the slot flags below a given slot
 * index, which yields how many slots are present before that slot.
 */
static const uint8_t flagsOffset[256]={
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
 106
/* Nonzero if bit idx is set in flags, i.e. the optional slot idx is present. */
#define HAS_SLOT(flags, idx) ((flags)&(1<<(idx)))
/*
 * Number of present slots with an index lower than idx: population count
 * (via flagsOffset[]) of the flag bits below bit idx.
 * Intended for idx values small enough that the masked value indexes
 * within the 256-entry flagsOffset[] table.
 */
#define SLOT_OFFSET(flags, idx) flagsOffset[(flags)&((1<<(idx))-1)]
 109
 110 /*
 111  * Get the value of an optional-value slot where HAS_SLOT(excWord, idx).
 112  *
 113  * @param excWord (in) initial exceptions word
 114  * @param idx (in) desired slot index
 115  * @param pExc16 (in/out) const uint16_t * after excWord=*pExc16++;
 116  *               moved to the last uint16_t of the value, use +1 for beginning of next slot
 117  * @param value (out) int32_t or uint32_t output if hasSlot, otherwise not modified
 118  */
 119 #define GET_SLOT_VALUE(excWord, idx, pExc16, value) \
 120     if(((excWord)&UCASE_EXC_DOUBLE_SLOTS)==0) { \
 121         (pExc16)+=SLOT_OFFSET(excWord, idx); \
 122         (value)=*pExc16; \
 123     } else { \
 124         (pExc16)+=2*SLOT_OFFSET(excWord, idx); \
 125         (value)=*pExc16++; \
 126         (value)=((value)<<16)|*pExc16; \
 127     }
 128
 129 /* simple case mappings ----------------------------------------------------- */
 130
 131 U_CAPI UChar32 U_EXPORT2
 132 ucase_tolower(UChar32 c) {
 133     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
 134     if(!UCASE_HAS_EXCEPTION(props)) {
 135         if(UCASE_IS_UPPER_OR_TITLE(props)) {
 136             c+=UCASE_GET_DELTA(props);
 137         }
 138     } else {
 139         const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
 140         uint16_t excWord=*pe++;
 141         if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
 142             int32_t delta;
 143             GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
 144             return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
 145         }
 146         if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
 147             GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c);
 148         }
 149     }
 150     return c;
 151 }
 152
 153 U_CAPI UChar32 U_EXPORT2
 154 ucase_toupper(UChar32 c) {
 155     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
 156     if(!UCASE_HAS_EXCEPTION(props)) {
 157         if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
 158             c+=UCASE_GET_DELTA(props);
 159         }
 160     } else {
 161         const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
 162         uint16_t excWord=*pe++;
 163         if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
 164             int32_t delta;
 165             GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
 166             return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
 167         }
 168         if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
 169             GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c);
 170         }
 171     }
 172     return c;
 173 }
 174
 175 U_CAPI UChar32 U_EXPORT2
 176 ucase_totitle(UChar32 c) {
 177     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
 178     if(!UCASE_HAS_EXCEPTION(props)) {
 179         if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
 180             c+=UCASE_GET_DELTA(props);
 181         }
 182     } else {
 183         const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
 184         uint16_t excWord=*pe++;
 185         if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
 186             int32_t delta;
 187             GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
 188             return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
 189         }
 190         int32_t idx;
 191         if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
 192             idx=UCASE_EXC_TITLE;
 193         } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
 194             idx=UCASE_EXC_UPPER;
 195         } else {
 196             return c;
 197         }
 198         GET_SLOT_VALUE(excWord, idx, pe, c);
 199     }
 200     return c;
 201 }
 202
 203 static const UChar iDot[2] = { 0x69, 0x307 };
 204 static const UChar jDot[2] = { 0x6a, 0x307 };
 205 static const UChar iOgonekDot[3] = { 0x12f, 0x307 };
 206 static const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 };
 207 static const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 };
 208 static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 };
 209
 210
 211 U_CFUNC void U_EXPORT2
 212 ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
 213     uint16_t props;
 214
 215     /*
 216      * Hardcode the case closure of i and its relatives and ignore the
 217      * data file data for these characters.
 218      * The Turkic dotless i and dotted I with their case mapping conditions
 219      * and case folding option make the related characters behave specially.
 220      * This code matches their closure behavior to their case folding behavior.
 221      */
 222
 223     switch(c) {
 224     case 0x49:
 225         /* regular i and I are in one equivalence class */
 226         sa->add(sa->set, 0x69);
 227         return;
 228     case 0x69:
 229         sa->add(sa->set, 0x49);
 230         return;
 231     case 0x130:
 232         /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
 233         sa->addString(sa->set, iDot, 2);
 234         return;
 235     case 0x131:
 236         /* dotless i is in a class by itself */
 237         return;
 238     default:
 239         /* otherwise use the data file data */
 240         break;
 241     }
 242
 243     props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
 244     if(!UCASE_HAS_EXCEPTION(props)) {
 245         if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
 246             /* add the one simple case mapping, no matter what type it is */
 247             int32_t delta=UCASE_GET_DELTA(props);
 248             if(delta!=0) {
 249                 sa->add(sa->set, c+delta);
 250             }
 251         }
 252     } else {
 253         /*
 254          * c has exceptions, so there may be multiple simple and/or
 255          * full case mappings. Add them all.
 256          */
 257         const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
 258         const UChar *closure;
 259         uint16_t excWord=*pe++;
 260         int32_t idx, closureLength, fullLength, length;
 261
 262         pe0=pe;
 263
 264         /* add all simple case mappings */
 265         for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
 266             if(HAS_SLOT(excWord, idx)) {
 267                 pe=pe0;
 268                 GET_SLOT_VALUE(excWord, idx, pe, c);
 269                 sa->add(sa->set, c);
 270             }
 271         }
 272         if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
 273             pe=pe0;
 274             int32_t delta;
 275             GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
 276             sa->add(sa->set, (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta);
 277         }
 278
 279         /* get the closure string pointer & length */
 280         if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
 281             pe=pe0;
 282             GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
 283             closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
 284             closure=(const UChar *)pe+1; /* behind this slot, unless there are full case mappings */
 285         } else {
 286             closureLength=0;
 287             closure=NULL;
 288         }
 289
 290         /* add the full case folding */
 291         if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
 292             pe=pe0;
 293             GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
 294
 295             /* start of full case mapping strings */
 296             ++pe;
 297
 298             fullLength&=0xffff; /* bits 16 and higher are reserved */
 299
 300             /* skip the lowercase result string */
 301             pe+=fullLength&UCASE_FULL_LOWER;
 302             fullLength>>=4;
 303
 304             /* add the full case folding string */
 305             length=fullLength&0xf;
 306             if(length!=0) {
 307                 sa->addString(sa->set, (const UChar *)pe, length);
 308                 pe+=length;
 309             }
 310
 311             /* skip the uppercase and titlecase strings */
 312             fullLength>>=4;
 313             pe+=fullLength&0xf;
 314             fullLength>>=4;
 315             pe+=fullLength;
 316
 317             closure=(const UChar *)pe; /* behind full case mappings */
 318         }
 319
 320         /* add each code point in the closure string */
 321         for(idx=0; idx<closureLength;) {
 322             U16_NEXT_UNSAFE(closure, idx, c);
 323             sa->add(sa->set, c);
 324         }
 325     }
 326 }
 327
 328 /*
 329  * compare s, which has a length, with t, which has a maximum length or is NUL-terminated
 330  * must be length>0 and max>0 and length<=max
 331  */
 332 static inline int32_t
 333 strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
 334     int32_t c1, c2;
 335
 336     max-=length; /* we require length<=max, so no need to decrement max in the loop */
 337     do {
 338         c1=*s++;
 339         c2=*t++;
 340         if(c2==0) {
 341             return 1; /* reached the end of t but not of s */
 342         }
 343         c1-=c2;
 344         if(c1!=0) {
 345             return c1; /* return difference result */
 346         }
 347     } while(--length>0);
 348     /* ends with length==0 */
 349
 350     if(max==0 || *t==0) {
 351         return 0; /* equal to length of both strings */
 352     } else {
 353         return -max; /* return lengh difference */
 354     }
 355 }
 356
 357 U_CFUNC UBool U_EXPORT2
 358 ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) {
 359     int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;
 360
 361     if(ucase_props_singleton.unfold==NULL || s==NULL) {
 362         return FALSE; /* no reverse case folding data, or no string */
 363     }
 364     if(length<=1) {
 365         /* the string is too short to find any match */
 366         /*
 367          * more precise would be:
 368          * if(!u_strHasMoreChar32Than(s, length, 1))
 369          * but this does not make much practical difference because
 370          * a single supplementary code point would just not be found
 371          */
 372         return FALSE;
 373     }
 374
 375     const uint16_t *unfold=ucase_props_singleton.unfold;
 376     unfoldRows=unfold[UCASE_UNFOLD_ROWS];
 377     unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH];
 378     unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH];
 379     unfold+=unfoldRowWidth;
 380
 381     if(length>unfoldStringWidth) {
 382         /* the string is too long to find any match */
 383         return FALSE;
 384     }
 385
 386     /* do a binary search for the string */
 387     start=0;
 388     limit=unfoldRows;
 389     while(start<limit) {
 390         i=(start+limit)/2;
 391         const UChar *p=reinterpret_cast<const UChar *>(unfold+(i*unfoldRowWidth));
 392         result=strcmpMax(s, length, p, unfoldStringWidth);
 393
 394         if(result==0) {
 395             /* found the string: add each code point, and its case closure */
 396             UChar32 c;
 397
 398             for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) {
 399                 U16_NEXT_UNSAFE(p, i, c);
 400                 sa->add(sa->set, c);
 401                 ucase_addCaseClosure(c, sa);
 402             }
 403             return TRUE;
 404         } else if(result<0) {
 405             limit=i;
 406         } else /* result>0 */ {
 407             start=i+1;
 408         }
 409     }
 410
 411     return FALSE; /* string not found */
 412 }
 413
 414 U_NAMESPACE_BEGIN
 415
/*
 * Sets up iteration over the reverse case folding ("unfold") table of the
 * case-properties singleton.
 * The row count, row width and string width are read from the table's
 * leading header row; the body then advances unfold past that header so
 * that row 0 is the first data row.
 * NOTE(review): the initializers read unfold[], so they rely on unfold
 * being declared before the other members in the class, and on the
 * singleton's unfold pointer being non-NULL - confirm in the class
 * declaration and data.
 */
FullCaseFoldingIterator::FullCaseFoldingIterator()
        : unfold(reinterpret_cast<const UChar *>(ucase_props_singleton.unfold)),
          unfoldRows(unfold[UCASE_UNFOLD_ROWS]),
          unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]),
          unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]),
          currentRow(0),
          rowCpIndex(unfoldStringWidth) {
    /* skip the header row */
    unfold+=unfoldRowWidth;
}
 425
 426 UChar32
 427 FullCaseFoldingIterator::next(UnicodeString &full) {
 428     // Advance past the last-delivered code point.
 429     const UChar *p=unfold+(currentRow*unfoldRowWidth);
 430     if(rowCpIndex>=unfoldRowWidth || p[rowCpIndex]==0) {
 431         ++currentRow;
 432         p+=unfoldRowWidth;
 433         rowCpIndex=unfoldStringWidth;
 434     }
 435     if(currentRow>=unfoldRows) { return U_SENTINEL; }
 436     // Set "full" to the NUL-terminated string in the first unfold column.
 437     int32_t length=unfoldStringWidth;
 438     while(length>0 && p[length-1]==0) { --length; }
 439     full.setTo(FALSE, p, length);
 440     // Return the code point.
 441     UChar32 c;
 442     U16_NEXT_UNSAFE(p, rowCpIndex, c);
 443     return c;
 444 }
 445
/*
 * Per-code-point tables for Latin case mapping.
 * Each table has LIMIT int8_t entries, laid out 16 per line.
 * NOTE(review): LIMIT and EXC are declared outside this file section.
 * The entries look like signed deltas to add to the code point (for
 * example 32 at the positions of A-Z in the lowercasing tables), with 0
 * for "no change" and EXC for code points that need other handling -
 * confirm against the LatinCase declarations.
 */
namespace LatinCase {

/* lowercasing; NOTE(review): presumably for locales without special rules - confirm */
const int8_t TO_LOWER_NORMAL[LIMIT] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,

    0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
};

/* lowercasing; NOTE(review): presumably for Turkic (tr/az) and Lithuanian (lt) locales, going by the name - confirm */
const int8_t TO_LOWER_TR_LT[LIMIT] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0,
    EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,

    0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
    1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC
};

/* uppercasing; NOTE(review): presumably for locales without special rules - confirm */
const int8_t TO_UPPER_NORMAL[LIMIT] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
    -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
    -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
    -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,

    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,

    -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
};

/* uppercasing; NOTE(review): presumably for Turkic (tr/az) locales, going by the name - confirm */
const int8_t TO_UPPER_TR[LIMIT] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32,
    -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC,
    -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
    -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121,

    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0,

    -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,
    0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC
};

}  // namespace LatinCase
 577
 578 U_NAMESPACE_END
 579
 580 /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
 581 U_CAPI int32_t U_EXPORT2
 582 ucase_getType(UChar32 c) {
 583     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
 584     return UCASE_GET_TYPE(props);
 585 }
 586
 587 /** @return same as ucase_getType() and set bit 2 if c is case-ignorable */
 588 U_CAPI int32_t U_EXPORT2
 589 ucase_getTypeOrIgnorable(UChar32 c) {
 590     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
 591     return UCASE_GET_TYPE_AND_IGNORABLE(props);
 592 }
 593
 594 /** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */
 595 static inline int32_t
 596 getDotType(UChar32 c) {
 597     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
 598     if(!UCASE_HAS_EXCEPTION(props)) {
 599         return props&UCASE_DOT_MASK;
 600     } else {
 601         const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
 602         return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK;
 603     }
 604 }
 605
 606 U_CAPI UBool U_EXPORT2
 607 ucase_isSoftDotted(UChar32 c) {
 608     return (UBool)(getDotType(c)==UCASE_SOFT_DOTTED);
 609 }
 610
 611 U_CAPI UBool U_EXPORT2
 612 ucase_isCaseSensitive(UChar32 c) {
 613     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
 614     if(!UCASE_HAS_EXCEPTION(props)) {
 615         return (UBool)((props&UCASE_SENSITIVE)!=0);
 616     } else {
 617         const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
 618         return (UBool)((*pe&UCASE_EXC_SENSITIVE)!=0);
 619     }
 620 }
 621
 622 /* string casing ------------------------------------------------------------ */
 623
 624 /*
 625  * These internal functions form the core of string case mappings.
 626  * They map single code points to result code points or strings and take
 627  * all necessary conditions (context, locale ID, options) into account.
 628  *
 629  * They do not iterate over the source or write to the destination
 630  * so that the same functions are useful for non-standard string storage,
 631  * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
 632  * For the same reason, the "surrounding text" context is passed in as a
 633  * UCaseContextIterator which does not make any assumptions about
 634  * the underlying storage.
 635  *
 636  * This section contains helper functions that check for conditions
 637  * in the input text surrounding the current code point
 638  * according to SpecialCasing.txt.
 639  *
 640  * Each helper function gets the index
 641  * - after the current code point if it looks at following text
 642  * - before the current code point if it looks at preceding text
 643  *
 644  * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
 645  *
 646  * Final_Sigma
 647  *   C is preceded by a sequence consisting of
 648  *     a cased letter and a case-ignorable sequence,
 649  *   and C is not followed by a sequence consisting of
 650  *     an ignorable sequence and then a cased letter.
 651  *
 652  * More_Above
 653  *   C is followed by one or more characters of combining class 230 (ABOVE)
 654  *   in the combining character sequence.
 655  *
 656  * After_Soft_Dotted
 657  *   The last preceding character with combining class of zero before C
 658  *   was Soft_Dotted,
 659  *   and there is no intervening combining character class 230 (ABOVE).
 660  *
 661  * Before_Dot
 662  *   C is followed by combining dot above (U+0307).
 663  *   Any sequence of characters with a combining class that is neither 0 nor 230
 664  *   may intervene between the current character and the combining dot above.
 665  *
 666  * The erratum from 2002-10-31 adds the condition
 667  *
 668  * After_I
 669  *   The last preceding base character was an uppercase I, and there is no
 670  *   intervening combining character class 230 (ABOVE).
 671  *
 672  *   (See Jitterbug 2344 and the comments on After_I below.)
 673  *
 674  * Helper definitions in Unicode 3.2 UAX 21:
 675  *
 676  * D1. A character C is defined to be cased
 677  *     if it meets any of the following criteria:
 678  *
 679  *   - The general category of C is Titlecase Letter (Lt)
 680  *   - In [CoreProps], C has one of the properties Uppercase, or Lowercase
 681  *   - Given D = NFD(C), then it is not the case that:
 682  *     D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
 *     (This third criterion does not add any characters to the list
 684  *      for Unicode 3.2. Ignored.)
 685  *
 686  * D2. A character C is defined to be case-ignorable
 687  *     if it meets either of the following criteria:
 688  *
 689  *   - The general category of C is
 690  *     Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
 691  *     Letter Modifier (Lm), or Symbol Modifier (Sk)
 692  *   - C is one of the following characters
 693  *     U+0027 APOSTROPHE
 694  *     U+00AD SOFT HYPHEN (SHY)
 695  *     U+2019 RIGHT SINGLE QUOTATION MARK
 696  *            (the preferred character for apostrophe)
 697  *
 698  * D3. A case-ignorable sequence is a sequence of
 699  *     zero or more case-ignorable characters.
 700  */
 701
/*
 * Case-insensitive tests of one char against a single ASCII letter.
 * Each macro evaluates its argument twice, so pass an expression
 * without side effects.
 */
#define is_d(c) ((c)=='d' || (c)=='D')
#define is_e(c) ((c)=='e' || (c)=='E')
#define is_i(c) ((c)=='i' || (c)=='I')
#define is_l(c) ((c)=='l' || (c)=='L')
#define is_r(c) ((c)=='r' || (c)=='R')
#define is_t(c) ((c)=='t' || (c)=='T')
#define is_u(c) ((c)=='u' || (c)=='U')
#define is_z(c) ((c)=='z' || (c)=='Z')

/* separator? True for '_', '-' and the terminating NUL; evaluates c up to three times. */
#define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0)
 713
 714 /**
 715  * Requires non-NULL locale ID but otherwise does the equivalent of
 716  * checking for language codes as if uloc_getLanguage() were called:
 717  * Accepts both 2- and 3-letter codes and accepts case variants.
 718  */
 719 U_CFUNC int32_t
 720 ucase_getCaseLocale(const char *locale) {
 721     /*
 722      * This function used to use uloc_getLanguage(), but the current code
 723      * removes the dependency of this low-level code on uloc implementation code
 724      * and is faster because not the whole locale ID has to be
 725      * examined and copied/transformed.
 726      *
 727      * Because this code does not want to depend on uloc, the caller must
 728      * pass in a non-NULL locale, i.e., may need to call uloc_getDefault().
 729      */
 730     char c=*locale++;
 731     // Fastpath for English "en" which is often used for default (=root locale) case mappings,
 732     // and for Chinese "zh": Very common but no special case mapping behavior.
 733     // Then check lowercase vs. uppercase to reduce the number of comparisons
 734     // for other locales without special behavior.
 735     if(c=='e') {
 736         /* el or ell? */
 737         c=*locale++;
 738         if(is_l(c)) {
 739             c=*locale++;
 740             if(is_l(c)) {
 741                 c=*locale;
 742             }
 743             if(is_sep(c)) {
 744                 return UCASE_LOC_GREEK;
 745             }
 746         }
 747         // en, es, ... -> root
 748     } else if(c=='z') {
 749         return UCASE_LOC_ROOT;
 750 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
 751     } else if(c>='a') {  // ASCII a-z = 0x61..0x7a, after A-Z
 752 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
 753     } else if(c<='z') {  // EBCDIC a-z = 0x81..0xa9 with two gaps, before A-Z
 754 #else
 755 #   error Unknown charset family!
 756 #endif
 757         // lowercase c
 758         if(c=='t') {
 759             /* tr or tur? */
 760             c=*locale++;
 761             if(is_u(c)) {
 762                 c=*locale++;
 763             }
 764             if(is_r(c)) {
 765                 c=*locale;
 766                 if(is_sep(c)) {
 767                     return UCASE_LOC_TURKISH;
 768                 }
 769             }
 770         } else if(c=='a') {
 771             /* az or aze? */
 772             c=*locale++;
 773             if(is_z(c)) {
 774                 c=*locale++;
 775                 if(is_e(c)) {
 776                     c=*locale;
 777                 }
 778                 if(is_sep(c)) {
 779                     return UCASE_LOC_TURKISH;
 780                 }
 781             }
 782         } else if(c=='l') {
 783             /* lt or lit? */
 784             c=*locale++;
 785             if(is_i(c)) {
 786                 c=*locale++;
 787             }
 788             if(is_t(c)) {
 789                 c=*locale;
 790                 if(is_sep(c)) {
 791                     return UCASE_LOC_LITHUANIAN;
 792                 }
 793             }
 794         } else if(c=='n') {
 795             /* nl or nld? */
 796             c=*locale++;
 797             if(is_l(c)) {
 798                 c=*locale++;
 799                 if(is_d(c)) {
 800                     c=*locale;
 801                 }
 802                 if(is_sep(c)) {
 803                     return UCASE_LOC_DUTCH;
 804                 }
 805             }
 806         }
 807     } else {
 808         // uppercase c
 809         // Same code as for lowercase c but also check for 'E'.
 810         if(c=='T') {
 811             /* tr or tur? */
 812             c=*locale++;
 813             if(is_u(c)) {
 814                 c=*locale++;
 815             }
 816             if(is_r(c)) {
 817                 c=*locale;
 818                 if(is_sep(c)) {
 819                     return UCASE_LOC_TURKISH;
 820                 }
 821             }
 822         } else if(c=='A') {
 823             /* az or aze? */
 824             c=*locale++;
 825             if(is_z(c)) {
 826                 c=*locale++;
 827                 if(is_e(c)) {
 828                     c=*locale;
 829                 }
 830                 if(is_sep(c)) {
 831                     return UCASE_LOC_TURKISH;
 832                 }
 833             }
 834         } else if(c=='L') {
 835             /* lt or lit? */
 836             c=*locale++;
 837             if(is_i(c)) {
 838                 c=*locale++;
 839             }
 840             if(is_t(c)) {
 841                 c=*locale;
 842                 if(is_sep(c)) {
 843                     return UCASE_LOC_LITHUANIAN;
 844                 }
 845             }
 846         } else if(c=='E') {
 847             /* el or ell? */
 848             c=*locale++;
 849             if(is_l(c)) {
 850                 c=*locale++;
 851                 if(is_l(c)) {
 852                     c=*locale;
 853                 }
 854                 if(is_sep(c)) {
 855                     return UCASE_LOC_GREEK;
 856                 }
 857             }
 858         } else if(c=='N') {
 859             /* nl or nld? */
 860             c=*locale++;
 861             if(is_l(c)) {
 862                 c=*locale++;
 863                 if(is_d(c)) {
 864                     c=*locale;
 865                 }
 866                 if(is_sep(c)) {
 867                     return UCASE_LOC_DUTCH;
 868                 }
 869             }
 870         }
 871     }
 872     return UCASE_LOC_ROOT;
 873 }
 874
 875 /*
 876  * Is followed by
 877  *   {case-ignorable}* cased
 878  * ?
 879  * (dir determines looking forward/backward)
 880  * If a character is case-ignorable, it is skipped regardless of whether
 881  * it is also cased or not.
 882  */
 883 static UBool
 884 isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) {
 885     UChar32 c;
 886
 887     if(iter==NULL) {
 888         return FALSE;
 889     }
 890
 891     for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) {
 892         int32_t type=ucase_getTypeOrIgnorable(c);
 893         if(type&4) {
 894             /* case-ignorable, continue with the loop */
 895         } else if(type!=UCASE_NONE) {
 896             return TRUE; /* followed by cased letter */
 897         } else {
 898             return FALSE; /* uncased and not case-ignorable */
 899         }
 900     }
 901
 902     return FALSE; /* not followed by cased letter */
 903 }
 904
 905 /* Is preceded by Soft_Dotted character with no intervening cc=230 ? */
 906 static UBool
 907 isPrecededBySoftDotted(UCaseContextIterator *iter, void *context) {
 908     UChar32 c;
 909     int32_t dotType;
 910     int8_t dir;
 911
 912     if(iter==NULL) {
 913         return FALSE;
 914     }
 915
 916     for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
 917         dotType=getDotType(c);
 918         if(dotType==UCASE_SOFT_DOTTED) {
 919             return TRUE; /* preceded by TYPE_i */
 920         } else if(dotType!=UCASE_OTHER_ACCENT) {
 921             return FALSE; /* preceded by different base character (not TYPE_i), or intervening cc==230 */
 922         }
 923     }
 924
 925     return FALSE; /* not preceded by TYPE_i */
 926 }
 927
 928 /*
 929  * See Jitterbug 2344:
 930  * The condition After_I for Turkic-lowercasing of U+0307 combining dot above
 931  * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
 932  * we made those releases compatible with Unicode 3.2 which had not fixed
 933  * a related bug in SpecialCasing.txt.
 934  *
 935  * From the Jitterbug 2344 text:
 936  * ... this bug is listed as a Unicode erratum
 937  * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
 938  * <quote>
 939  * There are two errors in SpecialCasing.txt.
 940  * 1. Missing semicolons on two lines. ... [irrelevant for ICU]
 941  * 2. An incorrect context definition. Correct as follows:
 942  * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
 943  * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
 944  * ---
 945  * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
 946  * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
 947  * where the context After_I is defined as:
 948  * The last preceding base character was an uppercase I, and there is no
 949  * intervening combining character class 230 (ABOVE).
 950  * </quote>
 951  *
 952  * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
 953  *
 954  * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
 955  * # This matches the behavior of the canonically equivalent I-dot_above
 956  *
 957  * See also the description in this place in older versions of uchar.c (revision 1.100).
 958  *
 959  * Markus W. Scherer 2003-feb-15
 960  */
 961
 962 /* Is preceded by base character 'I' with no intervening cc=230 ? */
 963 static UBool
 964 isPrecededBy_I(UCaseContextIterator *iter, void *context) {
 965     UChar32 c;
 966     int32_t dotType;
 967     int8_t dir;
 968
 969     if(iter==NULL) {
 970         return FALSE;
 971     }
 972
 973     for(dir=-1; (c=iter(context, dir))>=0; dir=0) {
 974         if(c==0x49) {
 975             return TRUE; /* preceded by I */
 976         }
 977         dotType=getDotType(c);
 978         if(dotType!=UCASE_OTHER_ACCENT) {
 979             return FALSE; /* preceded by different base character (not I), or intervening cc==230 */
 980         }
 981     }
 982
 983     return FALSE; /* not preceded by I */
 984 }
 985
 986 /* Is followed by one or more cc==230 ? */
 987 static UBool
 988 isFollowedByMoreAbove(UCaseContextIterator *iter, void *context) {
 989     UChar32 c;
 990     int32_t dotType;
 991     int8_t dir;
 992
 993     if(iter==NULL) {
 994         return FALSE;
 995     }
 996
 997     for(dir=1; (c=iter(context, dir))>=0; dir=0) {
 998         dotType=getDotType(c);
 999         if(dotType==UCASE_ABOVE) {
1000             return TRUE; /* at least one cc==230 following */
1001         } else if(dotType!=UCASE_OTHER_ACCENT) {
1002             return FALSE; /* next base character, no more cc==230 following */
1003         }
1004     }
1005
1006     return FALSE; /* no more cc==230 following */
1007 }
1008
1009 /* Is followed by a dot above (without cc==230 in between) ? */
1010 static UBool
1011 isFollowedByDotAbove(UCaseContextIterator *iter, void *context) {
1012     UChar32 c;
1013     int32_t dotType;
1014     int8_t dir;
1015
1016     if(iter==NULL) {
1017         return FALSE;
1018     }
1019
1020     for(dir=1; (c=iter(context, dir))>=0; dir=0) {
1021         if(c==0x307) {
1022             return TRUE;
1023         }
1024         dotType=getDotType(c);
1025         if(dotType!=UCASE_OTHER_ACCENT) {
1026             return FALSE; /* next base character or cc==230 in between */
1027         }
1028     }
1029
1030     return FALSE; /* no dot above following */
1031 }
1032
1033 U_CAPI int32_t U_EXPORT2
1034 ucase_toFullLower(UChar32 c,
1035                   UCaseContextIterator *iter, void *context,
1036                   const UChar **pString,
1037                   int32_t loc) {
1038     // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
1039     U_ASSERT(c >= 0);
1040     UChar32 result=c;
1041     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
1042     if(!UCASE_HAS_EXCEPTION(props)) {
1043         if(UCASE_IS_UPPER_OR_TITLE(props)) {
1044             result=c+UCASE_GET_DELTA(props);
1045         }
1046     } else {
1047         const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
1048         uint16_t excWord=*pe++;
1049         int32_t full;
1050
1051         pe2=pe;
1052
1053         if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
1054             /* use hardcoded conditions and mappings */
1055
1056             /*
1057              * Test for conditional mappings first
1058              *   (otherwise the unconditional default mappings are always taken),
1059              * then test for characters that have unconditional mappings in SpecialCasing.txt,
1060              * then get the UnicodeData.txt mappings.
1061              */
1062             if( loc==UCASE_LOC_LITHUANIAN &&
1063                     /* base characters, find accents above */
1064                     (((c==0x49 || c==0x4a || c==0x12e) &&
1065                         isFollowedByMoreAbove(iter, context)) ||
1066                     /* precomposed with accent above, no need to find one */
1067                     (c==0xcc || c==0xcd || c==0x128))
1068             ) {
1069                 /*
1070                     # Lithuanian
1071
1072                     # Lithuanian retains the dot in a lowercase i when followed by accents.
1073
1074                     # Introduce an explicit dot above when lowercasing capital I's and J's
1075                     # whenever there are more accents above.
1076                     # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
1077
1078                     0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
1079                     004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
1080                     012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
1081                     00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
1082                     00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
1083                     0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
1084                  */
1085                 switch(c) {
1086                 case 0x49:  /* LATIN CAPITAL LETTER I */
1087                     *pString=iDot;
1088                     return 2;
1089                 case 0x4a:  /* LATIN CAPITAL LETTER J */
1090                     *pString=jDot;
1091                     return 2;
1092                 case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
1093                     *pString=iOgonekDot;
1094                     return 2;
1095                 case 0xcc:  /* LATIN CAPITAL LETTER I WITH GRAVE */
1096                     *pString=iDotGrave;
1097                     return 3;
1098                 case 0xcd:  /* LATIN CAPITAL LETTER I WITH ACUTE */
1099                     *pString=iDotAcute;
1100                     return 3;
1101                 case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
1102                     *pString=iDotTilde;
1103                     return 3;
1104                 default:
1105                     return 0; /* will not occur */
1106                 }
1107             /* # Turkish and Azeri */
1108             } else if(loc==UCASE_LOC_TURKISH && c==0x130) {
1109                 /*
1110                     # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
1111                     # The following rules handle those cases.
1112
1113                     0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
1114                     0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
1115                  */
1116                 return 0x69;
1117             } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(iter, context)) {
1118                 /*
1119                     # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
1120                     # This matches the behavior of the canonically equivalent I-dot_above
1121
1122                     0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
1123                     0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
1124                  */
1125                 *pString=nullptr;
1126                 return 0; /* remove the dot (continue without output) */
1127             } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
1128                 /*
1129                     # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
1130
1131                     0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
1132                     0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
1133                  */
1134                 return 0x131;
1135             } else if(c==0x130) {
1136                 /*
1137                     # Preserve canonical equivalence for I with dot. Turkic is handled below.
1138
1139                     0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1140                  */
1141                 *pString=iDot;
1142                 return 2;
1143             } else if(  c==0x3a3 &&
1144                         !isFollowedByCasedLetter(iter, context, 1) &&
1145                         isFollowedByCasedLetter(iter, context, -1) /* -1=preceded */
1146             ) {
1147                 /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */
1148                 /*
1149                     # Special case for final form of sigma
1150
1151                     03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
1152                  */
1153                 return 0x3c2; /* greek small final sigma */
1154             } else {
1155                 /* no known conditional special case mapping, use a normal mapping */
1156             }
1157         } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
1158             GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
1159             full&=UCASE_FULL_LOWER;
1160             if(full!=0) {
1161                 /* set the output pointer to the lowercase mapping */
1162                 *pString=reinterpret_cast<const UChar *>(pe+1);
1163
1164                 /* return the string length */
1165                 return full;
1166             }
1167         }
1168
1169         if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
1170             int32_t delta;
1171             GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
1172             return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1173         }
1174         if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
1175             GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe2, result);
1176         }
1177     }
1178
1179     return (result==c) ? ~result : result;
1180 }
1181
1182 /* internal */
1183 static int32_t
1184 toUpperOrTitle(UChar32 c,
1185                UCaseContextIterator *iter, void *context,
1186                const UChar **pString,
1187                int32_t loc,
1188                UBool upperNotTitle) {
1189     // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
1190     U_ASSERT(c >= 0);
1191     UChar32 result=c;
1192     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
1193     if(!UCASE_HAS_EXCEPTION(props)) {
1194         if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
1195             result=c+UCASE_GET_DELTA(props);
1196         }
1197     } else {
1198         const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
1199         uint16_t excWord=*pe++;
1200         int32_t full, idx;
1201
1202         pe2=pe;
1203
1204         if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
1205             /* use hardcoded conditions and mappings */
1206             if(loc==UCASE_LOC_TURKISH && c==0x69) {
1207                 /*
1208                     # Turkish and Azeri
1209
1210                     # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
1211                     # The following rules handle those cases.
1212
1213                     # When uppercasing, i turns into a dotted capital I
1214
1215                     0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
1216                     0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
1217                 */
1218                 return 0x130;
1219             } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter, context)) {
1220                 /*
1221                     # Lithuanian
1222
1223                     # Lithuanian retains the dot in a lowercase i when followed by accents.
1224
1225                     # Remove DOT ABOVE after "i" with upper or titlecase
1226
1227                     0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
1228                  */
1229                 *pString=nullptr;
1230                 return 0; /* remove the dot (continue without output) */
1231             } else {
1232                 /* no known conditional special case mapping, use a normal mapping */
1233             }
1234         } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
1235             GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
1236
1237             /* start of full case mapping strings */
1238             ++pe;
1239
1240             /* skip the lowercase and case-folding result strings */
1241             pe+=full&UCASE_FULL_LOWER;
1242             full>>=4;
1243             pe+=full&0xf;
1244             full>>=4;
1245
1246             if(upperNotTitle) {
1247                 full&=0xf;
1248             } else {
1249                 /* skip the uppercase result string */
1250                 pe+=full&0xf;
1251                 full=(full>>4)&0xf;
1252             }
1253
1254             if(full!=0) {
1255                 /* set the output pointer to the result string */
1256                 *pString=reinterpret_cast<const UChar *>(pe);
1257
1258                 /* return the string length */
1259                 return full;
1260             }
1261         }
1262
1263         if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
1264             int32_t delta;
1265             GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
1266             return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1267         }
1268         if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
1269             idx=UCASE_EXC_TITLE;
1270         } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
1271             /* here, titlecase is same as uppercase */
1272             idx=UCASE_EXC_UPPER;
1273         } else {
1274             return ~c;
1275         }
1276         GET_SLOT_VALUE(excWord, idx, pe2, result);
1277     }
1278
1279     return (result==c) ? ~result : result;
1280 }
1281
1282 U_CAPI int32_t U_EXPORT2
1283 ucase_toFullUpper(UChar32 c,
1284                   UCaseContextIterator *iter, void *context,
1285                   const UChar **pString,
1286                   int32_t caseLocale) {
1287     return toUpperOrTitle(c, iter, context, pString, caseLocale, TRUE);
1288 }
1289
1290 U_CAPI int32_t U_EXPORT2
1291 ucase_toFullTitle(UChar32 c,
1292                   UCaseContextIterator *iter, void *context,
1293                   const UChar **pString,
1294                   int32_t caseLocale) {
1295     return toUpperOrTitle(c, iter, context, pString, caseLocale, FALSE);
1296 }
1297
1298 /* case folding ------------------------------------------------------------- */
1299
1300 /*
1301  * Case folding is similar to lowercasing.
1302  * The result may be a simple mapping, i.e., a single code point, or
1303  * a full mapping, i.e., a string.
1304  * If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
1305  * then only the lowercase mapping is stored.
1306  *
1307  * Some special cases are hardcoded because their conditions cannot be
1308  * parsed and processed from CaseFolding.txt.
1309  *
1310  * Unicode 3.2 CaseFolding.txt specifies for its status field:
1311
1312 # C: common case folding, common mappings shared by both simple and full mappings.
1313 # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
1314 # S: simple case folding, mappings to single characters where different from F.
1315 # T: special case for uppercase I and dotted uppercase I
1316 #    - For non-Turkic languages, this mapping is normally not used.
1317 #    - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
1318 #
1319 # Usage:
1320 #  A. To do a simple case folding, use the mappings with status C + S.
1321 #  B. To do a full case folding, use the mappings with status C + F.
1322 #
1323 #    The mappings with status T can be used or omitted depending on the desired case-folding
1324 #    behavior. (The default option is to exclude them.)
1325
1326  * Unicode 3.2 has 'T' mappings as follows:
1327
1328 0049; T; 0131; # LATIN CAPITAL LETTER I
1329 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1330
1331  * while the default mappings for these code points are:
1332
1333 0049; C; 0069; # LATIN CAPITAL LETTER I
1334 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1335
1336  * U+0130 has no simple case folding (simple-case-folds to itself).
1337  */
1338
1339 /* return the simple case folding mapping for c */
1340 U_CAPI UChar32 U_EXPORT2
1341 ucase_fold(UChar32 c, uint32_t options) {
1342     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
1343     if(!UCASE_HAS_EXCEPTION(props)) {
1344         if(UCASE_IS_UPPER_OR_TITLE(props)) {
1345             c+=UCASE_GET_DELTA(props);
1346         }
1347     } else {
1348         const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
1349         uint16_t excWord=*pe++;
1350         int32_t idx;
1351         if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
1352             /* special case folding mappings, hardcoded */
1353             if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
1354                 /* default mappings */
1355                 if(c==0x49) {
1356                     /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
1357                     return 0x69;
1358                 } else if(c==0x130) {
1359                     /* no simple case folding for U+0130 */
1360                     return c;
1361                 }
1362             } else {
1363                 /* Turkic mappings */
1364                 if(c==0x49) {
1365                     /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
1366                     return 0x131;
1367                 } else if(c==0x130) {
1368                     /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1369                     return 0x69;
1370                 }
1371             }
1372         }
1373         if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
1374             return c;
1375         }
1376         if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
1377             int32_t delta;
1378             GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
1379             return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1380         }
1381         if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
1382             idx=UCASE_EXC_FOLD;
1383         } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
1384             idx=UCASE_EXC_LOWER;
1385         } else {
1386             return c;
1387         }
1388         GET_SLOT_VALUE(excWord, idx, pe, c);
1389     }
1390     return c;
1391 }
1392
1393 /*
1394  * Issue for canonical caseless match (UAX #21):
1395  * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
1396  * canonical equivalence, unlike default-option casefolding.
1397  * For example, I-grave and I + grave fold to strings that are not canonically
1398  * equivalent.
1399  * For more details, see the comment in unorm_compare() in unorm.cpp
1400  * and the intermediate prototype changes for Jitterbug 2021.
1401  * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
1402  *
1403  * This did not get fixed because it appears that it is not possible to fix
1404  * it for uppercase and lowercase characters (I-grave vs. i-grave)
1405  * together in a way that they still fold to common result strings.
1406  */
1407
1408 U_CAPI int32_t U_EXPORT2
1409 ucase_toFullFolding(UChar32 c,
1410                     const UChar **pString,
1411                     uint32_t options) {
1412     // The sign of the result has meaning, input must be non-negative so that it can be returned as is.
1413     U_ASSERT(c >= 0);
1414     UChar32 result=c;
1415     uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
1416     if(!UCASE_HAS_EXCEPTION(props)) {
1417         if(UCASE_IS_UPPER_OR_TITLE(props)) {
1418             result=c+UCASE_GET_DELTA(props);
1419         }
1420     } else {
1421         const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2;
1422         uint16_t excWord=*pe++;
1423         int32_t full, idx;
1424
1425         pe2=pe;
1426
1427         if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
1428             /* use hardcoded conditions and mappings */
1429             if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
1430                 /* default mappings */
1431                 if(c==0x49) {
1432                     /* 0049; C; 0069; # LATIN CAPITAL LETTER I */
1433                     return 0x69;
1434                 } else if(c==0x130) {
1435                     /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1436                     *pString=iDot;
1437                     return 2;
1438                 }
1439             } else {
1440                 /* Turkic mappings */
1441                 if(c==0x49) {
1442                     /* 0049; T; 0131; # LATIN CAPITAL LETTER I */
1443                     return 0x131;
1444                 } else if(c==0x130) {
1445                     /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
1446                     return 0x69;
1447                 }
1448             }
1449         } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
1450             GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
1451
1452             /* start of full case mapping strings */
1453             ++pe;
1454
1455             /* skip the lowercase result string */
1456             pe+=full&UCASE_FULL_LOWER;
1457             full=(full>>4)&0xf;
1458
1459             if(full!=0) {
1460                 /* set the output pointer to the result string */
1461                 *pString=reinterpret_cast<const UChar *>(pe);
1462
1463                 /* return the string length */
1464                 return full;
1465             }
1466         }
1467
1468         if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) {
1469             return ~c;
1470         }
1471         if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
1472             int32_t delta;
1473             GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
1474             return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
1475         }
1476         if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
1477             idx=UCASE_EXC_FOLD;
1478         } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
1479             idx=UCASE_EXC_LOWER;
1480         } else {
1481             return ~c;
1482         }
1483         GET_SLOT_VALUE(excWord, idx, pe2, result);
1484     }
1485
1486     return (result==c) ? ~result : result;
1487 }
1488
1489 /* case mapping properties API ---------------------------------------------- */
1490
1491 /* public API (see uchar.h) */
1492
1493 U_CAPI UBool U_EXPORT2
1494 u_isULowercase(UChar32 c) {
1495     return (UBool)(UCASE_LOWER==ucase_getType(c));
1496 }
1497
1498 U_CAPI UBool U_EXPORT2
1499 u_isUUppercase(UChar32 c) {
1500     return (UBool)(UCASE_UPPER==ucase_getType(c));
1501 }
1502
1503 /* Transforms the Unicode character to its lower case equivalent.*/
1504 U_CAPI UChar32 U_EXPORT2
1505 u_tolower(UChar32 c) {
1506     return ucase_tolower(c);
1507 }
1508
1509 /* Transforms the Unicode character to its upper case equivalent.*/
1510 U_CAPI UChar32 U_EXPORT2
1511 u_toupper(UChar32 c) {
1512     return ucase_toupper(c);
1513 }
1514
1515 /* Transforms the Unicode character to its title case equivalent.*/
1516 U_CAPI UChar32 U_EXPORT2
1517 u_totitle(UChar32 c) {
1518     return ucase_totitle(c);
1519 }
1520
1521 /* return the simple case folding mapping for c */
1522 U_CAPI UChar32 U_EXPORT2
1523 u_foldCase(UChar32 c, uint32_t options) {
1524     return ucase_fold(c, options);
1525 }
1526
1527 U_CFUNC int32_t U_EXPORT2
1528 ucase_hasBinaryProperty(UChar32 c, UProperty which) {
1529     /* case mapping properties */
1530     const UChar *resultString;
1531     switch(which) {
1532     case UCHAR_LOWERCASE:
1533         return (UBool)(UCASE_LOWER==ucase_getType(c));
1534     case UCHAR_UPPERCASE:
1535         return (UBool)(UCASE_UPPER==ucase_getType(c));
1536     case UCHAR_SOFT_DOTTED:
1537         return ucase_isSoftDotted(c);
1538     case UCHAR_CASE_SENSITIVE:
1539         return ucase_isCaseSensitive(c);
1540     case UCHAR_CASED:
1541         return (UBool)(UCASE_NONE!=ucase_getType(c));
1542     case UCHAR_CASE_IGNORABLE:
1543         return (UBool)(ucase_getTypeOrIgnorable(c)>>2);
1544     /*
1545      * Note: The following Changes_When_Xyz are defined as testing whether
1546      * the NFD form of the input changes when Xyz-case-mapped.
1547      * However, this simpler implementation of these properties,
1548      * ignoring NFD, passes the tests.
1549      * The implementation needs to be changed if the tests start failing.
1550      * When that happens, optimizations should be used to work with the
1551      * per-single-code point ucase_toFullXyz() functions unless
1552      * the NFD form has more than one code point,
1553      * and the property starts set needs to be the union of the
1554      * start sets for normalization and case mappings.
1555      */
1556     case UCHAR_CHANGES_WHEN_LOWERCASED:
1557         return (UBool)(ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
1558     case UCHAR_CHANGES_WHEN_UPPERCASED:
1559         return (UBool)(ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
1560     case UCHAR_CHANGES_WHEN_TITLECASED:
1561         return (UBool)(ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
1562     /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
1563     case UCHAR_CHANGES_WHEN_CASEMAPPED:
1564         return (UBool)(
1565             ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
1566             ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
1567             ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
1568     default:
1569         return FALSE;
1570     }
1571 }