+#define ZERO_WIDTH_JOINER (0x200D) // U+200D ZERO WIDTH JOINER
+#define COMBINING_GRAPHEME_JOINER (0x034F) // U+034F COMBINING GRAPHEME JOINER
+// Hangul ranges (START/END code point pairs; presumably inclusive bounds -- confirm against the code that uses them)
+#define HANGUL_CHOSEONG_START (0x1100) // first conjoining leading-consonant (choseong) jamo
+#define HANGUL_CHOSEONG_END (0x115F) // last code point treated as choseong
+#define HANGUL_JUNGSEONG_START (0x1160) // first conjoining vowel (jungseong) jamo
+#define HANGUL_JUNGSEONG_END (0x11A2) // last code point treated as jungseong
+#define HANGUL_JONGSEONG_START (0x11A8) // first conjoining trailing-consonant (jongseong) jamo
+#define HANGUL_JONGSEONG_END (0x11F9) // last code point treated as jongseong
+
+#define HANGUL_SYLLABLE_START (0xAC00) // first precomposed Hangul syllable
+#define HANGUL_SYLLABLE_END (0xD7AF) // end of the range treated as precomposed Hangul syllables
+
+
+// Folds the character cluster that begins with `character` (read from `buffer` at `index`) according to `flags`
+// (case-, width-, diacritics-insensitive and/or nonliteral comparison) and writes the folded UTF-32 sequence to outCharacters.
+//
+// Returns the number of characters filled into outCharacters. If no change, returns 0.
+// maxBufferLength should be at least 8.
+// langCode may be NULL; otherwise it is a NUL-terminated language code. "tr" and "az" enable the Turkic handling of
+// capital I followed by U+0307.
+// When the result is non-zero and consumedLength is non-NULL, *consumedLength receives the number of buffer positions
+// (starting at index) that were consumed to produce the cluster.
+static inline CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength) {
+    CFIndex filledLength = 0, currentIndex = index;
+
+    if (0 != character) {
+        UTF16Char lowSurrogate;
+        CFIndex planeNo = (character >> 16);
+        bool isTurkikCapitalI = false;
+        // BMP bitmaps are looked up once and cached for subsequent calls
+        static const uint8_t *decompBMP = NULL;
+        static const uint8_t *nonBaseBMP = NULL;
+
+        if (NULL == decompBMP) {
+            decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
+            nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
+        }
+
+        ++currentIndex; // the base character itself is consumed
+
+        // 'I' with a language code is excluded from the ASCII fast path so the Turkic special-casing below can see it
+        if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII
+            if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) {
+                character += ('a' - 'A');
+                *outCharacters = character;
+                filledLength = 1;
+            }
+        } else {
+            // do width-insensitive mapping
+            if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) {
+                (void)CFUniCharCompatibilityDecompose(&character, 1, 1);
+                *outCharacters = character;
+                filledLength = 1;
+            }
+
+            // map surrogates
+            if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) {
+                character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
+                ++currentIndex;
+                planeNo = (character >> 16);
+            }
+
+            // decompose
+            if (flags & (kCFCompareDiacriticsInsensitive|kCFCompareNonliteral)) {
+                if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) {
+                    filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength);
+                    character = *outCharacters;
+                    if ((flags & kCFCompareDiacriticsInsensitive) && (character < 0x0510)) filledLength = 1; // reset if Roman, Greek, Cyrillic
+                }
+            }
+
+            // fold case
+            if (flags & kCFCompareCaseInsensitive) {
+                const uint8_t *nonBaseBitmap;
+                bool filterNonBase = (((flags & kCFCompareDiacriticsInsensitive) && (character < 0x0510)) ? true : false);
+                static const uint8_t *lowerBMP = NULL;
+                static const uint8_t *caseFoldBMP = NULL;
+
+                if (NULL == lowerBMP) {
+                    lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0);
+                    caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0);
+                }
+
+                if ((NULL != langCode) && ('I' == character) && ((0 == strcmp((const char *)langCode, "tr")) || (0 == strcmp((const char *)langCode, "az")))) { // do Turkik special-casing
+                    if (filledLength > 1) {
+                        if (0x0307 == outCharacters[1]) {
+                            // Drop the U+0307 held in outCharacters[1] by shifting any following characters down one slot.
+                            // Offsets are relative to the output buffer; `index` is a position in the source string and must not be used here.
+                            if (--filledLength > 1) memmove((outCharacters + 1), (outCharacters + 2), sizeof(UTF32Char) * (filledLength - 1));
+                            character = *outCharacters = 'i';
+                            isTurkikCapitalI = true;
+                        }
+                    } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) {
+                        // 'I' followed by U+0307 in the source string: fold the pair to 'i' and consume the mark
+                        character = *outCharacters = 'i';
+                        filledLength = 1;
+                        ++currentIndex;
+                        isTurkikCapitalI = true;
+                    }
+                }
+                if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) {
+                    UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF];
+                    const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit;
+                    UTF32Char *outCharactersP = outCharacters;
+                    uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode);
+
+                    bufferLimit = bufferP + bufferLength;
+
+                    if (filledLength > 0) --filledLength; // decrement filledLength (will add back later)
+
+                    // make space for casefold characters
+                    if ((filledLength > 0) && (bufferLength > 1)) {
+                        CFIndex totalScalerLength = 0;
+
+                        // count the case-folded result in UTF-32 scalars (a surrogate pair counts once)
+                        while (bufferP < bufferLimit) {
+                            if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP;
+                            ++totalScalerLength;
+                        }
+                        memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char));
+                        bufferP = caseFoldBuffer;
+                    }
+
+                    // fill
+                    while (bufferP < bufferLimit) {
+                        character = *(bufferP++);
+                        if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) {
+                            character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++));
+                            nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
+                        } else {
+                            nonBaseBitmap = nonBaseBMP;
+                        }
+
+                        if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
+                            *(outCharactersP++) = character;
+                            ++filledLength;
+                        }
+                    }
+                }
+            }
+        }
+
+        // collect following combining marks
+        if (flags & (kCFCompareDiacriticsInsensitive|kCFCompareNonliteral)) {
+            const uint8_t *nonBaseBitmap;
+            const uint8_t *decompBitmap;
+            // with diacritics-insensitive comparison, marks following a base below 0x0510 are consumed but not stored
+            bool doFill = (((flags & kCFCompareDiacriticsInsensitive) && (character < 0x0510)) ? false : true);
+
+            if (doFill && (0 == filledLength)) { // check if really needs to fill
+                UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
+
+                if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
+                    nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate);
+                    nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nonBaseCharacter >> 16));
+                    decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16));
+                } else {
+                    nonBaseBitmap = nonBaseBMP;
+                    decompBitmap = decompBMP;
+                }
+
+                if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) {
+                    outCharacters[filledLength++] = character; // the unchanged base character leads the cluster
+
+                    if ((0 == (flags & kCFCompareDiacriticsInsensitive)) || (nonBaseCharacter > 0x050F)) {
+                        if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) {
+                            filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength);
+                        } else {
+                            outCharacters[filledLength++] = nonBaseCharacter;
+                        }
+                    }
+                    currentIndex += ((nonBaseBitmap == nonBaseBMP) ? 1 : 2); // a surrogate pair occupies two buffer positions
+                } else {
+                    doFill = false;
+                }
+            }
+
+            while (filledLength < maxBufferLength) { // do the rest
+                character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
+
+                if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
+                    character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
+                    nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
+                    decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16));
+                } else {
+                    nonBaseBitmap = nonBaseBMP;
+                    decompBitmap = decompBMP;
+                }
+                if (isTurkikCapitalI) {
+                    // first pass after the Turkic mapping only clears the flag; nothing is stored or consumed
+                    isTurkikCapitalI = false;
+                } else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
+                    if (doFill && ((0 == (flags & kCFCompareDiacriticsInsensitive)) || (character > 0x050F))) {
+                        if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) {
+                            CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength);
+
+                            if (0 == currentLength) break; // didn't fit
+
+                            filledLength += currentLength;
+                        } else {
+                            outCharacters[filledLength++] = character;
+                        }
+                    }
+                    currentIndex += ((nonBaseBitmap == nonBaseBMP) ? 1 : 2);
+                } else {
+                    break; // not a non-base character: the cluster ends here
+                }
+            }
+
+            if (filledLength > 1) CFUniCharPrioritySort(outCharacters, filledLength); // priority sort
+        }
+    }
+
+    // consumedLength is only reported when something was written to outCharacters
+    if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index);
+
+    return filledLength;
+}