/*
 * Copyright (c) 2013 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
  /*    CFUnicodeDecomposition.c
        Copyright (c) 1999-2013, Apple Inc. All rights reserved.
        Responsibility: Aki Inoue
  */
  #include <string.h>
  #include <CoreFoundation/CFBase.h>
  #include <CoreFoundation/CFCharacterSet.h>
  #include <CoreFoundation/CFUniChar.h>
  #include <CoreFoundation/CFUnicodeDecomposition.h>
  #include "CFInternal.h"
  #include "CFUniCharPriv.h"
  37 // Canonical Decomposition 
  38 static UTF32Char 
*__CFUniCharDecompositionTable 
= NULL
; 
  39 static uint32_t __CFUniCharDecompositionTableLength 
= 0; 
  40 static UTF32Char 
*__CFUniCharMultipleDecompositionTable 
= NULL
; 
  42 static const uint8_t *__CFUniCharDecomposableBitmapForBMP 
= NULL
; 
  43 static const uint8_t *__CFUniCharHFSPlusDecomposableBitmapForBMP 
= NULL
; 
  45 static CFSpinLock_t __CFUniCharDecompositionTableLock 
= CFSpinLockInit
; 
  47 static const uint8_t **__CFUniCharCombiningPriorityTable 
= NULL
; 
  48 static uint8_t __CFUniCharCombiningPriorityTableNumPlane 
= 0; 
  50 static void __CFUniCharLoadDecompositionTable(void) { 
  52     __CFSpinLock(&__CFUniCharDecompositionTableLock
); 
  54     if (NULL 
== __CFUniCharDecompositionTable
) { 
  55         const uint32_t *bytes 
= (uint32_t *)CFUniCharGetMappingData(kCFUniCharCanonicalDecompMapping
); 
  58             __CFSpinUnlock(&__CFUniCharDecompositionTableLock
); 
  62         __CFUniCharDecompositionTableLength 
= *(bytes
++); 
  63         __CFUniCharDecompositionTable 
= (UTF32Char 
*)bytes
; 
  64         __CFUniCharMultipleDecompositionTable 
= (UTF32Char 
*)((intptr_t)bytes 
+ __CFUniCharDecompositionTableLength
); 
  66         __CFUniCharDecompositionTableLength 
/= (sizeof(uint32_t) * 2); 
  67         __CFUniCharDecomposableBitmapForBMP 
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, 0); 
  68         __CFUniCharHFSPlusDecomposableBitmapForBMP 
= CFUniCharGetBitmapPtrForPlane(kCFUniCharHFSPlusDecomposableCharacterSet
, 0); 
  72         __CFUniCharCombiningPriorityTableNumPlane 
= CFUniCharGetNumberOfPlanesForUnicodePropertyData(kCFUniCharCombiningProperty
); 
  73         __CFUniCharCombiningPriorityTable 
= (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(uint8_t *) * __CFUniCharCombiningPriorityTableNumPlane
, 0); 
  74         for (idx 
= 0;idx 
< __CFUniCharCombiningPriorityTableNumPlane
;idx
++) __CFUniCharCombiningPriorityTable
[idx
] = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, idx
); 
  77     __CFSpinUnlock(&__CFUniCharDecompositionTableLock
); 
  80 static CFSpinLock_t __CFUniCharCompatibilityDecompositionTableLock 
= CFSpinLockInit
; 
  81 static UTF32Char 
*__CFUniCharCompatibilityDecompositionTable 
= NULL
; 
  82 static uint32_t __CFUniCharCompatibilityDecompositionTableLength 
= 0; 
  83 static UTF32Char 
*__CFUniCharCompatibilityMultipleDecompositionTable 
= NULL
; 
  85 static void __CFUniCharLoadCompatibilityDecompositionTable(void) { 
  87     __CFSpinLock(&__CFUniCharCompatibilityDecompositionTableLock
); 
  89     if (NULL 
== __CFUniCharCompatibilityDecompositionTable
) { 
  90         const uint32_t *bytes 
= (uint32_t *)CFUniCharGetMappingData(kCFUniCharCompatibilityDecompMapping
); 
  93             __CFSpinUnlock(&__CFUniCharCompatibilityDecompositionTableLock
); 
  97         __CFUniCharCompatibilityDecompositionTableLength 
= *(bytes
++); 
  98         __CFUniCharCompatibilityDecompositionTable 
= (UTF32Char 
*)bytes
; 
  99         __CFUniCharCompatibilityMultipleDecompositionTable 
= (UTF32Char 
*)((intptr_t)bytes 
+ __CFUniCharCompatibilityDecompositionTableLength
); 
 101         __CFUniCharCompatibilityDecompositionTableLength 
/= (sizeof(uint32_t) * 2); 
 104     __CFSpinUnlock(&__CFUniCharCompatibilityDecompositionTableLock
); 
 107 CF_INLINE 
bool __CFUniCharIsDecomposableCharacterWithFlag(UTF32Char character
, bool isHFSPlus
) { 
 108     return CFUniCharIsMemberOfBitmap(character
, (character 
< 0x10000 ? (isHFSPlus 
? __CFUniCharHFSPlusDecomposableBitmapForBMP 
: __CFUniCharDecomposableBitmapForBMP
) : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, ((character 
>> 16) & 0xFF)))); 
 111 CF_INLINE 
uint8_t __CFUniCharGetCombiningPropertyForCharacter(UTF32Char character
) { return CFUniCharGetCombiningPropertyForCharacter(character
, (((character
) >> 16) < __CFUniCharCombiningPriorityTableNumPlane 
? __CFUniCharCombiningPriorityTable
[(character
) >> 16] : NULL
)); } 
 113 CF_INLINE 
bool __CFUniCharIsNonBaseCharacter(UTF32Char character
) { return ((0 == __CFUniCharGetCombiningPropertyForCharacter(character
)) ? false : true); } // the notion of non-base in normalization is characters with non-0 combining class 
 /*
  * One entry of a decomposition mapping table: the source character (_key)
  * and its encoded mapping (_value). Tables of these are searched by _key in
  * __CFUniCharGetMappedValue(), which requires ascending _key order.
  */
 typedef struct {
     uint32_t _key;
     uint32_t _value;
 } __CFUniCharDecomposeMappings;
 120 static uint32_t __CFUniCharGetMappedValue(const __CFUniCharDecomposeMappings 
*theTable
, uint32_t numElem
, UTF32Char character
) { 
 121     const __CFUniCharDecomposeMappings 
*p
, *q
, *divider
; 
 123     if ((character 
< theTable
[0]._key
) || (character 
> theTable
[numElem
-1]._key
)) { 
 129         divider 
= p 
+ ((q 
- p
) >> 1);    /* divide by 2 */ 
 130         if (character 
< divider
->_key
) { q 
= divider 
- 1; } 
 131         else if (character 
> divider
->_key
) { p 
= divider 
+ 1; } 
 132         else { return divider
->_value
; } 
 137 static void __CFUniCharPrioritySort(UTF32Char 
*characters
, CFIndex length
) { 
 138     UTF32Char 
*end 
= characters 
+ length
; 
 140     while ((characters 
< end
) && (0 == __CFUniCharGetCombiningPropertyForCharacter(*characters
))) ++characters
; 
 142     if ((end 
- characters
) > 1) { 
 144         UTF32Char 
*ch1
, *ch2
; 
 149             ch1 
= characters
; ch2 
= characters 
+ 1; 
 150             p2 
= __CFUniCharGetCombiningPropertyForCharacter(*ch1
); 
 152                 p1 
= p2
; p2 
= __CFUniCharGetCombiningPropertyForCharacter(*ch2
); 
 154                     UTF32Char tmp 
= *ch1
; *ch1 
= *ch2
; *ch2 
= tmp
; 
 163 static CFIndex 
__CFUniCharRecursivelyDecomposeCharacter(UTF32Char character
, UTF32Char 
*convertedChars
, CFIndex maxBufferLength
) { 
 164     uint32_t value 
= __CFUniCharGetMappedValue((const __CFUniCharDecomposeMappings 
*)__CFUniCharDecompositionTable
, __CFUniCharDecompositionTableLength
, character
); 
 165     CFIndex length 
= CFUniCharConvertFlagToCount(value
); 
 166     UTF32Char firstChar 
= value 
& 0xFFFFFF; 
 167     UTF32Char 
*mappings 
= (length 
> 1 ? __CFUniCharMultipleDecompositionTable 
+ firstChar 
: &firstChar
); 
 168     CFIndex usedLength 
= 0; 
 170     if (maxBufferLength 
< length
) return 0; 
 172     if (value 
& kCFUniCharRecursiveDecompositionFlag
) { 
 173         usedLength 
= __CFUniCharRecursivelyDecomposeCharacter(*mappings
, convertedChars
, maxBufferLength 
- length
); 
 175         --length
; // Decrement for the first char 
 176         if (!usedLength 
|| usedLength 
+ length 
> maxBufferLength
) return 0; 
 178         convertedChars 
+= usedLength
; 
 181     usedLength 
+= length
; 
 183     while (length
--) *(convertedChars
++) = *(mappings
++); 
 // Constants for arithmetic Hangul syllable decomposition: the first code
 // point of the syllable block and of the leading (L), vowel (V) and trailing
 // (T) jamo ranges, followed by the size of each range.
 #define HANGUL_SBASE 0xAC00
 #define HANGUL_LBASE 0x1100
 #define HANGUL_VBASE 0x1161
 #define HANGUL_TBASE 0x11A7
 #define HANGUL_SCOUNT 11172
 #define HANGUL_LCOUNT 19
 #define HANGUL_VCOUNT 21
 #define HANGUL_TCOUNT 28
 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
 198 CFIndex 
CFUniCharDecomposeCharacter(UTF32Char character
, UTF32Char 
*convertedChars
, CFIndex maxBufferLength
) { 
 199     if (NULL 
== __CFUniCharDecompositionTable
) __CFUniCharLoadDecompositionTable(); 
 200     if (character 
>= HANGUL_SBASE 
&& character 
<= (HANGUL_SBASE 
+ HANGUL_SCOUNT
)) { 
 203         character 
-= HANGUL_SBASE
; 
 205         length 
= (character 
% HANGUL_TCOUNT 
? 3 : 2); 
 207         if (maxBufferLength 
< length
) return 0; 
 209         *(convertedChars
++) = character 
/ HANGUL_NCOUNT 
+ HANGUL_LBASE
; 
 210         *(convertedChars
++) = (character 
% HANGUL_NCOUNT
) / HANGUL_TCOUNT 
+ HANGUL_VBASE
; 
 211         if (length 
> 2) *convertedChars 
= (character 
% HANGUL_TCOUNT
) + HANGUL_TBASE
; 
 214         return __CFUniCharRecursivelyDecomposeCharacter(character
, convertedChars
, maxBufferLength
); 
 218 CF_INLINE 
bool __CFProcessReorderBuffer(UTF32Char 
*buffer
, CFIndex length
, void **dst
, CFIndex dstLength
, CFIndex 
*filledLength
, uint32_t dstFormat
) { 
 219     if (length 
> 1) __CFUniCharPrioritySort(buffer
, length
); 
 220     return CFUniCharFillDestinationBuffer(buffer
, length
, dst
, dstLength
, filledLength
, dstFormat
); 
 223 #define MAX_BUFFER_LENGTH (32) 
 224 bool CFUniCharDecompose(const UTF16Char 
*src
, CFIndex length
, CFIndex 
*consumedLength
, void *dst
, CFIndex maxLength
, CFIndex 
*filledLength
, bool needToReorder
, uint32_t dstFormat
, bool isHFSPlus
) { 
 225     CFIndex usedLength 
= 0; 
 226     CFIndex originalLength 
= length
; 
 227     UTF32Char buffer
[MAX_BUFFER_LENGTH
]; 
 228     UTF32Char 
*decompBuffer 
= buffer
; 
 229     CFIndex decompBufferSize 
= MAX_BUFFER_LENGTH
; 
 230     CFIndex decompBufferLen 
= 0; 
 231     CFIndex segmentLength 
= 0; 
 232     UTF32Char currentChar
; 
 234     if (NULL 
== __CFUniCharDecompositionTable
) __CFUniCharLoadDecompositionTable(); 
 236     while ((length 
- segmentLength
) > 0) { 
 237         currentChar 
= *(src
++); 
 239         if (currentChar 
< 0x80) { 
 240             if (decompBufferLen 
> 0) { 
 241                 if (!__CFProcessReorderBuffer(decompBuffer
, decompBufferLen
, &dst
, maxLength
, &usedLength
, dstFormat
)) break; 
 242                 length 
-= segmentLength
; 
 248                 if (usedLength 
>= maxLength
) break; 
 250                 case kCFUniCharUTF8Format
: *(uint8_t *)dst 
= currentChar
; dst 
= (uint8_t *)dst 
+ sizeof(uint8_t); break; 
 251                 case kCFUniCharUTF16Format
: *(UTF16Char 
*)dst 
= currentChar
; dst 
= (uint8_t *)dst 
+ sizeof(UTF16Char
); break; 
 252                 case kCFUniCharUTF32Format
: *(UTF32Char 
*)dst 
= currentChar
; dst 
= (uint8_t *)dst 
+ sizeof(UTF32Char
); break; 
 259             if (CFUniCharIsSurrogateLowCharacter(currentChar
)) { // Stray surrogagte 
 260                 if (dstFormat 
!= kCFUniCharUTF16Format
) break; 
 261             } else if (CFUniCharIsSurrogateHighCharacter(currentChar
)) { 
 262                 if (((length 
- segmentLength
) > 1) && CFUniCharIsSurrogateLowCharacter(*src
)) { 
 263                     currentChar 
= CFUniCharGetLongCharacterForSurrogatePair(currentChar
, *(src
++)); 
 265                     if (dstFormat 
!= kCFUniCharUTF16Format
) break; 
 269             if (needToReorder 
&& __CFUniCharIsNonBaseCharacter(currentChar
)) { 
 270                 if ((decompBufferLen 
+ 1) >= decompBufferSize
) { 
 271                     UTF32Char 
*newBuffer
; 
 273                     decompBufferSize 
+= MAX_BUFFER_LENGTH
; 
 274                     newBuffer 
= (UTF32Char 
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF32Char
) * decompBufferSize
, 0); 
 275                     memmove(newBuffer
, decompBuffer
, (decompBufferSize 
- MAX_BUFFER_LENGTH
) * sizeof(UTF32Char
)); 
 276                     if (decompBuffer 
!= buffer
) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, decompBuffer
); 
 277                     decompBuffer 
= newBuffer
; 
 280                 if (__CFUniCharIsDecomposableCharacterWithFlag(currentChar
, isHFSPlus
)) { // Vietnamese accent, etc. 
 281                     decompBufferLen 
+= CFUniCharDecomposeCharacter(currentChar
, decompBuffer 
+ decompBufferLen
, decompBufferSize 
- decompBufferLen
); 
 283                     decompBuffer
[decompBufferLen
++] = currentChar
; 
 286                 if (decompBufferLen 
> 0) { 
 287                     if (!__CFProcessReorderBuffer(decompBuffer
, decompBufferLen
, &dst
, maxLength
, &usedLength
, dstFormat
)) break; 
 288                     length 
-= segmentLength
; 
 292                 if (__CFUniCharIsDecomposableCharacterWithFlag(currentChar
, isHFSPlus
)) { 
 293                     decompBufferLen 
= CFUniCharDecomposeCharacter(currentChar
, decompBuffer
, MAX_BUFFER_LENGTH
); 
 296                     *decompBuffer 
= currentChar
; 
 299                 if (!needToReorder 
|| (decompBufferLen 
== 1)) { 
 300                     if (!CFUniCharFillDestinationBuffer(decompBuffer
, decompBufferLen
, &dst
, maxLength
, &usedLength
, dstFormat
)) break; 
 301                     length 
-= ((currentChar 
> 0xFFFF) ? 2 : 1); 
 307             segmentLength 
+= ((currentChar 
> 0xFFFF) ? 2 : 1); 
 311     if ((decompBufferLen 
> 0) && __CFProcessReorderBuffer(decompBuffer
, decompBufferLen
, &dst
, maxLength
, &usedLength
, dstFormat
)) length 
-= segmentLength
; 
 313     if (decompBuffer 
!= buffer
) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, decompBuffer
); 
 315     if (consumedLength
) *consumedLength 
= originalLength 
- length
; 
 316     if (filledLength
) *filledLength 
= usedLength
; 
 318     return ((length 
> 0) ? false : true); 
 321 #define MAX_COMP_DECOMP_LEN (32) 
 323 static CFIndex 
__CFUniCharRecursivelyCompatibilityDecomposeCharacter(UTF32Char character
, UTF32Char 
*convertedChars
) { 
 324     uint32_t value 
= __CFUniCharGetMappedValue((const __CFUniCharDecomposeMappings 
*)__CFUniCharCompatibilityDecompositionTable
, __CFUniCharCompatibilityDecompositionTableLength
, character
); 
 325     CFIndex length 
= CFUniCharConvertFlagToCount(value
); 
 326     UTF32Char firstChar 
= value 
& 0xFFFFFF; 
 327     const UTF32Char 
*mappings 
= (length 
> 1 ? __CFUniCharCompatibilityMultipleDecompositionTable 
+ firstChar 
: &firstChar
); 
 328     CFIndex usedLength 
= length
; 
 329     UTF32Char currentChar
; 
 330     CFIndex currentLength
; 
 332     while (length
-- > 0) { 
 333         currentChar 
= *(mappings
++); 
 334         if (__CFUniCharIsDecomposableCharacterWithFlag(currentChar
, false)) { 
 335             currentLength 
= __CFUniCharRecursivelyDecomposeCharacter(currentChar
, convertedChars
, MAX_COMP_DECOMP_LEN 
- length
); 
 336             convertedChars 
+= currentLength
; 
 337             usedLength 
+= (currentLength 
- 1); 
 338         } else if (CFUniCharIsMemberOf(currentChar
, kCFUniCharCompatibilityDecomposableCharacterSet
)) { 
 339             currentLength 
= __CFUniCharRecursivelyCompatibilityDecomposeCharacter(currentChar
, convertedChars
); 
 340             convertedChars 
+= currentLength
; 
 341             usedLength 
+= (currentLength 
- 1); 
 343             *(convertedChars
++) = currentChar
; 
 350 CF_INLINE 
void __CFUniCharMoveBufferFromEnd1(UTF32Char 
*convertedChars
, CFIndex length
, CFIndex delta
) { 
 351     const UTF32Char 
*limit 
= convertedChars
; 
 354     convertedChars 
+= length
; 
 355     dstP 
= convertedChars 
+ delta
; 
 357     while (convertedChars 
> limit
) *(--dstP
) = *(--convertedChars
); 
 360 CF_PRIVATE CFIndex 
CFUniCharCompatibilityDecompose(UTF32Char 
*convertedChars
, CFIndex length
, CFIndex maxBufferLength
) { 
 361     UTF32Char currentChar
; 
 362     UTF32Char buffer
[MAX_COMP_DECOMP_LEN
]; 
 363     const UTF32Char 
*bufferP
; 
 364     const UTF32Char 
*limit 
= convertedChars 
+ length
; 
 365     CFIndex filledLength
; 
 367     if (NULL 
== __CFUniCharCompatibilityDecompositionTable
) __CFUniCharLoadCompatibilityDecompositionTable(); 
 369     while (convertedChars 
< limit
) { 
 370         currentChar 
= *convertedChars
; 
 372         if (CFUniCharIsMemberOf(currentChar
, kCFUniCharCompatibilityDecomposableCharacterSet
)) { 
 373             filledLength 
= __CFUniCharRecursivelyCompatibilityDecomposeCharacter(currentChar
, buffer
); 
 375             if (filledLength 
+ length 
- 1 > maxBufferLength
) return 0; 
 377             if (filledLength 
> 1) __CFUniCharMoveBufferFromEnd1(convertedChars 
+ 1, limit 
- convertedChars 
- 1, filledLength 
- 1); 
 380             length 
+= (filledLength 
- 1); 
 381             while (filledLength
-- > 0) *(convertedChars
++) = *(bufferP
++); 
 390 CF_EXPORT 
void CFUniCharPrioritySort(UTF32Char 
*characters
, CFIndex length
) { 
 391     __CFUniCharPrioritySort(characters
, length
); 
 // The scratch-buffer sizes are private to this file.
 #undef MAX_BUFFER_LENGTH
 #undef MAX_COMP_DECOMP_LEN