2 * Copyright (c) 2009 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
23 /* CFUnicodePrecomposition.c
24 Copyright (c) 1999-2009, Apple Inc. All rights reserved.
25 Responsibility: Aki Inoue
29 #include <CoreFoundation/CFBase.h>
30 #include <CoreFoundation/CFCharacterSet.h>
31 #include "CFUniChar.h"
32 #include "CFUnicodePrecomposition.h"
33 #include "CFInternal.h"
34 #include "CFUniCharPriv.h"
36 // Canonical Precomposition
// Pointer into the canonical precomposition mapping data, just past its two
// leading 32-bit header words. NULL means the tables have not been loaded yet.
37 static UTF32Char
*__CFUniCharPrecompSourceTable
= NULL
;
// First header word of the mapping data; passed as the element count when the
// source table is searched.
38 static uint32_t __CFUniCharPrecompositionTableLength
= 0;
// Starts (table length * sizeof(UTF32Char) * 2) bytes after the source table.
39 static uint16_t *__CFUniCharBMPPrecompDestinationTable
= NULL
;
// Starts bmpMappingLength bytes (the second header word) after the BMP
// destination table.
40 static uint32_t *__CFUniCharNonBMPPrecompDestinationTable
= NULL
;
// Cached plane-0 bitmap for kCFUniCharNonBaseCharacterSet.
42 static const uint8_t *__CFUniCharNonBaseBitmapForBMP_P
= NULL
; // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c
// Cached plane-0 data for kCFUniCharCombiningProperty.
43 static const uint8_t *__CFUniCharCombiningClassForBMP
= NULL
;
// Held by the table loader while it initialises the pointers above.
45 static CFSpinLock_t __CFUniCharPrecompositionTableLock
= CFSpinLockInit
;
// Populates the file-scope precomposition tables from the data returned by
// CFUniCharGetMappingData(kCFUniCharCanonicalPrecompMapping). The work is done
// while holding __CFUniCharPrecompositionTableLock and is skipped when
// __CFUniCharPrecompSourceTable is already non-NULL.
47 static void __CFUniCharLoadPrecompositionTable(void) {
49 __CFSpinLock(&__CFUniCharPrecompositionTableLock
);
// Tested while holding the lock, so a caller that finds the table already set
// does not repeat the setup.
51 if (NULL
== __CFUniCharPrecompSourceTable
) {
52 const uint32_t *bytes
= (const uint32_t *)CFUniCharGetMappingData(kCFUniCharCanonicalPrecompMapping
);
53 uint32_t bmpMappingLength
;
// NOTE(review): this unlock comes before the table reads below; it presumably
// belongs to an early-return path (e.g. missing mapping data) whose guard is
// not visible here - confirm the lock is not released twice on the normal path.
56 __CFSpinUnlock(&__CFUniCharPrecompositionTableLock
);
// The first two 32-bit words are header fields: the source table entry count,
// then a length that is used below as the byte size of the BMP destination table.
60 __CFUniCharPrecompositionTableLength
= *(bytes
++);
61 bmpMappingLength
= *(bytes
++);
// The source table starts immediately after the two header words.
62 __CFUniCharPrecompSourceTable
= (UTF32Char
*)bytes
;
// Address arithmetic is done on intptr_t, so both offsets below are in bytes.
63 __CFUniCharBMPPrecompDestinationTable
= (uint16_t *)((intptr_t)bytes
+ (__CFUniCharPrecompositionTableLength
* sizeof(UTF32Char
) * 2));
64 __CFUniCharNonBMPPrecompDestinationTable
= (uint32_t *)(((intptr_t)__CFUniCharBMPPrecompDestinationTable
) + bmpMappingLength
);
// Cache the plane-0 lookups so BMP characters avoid a per-call plane fetch.
66 __CFUniCharNonBaseBitmapForBMP_P
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
67 __CFUniCharCombiningClassForBMP
= (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
70 __CFSpinUnlock(&__CFUniCharPrecompositionTableLock
);
73 // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c
74 #define __CFUniCharIsNonBaseCharacter __CFUniCharIsNonBaseCharacter_P
// Returns whether `character` is a member of kCFUniCharNonBaseCharacterSet.
// Characters below 0x10000 are tested against the cached plane-0 bitmap (which
// is NULL until the table loader has run); all others fetch the bitmap for
// plane ((character >> 16) & 0xFF) on each call.
75 CF_INLINE
bool __CFUniCharIsNonBaseCharacter(UTF32Char character
) {
76 return CFUniCharIsMemberOfBitmap(character
, (character
< 0x10000 ? __CFUniCharNonBaseBitmapForBMP_P
: CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, ((character
>> 16) & 0xFF))));
82 } __CFUniCharPrecomposeBMPMappings
;
// Looks up `character` by _key among the `numElem` entries of `theTable` and
// returns the matching entry's _value. The comparisons below only make sense
// if the entries are ordered by ascending _key - presumably guaranteed by the
// data file; confirm.
// NOTE(review): the setup of p/q, the loop header and the not-found return are
// not visible here.
84 static UTF16Char
__CFUniCharGetMappedBMPValue(const __CFUniCharPrecomposeBMPMappings
*theTable
, uint32_t numElem
, UTF16Char character
) {
85 const __CFUniCharPrecomposeBMPMappings
*p
, *q
, *divider
;
// Quick rejection: the key lies outside the range covered by the first and
// last entries. Reads theTable[numElem-1], so numElem must be at least 1.
87 if ((character
< theTable
[0]._key
) || (character
> theTable
[numElem
-1]._key
)) {
// Probe the midpoint of [p, q] and narrow to the half that can hold the key.
93 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
94 if (character
< divider
->_key
) { q
= divider
- 1; }
95 else if (character
> divider
->_key
) { p
= divider
+ 1; }
96 else { return divider
->_value
; }
104 } __CFUniCharPrecomposeMappings
;
// 32-bit counterpart of __CFUniCharGetMappedBMPValue: looks up `character` by
// _key among the `numElem` entries of `theTable` and returns the matching
// entry's _value. The comparisons below only make sense if the entries are
// ordered by ascending _key - presumably guaranteed by the data file; confirm.
// NOTE(review): the setup of p/q, the loop header and the not-found return are
// not visible here.
106 static uint32_t __CFUniCharGetMappedValue_P(const __CFUniCharPrecomposeMappings
*theTable
, uint32_t numElem
, UTF32Char character
) {
107 const __CFUniCharPrecomposeMappings
*p
, *q
, *divider
;
// Quick rejection: the key lies outside the range covered by the first and
// last entries. Reads theTable[numElem-1], so numElem must be at least 1.
109 if ((character
< theTable
[0]._key
) || (character
> theTable
[numElem
-1]._key
)) {
// Probe the midpoint of [p, q] and narrow to the half that can hold the key.
115 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
116 if (character
< divider
->_key
) { q
= divider
- 1; }
117 else if (character
> divider
->_key
) { p
= divider
+ 1; }
118 else { return divider
->_value
; }
// Returns the precomposed character for the pair (`base`, `combining`), or
// 0xFFFD when either lookup yields 0.
// The lookup has two stages: `combining` is searched in the source table, and
// the value found there locates a per-combining-character sub-table that is
// then searched for `base`.
124 UTF32Char
CFUniCharPrecomposeCharacter(UTF32Char base
, UTF32Char combining
) {
// Load the tables on first use. This NULL test is made without taking the
// lock; the loader repeats it while holding the lock.
127 if (NULL
== __CFUniCharPrecompSourceTable
) __CFUniCharLoadPrecompositionTable();
// Stage 1: a zero result is treated as "no entry for this combining character".
129 if (!(value
= __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings
*)__CFUniCharPrecompSourceTable
, __CFUniCharPrecompositionTableLength
, combining
))) return 0xFFFD;
// NOTE(review): the comment below looks stale - the branch it precedes does
// search the non-BMP destination table when kCFUniCharNonBmpFlag is set.
131 // We don't have precomposition in non-BMP
132 if (value
& kCFUniCharNonBmpFlag
) {
// Stage 2 (non-BMP): the low 16 bits of `value` are an offset in uint32_t units
// into the non-BMP destination table; bits 16..30 are the entry count.
133 value
= __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings
*)((uint32_t *)__CFUniCharNonBMPPrecompDestinationTable
+ (value
& 0xFFFF)), (value
>> 16) & 0x7FFF, base
);
// Stage 2 (BMP): same encoding, except the offset (still in uint32_t units) is
// applied to the BMP destination table and the count is the full upper half.
// `base` is passed to a UTF16Char parameter here.
135 value
= __CFUniCharGetMappedBMPValue((const __CFUniCharPrecomposeBMPMappings
*)((uint32_t *)__CFUniCharBMPPrecompDestinationTable
+ (value
& 0xFFFF)), (value
>> 16), base
);
// 0 from stage 2 means no composition exists for this pair.
137 return (value
? value
: 0xFFFD);
// Constants for arithmetic Hangul syllable composition. The names and values
// correspond to SBase/LBase/VBase/TBase and SCount/LCount/VCount/TCount/NCount
// in the Unicode Standard's Hangul syllable composition algorithm.
// A composed syllable is (lIndex * VCOUNT + vIndex) * TCOUNT + tIndex + SBASE.
140 #define HANGUL_SBASE 0xAC00
141 #define HANGUL_LBASE 0x1100
142 #define HANGUL_VBASE 0x1161
143 #define HANGUL_TBASE 0x11A7
144 #define HANGUL_SCOUNT 11172
145 #define HANGUL_LCOUNT 19
146 #define HANGUL_VCOUNT 21
147 #define HANGUL_TCOUNT 28
148 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
// Moves the `length` UTF-16 units starting at `convertedChars` up by `delta`
// positions. Units are copied one at a time from the last to the first, so for
// a positive `delta` the overlapping source and destination ranges are handled
// correctly. The caller must have room for `delta` extra units past the end.
150 CF_INLINE
void __CFUniCharMoveBufferFromEnd0(UTF16Char
*convertedChars
, CFIndex length
, CFIndex delta
) {
// `limit` marks the first unit to move; copying stops once the read cursor
// has walked back down to it.
151 const UTF16Char
*limit
= convertedChars
;
// Position the read cursor one past the last unit, and the write cursor
// `delta` units beyond that.
154 convertedChars
+= length
;
155 dstP
= convertedChars
+ delta
;
157 while (convertedChars
> limit
) *(--dstP
) = *(--convertedChars
);
// Reads UTF-16 from `characters` and writes UTF-16 to `precomposed`, folding a
// base character together with the non-base characters that follow it (via
// CFUniCharPrecomposeCharacter) and composing Hangul jamo arithmetically.
//   characters / length       input buffer and its length in UTF-16 units
//   consumedLength            optional out: set to originalLength - length
//   precomposed / maxLength   output buffer; writes are preceded by
//                             `usedLength > maxLength` checks
//   filledLength              optional out: set to usedLength
// NOTE(review): the loop header, the usedLength/length updates and the return
// statements are not visible here, so the exact return contract is not
// documented - confirm against the header.
160 bool CFUniCharPrecompose(const UTF16Char
*characters
, CFIndex length
, CFIndex
*consumedLength
, UTF16Char
*precomposed
, CFIndex maxLength
, CFIndex
*filledLength
) {
// 0xFFFD in precomposedChar means "no composition pending".
161 UTF32Char currentChar
= 0, lastChar
= 0, precomposedChar
= 0xFFFD;
162 CFIndex originalLength
= length
, usedLength
= 0;
// currentBase is the output position of the base character that later
// combining characters may be folded into.
163 UTF16Char
*currentBase
= precomposed
;
164 uint8_t currentClass
, lastClass
= 0;
165 bool currentBaseIsBMP
= true;
// Load the tables on first use (unlocked NULL test; the loader re-checks).
168 if (NULL
== __CFUniCharPrecompSourceTable
) __CFUniCharLoadPrecompositionTable();
171 currentChar
= *(characters
++);
// Join a high/low surrogate pair into one code point when a low surrogate follows.
174 if (CFUniCharIsSurrogateHighCharacter(currentChar
) && (length
> 0) && CFUniCharIsSurrogateLowCharacter(*characters
)) {
175 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(currentChar
, *(characters
++));
// A non-base character that follows some earlier character: try to compose.
179 if (lastChar
&& __CFUniCharIsNonBaseCharacter(currentChar
)) {
// If an earlier mark already composed with the base, keep composing from that
// result rather than from the original base.
180 isPrecomposed
= (precomposedChar
== 0xFFFD ? false : true);
181 if (isPrecomposed
) lastChar
= precomposedChar
;
// Combining class: non-BMP characters use the general property lookup, BMP
// characters use the cached plane-0 table.
183 currentClass
= (currentChar
> 0xFFFF ? CFUniCharGetUnicodeProperty(currentChar
, kCFUniCharCombiningProperty
) : CFUniCharGetCombiningPropertyForCharacter(currentChar
, __CFUniCharCombiningClassForBMP
));
// Only attempt composition when no mark has been left uncomposed yet
// (lastClass == 0) or this mark has a higher combining class than the last one.
185 if ((lastClass
== 0) || (currentClass
> lastClass
)) {
186 if ((precomposedChar
= CFUniCharPrecomposeCharacter(lastChar
, currentChar
)) == 0xFFFD) {
// Composition failed: restore the previously composed value, if any, and
// record this mark's class.
187 if (isPrecomposed
) precomposedChar
= lastChar
;
188 lastClass
= currentClass
;
// Emit the mark itself; non-BMP characters are written as a surrogate pair.
193 if (currentChar
> 0xFFFF) { // Non-BMP
195 if (usedLength
> maxLength
) break;
196 currentChar
-= 0x10000;
197 *(precomposed
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
198 *(precomposed
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
201 if (usedLength
> maxLength
) break;
202 *(precomposed
++) = (UTF16Char
)currentChar
;
// Hangul: a leading consonant followed by a vowel (and optionally a trailing
// consonant) is composed arithmetically instead of through the tables.
205 if ((currentChar
>= HANGUL_LBASE
) && (currentChar
< (HANGUL_LBASE
+ 0xFF))) { // Hangul Jamo
// NOTE(review): the difference can be as large as 0xFE, which does not fit in
// int8_t; the narrowing is implementation-defined and the `0 <= lIndex` test
// below appears to rely on large values becoming negative - confirm.
206 int8_t lIndex
= currentChar
- HANGUL_LBASE
;
// NOTE(review): the upper bounds here and below are inclusive (<= LCOUNT,
// <= VCOUNT, > TCOUNT). The Unicode composition algorithm uses strict bounds
// (index < count), so index == count would compose to a wrong syllable - verify.
208 if ((length
> 0) && (0 <= lIndex
) && (lIndex
<= HANGUL_LCOUNT
)) {
209 int16_t vIndex
= *characters
- HANGUL_VBASE
;
211 if ((vIndex
>= 0) && (vIndex
<= HANGUL_VCOUNT
)) {
// The vowel is consumed.
214 ++characters
; --length
;
// Optional trailing consonant.
217 tIndex
= *characters
- HANGUL_TBASE
;
218 if ((tIndex
< 0) || (tIndex
> HANGUL_TCOUNT
)) {
221 ++characters
; --length
;
224 currentChar
= (lIndex
* HANGUL_VCOUNT
+ vIndex
) * HANGUL_TCOUNT
+ tIndex
+ HANGUL_SBASE
;
// A composition is pending for the previous base: write it into the slot at
// currentBase, resizing that slot when the UTF-16 width changes.
229 if (precomposedChar
!= 0xFFFD) {
// NOTE(review): the trailing "// Non-BMP" comment on the next line does not
// match the condition it sits on (currentBaseIsBMP) - confirm the intent.
230 if (currentBaseIsBMP
) { // Non-BMP
231 if (lastChar
> 0xFFFF) { // Last char was Non-BMP
// Close the one-unit gap: everything after the old surrogate pair moves down
// by one unit.
233 memmove(currentBase
+ 1, currentBase
+ 2, (precomposed
- (currentBase
+ 2)) * sizeof(UTF16Char
));
235 *(currentBase
) = (UTF16Char
)precomposedChar
;
237 if (lastChar
< 0x10000) { // Last char was BMP
239 if (usedLength
> maxLength
) break;
// Open a one-unit gap after the base so a surrogate pair fits.
240 __CFUniCharMoveBufferFromEnd0(currentBase
+ 1, precomposed
- (currentBase
+ 1), 1);
242 precomposedChar
-= 0x10000;
243 *currentBase
= (UTF16Char
)((precomposedChar
>> 10) + 0xD800UL
);
244 *(currentBase
+ 1) = (UTF16Char
)((precomposedChar
& 0x3FF) + 0xDC00UL
);
// Reset the pending state; the character about to be written becomes the new base.
246 precomposedChar
= 0xFFFD;
248 currentBase
= precomposed
;
250 lastChar
= currentChar
;
// Emit the new base and remember whether it occupies one or two UTF-16 units.
253 if (currentChar
> 0xFFFF) { // Non-BMP
255 if (usedLength
> maxLength
) break;
256 currentChar
-= 0x10000;
257 *(precomposed
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
258 *(precomposed
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
259 currentBaseIsBMP
= false;
262 if (usedLength
> maxLength
) break;
263 *(precomposed
++) = (UTF16Char
)currentChar
;
264 currentBaseIsBMP
= true;
// Final flush of a pending composition into the currentBase slot.
// NOTE(review): this path tests `currentChar > 0xFFFF`, whereas the earlier
// flush tests currentBaseIsBMP - confirm the difference is intended.
269 if (precomposedChar
!= 0xFFFD) {
270 if (currentChar
> 0xFFFF) { // Non-BMP
271 if (lastChar
< 0x10000) { // Last char was BMP
// When this check trips, the out-parameters are stored before leaving.
273 if (usedLength
> maxLength
) {
274 if (consumedLength
) *consumedLength
= originalLength
- length
;
275 if (filledLength
) *filledLength
= usedLength
;
// Open a one-unit gap after the base so a surrogate pair fits.
278 __CFUniCharMoveBufferFromEnd0(currentBase
+ 1, precomposed
- (currentBase
+ 1), 1);
280 precomposedChar
-= 0x10000;
281 *currentBase
= (UTF16Char
)((precomposedChar
>> 10) + 0xD800UL
);
282 *(currentBase
+ 1) = (UTF16Char
)((precomposedChar
& 0x3FF) + 0xDC00UL
);
284 if (lastChar
> 0xFFFF) { // Last char was Non-BMP
// Close the one-unit gap left by replacing a surrogate pair with one unit.
286 memmove(currentBase
+ 1, currentBase
+ 2, (precomposed
- (currentBase
+ 2)) * sizeof(UTF16Char
));
288 *(currentBase
) = (UTF16Char
)precomposedChar
;
// Report how much input was consumed and how much output was produced; both
// out-parameters are optional.
292 if (consumedLength
) *consumedLength
= originalLength
- length
;
293 if (filledLength
) *filledLength
= usedLength
;
298 #undef __CFUniCharIsNonBaseCharacter