/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*	CFUnicodePrecomposition.c
	Copyright 1999-2002, Apple, Inc. All rights reserved.
	Responsibility: Aki Inoue
*/
36 #include "CFUnicodePrecomposition.h"
37 #include "CFUniCharPrecompData.h"
39 #include <CoreFoundation/CFBase.h>
40 #include <CoreFoundation/CFCharacterSet.h>
41 #include "CFUniChar.h"
42 #include "CFUnicodePrecomposition.h"
43 #include "CFInternal.h"
44 #include "CFUniCharPriv.h"
47 // Canonical Precomposition
// Entry count of __CFUniCharPrecompSourceTable, computed at compile time as the
// table's byte size divided by the size of two uint32_t values (each entry is
// counted as a pair of 32-bit words).
// NOTE(review): the leading integers on these lines look like line numbers left
// over from the listing this text was extracted from; they are not C tokens.
// NOTE(review): a second, non-const definition of this name and a pointer-typed
// __CFUniCharPrecompSourceTable appear further down in this file. The two look
// like alternative revisions -- confirm which one is meant to be compiled.
49 static const uint32_t __CFUniCharPrecompositionTableLength
= (sizeof(__CFUniCharPrecompSourceTable
) / (sizeof(uint32_t) * 2));
// Per-plane first-level combining-priority tables, indexed by (character >> 16)
// in __CFUniCharGetCombiningPriority.
50 CF_EXPORT
uint8_t **CFUniCharCombiningPriorityTable
;
// Per-plane second-level tables made of 256-byte pages; the first-level value
// minus one selects the page.
51 CF_EXPORT
uint8_t **CFUniCharCombiningPriorityExtraTable
;
// Number of planes covered by the two tables above; characters at or beyond
// (this value << 16) are not looked up.
52 CF_EXPORT
uint8_t CFUniCharNumberOfPlanesForCombiningPriority
;
// Looks up the combining priority of `character` through a two-level table:
// the plane (character >> 16) selects a first-level table, the next 8 bits of
// the character select a page number in it, and the low 8 bits index into that
// page of the "extra" table.
// NOTE(review): this listing is missing lines between the visible statements
// (the numbering skips 58-59, 61-62 and 65-70). Presumably those hold the guards
// on `bitmap` and `value`, the closing braces and the fallback return -- restore
// them from the upstream file before compiling.
54 CF_EXPORT
uint8_t __CFUniCharGetCombiningPriority(UTF32Char character
) {
// Only characters inside the planes covered by the tables are looked up.
55 if (character
< (CFUniCharNumberOfPlanesForCombiningPriority
<< 16)) {
56 uint32_t plane
= character
>> 16;
57 const uint8_t *bitmap
= CFUniCharCombiningPriorityTable
[plane
];
// First level: one byte per 256-character row of the plane.
60 uint8_t value
= bitmap
[(character
>> 8) & 0xFF];
// Second level: pages are 256 bytes each and numbered from 1, hence (value - 1).
// NOTE(review): value == 0 would make (value - 1) negative; presumably guarded
// by a line that is not visible here.
63 bitmap
= CFUniCharCombiningPriorityExtraTable
[plane
] + ((value
- 1) * 256);
64 return bitmap
[character
% 256];
// Per-plane non-base-character bitmaps, indexed by (character >> 16) in
// __CFUniCharIsNonBaseCharacter.
71 CF_EXPORT
uint8_t **CFUniCharNonBaseBitmap
;
// Number of planes covered by CFUniCharNonBaseBitmap; characters at or beyond
// (this value << 16) are not looked up.
72 CF_EXPORT
uint8_t CFUniCharNumberOfPlanesForNonBaseBitmap
;
// Tests whether `character` is flagged in the per-plane non-base bitmap.
// Layout as read by the visible code: the first 256 bytes of a plane's data are
// an index with one byte per 256-character row; after them come 32-byte
// (256-bit) row bitmaps, selected by (index value - 1).
// NOTE(review): the listing skips lines 78-81 and 84-89. Presumably they handle
// special index values and the fallback return -- confirm against upstream.
// NOTE(review): a second function with this source name is defined later in the
// file (renamed by a macro to ..._P); the two look like alternative revisions.
74 CF_INLINE
bool __CFUniCharIsNonBaseCharacter(UTF32Char character
) {
75 if (character
< (CFUniCharNumberOfPlanesForNonBaseBitmap
<< 16)) {
76 const uint8_t *bitmap
= CFUniCharNonBaseBitmap
[character
>> 16];
// Index byte for the 256-character row containing `character`.
77 uint8_t value
= bitmap
[(character
>> 8) & 0xFF];
// Skip the 256-byte index, then step to the (value - 1)-th 32-byte row bitmap.
82 bitmap
= bitmap
+ ((value
- 1) * 32) + 256;
// Byte (low 8 bits / 8), bit (character % 8) holds the membership flag.
83 return (bitmap
[(character
& 0xFF) / 8] & (1 << (character
% 8)) ? true : false);
// File-static state filled in by __CFUniCharLoadPrecompositionTable.
// A NULL source table means "not loaded yet".
90 static UTF32Char
*__CFUniCharPrecompSourceTable
= NULL
;
// Number of entries in the source table, read from the mapping data header.
// NOTE(review): a `static const` definition of the same name appears earlier in
// this file; the two cannot both be compiled -- confirm which revision is live.
91 static uint32_t __CFUniCharPrecompositionTableLength
= 0;
// Destination tables searched by base character; BMP results are 16-bit,
// non-BMP results are 32-bit.
92 static uint16_t *__CFUniCharBMPPrecompDestinationTable
= NULL
;
93 static uint32_t *__CFUniCharNonBMPPrecompDestinationTable
= NULL
;
// Cached plane-0 non-base character bitmap.
95 static const uint8_t *__CFUniCharNonBaseBitmapForBMP_P
= NULL
; // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c
// Cached plane-0 combining property data.
96 static const uint8_t *__CFUniCharCombiningClassForBMP
= NULL
;
// Spin lock taken while the tables above are being initialised.
98 static CFSpinLock_t __CFUniCharPrecompositionTableLock
= 0;
// Initialises the file-static precomposition tables under the spin lock.
// The source-table pointer is re-checked after the lock is taken, so a caller
// that lost the race does not load the tables again.
//
// Layout of the mapping data, as read by the visible code:
//   uint32_t   entry count of the source table
//   uint32_t   byte length of the BMP destination table
//   UTF32Char  source table, (entry count * 2) values
//   ...        BMP destination table
//   ...        non-BMP destination table
// NOTE(review): the listing skips lines 107-108 and 110-112. The unlock at 109
// presumably belongs to an early-exit path (e.g. missing mapping data) whose
// guard and return are not visible -- confirm against upstream.
100 static void __CFUniCharLoadPrecompositionTable(void) {
102 __CFSpinLock(&__CFUniCharPrecompositionTableLock
);
104 if (NULL
== __CFUniCharPrecompSourceTable
) {
105 const void *bytes
= CFUniCharGetMappingData(kCFUniCharCanonicalPrecompMapping
);
106 uint32_t bmpMappingLength
;
109 __CFSpinUnlock(&__CFUniCharPrecompositionTableLock
);
// Read the two header words, advancing `bytes` past each one.
// NOTE(review): `*((uint32_t *)bytes)++` increments the result of a cast, which
// is not an lvalue in ISO C (an old GCC extension); newer compilers reject it.
113 __CFUniCharPrecompositionTableLength
= *(((uint32_t *)bytes
)++);
114 bmpMappingLength
= *(((uint32_t *)bytes
)++);
// `bytes` now points at the source table.
115 __CFUniCharPrecompSourceTable
= (UTF32Char
*)bytes
;
// The BMP destination table starts right after the source table's key/value pairs.
116 __CFUniCharBMPPrecompDestinationTable
= (uint16_t *)((intptr_t)bytes
+ (__CFUniCharPrecompositionTableLength
* sizeof(UTF32Char
) * 2));
// The non-BMP destination table follows bmpMappingLength bytes later.
117 __CFUniCharNonBMPPrecompDestinationTable
= (uint32_t *)(((intptr_t)__CFUniCharBMPPrecompDestinationTable
) + bmpMappingLength
);
// Cache the plane-0 bitmap and property data used on the BMP paths.
119 __CFUniCharNonBaseBitmapForBMP_P
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
120 __CFUniCharCombiningClassForBMP
= CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
123 __CFSpinUnlock(&__CFUniCharPrecompositionTableLock
);
126 // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c
127 #define __CFUniCharIsNonBaseCharacter __CFUniCharIsNonBaseCharacter_P
// Tests `character` against the non-base character bitmap for its plane.
// BMP characters (< 0x10000) use the cached plane-0 bitmap; other characters
// fetch the bitmap for plane ((character >> 16) & 0xFF) on every call.
// NOTE(review): the cached pointer is only set by
// __CFUniCharLoadPrecompositionTable, so callers must have loaded the tables.
// NOTE(review): the closing brace is missing from this listing.
128 CF_INLINE
bool __CFUniCharIsNonBaseCharacter(UTF32Char character
) {
129 return CFUniCharIsMemberOfBitmap(character
, (character
< 0x10000 ? __CFUniCharNonBaseBitmapForBMP_P
: CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, ((character
>> 16) & 0xFF))));
// Tail of the key/value record type searched below. Its opening lines are
// missing from this listing; the code uses two members, _key and _value.
136 } __CFUniCharPrecomposeBMPMappings
;
// Searches `theTable` (numElem records) for a record whose _key equals
// `character` and returns its _value.
// The range check and the halving step assume the table is sorted by ascending
// _key -- TODO confirm against the data generator.
// NOTE(review): the listing skips lines 142-146 and 151-157: the early return
// of the range check, the initialisation of p/q, the loop header and the
// not-found return are not visible here.
// NOTE(review): numElem == 0 makes theTable[numElem-1] index with a wrapped
// unsigned value; callers presumably always pass at least one element.
138 static UTF16Char
__CFUniCharGetMappedBMPValue(const __CFUniCharPrecomposeBMPMappings
*theTable
, uint32_t numElem
, UTF16Char character
) {
139 const __CFUniCharPrecomposeBMPMappings
*p
, *q
, *divider
;
// Quick rejection: the key lies outside the [first, last] key range.
141 if ((character
< theTable
[0]._key
) || (character
> theTable
[numElem
-1]._key
)) {
// Binary-search step: probe the midpoint of the current [p, q] window.
147 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
148 if (character
< divider
->_key
) { q
= divider
- 1; }
149 else if (character
> divider
->_key
) { p
= divider
+ 1; }
150 else { return divider
->_value
; }
// Tail of the key/value record type searched below. Its opening lines are
// missing from this listing; the code uses two members, _key and _value.
158 } __CFUniCharPrecomposeMappings
;
// 32-bit counterpart of __CFUniCharGetMappedBMPValue: searches `theTable`
// (numElem records) for a record whose _key equals `character` and returns its
// _value. CFUniCharPrecomposeCharacter treats a zero result as "no mapping".
// The range check and the halving step assume the table is sorted by ascending
// _key -- TODO confirm against the data generator.
// NOTE(review): the listing skips lines 164-168 and 173-179: the early return
// of the range check, the initialisation of p/q, the loop header and the
// not-found return are not visible here.
// NOTE(review): numElem == 0 makes theTable[numElem-1] index with a wrapped
// unsigned value; callers presumably always pass at least one element.
160 static uint32_t __CFUniCharGetMappedValue_P(const __CFUniCharPrecomposeMappings
*theTable
, uint32_t numElem
, UTF32Char character
) {
161 const __CFUniCharPrecomposeMappings
*p
, *q
, *divider
;
// Quick rejection: the key lies outside the [first, last] key range.
163 if ((character
< theTable
[0]._key
) || (character
> theTable
[numElem
-1]._key
)) {
// Binary-search step: probe the midpoint of the current [p, q] window.
169 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
170 if (character
< divider
->_key
) { q
= divider
- 1; }
171 else if (character
> divider
->_key
) { p
= divider
+ 1; }
172 else { return divider
->_value
; }
// Returns the precomposed form of `base` followed by `combining`, or 0xFFFD
// when no mapping exists.
//
// Two-stage lookup:
//   1. `combining` is searched in the source table. The 32-bit result packs an
//      offset into a destination table (low 16 bits) and the number of records
//      to search there (upper bits).
//   2. `base` is searched in that slice of the non-BMP destination table (when
//      kCFUniCharNonBmpFlag is set in the packed value) or of the BMP one.
// NOTE(review): the listing skips lines 181-183, 185-186, 188-189, 193-194 and
// 196-198. The declaration of `value`, the else between the two lookups and the
// closing braces are not visible here.
180 UTF32Char
CFUniCharPrecomposeCharacter(UTF32Char base
, UTF32Char combining
) {
// Load the tables on first use; the loader re-checks under its lock.
184 if (NULL
== __CFUniCharPrecompSourceTable
) __CFUniCharLoadPrecompositionTable();
// Stage 1: a zero result means `combining` takes part in no composition.
187 if (!(value
= __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings
*)__CFUniCharPrecompSourceTable
, __CFUniCharPrecompositionTableLength
, combining
))) return 0xFFFD;
// NOTE(review): the comment on the next line contradicts the code under it,
// which does search a non-BMP destination table when the flag is set.
190 // We don't have precomposition in non-BMP
191 if (value
& kCFUniCharNonBmpFlag
) {
// Non-BMP slice: the element count is masked with 0x7FFF to drop the flag bit.
192 value
= __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings
*)((uint32_t *)__CFUniCharNonBMPPrecompDestinationTable
+ (value
& 0xFFFF)), (value
>> 16) & 0x7FFF, base
);
// BMP slice: the offset is applied in uint32_t units even though the table
// pointer is declared uint16_t *.
195 value
= __CFUniCharGetMappedBMPValue((const __CFUniCharPrecomposeBMPMappings
*)((uint32_t *)__CFUniCharBMPPrecompDestinationTable
+ (value
& 0xFFFF)), (value
>> 16), base
);
// Stage 2 returns 0 when `base` has no entry; report that as 0xFFFD.
199 return (value
? value
: 0xFFFD);
// Constants for arithmetic Hangul syllable composition, used by
// CFUniCharPrecompose as
//   (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE.
// The *BASE values are subtracted from input characters to obtain the indices
// and the *COUNT values bound those indices.
// NOTE(review): HANGUL_SCOUNT and HANGUL_NCOUNT are not referenced in the
// visible part of this file.
202 #define HANGUL_SBASE 0xAC00
203 #define HANGUL_LBASE 0x1100
204 #define HANGUL_VBASE 0x1161
205 #define HANGUL_TBASE 0x11A7
206 #define HANGUL_SCOUNT 11172
207 #define HANGUL_LCOUNT 19
208 #define HANGUL_VCOUNT 21
209 #define HANGUL_TCOUNT 28
210 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
// Shifts the `length` UTF-16 units starting at `convertedChars` up by `delta`
// positions. Units are copied from the last one backwards, so the overlapping
// source and destination ranges do not clobber data still to be moved.
// The caller must own at least `length + delta` units at `convertedChars`.
// NOTE(review): the listing skips lines 214-215, 218 and 220-221. The
// declaration of `dstP` and the closing brace are not visible here.
212 CF_INLINE
void __CFUniCharMoveBufferFromEnd(UTF16Char
*convertedChars
, uint32_t length
, uint32_t delta
) {
// Remember the start of the range; the copy loop stops when it gets back here.
213 const UTF16Char
*limit
= convertedChars
;
// Point one past the last source unit ...
216 convertedChars
+= length
;
// ... and one past the last destination unit.
217 dstP
= convertedChars
+ delta
;
219 while (convertedChars
> limit
) *(--dstP
) = *(--convertedChars
);
// Composes the UTF-16 text in `characters` (`length` units) into `precomposed`,
// which holds at most `maxLength` units.
//
// Visible behaviour:
//   - surrogate pairs are merged into one UTF-32 value before processing and
//     split again when written out;
//   - a non-base character following a previous character is offered to
//     CFUniCharPrecomposeCharacter together with that previous character (or
//     the composition built so far);
//   - Hangul jamo sequences are composed arithmetically;
//   - a finished composition is written back over the slot at `currentBase`,
//     shifting the tail of the output by one unit when the unit count changes;
//   - on the visible exit paths, *consumedLength receives
//     (originalLength - length) and *filledLength receives usedLength, each
//     only when the pointer is non-NULL.
// NOTE(review): this listing is missing many short lines (loop header, length
// and usedLength updates, else branches, closing braces, return statements,
// declarations of isPrecomposed and tIndex). Block structure below is therefore
// partly inferred; restore the upstream text before changing any logic.
222 bool CFUniCharPrecompose(const UTF16Char
*characters
, uint32_t length
, uint32_t *consumedLength
, UTF16Char
*precomposed
, uint32_t maxLength
, uint32_t *filledLength
) {
// 0xFFFD in precomposedChar means "no composition pending".
223 UTF32Char currentChar
= 0, lastChar
= 0, precomposedChar
= 0xFFFD;
224 uint32_t originalLength
= length
, usedLength
= 0;
// Output position of the base character that a composition would replace.
225 UTF16Char
*currentBase
= precomposed
;
226 uint8_t currentClass
, lastClass
= 0;
227 bool currentBaseIsBMP
= true;
// Load the lookup tables on first use.
231 if (NULL
== __CFUniCharPrecompSourceTable
) __CFUniCharLoadPrecompositionTable();
// Fetch the next UTF-16 unit.
235 currentChar
= *(characters
++);
// Judging by the helper names, a high surrogate followed by a low surrogate is
// merged into a single UTF-32 value here, consuming the second unit.
238 if (CFUniCharIsSurrogateHighCharacter(currentChar
) && (length
> 0) && CFUniCharIsSurrogateLowCharacter(*characters
)) {
239 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(currentChar
, *(characters
++));
// A non-base character after a previous character is a composition candidate.
243 if (lastChar
&& __CFUniCharIsNonBaseCharacter(currentChar
)) {
// If a composition is already pending, keep composing onto its result.
244 isPrecomposed
= (precomposedChar
== 0xFFFD ? false : true);
245 if (isPrecomposed
) lastChar
= precomposedChar
;
// NOTE(review): currentClass is assigned twice in a row. The two statements
// look like alternatives from different revisions (or branches of a
// conditional-compilation block whose directives are missing) -- confirm.
248 currentClass
= __CFUniCharGetCombiningPriority(currentChar
);
250 currentClass
= (currentChar
> 0xFFFF ? CFUniCharGetUnicodeProperty(currentChar
, kCFUniCharCombiningProperty
) : CFUniCharGetCombiningPropertyForCharacter(currentChar
, __CFUniCharCombiningClassForBMP
));
// Try to compose only when the previous class was 0 or differs from this one.
253 if ((lastClass
== 0) || (currentClass
!= lastClass
)) {
254 if ((precomposedChar
= CFUniCharPrecomposeCharacter(lastChar
, currentChar
)) == 0xFFFD) {
// No mapping: keep the earlier composition (if any) and remember this class.
255 if (isPrecomposed
) precomposedChar
= lastChar
;
256 lastClass
= currentClass
;
// Emit currentChar as is: a surrogate pair when above 0xFFFF, else one unit.
// NOTE(review): the usedLength updates that pair with these bounds checks are
// not visible in this listing.
262 if (currentChar
> 0xFFFF) { // Non-BMP
264 if (usedLength
> maxLength
) break;
265 currentChar
-= 0x10000;
266 *(precomposed
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
267 *(precomposed
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
270 if (usedLength
> maxLength
) break;
271 *(precomposed
++) = (UTF16Char
)currentChar
;
// Hangul: compose leading consonant + vowel (+ optional trailing consonant).
274 if ((currentChar
>= HANGUL_LBASE
) && (currentChar
< (HANGUL_LBASE
+ 0xFF))) { // Hangul Jamo
// NOTE(review): the difference can reach 0xFE but is stored in an int8_t, so
// values of 128 and above do not fit; the (0 <= lIndex) test below presumably
// relies on them turning negative.
275 int8_t lIndex
= currentChar
- HANGUL_LBASE
;
// NOTE(review): with HANGUL_LCOUNT == 19 the valid indices are 0..18, but `<=`
// also admits 19. The same inclusive bound is used for vIndex and tIndex below.
// Looks like an off-by-one -- confirm against the Unicode Hangul composition
// algorithm before changing.
277 if ((length
> 0) && (0 <= lIndex
) && (lIndex
<= HANGUL_LCOUNT
)) {
278 int16_t vIndex
= *characters
- HANGUL_VBASE
;
280 if ((vIndex
>= 0) && (vIndex
<= HANGUL_VCOUNT
)) {
// Consume the vowel.
283 ++characters
; --length
;
// NOTE(review): *characters is read here; a guard on the remaining length is
// not visible in this listing.
286 tIndex
= *characters
- HANGUL_TBASE
;
287 if ((tIndex
< 0) || (tIndex
> HANGUL_TCOUNT
)) {
// Consume the trailing consonant (presumably the in-range branch; the else
// line is missing from this listing).
290 ++characters
; --length
;
// Arithmetic syllable composition from the three indices.
293 currentChar
= (lIndex
* HANGUL_VCOUNT
+ vIndex
) * HANGUL_TCOUNT
+ tIndex
+ HANGUL_SBASE
;
// A new base character arrived: write the pending composition into the slot of
// the previous base.
298 if (precomposedChar
!= 0xFFFD) {
// NOTE(review): the trailing "Non-BMP" comment on the next statement does not
// match its condition, and the guard tests the base rather than the width of
// precomposedChar -- confirm the intended branch.
299 if (currentBaseIsBMP
) { // Non-BMP
300 if (lastChar
> 0xFFFF) { // Last char was Non-BMP
// The slot shrinks from two units to one: pull the tail left by one unit.
302 memmove(currentBase
+ 1, currentBase
+ 2, (precomposed
- (currentBase
+ 2)) * sizeof(UTF16Char
));
304 *(currentBase
) = (UTF16Char
)precomposedChar
;
306 if (lastChar
< 0x10000) { // Last char was BMP
// The slot grows from one unit to two: push the tail right by one unit.
308 if (usedLength
> maxLength
) break;
309 __CFUniCharMoveBufferFromEnd(currentBase
+ 1, precomposed
- (currentBase
+ 1), 1);
// Write the composition as a surrogate pair.
311 precomposedChar
-= 0x10000;
312 *currentBase
= (UTF16Char
)((precomposedChar
>> 10) + 0xD800UL
);
313 *(currentBase
+ 1) = (UTF16Char
)((precomposedChar
& 0x3FF) + 0xDC00UL
);
// Reset the pending composition and start a new base at the write position.
315 precomposedChar
= 0xFFFD;
317 currentBase
= precomposed
;
319 lastChar
= currentChar
;
// Emit the new base character and record whether it took one unit or two.
322 if (currentChar
> 0xFFFF) { // Non-BMP
324 if (usedLength
> maxLength
) break;
325 currentChar
-= 0x10000;
326 *(precomposed
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
327 *(precomposed
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
328 currentBaseIsBMP
= false;
331 if (usedLength
> maxLength
) break;
332 *(precomposed
++) = (UTF16Char
)currentChar
;
333 currentBaseIsBMP
= true;
// Flush a composition that is still pending (presumably after the main loop;
// the loop's closing brace is not visible in this listing).
338 if (precomposedChar
!= 0xFFFD) {
// NOTE(review): this guard tests currentChar, but the branch encodes
// precomposedChar as a surrogate pair -- confirm which variable is intended.
339 if (currentChar
> 0xFFFF) { // Non-BMP
340 if (lastChar
< 0x10000) { // Last char was BMP
// Out of room for the extra unit: report progress before bailing out (the
// return statement itself is not visible in this listing).
342 if (usedLength
> maxLength
) {
343 if (consumedLength
) *consumedLength
= originalLength
- length
;
344 if (filledLength
) *filledLength
= usedLength
;
// The slot grows from one unit to two: push the tail right by one unit.
347 __CFUniCharMoveBufferFromEnd(currentBase
+ 1, precomposed
- (currentBase
+ 1), 1);
349 precomposedChar
-= 0x10000;
350 *currentBase
= (UTF16Char
)((precomposedChar
>> 10) + 0xD800UL
);
351 *(currentBase
+ 1) = (UTF16Char
)((precomposedChar
& 0x3FF) + 0xDC00UL
);
353 if (lastChar
> 0xFFFF) { // Last char was Non-BMP
// The slot shrinks from two units to one: pull the tail left by one unit.
355 memmove(currentBase
+ 1, currentBase
+ 2, (precomposed
- (currentBase
+ 2)) * sizeof(UTF16Char
));
357 *(currentBase
) = (UTF16Char
)precomposedChar
;
// Report how much input was consumed and how much output was produced.
361 if (consumedLength
) *consumedLength
= originalLength
- length
;
362 if (filledLength
) *filledLength
= usedLength
;