/*
 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*	CFUnicodePrecomposition.c
	Copyright 1999-2002, Apple, Inc. All rights reserved.
	Responsibility: Aki Inoue
*/
29 #include <CoreFoundation/CFBase.h>
30 #include <CoreFoundation/CFCharacterSet.h>
31 #include "CFUniChar.h"
32 #include "CFUnicodePrecomposition.h"
33 #include "CFInternal.h"
34 #include "CFUniCharPriv.h"
36 // Canonical Precomposition
37 static UTF32Char
*__CFUniCharPrecompSourceTable
= NULL
;
38 static uint32_t __CFUniCharPrecompositionTableLength
= 0;
39 static uint16_t *__CFUniCharBMPPrecompDestinationTable
= NULL
;
40 static uint32_t *__CFUniCharNonBMPPrecompDestinationTable
= NULL
;
42 static const uint8_t *__CFUniCharNonBaseBitmapForBMP_P
= NULL
; // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c
43 static const uint8_t *__CFUniCharCombiningClassForBMP
= NULL
;
45 static CFSpinLock_t __CFUniCharPrecompositionTableLock
= 0;
47 static void __CFUniCharLoadPrecompositionTable(void) {
49 __CFSpinLock(&__CFUniCharPrecompositionTableLock
);
51 if (NULL
== __CFUniCharPrecompSourceTable
) {
52 const void *bytes
= CFUniCharGetMappingData(kCFUniCharCanonicalPrecompMapping
);
53 uint32_t bmpMappingLength
;
56 __CFSpinUnlock(&__CFUniCharPrecompositionTableLock
);
60 __CFUniCharPrecompositionTableLength
= *(((uint32_t *)bytes
)++);
61 bmpMappingLength
= *(((uint32_t *)bytes
)++);
62 __CFUniCharPrecompSourceTable
= (UTF32Char
*)bytes
;
63 __CFUniCharBMPPrecompDestinationTable
= (uint16_t *)((intptr_t)bytes
+ (__CFUniCharPrecompositionTableLength
* sizeof(UTF32Char
) * 2));
64 __CFUniCharNonBMPPrecompDestinationTable
= (uint32_t *)(((intptr_t)__CFUniCharBMPPrecompDestinationTable
) + bmpMappingLength
);
66 __CFUniCharNonBaseBitmapForBMP_P
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
67 __CFUniCharCombiningClassForBMP
= CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
70 __CFSpinUnlock(&__CFUniCharPrecompositionTableLock
);
73 // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c
74 #define __CFUniCharIsNonBaseCharacter __CFUniCharIsNonBaseCharacter_P
75 CF_INLINE
bool __CFUniCharIsNonBaseCharacter(UTF32Char character
) {
76 return CFUniCharIsMemberOfBitmap(character
, (character
< 0x10000 ? __CFUniCharNonBaseBitmapForBMP_P
: CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, ((character
>> 16) & 0xFF))));
82 } __CFUniCharPrecomposeBMPMappings
;
84 static UTF16Char
__CFUniCharGetMappedBMPValue(const __CFUniCharPrecomposeBMPMappings
*theTable
, uint32_t numElem
, UTF16Char character
) {
85 const __CFUniCharPrecomposeBMPMappings
*p
, *q
, *divider
;
87 if ((character
< theTable
[0]._key
) || (character
> theTable
[numElem
-1]._key
)) {
93 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
94 if (character
< divider
->_key
) { q
= divider
- 1; }
95 else if (character
> divider
->_key
) { p
= divider
+ 1; }
96 else { return divider
->_value
; }
104 } __CFUniCharPrecomposeMappings
;
106 static uint32_t __CFUniCharGetMappedValue_P(const __CFUniCharPrecomposeMappings
*theTable
, uint32_t numElem
, UTF32Char character
) {
107 const __CFUniCharPrecomposeMappings
*p
, *q
, *divider
;
109 if ((character
< theTable
[0]._key
) || (character
> theTable
[numElem
-1]._key
)) {
115 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
116 if (character
< divider
->_key
) { q
= divider
- 1; }
117 else if (character
> divider
->_key
) { p
= divider
+ 1; }
118 else { return divider
->_value
; }
124 UTF32Char
CFUniCharPrecomposeCharacter(UTF32Char base
, UTF32Char combining
) {
127 if (NULL
== __CFUniCharPrecompSourceTable
) __CFUniCharLoadPrecompositionTable();
129 if (!(value
= __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings
*)__CFUniCharPrecompSourceTable
, __CFUniCharPrecompositionTableLength
, combining
))) return 0xFFFD;
131 // We don't have precomposition in non-BMP
132 if (value
& kCFUniCharNonBmpFlag
) {
133 value
= __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings
*)((uint32_t *)__CFUniCharNonBMPPrecompDestinationTable
+ (value
& 0xFFFF)), (value
>> 16) & 0x7FFF, base
);
135 value
= __CFUniCharGetMappedBMPValue((const __CFUniCharPrecomposeBMPMappings
*)((uint32_t *)__CFUniCharBMPPrecompDestinationTable
+ (value
& 0xFFFF)), (value
>> 16), base
);
137 return (value
? value
: 0xFFFD);
// Constants for composing Hangul syllables arithmetically from conjoining
// jamo. S = precomposed syllable, L = leading consonant, V = vowel,
// T = trailing consonant; *BASE is the code point index 0 is counted from and
// *COUNT is the number of valid indexes.
#define HANGUL_SBASE 0xAC00
#define HANGUL_LBASE 0x1100
#define HANGUL_VBASE 0x1161
#define HANGUL_TBASE 0x11A7
#define HANGUL_SCOUNT 11172
#define HANGUL_LCOUNT 19
#define HANGUL_VCOUNT 21
#define HANGUL_TCOUNT 28
// Syllables per leading consonant.
#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
150 CF_INLINE
void __CFUniCharMoveBufferFromEnd(UTF16Char
*convertedChars
, uint32_t length
, uint32_t delta
) {
151 const UTF16Char
*limit
= convertedChars
;
154 convertedChars
+= length
;
155 dstP
= convertedChars
+ delta
;
157 while (convertedChars
> limit
) *(--dstP
) = *(--convertedChars
);
160 bool CFUniCharPrecompose(const UTF16Char
*characters
, uint32_t length
, uint32_t *consumedLength
, UTF16Char
*precomposed
, uint32_t maxLength
, uint32_t *filledLength
) {
161 UTF32Char currentChar
= 0, lastChar
= 0, precomposedChar
= 0xFFFD;
162 uint32_t originalLength
= length
, usedLength
= 0;
163 UTF16Char
*currentBase
= precomposed
;
164 uint8_t currentClass
, lastClass
= 0;
165 bool currentBaseIsBMP
= true;
168 if (NULL
== __CFUniCharPrecompSourceTable
) __CFUniCharLoadPrecompositionTable();
171 currentChar
= *(characters
++);
174 if (CFUniCharIsSurrogateHighCharacter(currentChar
) && (length
> 0) && CFUniCharIsSurrogateLowCharacter(*characters
)) {
175 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(currentChar
, *(characters
++));
179 if (lastChar
&& __CFUniCharIsNonBaseCharacter(currentChar
)) {
180 isPrecomposed
= (precomposedChar
== 0xFFFD ? false : true);
181 if (isPrecomposed
) lastChar
= precomposedChar
;
183 currentClass
= (currentChar
> 0xFFFF ? CFUniCharGetUnicodeProperty(currentChar
, kCFUniCharCombiningProperty
) : CFUniCharGetCombiningPropertyForCharacter(currentChar
, __CFUniCharCombiningClassForBMP
));
185 if ((lastClass
== 0) || (currentClass
!= lastClass
)) {
186 if ((precomposedChar
= CFUniCharPrecomposeCharacter(lastChar
, currentChar
)) == 0xFFFD) {
187 if (isPrecomposed
) precomposedChar
= lastChar
;
188 lastClass
= currentClass
;
194 if (currentChar
> 0xFFFF) { // Non-BMP
196 if (usedLength
> maxLength
) break;
197 currentChar
-= 0x10000;
198 *(precomposed
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
199 *(precomposed
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
202 if (usedLength
> maxLength
) break;
203 *(precomposed
++) = (UTF16Char
)currentChar
;
206 if ((currentChar
>= HANGUL_LBASE
) && (currentChar
< (HANGUL_LBASE
+ 0xFF))) { // Hangul Jamo
207 int8_t lIndex
= currentChar
- HANGUL_LBASE
;
209 if ((length
> 0) && (0 <= lIndex
) && (lIndex
<= HANGUL_LCOUNT
)) {
210 int16_t vIndex
= *characters
- HANGUL_VBASE
;
212 if ((vIndex
>= 0) && (vIndex
<= HANGUL_VCOUNT
)) {
215 ++characters
; --length
;
218 tIndex
= *characters
- HANGUL_TBASE
;
219 if ((tIndex
< 0) || (tIndex
> HANGUL_TCOUNT
)) {
222 ++characters
; --length
;
225 currentChar
= (lIndex
* HANGUL_VCOUNT
+ vIndex
) * HANGUL_TCOUNT
+ tIndex
+ HANGUL_SBASE
;
230 if (precomposedChar
!= 0xFFFD) {
231 if (currentBaseIsBMP
) { // Non-BMP
232 if (lastChar
> 0xFFFF) { // Last char was Non-BMP
234 memmove(currentBase
+ 1, currentBase
+ 2, (precomposed
- (currentBase
+ 2)) * sizeof(UTF16Char
));
236 *(currentBase
) = (UTF16Char
)precomposedChar
;
238 if (lastChar
< 0x10000) { // Last char was BMP
240 if (usedLength
> maxLength
) break;
241 __CFUniCharMoveBufferFromEnd(currentBase
+ 1, precomposed
- (currentBase
+ 1), 1);
243 precomposedChar
-= 0x10000;
244 *currentBase
= (UTF16Char
)((precomposedChar
>> 10) + 0xD800UL
);
245 *(currentBase
+ 1) = (UTF16Char
)((precomposedChar
& 0x3FF) + 0xDC00UL
);
247 precomposedChar
= 0xFFFD;
249 currentBase
= precomposed
;
251 lastChar
= currentChar
;
254 if (currentChar
> 0xFFFF) { // Non-BMP
256 if (usedLength
> maxLength
) break;
257 currentChar
-= 0x10000;
258 *(precomposed
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
259 *(precomposed
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
260 currentBaseIsBMP
= false;
263 if (usedLength
> maxLength
) break;
264 *(precomposed
++) = (UTF16Char
)currentChar
;
265 currentBaseIsBMP
= true;
270 if (precomposedChar
!= 0xFFFD) {
271 if (currentChar
> 0xFFFF) { // Non-BMP
272 if (lastChar
< 0x10000) { // Last char was BMP
274 if (usedLength
> maxLength
) {
275 if (consumedLength
) *consumedLength
= originalLength
- length
;
276 if (filledLength
) *filledLength
= usedLength
;
279 __CFUniCharMoveBufferFromEnd(currentBase
+ 1, precomposed
- (currentBase
+ 1), 1);
281 precomposedChar
-= 0x10000;
282 *currentBase
= (UTF16Char
)((precomposedChar
>> 10) + 0xD800UL
);
283 *(currentBase
+ 1) = (UTF16Char
)((precomposedChar
& 0x3FF) + 0xDC00UL
);
285 if (lastChar
> 0xFFFF) { // Last char was Non-BMP
287 memmove(currentBase
+ 1, currentBase
+ 2, (precomposed
- (currentBase
+ 2)) * sizeof(UTF16Char
));
289 *(currentBase
) = (UTF16Char
)precomposedChar
;
293 if (consumedLength
) *consumedLength
= originalLength
- length
;
294 if (filledLength
) *filledLength
= usedLength
;