2 * Copyright (c) 2014 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 /* CFUnicodePrecomposition.c
25 Copyright (c) 1999-2014, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
30 #include <CoreFoundation/CFBase.h>
31 #include <CoreFoundation/CFCharacterSet.h>
32 #include "CFUniChar.h"
33 #include "CFUnicodePrecomposition.h"
34 #include "CFInternal.h"
35 #include "CFUniCharPriv.h"
37 // Canonical Precomposition
// File-scope state for canonical precomposition. All of these are filled in by
// __CFUniCharLoadPrecompositionTable(); a NULL __CFUniCharPrecompSourceTable is
// what the rest of the file tests to decide whether loading is still needed.

// Start of the source table: the mapping data immediately after its two
// leading uint32_t header words.
38 static UTF32Char
*__CFUniCharPrecompSourceTable
= NULL
;
// First header word of the mapping data; passed as the element count when the
// source table is searched.
39 static uint32_t __CFUniCharPrecompositionTableLength
= 0;
// Located __CFUniCharPrecompositionTableLength * sizeof(UTF32Char) * 2 bytes
// past the start of the source table.
40 static uint16_t *__CFUniCharBMPPrecompDestinationTable
= NULL
;
// Located bmpMappingLength bytes (second header word) past the BMP destination table.
41 static uint32_t *__CFUniCharNonBMPPrecompDestinationTable
= NULL
;
// Plane-0 bitmap of kCFUniCharNonBaseCharacterSet.
43 static const uint8_t *__CFUniCharNonBaseBitmapForBMP_P
= NULL
; // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c
// Plane-0 data for kCFUniCharCombiningProperty.
44 static const uint8_t *__CFUniCharCombiningClassForBMP
= NULL
;
// Taken by __CFUniCharLoadPrecompositionTable() while the statics above are initialized.
46 static CFLock_t __CFUniCharPrecompositionTableLock
= CFLockInit
;
// Fetches the kCFUniCharCanonicalPrecompMapping data and caches pointers into
// it (source table, BMP / non-BMP destination tables) together with the plane-0
// non-base bitmap and combining-class data. Takes no arguments and returns
// nothing; its only effect is on the file-scope statics.
48 static void __CFUniCharLoadPrecompositionTable(void) {
50 __CFLock(&__CFUniCharPrecompositionTableLock
);
// Callers test the pointer without holding the lock, so it is tested again
// here once the lock is held.
52 if (NULL
== __CFUniCharPrecompSourceTable
) {
53 const uint32_t *bytes
= (const uint32_t *)CFUniCharGetMappingData(kCFUniCharCanonicalPrecompMapping
);
54 uint32_t bmpMappingLength
;
// NOTE(review): presumably this unlock belongs to an early exit taken when
// `bytes` is NULL; the guarding condition should be confirmed.
57 __CFUnlock(&__CFUniCharPrecompositionTableLock
);
// The data starts with two uint32_t header words: the table length, then the
// length used below as a byte offset between the two destination tables.
61 __CFUniCharPrecompositionTableLength
= *(bytes
++);
62 bmpMappingLength
= *(bytes
++);
// After the header, `bytes` points at the source table itself.
63 __CFUniCharPrecompSourceTable
= (UTF32Char
*)bytes
;
// Skip the source table (two UTF32Char-sized words per entry) to reach the
// BMP destination table.
64 __CFUniCharBMPPrecompDestinationTable
= (uint16_t *)((intptr_t)bytes
+ (__CFUniCharPrecompositionTableLength
* sizeof(UTF32Char
) * 2));
// bmpMappingLength is added to the address as an integer, i.e. it is in bytes.
65 __CFUniCharNonBMPPrecompDestinationTable
= (uint32_t *)(((intptr_t)__CFUniCharBMPPrecompDestinationTable
) + bmpMappingLength
);
// Cache the plane-0 lookups used on the BMP fast paths.
67 __CFUniCharNonBaseBitmapForBMP_P
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
68 __CFUniCharCombiningClassForBMP
= (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
71 __CFUnlock(&__CFUniCharPrecompositionTableLock
);
74 // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c
75 #define __CFUniCharIsNonBaseCharacter __CFUniCharIsNonBaseCharacter_P
// Returns whether `character` is a member of the kCFUniCharNonBaseCharacterSet
// bitmap. Characters below 0x10000 use the cached plane-0 bitmap; for anything
// else the bitmap is fetched for plane ((character >> 16) & 0xFF).
// The cached plane-0 pointer is set by __CFUniCharLoadPrecompositionTable().
76 CF_INLINE
bool __CFUniCharIsNonBaseCharacter(UTF32Char character
) {
77 return CFUniCharIsMemberOfBitmap(character
, (character
< 0x10000 ? __CFUniCharNonBaseBitmapForBMP_P
: CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, ((character
>> 16) & 0xFF))));
83 } __CFUniCharPrecomposeBMPMappings
;
// Looks up `character` among the `_key` fields of `theTable` (numElem entries)
// and returns the matching entry's `_value`.
// The range test against the first and last keys, and the halving search that
// follows, both rely on the entries being in ascending `_key` order.
// Indexing [numElem-1] assumes numElem >= 1 (numElem is unsigned) -- TODO confirm
// that callers never pass 0.
85 static UTF16Char
__CFUniCharGetMappedBMPValue(const __CFUniCharPrecomposeBMPMappings
*theTable
, uint32_t numElem
, UTF16Char character
) {
// p and q bound the remaining search range; divider is its midpoint.
86 const __CFUniCharPrecomposeBMPMappings
*p
, *q
, *divider
;
// Quick rejection: the key lies outside [first key, last key].
88 if ((character
< theTable
[0]._key
) || (character
> theTable
[numElem
-1]._key
)) {
// Midpoint computed as an offset from p rather than by adding the two pointers.
94 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
// Narrow to the lower half, the upper half, or return on an exact match.
95 if (character
< divider
->_key
) { q
= divider
- 1; }
96 else if (character
> divider
->_key
) { p
= divider
+ 1; }
97 else { return divider
->_value
; }
105 } __CFUniCharPrecomposeMappings
;
// UTF32Char-keyed counterpart of __CFUniCharGetMappedBMPValue: looks up
// `character` among the `_key` fields of `theTable` (numElem entries) and
// returns the matching entry's `_value`. CFUniCharPrecomposeCharacter treats a
// zero result from this function as "no mapping".
// The range test and the halving search rely on ascending `_key` order.
// Indexing [numElem-1] assumes numElem >= 1 (numElem is unsigned) -- TODO confirm
// that callers never pass 0.
107 static uint32_t __CFUniCharGetMappedValue_P(const __CFUniCharPrecomposeMappings
*theTable
, uint32_t numElem
, UTF32Char character
) {
// p and q bound the remaining search range; divider is its midpoint.
108 const __CFUniCharPrecomposeMappings
*p
, *q
, *divider
;
// Quick rejection: the key lies outside [first key, last key].
110 if ((character
< theTable
[0]._key
) || (character
> theTable
[numElem
-1]._key
)) {
// Midpoint computed as an offset from p rather than by adding the two pointers.
116 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
// Narrow to the lower half, the upper half, or return on an exact match.
117 if (character
< divider
->_key
) { q
= divider
- 1; }
118 else if (character
> divider
->_key
) { p
= divider
+ 1; }
119 else { return divider
->_value
; }
// Returns the precomposed form of `base` followed by `combining`, or 0xFFFD
// when the tables hold no such pair.
//
// The lookup has two stages: `combining` is searched in the source table, and
// the 32-bit value found there describes a sub-table that is then searched for
// `base`. The low 16 bits of that value are an offset counted in uint32_t
// units (the table pointer is cast to uint32_t * before the addition); the
// upper bits are the sub-table's element count.
125 UTF32Char
CFUniCharPrecomposeCharacter(UTF32Char base
, UTF32Char combining
) {
// Load the tables on first use; the loader re-checks this under its lock.
128 if (NULL
== __CFUniCharPrecompSourceTable
) __CFUniCharLoadPrecompositionTable();
// Stage 1: a zero result means `combining` never takes part in a composition.
130 if (!(value
= __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings
*)__CFUniCharPrecompSourceTable
, __CFUniCharPrecompositionTableLength
, combining
))) return 0xFFFD;
// NOTE(review): the comment below says non-BMP precomposition is absent, yet
// the branch it introduces searches the non-BMP destination table -- confirm
// which is current.
132 // We don't have precomposition in non-BMP
133 if (value
& kCFUniCharNonBmpFlag
) {
// Stage 2 (flag set): search the non-BMP destination table; the element count
// is masked with 0x7FFF after the shift.
134 value
= __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings
*)((uint32_t *)__CFUniCharNonBMPPrecompDestinationTable
+ (value
& 0xFFFF)), (value
>> 16) & 0x7FFF, base
);
// Stage 2 (flag clear): search the BMP destination table; the element count is
// the upper 16 bits.
136 value
= __CFUniCharGetMappedBMPValue((const __CFUniCharPrecomposeBMPMappings
*)((uint32_t *)__CFUniCharBMPPrecompDestinationTable
+ (value
& 0xFFFF)), (value
>> 16), base
);
// A zero result from stage 2 is reported as 0xFFFD as well.
138 return (value
? value
: 0xFFFD);
// Constants for the arithmetic Hangul Jamo composition in CFUniCharPrecompose:
//   syllable = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE
// where lIndex, vIndex and tIndex are offsets from HANGUL_LBASE, HANGUL_VBASE
// and HANGUL_TBASE respectively.
141 #define HANGUL_SBASE 0xAC00
142 #define HANGUL_LBASE 0x1100
143 #define HANGUL_VBASE 0x1161
144 #define HANGUL_TBASE 0x11A7
145 #define HANGUL_SCOUNT 11172
146 #define HANGUL_LCOUNT 19
147 #define HANGUL_VCOUNT 21
148 #define HANGUL_TCOUNT 28
149 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
// Copies the `length` UTF16Char elements starting at `convertedChars` to the
// position `delta` elements further along, in place.
// The copy runs from the last element back to the first, so when delta is
// positive and the ranges overlap no element is overwritten before it has been
// read. The caller must ensure the buffer has room for the shifted elements.
151 CF_INLINE
void __CFUniCharMoveBufferFromEnd0(UTF16Char
*convertedChars
, CFIndex length
, CFIndex delta
) {
// Remember the first element so the backwards walk knows where to stop.
152 const UTF16Char
*limit
= convertedChars
;
// Source cursor starts one past the last element ...
155 convertedChars
+= length
;
// ... and the destination cursor sits `delta` elements beyond it.
156 dstP
= convertedChars
+ delta
;
// Pre-decrement both cursors and copy until the source is back at the start.
158 while (convertedChars
> limit
) *(--dstP
) = *(--convertedChars
);
// Precomposes a UTF-16 string: combining sequences are replaced by their
// precomposed forms (via CFUniCharPrecomposeCharacter) and Hangul Jamo
// sequences by the arithmetically computed syllable.
//
//   characters, length  input UTF-16 units and their count
//   consumedLength      if non-NULL, receives originalLength - length, i.e. the
//                       number of input units consumed
//   precomposed         output buffer for the resulting UTF-16 units
//   maxLength           capacity limit that usedLength is compared against
//                       before each write
//   filledLength        if non-NULL, receives usedLength
//
// 0xFFFD in precomposedChar means "no composition pending"; it is also what
// CFUniCharPrecomposeCharacter returns when a pair does not compose.
161 bool CFUniCharPrecompose(const UTF16Char
*characters
, CFIndex length
, CFIndex
*consumedLength
, UTF16Char
*precomposed
, CFIndex maxLength
, CFIndex
*filledLength
) {
162 UTF32Char currentChar
= 0, lastChar
= 0, precomposedChar
= 0xFFFD;
163 CFIndex originalLength
= length
, usedLength
= 0;
// currentBase marks where the most recent base character was written, so a
// later composition can overwrite it in place.
164 UTF16Char
*currentBase
= precomposed
;
165 uint8_t currentClass
, lastClass
= 0;
166 bool currentBaseIsBMP
= true;
// Load the tables on first use; the loader re-checks this under its lock.
169 if (NULL
== __CFUniCharPrecompSourceTable
) __CFUniCharLoadPrecompositionTable();
172 currentChar
= *(characters
++);
// A high surrogate followed by a low surrogate is merged into one UTF-32 value.
175 if (CFUniCharIsSurrogateHighCharacter(currentChar
) && (length
> 0) && CFUniCharIsSurrogateLowCharacter(*characters
)) {
176 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(currentChar
, *(characters
++));
// A non-base character that follows an earlier character is a candidate for
// composing with it.
180 if (lastChar
&& __CFUniCharIsNonBaseCharacter(currentChar
)) {
// If an earlier mark already composed, keep composing onto that result.
181 isPrecomposed
= (precomposedChar
== 0xFFFD ? false : true);
182 if (isPrecomposed
) lastChar
= precomposedChar
;
// Combining class: property lookup for non-BMP, cached plane-0 data otherwise.
184 currentClass
= (currentChar
> 0xFFFF ? CFUniCharGetUnicodeProperty(currentChar
, kCFUniCharCombiningProperty
) : CFUniCharGetCombiningPropertyForCharacter(currentChar
, __CFUniCharCombiningClassForBMP
));
// Composition is attempted only when the previous class is 0 or the current
// class is strictly greater than the previous one.
186 if ((lastClass
== 0) || (currentClass
> lastClass
)) {
187 if ((precomposedChar
= CFUniCharPrecomposeCharacter(lastChar
, currentChar
)) == 0xFFFD) {
// No composition for this pair: restore the earlier composed value, if any,
// and record this mark's class.
188 if (isPrecomposed
) precomposedChar
= lastChar
;
189 lastClass
= currentClass
;
// Emit the combining character itself: as a surrogate pair when it is above
// 0xFFFF, as a single unit otherwise.
194 if (currentChar
> 0xFFFF) { // Non-BMP
196 if (usedLength
> maxLength
) break;
197 currentChar
-= 0x10000;
198 *(precomposed
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
199 *(precomposed
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
202 if (usedLength
> maxLength
) break;
203 *(precomposed
++) = (UTF16Char
)currentChar
;
206 if ((currentChar
>= HANGUL_LBASE
) && (currentChar
< (HANGUL_LBASE
+ 0xFF))) { // Hangul Jamo
// The offset is narrowed to int8_t, so large offsets can turn negative; the
// `0 <= lIndex` test below filters those out.
207 int8_t lIndex
= currentChar
- HANGUL_LBASE
;
// NOTE(review): `<=` accepts lIndex == HANGUL_LCOUNT (and vIndex ==
// HANGUL_VCOUNT below), and `tIndex > HANGUL_TCOUNT` accepts tIndex ==
// HANGUL_TCOUNT. The Unicode Hangul composition algorithm uses strict
// `index < COUNT` bounds -- confirm whether the inclusive bounds are intended.
209 if ((length
> 0) && (0 <= lIndex
) && (lIndex
<= HANGUL_LCOUNT
)) {
210 int16_t vIndex
= *characters
- HANGUL_VBASE
;
212 if ((vIndex
>= 0) && (vIndex
<= HANGUL_VCOUNT
)) {
// Consume the vowel.
215 ++characters
; --length
;
218 tIndex
= *characters
- HANGUL_TBASE
;
219 if ((tIndex
< 0) || (tIndex
> HANGUL_TCOUNT
)) {
// Consume the trailing consonant.
222 ++characters
; --length
;
// Arithmetic composition of the syllable from the three indices.
225 currentChar
= (lIndex
* HANGUL_VCOUNT
+ vIndex
) * HANGUL_TCOUNT
+ tIndex
+ HANGUL_SBASE
;
// A composition is pending: write it over the base at currentBase, growing or
// shrinking the output by one unit when the encoded width changes.
230 if (precomposedChar
!= 0xFFFD) {
231 if (currentBaseIsBMP
) { // NOTE(review): taken when currentBaseIsBMP is true, though the original comment read "Non-BMP" -- confirm
232 if (lastChar
> 0xFFFF) { // Last char was Non-BMP
// Close the one-unit gap left by replacing a surrogate pair with a single unit.
234 memmove(currentBase
+ 1, currentBase
+ 2, (precomposed
- (currentBase
+ 2)) * sizeof(UTF16Char
));
236 *(currentBase
) = (UTF16Char
)precomposedChar
;
238 if (lastChar
< 0x10000) { // Last char was BMP
240 if (usedLength
> maxLength
) break;
// Open a one-unit gap after the base so a surrogate pair fits.
241 __CFUniCharMoveBufferFromEnd0(currentBase
+ 1, precomposed
- (currentBase
+ 1), 1);
// Encode the composed character as a surrogate pair at currentBase.
243 precomposedChar
-= 0x10000;
244 *currentBase
= (UTF16Char
)((precomposedChar
>> 10) + 0xD800UL
);
245 *(currentBase
+ 1) = (UTF16Char
)((precomposedChar
& 0x3FF) + 0xDC00UL
);
// Nothing pending any more.
247 precomposedChar
= 0xFFFD;
// The character about to be written becomes the new base.
249 currentBase
= precomposed
;
251 lastChar
= currentChar
;
// Emit the new base and remember whether it occupies one unit or two.
254 if (currentChar
> 0xFFFF) { // Non-BMP
256 if (usedLength
> maxLength
) break;
257 currentChar
-= 0x10000;
258 *(precomposed
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
259 *(precomposed
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
260 currentBaseIsBMP
= false;
263 if (usedLength
> maxLength
) break;
264 *(precomposed
++) = (UTF16Char
)currentChar
;
265 currentBaseIsBMP
= true;
// Same write-back of a pending composition as above, repeated here.
// NOTE(review): presumably this runs after the main loop for the final
// sequence -- confirm.
270 if (precomposedChar
!= 0xFFFD) {
271 if (currentChar
> 0xFFFF) { // Non-BMP
272 if (lastChar
< 0x10000) { // Last char was BMP
// Out of room: report progress through the out-parameters.
274 if (usedLength
> maxLength
) {
275 if (consumedLength
) *consumedLength
= originalLength
- length
;
276 if (filledLength
) *filledLength
= usedLength
;
// Open a one-unit gap after the base so a surrogate pair fits.
279 __CFUniCharMoveBufferFromEnd0(currentBase
+ 1, precomposed
- (currentBase
+ 1), 1);
281 precomposedChar
-= 0x10000;
282 *currentBase
= (UTF16Char
)((precomposedChar
>> 10) + 0xD800UL
);
283 *(currentBase
+ 1) = (UTF16Char
)((precomposedChar
& 0x3FF) + 0xDC00UL
);
285 if (lastChar
> 0xFFFF) { // Last char was Non-BMP
// Close the one-unit gap left by replacing a surrogate pair with a single unit.
287 memmove(currentBase
+ 1, currentBase
+ 2, (precomposed
- (currentBase
+ 2)) * sizeof(UTF16Char
));
289 *(currentBase
) = (UTF16Char
)precomposedChar
;
// Report how much input was consumed and how much output was produced.
293 if (consumedLength
) *consumedLength
= originalLength
- length
;
294 if (filledLength
) *filledLength
= usedLength
;
299 #undef __CFUniCharIsNonBaseCharacter