]> git.saurik.com Git - apple/cf.git/blob - CFUnicodePrecomposition.c
CF-635.tar.gz
[apple/cf.git] / CFUnicodePrecomposition.c
1 /*
2 * Copyright (c) 2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFUnicodePrecomposition.c
25 Copyright (c) 1999-2011, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include <string.h>
30 #include <CoreFoundation/CFBase.h>
31 #include <CoreFoundation/CFCharacterSet.h>
32 #include "CFUniChar.h"
33 #include "CFUnicodePrecomposition.h"
34 #include "CFInternal.h"
35 #include "CFUniCharPriv.h"
36
37 // Canonical Precomposition
38 static UTF32Char *__CFUniCharPrecompSourceTable = NULL;
39 static uint32_t __CFUniCharPrecompositionTableLength = 0;
40 static uint16_t *__CFUniCharBMPPrecompDestinationTable = NULL;
41 static uint32_t *__CFUniCharNonBMPPrecompDestinationTable = NULL;
42
43 static const uint8_t *__CFUniCharNonBaseBitmapForBMP_P = NULL; // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c
44 static const uint8_t *__CFUniCharCombiningClassForBMP = NULL;
45
46 static CFSpinLock_t __CFUniCharPrecompositionTableLock = CFSpinLockInit;
47
48 static void __CFUniCharLoadPrecompositionTable(void) {
49
50 __CFSpinLock(&__CFUniCharPrecompositionTableLock);
51
52 if (NULL == __CFUniCharPrecompSourceTable) {
53 const uint32_t *bytes = (const uint32_t *)CFUniCharGetMappingData(kCFUniCharCanonicalPrecompMapping);
54 uint32_t bmpMappingLength;
55
56 if (NULL == bytes) {
57 __CFSpinUnlock(&__CFUniCharPrecompositionTableLock);
58 return;
59 }
60
61 __CFUniCharPrecompositionTableLength = *(bytes++);
62 bmpMappingLength = *(bytes++);
63 __CFUniCharPrecompSourceTable = (UTF32Char *)bytes;
64 __CFUniCharBMPPrecompDestinationTable = (uint16_t *)((intptr_t)bytes + (__CFUniCharPrecompositionTableLength * sizeof(UTF32Char) * 2));
65 __CFUniCharNonBMPPrecompDestinationTable = (uint32_t *)(((intptr_t)__CFUniCharBMPPrecompDestinationTable) + bmpMappingLength);
66
67 __CFUniCharNonBaseBitmapForBMP_P = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
68 __CFUniCharCombiningClassForBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
69 }
70
71 __CFSpinUnlock(&__CFUniCharPrecompositionTableLock);
72 }
73
74 // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c
75 #define __CFUniCharIsNonBaseCharacter __CFUniCharIsNonBaseCharacter_P
76 CF_INLINE bool __CFUniCharIsNonBaseCharacter(UTF32Char character) {
77 return CFUniCharIsMemberOfBitmap(character, (character < 0x10000 ? __CFUniCharNonBaseBitmapForBMP_P : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, ((character >> 16) & 0xFF))));
78 }
79
80 typedef struct {
81 UTF16Char _key;
82 UTF16Char _value;
83 } __CFUniCharPrecomposeBMPMappings;
84
85 static UTF16Char __CFUniCharGetMappedBMPValue(const __CFUniCharPrecomposeBMPMappings *theTable, uint32_t numElem, UTF16Char character) {
86 const __CFUniCharPrecomposeBMPMappings *p, *q, *divider;
87
88 if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) {
89 return 0;
90 }
91 p = theTable;
92 q = p + (numElem-1);
93 while (p <= q) {
94 divider = p + ((q - p) >> 1); /* divide by 2 */
95 if (character < divider->_key) { q = divider - 1; }
96 else if (character > divider->_key) { p = divider + 1; }
97 else { return divider->_value; }
98 }
99 return 0;
100 }
101
102 typedef struct {
103 UTF32Char _key;
104 uint32_t _value;
105 } __CFUniCharPrecomposeMappings;
106
107 static uint32_t __CFUniCharGetMappedValue_P(const __CFUniCharPrecomposeMappings *theTable, uint32_t numElem, UTF32Char character) {
108 const __CFUniCharPrecomposeMappings *p, *q, *divider;
109
110 if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) {
111 return 0;
112 }
113 p = theTable;
114 q = p + (numElem-1);
115 while (p <= q) {
116 divider = p + ((q - p) >> 1); /* divide by 2 */
117 if (character < divider->_key) { q = divider - 1; }
118 else if (character > divider->_key) { p = divider + 1; }
119 else { return divider->_value; }
120 }
121 return 0;
122 }
123
124 __private_extern__
125 UTF32Char CFUniCharPrecomposeCharacter(UTF32Char base, UTF32Char combining) {
126 uint32_t value;
127
128 if (NULL == __CFUniCharPrecompSourceTable) __CFUniCharLoadPrecompositionTable();
129
130 if (!(value = __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings *)__CFUniCharPrecompSourceTable, __CFUniCharPrecompositionTableLength, combining))) return 0xFFFD;
131
132 // We don't have precomposition in non-BMP
133 if (value & kCFUniCharNonBmpFlag) {
134 value = __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings *)((uint32_t *)__CFUniCharNonBMPPrecompDestinationTable + (value & 0xFFFF)), (value >> 16) & 0x7FFF, base);
135 } else {
136 value = __CFUniCharGetMappedBMPValue((const __CFUniCharPrecomposeBMPMappings *)((uint32_t *)__CFUniCharBMPPrecompDestinationTable + (value & 0xFFFF)), (value >> 16), base);
137 }
138 return (value ? value : 0xFFFD);
139 }
140
// Constants for composing Hangul syllables from conjoining jamo.
// First code point of each range:
#define HANGUL_SBASE  0xAC00    // precomposed syllables
#define HANGUL_LBASE  0x1100    // leading consonants
#define HANGUL_VBASE  0x1161    // vowels
#define HANGUL_TBASE  0x11A7    // one below the first trailing consonant, so index 0 means "no trailing consonant"
// Number of code points in each range:
#define HANGUL_LCOUNT 19
#define HANGUL_VCOUNT 21
#define HANGUL_TCOUNT 28
#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)  // syllables per leading consonant
#define HANGUL_SCOUNT 11172     // equals HANGUL_LCOUNT * HANGUL_NCOUNT
150
151 CF_INLINE void __CFUniCharMoveBufferFromEnd0(UTF16Char *convertedChars, CFIndex length, CFIndex delta) {
152 const UTF16Char *limit = convertedChars;
153 UTF16Char *dstP;
154
155 convertedChars += length;
156 dstP = convertedChars + delta;
157
158 while (convertedChars > limit) *(--dstP) = *(--convertedChars);
159 }
160
161 bool CFUniCharPrecompose(const UTF16Char *characters, CFIndex length, CFIndex *consumedLength, UTF16Char *precomposed, CFIndex maxLength, CFIndex *filledLength) {
162 UTF32Char currentChar = 0, lastChar = 0, precomposedChar = 0xFFFD;
163 CFIndex originalLength = length, usedLength = 0;
164 UTF16Char *currentBase = precomposed;
165 uint8_t currentClass, lastClass = 0;
166 bool currentBaseIsBMP = true;
167 bool isPrecomposed;
168
169 if (NULL == __CFUniCharPrecompSourceTable) __CFUniCharLoadPrecompositionTable();
170
171 while (length > 0) {
172 currentChar = *(characters++);
173 --length;
174
175 if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*characters)) {
176 currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(characters++));
177 --length;
178 }
179
180 if (lastChar && __CFUniCharIsNonBaseCharacter(currentChar)) {
181 isPrecomposed = (precomposedChar == 0xFFFD ? false : true);
182 if (isPrecomposed) lastChar = precomposedChar;
183
184 currentClass = (currentChar > 0xFFFF ? CFUniCharGetUnicodeProperty(currentChar, kCFUniCharCombiningProperty) : CFUniCharGetCombiningPropertyForCharacter(currentChar, __CFUniCharCombiningClassForBMP));
185
186 if ((lastClass == 0) || (currentClass > lastClass)) {
187 if ((precomposedChar = CFUniCharPrecomposeCharacter(lastChar, currentChar)) == 0xFFFD) {
188 if (isPrecomposed) precomposedChar = lastChar;
189 lastClass = currentClass;
190 } else {
191 continue;
192 }
193 }
194 if (currentChar > 0xFFFF) { // Non-BMP
195 usedLength += 2;
196 if (usedLength > maxLength) break;
197 currentChar -= 0x10000;
198 *(precomposed++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
199 *(precomposed++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
200 } else {
201 ++usedLength;
202 if (usedLength > maxLength) break;
203 *(precomposed++) = (UTF16Char)currentChar;
204 }
205 } else {
206 if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo
207 int8_t lIndex = currentChar - HANGUL_LBASE;
208
209 if ((length > 0) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
210 int16_t vIndex = *characters - HANGUL_VBASE;
211
212 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
213 int16_t tIndex = 0;
214
215 ++characters; --length;
216
217 if (length > 0) {
218 tIndex = *characters - HANGUL_TBASE;
219 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
220 tIndex = 0;
221 } else {
222 ++characters; --length;
223 }
224 }
225 currentChar = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
226 }
227 }
228 }
229
230 if (precomposedChar != 0xFFFD) {
231 if (currentBaseIsBMP) { // Non-BMP
232 if (lastChar > 0xFFFF) { // Last char was Non-BMP
233 --usedLength;
234 memmove(currentBase + 1, currentBase + 2, (precomposed - (currentBase + 2)) * sizeof(UTF16Char));
235 }
236 *(currentBase) = (UTF16Char)precomposedChar;
237 } else {
238 if (lastChar < 0x10000) { // Last char was BMP
239 ++usedLength;
240 if (usedLength > maxLength) break;
241 __CFUniCharMoveBufferFromEnd0(currentBase + 1, precomposed - (currentBase + 1), 1);
242 }
243 precomposedChar -= 0x10000;
244 *currentBase = (UTF16Char)((precomposedChar >> 10) + 0xD800UL);
245 *(currentBase + 1) = (UTF16Char)((precomposedChar & 0x3FF) + 0xDC00UL);
246 }
247 precomposedChar = 0xFFFD;
248 }
249 currentBase = precomposed;
250
251 lastChar = currentChar;
252 lastClass = 0;
253
254 if (currentChar > 0xFFFF) { // Non-BMP
255 usedLength += 2;
256 if (usedLength > maxLength) break;
257 currentChar -= 0x10000;
258 *(precomposed++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
259 *(precomposed++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
260 currentBaseIsBMP = false;
261 } else {
262 ++usedLength;
263 if (usedLength > maxLength) break;
264 *(precomposed++) = (UTF16Char)currentChar;
265 currentBaseIsBMP = true;
266 }
267 }
268 }
269
270 if (precomposedChar != 0xFFFD) {
271 if (currentChar > 0xFFFF) { // Non-BMP
272 if (lastChar < 0x10000) { // Last char was BMP
273 ++usedLength;
274 if (usedLength > maxLength) {
275 if (consumedLength) *consumedLength = originalLength - length;
276 if (filledLength) *filledLength = usedLength;
277 return false;
278 }
279 __CFUniCharMoveBufferFromEnd0(currentBase + 1, precomposed - (currentBase + 1), 1);
280 }
281 precomposedChar -= 0x10000;
282 *currentBase = (UTF16Char)((precomposedChar >> 10) + 0xD800UL);
283 *(currentBase + 1) = (UTF16Char)((precomposedChar & 0x3FF) + 0xDC00UL);
284 } else {
285 if (lastChar > 0xFFFF) { // Last char was Non-BMP
286 --usedLength;
287 memmove(currentBase + 1, currentBase + 2, (precomposed - (currentBase + 2)) * sizeof(UTF16Char));
288 }
289 *(currentBase) = (UTF16Char)precomposedChar;
290 }
291 }
292
293 if (consumedLength) *consumedLength = originalLength - length;
294 if (filledLength) *filledLength = usedLength;
295
296 return true;
297 }
298
// Retire the helper macros defined earlier in this file so that they do not stay defined
// past this point.
#undef HANGUL_NCOUNT
#undef HANGUL_TCOUNT
#undef HANGUL_VCOUNT
#undef HANGUL_LCOUNT
#undef HANGUL_SCOUNT
#undef HANGUL_TBASE
#undef HANGUL_VBASE
#undef HANGUL_LBASE
#undef HANGUL_SBASE
#undef __CFUniCharIsNonBaseCharacter
309