]> git.saurik.com Git - apple/cf.git/blob - CFUniChar.c
CF-476.19.tar.gz
[apple/cf.git] / CFUniChar.c
1 /*
2 * Copyright (c) 2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFUniChar.c
24 Copyright 2001-2002, Apple, Inc. All rights reserved.
25 Responsibility: Aki Inoue
26 */
27
28 #include <CoreFoundation/CFByteOrder.h>
29 #include "CFInternal.h"
30 #include "CFBundle_Internal.h"
31 #include "CFUniChar.h"
32 #include "CFStringEncodingConverterExt.h"
33 #include "CFUnicodeDecomposition.h"
34 #include "CFUniCharPriv.h"
35 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
36 #include <fcntl.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <sys/param.h>
40 #include <sys/mman.h>
41 #include <unistd.h>
42 #include <stdlib.h>
43 #endif
44 #if DEPLOYMENT_TARGET_MACOSX
45 #include <mach/mach.h>
46 #endif
47
48 #if DEPLOYMENT_TARGET_MACOSX
49 #define __kCFCharacterSetDir "/System/Library/CoreServices"
50 #elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
51 #define __kCFCharacterSetDir "/usr/local/share/CoreFoundation"
52 #elif defined(__WIN32__)
53 #define __kCFCharacterSetDir "\\Windows\\CoreFoundation"
54 #endif
55
56 #if DEPLOYMENT_TARGET_MACOSX
57 #define USE_MACHO_SEGMENT 1
58 #endif //__MACH__
59
60 enum {
61 kCFUniCharLastExternalSet = kCFUniCharNewlineCharacterSet,
62 kCFUniCharFirstInternalSet = kCFUniCharCompatibilityDecomposableCharacterSet,
63 kCFUniCharLastInternalSet = kCFUniCharGraphemeExtendCharacterSet,
64 kCFUniCharFirstBitmapSet = kCFUniCharDecimalDigitCharacterSet
65 };
66
67 CF_INLINE uint32_t __CFUniCharMapExternalSetToInternalIndex(uint32_t cset) { return ((kCFUniCharFirstInternalSet <= cset) ? ((cset - kCFUniCharFirstInternalSet) + kCFUniCharLastExternalSet) : cset) - kCFUniCharFirstBitmapSet; }
68 CF_INLINE uint32_t __CFUniCharMapCompatibilitySetID(uint32_t cset) { return ((cset == kCFUniCharControlCharacterSet) ? kCFUniCharControlAndFormatterCharacterSet : (((cset > kCFUniCharLastExternalSet) && (cset < kCFUniCharFirstInternalSet)) ? ((cset - kCFUniCharLastExternalSet) + kCFUniCharFirstInternalSet) : cset)); }
69
70 #if (DEPLOYMENT_TARGET_MACOSX) && USE_MACHO_SEGMENT
71
72 #include <mach-o/getsect.h>
73 #include <mach-o/dyld.h>
74 #include <mach-o/ldsyms.h>
75
76 static const void *__CFGetSectDataPtr(const char *segname, const char *sectname, uint64_t *sizep) {
77 uint32_t idx, cnt = _dyld_image_count();
78 for (idx = 0; idx < cnt; idx++) {
79 void *mh = (void *)_dyld_get_image_header(idx);
80 if (mh != &_mh_dylib_header) continue;
81 #if __LP64__
82 const struct section_64 *sect = getsectbynamefromheader_64((struct mach_header_64 *)mh, segname, sectname);
83 #else
84 const struct section *sect = getsectbynamefromheader((struct mach_header *)mh, segname, sectname);
85 #endif
86 if (!sect) break;
87 if (sizep) *sizep = (uint64_t)sect->size;
88 return (char *)sect->addr + _dyld_get_image_vmaddr_slide(idx);
89 }
90 if (sizep) *sizep = 0ULL;
91 return NULL;
92 }
93
94 #endif
95
96 #if !USE_MACHO_SEGMENT
97
98 // Memory map the file
99
100 CF_INLINE void __CFUniCharCharacterSetPath(char *cpath) {
101 #if DEPLOYMENT_TARGET_MACOSX
102 strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
103 #else
104 strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
105 #endif
106
107 #if 0 || 0
108 #if 0
109 strncat(cpath, "\\Resources\\", MAXPATHLEN - strlen(cpath));
110 #else
111 strncat(cpath, "\\CoreFoundation.resources\\CharacterSets\\", MAXPATHLEN - strlen(cpath));
112 #endif
113 #else
114 strlcat(cpath, "/CharacterSets/", MAXPATHLEN);
115 #endif
116 }
117
118 #if defined (__WIN32__)
119 #define MAX_BITMAP_STATE 512
//
// A name stored in this array belongs to a bitmap file that an earlier
// lookup failed to find. We assume it will still be missing on later
// calls, which lets us avoid hitting the disk unnecessarily. The
// assumption is not 100% correct: bitmap files can be added while the
// application is running, and the application then has to be restarted
// to pick up the new bitmap info.
//
// We should probably revisit this.
//
130 static char *mappedBitmapState[MAX_BITMAP_STATE];
131 static int __nNumStateEntries = -1;
132 CRITICAL_SECTION __bitmapStateLock = {0};
133
134 bool __GetBitmapStateForName(char *bitmapName) {
135 if (NULL == __bitmapStateLock.DebugInfo)
136 InitializeCriticalSection(&__bitmapStateLock);
137 EnterCriticalSection(&__bitmapStateLock);
138 if (__nNumStateEntries >= 0) {
139 for (int i = 0; i < __nNumStateEntries; i++) {
140 if (strcmp(mappedBitmapState[i], bitmapName) == 0) {
141 LeaveCriticalSection(&__bitmapStateLock);
142 return true;
143 }
144 }
145 }
146 LeaveCriticalSection(&__bitmapStateLock);
147 return false;
148 }
149 void __AddBitmapStateForName(char *bitmapName) {
150 if (NULL == __bitmapStateLock.DebugInfo)
151 InitializeCriticalSection(&__bitmapStateLock);
152 EnterCriticalSection(&__bitmapStateLock);
153 __nNumStateEntries++;
154 mappedBitmapState[__nNumStateEntries] = (char *)malloc((strlen(bitmapName)+1) * sizeof(char));
155 strcpy(mappedBitmapState[__nNumStateEntries], bitmapName);
156 LeaveCriticalSection(&__bitmapStateLock);
157 }
158 #endif //__WIN32__
159
/* Maps the file at fileName read-only into memory.
   fileName: path of the file to map.
   bytes: on success receives the address of the mapping; left untouched on failure in the POSIX path.
   Returns true when the file was mapped, false otherwise.
   The mapping is never unmapped by this function. */
static bool __CFUniCharLoadBytesFromFile(const char *fileName, const void **bytes) {
#if 0 || 0
    HANDLE bitmapFileHandle = NULL;
    HANDLE mappingHandle = NULL;

    if (__GetBitmapStateForName((char *)fileName)) {
        // The fileName has been tried in the past, so just return false
        // and move on.
        *bytes = NULL;
        return false;
    }
    mappingHandle = OpenFileMappingA(FILE_MAP_READ, TRUE, fileName);
    if (NULL == mappingHandle) {
        if ((bitmapFileHandle = CreateFileA(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) {
            // We tried to get the bitmap file for mapping, but it's not there.  Add to list of non-existent bitmap-files so
            // we don't have to try this again in the future.
            __AddBitmapStateForName((char *)fileName);
            return false;
        }
        mappingHandle = CreateFileMappingA(bitmapFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
        CloseHandle(bitmapFileHandle);
        if (!mappingHandle) return false;

        *bytes = MapViewOfFileEx(mappingHandle, FILE_MAP_READ, 0, 0, 0, 0);
        CloseHandle(mappingHandle);
    } else {
        *bytes = MapViewOfFileEx(mappingHandle, FILE_MAP_READ, 0, 0, 0, 0);
        CloseHandle(mappingHandle);
    }

    return (*bytes ? true : false);
#else
    struct stat statBuf;
    bool mapped = false;
    int fd;

    // Held open for the duration of the file access; closed again on every exit path below
    int no_hang_fd = open("/dev/autofs_nowait", 0);

    fd = open(fileName, O_RDONLY, 0);
    if (fd >= 0) {
        if (fstat(fd, &statBuf) == 0) {
            void *mapping = mmap(0, statBuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);

            if (mapping != MAP_FAILED) {
                *bytes = mapping;
                mapped = true;
            }
        }
        close(fd); // the mapping stays valid after the descriptor is closed
    }
    if (no_hang_fd >= 0) close(no_hang_fd);

    return mapped;
#endif
}
210
211 #endif // USE_MACHO_SEGMENT
212
213 static bool __CFUniCharLoadFile(const char *bitmapName, const void **bytes) {
214 #if USE_MACHO_SEGMENT
215 *bytes = __CFGetSectDataPtr("__UNICODE", bitmapName, NULL);
216 return *bytes ? true : false;
217 #else
218 char cpath[MAXPATHLEN];
219 __CFUniCharCharacterSetPath(cpath);
220 strlcat(cpath, bitmapName, MAXPATHLEN);
221 return __CFUniCharLoadBytesFromFile(cpath, bytes);
222 #endif
223 }
224
225 // Bitmap functions
226 CF_INLINE bool isControl(UTF32Char theChar, uint16_t charset, const void *data) { // ISO Control
227 return (((theChar <= 0x001F) || (theChar >= 0x007F && theChar <= 0x009F)) ? true : false);
228 }
229
230 CF_INLINE bool isWhitespace(UTF32Char theChar, uint16_t charset, const void *data) { // Space
231 return (((theChar == 0x0020) || (theChar == 0x0009) || (theChar == 0x00A0) || (theChar == 0x1680) || (theChar >= 0x2000 && theChar <= 0x200B) || (theChar == 0x202F) || (theChar == 0x205F) || (theChar == 0x3000)) ? true : false);
232 }
233
234 CF_INLINE bool isNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space
235 return (((theChar >= 0x000A && theChar <= 0x000D) || (theChar == 0x0085) || (theChar == 0x2028) || (theChar == 0x2029)) ? true : false);
236 }
237
238 CF_INLINE bool isWhitespaceAndNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space
239 return ((isWhitespace(theChar, charset, data) || isNewline(theChar, charset, data)) ? true : false);
240 }
241
/* Per-character-set bitmap table entry. */
typedef struct {
    uint32_t _numPlanes;        // number of slots in _planes
    const uint8_t **_planes;    // one 8KB bitmap pointer per plane; NULL for planes without a bitmap
} __CFUniCharBitmapData;
246
247 static char __CFUniCharUnicodeVersionString[8] = {0, 0, 0, 0, 0, 0, 0, 0};
248
249 static uint32_t __CFUniCharNumberOfBitmaps = 0;
250 static __CFUniCharBitmapData *__CFUniCharBitmapDataArray = NULL;
251
252 static CFSpinLock_t __CFUniCharBitmapLock = CFSpinLockInit;
253
254 #if !defined(CF_UNICHAR_BITMAP_FILE)
255 #if USE_MACHO_SEGMENT
256 #define CF_UNICHAR_BITMAP_FILE "__csbitmaps"
257 #else
258 #define CF_UNICHAR_BITMAP_FILE "CFCharacterSetBitmaps.bitmap"
259 #endif
260 #endif
261
262 static bool __CFUniCharLoadBitmapData(void) {
263 __CFUniCharBitmapData *array;
264 uint32_t headerSize;
265 uint32_t bitmapSize;
266 int numPlanes;
267 uint8_t currentPlane;
268 const void *bytes;
269 const void *bitmapBase;
270 const void *bitmap;
271 int idx, bitmapIndex;
272
273 __CFSpinLock(&__CFUniCharBitmapLock);
274
275 if (__CFUniCharBitmapDataArray || !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE, &bytes)) {
276 __CFSpinUnlock(&__CFUniCharBitmapLock);
277 return false;
278 }
279
280 for (idx = 0;idx < 4 && ((const uint8_t *)bytes)[idx];idx++) {
281 __CFUniCharUnicodeVersionString[idx * 2] = ((const uint8_t *)bytes)[idx];
282 __CFUniCharUnicodeVersionString[idx * 2 + 1] = '.';
283 }
284 __CFUniCharUnicodeVersionString[(idx < 4 ? idx * 2 - 1 : 7)] = '\0';
285
286 headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4)));
287
288 bitmapBase = (uint8_t *)bytes + headerSize;
289 bytes = (uint8_t *)bytes + (sizeof(uint32_t) * 2);
290 headerSize -= (sizeof(uint32_t) * 2);
291
292 __CFUniCharNumberOfBitmaps = headerSize / (sizeof(uint32_t) * 2);
293
294 array = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * __CFUniCharNumberOfBitmaps, 0);
295
296 for (idx = 0;idx < (int)__CFUniCharNumberOfBitmaps;idx++) {
297 bitmap = (uint8_t *)bitmapBase + CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
298 bitmapSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
299
300 numPlanes = bitmapSize / (8 * 1024);
301 numPlanes = *(const uint8_t *)((char *)bitmap + (((numPlanes - 1) * ((8 * 1024) + 1)) - 1)) + 1;
302 array[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * numPlanes, 0);
303 array[idx]._numPlanes = numPlanes;
304
305 currentPlane = 0;
306 for (bitmapIndex = 0;bitmapIndex < numPlanes;bitmapIndex++) {
307 if (bitmapIndex == currentPlane) {
308 array[idx]._planes[bitmapIndex] = (const uint8_t *)bitmap;
309 bitmap = (uint8_t *)bitmap + (8 * 1024);
310 #if defined (__cplusplus)
311 currentPlane = *(((const uint8_t*&)bitmap)++);
312 #else
313 currentPlane = *((const uint8_t *)bitmap++);
314 #endif //C++
315
316 } else {
317 array[idx]._planes[bitmapIndex] = NULL;
318 }
319 }
320 }
321
322 __CFUniCharBitmapDataArray = array;
323
324 __CFSpinUnlock(&__CFUniCharBitmapLock);
325
326 return true;
327 }
328
329 __private_extern__ const char *__CFUniCharGetUnicodeVersionString(void) {
330 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
331 return __CFUniCharUnicodeVersionString;
332 }
333
334 bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset) {
335 charset = __CFUniCharMapCompatibilitySetID(charset);
336
337 switch (charset) {
338 case kCFUniCharWhitespaceCharacterSet:
339 return isWhitespace(theChar, charset, NULL);
340
341 case kCFUniCharWhitespaceAndNewlineCharacterSet:
342 return isWhitespaceAndNewline(theChar, charset, NULL);
343
344 case kCFUniCharNewlineCharacterSet:
345 return isNewline(theChar, charset, NULL);
346
347 default: {
348 uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset);
349
350 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
351
352 if (tableIndex < __CFUniCharNumberOfBitmaps) {
353 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex;
354 uint8_t planeNo = (theChar >> 16) & 0xFF;
355
356 // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16
357 if (charset == kCFUniCharIllegalCharacterSet) {
358 if (planeNo == 0x0E) { // Plane 14
359 theChar &= 0xFF;
360 return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? false : true);
361 } else if (planeNo == 0x0F || planeNo == 0x10) { // Plane 15 & 16
362 return ((theChar & 0xFF) > 0xFFFD ? true : false);
363 } else {
364 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? !CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : true);
365 }
366 } else if (charset == kCFUniCharControlAndFormatterCharacterSet) {
367 if (planeNo == 0x0E) { // Plane 14
368 theChar &= 0xFF;
369 return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? true : false);
370 } else {
371 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
372 }
373 } else {
374 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
375 }
376 }
377 return false;
378 }
379 }
380 }
381
382 const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane) {
383 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
384
385 charset = __CFUniCharMapCompatibilitySetID(charset);
386
387 if ((charset > kCFUniCharWhitespaceAndNewlineCharacterSet) && (charset != kCFUniCharIllegalCharacterSet) && (charset != kCFUniCharNewlineCharacterSet)) {
388 uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset);
389
390 if (tableIndex < __CFUniCharNumberOfBitmaps) {
391 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex;
392
393 return (plane < data->_numPlanes ? data->_planes[plane] : NULL);
394 }
395 }
396 return NULL;
397 }
398
399 __private_extern__ uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted) {
400 const uint8_t *src = CFUniCharGetBitmapPtrForPlane(charset, plane);
401 int numBytes = (8 * 1024);
402
403 if (src) {
404 if (isInverted) {
405 #if defined (__cplusplus)
406 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++));
407 #else
408 while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++));
409 #endif //C++
410 } else {
411 #if defined (__cplusplus)
412 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++);
413 #else
414 while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++);
415 #endif //C++
416 }
417 return kCFUniCharBitmapFilled;
418 } else if (charset == kCFUniCharIllegalCharacterSet) {
419 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset));
420
421 if (plane < data->_numPlanes && (src = data->_planes[plane])) {
422 if (isInverted) {
423 #if defined (__cplusplus)
424 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++);
425 #else
426 while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++);
427 #endif //C++
428 } else {
429 #if defined (__cplusplus)
430 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++));
431 #else
432 while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++));
433 #endif //C++
434 }
435 return kCFUniCharBitmapFilled;
436 } else if (plane == 0x0E) { // Plane 14
437 int idx;
438 uint8_t asciiRange = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
439 uint8_t otherRange = (isInverted ? (uint8_t)0 : (uint8_t)0xFF);
440
441 #if defined (__cplusplus)
442 *(((uint8_t *&)bitmap)++) = 0x02; // UE0001 LANGUAGE TAG
443 #else
444 *((uint8_t *)bitmap++) = 0x02; // UE0001 LANGUAGE TAG
445 #endif //C++
446 for (idx = 1;idx < numBytes;idx++) {
447 #if defined (__cplusplus)
448 *(((uint8_t *&)bitmap)++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
449 #else
450 *((uint8_t *)bitmap++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
451 #endif //C++
452 }
453 return kCFUniCharBitmapFilled;
454 } else if (plane == 0x0F || plane == 0x10) { // Plane 15 & 16
455 uint32_t value = (isInverted ? ~0 : 0);
456 numBytes /= 4; // for 32bit
457
458 while (numBytes-- > 0) {
459 *((uint32_t *)bitmap) = value;
460 #if defined (__cplusplus)
461 bitmap = (uint8_t *)bitmap + sizeof(uint32_t);
462 #else
463 bitmap += sizeof(uint32_t);
464 #endif //C++
465 }
466 *(((uint8_t *)bitmap) - 5) = (isInverted ? 0x3F : 0xC0); // 0xFFFE & 0xFFFF
467 return kCFUniCharBitmapFilled;
468 }
469 return (isInverted ? kCFUniCharBitmapEmpty : kCFUniCharBitmapAll);
470 } else if ((charset < kCFUniCharDecimalDigitCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) {
471 if (plane) return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
472
473 uint8_t *bitmapBase = (uint8_t *)bitmap;
474 CFIndex idx;
475 uint8_t nonFillValue = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
476
477 #if defined (__cplusplus)
478 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = nonFillValue;
479 #else
480 while (numBytes-- > 0) *((uint8_t *)bitmap++) = nonFillValue;
481 #endif //C++
482
483 if ((charset == kCFUniCharWhitespaceAndNewlineCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) {
484 const UniChar newlines[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029};
485
486 for (idx = 0;idx < (int)(sizeof(newlines) / sizeof(*newlines)); idx++) {
487 if (isInverted) {
488 CFUniCharRemoveCharacterFromBitmap(newlines[idx], bitmapBase);
489 } else {
490 CFUniCharAddCharacterToBitmap(newlines[idx], bitmapBase);
491 }
492 }
493
494 if (charset == kCFUniCharNewlineCharacterSet) return kCFUniCharBitmapFilled;
495 }
496
497 if (isInverted) {
498 CFUniCharRemoveCharacterFromBitmap(0x0009, bitmapBase);
499 CFUniCharRemoveCharacterFromBitmap(0x0020, bitmapBase);
500 CFUniCharRemoveCharacterFromBitmap(0x00A0, bitmapBase);
501 CFUniCharRemoveCharacterFromBitmap(0x1680, bitmapBase);
502 CFUniCharRemoveCharacterFromBitmap(0x202F, bitmapBase);
503 CFUniCharRemoveCharacterFromBitmap(0x205F, bitmapBase);
504 CFUniCharRemoveCharacterFromBitmap(0x3000, bitmapBase);
505 } else {
506 CFUniCharAddCharacterToBitmap(0x0009, bitmapBase);
507 CFUniCharAddCharacterToBitmap(0x0020, bitmapBase);
508 CFUniCharAddCharacterToBitmap(0x00A0, bitmapBase);
509 CFUniCharAddCharacterToBitmap(0x1680, bitmapBase);
510 CFUniCharAddCharacterToBitmap(0x202F, bitmapBase);
511 CFUniCharAddCharacterToBitmap(0x205F, bitmapBase);
512 CFUniCharAddCharacterToBitmap(0x3000, bitmapBase);
513 }
514
515 for (idx = 0x2000;idx <= 0x200B;idx++) {
516 if (isInverted) {
517 CFUniCharRemoveCharacterFromBitmap(idx, bitmapBase);
518 } else {
519 CFUniCharAddCharacterToBitmap(idx, bitmapBase);
520 }
521 }
522 return kCFUniCharBitmapFilled;
523 }
524 return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
525 }
526
527 __private_extern__ uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset) {
528 if ((charset == kCFUniCharControlCharacterSet) || (charset == kCFUniCharControlAndFormatterCharacterSet)) {
529 return 15; // 0 to 14
530 } else if (charset < kCFUniCharDecimalDigitCharacterSet) {
531 return 1;
532 } else if (charset == kCFUniCharIllegalCharacterSet) {
533 return 17;
534 } else {
535 uint32_t numPlanes;
536
537 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
538
539 numPlanes = __CFUniCharBitmapDataArray[__CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset))]._numPlanes;
540
541 return numPlanes;
542 }
543 }
544
545 // Mapping data loading
546 static const void **__CFUniCharMappingTables = NULL;
547
548 static CFSpinLock_t __CFUniCharMappingTableLock = CFSpinLockInit;
549
550 #if __CF_BIG_ENDIAN__
551 #if USE_MACHO_SEGMENT
552 #define MAPPING_TABLE_FILE "__data"
553 #else
554 #define MAPPING_TABLE_FILE "CFUnicodeData-B.mapping"
555 #endif
556 #else
557 #if USE_MACHO_SEGMENT
558 #define MAPPING_TABLE_FILE "__data"
559 #else
560 #define MAPPING_TABLE_FILE "CFUnicodeData-L.mapping"
561 #endif
562 #endif
563
564 __private_extern__ const void *CFUniCharGetMappingData(uint32_t type) {
565
566 __CFSpinLock(&__CFUniCharMappingTableLock);
567
568 if (NULL == __CFUniCharMappingTables) {
569 const void *bytes;
570 const void *bodyBase;
571 int headerSize;
572 int idx, count;
573
574 if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE, &bytes)) {
575 __CFSpinUnlock(&__CFUniCharMappingTableLock);
576 return NULL;
577 }
578
579 #if defined (__cplusplus)
580 bytes = (uint8_t *)bytes + 4; // Skip Unicode version
581 headerSize = *((uint8_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t);
582 #else
583 bytes += 4; // Skip Unicode version
584 headerSize = *((uint32_t *)bytes); bytes += sizeof(uint32_t);
585 #endif //C++
586 headerSize -= (sizeof(uint32_t) * 2);
587 bodyBase = (char *)bytes + headerSize;
588
589 count = headerSize / sizeof(uint32_t);
590
591 __CFUniCharMappingTables = (const void **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * count, 0);
592
593 for (idx = 0;idx < count;idx++) {
594 #if defined (__cplusplus)
595 __CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t);
596 #else
597 __CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes += sizeof(uint32_t);
598 #endif //C++
599 }
600 }
601
602 __CFSpinUnlock(&__CFUniCharMappingTableLock);
603
604 return __CFUniCharMappingTables[type];
605 }
606
607 // Case mapping functions
608 #define DO_SPECIAL_CASE_MAPPING 1
609
610 static uint32_t *__CFUniCharCaseMappingTableCounts = NULL;
611 static uint32_t **__CFUniCharCaseMappingTable = NULL;
612 static const uint32_t **__CFUniCharCaseMappingExtraTable = NULL;
613
/* One entry of a case mapping table; the tables are binary searched by _key. */
typedef struct {
    uint32_t _key;      // source character
    uint32_t _value;    // encoded mapping result for _key
} __CFUniCharCaseMappings;
618
619 /* Binary searches CFStringEncodingUnicodeTo8BitCharMap */
620 static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings *theTable, uint32_t numElem, UTF32Char character) {
621 const __CFUniCharCaseMappings *p, *q, *divider;
622
623 if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) {
624 return 0;
625 }
626 p = theTable;
627 q = p + (numElem-1);
628 while (p <= q) {
629 divider = p + ((q - p) >> 1); /* divide by 2 */
630 if (character < divider->_key) { q = divider - 1; }
631 else if (character > divider->_key) { p = divider + 1; }
632 else { return divider->_value; }
633 }
634 return 0;
635 }
636
637 #define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1)
638
639 static bool __CFUniCharLoadCaseMappingTable(void) {
640 uint32_t *countArray;
641 int idx;
642
643 if (NULL == __CFUniCharMappingTables) (void)CFUniCharGetMappingData(kCFUniCharToLowercase);
644 if (NULL == __CFUniCharMappingTables) return false;
645
646 __CFSpinLock(&__CFUniCharMappingTableLock);
647
648 if (__CFUniCharCaseMappingTableCounts) {
649 __CFSpinUnlock(&__CFUniCharMappingTableLock);
650 return true;
651 }
652
653 countArray = (uint32_t *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(uint32_t) * NUM_CASE_MAP_DATA + sizeof(uint32_t *) * NUM_CASE_MAP_DATA * 2, 0);
654 __CFUniCharCaseMappingTable = (uint32_t **)((char *)countArray + sizeof(uint32_t) * NUM_CASE_MAP_DATA);
655 __CFUniCharCaseMappingExtraTable = (const uint32_t **)__CFUniCharCaseMappingTable + NUM_CASE_MAP_DATA;
656
657 for (idx = 0;idx < NUM_CASE_MAP_DATA;idx++) {
658 countArray[idx] = *((uint32_t *)__CFUniCharMappingTables[idx]) / (sizeof(uint32_t) * 2);
659 __CFUniCharCaseMappingTable[idx] = ((uint32_t *)__CFUniCharMappingTables[idx]) + 1;
660 __CFUniCharCaseMappingExtraTable[idx] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable[idx] + *((uint32_t *)__CFUniCharMappingTables[idx]));
661 }
662
663 __CFUniCharCaseMappingTableCounts = countArray;
664
665 __CFSpinUnlock(&__CFUniCharMappingTableLock);
666 return true;
667 }
668
669 #if __CF_BIG_ENDIAN__
670 #define TURKISH_LANG_CODE (0x7472) // tr
671 #define LITHUANIAN_LANG_CODE (0x6C74) // lt
672 #define AZERI_LANG_CODE (0x617A) // az
673 #else
674 #define TURKISH_LANG_CODE (0x7274) // tr
675 #define LITHUANIAN_LANG_CODE (0x746C) // lt
676 #define AZERI_LANG_CODE (0x7A61) // az
677 #endif
678
679 CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode) {
680 __CFUniCharBitmapData *data;
681 uint8_t planeNo = (theChar >> 16) & 0xFF;
682
683 caseFoldRetry:
684
685 #if DO_SPECIAL_CASE_MAPPING
686 if (flags & kCFUniCharCaseMapFinalSigma) {
687 if (theChar == 0x03A3) { // Final sigma
688 *convertedChar = (ctype == kCFUniCharToLowercase ? 0x03C2 : 0x03A3);
689 return 1;
690 }
691 }
692
693 if (langCode) {
694 switch (*(uint16_t *)langCode) {
695 case LITHUANIAN_LANG_CODE:
696 if (theChar == 0x0307 && (flags & kCFUniCharCaseMapAfter_i)) {
697 return 0;
698 } else if (ctype == kCFUniCharToLowercase) {
699 if (flags & kCFUniCharCaseMapMoreAbove) {
700 switch (theChar) {
701 case 0x0049: // LATIN CAPITAL LETTER I
702 *(convertedChar++) = 0x0069;
703 *(convertedChar++) = 0x0307;
704 return 2;
705
706 case 0x004A: // LATIN CAPITAL LETTER J
707 *(convertedChar++) = 0x006A;
708 *(convertedChar++) = 0x0307;
709 return 2;
710
711 case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK
712 *(convertedChar++) = 0x012F;
713 *(convertedChar++) = 0x0307;
714 return 2;
715
716 default: break;
717 }
718 }
719 switch (theChar) {
720 case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE
721 *(convertedChar++) = 0x0069;
722 *(convertedChar++) = 0x0307;
723 *(convertedChar++) = 0x0300;
724 return 3;
725
726 case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE
727 *(convertedChar++) = 0x0069;
728 *(convertedChar++) = 0x0307;
729 *(convertedChar++) = 0x0301;
730 return 3;
731
732 case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE
733 *(convertedChar++) = 0x0069;
734 *(convertedChar++) = 0x0307;
735 *(convertedChar++) = 0x0303;
736 return 3;
737
738 default: break;
739 }
740 }
741 break;
742
743 case TURKISH_LANG_CODE:
744 case AZERI_LANG_CODE:
745 if ((theChar == 0x0049) || (theChar == 0x0131)) { // LATIN CAPITAL LETTER I & LATIN SMALL LETTER DOTLESS I
746 *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? ((kCFUniCharCaseMapMoreAbove & flags) ? 0x0069 : 0x0131) : 0x0049);
747 return 1;
748 } else if ((theChar == 0x0069) || (theChar == 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE
749 *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? 0x0069 : 0x0130);
750 return 1;
751 } else if (theChar == 0x0307 && (kCFUniCharCaseMapAfter_i & flags)) { // COMBINING DOT ABOVE AFTER_i
752 if (ctype == kCFUniCharToLowercase) {
753 return 0;
754 } else {
755 *convertedChar = 0x0307;
756 return 1;
757 }
758 }
759 break;
760
761 default: break;
762 }
763 }
764 #endif DO_SPECIAL_CASE_MAPPING
765
766 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
767
768 data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(ctype + kCFUniCharHasNonSelfLowercaseCharacterSet));
769
770 if (planeNo < data->_numPlanes && data->_planes[planeNo] && CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) && (__CFUniCharCaseMappingTableCounts || __CFUniCharLoadCaseMappingTable())) {
771 uint32_t value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[ctype], __CFUniCharCaseMappingTableCounts[ctype], theChar);
772
773 if (!value && ctype == kCFUniCharToTitlecase) {
774 value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[kCFUniCharToUppercase], __CFUniCharCaseMappingTableCounts[kCFUniCharToUppercase], theChar);
775 if (value) ctype = kCFUniCharToUppercase;
776 }
777
778 if (value) {
779 CFIndex count = CFUniCharConvertFlagToCount(value);
780
781 if (count == 1) {
782 if (value & kCFUniCharNonBmpFlag) {
783 if (maxLength > 1) {
784 value = (value & 0xFFFFFF) - 0x10000;
785 *(convertedChar++) = (UTF16Char)(value >> 10) + 0xD800UL;
786 *(convertedChar++) = (UTF16Char)(value & 0x3FF) + 0xDC00UL;
787 return 2;
788 }
789 } else {
790 *convertedChar = (UTF16Char)value;
791 return 1;
792 }
793 } else if (count < maxLength) {
794 const uint32_t *extraMapping = __CFUniCharCaseMappingExtraTable[ctype] + (value & 0xFFFFFF);
795
796 if (value & kCFUniCharNonBmpFlag) {
797 CFIndex copiedLen = 0;
798
799 while (count-- > 0) {
800 value = *(extraMapping++);
801 if (value > 0xFFFF) {
802 if (copiedLen + 2 >= maxLength) break;
803 value = (value & 0xFFFFFF) - 0x10000;
804 convertedChar[copiedLen++] = (UTF16Char)(value >> 10) + 0xD800UL;
805 convertedChar[copiedLen++] = (UTF16Char)(value & 0x3FF) + 0xDC00UL;
806 } else {
807 if (copiedLen + 1 >= maxLength) break;
808 convertedChar[copiedLen++] = value;
809 }
810 }
811 if (!count) return copiedLen;
812 } else {
813 CFIndex idx;
814
815 for (idx = 0;idx < count;idx++) *(convertedChar++) = (UTF16Char)*(extraMapping++);
816 return count;
817 }
818 }
819 }
820 } else if (ctype == kCFUniCharCaseFold) {
821 ctype = kCFUniCharToLowercase;
822 goto caseFoldRetry;
823 }
824
825 if (theChar > 0xFFFF) { // non-BMP
826 theChar = (theChar & 0xFFFFFF) - 0x10000;
827 *(convertedChar++) = (UTF16Char)(theChar >> 10) + 0xD800UL;
828 *(convertedChar++) = (UTF16Char)(theChar & 0x3FF) + 0xDC00UL;
829 return 2;
830 } else {
831 *convertedChar = theChar;
832 return 1;
833 }
834 }
835
836 CFIndex CFUniCharMapTo(UniChar theChar, UniChar *convertedChar, CFIndex maxLength, uint16_t ctype, uint32_t flags) {
837 if (ctype == kCFUniCharCaseFold + 1) { // kCFUniCharDecompose
838 if (CFUniCharIsDecomposableCharacter(theChar, false)) {
839 UTF32Char buffer[MAX_DECOMPOSED_LENGTH];
840 CFIndex usedLength = CFUniCharDecomposeCharacter(theChar, buffer, MAX_DECOMPOSED_LENGTH);
841 CFIndex idx;
842
843 for (idx = 0;idx < usedLength;idx++) *(convertedChar++) = buffer[idx];
844 return usedLength;
845 } else {
846 *convertedChar = theChar;
847 return 1;
848 }
849 } else {
850 return CFUniCharMapCaseTo(theChar, convertedChar, maxLength, ctype, flags, NULL);
851 }
852 }
853
854 CF_INLINE bool __CFUniCharIsMoreAbove(UTF16Char *buffer, CFIndex length) {
855 UTF32Char currentChar;
856 uint32_t property;
857
858 while (length-- > 0) {
859 currentChar = *(buffer)++;
860 if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*(buffer + 1))) {
861 currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(buffer++));
862 --length;
863 }
864 if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
865
866 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
867
868 if (property == 230) return true; // Above priority
869 }
870 return false;
871 }
872
873 CF_INLINE bool __CFUniCharIsAfter_i(UTF16Char *buffer, CFIndex length) {
874 UTF32Char currentChar = 0;
875 uint32_t property;
876 UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
877 CFIndex decompLength;
878 CFIndex idx;
879
880 if (length < 1) return 0;
881
882 buffer += length;
883 while (length-- > 1) {
884 currentChar = *(--buffer);
885 if (CFUniCharIsSurrogateLowCharacter(currentChar)) {
886 if ((length > 1) && CFUniCharIsSurrogateHighCharacter(*(buffer - 1))) {
887 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*(--buffer), currentChar);
888 --length;
889 } else {
890 break;
891 }
892 }
893 if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
894
895 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
896
897 if (property == 230) return false; // Above priority
898 }
899 if (length == 0) {
900 currentChar = *(--buffer);
901 } else if (CFUniCharIsSurrogateLowCharacter(currentChar) && CFUniCharIsSurrogateHighCharacter(*(--buffer))) {
902 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*buffer, currentChar);
903 }
904
905 decompLength = CFUniCharDecomposeCharacter(currentChar, decomposed, MAX_DECOMPOSED_LENGTH);
906 currentChar = *decomposed;
907
908
909 for (idx = 1;idx < decompLength;idx++) {
910 currentChar = decomposed[idx];
911 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
912
913 if (property == 230) return false; // Above priority
914 }
915 return true;
916 }
917
918 __private_extern__ uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags) {
919 if (theChar == 0x03A3) { // GREEK CAPITAL LETTER SIGMA
920 if ((type == kCFUniCharToLowercase) && (currentIndex > 0)) {
921 UTF16Char *start = buffer;
922 UTF16Char *end = buffer + length;
923 UTF32Char otherChar;
924
925 // First check if we're after a cased character
926 buffer += (currentIndex - 1);
927 while (start <= buffer) {
928 otherChar = *(buffer--);
929 if (CFUniCharIsSurrogateLowCharacter(otherChar) && (start <= buffer) && CFUniCharIsSurrogateHighCharacter(*buffer)) {
930 otherChar = CFUniCharGetLongCharacterForSurrogatePair(*(buffer--), otherChar);
931 }
932 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
933 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) && !CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
934 break;
935 }
936 }
937
938 // Next check if we're before a cased character
939 buffer = start + currentIndex + 1;
940 while (buffer < end) {
941 otherChar = *(buffer++);
942 if (CFUniCharIsSurrogateHighCharacter(otherChar) && (buffer < end) && CFUniCharIsSurrogateLowCharacter(*buffer)) {
943 otherChar = CFUniCharGetLongCharacterForSurrogatePair(otherChar, *(buffer++));
944 }
945 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
946 if (CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
947 break;
948 }
949 }
950 return kCFUniCharCaseMapFinalSigma;
951 }
952 } else if (langCode) {
953 if (*((const uint16_t *)langCode) == LITHUANIAN_LANG_CODE) {
954 if ((theChar == 0x0307) && ((kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) & lastFlags) == (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove)) {
955 return (__CFUniCharIsAfter_i(buffer, currentIndex) ? kCFUniCharCaseMapAfter_i : 0);
956 } else if (type == kCFUniCharToLowercase) {
957 if ((theChar == 0x0049) || (theChar == 0x004A) || (theChar == 0x012E)) {
958 return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? kCFUniCharCaseMapMoreAbove : 0);
959 }
960 } else if ((theChar == 'i') || (theChar == 'j')) {
961 return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) : 0);
962 }
963 } else if ((*((const uint16_t *)langCode) == TURKISH_LANG_CODE) || (*((const uint16_t *)langCode) == AZERI_LANG_CODE)) {
964 if (type == kCFUniCharToLowercase) {
965 if (theChar == 0x0307) {
966 return (kCFUniCharCaseMapMoreAbove & lastFlags ? kCFUniCharCaseMapAfter_i : 0);
967 } else if (theChar == 0x0049) {
968 return (((++currentIndex < length) && (buffer[currentIndex] == 0x0307)) ? kCFUniCharCaseMapMoreAbove : 0);
969 }
970 }
971 }
972 }
973 return 0;
974 }
975
976 // Unicode property database
// Table with one entry per property type; stays NULL until CFUniCharGetUnicodePropertyDataForPlane() builds it
977 static __CFUniCharBitmapData *__CFUniCharUnicodePropertyTable = NULL;
// Number of entries in __CFUniCharUnicodePropertyTable
978 static int __CFUniCharUnicodePropertyTableCount = 0;
979
// Held while the property table is being built
980 static CFSpinLock_t __CFUniCharPropTableLock = CFSpinLockInit;
981
// Name handed to __CFUniCharLoadFile() to locate the property database
982 #if USE_MACHO_SEGMENT
983 #define PROP_DB_FILE "__properties"
984 #else
985 #define PROP_DB_FILE "CFUniCharPropertyDatabase.data"
986 #endif
987
988 const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane) {
989
990 __CFSpinLock(&__CFUniCharPropTableLock);
991
992 if (NULL == __CFUniCharUnicodePropertyTable) {
993 __CFUniCharBitmapData *table;
994 const void *bytes;
995 const void *bodyBase;
996 const void *planeBase;
997 int headerSize;
998 int idx, count;
999 int planeIndex, planeCount;
1000 int planeSize;
1001
1002 if (!__CFUniCharLoadFile(PROP_DB_FILE, &bytes)) {
1003 __CFSpinUnlock(&__CFUniCharPropTableLock);
1004 return NULL;
1005 }
1006
1007 #if defined (__cplusplus)
1008 bytes = (uint8_t*)bytes + 4; // Skip Unicode version
1009 headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
1010 #else
1011 bytes += 4; // Skip Unicode version
1012 headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes += sizeof(uint32_t);
1013 #endif //C++
1014
1015 headerSize -= (sizeof(uint32_t) * 2);
1016 bodyBase = (char *)bytes + headerSize;
1017
1018 count = headerSize / sizeof(uint32_t);
1019 __CFUniCharUnicodePropertyTableCount = count;
1020
1021 table = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * count, 0);
1022
1023 for (idx = 0;idx < count;idx++) {
1024 planeCount = *((const uint8_t *)bodyBase);
1025 planeBase = (char *)bodyBase + planeCount + (planeCount % 4 ? 4 - (planeCount % 4) : 0);
1026 table[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * planeCount, 0);
1027
1028 for (planeIndex = 0;planeIndex < planeCount;planeIndex++) {
1029 if ((planeSize = ((const uint8_t *)bodyBase)[planeIndex + 1])) {
1030 table[idx]._planes[planeIndex] = (const uint8_t *)planeBase;
1031 #if defined (__cplusplus)
1032 planeBase = (char*)planeBase + (planeSize * 256);
1033 #else
1034 planeBase += (planeSize * 256);
1035 #endif //C++
1036 } else {
1037 table[idx]._planes[planeIndex] = NULL;
1038 }
1039 }
1040
1041 table[idx]._numPlanes = planeCount;
1042 #if defined (__cplusplus)
1043 bodyBase = (const uint8_t *)bodyBase + (CFSwapInt32BigToHost(*(uint32_t *)bytes));
1044 ((uint32_t *&)bytes) ++;
1045 #else
1046 bodyBase += (CFSwapInt32BigToHost(*((uint32_t *)bytes++)));
1047 #endif //C++
1048 }
1049
1050 __CFUniCharUnicodePropertyTable = table;
1051 }
1052
1053 __CFSpinUnlock(&__CFUniCharPropTableLock);
1054
1055 return (plane < __CFUniCharUnicodePropertyTable[propertyType]._numPlanes ? __CFUniCharUnicodePropertyTable[propertyType]._planes[plane] : NULL);
1056 }
1057
1058 __private_extern__ uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType) {
1059 (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType, 0);
1060 return __CFUniCharUnicodePropertyTable[propertyType]._numPlanes;
1061 }
1062
1063 __private_extern__ uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType) {
1064 if (propertyType == kCFUniCharCombiningProperty) {
1065 return CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
1066 } else if (propertyType == kCFUniCharBidiProperty) {
1067 return CFUniCharGetBidiPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
1068 } else {
1069 return 0;
1070 }
1071 }
1072
1073
1074
1075 /*
1076 The UTF8 conversion in the following function is derived from ConvertUTF.c
1077 */
1078 /*
1079 * Copyright 2001 Unicode, Inc.
1080 *
1081 * Disclaimer
1082 *
1083 * This source code is provided as is by Unicode, Inc. No claims are
1084 * made as to fitness for any particular purpose. No warranties of any
1085 * kind are expressed or implied. The recipient agrees to determine
1086 * applicability of information provided. If this file has been
1087 * purchased on magnetic or optical media from Unicode, Inc., the
1088 * sole remedy for any claim will be exchange of defective media
1089 * within 90 days of receipt.
1090 *
1091 * Limitations on Rights to Redistribute This Code
1092 *
1093 * Unicode, Inc. hereby grants the right to freely use the information
1094 * supplied in this file in the creation of products supporting the
1095 * Unicode Standard, and to make copies of this file in any form
1096 * for internal or external distribution as long as this notice
1097 * remains attached.
1098 */
1099 #define UNI_REPLACEMENT_CHAR (0x0000FFFDUL)
1100
1101 bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat) {
1102 UTF32Char currentChar;
1103 CFIndex usedLength = *filledLength;
1104
1105 if (dstFormat == kCFUniCharUTF16Format) {
1106 UTF16Char *dstBuffer = (UTF16Char *)*dst;
1107
1108 while (srcLength-- > 0) {
1109 currentChar = *(src++);
1110
1111 if (currentChar > 0xFFFF) { // Non-BMP
1112 usedLength += 2;
1113 if (dstLength) {
1114 if (usedLength > dstLength) return false;
1115 currentChar -= 0x10000;
1116 *(dstBuffer++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
1117 *(dstBuffer++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
1118 }
1119 } else {
1120 ++usedLength;
1121 if (dstLength) {
1122 if (usedLength > dstLength) return false;
1123 *(dstBuffer++) = (UTF16Char)currentChar;
1124 }
1125 }
1126 }
1127
1128 *dst = dstBuffer;
1129 } else if (dstFormat == kCFUniCharUTF8Format) {
1130 uint8_t *dstBuffer = (uint8_t *)*dst;
1131 uint16_t bytesToWrite = 0;
1132 const UTF32Char byteMask = 0xBF;
1133 const UTF32Char byteMark = 0x80;
1134 static const uint8_t firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
1135
1136 while (srcLength-- > 0) {
1137 currentChar = *(src++);
1138
1139 /* Figure out how many bytes the result will require */
1140 if (currentChar < (UTF32Char)0x80) {
1141 bytesToWrite = 1;
1142 } else if (currentChar < (UTF32Char)0x800) {
1143 bytesToWrite = 2;
1144 } else if (currentChar < (UTF32Char)0x10000) {
1145 bytesToWrite = 3;
1146 } else if (currentChar < (UTF32Char)0x200000) {
1147 bytesToWrite = 4;
1148 } else {
1149 bytesToWrite = 2;
1150 currentChar = UNI_REPLACEMENT_CHAR;
1151 }
1152
1153 usedLength += bytesToWrite;
1154
1155 if (dstLength) {
1156 if (usedLength > dstLength) return false;
1157
1158 dstBuffer += bytesToWrite;
1159 switch (bytesToWrite) { /* note: everything falls through. */
1160 case 4: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1161 case 3: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1162 case 2: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1163 case 1: *--dstBuffer = currentChar | firstByteMark[bytesToWrite];
1164 }
1165 dstBuffer += bytesToWrite;
1166 }
1167 }
1168
1169 *dst = dstBuffer;
1170 } else {
1171 UTF32Char *dstBuffer = (UTF32Char *)*dst;
1172
1173 while (srcLength-- > 0) {
1174 currentChar = *(src++);
1175
1176 ++usedLength;
1177 if (dstLength) {
1178 if (usedLength > dstLength) return false;
1179 *(dstBuffer++) = currentChar;
1180 }
1181 }
1182
1183 *dst = dstBuffer;
1184 }
1185
1186 *filledLength = usedLength;
1187
1188 return true;
1189 }
1190
#if 0 || 0
/*
	Releases the tables this file allocates and resets the globals that refer
	to them. Each group of globals is torn down under its own spin lock.
	Currently compiled out.
*/
void __CFUniCharCleanup(void)
{
    int entry;

    // Memory allocated by __CFUniCharLoadBitmapData()
    __CFSpinLock(&__CFUniCharBitmapLock);

    if (NULL != __CFUniCharBitmapDataArray) {
        entry = 0;
        while (entry < (int)__CFUniCharNumberOfBitmaps) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray[entry]._planes);
            __CFUniCharBitmapDataArray[entry]._planes = NULL;
            ++entry;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray);
        __CFUniCharBitmapDataArray = NULL;
        __CFUniCharNumberOfBitmaps = 0;
    }

    __CFSpinUnlock(&__CFUniCharBitmapLock);

    __CFSpinLock(&__CFUniCharMappingTableLock);

    // Memory allocated by CFUniCharGetMappingData()
    if (NULL != __CFUniCharMappingTables) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharMappingTables);
        __CFUniCharMappingTables = NULL;
    }

    // Memory allocated by __CFUniCharLoadCaseMappingTable()
    if (NULL != __CFUniCharCaseMappingTableCounts) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharCaseMappingTableCounts);
        __CFUniCharCaseMappingTableCounts = NULL;

        // These two are only reset, not deallocated
        __CFUniCharCaseMappingTable = NULL;
        __CFUniCharCaseMappingExtraTable = NULL;
    }

    __CFSpinUnlock(&__CFUniCharMappingTableLock);

    // Memory allocated by CFUniCharGetUnicodePropertyDataForPlane()
    __CFSpinLock(&__CFUniCharPropTableLock);

    if (NULL != __CFUniCharUnicodePropertyTable) {
        entry = 0;
        while (entry < __CFUniCharUnicodePropertyTableCount) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable[entry]._planes);
            __CFUniCharUnicodePropertyTable[entry]._planes = NULL;
            ++entry;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable);
        __CFUniCharUnicodePropertyTable = NULL;
        __CFUniCharUnicodePropertyTableCount = 0;
    }

    __CFSpinUnlock(&__CFUniCharPropTableLock);
}
#endif // 0 || 0

#undef USE_MACHO_SEGMENT
1250