]> git.saurik.com Git - apple/cf.git/blob - CFUniChar.c
CF-550.19.tar.gz
[apple/cf.git] / CFUniChar.c
1 /*
2 * Copyright (c) 2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFUniChar.c
25 Copyright (c) 2001-2009, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include <CoreFoundation/CFByteOrder.h>
30 #include "CFInternal.h"
31 #include "CFBundle_Internal.h"
32 #include "CFUniChar.h"
33 #include "CFStringEncodingConverterExt.h"
34 #include "CFUnicodeDecomposition.h"
35 #include "CFUniCharPriv.h"
36 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
37 #include <fcntl.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <sys/param.h>
41 #include <sys/mman.h>
42 #include <unistd.h>
43 #include <stdlib.h>
44 #include <zlib.h>
45 #endif
46 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
47 #include <mach/mach.h>
48 #endif
49
50 #if DEPLOYMENT_TARGET_WINDOWS
51 extern void _CFGetFrameworkPath(wchar_t *path, int maxLength);
52 #endif
53
54 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
55 #define __kCFCharacterSetDir "/System/Library/CoreServices"
56 #elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
57 #define __kCFCharacterSetDir "/usr/local/share/CoreFoundation"
58 #elif DEPLOYMENT_TARGET_WINDOWS
59 #define __kCFCharacterSetDir "\\Windows\\CoreFoundation"
60 #endif
61
62 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
63 #define USE_MACHO_SEGMENT 1
64 #endif
65
66 enum {
67 kCFUniCharLastExternalSet = kCFUniCharNewlineCharacterSet,
68 kCFUniCharFirstInternalSet = kCFUniCharCompatibilityDecomposableCharacterSet,
69 kCFUniCharLastInternalSet = kCFUniCharGraphemeExtendCharacterSet,
70 kCFUniCharFirstBitmapSet = kCFUniCharDecimalDigitCharacterSet
71 };
72
73 CF_INLINE uint32_t __CFUniCharMapExternalSetToInternalIndex(uint32_t cset) { return ((kCFUniCharFirstInternalSet <= cset) ? ((cset - kCFUniCharFirstInternalSet) + kCFUniCharLastExternalSet) : cset) - kCFUniCharFirstBitmapSet; }
74 CF_INLINE uint32_t __CFUniCharMapCompatibilitySetID(uint32_t cset) { return ((cset == kCFUniCharControlCharacterSet) ? kCFUniCharControlAndFormatterCharacterSet : (((cset > kCFUniCharLastExternalSet) && (cset < kCFUniCharFirstInternalSet)) ? ((cset - kCFUniCharLastExternalSet) + kCFUniCharFirstInternalSet) : cset)); }
75
76 #if (DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED) && USE_MACHO_SEGMENT
77 #include <mach-o/getsect.h>
78 #include <mach-o/dyld.h>
79 #include <mach-o/ldsyms.h>
80
81 static const void *__CFGetSectDataPtr(const char *segname, const char *sectname, uint64_t *sizep) {
82 uint32_t idx, cnt = _dyld_image_count();
83 for (idx = 0; idx < cnt; idx++) {
84 void *mh = (void *)_dyld_get_image_header(idx);
85 if (mh != &_mh_dylib_header) continue;
86 #if __LP64__
87 const struct section_64 *sect = getsectbynamefromheader_64((struct mach_header_64 *)mh, segname, sectname);
88 #else
89 const struct section *sect = getsectbynamefromheader((struct mach_header *)mh, segname, sectname);
90 #endif
91 if (!sect) break;
92 if (sizep) *sizep = (uint64_t)sect->size;
93 return (char *)sect->addr + _dyld_get_image_vmaddr_slide(idx);
94 }
95 if (sizep) *sizep = 0ULL;
96 return NULL;
97 }
98 #endif
99
100 #if !USE_MACHO_SEGMENT
101
102 // Memory map the file
103
104 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
105 CF_INLINE void __CFUniCharCharacterSetPath(char *cpath) {
106 #elif DEPLOYMENT_TARGET_WINDOWS
107 CF_INLINE void __CFUniCharCharacterSetPath(wchar_t *wpath) {
108 #else
109 #error Unknown or unspecified DEPLOYMENT_TARGET
110 #endif
111 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
112 strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
113 #elif DEPLOYMENT_TARGET_WINDOWS
114 wchar_t frameworkPath[MAXPATHLEN];
115 _CFGetFrameworkPath(frameworkPath, MAXPATHLEN);
116 wcsncpy(wpath, frameworkPath, MAXPATHLEN);
117 wcsncat(wpath, L"\\CoreFoundation.resources\\", MAXPATHLEN - wcslen(wpath));
118 #else
119 strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
120 strlcat(cpath, "/CharacterSets/", MAXPATHLEN);
121 #endif
122 }
123
124 #if DEPLOYMENT_TARGET_WINDOWS
125 #define MAX_BITMAP_STATE 512
126 //
127 // If a string is placed into this array, then it has been previously
128 // determined that the bitmap-file cannot be found. Thus, we make
129 // the assumption it won't be there in future calls and we avoid
130 // hitting the disk un-necessarily. This assumption isn't 100%
131 // correct, as bitmap-files can be added. We would have to re-start
132 // the application in order to pick-up the new bitmap info.
133 //
134 // We should probably re-visit this.
135 //
136 static wchar_t *mappedBitmapState[MAX_BITMAP_STATE];
137 static int __nNumStateEntries = -1;
138 CRITICAL_SECTION __bitmapStateLock = {0};
139
140 bool __GetBitmapStateForName(const wchar_t *bitmapName) {
141 if (NULL == __bitmapStateLock.DebugInfo)
142 InitializeCriticalSection(&__bitmapStateLock);
143 EnterCriticalSection(&__bitmapStateLock);
144 if (__nNumStateEntries >= 0) {
145 for (int i = 0; i < __nNumStateEntries; i++) {
146 if (wcscmp(mappedBitmapState[i], bitmapName) == 0) {
147 LeaveCriticalSection(&__bitmapStateLock);
148 return true;
149 }
150 }
151 }
152 LeaveCriticalSection(&__bitmapStateLock);
153 return false;
154 }
155 void __AddBitmapStateForName(const wchar_t *bitmapName) {
156 if (NULL == __bitmapStateLock.DebugInfo)
157 InitializeCriticalSection(&__bitmapStateLock);
158 EnterCriticalSection(&__bitmapStateLock);
159 __nNumStateEntries++;
160 mappedBitmapState[__nNumStateEntries] = (wchar_t *)malloc((lstrlenW(bitmapName)+1) * sizeof(wchar_t));
161 lstrcpyW(mappedBitmapState[__nNumStateEntries], bitmapName);
162 LeaveCriticalSection(&__bitmapStateLock);
163 }
164 #endif
165
166 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
167 static bool __CFUniCharLoadBytesFromFile(const char *fileName, const void **bytes) {
168 #elif DEPLOYMENT_TARGET_WINDOWS
169 static bool __CFUniCharLoadBytesFromFile(const wchar_t *fileName, const void **bytes) {
170 #else
171 #error Unknown or unspecified DEPLOYMENT_TARGET
172 #endif
173 #if DEPLOYMENT_TARGET_WINDOWS
174 HANDLE bitmapFileHandle = NULL;
175 HANDLE mappingHandle = NULL;
176
177 if (__GetBitmapStateForName(fileName)) {
178 // The fileName has been tried in the past, so just return false
179 // and move on.
180 *bytes = NULL;
181 return false;
182 }
183 mappingHandle = OpenFileMappingW(FILE_MAP_READ, TRUE, fileName);
184 if (NULL == mappingHandle) {
185 if ((bitmapFileHandle = CreateFileW(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) {
186 // We tried to get the bitmap file for mapping, but it's not there. Add to list of non-existant bitmap-files so
187 // we don't have to try this again in the future.
188 __AddBitmapStateForName(fileName);
189 return false;
190 }
191 mappingHandle = CreateFileMapping(bitmapFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
192 CloseHandle(bitmapFileHandle);
193 if (!mappingHandle) return false;
194
195 *bytes = MapViewOfFileEx(mappingHandle, FILE_MAP_READ, 0, 0, 0, 0);
196 CloseHandle(mappingHandle);
197 } else {
198 *bytes = MapViewOfFileEx(mappingHandle, FILE_MAP_READ, 0, 0, 0, 0);
199 CloseHandle(mappingHandle);
200 }
201
202 return (*bytes ? true : false);
203 #else
204 struct stat statBuf;
205 int fd = -1;
206
207 if ((fd = open(fileName, O_RDONLY, 0)) < 0) {
208 return false;
209 }
210 if (fstat(fd, &statBuf) < 0 || (*bytes = mmap(0, statBuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0)) == (void *)-1) {
211 close(fd);
212 return false;
213 }
214 close(fd);
215
216 return true;
217 #endif
218 }
219
220 #endif // USE_MACHO_SEGMENT
221
222 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
223 static bool __CFUniCharLoadFile(const char *bitmapName, const void **bytes) {
224 #elif DEPLOYMENT_TARGET_WINDOWS
225 static bool __CFUniCharLoadFile(const wchar_t *bitmapName, const void **bytes) {
226 #else
227 #error Unknown or unspecified DEPLOYMENT_TARGET
228 #endif
229 #if USE_MACHO_SEGMENT
230 *bytes = __CFGetSectDataPtr("__UNICODE", bitmapName, NULL);
231 return *bytes ? true : false;
232 #else
233 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
234 char cpath[MAXPATHLEN];
235 __CFUniCharCharacterSetPath(cpath);
236 strlcat(cpath, bitmapName, MAXPATHLEN);
237 return __CFUniCharLoadBytesFromFile(cpath, bytes);
238 #elif DEPLOYMENT_TARGET_WINDOWS
239 wchar_t wpath[MAXPATHLEN];
240 __CFUniCharCharacterSetPath(wpath);
241 wcsncat(wpath, bitmapName, MAXPATHLEN);
242 return __CFUniCharLoadBytesFromFile(wpath, bytes);
243 #else
244 #error Unknown or unspecified DEPLOYMENT_TARGET
245 #endif
246 #endif
247 }
248
249 // Bitmap functions
250 CF_INLINE bool isControl(UTF32Char theChar, uint16_t charset, const void *data) { // ISO Control
251 return (((theChar <= 0x001F) || (theChar >= 0x007F && theChar <= 0x009F)) ? true : false);
252 }
253
254 CF_INLINE bool isWhitespace(UTF32Char theChar, uint16_t charset, const void *data) { // Space
255 return (((theChar == 0x0020) || (theChar == 0x0009) || (theChar == 0x00A0) || (theChar == 0x1680) || (theChar >= 0x2000 && theChar <= 0x200B) || (theChar == 0x202F) || (theChar == 0x205F) || (theChar == 0x3000)) ? true : false);
256 }
257
258 CF_INLINE bool isNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space
259 return (((theChar >= 0x000A && theChar <= 0x000D) || (theChar == 0x0085) || (theChar == 0x2028) || (theChar == 0x2029)) ? true : false);
260 }
261
262 CF_INLINE bool isWhitespaceAndNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space
263 return ((isWhitespace(theChar, charset, data) || isNewline(theChar, charset, data)) ? true : false);
264 }
265
/* One character set of the loaded bitmap data. */
typedef struct {
    uint32_t _numPlanes;        // number of entries in _planes
    const uint8_t **_planes;    // per-plane bitmap pointers; NULL where a plane has no stored bitmap
} __CFUniCharBitmapData;
270
271 static char __CFUniCharUnicodeVersionString[8] = {0, 0, 0, 0, 0, 0, 0, 0};
272
273 static uint32_t __CFUniCharNumberOfBitmaps = 0;
274 static __CFUniCharBitmapData *__CFUniCharBitmapDataArray = NULL;
275
276 static CFSpinLock_t __CFUniCharBitmapLock = CFSpinLockInit;
277
278 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
279 #if !defined(CF_UNICHAR_BITMAP_FILE)
280 #if USE_MACHO_SEGMENT
281 #define CF_UNICHAR_BITMAP_FILE "__csbitmaps"
282 #else
283 #define CF_UNICHAR_BITMAP_FILE "CFCharacterSetBitmaps.bitmap"
284 #endif
285 #endif
286 #elif DEPLOYMENT_TARGET_WINDOWS
287 #if !defined(CF_UNICHAR_BITMAP_FILE)
288 #define CF_UNICHAR_BITMAP_FILE L"CFCharacterSetBitmaps.bitmap"
289 #endif
290 #else
291 #error Unknown or unspecified DEPLOYMENT_TARGET
292 #endif
293
294 static bool __CFUniCharLoadBitmapData(void) {
295 __CFUniCharBitmapData *array;
296 uint32_t headerSize;
297 uint32_t bitmapSize;
298 int numPlanes;
299 uint8_t currentPlane;
300 const void *bytes;
301 const void *bitmapBase;
302 const void *bitmap;
303 int idx, bitmapIndex;
304
305 __CFSpinLock(&__CFUniCharBitmapLock);
306
307 if (__CFUniCharBitmapDataArray || !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE, &bytes)) {
308 __CFSpinUnlock(&__CFUniCharBitmapLock);
309 return false;
310 }
311
312 for (idx = 0;idx < 4 && ((const uint8_t *)bytes)[idx];idx++) {
313 __CFUniCharUnicodeVersionString[idx * 2] = ((const uint8_t *)bytes)[idx];
314 __CFUniCharUnicodeVersionString[idx * 2 + 1] = '.';
315 }
316 __CFUniCharUnicodeVersionString[(idx < 4 ? idx * 2 - 1 : 7)] = '\0';
317
318 headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4)));
319
320 bitmapBase = (uint8_t *)bytes + headerSize;
321 bytes = (uint8_t *)bytes + (sizeof(uint32_t) * 2);
322 headerSize -= (sizeof(uint32_t) * 2);
323
324 __CFUniCharNumberOfBitmaps = headerSize / (sizeof(uint32_t) * 2);
325
326 array = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * __CFUniCharNumberOfBitmaps, 0);
327
328 for (idx = 0;idx < (int)__CFUniCharNumberOfBitmaps;idx++) {
329 bitmap = (uint8_t *)bitmapBase + CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
330 bitmapSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
331
332 numPlanes = bitmapSize / (8 * 1024);
333 numPlanes = *(const uint8_t *)((char *)bitmap + (((numPlanes - 1) * ((8 * 1024) + 1)) - 1)) + 1;
334 array[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * numPlanes, 0);
335 array[idx]._numPlanes = numPlanes;
336
337 currentPlane = 0;
338 for (bitmapIndex = 0;bitmapIndex < numPlanes;bitmapIndex++) {
339 if (bitmapIndex == currentPlane) {
340 array[idx]._planes[bitmapIndex] = (const uint8_t *)bitmap;
341 bitmap = (uint8_t *)bitmap + (8 * 1024);
342 #if defined (__cplusplus)
343 currentPlane = *(((const uint8_t*&)bitmap)++);
344 #else
345 currentPlane = *((const uint8_t *)bitmap++);
346 #endif
347
348 } else {
349 array[idx]._planes[bitmapIndex] = NULL;
350 }
351 }
352 }
353
354 __CFUniCharBitmapDataArray = array;
355
356 __CFSpinUnlock(&__CFUniCharBitmapLock);
357
358 return true;
359 }
360
361 __private_extern__ const char *__CFUniCharGetUnicodeVersionString(void) {
362 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
363 return __CFUniCharUnicodeVersionString;
364 }
365
366 bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset) {
367 charset = __CFUniCharMapCompatibilitySetID(charset);
368
369 switch (charset) {
370 case kCFUniCharWhitespaceCharacterSet:
371 return isWhitespace(theChar, charset, NULL);
372
373 case kCFUniCharWhitespaceAndNewlineCharacterSet:
374 return isWhitespaceAndNewline(theChar, charset, NULL);
375
376 case kCFUniCharNewlineCharacterSet:
377 return isNewline(theChar, charset, NULL);
378
379 default: {
380 uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset);
381
382 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
383
384 if (tableIndex < __CFUniCharNumberOfBitmaps) {
385 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex;
386 uint8_t planeNo = (theChar >> 16) & 0xFF;
387
388 // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16
389 if (charset == kCFUniCharIllegalCharacterSet) {
390 if (planeNo == 0x0E) { // Plane 14
391 theChar &= 0xFF;
392 return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? false : true);
393 } else if (planeNo == 0x0F || planeNo == 0x10) { // Plane 15 & 16
394 return ((theChar & 0xFF) > 0xFFFD ? true : false);
395 } else {
396 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? !CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : true);
397 }
398 } else if (charset == kCFUniCharControlAndFormatterCharacterSet) {
399 if (planeNo == 0x0E) { // Plane 14
400 theChar &= 0xFF;
401 return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? true : false);
402 } else {
403 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
404 }
405 } else {
406 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
407 }
408 }
409 return false;
410 }
411 }
412 }
413
414 const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane) {
415 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
416
417 charset = __CFUniCharMapCompatibilitySetID(charset);
418
419 if ((charset > kCFUniCharWhitespaceAndNewlineCharacterSet) && (charset != kCFUniCharIllegalCharacterSet) && (charset != kCFUniCharNewlineCharacterSet)) {
420 uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset);
421
422 if (tableIndex < __CFUniCharNumberOfBitmaps) {
423 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex;
424
425 return (plane < data->_numPlanes ? data->_planes[plane] : NULL);
426 }
427 }
428 return NULL;
429 }
430
431 __private_extern__ uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted) {
432 const uint8_t *src = CFUniCharGetBitmapPtrForPlane(charset, plane);
433 int numBytes = (8 * 1024);
434
435 if (src) {
436 if (isInverted) {
437 #if defined (__cplusplus)
438 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++));
439 #else
440 while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++));
441 #endif
442 } else {
443 #if defined (__cplusplus)
444 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++);
445 #else
446 while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++);
447 #endif
448 }
449 return kCFUniCharBitmapFilled;
450 } else if (charset == kCFUniCharIllegalCharacterSet) {
451 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset));
452
453 if (plane < data->_numPlanes && (src = data->_planes[plane])) {
454 if (isInverted) {
455 #if defined (__cplusplus)
456 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++);
457 #else
458 while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++);
459 #endif
460 } else {
461 #if defined (__cplusplus)
462 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++));
463 #else
464 while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++));
465 #endif
466 }
467 return kCFUniCharBitmapFilled;
468 } else if (plane == 0x0E) { // Plane 14
469 int idx;
470 uint8_t asciiRange = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
471 uint8_t otherRange = (isInverted ? (uint8_t)0 : (uint8_t)0xFF);
472
473 #if defined (__cplusplus)
474 *(((uint8_t *&)bitmap)++) = 0x02; // UE0001 LANGUAGE TAG
475 #else
476 *((uint8_t *)bitmap++) = 0x02; // UE0001 LANGUAGE TAG
477 #endif
478 for (idx = 1;idx < numBytes;idx++) {
479 #if defined (__cplusplus)
480 *(((uint8_t *&)bitmap)++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
481 #else
482 *((uint8_t *)bitmap++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
483 #endif
484 }
485 return kCFUniCharBitmapFilled;
486 } else if (plane == 0x0F || plane == 0x10) { // Plane 15 & 16
487 uint32_t value = (isInverted ? ~0 : 0);
488 numBytes /= 4; // for 32bit
489
490 while (numBytes-- > 0) {
491 *((uint32_t *)bitmap) = value;
492 #if defined (__cplusplus)
493 bitmap = (uint8_t *)bitmap + sizeof(uint32_t);
494 #else
495 bitmap += sizeof(uint32_t);
496 #endif
497 }
498 *(((uint8_t *)bitmap) - 5) = (isInverted ? 0x3F : 0xC0); // 0xFFFE & 0xFFFF
499 return kCFUniCharBitmapFilled;
500 }
501 return (isInverted ? kCFUniCharBitmapEmpty : kCFUniCharBitmapAll);
502 } else if ((charset < kCFUniCharDecimalDigitCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) {
503 if (plane) return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
504
505 uint8_t *bitmapBase = (uint8_t *)bitmap;
506 CFIndex idx;
507 uint8_t nonFillValue = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
508
509 #if defined (__cplusplus)
510 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = nonFillValue;
511 #else
512 while (numBytes-- > 0) *((uint8_t *)bitmap++) = nonFillValue;
513 #endif
514
515 if ((charset == kCFUniCharWhitespaceAndNewlineCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) {
516 const UniChar newlines[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029};
517
518 for (idx = 0;idx < (int)(sizeof(newlines) / sizeof(*newlines)); idx++) {
519 if (isInverted) {
520 CFUniCharRemoveCharacterFromBitmap(newlines[idx], bitmapBase);
521 } else {
522 CFUniCharAddCharacterToBitmap(newlines[idx], bitmapBase);
523 }
524 }
525
526 if (charset == kCFUniCharNewlineCharacterSet) return kCFUniCharBitmapFilled;
527 }
528
529 if (isInverted) {
530 CFUniCharRemoveCharacterFromBitmap(0x0009, bitmapBase);
531 CFUniCharRemoveCharacterFromBitmap(0x0020, bitmapBase);
532 CFUniCharRemoveCharacterFromBitmap(0x00A0, bitmapBase);
533 CFUniCharRemoveCharacterFromBitmap(0x1680, bitmapBase);
534 CFUniCharRemoveCharacterFromBitmap(0x202F, bitmapBase);
535 CFUniCharRemoveCharacterFromBitmap(0x205F, bitmapBase);
536 CFUniCharRemoveCharacterFromBitmap(0x3000, bitmapBase);
537 } else {
538 CFUniCharAddCharacterToBitmap(0x0009, bitmapBase);
539 CFUniCharAddCharacterToBitmap(0x0020, bitmapBase);
540 CFUniCharAddCharacterToBitmap(0x00A0, bitmapBase);
541 CFUniCharAddCharacterToBitmap(0x1680, bitmapBase);
542 CFUniCharAddCharacterToBitmap(0x202F, bitmapBase);
543 CFUniCharAddCharacterToBitmap(0x205F, bitmapBase);
544 CFUniCharAddCharacterToBitmap(0x3000, bitmapBase);
545 }
546
547 for (idx = 0x2000;idx <= 0x200B;idx++) {
548 if (isInverted) {
549 CFUniCharRemoveCharacterFromBitmap(idx, bitmapBase);
550 } else {
551 CFUniCharAddCharacterToBitmap(idx, bitmapBase);
552 }
553 }
554 return kCFUniCharBitmapFilled;
555 }
556 return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
557 }
558
559 __private_extern__ uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset) {
560 if ((charset == kCFUniCharControlCharacterSet) || (charset == kCFUniCharControlAndFormatterCharacterSet)) {
561 return 15; // 0 to 14
562 } else if (charset < kCFUniCharDecimalDigitCharacterSet) {
563 return 1;
564 } else if (charset == kCFUniCharIllegalCharacterSet) {
565 return 17;
566 } else {
567 uint32_t numPlanes;
568
569 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
570
571 numPlanes = __CFUniCharBitmapDataArray[__CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset))]._numPlanes;
572
573 return numPlanes;
574 }
575 }
576
577 // Mapping data loading
578 static const void **__CFUniCharMappingTables = NULL;
579
580 static CFSpinLock_t __CFUniCharMappingTableLock = CFSpinLockInit;
581
582 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
583 #if __CF_BIG_ENDIAN__
584 #if USE_MACHO_SEGMENT
585 #define MAPPING_TABLE_FILE "__data"
586 #else
587 #define MAPPING_TABLE_FILE "CFUnicodeData-B.mapping"
588 #endif
589 #else
590 #if USE_MACHO_SEGMENT
591 #define MAPPING_TABLE_FILE "__data"
592 #else
593 #define MAPPING_TABLE_FILE "CFUnicodeData-L.mapping"
594 #endif
595 #endif
596 #elif DEPLOYMENT_TARGET_WINDOWS
597 #if __CF_BIG_ENDIAN__
598 #if USE_MACHO_SEGMENT
599 #define MAPPING_TABLE_FILE "__data"
600 #else
601 #define MAPPING_TABLE_FILE L"CFUnicodeData-B.mapping"
602 #endif
603 #else
604 #if USE_MACHO_SEGMENT
605 #define MAPPING_TABLE_FILE "__data"
606 #else
607 #define MAPPING_TABLE_FILE L"CFUnicodeData-L.mapping"
608 #endif
609 #endif
610 #else
611 #error Unknown or unspecified DEPLOYMENT_TARGET
612 #endif
613
614 __private_extern__ const void *CFUniCharGetMappingData(uint32_t type) {
615
616 __CFSpinLock(&__CFUniCharMappingTableLock);
617
618 if (NULL == __CFUniCharMappingTables) {
619 const void *bytes;
620 const void *bodyBase;
621 int headerSize;
622 int idx, count;
623
624 if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE, &bytes)) {
625 __CFSpinUnlock(&__CFUniCharMappingTableLock);
626 return NULL;
627 }
628
629 #if defined (__cplusplus)
630 bytes = (uint8_t *)bytes + 4; // Skip Unicode version
631 headerSize = *((uint8_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t);
632 #else
633 bytes += 4; // Skip Unicode version
634 headerSize = *((uint32_t *)bytes); bytes += sizeof(uint32_t);
635 #endif
636 headerSize -= (sizeof(uint32_t) * 2);
637 bodyBase = (char *)bytes + headerSize;
638
639 count = headerSize / sizeof(uint32_t);
640
641 __CFUniCharMappingTables = (const void **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * count, 0);
642
643 for (idx = 0;idx < count;idx++) {
644 #if defined (__cplusplus)
645 __CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t);
646 #else
647 __CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes += sizeof(uint32_t);
648 #endif
649 }
650 }
651
652 __CFSpinUnlock(&__CFUniCharMappingTableLock);
653
654 return __CFUniCharMappingTables[type];
655 }
656
657 // Case mapping functions
658 #define DO_SPECIAL_CASE_MAPPING 1
659
/* One entry of a case mapping table: source character and its mapping value. */
typedef struct {
    uint32_t _key;
    uint32_t _value;
} __CFUniCharCaseMappings;

// Filled in by __CFUniCharLoadCaseMappingTable(); all three are indexed by case mapping type.
static uint32_t *__CFUniCharCaseMappingTableCounts = NULL;          // number of entries per table
static uint32_t **__CFUniCharCaseMappingTable = NULL;               // the (key, value) tables
static const uint32_t **__CFUniCharCaseMappingExtraTable = NULL;    // data following each table
668
669 /* Binary searches CFStringEncodingUnicodeTo8BitCharMap */
670 static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings *theTable, uint32_t numElem, UTF32Char character) {
671 const __CFUniCharCaseMappings *p, *q, *divider;
672
673 if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) {
674 return 0;
675 }
676 p = theTable;
677 q = p + (numElem-1);
678 while (p <= q) {
679 divider = p + ((q - p) >> 1); /* divide by 2 */
680 if (character < divider->_key) { q = divider - 1; }
681 else if (character > divider->_key) { p = divider + 1; }
682 else { return divider->_value; }
683 }
684 return 0;
685 }
686
687 #define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1)
688
689 static bool __CFUniCharLoadCaseMappingTable(void) {
690 uint32_t *countArray;
691 int idx;
692
693 if (NULL == __CFUniCharMappingTables) (void)CFUniCharGetMappingData(kCFUniCharToLowercase);
694 if (NULL == __CFUniCharMappingTables) return false;
695
696 __CFSpinLock(&__CFUniCharMappingTableLock);
697
698 if (__CFUniCharCaseMappingTableCounts) {
699 __CFSpinUnlock(&__CFUniCharMappingTableLock);
700 return true;
701 }
702
703 countArray = (uint32_t *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(uint32_t) * NUM_CASE_MAP_DATA + sizeof(uint32_t *) * NUM_CASE_MAP_DATA * 2, 0);
704 __CFUniCharCaseMappingTable = (uint32_t **)((char *)countArray + sizeof(uint32_t) * NUM_CASE_MAP_DATA);
705 __CFUniCharCaseMappingExtraTable = (const uint32_t **)__CFUniCharCaseMappingTable + NUM_CASE_MAP_DATA;
706
707 for (idx = 0;idx < NUM_CASE_MAP_DATA;idx++) {
708 countArray[idx] = *((uint32_t *)__CFUniCharMappingTables[idx]) / (sizeof(uint32_t) * 2);
709 __CFUniCharCaseMappingTable[idx] = ((uint32_t *)__CFUniCharMappingTables[idx]) + 1;
710 __CFUniCharCaseMappingExtraTable[idx] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable[idx] + *((uint32_t *)__CFUniCharMappingTables[idx]));
711 }
712
713 __CFUniCharCaseMappingTableCounts = countArray;
714
715 __CFSpinUnlock(&__CFUniCharMappingTableLock);
716 return true;
717 }
718
// Two-letter language codes as they appear when the two code bytes are read
// as one 16-bit value in host byte order.
#if __CF_BIG_ENDIAN__
#define __CFUniCharLangCode(first, second) (((first) << 8) | (second))
#else
#define __CFUniCharLangCode(first, second) (((second) << 8) | (first))
#endif

#define TURKISH_LANG_CODE (__CFUniCharLangCode(0x74, 0x72)) // tr
#define LITHUANIAN_LANG_CODE (__CFUniCharLangCode(0x6C, 0x74)) // lt
#define AZERI_LANG_CODE (__CFUniCharLangCode(0x61, 0x7A)) // az
728
729 CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode) {
730 __CFUniCharBitmapData *data;
731 uint8_t planeNo = (theChar >> 16) & 0xFF;
732
733 caseFoldRetry:
734
735 #if DO_SPECIAL_CASE_MAPPING
736 if (flags & kCFUniCharCaseMapFinalSigma) {
737 if (theChar == 0x03A3) { // Final sigma
738 *convertedChar = (ctype == kCFUniCharToLowercase ? 0x03C2 : 0x03A3);
739 return 1;
740 }
741 }
742
743 if (langCode) {
744 switch (*(uint16_t *)langCode) {
745 case LITHUANIAN_LANG_CODE:
746 if (theChar == 0x0307 && (flags & kCFUniCharCaseMapAfter_i)) {
747 return 0;
748 } else if (ctype == kCFUniCharToLowercase) {
749 if (flags & kCFUniCharCaseMapMoreAbove) {
750 switch (theChar) {
751 case 0x0049: // LATIN CAPITAL LETTER I
752 *(convertedChar++) = 0x0069;
753 *(convertedChar++) = 0x0307;
754 return 2;
755
756 case 0x004A: // LATIN CAPITAL LETTER J
757 *(convertedChar++) = 0x006A;
758 *(convertedChar++) = 0x0307;
759 return 2;
760
761 case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK
762 *(convertedChar++) = 0x012F;
763 *(convertedChar++) = 0x0307;
764 return 2;
765
766 default: break;
767 }
768 }
769 switch (theChar) {
770 case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE
771 *(convertedChar++) = 0x0069;
772 *(convertedChar++) = 0x0307;
773 *(convertedChar++) = 0x0300;
774 return 3;
775
776 case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE
777 *(convertedChar++) = 0x0069;
778 *(convertedChar++) = 0x0307;
779 *(convertedChar++) = 0x0301;
780 return 3;
781
782 case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE
783 *(convertedChar++) = 0x0069;
784 *(convertedChar++) = 0x0307;
785 *(convertedChar++) = 0x0303;
786 return 3;
787
788 default: break;
789 }
790 }
791 break;
792
793 case TURKISH_LANG_CODE:
794 case AZERI_LANG_CODE:
795 if ((theChar == 0x0049) || (theChar == 0x0131)) { // LATIN CAPITAL LETTER I & LATIN SMALL LETTER DOTLESS I
796 *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? ((kCFUniCharCaseMapMoreAbove & flags) ? 0x0069 : 0x0131) : 0x0049);
797 return 1;
798 } else if ((theChar == 0x0069) || (theChar == 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE
799 *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? 0x0069 : 0x0130);
800 return 1;
801 } else if (theChar == 0x0307 && (kCFUniCharCaseMapAfter_i & flags)) { // COMBINING DOT ABOVE AFTER_i
802 if (ctype == kCFUniCharToLowercase) {
803 return 0;
804 } else {
805 *convertedChar = 0x0307;
806 return 1;
807 }
808 }
809 break;
810
811 default: break;
812 }
813 }
814 #endif DO_SPECIAL_CASE_MAPPING
815
816 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
817
818 data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(ctype + kCFUniCharHasNonSelfLowercaseCharacterSet));
819
820 if (planeNo < data->_numPlanes && data->_planes[planeNo] && CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) && (__CFUniCharCaseMappingTableCounts || __CFUniCharLoadCaseMappingTable())) {
821 uint32_t value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[ctype], __CFUniCharCaseMappingTableCounts[ctype], theChar);
822
823 if (!value && ctype == kCFUniCharToTitlecase) {
824 value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[kCFUniCharToUppercase], __CFUniCharCaseMappingTableCounts[kCFUniCharToUppercase], theChar);
825 if (value) ctype = kCFUniCharToUppercase;
826 }
827
828 if (value) {
829 CFIndex count = CFUniCharConvertFlagToCount(value);
830
831 if (count == 1) {
832 if (value & kCFUniCharNonBmpFlag) {
833 if (maxLength > 1) {
834 value = (value & 0xFFFFFF) - 0x10000;
835 *(convertedChar++) = (UTF16Char)(value >> 10) + 0xD800UL;
836 *(convertedChar++) = (UTF16Char)(value & 0x3FF) + 0xDC00UL;
837 return 2;
838 }
839 } else {
840 *convertedChar = (UTF16Char)value;
841 return 1;
842 }
843 } else if (count < maxLength) {
844 const uint32_t *extraMapping = __CFUniCharCaseMappingExtraTable[ctype] + (value & 0xFFFFFF);
845
846 if (value & kCFUniCharNonBmpFlag) {
847 CFIndex copiedLen = 0;
848
849 while (count-- > 0) {
850 value = *(extraMapping++);
851 if (value > 0xFFFF) {
852 if (copiedLen + 2 >= maxLength) break;
853 value = (value & 0xFFFFFF) - 0x10000;
854 convertedChar[copiedLen++] = (UTF16Char)(value >> 10) + 0xD800UL;
855 convertedChar[copiedLen++] = (UTF16Char)(value & 0x3FF) + 0xDC00UL;
856 } else {
857 if (copiedLen + 1 >= maxLength) break;
858 convertedChar[copiedLen++] = value;
859 }
860 }
861 if (!count) return copiedLen;
862 } else {
863 CFIndex idx;
864
865 for (idx = 0;idx < count;idx++) *(convertedChar++) = (UTF16Char)*(extraMapping++);
866 return count;
867 }
868 }
869 }
870 } else if (ctype == kCFUniCharCaseFold) {
871 ctype = kCFUniCharToLowercase;
872 goto caseFoldRetry;
873 }
874
875 if (theChar > 0xFFFF) { // non-BMP
876 theChar = (theChar & 0xFFFFFF) - 0x10000;
877 *(convertedChar++) = (UTF16Char)(theChar >> 10) + 0xD800UL;
878 *(convertedChar++) = (UTF16Char)(theChar & 0x3FF) + 0xDC00UL;
879 return 2;
880 } else {
881 *convertedChar = theChar;
882 return 1;
883 }
884 }
885
886 CFIndex CFUniCharMapTo(UniChar theChar, UniChar *convertedChar, CFIndex maxLength, uint16_t ctype, uint32_t flags) {
887 if (ctype == kCFUniCharCaseFold + 1) { // kCFUniCharDecompose
888 if (CFUniCharIsDecomposableCharacter(theChar, false)) {
889 UTF32Char buffer[MAX_DECOMPOSED_LENGTH];
890 CFIndex usedLength = CFUniCharDecomposeCharacter(theChar, buffer, MAX_DECOMPOSED_LENGTH);
891 CFIndex idx;
892
893 for (idx = 0;idx < usedLength;idx++) *(convertedChar++) = buffer[idx];
894 return usedLength;
895 } else {
896 *convertedChar = theChar;
897 return 1;
898 }
899 } else {
900 return CFUniCharMapCaseTo(theChar, convertedChar, maxLength, ctype, flags, NULL);
901 }
902 }
903
904 CF_INLINE bool __CFUniCharIsMoreAbove(UTF16Char *buffer, CFIndex length) {
905 UTF32Char currentChar;
906 uint32_t property;
907
908 while (length-- > 0) {
909 currentChar = *(buffer)++;
910 if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*(buffer + 1))) {
911 currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(buffer++));
912 --length;
913 }
914 if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
915
916 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
917
918 if (property == 230) return true; // Above priority
919 }
920 return false;
921 }
922
/*
 Scans backwards from buffer[length - 1] over characters in kCFUniCharNonBaseCharacterSet
 and returns false as soon as one with combining class 230 ("above") is seen. The
 character at which the scan stops is then decomposed; if any element of the
 decomposition after the first has combining class 230 the result is also false.
 Otherwise the result is true.

 buffer: start of the UTF-16 text; length: number of units preceding the character
 being mapped. Returns false when length < 1.

 NOTE(review): this function never compares the stopping character against 'i' or 'j';
 presumably the caller has already established that (e.g. via its lastFlags) -- confirm.
*/
CF_INLINE bool __CFUniCharIsAfter_i(UTF16Char *buffer, CFIndex length) {
    UTF32Char currentChar = 0;
    uint32_t property;
    UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
    CFIndex decompLength;
    CFIndex idx;

    if (length < 1) return 0;

    // Start one past the last unit and walk towards the beginning.
    // Inside the loop, length is the number of units still in front of buffer.
    buffer += length;
    while (length-- > 1) {
        currentChar = *(--buffer);
        if (CFUniCharIsSurrogateLowCharacter(currentChar)) {
            // Combine with a preceding high surrogate only when more than one unit remains in front;
            // otherwise leave the loop and let the code below deal with it
            if ((length > 1) && CFUniCharIsSurrogateHighCharacter(*(buffer - 1))) {
                currentChar = CFUniCharGetLongCharacterForSurrogatePair(*(--buffer), currentChar);
                --length;
            } else {
                break;
            }
        }
        // Stop at the first character that is not in the non-base set
        if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;

        property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));

        if (property == 230) return false; // Above priority
    }
    if (length == 0) {
        // The loop ran out with exactly one unit left: that unit is the stopping character
        currentChar = *(--buffer);
    } else if (CFUniCharIsSurrogateLowCharacter(currentChar) && CFUniCharIsSurrogateHighCharacter(*(--buffer))) {
        // The loop broke on a low surrogate it did not pair; pair it here if a high surrogate precedes it
        currentChar = CFUniCharGetLongCharacterForSurrogatePair(*buffer, currentChar);
    }

    decompLength = CFUniCharDecomposeCharacter(currentChar, decomposed, MAX_DECOMPOSED_LENGTH);
    currentChar = *decomposed; // this value is not read: the loop below overwrites it and nothing reads it afterwards


    // Check the marks contained in the decomposition (everything after the first element)
    for (idx = 1;idx < decompLength;idx++) {
        currentChar = decomposed[idx];
        property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));

        if (property == 230) return false; // Above priority
    }
    return true;
}
967
968 __private_extern__ uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags) {
969 if (theChar == 0x03A3) { // GREEK CAPITAL LETTER SIGMA
970 if ((type == kCFUniCharToLowercase) && (currentIndex > 0)) {
971 UTF16Char *start = buffer;
972 UTF16Char *end = buffer + length;
973 UTF32Char otherChar;
974
975 // First check if we're after a cased character
976 buffer += (currentIndex - 1);
977 while (start <= buffer) {
978 otherChar = *(buffer--);
979 if (CFUniCharIsSurrogateLowCharacter(otherChar) && (start <= buffer) && CFUniCharIsSurrogateHighCharacter(*buffer)) {
980 otherChar = CFUniCharGetLongCharacterForSurrogatePair(*(buffer--), otherChar);
981 }
982 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
983 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) && !CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
984 break;
985 }
986 }
987
988 // Next check if we're before a cased character
989 buffer = start + currentIndex + 1;
990 while (buffer < end) {
991 otherChar = *(buffer++);
992 if (CFUniCharIsSurrogateHighCharacter(otherChar) && (buffer < end) && CFUniCharIsSurrogateLowCharacter(*buffer)) {
993 otherChar = CFUniCharGetLongCharacterForSurrogatePair(otherChar, *(buffer++));
994 }
995 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
996 if (CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
997 break;
998 }
999 }
1000 return kCFUniCharCaseMapFinalSigma;
1001 }
1002 } else if (langCode) {
1003 if (*((const uint16_t *)langCode) == LITHUANIAN_LANG_CODE) {
1004 if ((theChar == 0x0307) && ((kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) & lastFlags) == (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove)) {
1005 return (__CFUniCharIsAfter_i(buffer, currentIndex) ? kCFUniCharCaseMapAfter_i : 0);
1006 } else if (type == kCFUniCharToLowercase) {
1007 if ((theChar == 0x0049) || (theChar == 0x004A) || (theChar == 0x012E)) {
1008 return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? kCFUniCharCaseMapMoreAbove : 0);
1009 }
1010 } else if ((theChar == 'i') || (theChar == 'j')) {
1011 return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) : 0);
1012 }
1013 } else if ((*((const uint16_t *)langCode) == TURKISH_LANG_CODE) || (*((const uint16_t *)langCode) == AZERI_LANG_CODE)) {
1014 if (type == kCFUniCharToLowercase) {
1015 if (theChar == 0x0307) {
1016 return (kCFUniCharCaseMapMoreAbove & lastFlags ? kCFUniCharCaseMapAfter_i : 0);
1017 } else if (theChar == 0x0049) {
1018 return (((++currentIndex < length) && (buffer[currentIndex] == 0x0307)) ? kCFUniCharCaseMapMoreAbove : 0);
1019 }
1020 }
1021 }
1022 }
1023 return 0;
1024 }
1025
// Unicode property database
// Table with one entry per property type. It stays NULL until
// CFUniCharGetUnicodePropertyDataForPlane() has loaded PROP_DB_FILE successfully.
static __CFUniCharBitmapData *__CFUniCharUnicodePropertyTable = NULL;
// Number of entries in __CFUniCharUnicodePropertyTable
static int __CFUniCharUnicodePropertyTableCount = 0;

// Held while the table above is built and while it is torn down
static CFSpinLock_t __CFUniCharPropTableLock = CFSpinLockInit;

// PROP_DB_FILE is the name handed to __CFUniCharLoadFile() to locate the property data.
// It is a Mach-O section style name when USE_MACHO_SEGMENT is set and a file name otherwise
// (a wide string literal on Windows).
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
#if USE_MACHO_SEGMENT
#define PROP_DB_FILE "__properties"
#else
#define PROP_DB_FILE "CFUniCharPropertyDatabase.data"
#endif
#elif DEPLOYMENT_TARGET_WINDOWS
#if USE_MACHO_SEGMENT
#define PROP_DB_FILE "__properties"
#else
#define PROP_DB_FILE L"CFUniCharPropertyDatabase.data"
#endif
#else
// Any other deployment target is rejected at compile time
#error Unknown or unspecified DEPLOYMENT_TARGET
#endif
1047
1048 const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane) {
1049
1050 __CFSpinLock(&__CFUniCharPropTableLock);
1051
1052 if (NULL == __CFUniCharUnicodePropertyTable) {
1053 __CFUniCharBitmapData *table;
1054 const void *bytes;
1055 const void *bodyBase;
1056 const void *planeBase;
1057 int headerSize;
1058 int idx, count;
1059 int planeIndex, planeCount;
1060 int planeSize;
1061
1062 if (!__CFUniCharLoadFile(PROP_DB_FILE, &bytes)) {
1063 __CFSpinUnlock(&__CFUniCharPropTableLock);
1064 return NULL;
1065 }
1066
1067 #if defined (__cplusplus)
1068 bytes = (uint8_t*)bytes + 4; // Skip Unicode version
1069 headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
1070 #else
1071 bytes += 4; // Skip Unicode version
1072 headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes += sizeof(uint32_t);
1073 #endif
1074
1075 headerSize -= (sizeof(uint32_t) * 2);
1076 bodyBase = (char *)bytes + headerSize;
1077
1078 count = headerSize / sizeof(uint32_t);
1079 __CFUniCharUnicodePropertyTableCount = count;
1080
1081 table = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * count, 0);
1082
1083 for (idx = 0;idx < count;idx++) {
1084 planeCount = *((const uint8_t *)bodyBase);
1085 planeBase = (char *)bodyBase + planeCount + (planeCount % 4 ? 4 - (planeCount % 4) : 0);
1086 table[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * planeCount, 0);
1087
1088 for (planeIndex = 0;planeIndex < planeCount;planeIndex++) {
1089 if ((planeSize = ((const uint8_t *)bodyBase)[planeIndex + 1])) {
1090 table[idx]._planes[planeIndex] = (const uint8_t *)planeBase;
1091 #if defined (__cplusplus)
1092 planeBase = (char*)planeBase + (planeSize * 256);
1093 #else
1094 planeBase += (planeSize * 256);
1095 #endif
1096 } else {
1097 table[idx]._planes[planeIndex] = NULL;
1098 }
1099 }
1100
1101 table[idx]._numPlanes = planeCount;
1102 #if defined (__cplusplus)
1103 bodyBase = (const uint8_t *)bodyBase + (CFSwapInt32BigToHost(*(uint32_t *)bytes));
1104 ((uint32_t *&)bytes) ++;
1105 #else
1106 bodyBase += (CFSwapInt32BigToHost(*((uint32_t *)bytes++)));
1107 #endif
1108 }
1109
1110 __CFUniCharUnicodePropertyTable = table;
1111 }
1112
1113 __CFSpinUnlock(&__CFUniCharPropTableLock);
1114
1115 return (plane < __CFUniCharUnicodePropertyTable[propertyType]._numPlanes ? __CFUniCharUnicodePropertyTable[propertyType]._planes[plane] : NULL);
1116 }
1117
1118 __private_extern__ uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType) {
1119 (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType, 0);
1120 return __CFUniCharUnicodePropertyTable[propertyType]._numPlanes;
1121 }
1122
1123 __private_extern__ uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType) {
1124 if (propertyType == kCFUniCharCombiningProperty) {
1125 return CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
1126 } else if (propertyType == kCFUniCharBidiProperty) {
1127 return CFUniCharGetBidiPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
1128 } else {
1129 return 0;
1130 }
1131 }
1132
1133
1134
1135 /*
1136 The UTF8 conversion in the following function is derived from ConvertUTF.c
1137 */
1138 /*
1139 * Copyright 2001 Unicode, Inc.
1140 *
1141 * Disclaimer
1142 *
1143 * This source code is provided as is by Unicode, Inc. No claims are
1144 * made as to fitness for any particular purpose. No warranties of any
1145 * kind are expressed or implied. The recipient agrees to determine
1146 * applicability of information provided. If this file has been
1147 * purchased on magnetic or optical media from Unicode, Inc., the
1148 * sole remedy for any claim will be exchange of defective media
1149 * within 90 days of receipt.
1150 *
1151 * Limitations on Rights to Redistribute This Code
1152 *
1153 * Unicode, Inc. hereby grants the right to freely use the information
1154 * supplied in this file in the creation of products supporting the
1155 * Unicode Standard, and to make copies of this file in any form
1156 * for internal or external distribution as long as this notice
1157 * remains attached.
1158 */
1159 #define UNI_REPLACEMENT_CHAR (0x0000FFFDUL)
1160
1161 bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat) {
1162 UTF32Char currentChar;
1163 CFIndex usedLength = *filledLength;
1164
1165 if (dstFormat == kCFUniCharUTF16Format) {
1166 UTF16Char *dstBuffer = (UTF16Char *)*dst;
1167
1168 while (srcLength-- > 0) {
1169 currentChar = *(src++);
1170
1171 if (currentChar > 0xFFFF) { // Non-BMP
1172 usedLength += 2;
1173 if (dstLength) {
1174 if (usedLength > dstLength) return false;
1175 currentChar -= 0x10000;
1176 *(dstBuffer++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
1177 *(dstBuffer++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
1178 }
1179 } else {
1180 ++usedLength;
1181 if (dstLength) {
1182 if (usedLength > dstLength) return false;
1183 *(dstBuffer++) = (UTF16Char)currentChar;
1184 }
1185 }
1186 }
1187
1188 *dst = dstBuffer;
1189 } else if (dstFormat == kCFUniCharUTF8Format) {
1190 uint8_t *dstBuffer = (uint8_t *)*dst;
1191 uint16_t bytesToWrite = 0;
1192 const UTF32Char byteMask = 0xBF;
1193 const UTF32Char byteMark = 0x80;
1194 static const uint8_t firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
1195
1196 while (srcLength-- > 0) {
1197 currentChar = *(src++);
1198
1199 /* Figure out how many bytes the result will require */
1200 if (currentChar < (UTF32Char)0x80) {
1201 bytesToWrite = 1;
1202 } else if (currentChar < (UTF32Char)0x800) {
1203 bytesToWrite = 2;
1204 } else if (currentChar < (UTF32Char)0x10000) {
1205 bytesToWrite = 3;
1206 } else if (currentChar < (UTF32Char)0x200000) {
1207 bytesToWrite = 4;
1208 } else {
1209 bytesToWrite = 2;
1210 currentChar = UNI_REPLACEMENT_CHAR;
1211 }
1212
1213 usedLength += bytesToWrite;
1214
1215 if (dstLength) {
1216 if (usedLength > dstLength) return false;
1217
1218 dstBuffer += bytesToWrite;
1219 switch (bytesToWrite) { /* note: everything falls through. */
1220 case 4: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1221 case 3: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1222 case 2: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1223 case 1: *--dstBuffer = currentChar | firstByteMark[bytesToWrite];
1224 }
1225 dstBuffer += bytesToWrite;
1226 }
1227 }
1228
1229 *dst = dstBuffer;
1230 } else {
1231 UTF32Char *dstBuffer = (UTF32Char *)*dst;
1232
1233 while (srcLength-- > 0) {
1234 currentChar = *(src++);
1235
1236 ++usedLength;
1237 if (dstLength) {
1238 if (usedLength > dstLength) return false;
1239 *(dstBuffer++) = currentChar;
1240 }
1241 }
1242
1243 *dst = dstBuffer;
1244 }
1245
1246 *filledLength = usedLength;
1247
1248 return true;
1249 }
1250
#if DEPLOYMENT_TARGET_WINDOWS
/*
 Releases the tables this file allocates lazily and resets the corresponding
 globals, taking the lock that guards each group while doing so.
*/
void __CFUniCharCleanup(void)
{
    int entry;

    /* Bitmap data set up by __CFUniCharLoadBitmapData() */
    __CFSpinLock(&__CFUniCharBitmapLock);
    if (NULL != __CFUniCharBitmapDataArray) {
        entry = 0;
        while (entry < (int)__CFUniCharNumberOfBitmaps) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray[entry]._planes);
            __CFUniCharBitmapDataArray[entry]._planes = NULL;
            ++entry;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray);
        __CFUniCharBitmapDataArray = NULL;
        __CFUniCharNumberOfBitmaps = 0;
    }
    __CFSpinUnlock(&__CFUniCharBitmapLock);

    /* Mapping tables; both groups below share __CFUniCharMappingTableLock */
    __CFSpinLock(&__CFUniCharMappingTableLock);

    /* Set up by CFUniCharGetMappingData() */
    if (NULL != __CFUniCharMappingTables) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharMappingTables);
        __CFUniCharMappingTables = NULL;
    }

    /* Set up by __CFUniCharLoadCaseMappingTable(); only the counts array is deallocated here,
       the two table pointers are just cleared */
    if (NULL != __CFUniCharCaseMappingTableCounts) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharCaseMappingTableCounts);
        __CFUniCharCaseMappingTableCounts = NULL;

        __CFUniCharCaseMappingTable = NULL;
        __CFUniCharCaseMappingExtraTable = NULL;
    }

    __CFSpinUnlock(&__CFUniCharMappingTableLock);

    /* Property table set up by CFUniCharGetUnicodePropertyDataForPlane() */
    __CFSpinLock(&__CFUniCharPropTableLock);
    if (NULL != __CFUniCharUnicodePropertyTable) {
        entry = 0;
        while (entry < __CFUniCharUnicodePropertyTableCount) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable[entry]._planes);
            __CFUniCharUnicodePropertyTable[entry]._planes = NULL;
            ++entry;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable);
        __CFUniCharUnicodePropertyTable = NULL;
        __CFUniCharUnicodePropertyTableCount = 0;
    }
    __CFSpinUnlock(&__CFUniCharPropTableLock);
}
#endif
1308
1309 #undef USE_MACHO_SEGMENT
1310