]> git.saurik.com Git - apple/cf.git/blob - CFUniChar.c
CF-635.19.tar.gz
[apple/cf.git] / CFUniChar.c
1 /*
2 * Copyright (c) 2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFUniChar.c
25 Copyright (c) 2001-2011, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include <CoreFoundation/CFByteOrder.h>
30 #include "CFInternal.h"
31 #include "CFUniChar.h"
32 #include "CFStringEncodingConverterExt.h"
33 #include "CFUnicodeDecomposition.h"
34 #include "CFUniCharPriv.h"
35 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
36 #include <fcntl.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <sys/param.h>
40 #include <sys/mman.h>
41 #include <unistd.h>
42 #include <stdlib.h>
43 #endif
44 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
45 #include <mach/mach.h>
46 #endif
47
48 #if DEPLOYMENT_TARGET_WINDOWS
49 extern void _CFGetFrameworkPath(wchar_t *path, int maxLength);
50 #endif
51
/*
 * Per-platform configuration.
 *   __kCFCharacterSetDir - base directory used when the Unicode data files are opened from disk.
 *   USE_MACHO_SEGMENT    - when set, the data is read from a Mach-O section instead of a file.
 */
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
#define __kCFCharacterSetDir "/System/Library/CoreServices"
#define USE_MACHO_SEGMENT 1
#elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
#define __kCFCharacterSetDir "/usr/local/share/CoreFoundation"
#elif DEPLOYMENT_TARGET_WINDOWS
#define __kCFCharacterSetDir "\\Windows\\CoreFoundation"
#endif
63
64 enum {
65 kCFUniCharLastExternalSet = kCFUniCharNewlineCharacterSet,
66 kCFUniCharFirstInternalSet = kCFUniCharCompatibilityDecomposableCharacterSet,
67 kCFUniCharLastInternalSet = kCFUniCharGraphemeExtendCharacterSet,
68 kCFUniCharFirstBitmapSet = kCFUniCharDecimalDigitCharacterSet
69 };
70
71 CF_INLINE uint32_t __CFUniCharMapExternalSetToInternalIndex(uint32_t cset) { return ((kCFUniCharFirstInternalSet <= cset) ? ((cset - kCFUniCharFirstInternalSet) + kCFUniCharLastExternalSet) : cset) - kCFUniCharFirstBitmapSet; }
72 CF_INLINE uint32_t __CFUniCharMapCompatibilitySetID(uint32_t cset) { return ((cset == kCFUniCharControlCharacterSet) ? kCFUniCharControlAndFormatterCharacterSet : (((cset > kCFUniCharLastExternalSet) && (cset < kCFUniCharFirstInternalSet)) ? ((cset - kCFUniCharLastExternalSet) + kCFUniCharFirstInternalSet) : cset)); }
73
74 #if (DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED) && USE_MACHO_SEGMENT
75 #include <mach-o/getsect.h>
76 #include <mach-o/dyld.h>
77 #include <mach-o/ldsyms.h>
78
79 static const void *__CFGetSectDataPtr(const char *segname, const char *sectname, uint64_t *sizep) {
80 uint32_t idx, cnt = _dyld_image_count();
81 for (idx = 0; idx < cnt; idx++) {
82 void *mh = (void *)_dyld_get_image_header(idx);
83 if (mh != &_mh_dylib_header) continue;
84 #if __LP64__
85 const struct section_64 *sect = getsectbynamefromheader_64((struct mach_header_64 *)mh, segname, sectname);
86 #else
87 const struct section *sect = getsectbynamefromheader((struct mach_header *)mh, segname, sectname);
88 #endif
89 if (!sect) break;
90 if (sizep) *sizep = (uint64_t)sect->size;
91 return (char *)sect->addr + _dyld_get_image_vmaddr_slide(idx);
92 }
93 if (sizep) *sizep = 0ULL;
94 return NULL;
95 }
96 #endif
97
98 #if !USE_MACHO_SEGMENT
99
100 // Memory map the file
101
102 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX
103 CF_INLINE void __CFUniCharCharacterSetPath(char *cpath) {
104 #elif DEPLOYMENT_TARGET_WINDOWS
105 CF_INLINE void __CFUniCharCharacterSetPath(wchar_t *wpath) {
106 #else
107 #error Unknown or unspecified DEPLOYMENT_TARGET
108 #endif
109 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
110 strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
111 #elif DEPLOYMENT_TARGET_LINUX
112 strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
113 #elif DEPLOYMENT_TARGET_WINDOWS
114 wchar_t frameworkPath[MAXPATHLEN];
115 _CFGetFrameworkPath(frameworkPath, MAXPATHLEN);
116 wcsncpy(wpath, frameworkPath, MAXPATHLEN);
117 wcsncat(wpath, L"\\CoreFoundation.resources\\", MAXPATHLEN - wcslen(wpath));
118 #else
119 strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
120 strlcat(cpath, "/CharacterSets/", MAXPATHLEN);
121 #endif
122 }
123
124 #if DEPLOYMENT_TARGET_WINDOWS
125 #define MAX_BITMAP_STATE 512
126 //
127 // If a string is placed into this array, then it has been previously
128 // determined that the bitmap-file cannot be found. Thus, we make
129 // the assumption it won't be there in future calls and we avoid
130 // hitting the disk un-necessarily. This assumption isn't 100%
131 // correct, as bitmap-files can be added. We would have to re-start
132 // the application in order to pick-up the new bitmap info.
133 //
134 // We should probably re-visit this.
135 //
136 static wchar_t *mappedBitmapState[MAX_BITMAP_STATE];
137 static int __nNumStateEntries = -1;
138 CRITICAL_SECTION __bitmapStateLock = {0};
139
140 bool __GetBitmapStateForName(const wchar_t *bitmapName) {
141 if (NULL == __bitmapStateLock.DebugInfo)
142 InitializeCriticalSection(&__bitmapStateLock);
143 EnterCriticalSection(&__bitmapStateLock);
144 if (__nNumStateEntries >= 0) {
145 for (int i = 0; i < __nNumStateEntries; i++) {
146 if (wcscmp(mappedBitmapState[i], bitmapName) == 0) {
147 LeaveCriticalSection(&__bitmapStateLock);
148 return true;
149 }
150 }
151 }
152 LeaveCriticalSection(&__bitmapStateLock);
153 return false;
154 }
155 void __AddBitmapStateForName(const wchar_t *bitmapName) {
156 if (NULL == __bitmapStateLock.DebugInfo)
157 InitializeCriticalSection(&__bitmapStateLock);
158 EnterCriticalSection(&__bitmapStateLock);
159 __nNumStateEntries++;
160 mappedBitmapState[__nNumStateEntries] = (wchar_t *)malloc((lstrlenW(bitmapName)+1) * sizeof(wchar_t));
161 lstrcpyW(mappedBitmapState[__nNumStateEntries], bitmapName);
162 LeaveCriticalSection(&__bitmapStateLock);
163 }
164 #endif
165
166 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX
167 static bool __CFUniCharLoadBytesFromFile(const char *fileName, const void **bytes, int64_t *fileSize) {
168 #elif DEPLOYMENT_TARGET_WINDOWS
169 static bool __CFUniCharLoadBytesFromFile(const wchar_t *fileName, const void **bytes, int64_t *fileSize) {
170 #else
171 #error Unknown or unspecified DEPLOYMENT_TARGET
172 #endif
173 #if DEPLOYMENT_TARGET_WINDOWS
174 HANDLE bitmapFileHandle = NULL;
175 HANDLE mappingHandle = NULL;
176
177 if (__GetBitmapStateForName(fileName)) {
178 // The fileName has been tried in the past, so just return false
179 // and move on.
180 *bytes = NULL;
181 return false;
182 }
183 mappingHandle = OpenFileMappingW(FILE_MAP_READ, TRUE, fileName);
184 if (NULL == mappingHandle) {
185 if ((bitmapFileHandle = CreateFileW(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) {
186 // We tried to get the bitmap file for mapping, but it's not there. Add to list of non-existant bitmap-files so
187 // we don't have to try this again in the future.
188 __AddBitmapStateForName(fileName);
189 return false;
190 }
191 mappingHandle = CreateFileMapping(bitmapFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
192 CloseHandle(bitmapFileHandle);
193 if (!mappingHandle) return false;
194 }
195
196 *bytes = MapViewOfFileEx(mappingHandle, FILE_MAP_READ, 0, 0, 0, 0);
197
198 if (NULL != fileSize) {
199 MEMORY_BASIC_INFORMATION memoryInfo;
200
201 if (0 == VirtualQueryEx(mappingHandle, *bytes, &memoryInfo, sizeof(memoryInfo))) {
202 *fileSize = 0; // This indicates no checking. Is it right ?
203 } else {
204 *fileSize = memoryInfo.RegionSize;
205 }
206 }
207
208 CloseHandle(mappingHandle);
209
210 return (*bytes ? true : false);
211 #else
212 struct stat statBuf;
213 int fd = -1;
214
215 if ((fd = open(fileName, O_RDONLY, 0)) < 0) {
216 return false;
217 }
218 if (fstat(fd, &statBuf) < 0 || (*bytes = mmap(0, statBuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0)) == (void *)-1) {
219 close(fd);
220 return false;
221 }
222 close(fd);
223
224 if (NULL != fileSize) *fileSize = statBuf.st_size;
225
226 return true;
227 #endif
228 }
229
230 #endif // USE_MACHO_SEGMENT
231
232 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX
233 static bool __CFUniCharLoadFile(const char *bitmapName, const void **bytes, int64_t *fileSize) {
234 #elif DEPLOYMENT_TARGET_WINDOWS
235 static bool __CFUniCharLoadFile(const wchar_t *bitmapName, const void **bytes, int64_t *fileSize) {
236 #else
237 #error Unknown or unspecified DEPLOYMENT_TARGET
238 #endif
239 #if USE_MACHO_SEGMENT
240 *bytes = __CFGetSectDataPtr("__UNICODE", bitmapName, NULL);
241
242 if (NULL != fileSize) *fileSize = 0;
243
244 return *bytes ? true : false;
245 #else
246 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX
247 char cpath[MAXPATHLEN];
248 __CFUniCharCharacterSetPath(cpath);
249 strlcat(cpath, bitmapName, MAXPATHLEN);
250 return __CFUniCharLoadBytesFromFile(cpath, bytes, fileSize);
251 #elif DEPLOYMENT_TARGET_WINDOWS
252 wchar_t wpath[MAXPATHLEN];
253 __CFUniCharCharacterSetPath(wpath);
254 wcsncat(wpath, bitmapName, MAXPATHLEN);
255 return __CFUniCharLoadBytesFromFile(wpath, bytes, fileSize);
256 #else
257 #error Unknown or unspecified DEPLOYMENT_TARGET
258 #endif
259 #endif
260 }
261
262 // Bitmap functions
263 CF_INLINE bool isControl(UTF32Char theChar, uint16_t charset, const void *data) { // ISO Control
264 return (((theChar <= 0x001F) || (theChar >= 0x007F && theChar <= 0x009F)) ? true : false);
265 }
266
267 CF_INLINE bool isWhitespace(UTF32Char theChar, uint16_t charset, const void *data) { // Space
268 return (((theChar == 0x0020) || (theChar == 0x0009) || (theChar == 0x00A0) || (theChar == 0x1680) || (theChar >= 0x2000 && theChar <= 0x200B) || (theChar == 0x202F) || (theChar == 0x205F) || (theChar == 0x3000)) ? true : false);
269 }
270
271 CF_INLINE bool isNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space
272 return (((theChar >= 0x000A && theChar <= 0x000D) || (theChar == 0x0085) || (theChar == 0x2028) || (theChar == 0x2029)) ? true : false);
273 }
274
275 CF_INLINE bool isWhitespaceAndNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space
276 return ((isWhitespace(theChar, charset, data) || isNewline(theChar, charset, data)) ? true : false);
277 }
278
/*
 * Sanity check of a loaded Unicode data file against its size.
 *
 * The check is currently compiled out: both active variants accept everything.
 * The real implementation is kept in the final #else branch. It treats a
 * fileSize of 0 (or less) as "size unknown" and accepts the file.
 *
 * NOTE(review): the real check assumes that the header ends with an
 * (offset, size) pair of 32-bit big-endian words, which is how
 * __CFUniCharLoadBitmapData reads the bitmap file. CFUniCharGetMappingData
 * reads single-word, host-endian header entries, so this check does not look
 * applicable to the mapping file as written - confirm before re-enabling.
 */
#if USE_MACHO_SEGMENT
CF_INLINE bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { return true; }
#elif 1
// <rdar://problem/8961744> __CFSimpleFileSizeVerification is broken
static bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { return true; }
#else
static bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) {
    bool result = true;

    if (fileSize > 0) {
        if ((int64_t)(sizeof(uint32_t) * 2) > fileSize) {
            // Too small to even hold the version word and the header size.
            result = false;
        } else {
            uint32_t headerSize = CFSwapInt32BigToHost(*((const uint32_t *)((const char *)bytes + 4)));

            if ((headerSize < (sizeof(uint32_t) * 4)) || ((int64_t)headerSize > fileSize)) {
                result = false;
            } else {
                const uint32_t *lastElement = (const uint32_t *)(((const uint8_t *)bytes) + headerSize) - 2;
                // End of the data described by the last header entry, measured
                // from the start of the file. It must lie inside the file.
                // (Comparing it with headerSize, as before, rejected every
                // non-empty file.)
                int64_t dataEnd = (int64_t)headerSize + CFSwapInt32BigToHost(lastElement[0]) + CFSwapInt32BigToHost(lastElement[1]);

                if (dataEnd > fileSize) result = false;
            }
        }
    }

    if (!result) CFLog(kCFLogLevelCritical, CFSTR("File size verification for Unicode database file failed."));

    return result;
}
#endif // USE_MACHO_SEGMENT
309
310 typedef struct {
311 uint32_t _numPlanes;
312 const uint8_t **_planes;
313 } __CFUniCharBitmapData;
314
315 static char __CFUniCharUnicodeVersionString[8] = {0, 0, 0, 0, 0, 0, 0, 0};
316
317 static uint32_t __CFUniCharNumberOfBitmaps = 0;
318 static __CFUniCharBitmapData *__CFUniCharBitmapDataArray = NULL;
319
320 static CFSpinLock_t __CFUniCharBitmapLock = CFSpinLockInit;
321
322 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX
323 #if !defined(CF_UNICHAR_BITMAP_FILE)
324 #if USE_MACHO_SEGMENT
325 #define CF_UNICHAR_BITMAP_FILE "__csbitmaps"
326 #else
327 #define CF_UNICHAR_BITMAP_FILE "/CFCharacterSetBitmaps.bitmap"
328 #endif
329 #endif
330 #elif DEPLOYMENT_TARGET_WINDOWS
331 #if !defined(CF_UNICHAR_BITMAP_FILE)
332 #define CF_UNICHAR_BITMAP_FILE L"CFCharacterSetBitmaps.bitmap"
333 #endif
334 #else
335 #error Unknown or unspecified DEPLOYMENT_TARGET
336 #endif
337
338 static bool __CFUniCharLoadBitmapData(void) {
339 __CFUniCharBitmapData *array;
340 uint32_t headerSize;
341 uint32_t bitmapSize;
342 int numPlanes;
343 uint8_t currentPlane;
344 const void *bytes;
345 const void *bitmapBase;
346 const void *bitmap;
347 int idx, bitmapIndex;
348 int64_t fileSize;
349
350 __CFSpinLock(&__CFUniCharBitmapLock);
351
352 if (__CFUniCharBitmapDataArray || !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) {
353 __CFSpinUnlock(&__CFUniCharBitmapLock);
354 return false;
355 }
356
357 for (idx = 0;idx < 4 && ((const uint8_t *)bytes)[idx];idx++) {
358 __CFUniCharUnicodeVersionString[idx * 2] = ((const uint8_t *)bytes)[idx];
359 __CFUniCharUnicodeVersionString[idx * 2 + 1] = '.';
360 }
361 __CFUniCharUnicodeVersionString[(idx < 4 ? idx * 2 - 1 : 7)] = '\0';
362
363 headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4)));
364
365 bitmapBase = (uint8_t *)bytes + headerSize;
366 bytes = (uint8_t *)bytes + (sizeof(uint32_t) * 2);
367 headerSize -= (sizeof(uint32_t) * 2);
368
369 __CFUniCharNumberOfBitmaps = headerSize / (sizeof(uint32_t) * 2);
370
371 array = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * __CFUniCharNumberOfBitmaps, 0);
372
373 for (idx = 0;idx < (int)__CFUniCharNumberOfBitmaps;idx++) {
374 bitmap = (uint8_t *)bitmapBase + CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
375 bitmapSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
376
377 numPlanes = bitmapSize / (8 * 1024);
378 numPlanes = *(const uint8_t *)((char *)bitmap + (((numPlanes - 1) * ((8 * 1024) + 1)) - 1)) + 1;
379 array[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * numPlanes, 0);
380 array[idx]._numPlanes = numPlanes;
381
382 currentPlane = 0;
383 for (bitmapIndex = 0;bitmapIndex < numPlanes;bitmapIndex++) {
384 if (bitmapIndex == currentPlane) {
385 array[idx]._planes[bitmapIndex] = (const uint8_t *)bitmap;
386 bitmap = (uint8_t *)bitmap + (8 * 1024);
387 #if defined (__cplusplus)
388 currentPlane = *(((const uint8_t*&)bitmap)++);
389 #else
390 currentPlane = *((const uint8_t *)bitmap++);
391 #endif
392
393 } else {
394 array[idx]._planes[bitmapIndex] = NULL;
395 }
396 }
397 }
398
399 __CFUniCharBitmapDataArray = array;
400
401 __CFSpinUnlock(&__CFUniCharBitmapLock);
402
403 return true;
404 }
405
406 __private_extern__ const char *__CFUniCharGetUnicodeVersionString(void) {
407 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
408 return __CFUniCharUnicodeVersionString;
409 }
410
411 bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset) {
412 charset = __CFUniCharMapCompatibilitySetID(charset);
413
414 switch (charset) {
415 case kCFUniCharWhitespaceCharacterSet:
416 return isWhitespace(theChar, charset, NULL);
417
418 case kCFUniCharWhitespaceAndNewlineCharacterSet:
419 return isWhitespaceAndNewline(theChar, charset, NULL);
420
421 case kCFUniCharNewlineCharacterSet:
422 return isNewline(theChar, charset, NULL);
423
424 default: {
425 uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset);
426
427 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
428
429 if (tableIndex < __CFUniCharNumberOfBitmaps) {
430 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex;
431 uint8_t planeNo = (theChar >> 16) & 0xFF;
432
433 // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16
434 if (charset == kCFUniCharIllegalCharacterSet) {
435 if (planeNo == 0x0E) { // Plane 14
436 theChar &= 0xFF;
437 return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? false : true);
438 } else if (planeNo == 0x0F || planeNo == 0x10) { // Plane 15 & 16
439 return ((theChar & 0xFF) > 0xFFFD ? true : false);
440 } else {
441 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? !CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : true);
442 }
443 } else if (charset == kCFUniCharControlAndFormatterCharacterSet) {
444 if (planeNo == 0x0E) { // Plane 14
445 theChar &= 0xFF;
446 return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? true : false);
447 } else {
448 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
449 }
450 } else {
451 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
452 }
453 }
454 return false;
455 }
456 }
457 }
458
459 const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane) {
460 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
461
462 charset = __CFUniCharMapCompatibilitySetID(charset);
463
464 if ((charset > kCFUniCharWhitespaceAndNewlineCharacterSet) && (charset != kCFUniCharIllegalCharacterSet) && (charset != kCFUniCharNewlineCharacterSet)) {
465 uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset);
466
467 if (tableIndex < __CFUniCharNumberOfBitmaps) {
468 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex;
469
470 return (plane < data->_numPlanes ? data->_planes[plane] : NULL);
471 }
472 }
473 return NULL;
474 }
475
476 __private_extern__ uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted) {
477 const uint8_t *src = CFUniCharGetBitmapPtrForPlane(charset, plane);
478 int numBytes = (8 * 1024);
479
480 if (src) {
481 if (isInverted) {
482 #if defined (__cplusplus)
483 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++));
484 #else
485 while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++));
486 #endif
487 } else {
488 #if defined (__cplusplus)
489 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++);
490 #else
491 while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++);
492 #endif
493 }
494 return kCFUniCharBitmapFilled;
495 } else if (charset == kCFUniCharIllegalCharacterSet) {
496 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset));
497
498 if (plane < data->_numPlanes && (src = data->_planes[plane])) {
499 if (isInverted) {
500 #if defined (__cplusplus)
501 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++);
502 #else
503 while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++);
504 #endif
505 } else {
506 #if defined (__cplusplus)
507 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++));
508 #else
509 while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++));
510 #endif
511 }
512 return kCFUniCharBitmapFilled;
513 } else if (plane == 0x0E) { // Plane 14
514 int idx;
515 uint8_t asciiRange = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
516 uint8_t otherRange = (isInverted ? (uint8_t)0 : (uint8_t)0xFF);
517
518 #if defined (__cplusplus)
519 *(((uint8_t *&)bitmap)++) = 0x02; // UE0001 LANGUAGE TAG
520 #else
521 *((uint8_t *)bitmap++) = 0x02; // UE0001 LANGUAGE TAG
522 #endif
523 for (idx = 1;idx < numBytes;idx++) {
524 #if defined (__cplusplus)
525 *(((uint8_t *&)bitmap)++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
526 #else
527 *((uint8_t *)bitmap++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
528 #endif
529 }
530 return kCFUniCharBitmapFilled;
531 } else if (plane == 0x0F || plane == 0x10) { // Plane 15 & 16
532 uint32_t value = (isInverted ? ~0 : 0);
533 numBytes /= 4; // for 32bit
534
535 while (numBytes-- > 0) {
536 *((uint32_t *)bitmap) = value;
537 #if defined (__cplusplus)
538 bitmap = (uint8_t *)bitmap + sizeof(uint32_t);
539 #else
540 bitmap += sizeof(uint32_t);
541 #endif
542 }
543 *(((uint8_t *)bitmap) - 5) = (isInverted ? 0x3F : 0xC0); // 0xFFFE & 0xFFFF
544 return kCFUniCharBitmapFilled;
545 }
546 return (isInverted ? kCFUniCharBitmapEmpty : kCFUniCharBitmapAll);
547 } else if ((charset < kCFUniCharDecimalDigitCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) {
548 if (plane) return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
549
550 uint8_t *bitmapBase = (uint8_t *)bitmap;
551 CFIndex idx;
552 uint8_t nonFillValue = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
553
554 #if defined (__cplusplus)
555 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = nonFillValue;
556 #else
557 while (numBytes-- > 0) *((uint8_t *)bitmap++) = nonFillValue;
558 #endif
559
560 if ((charset == kCFUniCharWhitespaceAndNewlineCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) {
561 const UniChar newlines[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029};
562
563 for (idx = 0;idx < (int)(sizeof(newlines) / sizeof(*newlines)); idx++) {
564 if (isInverted) {
565 CFUniCharRemoveCharacterFromBitmap(newlines[idx], bitmapBase);
566 } else {
567 CFUniCharAddCharacterToBitmap(newlines[idx], bitmapBase);
568 }
569 }
570
571 if (charset == kCFUniCharNewlineCharacterSet) return kCFUniCharBitmapFilled;
572 }
573
574 if (isInverted) {
575 CFUniCharRemoveCharacterFromBitmap(0x0009, bitmapBase);
576 CFUniCharRemoveCharacterFromBitmap(0x0020, bitmapBase);
577 CFUniCharRemoveCharacterFromBitmap(0x00A0, bitmapBase);
578 CFUniCharRemoveCharacterFromBitmap(0x1680, bitmapBase);
579 CFUniCharRemoveCharacterFromBitmap(0x202F, bitmapBase);
580 CFUniCharRemoveCharacterFromBitmap(0x205F, bitmapBase);
581 CFUniCharRemoveCharacterFromBitmap(0x3000, bitmapBase);
582 } else {
583 CFUniCharAddCharacterToBitmap(0x0009, bitmapBase);
584 CFUniCharAddCharacterToBitmap(0x0020, bitmapBase);
585 CFUniCharAddCharacterToBitmap(0x00A0, bitmapBase);
586 CFUniCharAddCharacterToBitmap(0x1680, bitmapBase);
587 CFUniCharAddCharacterToBitmap(0x202F, bitmapBase);
588 CFUniCharAddCharacterToBitmap(0x205F, bitmapBase);
589 CFUniCharAddCharacterToBitmap(0x3000, bitmapBase);
590 }
591
592 for (idx = 0x2000;idx <= 0x200B;idx++) {
593 if (isInverted) {
594 CFUniCharRemoveCharacterFromBitmap(idx, bitmapBase);
595 } else {
596 CFUniCharAddCharacterToBitmap(idx, bitmapBase);
597 }
598 }
599 return kCFUniCharBitmapFilled;
600 }
601 return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
602 }
603
604 __private_extern__ uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset) {
605 if ((charset == kCFUniCharControlCharacterSet) || (charset == kCFUniCharControlAndFormatterCharacterSet)) {
606 return 15; // 0 to 14
607 } else if (charset < kCFUniCharDecimalDigitCharacterSet) {
608 return 1;
609 } else if (charset == kCFUniCharIllegalCharacterSet) {
610 return 17;
611 } else {
612 uint32_t numPlanes;
613
614 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
615
616 numPlanes = __CFUniCharBitmapDataArray[__CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset))]._numPlanes;
617
618 return numPlanes;
619 }
620 }
621
622 // Mapping data loading
623 static const void **__CFUniCharMappingTables = NULL;
624
625 static CFSpinLock_t __CFUniCharMappingTableLock = CFSpinLockInit;
626
627 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX
628 #if __CF_BIG_ENDIAN__
629 #if USE_MACHO_SEGMENT
630 #define MAPPING_TABLE_FILE "__data"
631 #else
632 #define MAPPING_TABLE_FILE "/CFUnicodeData-B.mapping"
633 #endif
634 #else
635 #if USE_MACHO_SEGMENT
636 #define MAPPING_TABLE_FILE "__data"
637 #else
638 #define MAPPING_TABLE_FILE "/CFUnicodeData-L.mapping"
639 #endif
640 #endif
641 #elif DEPLOYMENT_TARGET_WINDOWS
642 #if __CF_BIG_ENDIAN__
643 #if USE_MACHO_SEGMENT
644 #define MAPPING_TABLE_FILE "__data"
645 #else
646 #define MAPPING_TABLE_FILE L"CFUnicodeData-B.mapping"
647 #endif
648 #else
649 #if USE_MACHO_SEGMENT
650 #define MAPPING_TABLE_FILE "__data"
651 #else
652 #define MAPPING_TABLE_FILE L"CFUnicodeData-L.mapping"
653 #endif
654 #endif
655 #else
656 #error Unknown or unspecified DEPLOYMENT_TARGET
657 #endif
658
659 __private_extern__ const void *CFUniCharGetMappingData(uint32_t type) {
660
661 __CFSpinLock(&__CFUniCharMappingTableLock);
662
663 if (NULL == __CFUniCharMappingTables) {
664 const void *bytes;
665 const void *bodyBase;
666 int headerSize;
667 int idx, count;
668 int64_t fileSize;
669
670 if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) {
671 __CFSpinUnlock(&__CFUniCharMappingTableLock);
672 return NULL;
673 }
674
675 #if defined (__cplusplus)
676 bytes = (uint8_t *)bytes + 4; // Skip Unicode version
677 headerSize = *((uint8_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t);
678 #else
679 bytes += 4; // Skip Unicode version
680 headerSize = *((uint32_t *)bytes); bytes += sizeof(uint32_t);
681 #endif
682 headerSize -= (sizeof(uint32_t) * 2);
683 bodyBase = (char *)bytes + headerSize;
684
685 count = headerSize / sizeof(uint32_t);
686
687 __CFUniCharMappingTables = (const void **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * count, 0);
688
689 for (idx = 0;idx < count;idx++) {
690 #if defined (__cplusplus)
691 __CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t);
692 #else
693 __CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes += sizeof(uint32_t);
694 #endif
695 }
696 }
697
698 __CFSpinUnlock(&__CFUniCharMappingTableLock);
699
700 return __CFUniCharMappingTables[type];
701 }
702
// Case mapping functions
#define DO_SPECIAL_CASE_MAPPING 1

/* One entry of a case mapping table: a code point and its packed mapping value. */
typedef struct {
    uint32_t _key;
    uint32_t _value;
} __CFUniCharCaseMappings;

/*
 * Per-case-type lookup data, all set up by __CFUniCharLoadCaseMappingTable and
 * indexed by case type:
 *   __CFUniCharCaseMappingTableCounts - number of entries in the main table
 *   __CFUniCharCaseMappingTable       - the main table
 *   __CFUniCharCaseMappingExtraTable  - the data following the main table
 */
static uint32_t *__CFUniCharCaseMappingTableCounts = NULL;
static uint32_t **__CFUniCharCaseMappingTable = NULL;
static const uint32_t **__CFUniCharCaseMappingExtraTable = NULL;
714
715 /* Binary searches CFStringEncodingUnicodeTo8BitCharMap */
716 static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings *theTable, uint32_t numElem, UTF32Char character) {
717 const __CFUniCharCaseMappings *p, *q, *divider;
718
719 if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) {
720 return 0;
721 }
722 p = theTable;
723 q = p + (numElem-1);
724 while (p <= q) {
725 divider = p + ((q - p) >> 1); /* divide by 2 */
726 if (character < divider->_key) { q = divider - 1; }
727 else if (character > divider->_key) { p = divider + 1; }
728 else { return divider->_value; }
729 }
730 return 0;
731 }
732
733 #define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1)
734
735 static bool __CFUniCharLoadCaseMappingTable(void) {
736 uint32_t *countArray;
737 int idx;
738
739 if (NULL == __CFUniCharMappingTables) (void)CFUniCharGetMappingData(kCFUniCharToLowercase);
740 if (NULL == __CFUniCharMappingTables) return false;
741
742 __CFSpinLock(&__CFUniCharMappingTableLock);
743
744 if (__CFUniCharCaseMappingTableCounts) {
745 __CFSpinUnlock(&__CFUniCharMappingTableLock);
746 return true;
747 }
748
749 countArray = (uint32_t *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(uint32_t) * NUM_CASE_MAP_DATA + sizeof(uint32_t *) * NUM_CASE_MAP_DATA * 2, 0);
750 __CFUniCharCaseMappingTable = (uint32_t **)((char *)countArray + sizeof(uint32_t) * NUM_CASE_MAP_DATA);
751 __CFUniCharCaseMappingExtraTable = (const uint32_t **)__CFUniCharCaseMappingTable + NUM_CASE_MAP_DATA;
752
753 for (idx = 0;idx < NUM_CASE_MAP_DATA;idx++) {
754 countArray[idx] = *((uint32_t *)__CFUniCharMappingTables[idx]) / (sizeof(uint32_t) * 2);
755 __CFUniCharCaseMappingTable[idx] = ((uint32_t *)__CFUniCharMappingTables[idx]) + 1;
756 __CFUniCharCaseMappingExtraTable[idx] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable[idx] + *((uint32_t *)__CFUniCharMappingTables[idx]));
757 }
758
759 __CFUniCharCaseMappingTableCounts = countArray;
760
761 __CFSpinUnlock(&__CFUniCharMappingTableLock);
762 return true;
763 }
764
/*
 * Two-letter language codes as they appear when the first two bytes of a
 * language string are read as one 16-bit value in host byte order.
 * __CF_LANG_CODE takes the two bytes in string order.
 */
#if __CF_BIG_ENDIAN__
#define __CF_LANG_CODE(first, second) (((first) << 8) | (second))
#else
#define __CF_LANG_CODE(first, second) (((second) << 8) | (first))
#endif

#define TURKISH_LANG_CODE __CF_LANG_CODE(0x74, 0x72) // tr
#define LITHUANIAN_LANG_CODE __CF_LANG_CODE(0x6C, 0x74) // lt
#define AZERI_LANG_CODE __CF_LANG_CODE(0x61, 0x7A) // az
#define DUTCH_LANG_CODE __CF_LANG_CODE(0x6E, 0x6C) // nl
776
777 CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode) {
778 __CFUniCharBitmapData *data;
779 uint8_t planeNo = (theChar >> 16) & 0xFF;
780
781 caseFoldRetry:
782
783 #if DO_SPECIAL_CASE_MAPPING
784 if (flags & kCFUniCharCaseMapFinalSigma) {
785 if (theChar == 0x03A3) { // Final sigma
786 *convertedChar = (ctype == kCFUniCharToLowercase ? 0x03C2 : 0x03A3);
787 return 1;
788 }
789 }
790
791 if (langCode) {
792 switch (*(uint16_t *)langCode) {
793 case LITHUANIAN_LANG_CODE:
794 if (theChar == 0x0307 && (flags & kCFUniCharCaseMapAfter_i)) {
795 return 0;
796 } else if (ctype == kCFUniCharToLowercase) {
797 if (flags & kCFUniCharCaseMapMoreAbove) {
798 switch (theChar) {
799 case 0x0049: // LATIN CAPITAL LETTER I
800 *(convertedChar++) = 0x0069;
801 *(convertedChar++) = 0x0307;
802 return 2;
803
804 case 0x004A: // LATIN CAPITAL LETTER J
805 *(convertedChar++) = 0x006A;
806 *(convertedChar++) = 0x0307;
807 return 2;
808
809 case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK
810 *(convertedChar++) = 0x012F;
811 *(convertedChar++) = 0x0307;
812 return 2;
813
814 default: break;
815 }
816 }
817 switch (theChar) {
818 case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE
819 *(convertedChar++) = 0x0069;
820 *(convertedChar++) = 0x0307;
821 *(convertedChar++) = 0x0300;
822 return 3;
823
824 case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE
825 *(convertedChar++) = 0x0069;
826 *(convertedChar++) = 0x0307;
827 *(convertedChar++) = 0x0301;
828 return 3;
829
830 case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE
831 *(convertedChar++) = 0x0069;
832 *(convertedChar++) = 0x0307;
833 *(convertedChar++) = 0x0303;
834 return 3;
835
836 default: break;
837 }
838 }
839 break;
840
841 case TURKISH_LANG_CODE:
842 case AZERI_LANG_CODE:
843 if ((theChar == 0x0049) || (theChar == 0x0131)) { // LATIN CAPITAL LETTER I & LATIN SMALL LETTER DOTLESS I
844 *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? ((kCFUniCharCaseMapMoreAbove & flags) ? 0x0069 : 0x0131) : 0x0049);
845 return 1;
846 } else if ((theChar == 0x0069) || (theChar == 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE
847 *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? 0x0069 : 0x0130);
848 return 1;
849 } else if (theChar == 0x0307 && (kCFUniCharCaseMapAfter_i & flags)) { // COMBINING DOT ABOVE AFTER_i
850 if (ctype == kCFUniCharToLowercase) {
851 return 0;
852 } else {
853 *convertedChar = 0x0307;
854 return 1;
855 }
856 }
857 break;
858
859 case DUTCH_LANG_CODE:
860 if ((theChar == 0x004A) || (theChar == 0x006A)) {
861 *convertedChar = (((ctype == kCFUniCharToUppercase) || (ctype == kCFUniCharToTitlecase) || (kCFUniCharCaseMapDutchDigraph & flags)) ? 0x004A : 0x006A);
862 return 1;
863 }
864 break;
865
866 default: break;
867 }
868 }
869 #endif // DO_SPECIAL_CASE_MAPPING
870
871 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
872
873 data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(ctype + kCFUniCharHasNonSelfLowercaseCharacterSet));
874
875 if (planeNo < data->_numPlanes && data->_planes[planeNo] && CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) && (__CFUniCharCaseMappingTableCounts || __CFUniCharLoadCaseMappingTable())) {
876 uint32_t value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[ctype], __CFUniCharCaseMappingTableCounts[ctype], theChar);
877
878 if (!value && ctype == kCFUniCharToTitlecase) {
879 value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[kCFUniCharToUppercase], __CFUniCharCaseMappingTableCounts[kCFUniCharToUppercase], theChar);
880 if (value) ctype = kCFUniCharToUppercase;
881 }
882
883 if (value) {
884 CFIndex count = CFUniCharConvertFlagToCount(value);
885
886 if (count == 1) {
887 if (value & kCFUniCharNonBmpFlag) {
888 if (maxLength > 1) {
889 value = (value & 0xFFFFFF) - 0x10000;
890 *(convertedChar++) = (UTF16Char)(value >> 10) + 0xD800UL;
891 *(convertedChar++) = (UTF16Char)(value & 0x3FF) + 0xDC00UL;
892 return 2;
893 }
894 } else {
895 *convertedChar = (UTF16Char)value;
896 return 1;
897 }
898 } else if (count < maxLength) {
899 const uint32_t *extraMapping = __CFUniCharCaseMappingExtraTable[ctype] + (value & 0xFFFFFF);
900
901 if (value & kCFUniCharNonBmpFlag) {
902 CFIndex copiedLen = 0;
903
904 while (count-- > 0) {
905 value = *(extraMapping++);
906 if (value > 0xFFFF) {
907 if (copiedLen + 2 >= maxLength) break;
908 value = (value & 0xFFFFFF) - 0x10000;
909 convertedChar[copiedLen++] = (UTF16Char)(value >> 10) + 0xD800UL;
910 convertedChar[copiedLen++] = (UTF16Char)(value & 0x3FF) + 0xDC00UL;
911 } else {
912 if (copiedLen + 1 >= maxLength) break;
913 convertedChar[copiedLen++] = value;
914 }
915 }
916 if (!count) return copiedLen;
917 } else {
918 CFIndex idx;
919
920 for (idx = 0;idx < count;idx++) *(convertedChar++) = (UTF16Char)*(extraMapping++);
921 return count;
922 }
923 }
924 }
925 } else if (ctype == kCFUniCharCaseFold) {
926 ctype = kCFUniCharToLowercase;
927 goto caseFoldRetry;
928 }
929
930 if (theChar > 0xFFFF) { // non-BMP
931 theChar = (theChar & 0xFFFFFF) - 0x10000;
932 *(convertedChar++) = (UTF16Char)(theChar >> 10) + 0xD800UL;
933 *(convertedChar++) = (UTF16Char)(theChar & 0x3FF) + 0xDC00UL;
934 return 2;
935 } else {
936 *convertedChar = theChar;
937 return 1;
938 }
939 }
940
941 CFIndex CFUniCharMapTo(UniChar theChar, UniChar *convertedChar, CFIndex maxLength, uint16_t ctype, uint32_t flags) {
942 if (ctype == kCFUniCharCaseFold + 1) { // kCFUniCharDecompose
943 if (CFUniCharIsDecomposableCharacter(theChar, false)) {
944 UTF32Char buffer[MAX_DECOMPOSED_LENGTH];
945 CFIndex usedLength = CFUniCharDecomposeCharacter(theChar, buffer, MAX_DECOMPOSED_LENGTH);
946 CFIndex idx;
947
948 for (idx = 0;idx < usedLength;idx++) *(convertedChar++) = buffer[idx];
949 return usedLength;
950 } else {
951 *convertedChar = theChar;
952 return 1;
953 }
954 } else {
955 return CFUniCharMapCaseTo(theChar, convertedChar, maxLength, ctype, flags, NULL);
956 }
957 }
958
959 CF_INLINE bool __CFUniCharIsMoreAbove(UTF16Char *buffer, CFIndex length) {
960 UTF32Char currentChar;
961 uint32_t property;
962
963 while (length-- > 0) {
964 currentChar = *(buffer)++;
965 if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*(buffer + 1))) {
966 currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(buffer++));
967 --length;
968 }
969 if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
970
971 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
972
973 if (property == 230) return true; // Above priority
974 }
975 return false;
976 }
977
978 CF_INLINE bool __CFUniCharIsAfter_i(UTF16Char *buffer, CFIndex length) {
979 UTF32Char currentChar = 0;
980 uint32_t property;
981 UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
982 CFIndex decompLength;
983 CFIndex idx;
984
985 if (length < 1) return 0;
986
987 buffer += length;
988 while (length-- > 1) {
989 currentChar = *(--buffer);
990 if (CFUniCharIsSurrogateLowCharacter(currentChar)) {
991 if ((length > 1) && CFUniCharIsSurrogateHighCharacter(*(buffer - 1))) {
992 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*(--buffer), currentChar);
993 --length;
994 } else {
995 break;
996 }
997 }
998 if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
999
1000 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
1001
1002 if (property == 230) return false; // Above priority
1003 }
1004 if (length == 0) {
1005 currentChar = *(--buffer);
1006 } else if (CFUniCharIsSurrogateLowCharacter(currentChar) && CFUniCharIsSurrogateHighCharacter(*(--buffer))) {
1007 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*buffer, currentChar);
1008 }
1009
1010 decompLength = CFUniCharDecomposeCharacter(currentChar, decomposed, MAX_DECOMPOSED_LENGTH);
1011 currentChar = *decomposed;
1012
1013
1014 for (idx = 1;idx < decompLength;idx++) {
1015 currentChar = decomposed[idx];
1016 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
1017
1018 if (property == 230) return false; // Above priority
1019 }
1020 return true;
1021 }
1022
1023 __private_extern__ uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags) {
1024 if (theChar == 0x03A3) { // GREEK CAPITAL LETTER SIGMA
1025 if ((type == kCFUniCharToLowercase) && (currentIndex > 0)) {
1026 UTF16Char *start = buffer;
1027 UTF16Char *end = buffer + length;
1028 UTF32Char otherChar;
1029
1030 // First check if we're after a cased character
1031 buffer += (currentIndex - 1);
1032 while (start <= buffer) {
1033 otherChar = *(buffer--);
1034 if (CFUniCharIsSurrogateLowCharacter(otherChar) && (start <= buffer) && CFUniCharIsSurrogateHighCharacter(*buffer)) {
1035 otherChar = CFUniCharGetLongCharacterForSurrogatePair(*(buffer--), otherChar);
1036 }
1037 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
1038 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) && !CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
1039 break;
1040 }
1041 }
1042
1043 // Next check if we're before a cased character
1044 buffer = start + currentIndex + 1;
1045 while (buffer < end) {
1046 otherChar = *(buffer++);
1047 if (CFUniCharIsSurrogateHighCharacter(otherChar) && (buffer < end) && CFUniCharIsSurrogateLowCharacter(*buffer)) {
1048 otherChar = CFUniCharGetLongCharacterForSurrogatePair(otherChar, *(buffer++));
1049 }
1050 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
1051 if (CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
1052 break;
1053 }
1054 }
1055 return kCFUniCharCaseMapFinalSigma;
1056 }
1057 } else if (langCode) {
1058 if (*((const uint16_t *)langCode) == LITHUANIAN_LANG_CODE) {
1059 if ((theChar == 0x0307) && ((kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) & lastFlags) == (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove)) {
1060 return (__CFUniCharIsAfter_i(buffer, currentIndex) ? kCFUniCharCaseMapAfter_i : 0);
1061 } else if (type == kCFUniCharToLowercase) {
1062 if ((theChar == 0x0049) || (theChar == 0x004A) || (theChar == 0x012E)) {
1063 return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? kCFUniCharCaseMapMoreAbove : 0);
1064 }
1065 } else if ((theChar == 'i') || (theChar == 'j')) {
1066 return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) : 0);
1067 }
1068 } else if ((*((const uint16_t *)langCode) == TURKISH_LANG_CODE) || (*((const uint16_t *)langCode) == AZERI_LANG_CODE)) {
1069 if (type == kCFUniCharToLowercase) {
1070 if (theChar == 0x0307) {
1071 return (kCFUniCharCaseMapMoreAbove & lastFlags ? kCFUniCharCaseMapAfter_i : 0);
1072 } else if (theChar == 0x0049) {
1073 return (((++currentIndex < length) && (buffer[currentIndex] == 0x0307)) ? kCFUniCharCaseMapMoreAbove : 0);
1074 }
1075 }
1076 } else if (*((const uint16_t *)langCode) == DUTCH_LANG_CODE) {
1077 if (kCFUniCharCaseMapDutchDigraph & lastFlags) {
1078 return (((theChar == 0x006A) || (theChar == 0x004A)) ? kCFUniCharCaseMapDutchDigraph : 0);
1079 } else {
1080 if ((type == kCFUniCharToTitlecase) && ((theChar == 0x0069) || (theChar == 0x0049))) {
1081 return (((++currentIndex < length) && ((buffer[currentIndex] == 0x006A) || (buffer[currentIndex] == 0x004A))) ? kCFUniCharCaseMapDutchDigraph : 0);
1082 }
1083 }
1084 }
1085 }
1086 return 0;
1087 }
1088
1089 // Unicode property database
// The table and its element count are filled in on first use by
// CFUniCharGetUnicodePropertyDataForPlane(); both are accessed there while
// __CFUniCharPropTableLock is held.
1090 static __CFUniCharBitmapData *__CFUniCharUnicodePropertyTable = NULL; // one entry per property type
1091 static int __CFUniCharUnicodePropertyTableCount = 0; // number of entries in the table above
1092
1093 static CFSpinLock_t __CFUniCharPropTableLock = CFSpinLockInit;
1094
// PROP_DB_FILE is the name handed to __CFUniCharLoadFile() for the property
// database: "__properties" when USE_MACHO_SEGMENT is set, otherwise a data file
// name (a wide string on Windows).
// NOTE(review): DEPLOYMENT_TARGET_FREEBSD is accepted by the #include guard at the
// top of this file but is not listed here, so it would reach the #error below - confirm.
1095 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX
1096 #if USE_MACHO_SEGMENT
1097 #define PROP_DB_FILE "__properties"
1098 #else
1099 #define PROP_DB_FILE "/CFUniCharPropertyDatabase.data"
1100 #endif
1101 #elif DEPLOYMENT_TARGET_WINDOWS
1102 #if USE_MACHO_SEGMENT
1103 #define PROP_DB_FILE "__properties"
1104 #else
1105 #define PROP_DB_FILE L"CFUniCharPropertyDatabase.data"
1106 #endif
1107 #else
1108 #error Unknown or unspecified DEPLOYMENT_TARGET
1109 #endif
1110
1111 const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane) {
1112
1113 __CFSpinLock(&__CFUniCharPropTableLock);
1114
1115 if (NULL == __CFUniCharUnicodePropertyTable) {
1116 __CFUniCharBitmapData *table;
1117 const void *bytes;
1118 const void *bodyBase;
1119 const void *planeBase;
1120 int headerSize;
1121 int idx, count;
1122 int planeIndex, planeCount;
1123 int planeSize;
1124 int64_t fileSize;
1125
1126 if (!__CFUniCharLoadFile(PROP_DB_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) {
1127 __CFSpinUnlock(&__CFUniCharPropTableLock);
1128 return NULL;
1129 }
1130
1131 #if defined (__cplusplus)
1132 bytes = (uint8_t*)bytes + 4; // Skip Unicode version
1133 headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
1134 #else
1135 bytes += 4; // Skip Unicode version
1136 headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes += sizeof(uint32_t);
1137 #endif
1138
1139 headerSize -= (sizeof(uint32_t) * 2);
1140 bodyBase = (char *)bytes + headerSize;
1141
1142 count = headerSize / sizeof(uint32_t);
1143 __CFUniCharUnicodePropertyTableCount = count;
1144
1145 table = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * count, 0);
1146
1147 for (idx = 0;idx < count;idx++) {
1148 planeCount = *((const uint8_t *)bodyBase);
1149 planeBase = (char *)bodyBase + planeCount + (planeCount % 4 ? 4 - (planeCount % 4) : 0);
1150 table[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * planeCount, 0);
1151
1152 for (planeIndex = 0;planeIndex < planeCount;planeIndex++) {
1153 if ((planeSize = ((const uint8_t *)bodyBase)[planeIndex + 1])) {
1154 table[idx]._planes[planeIndex] = (const uint8_t *)planeBase;
1155 #if defined (__cplusplus)
1156 planeBase = (char*)planeBase + (planeSize * 256);
1157 #else
1158 planeBase += (planeSize * 256);
1159 #endif
1160 } else {
1161 table[idx]._planes[planeIndex] = NULL;
1162 }
1163 }
1164
1165 table[idx]._numPlanes = planeCount;
1166 #if defined (__cplusplus)
1167 bodyBase = (const uint8_t *)bodyBase + (CFSwapInt32BigToHost(*(uint32_t *)bytes));
1168 ((uint32_t *&)bytes) ++;
1169 #else
1170 bodyBase += (CFSwapInt32BigToHost(*((uint32_t *)bytes++)));
1171 #endif
1172 }
1173
1174 __CFUniCharUnicodePropertyTable = table;
1175 }
1176
1177 __CFSpinUnlock(&__CFUniCharPropTableLock);
1178
1179 return (plane < __CFUniCharUnicodePropertyTable[propertyType]._numPlanes ? __CFUniCharUnicodePropertyTable[propertyType]._planes[plane] : NULL);
1180 }
1181
1182 __private_extern__ uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType) {
1183 (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType, 0);
1184 return __CFUniCharUnicodePropertyTable[propertyType]._numPlanes;
1185 }
1186
1187 __private_extern__ uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType) {
1188 if (propertyType == kCFUniCharCombiningProperty) {
1189 return CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
1190 } else if (propertyType == kCFUniCharBidiProperty) {
1191 return CFUniCharGetBidiPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
1192 } else {
1193 return 0;
1194 }
1195 }
1196
1197
1198
1199 /*
1200 The UTF8 conversion in the following function is derived from ConvertUTF.c
1201 */
1202 /*
1203 * Copyright 2001 Unicode, Inc.
1204 *
1205 * Disclaimer
1206 *
1207 * This source code is provided as is by Unicode, Inc. No claims are
1208 * made as to fitness for any particular purpose. No warranties of any
1209 * kind are expressed or implied. The recipient agrees to determine
1210 * applicability of information provided. If this file has been
1211 * purchased on magnetic or optical media from Unicode, Inc., the
1212 * sole remedy for any claim will be exchange of defective media
1213 * within 90 days of receipt.
1214 *
1215 * Limitations on Rights to Redistribute This Code
1216 *
1217 * Unicode, Inc. hereby grants the right to freely use the information
1218 * supplied in this file in the creation of products supporting the
1219 * Unicode Standard, and to make copies of this file in any form
1220 * for internal or external distribution as long as this notice
1221 * remains attached.
1222 */
// U+FFFD; CFUniCharFillDestinationBuffer() substitutes it for input values of
// 0x200000 and above when producing UTF-8.
1223 #define UNI_REPLACEMENT_CHAR (0x0000FFFDUL)
1224
1225 bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat) {
1226 UTF32Char currentChar;
1227 CFIndex usedLength = *filledLength;
1228
1229 if (dstFormat == kCFUniCharUTF16Format) {
1230 UTF16Char *dstBuffer = (UTF16Char *)*dst;
1231
1232 while (srcLength-- > 0) {
1233 currentChar = *(src++);
1234
1235 if (currentChar > 0xFFFF) { // Non-BMP
1236 usedLength += 2;
1237 if (dstLength) {
1238 if (usedLength > dstLength) return false;
1239 currentChar -= 0x10000;
1240 *(dstBuffer++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
1241 *(dstBuffer++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
1242 }
1243 } else {
1244 ++usedLength;
1245 if (dstLength) {
1246 if (usedLength > dstLength) return false;
1247 *(dstBuffer++) = (UTF16Char)currentChar;
1248 }
1249 }
1250 }
1251
1252 *dst = dstBuffer;
1253 } else if (dstFormat == kCFUniCharUTF8Format) {
1254 uint8_t *dstBuffer = (uint8_t *)*dst;
1255 uint16_t bytesToWrite = 0;
1256 const UTF32Char byteMask = 0xBF;
1257 const UTF32Char byteMark = 0x80;
1258 static const uint8_t firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
1259
1260 while (srcLength-- > 0) {
1261 currentChar = *(src++);
1262
1263 /* Figure out how many bytes the result will require */
1264 if (currentChar < (UTF32Char)0x80) {
1265 bytesToWrite = 1;
1266 } else if (currentChar < (UTF32Char)0x800) {
1267 bytesToWrite = 2;
1268 } else if (currentChar < (UTF32Char)0x10000) {
1269 bytesToWrite = 3;
1270 } else if (currentChar < (UTF32Char)0x200000) {
1271 bytesToWrite = 4;
1272 } else {
1273 bytesToWrite = 2;
1274 currentChar = UNI_REPLACEMENT_CHAR;
1275 }
1276
1277 usedLength += bytesToWrite;
1278
1279 if (dstLength) {
1280 if (usedLength > dstLength) return false;
1281
1282 dstBuffer += bytesToWrite;
1283 switch (bytesToWrite) { /* note: everything falls through. */
1284 case 4: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1285 case 3: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1286 case 2: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1287 case 1: *--dstBuffer = currentChar | firstByteMark[bytesToWrite];
1288 }
1289 dstBuffer += bytesToWrite;
1290 }
1291 }
1292
1293 *dst = dstBuffer;
1294 } else {
1295 UTF32Char *dstBuffer = (UTF32Char *)*dst;
1296
1297 while (srcLength-- > 0) {
1298 currentChar = *(src++);
1299
1300 ++usedLength;
1301 if (dstLength) {
1302 if (usedLength > dstLength) return false;
1303 *(dstBuffer++) = currentChar;
1304 }
1305 }
1306
1307 *dst = dstBuffer;
1308 }
1309
1310 *filledLength = usedLength;
1311
1312 return true;
1313 }
1314
#if DEPLOYMENT_TARGET_WINDOWS
/*
 * Windows-only teardown: releases the tables this file allocated and resets the
 * corresponding globals, taking the lock that guards each group while doing so.
 */
void __CFUniCharCleanup(void)
{
    int tableIndex;

    /* Memory allocated by __CFUniCharLoadBitmapData() */
    __CFSpinLock(&__CFUniCharBitmapLock);

    if (NULL != __CFUniCharBitmapDataArray) {
        tableIndex = 0;
        while (tableIndex < (int)__CFUniCharNumberOfBitmaps) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray[tableIndex]._planes);
            __CFUniCharBitmapDataArray[tableIndex]._planes = NULL;
            tableIndex++;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray);
        __CFUniCharBitmapDataArray = NULL;
        __CFUniCharNumberOfBitmaps = 0;
    }

    __CFSpinUnlock(&__CFUniCharBitmapLock);

    /* Both mapping-table groups are handled under the mapping table lock */
    __CFSpinLock(&__CFUniCharMappingTableLock);

    /* Memory allocated by CFUniCharGetMappingData() */
    if (NULL != __CFUniCharMappingTables) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharMappingTables);
        __CFUniCharMappingTables = NULL;
    }

    /* Memory allocated by __CFUniCharLoadCaseMappingTable() */
    if (NULL != __CFUniCharCaseMappingTableCounts) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharCaseMappingTableCounts);
        __CFUniCharCaseMappingTableCounts = NULL;

        /* Only the counts block is deallocated; the other two pointers are just reset */
        __CFUniCharCaseMappingTable = NULL;
        __CFUniCharCaseMappingExtraTable = NULL;
    }

    __CFSpinUnlock(&__CFUniCharMappingTableLock);

    /* Memory allocated by CFUniCharGetUnicodePropertyDataForPlane() */
    __CFSpinLock(&__CFUniCharPropTableLock);

    if (NULL != __CFUniCharUnicodePropertyTable) {
        tableIndex = 0;
        while (tableIndex < __CFUniCharUnicodePropertyTableCount) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable[tableIndex]._planes);
            __CFUniCharUnicodePropertyTable[tableIndex]._planes = NULL;
            tableIndex++;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable);
        __CFUniCharUnicodePropertyTable = NULL;
        __CFUniCharUnicodePropertyTableCount = 0;
    }

    __CFSpinUnlock(&__CFUniCharPropTableLock);
}
#endif
1372
// Remove USE_MACHO_SEGMENT, the macro tested by the PROP_DB_FILE selection above.
1373 #undef USE_MACHO_SEGMENT
1374