]> git.saurik.com Git - apple/cf.git/blame - CFUniChar.c
CF-476.19.tar.gz
[apple/cf.git] / CFUniChar.c
CommitLineData
9ce05555 1/*
bd5b749c 2 * Copyright (c) 2008 Apple Inc. All rights reserved.
9ce05555
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
9ce05555
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23/* CFUniChar.c
24 Copyright 2001-2002, Apple, Inc. All rights reserved.
25 Responsibility: Aki Inoue
26*/
27
28#include <CoreFoundation/CFByteOrder.h>
29#include "CFInternal.h"
bd5b749c 30#include "CFBundle_Internal.h"
9ce05555
A
31#include "CFUniChar.h"
32#include "CFStringEncodingConverterExt.h"
33#include "CFUnicodeDecomposition.h"
34#include "CFUniCharPriv.h"
bd5b749c 35#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
9ce05555
A
36#include <fcntl.h>
37#include <sys/types.h>
38#include <sys/stat.h>
39#include <sys/param.h>
40#include <sys/mman.h>
41#include <unistd.h>
42#include <stdlib.h>
43#endif
bd5b749c
A
44#if DEPLOYMENT_TARGET_MACOSX
45#include <mach/mach.h>
46#endif
9ce05555 47
bd5b749c
A
48#if DEPLOYMENT_TARGET_MACOSX
49#define __kCFCharacterSetDir "/System/Library/CoreServices"
50#elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
51#define __kCFCharacterSetDir "/usr/local/share/CoreFoundation"
52#elif defined(__WIN32__)
53#define __kCFCharacterSetDir "\\Windows\\CoreFoundation"
9ce05555
A
54#endif
55
bd5b749c
A
56#if DEPLOYMENT_TARGET_MACOSX
57#define USE_MACHO_SEGMENT 1
58#endif //__MACH__
59
60enum {
61 kCFUniCharLastExternalSet = kCFUniCharNewlineCharacterSet,
62 kCFUniCharFirstInternalSet = kCFUniCharCompatibilityDecomposableCharacterSet,
63 kCFUniCharLastInternalSet = kCFUniCharGraphemeExtendCharacterSet,
64 kCFUniCharFirstBitmapSet = kCFUniCharDecimalDigitCharacterSet
65};
66
67CF_INLINE uint32_t __CFUniCharMapExternalSetToInternalIndex(uint32_t cset) { return ((kCFUniCharFirstInternalSet <= cset) ? ((cset - kCFUniCharFirstInternalSet) + kCFUniCharLastExternalSet) : cset) - kCFUniCharFirstBitmapSet; }
68CF_INLINE uint32_t __CFUniCharMapCompatibilitySetID(uint32_t cset) { return ((cset == kCFUniCharControlCharacterSet) ? kCFUniCharControlAndFormatterCharacterSet : (((cset > kCFUniCharLastExternalSet) && (cset < kCFUniCharFirstInternalSet)) ? ((cset - kCFUniCharLastExternalSet) + kCFUniCharFirstInternalSet) : cset)); }
69
70#if (DEPLOYMENT_TARGET_MACOSX) && USE_MACHO_SEGMENT
71
72#include <mach-o/getsect.h>
73#include <mach-o/dyld.h>
74#include <mach-o/ldsyms.h>
75
76static const void *__CFGetSectDataPtr(const char *segname, const char *sectname, uint64_t *sizep) {
77 uint32_t idx, cnt = _dyld_image_count();
78 for (idx = 0; idx < cnt; idx++) {
79 void *mh = (void *)_dyld_get_image_header(idx);
80 if (mh != &_mh_dylib_header) continue;
81#if __LP64__
82 const struct section_64 *sect = getsectbynamefromheader_64((struct mach_header_64 *)mh, segname, sectname);
83#else
84 const struct section *sect = getsectbynamefromheader((struct mach_header *)mh, segname, sectname);
85#endif
86 if (!sect) break;
87 if (sizep) *sizep = (uint64_t)sect->size;
88 return (char *)sect->addr + _dyld_get_image_vmaddr_slide(idx);
89 }
90 if (sizep) *sizep = 0ULL;
91 return NULL;
92}
93
94#endif
95
96#if !USE_MACHO_SEGMENT
97
9ce05555 98// Memory map the file
9ce05555
A
99
100CF_INLINE void __CFUniCharCharacterSetPath(char *cpath) {
bd5b749c 101#if DEPLOYMENT_TARGET_MACOSX
d8925383 102 strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
d8925383 103#else
df7f3a2a 104 strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
d8925383
A
105#endif
106
bd5b749c
A
107#if 0 || 0
108#if 0
109 strncat(cpath, "\\Resources\\", MAXPATHLEN - strlen(cpath));
110#else
111 strncat(cpath, "\\CoreFoundation.resources\\CharacterSets\\", MAXPATHLEN - strlen(cpath));
112#endif
d8925383 113#else
df7f3a2a 114 strlcat(cpath, "/CharacterSets/", MAXPATHLEN);
d8925383 115#endif
9ce05555
A
116}
117
bd5b749c
A
118#if defined (__WIN32__)
119#define MAX_BITMAP_STATE 512
//
// If a string is placed into this array, then it has previously been
// determined that the bitmap file cannot be found.  We therefore assume
// it will not be there in future calls either and avoid hitting the disk
// unnecessarily.  This assumption is not 100% correct, because bitmap
// files can be added later; the application would have to be restarted
// in order to pick up the new bitmap info.
//
// We should probably revisit this.
//
130static char *mappedBitmapState[MAX_BITMAP_STATE];
131static int __nNumStateEntries = -1;
132CRITICAL_SECTION __bitmapStateLock = {0};
133
134bool __GetBitmapStateForName(char *bitmapName) {
135 if (NULL == __bitmapStateLock.DebugInfo)
136 InitializeCriticalSection(&__bitmapStateLock);
137 EnterCriticalSection(&__bitmapStateLock);
138 if (__nNumStateEntries >= 0) {
139 for (int i = 0; i < __nNumStateEntries; i++) {
140 if (strcmp(mappedBitmapState[i], bitmapName) == 0) {
141 LeaveCriticalSection(&__bitmapStateLock);
142 return true;
143 }
144 }
145 }
146 LeaveCriticalSection(&__bitmapStateLock);
147 return false;
148}
149void __AddBitmapStateForName(char *bitmapName) {
150 if (NULL == __bitmapStateLock.DebugInfo)
151 InitializeCriticalSection(&__bitmapStateLock);
152 EnterCriticalSection(&__bitmapStateLock);
153 __nNumStateEntries++;
154 mappedBitmapState[__nNumStateEntries] = (char *)malloc((strlen(bitmapName)+1) * sizeof(char));
155 strcpy(mappedBitmapState[__nNumStateEntries], bitmapName);
156 LeaveCriticalSection(&__bitmapStateLock);
157}
158#endif //__WIN32__
9ce05555 159
bd5b749c
A
/*
 * Maps the whole file `fileName` read-only into memory.
 *
 * On success stores the address of the mapping in *bytes and returns true.
 * On failure returns false and leaves *bytes untouched (the POSIX path no
 * longer stores the mmap failure sentinel there).  The mapping is never
 * unmapped by this function.
 */
static bool __CFUniCharLoadBytesFromFile(const char *fileName, const void **bytes) {
#if 0 || 0
    HANDLE bitmapFileHandle = NULL;
    HANDLE mappingHandle = NULL;

    if (__GetBitmapStateForName((char *)fileName)) {
        // The fileName has been tried in the past, so just return false
        // and move on.
        *bytes = NULL;
        return false;
    }
    mappingHandle = OpenFileMappingA(FILE_MAP_READ, TRUE, fileName);
    if (NULL == mappingHandle) {
        if ((bitmapFileHandle = CreateFileA(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) {
            // We tried to get the bitmap file for mapping, but it's not there.  Add it to the list of
            // nonexistent bitmap files so we don't have to try this again in the future.
            __AddBitmapStateForName((char *)fileName);
            return false;
        }
        mappingHandle = CreateFileMappingA(bitmapFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
        CloseHandle(bitmapFileHandle);
        if (!mappingHandle) return false;

        *bytes = MapViewOfFileEx(mappingHandle, FILE_MAP_READ, 0, 0, 0, 0);
        CloseHandle(mappingHandle);
    } else {
        *bytes = MapViewOfFileEx(mappingHandle, FILE_MAP_READ, 0, 0, 0, 0);
        CloseHandle(mappingHandle);
    }

    return (*bytes ? true : false);
#else
    struct stat statBuf;
    bool mapped = false;
    int fd;

    // Held open for the duration of the file access and released on every
    // exit path below (the fstat/mmap failure path used to leak it).
    int no_hang_fd = open("/dev/autofs_nowait", 0);

    fd = open(fileName, O_RDONLY, 0);
    if (fd >= 0) {
        if (fstat(fd, &statBuf) == 0) {
            void *mapping = mmap(0, statBuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);

            if (mapping != MAP_FAILED) {
                *bytes = mapping;
                mapped = true;
            }
        }
        close(fd); // the mapping stays valid after the descriptor is closed
    }
    if (no_hang_fd >= 0) close(no_hang_fd);

    return mapped;
#endif
}
210
bd5b749c
A
211#endif // USE_MACHO_SEGMENT
212
9ce05555 213static bool __CFUniCharLoadFile(const char *bitmapName, const void **bytes) {
bd5b749c
A
214#if USE_MACHO_SEGMENT
215 *bytes = __CFGetSectDataPtr("__UNICODE", bitmapName, NULL);
216 return *bytes ? true : false;
217#else
9ce05555 218 char cpath[MAXPATHLEN];
9ce05555 219 __CFUniCharCharacterSetPath(cpath);
bd5b749c 220 strlcat(cpath, bitmapName, MAXPATHLEN);
9ce05555 221 return __CFUniCharLoadBytesFromFile(cpath, bytes);
bd5b749c 222#endif
9ce05555 223}
9ce05555
A
224
225// Bitmap functions
226CF_INLINE bool isControl(UTF32Char theChar, uint16_t charset, const void *data) { // ISO Control
bd5b749c 227 return (((theChar <= 0x001F) || (theChar >= 0x007F && theChar <= 0x009F)) ? true : false);
9ce05555
A
228}
229
230CF_INLINE bool isWhitespace(UTF32Char theChar, uint16_t charset, const void *data) { // Space
bd5b749c 231 return (((theChar == 0x0020) || (theChar == 0x0009) || (theChar == 0x00A0) || (theChar == 0x1680) || (theChar >= 0x2000 && theChar <= 0x200B) || (theChar == 0x202F) || (theChar == 0x205F) || (theChar == 0x3000)) ? true : false);
9ce05555
A
232}
233
bd5b749c
A
234CF_INLINE bool isNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space
235 return (((theChar >= 0x000A && theChar <= 0x000D) || (theChar == 0x0085) || (theChar == 0x2028) || (theChar == 0x2029)) ? true : false);
9ce05555
A
236}
237
bd5b749c
A
238CF_INLINE bool isWhitespaceAndNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space
239 return ((isWhitespace(theChar, charset, data) || isNewline(theChar, charset, data)) ? true : false);
9ce05555
A
240}
241
9ce05555
A
/* Per-character-set entry of the loaded bitmap table. */
typedef struct {
    uint32_t _numPlanes;        // number of slots in _planes
    const uint8_t **_planes;    // one 8KB bitmap pointer per plane; NULL for a plane with no bitmap
} __CFUniCharBitmapData;
246
247static char __CFUniCharUnicodeVersionString[8] = {0, 0, 0, 0, 0, 0, 0, 0};
248
249static uint32_t __CFUniCharNumberOfBitmaps = 0;
250static __CFUniCharBitmapData *__CFUniCharBitmapDataArray = NULL;
251
bd5b749c 252static CFSpinLock_t __CFUniCharBitmapLock = CFSpinLockInit;
9ce05555 253
bd5b749c
A
254#if !defined(CF_UNICHAR_BITMAP_FILE)
255#if USE_MACHO_SEGMENT
256#define CF_UNICHAR_BITMAP_FILE "__csbitmaps"
257#else
9ce05555 258#define CF_UNICHAR_BITMAP_FILE "CFCharacterSetBitmaps.bitmap"
bd5b749c
A
259#endif
260#endif
9ce05555
A
261
262static bool __CFUniCharLoadBitmapData(void) {
bd5b749c 263 __CFUniCharBitmapData *array;
9ce05555
A
264 uint32_t headerSize;
265 uint32_t bitmapSize;
266 int numPlanes;
267 uint8_t currentPlane;
268 const void *bytes;
269 const void *bitmapBase;
270 const void *bitmap;
271 int idx, bitmapIndex;
272
273 __CFSpinLock(&__CFUniCharBitmapLock);
274
275 if (__CFUniCharBitmapDataArray || !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE, &bytes)) {
276 __CFSpinUnlock(&__CFUniCharBitmapLock);
277 return false;
278 }
279
280 for (idx = 0;idx < 4 && ((const uint8_t *)bytes)[idx];idx++) {
281 __CFUniCharUnicodeVersionString[idx * 2] = ((const uint8_t *)bytes)[idx];
282 __CFUniCharUnicodeVersionString[idx * 2 + 1] = '.';
283 }
284 __CFUniCharUnicodeVersionString[(idx < 4 ? idx * 2 - 1 : 7)] = '\0';
285
286 headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4)));
287
bd5b749c
A
288 bitmapBase = (uint8_t *)bytes + headerSize;
289 bytes = (uint8_t *)bytes + (sizeof(uint32_t) * 2);
9ce05555
A
290 headerSize -= (sizeof(uint32_t) * 2);
291
292 __CFUniCharNumberOfBitmaps = headerSize / (sizeof(uint32_t) * 2);
293
bd5b749c 294 array = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * __CFUniCharNumberOfBitmaps, 0);
9ce05555
A
295
296 for (idx = 0;idx < (int)__CFUniCharNumberOfBitmaps;idx++) {
bd5b749c
A
297 bitmap = (uint8_t *)bitmapBase + CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
298 bitmapSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
9ce05555
A
299
300 numPlanes = bitmapSize / (8 * 1024);
301 numPlanes = *(const uint8_t *)((char *)bitmap + (((numPlanes - 1) * ((8 * 1024) + 1)) - 1)) + 1;
bd5b749c
A
302 array[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * numPlanes, 0);
303 array[idx]._numPlanes = numPlanes;
9ce05555
A
304
305 currentPlane = 0;
306 for (bitmapIndex = 0;bitmapIndex < numPlanes;bitmapIndex++) {
307 if (bitmapIndex == currentPlane) {
bd5b749c
A
308 array[idx]._planes[bitmapIndex] = (const uint8_t *)bitmap;
309 bitmap = (uint8_t *)bitmap + (8 * 1024);
310#if defined (__cplusplus)
311 currentPlane = *(((const uint8_t*&)bitmap)++);
312#else
313 currentPlane = *((const uint8_t *)bitmap++);
314#endif //C++
315
9ce05555 316 } else {
bd5b749c 317 array[idx]._planes[bitmapIndex] = NULL;
9ce05555
A
318 }
319 }
320 }
321
bd5b749c
A
322 __CFUniCharBitmapDataArray = array;
323
9ce05555
A
324 __CFSpinUnlock(&__CFUniCharBitmapLock);
325
326 return true;
327}
328
329__private_extern__ const char *__CFUniCharGetUnicodeVersionString(void) {
330 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
331 return __CFUniCharUnicodeVersionString;
332}
333
9ce05555 334bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset) {
bd5b749c 335 charset = __CFUniCharMapCompatibilitySetID(charset);
9ce05555
A
336
337 switch (charset) {
9ce05555
A
338 case kCFUniCharWhitespaceCharacterSet:
339 return isWhitespace(theChar, charset, NULL);
340
341 case kCFUniCharWhitespaceAndNewlineCharacterSet:
bd5b749c
A
342 return isWhitespaceAndNewline(theChar, charset, NULL);
343
344 case kCFUniCharNewlineCharacterSet:
345 return isNewline(theChar, charset, NULL);
346
347 default: {
348 uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset);
349
9ce05555
A
350 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
351
bd5b749c
A
352 if (tableIndex < __CFUniCharNumberOfBitmaps) {
353 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex;
9ce05555
A
354 uint8_t planeNo = (theChar >> 16) & 0xFF;
355
356 // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16
357 if (charset == kCFUniCharIllegalCharacterSet) {
358 if (planeNo == 0x0E) { // Plane 14
359 theChar &= 0xFF;
360 return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? false : true);
361 } else if (planeNo == 0x0F || planeNo == 0x10) { // Plane 15 & 16
362 return ((theChar & 0xFF) > 0xFFFD ? true : false);
363 } else {
364 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? !CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : true);
365 }
366 } else if (charset == kCFUniCharControlAndFormatterCharacterSet) {
367 if (planeNo == 0x0E) { // Plane 14
368 theChar &= 0xFF;
369 return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? true : false);
370 } else {
371 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
372 }
373 } else {
374 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
375 }
376 }
377 return false;
bd5b749c 378 }
9ce05555
A
379 }
380}
381
382const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane) {
383 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
384
bd5b749c 385 charset = __CFUniCharMapCompatibilitySetID(charset);
9ce05555 386
bd5b749c
A
387 if ((charset > kCFUniCharWhitespaceAndNewlineCharacterSet) && (charset != kCFUniCharIllegalCharacterSet) && (charset != kCFUniCharNewlineCharacterSet)) {
388 uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset);
9ce05555 389
bd5b749c
A
390 if (tableIndex < __CFUniCharNumberOfBitmaps) {
391 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex;
392
393 return (plane < data->_numPlanes ? data->_planes[plane] : NULL);
394 }
9ce05555
A
395 }
396 return NULL;
397}
398
399__private_extern__ uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted) {
400 const uint8_t *src = CFUniCharGetBitmapPtrForPlane(charset, plane);
401 int numBytes = (8 * 1024);
402
403 if (src) {
404 if (isInverted) {
bd5b749c
A
405#if defined (__cplusplus)
406 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++));
407#else
408 while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++));
409#endif //C++
9ce05555 410 } else {
bd5b749c
A
411#if defined (__cplusplus)
412 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++);
413#else
414 while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++);
415#endif //C++
9ce05555
A
416 }
417 return kCFUniCharBitmapFilled;
418 } else if (charset == kCFUniCharIllegalCharacterSet) {
bd5b749c 419 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset));
9ce05555
A
420
421 if (plane < data->_numPlanes && (src = data->_planes[plane])) {
422 if (isInverted) {
bd5b749c
A
423#if defined (__cplusplus)
424 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++);
425#else
426 while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++);
427#endif //C++
9ce05555 428 } else {
bd5b749c
A
429#if defined (__cplusplus)
430 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++));
431#else
432 while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++));
433#endif //C++
9ce05555
A
434 }
435 return kCFUniCharBitmapFilled;
436 } else if (plane == 0x0E) { // Plane 14
437 int idx;
438 uint8_t asciiRange = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
439 uint8_t otherRange = (isInverted ? (uint8_t)0 : (uint8_t)0xFF);
440
bd5b749c
A
441#if defined (__cplusplus)
442 *(((uint8_t *&)bitmap)++) = 0x02; // UE0001 LANGUAGE TAG
443#else
444 *((uint8_t *)bitmap++) = 0x02; // UE0001 LANGUAGE TAG
445#endif //C++
9ce05555 446 for (idx = 1;idx < numBytes;idx++) {
bd5b749c
A
447#if defined (__cplusplus)
448 *(((uint8_t *&)bitmap)++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
449#else
450 *((uint8_t *)bitmap++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
451#endif //C++
9ce05555
A
452 }
453 return kCFUniCharBitmapFilled;
454 } else if (plane == 0x0F || plane == 0x10) { // Plane 15 & 16
bd5b749c 455 uint32_t value = (isInverted ? ~0 : 0);
9ce05555
A
456 numBytes /= 4; // for 32bit
457
bd5b749c
A
458 while (numBytes-- > 0) {
459 *((uint32_t *)bitmap) = value;
460#if defined (__cplusplus)
461 bitmap = (uint8_t *)bitmap + sizeof(uint32_t);
462#else
463 bitmap += sizeof(uint32_t);
464#endif //C++
465 }
9ce05555
A
466 *(((uint8_t *)bitmap) - 5) = (isInverted ? 0x3F : 0xC0); // 0xFFFE & 0xFFFF
467 return kCFUniCharBitmapFilled;
468 }
469 return (isInverted ? kCFUniCharBitmapEmpty : kCFUniCharBitmapAll);
bd5b749c 470 } else if ((charset < kCFUniCharDecimalDigitCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) {
9ce05555
A
471 if (plane) return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
472
bd5b749c
A
473 uint8_t *bitmapBase = (uint8_t *)bitmap;
474 CFIndex idx;
475 uint8_t nonFillValue = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
9ce05555 476
bd5b749c
A
477#if defined (__cplusplus)
478 while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = nonFillValue;
479#else
480 while (numBytes-- > 0) *((uint8_t *)bitmap++) = nonFillValue;
481#endif //C++
9ce05555 482
bd5b749c
A
483 if ((charset == kCFUniCharWhitespaceAndNewlineCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) {
484 const UniChar newlines[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029};
9ce05555 485
bd5b749c
A
486 for (idx = 0;idx < (int)(sizeof(newlines) / sizeof(*newlines)); idx++) {
487 if (isInverted) {
488 CFUniCharRemoveCharacterFromBitmap(newlines[idx], bitmapBase);
489 } else {
490 CFUniCharAddCharacterToBitmap(newlines[idx], bitmapBase);
9ce05555
A
491 }
492 }
493
bd5b749c
A
494 if (charset == kCFUniCharNewlineCharacterSet) return kCFUniCharBitmapFilled;
495 }
496
497 if (isInverted) {
498 CFUniCharRemoveCharacterFromBitmap(0x0009, bitmapBase);
499 CFUniCharRemoveCharacterFromBitmap(0x0020, bitmapBase);
500 CFUniCharRemoveCharacterFromBitmap(0x00A0, bitmapBase);
501 CFUniCharRemoveCharacterFromBitmap(0x1680, bitmapBase);
502 CFUniCharRemoveCharacterFromBitmap(0x202F, bitmapBase);
503 CFUniCharRemoveCharacterFromBitmap(0x205F, bitmapBase);
504 CFUniCharRemoveCharacterFromBitmap(0x3000, bitmapBase);
505 } else {
506 CFUniCharAddCharacterToBitmap(0x0009, bitmapBase);
507 CFUniCharAddCharacterToBitmap(0x0020, bitmapBase);
508 CFUniCharAddCharacterToBitmap(0x00A0, bitmapBase);
509 CFUniCharAddCharacterToBitmap(0x1680, bitmapBase);
510 CFUniCharAddCharacterToBitmap(0x202F, bitmapBase);
511 CFUniCharAddCharacterToBitmap(0x205F, bitmapBase);
512 CFUniCharAddCharacterToBitmap(0x3000, bitmapBase);
513 }
514
515 for (idx = 0x2000;idx <= 0x200B;idx++) {
9ce05555 516 if (isInverted) {
bd5b749c 517 CFUniCharRemoveCharacterFromBitmap(idx, bitmapBase);
9ce05555 518 } else {
bd5b749c 519 CFUniCharAddCharacterToBitmap(idx, bitmapBase);
9ce05555
A
520 }
521 }
522 return kCFUniCharBitmapFilled;
523 }
524 return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
525}
526
527__private_extern__ uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset) {
bd5b749c
A
528 if ((charset == kCFUniCharControlCharacterSet) || (charset == kCFUniCharControlAndFormatterCharacterSet)) {
529 return 15; // 0 to 14
530 } else if (charset < kCFUniCharDecimalDigitCharacterSet) {
9ce05555
A
531 return 1;
532 } else if (charset == kCFUniCharIllegalCharacterSet) {
533 return 17;
534 } else {
535 uint32_t numPlanes;
536
537 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
538
bd5b749c 539 numPlanes = __CFUniCharBitmapDataArray[__CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset))]._numPlanes;
9ce05555
A
540
541 return numPlanes;
542 }
9ce05555
A
543}
544
545// Mapping data loading
546static const void **__CFUniCharMappingTables = NULL;
547
bd5b749c 548static CFSpinLock_t __CFUniCharMappingTableLock = CFSpinLockInit;
9ce05555 549
bd5b749c
A
550#if __CF_BIG_ENDIAN__
551#if USE_MACHO_SEGMENT
552#define MAPPING_TABLE_FILE "__data"
553#else
9ce05555 554#define MAPPING_TABLE_FILE "CFUnicodeData-B.mapping"
bd5b749c
A
555#endif
556#else
557#if USE_MACHO_SEGMENT
558#define MAPPING_TABLE_FILE "__data"
559#else
9ce05555 560#define MAPPING_TABLE_FILE "CFUnicodeData-L.mapping"
bd5b749c
A
561#endif
562#endif
9ce05555
A
563
564__private_extern__ const void *CFUniCharGetMappingData(uint32_t type) {
565
566 __CFSpinLock(&__CFUniCharMappingTableLock);
567
568 if (NULL == __CFUniCharMappingTables) {
569 const void *bytes;
570 const void *bodyBase;
571 int headerSize;
572 int idx, count;
573
574 if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE, &bytes)) {
575 __CFSpinUnlock(&__CFUniCharMappingTableLock);
576 return NULL;
577 }
578
bd5b749c
A
579#if defined (__cplusplus)
580 bytes = (uint8_t *)bytes + 4; // Skip Unicode version
581 headerSize = *((uint8_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t);
582#else
583 bytes += 4; // Skip Unicode version
584 headerSize = *((uint32_t *)bytes); bytes += sizeof(uint32_t);
585#endif //C++
9ce05555
A
586 headerSize -= (sizeof(uint32_t) * 2);
587 bodyBase = (char *)bytes + headerSize;
588
589 count = headerSize / sizeof(uint32_t);
590
bd5b749c 591 __CFUniCharMappingTables = (const void **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * count, 0);
9ce05555
A
592
593 for (idx = 0;idx < count;idx++) {
bd5b749c
A
594#if defined (__cplusplus)
595 __CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t);
596#else
597 __CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes += sizeof(uint32_t);
598#endif //C++
9ce05555
A
599 }
600 }
601
602 __CFSpinUnlock(&__CFUniCharMappingTableLock);
603
604 return __CFUniCharMappingTables[type];
605}
606
607// Case mapping functions
608#define DO_SPECIAL_CASE_MAPPING 1
609
610static uint32_t *__CFUniCharCaseMappingTableCounts = NULL;
611static uint32_t **__CFUniCharCaseMappingTable = NULL;
612static const uint32_t **__CFUniCharCaseMappingExtraTable = NULL;
613
/* One entry of a case mapping table; tables are binary-searched on _key. */
typedef struct {
    uint32_t _key;      // source character
    uint32_t _value;    // mapped value (character, or flags plus an index into the extra table)
} __CFUniCharCaseMappings;
618
619/* Binary searches CFStringEncodingUnicodeTo8BitCharMap */
620static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings *theTable, uint32_t numElem, UTF32Char character) {
621 const __CFUniCharCaseMappings *p, *q, *divider;
622
623 if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) {
624 return 0;
625 }
626 p = theTable;
627 q = p + (numElem-1);
628 while (p <= q) {
629 divider = p + ((q - p) >> 1); /* divide by 2 */
630 if (character < divider->_key) { q = divider - 1; }
631 else if (character > divider->_key) { p = divider + 1; }
632 else { return divider->_value; }
633 }
634 return 0;
635}
636
637#define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1)
638
639static bool __CFUniCharLoadCaseMappingTable(void) {
bd5b749c 640 uint32_t *countArray;
9ce05555
A
641 int idx;
642
643 if (NULL == __CFUniCharMappingTables) (void)CFUniCharGetMappingData(kCFUniCharToLowercase);
644 if (NULL == __CFUniCharMappingTables) return false;
645
646 __CFSpinLock(&__CFUniCharMappingTableLock);
647
648 if (__CFUniCharCaseMappingTableCounts) {
649 __CFSpinUnlock(&__CFUniCharMappingTableLock);
650 return true;
651 }
652
bd5b749c
A
653 countArray = (uint32_t *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(uint32_t) * NUM_CASE_MAP_DATA + sizeof(uint32_t *) * NUM_CASE_MAP_DATA * 2, 0);
654 __CFUniCharCaseMappingTable = (uint32_t **)((char *)countArray + sizeof(uint32_t) * NUM_CASE_MAP_DATA);
9ce05555
A
655 __CFUniCharCaseMappingExtraTable = (const uint32_t **)__CFUniCharCaseMappingTable + NUM_CASE_MAP_DATA;
656
657 for (idx = 0;idx < NUM_CASE_MAP_DATA;idx++) {
bd5b749c 658 countArray[idx] = *((uint32_t *)__CFUniCharMappingTables[idx]) / (sizeof(uint32_t) * 2);
9ce05555
A
659 __CFUniCharCaseMappingTable[idx] = ((uint32_t *)__CFUniCharMappingTables[idx]) + 1;
660 __CFUniCharCaseMappingExtraTable[idx] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable[idx] + *((uint32_t *)__CFUniCharMappingTables[idx]));
661 }
662
bd5b749c
A
663 __CFUniCharCaseMappingTableCounts = countArray;
664
9ce05555
A
665 __CFSpinUnlock(&__CFUniCharMappingTableLock);
666 return true;
667}
668
bd5b749c 669#if __CF_BIG_ENDIAN__
9ce05555
A
670#define TURKISH_LANG_CODE (0x7472) // tr
671#define LITHUANIAN_LANG_CODE (0x6C74) // lt
672#define AZERI_LANG_CODE (0x617A) // az
bd5b749c 673#else
9ce05555
A
674#define TURKISH_LANG_CODE (0x7274) // tr
675#define LITHUANIAN_LANG_CODE (0x746C) // lt
676#define AZERI_LANG_CODE (0x7A61) // az
bd5b749c 677#endif
9ce05555 678
bd5b749c 679CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode) {
9ce05555
A
680 __CFUniCharBitmapData *data;
681 uint8_t planeNo = (theChar >> 16) & 0xFF;
682
683caseFoldRetry:
684
685#if DO_SPECIAL_CASE_MAPPING
686 if (flags & kCFUniCharCaseMapFinalSigma) {
687 if (theChar == 0x03A3) { // Final sigma
688 *convertedChar = (ctype == kCFUniCharToLowercase ? 0x03C2 : 0x03A3);
689 return 1;
690 }
691 }
692
693 if (langCode) {
694 switch (*(uint16_t *)langCode) {
695 case LITHUANIAN_LANG_CODE:
696 if (theChar == 0x0307 && (flags & kCFUniCharCaseMapAfter_i)) {
697 return 0;
698 } else if (ctype == kCFUniCharToLowercase) {
699 if (flags & kCFUniCharCaseMapMoreAbove) {
700 switch (theChar) {
701 case 0x0049: // LATIN CAPITAL LETTER I
702 *(convertedChar++) = 0x0069;
703 *(convertedChar++) = 0x0307;
704 return 2;
705
706 case 0x004A: // LATIN CAPITAL LETTER J
707 *(convertedChar++) = 0x006A;
708 *(convertedChar++) = 0x0307;
709 return 2;
710
711 case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK
712 *(convertedChar++) = 0x012F;
713 *(convertedChar++) = 0x0307;
714 return 2;
715
716 default: break;
717 }
718 }
719 switch (theChar) {
720 case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE
721 *(convertedChar++) = 0x0069;
722 *(convertedChar++) = 0x0307;
723 *(convertedChar++) = 0x0300;
724 return 3;
725
726 case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE
727 *(convertedChar++) = 0x0069;
728 *(convertedChar++) = 0x0307;
729 *(convertedChar++) = 0x0301;
730 return 3;
731
732 case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE
733 *(convertedChar++) = 0x0069;
734 *(convertedChar++) = 0x0307;
735 *(convertedChar++) = 0x0303;
736 return 3;
737
738 default: break;
739 }
740 }
741 break;
742
743 case TURKISH_LANG_CODE:
744 case AZERI_LANG_CODE:
d8925383
A
745 if ((theChar == 0x0049) || (theChar == 0x0131)) { // LATIN CAPITAL LETTER I & LATIN SMALL LETTER DOTLESS I
746 *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? ((kCFUniCharCaseMapMoreAbove & flags) ? 0x0069 : 0x0131) : 0x0049);
9ce05555
A
747 return 1;
748 } else if ((theChar == 0x0069) || (theChar == 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE
d8925383 749 *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? 0x0069 : 0x0130);
9ce05555
A
750 return 1;
751 } else if (theChar == 0x0307 && (kCFUniCharCaseMapAfter_i & flags)) { // COMBINING DOT ABOVE AFTER_i
752 if (ctype == kCFUniCharToLowercase) {
753 return 0;
754 } else {
755 *convertedChar = 0x0307;
756 return 1;
757 }
758 }
759 break;
760
761 default: break;
762 }
763 }
764#endif DO_SPECIAL_CASE_MAPPING
765
766 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
767
bd5b749c 768 data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(ctype + kCFUniCharHasNonSelfLowercaseCharacterSet));
9ce05555
A
769
770 if (planeNo < data->_numPlanes && data->_planes[planeNo] && CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) && (__CFUniCharCaseMappingTableCounts || __CFUniCharLoadCaseMappingTable())) {
771 uint32_t value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[ctype], __CFUniCharCaseMappingTableCounts[ctype], theChar);
772
773 if (!value && ctype == kCFUniCharToTitlecase) {
774 value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[kCFUniCharToUppercase], __CFUniCharCaseMappingTableCounts[kCFUniCharToUppercase], theChar);
775 if (value) ctype = kCFUniCharToUppercase;
776 }
777
778 if (value) {
bd5b749c 779 CFIndex count = CFUniCharConvertFlagToCount(value);
9ce05555
A
780
781 if (count == 1) {
782 if (value & kCFUniCharNonBmpFlag) {
783 if (maxLength > 1) {
784 value = (value & 0xFFFFFF) - 0x10000;
bd5b749c
A
785 *(convertedChar++) = (UTF16Char)(value >> 10) + 0xD800UL;
786 *(convertedChar++) = (UTF16Char)(value & 0x3FF) + 0xDC00UL;
9ce05555
A
787 return 2;
788 }
789 } else {
790 *convertedChar = (UTF16Char)value;
791 return 1;
792 }
bd5b749c 793 } else if (count < maxLength) {
9ce05555
A
794 const uint32_t *extraMapping = __CFUniCharCaseMappingExtraTable[ctype] + (value & 0xFFFFFF);
795
796 if (value & kCFUniCharNonBmpFlag) {
bd5b749c 797 CFIndex copiedLen = 0;
9ce05555
A
798
799 while (count-- > 0) {
800 value = *(extraMapping++);
801 if (value > 0xFFFF) {
bd5b749c 802 if (copiedLen + 2 >= maxLength) break;
9ce05555 803 value = (value & 0xFFFFFF) - 0x10000;
bd5b749c
A
804 convertedChar[copiedLen++] = (UTF16Char)(value >> 10) + 0xD800UL;
805 convertedChar[copiedLen++] = (UTF16Char)(value & 0x3FF) + 0xDC00UL;
9ce05555 806 } else {
bd5b749c 807 if (copiedLen + 1 >= maxLength) break;
9ce05555
A
808 convertedChar[copiedLen++] = value;
809 }
810 }
811 if (!count) return copiedLen;
812 } else {
bd5b749c 813 CFIndex idx;
9ce05555
A
814
815 for (idx = 0;idx < count;idx++) *(convertedChar++) = (UTF16Char)*(extraMapping++);
816 return count;
817 }
818 }
819 }
820 } else if (ctype == kCFUniCharCaseFold) {
821 ctype = kCFUniCharToLowercase;
822 goto caseFoldRetry;
823 }
824
d8925383
A
825 if (theChar > 0xFFFF) { // non-BMP
826 theChar = (theChar & 0xFFFFFF) - 0x10000;
bd5b749c
A
827 *(convertedChar++) = (UTF16Char)(theChar >> 10) + 0xD800UL;
828 *(convertedChar++) = (UTF16Char)(theChar & 0x3FF) + 0xDC00UL;
d8925383
A
829 return 2;
830 } else {
831 *convertedChar = theChar;
832 return 1;
833 }
9ce05555
A
834}
835
bd5b749c 836CFIndex CFUniCharMapTo(UniChar theChar, UniChar *convertedChar, CFIndex maxLength, uint16_t ctype, uint32_t flags) {
9ce05555
A
837 if (ctype == kCFUniCharCaseFold + 1) { // kCFUniCharDecompose
838 if (CFUniCharIsDecomposableCharacter(theChar, false)) {
839 UTF32Char buffer[MAX_DECOMPOSED_LENGTH];
840 CFIndex usedLength = CFUniCharDecomposeCharacter(theChar, buffer, MAX_DECOMPOSED_LENGTH);
841 CFIndex idx;
842
843 for (idx = 0;idx < usedLength;idx++) *(convertedChar++) = buffer[idx];
844 return usedLength;
845 } else {
846 *convertedChar = theChar;
847 return 1;
848 }
849 } else {
850 return CFUniCharMapCaseTo(theChar, convertedChar, maxLength, ctype, flags, NULL);
851 }
852}
853
bd5b749c 854CF_INLINE bool __CFUniCharIsMoreAbove(UTF16Char *buffer, CFIndex length) {
9ce05555
A
855 UTF32Char currentChar;
856 uint32_t property;
857
858 while (length-- > 0) {
859 currentChar = *(buffer)++;
860 if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*(buffer + 1))) {
861 currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(buffer++));
862 --length;
863 }
864 if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
865
bd5b749c 866 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
9ce05555
A
867
868 if (property == 230) return true; // Above priority
869 }
870 return false;
871}
872
bd5b749c 873CF_INLINE bool __CFUniCharIsAfter_i(UTF16Char *buffer, CFIndex length) {
9ce05555
A
874 UTF32Char currentChar = 0;
875 uint32_t property;
876 UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
bd5b749c
A
877 CFIndex decompLength;
878 CFIndex idx;
9ce05555
A
879
880 if (length < 1) return 0;
881
882 buffer += length;
883 while (length-- > 1) {
884 currentChar = *(--buffer);
885 if (CFUniCharIsSurrogateLowCharacter(currentChar)) {
886 if ((length > 1) && CFUniCharIsSurrogateHighCharacter(*(buffer - 1))) {
887 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*(--buffer), currentChar);
888 --length;
889 } else {
890 break;
891 }
892 }
893 if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
894
bd5b749c 895 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
9ce05555
A
896
897 if (property == 230) return false; // Above priority
898 }
899 if (length == 0) {
900 currentChar = *(--buffer);
901 } else if (CFUniCharIsSurrogateLowCharacter(currentChar) && CFUniCharIsSurrogateHighCharacter(*(--buffer))) {
902 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*buffer, currentChar);
903 }
904
905 decompLength = CFUniCharDecomposeCharacter(currentChar, decomposed, MAX_DECOMPOSED_LENGTH);
906 currentChar = *decomposed;
907
908
909 for (idx = 1;idx < decompLength;idx++) {
910 currentChar = decomposed[idx];
bd5b749c 911 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
9ce05555
A
912
913 if (property == 230) return false; // Above priority
914 }
915 return true;
916}
917
bd5b749c 918__private_extern__ uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags) {
9ce05555
A
919 if (theChar == 0x03A3) { // GREEK CAPITAL LETTER SIGMA
920 if ((type == kCFUniCharToLowercase) && (currentIndex > 0)) {
921 UTF16Char *start = buffer;
922 UTF16Char *end = buffer + length;
923 UTF32Char otherChar;
924
925 // First check if we're after a cased character
926 buffer += (currentIndex - 1);
927 while (start <= buffer) {
928 otherChar = *(buffer--);
929 if (CFUniCharIsSurrogateLowCharacter(otherChar) && (start <= buffer) && CFUniCharIsSurrogateHighCharacter(*buffer)) {
930 otherChar = CFUniCharGetLongCharacterForSurrogatePair(*(buffer--), otherChar);
931 }
932 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
933 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) && !CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
934 break;
935 }
936 }
937
938 // Next check if we're before a cased character
939 buffer = start + currentIndex + 1;
940 while (buffer < end) {
941 otherChar = *(buffer++);
942 if (CFUniCharIsSurrogateHighCharacter(otherChar) && (buffer < end) && CFUniCharIsSurrogateLowCharacter(*buffer)) {
943 otherChar = CFUniCharGetLongCharacterForSurrogatePair(otherChar, *(buffer++));
944 }
945 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
946 if (CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
947 break;
948 }
949 }
950 return kCFUniCharCaseMapFinalSigma;
951 }
952 } else if (langCode) {
953 if (*((const uint16_t *)langCode) == LITHUANIAN_LANG_CODE) {
954 if ((theChar == 0x0307) && ((kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) & lastFlags) == (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove)) {
955 return (__CFUniCharIsAfter_i(buffer, currentIndex) ? kCFUniCharCaseMapAfter_i : 0);
956 } else if (type == kCFUniCharToLowercase) {
957 if ((theChar == 0x0049) || (theChar == 0x004A) || (theChar == 0x012E)) {
958 return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? kCFUniCharCaseMapMoreAbove : 0);
959 }
960 } else if ((theChar == 'i') || (theChar == 'j')) {
961 return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) : 0);
962 }
963 } else if ((*((const uint16_t *)langCode) == TURKISH_LANG_CODE) || (*((const uint16_t *)langCode) == AZERI_LANG_CODE)) {
964 if (type == kCFUniCharToLowercase) {
965 if (theChar == 0x0307) {
966 return (kCFUniCharCaseMapMoreAbove & lastFlags ? kCFUniCharCaseMapAfter_i : 0);
967 } else if (theChar == 0x0049) {
968 return (((++currentIndex < length) && (buffer[currentIndex] == 0x0307)) ? kCFUniCharCaseMapMoreAbove : 0);
969 }
970 }
971 }
972 }
973 return 0;
974}
975
976// Unicode property database
977static __CFUniCharBitmapData *__CFUniCharUnicodePropertyTable = NULL;
d8925383 978static int __CFUniCharUnicodePropertyTableCount = 0;
9ce05555 979
bd5b749c 980static CFSpinLock_t __CFUniCharPropTableLock = CFSpinLockInit;
9ce05555 981
bd5b749c
A
982#if USE_MACHO_SEGMENT
983#define PROP_DB_FILE "__properties"
984#else
9ce05555 985#define PROP_DB_FILE "CFUniCharPropertyDatabase.data"
bd5b749c 986#endif
9ce05555
A
987
988const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane) {
989
990 __CFSpinLock(&__CFUniCharPropTableLock);
991
992 if (NULL == __CFUniCharUnicodePropertyTable) {
bd5b749c 993 __CFUniCharBitmapData *table;
9ce05555
A
994 const void *bytes;
995 const void *bodyBase;
996 const void *planeBase;
997 int headerSize;
998 int idx, count;
999 int planeIndex, planeCount;
1000 int planeSize;
1001
1002 if (!__CFUniCharLoadFile(PROP_DB_FILE, &bytes)) {
1003 __CFSpinUnlock(&__CFUniCharPropTableLock);
1004 return NULL;
1005 }
1006
bd5b749c
A
1007#if defined (__cplusplus)
1008 bytes = (uint8_t*)bytes + 4; // Skip Unicode version
1009 headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
1010#else
1011 bytes += 4; // Skip Unicode version
1012 headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes += sizeof(uint32_t);
1013#endif //C++
1014
9ce05555
A
1015 headerSize -= (sizeof(uint32_t) * 2);
1016 bodyBase = (char *)bytes + headerSize;
1017
1018 count = headerSize / sizeof(uint32_t);
d8925383 1019 __CFUniCharUnicodePropertyTableCount = count;
9ce05555 1020
bd5b749c 1021 table = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * count, 0);
9ce05555
A
1022
1023 for (idx = 0;idx < count;idx++) {
1024 planeCount = *((const uint8_t *)bodyBase);
bd5b749c
A
1025 planeBase = (char *)bodyBase + planeCount + (planeCount % 4 ? 4 - (planeCount % 4) : 0);
1026 table[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * planeCount, 0);
9ce05555
A
1027
1028 for (planeIndex = 0;planeIndex < planeCount;planeIndex++) {
1029 if ((planeSize = ((const uint8_t *)bodyBase)[planeIndex + 1])) {
bd5b749c
A
1030 table[idx]._planes[planeIndex] = (const uint8_t *)planeBase;
1031#if defined (__cplusplus)
1032 planeBase = (char*)planeBase + (planeSize * 256);
1033#else
1034 planeBase += (planeSize * 256);
1035#endif //C++
9ce05555 1036 } else {
bd5b749c 1037 table[idx]._planes[planeIndex] = NULL;
9ce05555
A
1038 }
1039 }
1040
bd5b749c
A
1041 table[idx]._numPlanes = planeCount;
1042#if defined (__cplusplus)
1043 bodyBase = (const uint8_t *)bodyBase + (CFSwapInt32BigToHost(*(uint32_t *)bytes));
1044 ((uint32_t *&)bytes) ++;
1045#else
1046 bodyBase += (CFSwapInt32BigToHost(*((uint32_t *)bytes++)));
1047#endif //C++
9ce05555 1048 }
bd5b749c
A
1049
1050 __CFUniCharUnicodePropertyTable = table;
9ce05555
A
1051 }
1052
1053 __CFSpinUnlock(&__CFUniCharPropTableLock);
1054
1055 return (plane < __CFUniCharUnicodePropertyTable[propertyType]._numPlanes ? __CFUniCharUnicodePropertyTable[propertyType]._planes[plane] : NULL);
1056}
1057
1058__private_extern__ uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType) {
1059 (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType, 0);
1060 return __CFUniCharUnicodePropertyTable[propertyType]._numPlanes;
1061}
1062
1063__private_extern__ uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType) {
1064 if (propertyType == kCFUniCharCombiningProperty) {
bd5b749c 1065 return CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
9ce05555 1066 } else if (propertyType == kCFUniCharBidiProperty) {
bd5b749c 1067 return CFUniCharGetBidiPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
9ce05555
A
1068 } else {
1069 return 0;
1070 }
1071}
1072
1073
1074
1075/*
1076 The UTF8 conversion in the following function is derived from ConvertUTF.c
1077*/
1078/*
1079 * Copyright 2001 Unicode, Inc.
1080 *
1081 * Disclaimer
1082 *
1083 * This source code is provided as is by Unicode, Inc. No claims are
1084 * made as to fitness for any particular purpose. No warranties of any
1085 * kind are expressed or implied. The recipient agrees to determine
1086 * applicability of information provided. If this file has been
1087 * purchased on magnetic or optical media from Unicode, Inc., the
1088 * sole remedy for any claim will be exchange of defective media
1089 * within 90 days of receipt.
1090 *
1091 * Limitations on Rights to Redistribute This Code
1092 *
1093 * Unicode, Inc. hereby grants the right to freely use the information
1094 * supplied in this file in the creation of products supporting the
1095 * Unicode Standard, and to make copies of this file in any form
1096 * for internal or external distribution as long as this notice
1097 * remains attached.
1098 */
1099#define UNI_REPLACEMENT_CHAR (0x0000FFFDUL)
1100
bd5b749c 1101bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat) {
9ce05555 1102 UTF32Char currentChar;
bd5b749c 1103 CFIndex usedLength = *filledLength;
9ce05555
A
1104
1105 if (dstFormat == kCFUniCharUTF16Format) {
1106 UTF16Char *dstBuffer = (UTF16Char *)*dst;
1107
1108 while (srcLength-- > 0) {
1109 currentChar = *(src++);
1110
1111 if (currentChar > 0xFFFF) { // Non-BMP
1112 usedLength += 2;
1113 if (dstLength) {
1114 if (usedLength > dstLength) return false;
1115 currentChar -= 0x10000;
1116 *(dstBuffer++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
1117 *(dstBuffer++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
1118 }
1119 } else {
1120 ++usedLength;
1121 if (dstLength) {
1122 if (usedLength > dstLength) return false;
1123 *(dstBuffer++) = (UTF16Char)currentChar;
1124 }
1125 }
1126 }
1127
1128 *dst = dstBuffer;
1129 } else if (dstFormat == kCFUniCharUTF8Format) {
1130 uint8_t *dstBuffer = (uint8_t *)*dst;
1131 uint16_t bytesToWrite = 0;
1132 const UTF32Char byteMask = 0xBF;
1133 const UTF32Char byteMark = 0x80;
1134 static const uint8_t firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
1135
1136 while (srcLength-- > 0) {
1137 currentChar = *(src++);
1138
1139 /* Figure out how many bytes the result will require */
1140 if (currentChar < (UTF32Char)0x80) {
1141 bytesToWrite = 1;
1142 } else if (currentChar < (UTF32Char)0x800) {
1143 bytesToWrite = 2;
1144 } else if (currentChar < (UTF32Char)0x10000) {
1145 bytesToWrite = 3;
1146 } else if (currentChar < (UTF32Char)0x200000) {
1147 bytesToWrite = 4;
1148 } else {
1149 bytesToWrite = 2;
1150 currentChar = UNI_REPLACEMENT_CHAR;
1151 }
1152
1153 usedLength += bytesToWrite;
1154
1155 if (dstLength) {
1156 if (usedLength > dstLength) return false;
1157
1158 dstBuffer += bytesToWrite;
1159 switch (bytesToWrite) { /* note: everything falls through. */
1160 case 4: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1161 case 3: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1162 case 2: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1163 case 1: *--dstBuffer = currentChar | firstByteMark[bytesToWrite];
1164 }
1165 dstBuffer += bytesToWrite;
1166 }
1167 }
1168
1169 *dst = dstBuffer;
1170 } else {
1171 UTF32Char *dstBuffer = (UTF32Char *)*dst;
1172
1173 while (srcLength-- > 0) {
1174 currentChar = *(src++);
1175
1176 ++usedLength;
1177 if (dstLength) {
1178 if (usedLength > dstLength) return false;
1179 *(dstBuffer++) = currentChar;
1180 }
1181 }
1182
1183 *dst = dstBuffer;
1184 }
1185
1186 *filledLength = usedLength;
1187
1188 return true;
1189}
d8925383 1190
#if 0 || 0
/*
 Releases the tables this file allocates lazily and resets the corresponding
 globals, taking each table's lock while doing so.  The whole function is
 currently compiled out by the enclosing #if.
*/
void __CFUniCharCleanup(void)
{
    int entryIdx;

    // Bitmap data built by __CFUniCharLoadBitmapData()
    __CFSpinLock(&__CFUniCharBitmapLock);

    if (NULL != __CFUniCharBitmapDataArray) {
        for (entryIdx = 0; entryIdx < (int)__CFUniCharNumberOfBitmaps; entryIdx++) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray[entryIdx]._planes);
            __CFUniCharBitmapDataArray[entryIdx]._planes = NULL;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray);
        __CFUniCharBitmapDataArray = NULL;
        __CFUniCharNumberOfBitmaps = 0;
    }

    __CFSpinUnlock(&__CFUniCharBitmapLock);

    // Mapping tables built by CFUniCharGetMappingData() and
    // __CFUniCharLoadCaseMappingTable(); both are released under the mapping-table lock
    __CFSpinLock(&__CFUniCharMappingTableLock);

    if (NULL != __CFUniCharMappingTables) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharMappingTables);
        __CFUniCharMappingTables = NULL;
    }

    if (NULL != __CFUniCharCaseMappingTableCounts) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharCaseMappingTableCounts);
        __CFUniCharCaseMappingTableCounts = NULL;

        // These two are only reset here, not deallocated.
        __CFUniCharCaseMappingTable = NULL;
        __CFUniCharCaseMappingExtraTable = NULL;
    }

    __CFSpinUnlock(&__CFUniCharMappingTableLock);

    // Property table built by CFUniCharGetUnicodePropertyDataForPlane()
    __CFSpinLock(&__CFUniCharPropTableLock);

    if (NULL != __CFUniCharUnicodePropertyTable) {
        for (entryIdx = 0; entryIdx < __CFUniCharUnicodePropertyTableCount; entryIdx++) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable[entryIdx]._planes);
            __CFUniCharUnicodePropertyTable[entryIdx]._planes = NULL;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable);
        __CFUniCharUnicodePropertyTable = NULL;
        __CFUniCharUnicodePropertyTableCount = 0;
    }

    __CFSpinUnlock(&__CFUniCharPropTableLock);
}
#endif // 0 || 0 (cleanup disabled)
1248
bd5b749c
A
1249#undef USE_MACHO_SEGMENT
1250