2 * Copyright (c) 2008 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 Copyright 2001-2002, Apple, Inc. All rights reserved.
25 Responsibility: Aki Inoue
28 #include <CoreFoundation/CFByteOrder.h>
29 #include "CFInternal.h"
30 #include "CFBundle_Internal.h"
31 #include "CFUniChar.h"
32 #include "CFStringEncodingConverterExt.h"
33 #include "CFUnicodeDecomposition.h"
34 #include "CFUniCharPriv.h"
35 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
37 #include <sys/types.h>
39 #include <sys/param.h>
44 #if DEPLOYMENT_TARGET_MACOSX
45 #include <mach/mach.h>
48 #if DEPLOYMENT_TARGET_MACOSX
49 #define __kCFCharacterSetDir "/System/Library/CoreServices"
50 #elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
51 #define __kCFCharacterSetDir "/usr/local/share/CoreFoundation"
52 #elif defined(__WIN32__)
53 #define __kCFCharacterSetDir "\\Windows\\CoreFoundation"
56 #if DEPLOYMENT_TARGET_MACOSX
57 #define USE_MACHO_SEGMENT 1
61 kCFUniCharLastExternalSet
= kCFUniCharNewlineCharacterSet
,
62 kCFUniCharFirstInternalSet
= kCFUniCharCompatibilityDecomposableCharacterSet
,
63 kCFUniCharLastInternalSet
= kCFUniCharGraphemeExtendCharacterSet
,
64 kCFUniCharFirstBitmapSet
= kCFUniCharDecimalDigitCharacterSet
67 CF_INLINE
uint32_t __CFUniCharMapExternalSetToInternalIndex(uint32_t cset
) { return ((kCFUniCharFirstInternalSet
<= cset
) ? ((cset
- kCFUniCharFirstInternalSet
) + kCFUniCharLastExternalSet
) : cset
) - kCFUniCharFirstBitmapSet
; }
68 CF_INLINE
uint32_t __CFUniCharMapCompatibilitySetID(uint32_t cset
) { return ((cset
== kCFUniCharControlCharacterSet
) ? kCFUniCharControlAndFormatterCharacterSet
: (((cset
> kCFUniCharLastExternalSet
) && (cset
< kCFUniCharFirstInternalSet
)) ? ((cset
- kCFUniCharLastExternalSet
) + kCFUniCharFirstInternalSet
) : cset
)); }
70 #if (DEPLOYMENT_TARGET_MACOSX) && USE_MACHO_SEGMENT
72 #include <mach-o/getsect.h>
73 #include <mach-o/dyld.h>
74 #include <mach-o/ldsyms.h>
76 static const void *__CFGetSectDataPtr(const char *segname
, const char *sectname
, uint64_t *sizep
) {
77 uint32_t idx
, cnt
= _dyld_image_count();
78 for (idx
= 0; idx
< cnt
; idx
++) {
79 void *mh
= (void *)_dyld_get_image_header(idx
);
80 if (mh
!= &_mh_dylib_header
) continue;
82 const struct section_64
*sect
= getsectbynamefromheader_64((struct mach_header_64
*)mh
, segname
, sectname
);
84 const struct section
*sect
= getsectbynamefromheader((struct mach_header
*)mh
, segname
, sectname
);
87 if (sizep
) *sizep
= (uint64_t)sect
->size
;
88 return (char *)sect
->addr
+ _dyld_get_image_vmaddr_slide(idx
);
90 if (sizep
) *sizep
= 0ULL;
96 #if !USE_MACHO_SEGMENT
98 // Memory map the file
100 CF_INLINE
void __CFUniCharCharacterSetPath(char *cpath
) {
101 #if DEPLOYMENT_TARGET_MACOSX
102 strlcpy(cpath
, __kCFCharacterSetDir
, MAXPATHLEN
);
104 strlcpy(cpath
, __kCFCharacterSetDir
, MAXPATHLEN
);
109 strncat(cpath
, "\\Resources\\", MAXPATHLEN
- strlen(cpath
));
111 strncat(cpath
, "\\CoreFoundation.resources\\CharacterSets\\", MAXPATHLEN
- strlen(cpath
));
114 strlcat(cpath
, "/CharacterSets/", MAXPATHLEN
);
118 #if defined (__WIN32__)
119 #define MAX_BITMAP_STATE 512
121 // If a string is placed into this array, it has previously been
122 // determined that the bitmap file cannot be found. We therefore
123 // assume it won't be there in future calls and avoid hitting the
124 // disk unnecessarily. This assumption isn't 100% correct, as bitmap
125 // files can be added later; the application would have to be
126 // restarted in order to pick up the new bitmap info.
128 // We should probably revisit this.
130 static char *mappedBitmapState
[MAX_BITMAP_STATE
];
131 static int __nNumStateEntries
= -1;
132 CRITICAL_SECTION __bitmapStateLock
= {0};
134 bool __GetBitmapStateForName(char *bitmapName
) {
135 if (NULL
== __bitmapStateLock
.DebugInfo
)
136 InitializeCriticalSection(&__bitmapStateLock
);
137 EnterCriticalSection(&__bitmapStateLock
);
138 if (__nNumStateEntries
>= 0) {
139 for (int i
= 0; i
< __nNumStateEntries
; i
++) {
140 if (strcmp(mappedBitmapState
[i
], bitmapName
) == 0) {
141 LeaveCriticalSection(&__bitmapStateLock
);
146 LeaveCriticalSection(&__bitmapStateLock
);
149 void __AddBitmapStateForName(char *bitmapName
) {
150 if (NULL
== __bitmapStateLock
.DebugInfo
)
151 InitializeCriticalSection(&__bitmapStateLock
);
152 EnterCriticalSection(&__bitmapStateLock
);
153 __nNumStateEntries
++;
154 mappedBitmapState
[__nNumStateEntries
] = (char *)malloc((strlen(bitmapName
)+1) * sizeof(char));
155 strcpy(mappedBitmapState
[__nNumStateEntries
], bitmapName
);
156 LeaveCriticalSection(&__bitmapStateLock
);
160 static bool __CFUniCharLoadBytesFromFile(const char *fileName
, const void **bytes
) {
162 HANDLE bitmapFileHandle
= NULL
;
163 HANDLE mappingHandle
= NULL
;
165 if (__GetBitmapStateForName((char *)fileName
)) {
166 // The fileName has been tried in the past, so just return false
171 mappingHandle
= OpenFileMappingA(FILE_MAP_READ
, TRUE
, fileName
);
172 if (NULL
== mappingHandle
) {
173 if ((bitmapFileHandle
= CreateFileA(fileName
, GENERIC_READ
, FILE_SHARE_READ
, NULL
, OPEN_EXISTING
, FILE_ATTRIBUTE_NORMAL
, NULL
)) == INVALID_HANDLE_VALUE
) {
174 // We tried to get the bitmap file for mapping, but it's not there. Add it to the list of nonexistent bitmap files so
175 // we don't have to try this again in the future.
176 __AddBitmapStateForName((char *)fileName
);
179 mappingHandle
= CreateFileMappingA(bitmapFileHandle
, NULL
, PAGE_READONLY
, 0, 0, NULL
);
180 CloseHandle(bitmapFileHandle
);
181 if (!mappingHandle
) return false;
183 *bytes
= MapViewOfFileEx(mappingHandle
, FILE_MAP_READ
, 0, 0, 0, 0);
184 CloseHandle(mappingHandle
);
186 *bytes
= MapViewOfFileEx(mappingHandle
, FILE_MAP_READ
, 0, 0, 0, 0);
187 CloseHandle(mappingHandle
);
190 return (*bytes
? true : false);
195 int no_hang_fd
= open("/dev/autofs_nowait", 0);
196 if ((fd
= open(fileName
, O_RDONLY
, 0)) < 0) {
200 if (fstat(fd
, &statBuf
) < 0 || (*bytes
= mmap(0, statBuf
.st_size
, PROT_READ
, MAP_PRIVATE
, fd
, 0)) == (void *)-1) {
211 #endif // USE_MACHO_SEGMENT
/*
 * Locates the data blob called bitmapName and stores its address in *bytes.
 *
 * With USE_MACHO_SEGMENT the name is looked up as a section of the
 * "__UNICODE" segment; otherwise bitmapName is appended to the character
 * set directory path and the resulting file is loaded.
 *
 * Returns true on success, false when the data could not be obtained.
 */
static bool __CFUniCharLoadFile(const char *bitmapName, const void **bytes) {
#if USE_MACHO_SEGMENT
    const void *section = __CFGetSectDataPtr("__UNICODE", bitmapName, NULL);

    *bytes = section;
    return (section != NULL);
#else
    char path[MAXPATHLEN];

    __CFUniCharCharacterSetPath(path);
    strlcat(path, bitmapName, MAXPATHLEN);

    return __CFUniCharLoadBytesFromFile(path, bytes);
#endif
}
226 CF_INLINE
bool isControl(UTF32Char theChar
, uint16_t charset
, const void *data
) { // ISO Control
227 return (((theChar
<= 0x001F) || (theChar
>= 0x007F && theChar
<= 0x009F)) ? true : false);
230 CF_INLINE
bool isWhitespace(UTF32Char theChar
, uint16_t charset
, const void *data
) { // Space
231 return (((theChar
== 0x0020) || (theChar
== 0x0009) || (theChar
== 0x00A0) || (theChar
== 0x1680) || (theChar
>= 0x2000 && theChar
<= 0x200B) || (theChar
== 0x202F) || (theChar
== 0x205F) || (theChar
== 0x3000)) ? true : false);
234 CF_INLINE
bool isNewline(UTF32Char theChar
, uint16_t charset
, const void *data
) { // White space
235 return (((theChar
>= 0x000A && theChar
<= 0x000D) || (theChar
== 0x0085) || (theChar
== 0x2028) || (theChar
== 0x2029)) ? true : false);
238 CF_INLINE
bool isWhitespaceAndNewline(UTF32Char theChar
, uint16_t charset
, const void *data
) { // White space
239 return ((isWhitespace(theChar
, charset
, data
) || isNewline(theChar
, charset
, data
)) ? true : false);
244 const uint8_t **_planes
;
245 } __CFUniCharBitmapData
;
247 static char __CFUniCharUnicodeVersionString
[8] = {0, 0, 0, 0, 0, 0, 0, 0};
249 static uint32_t __CFUniCharNumberOfBitmaps
= 0;
250 static __CFUniCharBitmapData
*__CFUniCharBitmapDataArray
= NULL
;
252 static CFSpinLock_t __CFUniCharBitmapLock
= CFSpinLockInit
;
254 #if !defined(CF_UNICHAR_BITMAP_FILE)
255 #if USE_MACHO_SEGMENT
256 #define CF_UNICHAR_BITMAP_FILE "__csbitmaps"
258 #define CF_UNICHAR_BITMAP_FILE "CFCharacterSetBitmaps.bitmap"
262 static bool __CFUniCharLoadBitmapData(void) {
263 __CFUniCharBitmapData
*array
;
267 uint8_t currentPlane
;
269 const void *bitmapBase
;
271 int idx
, bitmapIndex
;
273 __CFSpinLock(&__CFUniCharBitmapLock
);
275 if (__CFUniCharBitmapDataArray
|| !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE
, &bytes
)) {
276 __CFSpinUnlock(&__CFUniCharBitmapLock
);
280 for (idx
= 0;idx
< 4 && ((const uint8_t *)bytes
)[idx
];idx
++) {
281 __CFUniCharUnicodeVersionString
[idx
* 2] = ((const uint8_t *)bytes
)[idx
];
282 __CFUniCharUnicodeVersionString
[idx
* 2 + 1] = '.';
284 __CFUniCharUnicodeVersionString
[(idx
< 4 ? idx
* 2 - 1 : 7)] = '\0';
286 headerSize
= CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes
+ 4)));
288 bitmapBase
= (uint8_t *)bytes
+ headerSize
;
289 bytes
= (uint8_t *)bytes
+ (sizeof(uint32_t) * 2);
290 headerSize
-= (sizeof(uint32_t) * 2);
292 __CFUniCharNumberOfBitmaps
= headerSize
/ (sizeof(uint32_t) * 2);
294 array
= (__CFUniCharBitmapData
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(__CFUniCharBitmapData
) * __CFUniCharNumberOfBitmaps
, 0);
296 for (idx
= 0;idx
< (int)__CFUniCharNumberOfBitmaps
;idx
++) {
297 bitmap
= (uint8_t *)bitmapBase
+ CFSwapInt32BigToHost(*((uint32_t *)bytes
)); bytes
= (uint8_t *)bytes
+ sizeof(uint32_t);
298 bitmapSize
= CFSwapInt32BigToHost(*((uint32_t *)bytes
)); bytes
= (uint8_t *)bytes
+ sizeof(uint32_t);
300 numPlanes
= bitmapSize
/ (8 * 1024);
301 numPlanes
= *(const uint8_t *)((char *)bitmap
+ (((numPlanes
- 1) * ((8 * 1024) + 1)) - 1)) + 1;
302 array
[idx
]._planes
= (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(const void *) * numPlanes
, 0);
303 array
[idx
]._numPlanes
= numPlanes
;
306 for (bitmapIndex
= 0;bitmapIndex
< numPlanes
;bitmapIndex
++) {
307 if (bitmapIndex
== currentPlane
) {
308 array
[idx
]._planes
[bitmapIndex
] = (const uint8_t *)bitmap
;
309 bitmap
= (uint8_t *)bitmap
+ (8 * 1024);
310 #if defined (__cplusplus)
311 currentPlane
= *(((const uint8_t*&)bitmap
)++);
313 currentPlane
= *((const uint8_t *)bitmap
++);
317 array
[idx
]._planes
[bitmapIndex
] = NULL
;
322 __CFUniCharBitmapDataArray
= array
;
324 __CFSpinUnlock(&__CFUniCharBitmapLock
);
329 __private_extern__
const char *__CFUniCharGetUnicodeVersionString(void) {
330 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
331 return __CFUniCharUnicodeVersionString
;
334 bool CFUniCharIsMemberOf(UTF32Char theChar
, uint32_t charset
) {
335 charset
= __CFUniCharMapCompatibilitySetID(charset
);
338 case kCFUniCharWhitespaceCharacterSet
:
339 return isWhitespace(theChar
, charset
, NULL
);
341 case kCFUniCharWhitespaceAndNewlineCharacterSet
:
342 return isWhitespaceAndNewline(theChar
, charset
, NULL
);
344 case kCFUniCharNewlineCharacterSet
:
345 return isNewline(theChar
, charset
, NULL
);
348 uint32_t tableIndex
= __CFUniCharMapExternalSetToInternalIndex(charset
);
350 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
352 if (tableIndex
< __CFUniCharNumberOfBitmaps
) {
353 __CFUniCharBitmapData
*data
= __CFUniCharBitmapDataArray
+ tableIndex
;
354 uint8_t planeNo
= (theChar
>> 16) & 0xFF;
356 // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16
357 if (charset
== kCFUniCharIllegalCharacterSet
) {
358 if (planeNo
== 0x0E) { // Plane 14
360 return (((theChar
== 0x01) || ((theChar
> 0x1F) && (theChar
< 0x80))) ? false : true);
361 } else if (planeNo
== 0x0F || planeNo
== 0x10) { // Plane 15 & 16
362 return ((theChar
& 0xFF) > 0xFFFD ? true : false);
364 return (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] ? !CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) : true);
366 } else if (charset
== kCFUniCharControlAndFormatterCharacterSet
) {
367 if (planeNo
== 0x0E) { // Plane 14
369 return (((theChar
== 0x01) || ((theChar
> 0x1F) && (theChar
< 0x80))) ? true : false);
371 return (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] ? CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) : false);
374 return (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] ? CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) : false);
382 const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset
, uint32_t plane
) {
383 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
385 charset
= __CFUniCharMapCompatibilitySetID(charset
);
387 if ((charset
> kCFUniCharWhitespaceAndNewlineCharacterSet
) && (charset
!= kCFUniCharIllegalCharacterSet
) && (charset
!= kCFUniCharNewlineCharacterSet
)) {
388 uint32_t tableIndex
= __CFUniCharMapExternalSetToInternalIndex(charset
);
390 if (tableIndex
< __CFUniCharNumberOfBitmaps
) {
391 __CFUniCharBitmapData
*data
= __CFUniCharBitmapDataArray
+ tableIndex
;
393 return (plane
< data
->_numPlanes
? data
->_planes
[plane
] : NULL
);
399 __private_extern__
uint8_t CFUniCharGetBitmapForPlane(uint32_t charset
, uint32_t plane
, void *bitmap
, bool isInverted
) {
400 const uint8_t *src
= CFUniCharGetBitmapPtrForPlane(charset
, plane
);
401 int numBytes
= (8 * 1024);
405 #if defined (__cplusplus)
406 while (numBytes
-- > 0) *(((uint8_t *&)bitmap
)++) = ~(*(src
++));
408 while (numBytes
-- > 0) *((uint8_t *)bitmap
++) = ~(*(src
++));
411 #if defined (__cplusplus)
412 while (numBytes
-- > 0) *(((uint8_t *&)bitmap
)++) = *(src
++);
414 while (numBytes
-- > 0) *((uint8_t *)bitmap
++) = *(src
++);
417 return kCFUniCharBitmapFilled
;
418 } else if (charset
== kCFUniCharIllegalCharacterSet
) {
419 __CFUniCharBitmapData
*data
= __CFUniCharBitmapDataArray
+ __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset
));
421 if (plane
< data
->_numPlanes
&& (src
= data
->_planes
[plane
])) {
423 #if defined (__cplusplus)
424 while (numBytes
-- > 0) *(((uint8_t *&)bitmap
)++) = *(src
++);
426 while (numBytes
-- > 0) *((uint8_t *)bitmap
++) = *(src
++);
429 #if defined (__cplusplus)
430 while (numBytes
-- > 0) *(((uint8_t *&)bitmap
)++) = ~(*(src
++));
432 while (numBytes
-- > 0) *((uint8_t *)bitmap
++) = ~(*(src
++));
435 return kCFUniCharBitmapFilled
;
436 } else if (plane
== 0x0E) { // Plane 14
438 uint8_t asciiRange
= (isInverted
? (uint8_t)0xFF : (uint8_t)0);
439 uint8_t otherRange
= (isInverted
? (uint8_t)0 : (uint8_t)0xFF);
441 #if defined (__cplusplus)
442 *(((uint8_t *&)bitmap
)++) = 0x02; // UE0001 LANGUAGE TAG
444 *((uint8_t *)bitmap
++) = 0x02; // UE0001 LANGUAGE TAG
446 for (idx
= 1;idx
< numBytes
;idx
++) {
447 #if defined (__cplusplus)
448 *(((uint8_t *&)bitmap
)++) = ((idx
>= (0x20 / 8) && (idx
< (0x80 / 8))) ? asciiRange
: otherRange
);
450 *((uint8_t *)bitmap
++) = ((idx
>= (0x20 / 8) && (idx
< (0x80 / 8))) ? asciiRange
: otherRange
);
453 return kCFUniCharBitmapFilled
;
454 } else if (plane
== 0x0F || plane
== 0x10) { // Plane 15 & 16
455 uint32_t value
= (isInverted
? ~0 : 0);
456 numBytes
/= 4; // for 32bit
458 while (numBytes
-- > 0) {
459 *((uint32_t *)bitmap
) = value
;
460 #if defined (__cplusplus)
461 bitmap
= (uint8_t *)bitmap
+ sizeof(uint32_t);
463 bitmap
+= sizeof(uint32_t);
466 *(((uint8_t *)bitmap
) - 5) = (isInverted
? 0x3F : 0xC0); // 0xFFFE & 0xFFFF
467 return kCFUniCharBitmapFilled
;
469 return (isInverted
? kCFUniCharBitmapEmpty
: kCFUniCharBitmapAll
);
470 } else if ((charset
< kCFUniCharDecimalDigitCharacterSet
) || (charset
== kCFUniCharNewlineCharacterSet
)) {
471 if (plane
) return (isInverted
? kCFUniCharBitmapAll
: kCFUniCharBitmapEmpty
);
473 uint8_t *bitmapBase
= (uint8_t *)bitmap
;
475 uint8_t nonFillValue
= (isInverted
? (uint8_t)0xFF : (uint8_t)0);
477 #if defined (__cplusplus)
478 while (numBytes
-- > 0) *(((uint8_t *&)bitmap
)++) = nonFillValue
;
480 while (numBytes
-- > 0) *((uint8_t *)bitmap
++) = nonFillValue
;
483 if ((charset
== kCFUniCharWhitespaceAndNewlineCharacterSet
) || (charset
== kCFUniCharNewlineCharacterSet
)) {
484 const UniChar newlines
[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029};
486 for (idx
= 0;idx
< (int)(sizeof(newlines
) / sizeof(*newlines
)); idx
++) {
488 CFUniCharRemoveCharacterFromBitmap(newlines
[idx
], bitmapBase
);
490 CFUniCharAddCharacterToBitmap(newlines
[idx
], bitmapBase
);
494 if (charset
== kCFUniCharNewlineCharacterSet
) return kCFUniCharBitmapFilled
;
498 CFUniCharRemoveCharacterFromBitmap(0x0009, bitmapBase
);
499 CFUniCharRemoveCharacterFromBitmap(0x0020, bitmapBase
);
500 CFUniCharRemoveCharacterFromBitmap(0x00A0, bitmapBase
);
501 CFUniCharRemoveCharacterFromBitmap(0x1680, bitmapBase
);
502 CFUniCharRemoveCharacterFromBitmap(0x202F, bitmapBase
);
503 CFUniCharRemoveCharacterFromBitmap(0x205F, bitmapBase
);
504 CFUniCharRemoveCharacterFromBitmap(0x3000, bitmapBase
);
506 CFUniCharAddCharacterToBitmap(0x0009, bitmapBase
);
507 CFUniCharAddCharacterToBitmap(0x0020, bitmapBase
);
508 CFUniCharAddCharacterToBitmap(0x00A0, bitmapBase
);
509 CFUniCharAddCharacterToBitmap(0x1680, bitmapBase
);
510 CFUniCharAddCharacterToBitmap(0x202F, bitmapBase
);
511 CFUniCharAddCharacterToBitmap(0x205F, bitmapBase
);
512 CFUniCharAddCharacterToBitmap(0x3000, bitmapBase
);
515 for (idx
= 0x2000;idx
<= 0x200B;idx
++) {
517 CFUniCharRemoveCharacterFromBitmap(idx
, bitmapBase
);
519 CFUniCharAddCharacterToBitmap(idx
, bitmapBase
);
522 return kCFUniCharBitmapFilled
;
524 return (isInverted
? kCFUniCharBitmapAll
: kCFUniCharBitmapEmpty
);
527 __private_extern__
uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset
) {
528 if ((charset
== kCFUniCharControlCharacterSet
) || (charset
== kCFUniCharControlAndFormatterCharacterSet
)) {
529 return 15; // 0 to 14
530 } else if (charset
< kCFUniCharDecimalDigitCharacterSet
) {
532 } else if (charset
== kCFUniCharIllegalCharacterSet
) {
537 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
539 numPlanes
= __CFUniCharBitmapDataArray
[__CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset
))]._numPlanes
;
545 // Mapping data loading
546 static const void **__CFUniCharMappingTables
= NULL
;
548 static CFSpinLock_t __CFUniCharMappingTableLock
= CFSpinLockInit
;
550 #if __CF_BIG_ENDIAN__
551 #if USE_MACHO_SEGMENT
552 #define MAPPING_TABLE_FILE "__data"
554 #define MAPPING_TABLE_FILE "CFUnicodeData-B.mapping"
557 #if USE_MACHO_SEGMENT
558 #define MAPPING_TABLE_FILE "__data"
560 #define MAPPING_TABLE_FILE "CFUnicodeData-L.mapping"
564 __private_extern__
const void *CFUniCharGetMappingData(uint32_t type
) {
566 __CFSpinLock(&__CFUniCharMappingTableLock
);
568 if (NULL
== __CFUniCharMappingTables
) {
570 const void *bodyBase
;
574 if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE
, &bytes
)) {
575 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
579 #if defined (__cplusplus)
580 bytes
= (uint8_t *)bytes
+ 4; // Skip Unicode version
581 headerSize
= *((uint8_t *)bytes
); bytes
= (uint8_t *)bytes
+ sizeof(uint32_t);
583 bytes
+= 4; // Skip Unicode version
584 headerSize
= *((uint32_t *)bytes
); bytes
+= sizeof(uint32_t);
586 headerSize
-= (sizeof(uint32_t) * 2);
587 bodyBase
= (char *)bytes
+ headerSize
;
589 count
= headerSize
/ sizeof(uint32_t);
591 __CFUniCharMappingTables
= (const void **)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(const void *) * count
, 0);
593 for (idx
= 0;idx
< count
;idx
++) {
594 #if defined (__cplusplus)
595 __CFUniCharMappingTables
[idx
] = (char *)bodyBase
+ *((uint32_t *)bytes
); bytes
= (uint8_t *)bytes
+ sizeof(uint32_t);
597 __CFUniCharMappingTables
[idx
] = (char *)bodyBase
+ *((uint32_t *)bytes
); bytes
+= sizeof(uint32_t);
602 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
604 return __CFUniCharMappingTables
[type
];
607 // Case mapping functions
608 #define DO_SPECIAL_CASE_MAPPING 1
610 static uint32_t *__CFUniCharCaseMappingTableCounts
= NULL
;
611 static uint32_t **__CFUniCharCaseMappingTable
= NULL
;
612 static const uint32_t **__CFUniCharCaseMappingExtraTable
= NULL
;
617 } __CFUniCharCaseMappings
;
619 /* Binary searches a __CFUniCharCaseMappings table (sorted by _key) and returns the matching _value */
620 static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings
*theTable
, uint32_t numElem
, UTF32Char character
) {
621 const __CFUniCharCaseMappings
*p
, *q
, *divider
;
623 if ((character
< theTable
[0]._key
) || (character
> theTable
[numElem
-1]._key
)) {
629 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
630 if (character
< divider
->_key
) { q
= divider
- 1; }
631 else if (character
> divider
->_key
) { p
= divider
+ 1; }
632 else { return divider
->_value
; }
637 #define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1)
639 static bool __CFUniCharLoadCaseMappingTable(void) {
640 uint32_t *countArray
;
643 if (NULL
== __CFUniCharMappingTables
) (void)CFUniCharGetMappingData(kCFUniCharToLowercase
);
644 if (NULL
== __CFUniCharMappingTables
) return false;
646 __CFSpinLock(&__CFUniCharMappingTableLock
);
648 if (__CFUniCharCaseMappingTableCounts
) {
649 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
653 countArray
= (uint32_t *)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(uint32_t) * NUM_CASE_MAP_DATA
+ sizeof(uint32_t *) * NUM_CASE_MAP_DATA
* 2, 0);
654 __CFUniCharCaseMappingTable
= (uint32_t **)((char *)countArray
+ sizeof(uint32_t) * NUM_CASE_MAP_DATA
);
655 __CFUniCharCaseMappingExtraTable
= (const uint32_t **)__CFUniCharCaseMappingTable
+ NUM_CASE_MAP_DATA
;
657 for (idx
= 0;idx
< NUM_CASE_MAP_DATA
;idx
++) {
658 countArray
[idx
] = *((uint32_t *)__CFUniCharMappingTables
[idx
]) / (sizeof(uint32_t) * 2);
659 __CFUniCharCaseMappingTable
[idx
] = ((uint32_t *)__CFUniCharMappingTables
[idx
]) + 1;
660 __CFUniCharCaseMappingExtraTable
[idx
] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable
[idx
] + *((uint32_t *)__CFUniCharMappingTables
[idx
]));
663 __CFUniCharCaseMappingTableCounts
= countArray
;
665 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
669 #if __CF_BIG_ENDIAN__
670 #define TURKISH_LANG_CODE (0x7472) // tr
671 #define LITHUANIAN_LANG_CODE (0x6C74) // lt
672 #define AZERI_LANG_CODE (0x617A) // az
674 #define TURKISH_LANG_CODE (0x7274) // tr
675 #define LITHUANIAN_LANG_CODE (0x746C) // lt
676 #define AZERI_LANG_CODE (0x7A61) // az
679 CFIndex
CFUniCharMapCaseTo(UTF32Char theChar
, UTF16Char
*convertedChar
, CFIndex maxLength
, uint32_t ctype
, uint32_t flags
, const uint8_t *langCode
) {
680 __CFUniCharBitmapData
*data
;
681 uint8_t planeNo
= (theChar
>> 16) & 0xFF;
685 #if DO_SPECIAL_CASE_MAPPING
686 if (flags
& kCFUniCharCaseMapFinalSigma
) {
687 if (theChar
== 0x03A3) { // Final sigma
688 *convertedChar
= (ctype
== kCFUniCharToLowercase
? 0x03C2 : 0x03A3);
694 switch (*(uint16_t *)langCode
) {
695 case LITHUANIAN_LANG_CODE
:
696 if (theChar
== 0x0307 && (flags
& kCFUniCharCaseMapAfter_i
)) {
698 } else if (ctype
== kCFUniCharToLowercase
) {
699 if (flags
& kCFUniCharCaseMapMoreAbove
) {
701 case 0x0049: // LATIN CAPITAL LETTER I
702 *(convertedChar
++) = 0x0069;
703 *(convertedChar
++) = 0x0307;
706 case 0x004A: // LATIN CAPITAL LETTER J
707 *(convertedChar
++) = 0x006A;
708 *(convertedChar
++) = 0x0307;
711 case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK
712 *(convertedChar
++) = 0x012F;
713 *(convertedChar
++) = 0x0307;
720 case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE
721 *(convertedChar
++) = 0x0069;
722 *(convertedChar
++) = 0x0307;
723 *(convertedChar
++) = 0x0300;
726 case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE
727 *(convertedChar
++) = 0x0069;
728 *(convertedChar
++) = 0x0307;
729 *(convertedChar
++) = 0x0301;
732 case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE
733 *(convertedChar
++) = 0x0069;
734 *(convertedChar
++) = 0x0307;
735 *(convertedChar
++) = 0x0303;
743 case TURKISH_LANG_CODE
:
744 case AZERI_LANG_CODE
:
745 if ((theChar
== 0x0049) || (theChar
== 0x0131)) { // LATIN CAPITAL LETTER I & LATIN SMALL LETTER DOTLESS I
746 *convertedChar
= (((ctype
== kCFUniCharToLowercase
) || (ctype
== kCFUniCharCaseFold
)) ? ((kCFUniCharCaseMapMoreAbove
& flags
) ? 0x0069 : 0x0131) : 0x0049);
748 } else if ((theChar
== 0x0069) || (theChar
== 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE
749 *convertedChar
= (((ctype
== kCFUniCharToLowercase
) || (ctype
== kCFUniCharCaseFold
)) ? 0x0069 : 0x0130);
751 } else if (theChar
== 0x0307 && (kCFUniCharCaseMapAfter_i
& flags
)) { // COMBINING DOT ABOVE AFTER_i
752 if (ctype
== kCFUniCharToLowercase
) {
755 *convertedChar
= 0x0307;
764 #endif DO_SPECIAL_CASE_MAPPING
766 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
768 data
= __CFUniCharBitmapDataArray
+ __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(ctype
+ kCFUniCharHasNonSelfLowercaseCharacterSet
));
770 if (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] && CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) && (__CFUniCharCaseMappingTableCounts
|| __CFUniCharLoadCaseMappingTable())) {
771 uint32_t value
= __CFUniCharGetMappedCase((const __CFUniCharCaseMappings
*)__CFUniCharCaseMappingTable
[ctype
], __CFUniCharCaseMappingTableCounts
[ctype
], theChar
);
773 if (!value
&& ctype
== kCFUniCharToTitlecase
) {
774 value
= __CFUniCharGetMappedCase((const __CFUniCharCaseMappings
*)__CFUniCharCaseMappingTable
[kCFUniCharToUppercase
], __CFUniCharCaseMappingTableCounts
[kCFUniCharToUppercase
], theChar
);
775 if (value
) ctype
= kCFUniCharToUppercase
;
779 CFIndex count
= CFUniCharConvertFlagToCount(value
);
782 if (value
& kCFUniCharNonBmpFlag
) {
784 value
= (value
& 0xFFFFFF) - 0x10000;
785 *(convertedChar
++) = (UTF16Char
)(value
>> 10) + 0xD800UL
;
786 *(convertedChar
++) = (UTF16Char
)(value
& 0x3FF) + 0xDC00UL
;
790 *convertedChar
= (UTF16Char
)value
;
793 } else if (count
< maxLength
) {
794 const uint32_t *extraMapping
= __CFUniCharCaseMappingExtraTable
[ctype
] + (value
& 0xFFFFFF);
796 if (value
& kCFUniCharNonBmpFlag
) {
797 CFIndex copiedLen
= 0;
799 while (count
-- > 0) {
800 value
= *(extraMapping
++);
801 if (value
> 0xFFFF) {
802 if (copiedLen
+ 2 >= maxLength
) break;
803 value
= (value
& 0xFFFFFF) - 0x10000;
804 convertedChar
[copiedLen
++] = (UTF16Char
)(value
>> 10) + 0xD800UL
;
805 convertedChar
[copiedLen
++] = (UTF16Char
)(value
& 0x3FF) + 0xDC00UL
;
807 if (copiedLen
+ 1 >= maxLength
) break;
808 convertedChar
[copiedLen
++] = value
;
811 if (!count
) return copiedLen
;
815 for (idx
= 0;idx
< count
;idx
++) *(convertedChar
++) = (UTF16Char
)*(extraMapping
++);
820 } else if (ctype
== kCFUniCharCaseFold
) {
821 ctype
= kCFUniCharToLowercase
;
825 if (theChar
> 0xFFFF) { // non-BMP
826 theChar
= (theChar
& 0xFFFFFF) - 0x10000;
827 *(convertedChar
++) = (UTF16Char
)(theChar
>> 10) + 0xD800UL
;
828 *(convertedChar
++) = (UTF16Char
)(theChar
& 0x3FF) + 0xDC00UL
;
831 *convertedChar
= theChar
;
836 CFIndex
CFUniCharMapTo(UniChar theChar
, UniChar
*convertedChar
, CFIndex maxLength
, uint16_t ctype
, uint32_t flags
) {
837 if (ctype
== kCFUniCharCaseFold
+ 1) { // kCFUniCharDecompose
838 if (CFUniCharIsDecomposableCharacter(theChar
, false)) {
839 UTF32Char buffer
[MAX_DECOMPOSED_LENGTH
];
840 CFIndex usedLength
= CFUniCharDecomposeCharacter(theChar
, buffer
, MAX_DECOMPOSED_LENGTH
);
843 for (idx
= 0;idx
< usedLength
;idx
++) *(convertedChar
++) = buffer
[idx
];
846 *convertedChar
= theChar
;
850 return CFUniCharMapCaseTo(theChar
, convertedChar
, maxLength
, ctype
, flags
, NULL
);
854 CF_INLINE
bool __CFUniCharIsMoreAbove(UTF16Char
*buffer
, CFIndex length
) {
855 UTF32Char currentChar
;
858 while (length
-- > 0) {
859 currentChar
= *(buffer
)++;
860 if (CFUniCharIsSurrogateHighCharacter(currentChar
) && (length
> 0) && CFUniCharIsSurrogateLowCharacter(*(buffer
+ 1))) {
861 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(currentChar
, *(buffer
++));
864 if (!CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) break;
866 property
= CFUniCharGetCombiningPropertyForCharacter(currentChar
, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16) & 0xFF));
868 if (property
== 230) return true; // Above priority
/*
 * __CFUniCharIsAfter_i
 *
 * Helper used by the Lithuanian branch of
 * CFUniCharGetConditionalCaseMappingFlags(), which passes the text buffer and
 * the current index as length.
 *
 * Behavior that is visible in this listing:
 *  - returns 0 immediately when length < 1;
 *  - walks backwards with *(--buffer), joining a low surrogate with a
 *    preceding high surrogate into one code point;
 *  - leaves the walk at the first character that is not a member of
 *    kCFUniCharNonBaseCharacterSet;
 *  - returns false when a walked character has combining property 230;
 *  - decomposes the character the walk ended on and returns false when any
 *    decomposition element after the first has combining property 230.
 *
 * NOTE(review): the loop pre-decrements buffer before its first read, so
 * buffer must point one past the last unit to inspect when the loop starts;
 * presumably it is advanced by length beforehand, but that statement is not
 * visible here -- TODO confirm.
 * NOTE(review): no comparison of the base character against i or another
 * soft-dotted letter is visible; presumably the caller guarantees this
 * through lastFlags -- confirm.
 * NOTE(review): declarations of property and idx, and the final return, do
 * not appear in this listing.
 */
873 CF_INLINE
bool __CFUniCharIsAfter_i(UTF16Char
*buffer
, CFIndex length
) {
874 UTF32Char currentChar
= 0;
// Scratch space for the canonical decomposition of the base character.
876 UTF32Char decomposed
[MAX_DECOMPOSED_LENGTH
];
877 CFIndex decompLength
;
880 if (length
< 1) return 0;
// Backward walk over the units that precede the current position.
883 while (length
-- > 1) {
884 currentChar
= *(--buffer
);
885 if (CFUniCharIsSurrogateLowCharacter(currentChar
)) {
// length > 1 ensures there is still a unit in front to pair with.
886 if ((length
> 1) && CFUniCharIsSurrogateHighCharacter(*(buffer
- 1))) {
887 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*(--buffer
), currentChar
);
893 if (!CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) break;
895 property
= CFUniCharGetCombiningPropertyForCharacter(currentChar
, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16) & 0xFF));
897 if (property
== 230) return false; // Above priority
// After the walk: fetch one more unit, or complete a pending surrogate pair.
// The condition selecting the first arm is not visible in this listing.
900 currentChar
= *(--buffer
);
901 } else if (CFUniCharIsSurrogateLowCharacter(currentChar
) && CFUniCharIsSurrogateHighCharacter(*(--buffer
))) {
902 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*buffer
, currentChar
);
905 decompLength
= CFUniCharDecomposeCharacter(currentChar
, decomposed
, MAX_DECOMPOSED_LENGTH
);
// NOTE(review): this value is overwritten by the loop below before any
// visible read -- looks like a dead store; confirm.
906 currentChar
= *decomposed
;
// Inspect the marks that follow the base in the decomposition (index 1 on).
909 for (idx
= 1;idx
< decompLength
;idx
++) {
910 currentChar
= decomposed
[idx
];
911 property
= CFUniCharGetCombiningPropertyForCharacter(currentChar
, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16) & 0xFF));
913 if (property
== 230) return false; // Above priority
/*
 * CFUniCharGetConditionalCaseMappingFlags
 *
 * Computes context-dependent case-mapping flags for theChar. buffer/length
 * describe the surrounding UTF-16 text and currentIndex the position of
 * theChar within it (presumably buffer[currentIndex]; the scans below start
 * at currentIndex - 1 and currentIndex + 1).
 *
 * Visible results:
 *  - GREEK CAPITAL LETTER SIGMA (0x03A3), lowercase mapping, currentIndex > 0:
 *    kCFUniCharCaseMapFinalSigma once the backward and forward context checks
 *    have not returned 0.
 *  - Lithuanian langCode: kCFUniCharCaseMapAfter_i and/or
 *    kCFUniCharCaseMapMoreAbove depending on theChar, lastFlags and the
 *    helpers __CFUniCharIsAfter_i() / __CFUniCharIsMoreAbove().
 *  - Turkish or Azeri langCode, lowercase mapping: kCFUniCharCaseMapAfter_i
 *    for 0x0307 when lastFlags carries kCFUniCharCaseMapMoreAbove, and
 *    kCFUniCharCaseMapMoreAbove for 0x0049 when the next unit is 0x0307.
 *  - 0 in the remaining visible return expressions.
 *
 * NOTE(review): langCode is read through a (const uint16_t *) cast. That
 * assumes at least two readable bytes and suitable alignment, and the
 * comparison result depends on host byte order unless the *_LANG_CODE
 * constants account for it -- confirm.
 * NOTE(review): the declaration of otherChar and several closing statements
 * are not visible in this listing.
 */
918 __private_extern__
uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar
, UTF16Char
*buffer
, CFIndex currentIndex
, CFIndex length
, uint32_t type
, const uint8_t *langCode
, uint32_t lastFlags
) {
919 if (theChar
== 0x03A3) { // GREEK CAPITAL LETTER SIGMA
920 if ((type
== kCFUniCharToLowercase
) && (currentIndex
> 0)) {
// start and end bracket the text; buffer itself is reused as the scan cursor.
921 UTF16Char
*start
= buffer
;
922 UTF16Char
*end
= buffer
+ length
;
925 // First check if we're after a cased character
926 buffer
+= (currentIndex
- 1);
// NOTE(review): when the cursor equals start, the post-decrement below moves
// it to start - 1 before the loop condition is re-tested. Forming a pointer
// before the first element is outside what ISO C guarantees -- confirm this
// is acceptable on the supported targets.
927 while (start
<= buffer
) {
928 otherChar
= *(buffer
--);
929 if (CFUniCharIsSurrogateLowCharacter(otherChar
) && (start
<= buffer
) && CFUniCharIsSurrogateHighCharacter(*buffer
)) {
930 otherChar
= CFUniCharGetLongCharacterForSurrogatePair(*(buffer
--), otherChar
);
// Case-ignorable characters are skipped; a non-ignorable character that is
// neither uppercase nor lowercase means there is no cased letter before us.
932 if (!CFUniCharIsMemberOf(otherChar
, kCFUniCharCaseIgnorableCharacterSet
)) {
933 if (!CFUniCharIsMemberOf(otherChar
, kCFUniCharUppercaseLetterCharacterSet
) && !CFUniCharIsMemberOf(otherChar
, kCFUniCharLowercaseLetterCharacterSet
)) return 0; // Uppercase set contains titlecase
938 // Next check if we're before a cased character
939 buffer
= start
+ currentIndex
+ 1;
940 while (buffer
< end
) {
941 otherChar
= *(buffer
++);
942 if (CFUniCharIsSurrogateHighCharacter(otherChar
) && (buffer
< end
) && CFUniCharIsSurrogateLowCharacter(*buffer
)) {
943 otherChar
= CFUniCharGetLongCharacterForSurrogatePair(otherChar
, *(buffer
++));
// A cased letter after the sigma disqualifies the final-sigma form.
945 if (!CFUniCharIsMemberOf(otherChar
, kCFUniCharCaseIgnorableCharacterSet
)) {
946 if (CFUniCharIsMemberOf(otherChar
, kCFUniCharUppercaseLetterCharacterSet
) || CFUniCharIsMemberOf(otherChar
, kCFUniCharLowercaseLetterCharacterSet
)) return 0; // Uppercase set contains titlecase
950 return kCFUniCharCaseMapFinalSigma
;
// Language-specific rules apply only when a language code was supplied.
952 } else if (langCode
) {
953 if (*((const uint16_t *)langCode
) == LITHUANIAN_LANG_CODE
) {
// COMBINING DOT ABOVE (0x0307) is re-evaluated only if the previous character
// produced both After_i and MoreAbove.
954 if ((theChar
== 0x0307) && ((kCFUniCharCaseMapAfter_i
|kCFUniCharCaseMapMoreAbove
) & lastFlags
) == (kCFUniCharCaseMapAfter_i
|kCFUniCharCaseMapMoreAbove
)) {
955 return (__CFUniCharIsAfter_i(buffer
, currentIndex
) ? kCFUniCharCaseMapAfter_i
: 0);
956 } else if (type
== kCFUniCharToLowercase
) {
// 0x0049, 0x004A and 0x012E: report MoreAbove when the following marks
// include one with combining property 230.
957 if ((theChar
== 0x0049) || (theChar
== 0x004A) || (theChar
== 0x012E)) {
// NOTE(review): ++currentIndex in the first argument and the read of
// currentIndex in the second argument are unsequenced relative to each other
// (ISO C does not order argument evaluation), so the length passed may be
// computed from either value and the expression is formally undefined.
// Consider incrementing before the call -- confirm.
958 return (__CFUniCharIsMoreAbove(buffer
+ (++currentIndex
), length
- currentIndex
) ? kCFUniCharCaseMapMoreAbove
: 0);
960 } else if ((theChar
== 'i') || (theChar
== 'j')) {
// NOTE(review): same unsequenced ++currentIndex / currentIndex pattern as in
// the call above.
961 return (__CFUniCharIsMoreAbove(buffer
+ (++currentIndex
), length
- currentIndex
) ? (kCFUniCharCaseMapAfter_i
|kCFUniCharCaseMapMoreAbove
) : 0);
963 } else if ((*((const uint16_t *)langCode
) == TURKISH_LANG_CODE
) || (*((const uint16_t *)langCode
) == AZERI_LANG_CODE
)) {
964 if (type
== kCFUniCharToLowercase
) {
965 if (theChar
== 0x0307) {
966 return (kCFUniCharCaseMapMoreAbove
& lastFlags
? kCFUniCharCaseMapAfter_i
: 0);
967 } else if (theChar
== 0x0049) {
// Here the increment is sequenced before the index use by the && operator.
968 return (((++currentIndex
< length
) && (buffer
[currentIndex
] == 0x0307)) ? kCFUniCharCaseMapMoreAbove
: 0);
976 // Unicode property database
// Table of per-property plane pointers. It is built on first use by
// CFUniCharGetUnicodePropertyDataForPlane() and released by
// __CFUniCharCleanup().
977 static __CFUniCharBitmapData
*__CFUniCharUnicodePropertyTable
= NULL
;
// Number of entries in the table above; set when the table is built and
// reset to 0 by __CFUniCharCleanup().
978 static int __CFUniCharUnicodePropertyTableCount
= 0;
// Spin lock taken around construction and teardown of the table.
980 static CFSpinLock_t __CFUniCharPropTableLock
= CFSpinLockInit
;
// Name handed to __CFUniCharLoadFile() to locate the property database. Two
// alternative definitions are selected by USE_MACHO_SEGMENT.
982 #if USE_MACHO_SEGMENT
983 #define PROP_DB_FILE "__properties"
985 #define PROP_DB_FILE "CFUniCharPropertyDatabase.data"
/*
 * CFUniCharGetUnicodePropertyDataForPlane
 *
 * Returns the data pointer recorded for the given plane of the given
 * property type, or NULL when plane is not below the number of planes
 * recorded for that property.
 *
 * On first use (table pointer still NULL) the property database is loaded
 * through __CFUniCharLoadFile(PROP_DB_FILE, ...) and parsed under
 * __CFUniCharPropTableLock:
 *   - 4 bytes are skipped (Unicode version, per the inline comment);
 *   - a big-endian uint32 header size follows; after subtracting the two
 *     leading words, header size / sizeof(uint32_t) gives the entry count;
 *   - the body starts header-size bytes after the header words;
 *   - each body entry starts with a plane-count byte, followed by one size
 *     byte per plane; plane data is laid out after those bytes rounded up as
 *     computed below, each present plane occupying size * 256 bytes;
 *   - the header word for the entry gives the distance to the next entry.
 * The plane pointers stored in the table point into the loaded bytes.
 *
 * NOTE(review): propertyType indexes the table without being compared with
 * __CFUniCharUnicodePropertyTableCount -- confirm callers only pass valid
 * property types.
 * NOTE(review): headerSize, planeCount and planeSize come straight from the
 * file and the results of CFAllocatorAllocate() are used without a visible
 * NULL check -- confirm the file is trusted.
 * NOTE(review): the final table read happens after the lock is released;
 * __CFUniCharCleanup() frees the table under the same lock -- confirm the
 * two cannot overlap.
 * NOTE(review): declarations of bytes, headerSize, idx, count and planeSize,
 * and the statement that follows the unlock on load failure, are not visible
 * in this listing.
 */
988 const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType
, uint32_t plane
) {
990 __CFSpinLock(&__CFUniCharPropTableLock
);
// Build the table only once.
992 if (NULL
== __CFUniCharUnicodePropertyTable
) {
993 __CFUniCharBitmapData
*table
;
995 const void *bodyBase
;
996 const void *planeBase
;
999 int planeIndex
, planeCount
;
// Loading failed: drop the lock before leaving.
1002 if (!__CFUniCharLoadFile(PROP_DB_FILE
, &bytes
)) {
1003 __CFSpinUnlock(&__CFUniCharPropTableLock
);
// The C++ variant spells out byte-wise pointer steps with casts; the plain C
// variant applies += directly to bytes.
1007 #if defined (__cplusplus)
1008 bytes
= (uint8_t*)bytes
+ 4; // Skip Unicode version
1009 headerSize
= CFSwapInt32BigToHost(*((uint32_t *)bytes
)); bytes
= (uint8_t *)bytes
+ sizeof(uint32_t);
1011 bytes
+= 4; // Skip Unicode version
1012 headerSize
= CFSwapInt32BigToHost(*((uint32_t *)bytes
)); bytes
+= sizeof(uint32_t);
// Discount the two words consumed above; the rest of the header is one
// uint32 per property entry.
1015 headerSize
-= (sizeof(uint32_t) * 2);
1016 bodyBase
= (char *)bytes
+ headerSize
;
1018 count
= headerSize
/ sizeof(uint32_t);
1019 __CFUniCharUnicodePropertyTableCount
= count
;
1021 table
= (__CFUniCharBitmapData
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(__CFUniCharBitmapData
) * count
, 0);
1023 for (idx
= 0;idx
< count
;idx
++) {
// First byte of the entry is the number of planes.
1024 planeCount
= *((const uint8_t *)bodyBase
);
// Plane data begins at bodyBase + planeCount, padded up to a multiple of 4.
1025 planeBase
= (char *)bodyBase
+ planeCount
+ (planeCount
% 4 ? 4 - (planeCount
% 4) : 0);
1026 table
[idx
]._planes
= (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(const void *) * planeCount
, 0);
1028 for (planeIndex
= 0;planeIndex
< planeCount
;planeIndex
++) {
// Size byte for this plane; zero means the plane has no data.
1029 if ((planeSize
= ((const uint8_t *)bodyBase
)[planeIndex
+ 1])) {
1030 table
[idx
]._planes
[planeIndex
] = (const uint8_t *)planeBase
;
1031 #if defined (__cplusplus)
1032 planeBase
= (char*)planeBase
+ (planeSize
* 256);
1034 planeBase
+= (planeSize
* 256);
1037 table
[idx
]._planes
[planeIndex
] = NULL
;
1041 table
[idx
]._numPlanes
= planeCount
;
// Advance to the next entry using the size stored in the header word, then
// step to the next header word.
1042 #if defined (__cplusplus)
1043 bodyBase
= (const uint8_t *)bodyBase
+ (CFSwapInt32BigToHost(*(uint32_t *)bytes
));
1044 ((uint32_t *&)bytes
) ++;
// NOTE(review): postfix ++ binds tighter than the cast, so bytes advances by
// the element size of its declared type rather than by sizeof(uint32_t). The
// byte-wise += 4 used earlier in this branch suggests a one-byte step here,
// whereas the C++ variant above steps one uint32_t -- confirm both variants
// agree (the declaration of bytes is not visible in this listing).
1046 bodyBase
+= (CFSwapInt32BigToHost(*((uint32_t *)bytes
++)));
// Publish the finished table.
1050 __CFUniCharUnicodePropertyTable
= table
;
1053 __CFSpinUnlock(&__CFUniCharPropTableLock
);
1055 return (plane
< __CFUniCharUnicodePropertyTable
[propertyType
]._numPlanes
? __CFUniCharUnicodePropertyTable
[propertyType
]._planes
[plane
] : NULL
);
/*
 * CFUniCharGetNumberOfPlanesForUnicodePropertyData
 *
 * Returns the _numPlanes value recorded for propertyType in the property
 * table.
 *
 * NOTE(review): the table is dereferenced without a NULL check and
 * propertyType is not range-checked here. If the loader can leave the table
 * NULL (for example when the database file cannot be loaded) this would
 * fault -- confirm.
 */
1058 __private_extern__
uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType
) {
// Called only for its side effect of building the table on first use; the
// returned plane pointer is discarded.
1059 (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType
, 0);
1060 return __CFUniCharUnicodePropertyTable
[propertyType
]._numPlanes
;
/*
 * CFUniCharGetUnicodeProperty
 *
 * Looks up a Unicode property value for character.
 *
 *  - kCFUniCharCombiningProperty is answered by
 *    CFUniCharGetCombiningPropertyForCharacter();
 *  - kCFUniCharBidiProperty is answered by
 *    CFUniCharGetBidiPropertyForCharacter().
 * In both cases the plane data passed in is the one selected by bits 16..23
 * of the code point.
 *
 * NOTE(review): the result for any other propertyType is not visible in this
 * listing.
 */
1063 __private_extern__
uint32_t CFUniCharGetUnicodeProperty(UTF32Char character
, uint32_t propertyType
) {
1064 if (propertyType
== kCFUniCharCombiningProperty
) {
1065 return CFUniCharGetCombiningPropertyForCharacter(character
, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType
, (character
>> 16) & 0xFF));
1066 } else if (propertyType
== kCFUniCharBidiProperty
) {
1067 return CFUniCharGetBidiPropertyForCharacter(character
, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType
, (character
>> 16) & 0xFF));
1076 The UTF8 conversion in the following function is derived from ConvertUTF.c
1079 * Copyright 2001 Unicode, Inc.
1083 * This source code is provided as is by Unicode, Inc. No claims are
1084 * made as to fitness for any particular purpose. No warranties of any
1085 * kind are expressed or implied. The recipient agrees to determine
1086 * applicability of information provided. If this file has been
1087 * purchased on magnetic or optical media from Unicode, Inc., the
1088 * sole remedy for any claim will be exchange of defective media
1089 * within 90 days of receipt.
1091 * Limitations on Rights to Redistribute This Code
1093 * Unicode, Inc. hereby grants the right to freely use the information
1094 * supplied in this file in the creation of products supporting the
1095 * Unicode Standard, and to make copies of this file in any form
1096 * for internal or external distribution as long as this notice
// Code point 0xFFFD. CFUniCharFillDestinationBuffer() assigns it to the
// current character in its UTF-8 branch (presumably for values outside the
// encodable range -- the enclosing branch is not visible; TODO confirm).
1099 #define UNI_REPLACEMENT_CHAR (0x0000FFFDUL)
/*
 * CFUniCharFillDestinationBuffer
 *
 * Encodes srcLength UTF-32 code points from src into the buffer that *dst
 * points to, in the encoding selected by dstFormat:
 *   - kCFUniCharUTF16Format: UTF16Char units, surrogate pairs for values
 *     above 0xFFFF;
 *   - kCFUniCharUTF8Format:  bytes, 1 to 4 per code point;
 *   - otherwise:             UTF32Char values copied as they are.
 *
 * usedLength starts from *filledLength and the final value is written back
 * to *filledLength. The visible overflow checks return false as soon as
 * usedLength exceeds dstLength; on those paths *filledLength is not updated.
 *
 * NOTE(review): several statements are missing from this listing (the
 * usedLength increments in the UTF-16 and UTF-32 branches, the assignments
 * to bytesToWrite, any update of *dst and the final return). The notes below
 * rely only on what is visible.
 * NOTE(review): no upper range check is visible in the UTF-16 branch; a value
 * above 0x10FFFF would yield units outside the surrogate ranges -- confirm
 * inputs are always valid scalar values.
 */
1101 bool CFUniCharFillDestinationBuffer(const UTF32Char
*src
, CFIndex srcLength
, void **dst
, CFIndex dstLength
, CFIndex
*filledLength
, uint32_t dstFormat
) {
1102 UTF32Char currentChar
;
// Continue counting from what the caller has already filled.
1103 CFIndex usedLength
= *filledLength
;
1105 if (dstFormat
== kCFUniCharUTF16Format
) {
1106 UTF16Char
*dstBuffer
= (UTF16Char
*)*dst
;
1108 while (srcLength
-- > 0) {
1109 currentChar
= *(src
++);
1111 if (currentChar
> 0xFFFF) { // Non-BMP
1114 if (usedLength
> dstLength
) return false;
// Split the 20-bit offset into a high (0xD800 based) and a low (0xDC00
// based) surrogate.
1115 currentChar
-= 0x10000;
1116 *(dstBuffer
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
1117 *(dstBuffer
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
// BMP value: one unit.
1122 if (usedLength
> dstLength
) return false;
1123 *(dstBuffer
++) = (UTF16Char
)currentChar
;
1129 } else if (dstFormat
== kCFUniCharUTF8Format
) {
1130 uint8_t *dstBuffer
= (uint8_t *)*dst
;
1131 uint16_t bytesToWrite
= 0;
// Continuation bytes are formed as (value | 0x80) & 0xBF, i.e. 10xxxxxx.
1132 const UTF32Char byteMask
= 0xBF;
1133 const UTF32Char byteMark
= 0x80;
// Lead-byte prefix, indexed by the total number of bytes in the sequence.
1134 static const uint8_t firstByteMark
[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
1136 while (srcLength
-- > 0) {
1137 currentChar
= *(src
++);
1139 /* Figure out how many bytes the result will require */
// NOTE(review): the last range test admits values up to 0x1FFFFF, and values
// in the surrogate range fall under the 0x10000 test; neither is a valid
// Unicode scalar value in current UTF-8 -- confirm whether stricter checks
// are wanted.
1140 if (currentChar
< (UTF32Char
)0x80) {
1142 } else if (currentChar
< (UTF32Char
)0x800) {
1144 } else if (currentChar
< (UTF32Char
)0x10000) {
1146 } else if (currentChar
< (UTF32Char
)0x200000) {
1150 currentChar
= UNI_REPLACEMENT_CHAR
;
1153 usedLength
+= bytesToWrite
;
1156 if (usedLength
> dstLength
) return false;
// The sequence is written back to front: jump to its end, emit continuation
// bytes while shifting 6 bits at a time, and finish with the lead byte.
1158 dstBuffer
+= bytesToWrite
;
1159 switch (bytesToWrite
) { /* note: everything falls through. */
1160 case 4: *--dstBuffer
= (currentChar
| byteMark
) & byteMask
; currentChar
>>= 6;
1161 case 3: *--dstBuffer
= (currentChar
| byteMark
) & byteMask
; currentChar
>>= 6;
1162 case 2: *--dstBuffer
= (currentChar
| byteMark
) & byteMask
; currentChar
>>= 6;
1163 case 1: *--dstBuffer
= currentChar
| firstByteMark
[bytesToWrite
];
// dstBuffer is back at the start of the sequence; move past it.
1165 dstBuffer
+= bytesToWrite
;
// Remaining format: plain UTF-32 copy.
1171 UTF32Char
*dstBuffer
= (UTF32Char
*)*dst
;
1173 while (srcLength
-- > 0) {
1174 currentChar
= *(src
++);
1178 if (usedLength
> dstLength
) return false;
1179 *(dstBuffer
++) = currentChar
;
// Report the new fill level to the caller.
1186 *filledLength
= usedLength
;
/*
 * __CFUniCharCleanup
 *
 * Releases the lazily built lookup tables of this file and resets the
 * corresponding globals so that they read as not loaded. Three groups are
 * handled, each under its own spin lock:
 *   1. the bitmap data array (__CFUniCharBitmapLock),
 *   2. the mapping tables and case-mapping counts
 *      (__CFUniCharMappingTableLock),
 *   3. the Unicode property table (__CFUniCharPropTableLock).
 * All deallocations go through kCFAllocatorSystemDefault.
 *
 * NOTE(review): the declaration of idx and the braces of the function body
 * are not visible in this listing.
 */
1192 void __CFUniCharCleanup(void)
1196 // cleanup memory allocated by __CFUniCharLoadBitmapData()
1197 __CFSpinLock(&__CFUniCharBitmapLock
);
1199 if (__CFUniCharBitmapDataArray
!= NULL
) {
// Free the per-entry plane pointer arrays first, then the array itself.
1200 for (idx
= 0; idx
< (int)__CFUniCharNumberOfBitmaps
; idx
++) {
1201 CFAllocatorDeallocate(kCFAllocatorSystemDefault
, __CFUniCharBitmapDataArray
[idx
]._planes
);
1202 __CFUniCharBitmapDataArray
[idx
]._planes
= NULL
;
1205 CFAllocatorDeallocate(kCFAllocatorSystemDefault
, __CFUniCharBitmapDataArray
);
1206 __CFUniCharBitmapDataArray
= NULL
;
1207 __CFUniCharNumberOfBitmaps
= 0;
1210 __CFSpinUnlock(&__CFUniCharBitmapLock
);
1212 // cleanup memory allocated by CFUniCharGetMappingData()
1213 __CFSpinLock(&__CFUniCharMappingTableLock
);
1215 if (__CFUniCharMappingTables
!= NULL
) {
1216 CFAllocatorDeallocate(kCFAllocatorSystemDefault
, __CFUniCharMappingTables
);
1217 __CFUniCharMappingTables
= NULL
;
1220 // cleanup memory allocated by __CFUniCharLoadCaseMappingTable()
1221 if (__CFUniCharCaseMappingTableCounts
!= NULL
) {
1222 CFAllocatorDeallocate(kCFAllocatorSystemDefault
, __CFUniCharCaseMappingTableCounts
);
1223 __CFUniCharCaseMappingTableCounts
= NULL
;
// These two pointers are only cleared, not deallocated; presumably they refer
// to memory owned elsewhere (for example the block freed just above or the
// loaded file) -- TODO confirm.
1225 __CFUniCharCaseMappingTable
= NULL
;
1226 __CFUniCharCaseMappingExtraTable
= NULL
;
1229 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
1231 // cleanup memory allocated by CFUniCharGetUnicodePropertyDataForPlane()
1232 __CFSpinLock(&__CFUniCharPropTableLock
);
1234 if (__CFUniCharUnicodePropertyTable
!= NULL
) {
// Same order as for the bitmap array: plane arrays first, then the table.
1235 for (idx
= 0; idx
< __CFUniCharUnicodePropertyTableCount
; idx
++) {
1236 CFAllocatorDeallocate(kCFAllocatorSystemDefault
, __CFUniCharUnicodePropertyTable
[idx
]._planes
);
1237 __CFUniCharUnicodePropertyTable
[idx
]._planes
= NULL
;
1240 CFAllocatorDeallocate(kCFAllocatorSystemDefault
, __CFUniCharUnicodePropertyTable
);
1241 __CFUniCharUnicodePropertyTable
= NULL
;
1242 __CFUniCharUnicodePropertyTableCount
= 0;
1245 __CFSpinUnlock(&__CFUniCharPropTableLock
);
1249 #undef USE_MACHO_SEGMENT