2 * Copyright (c) 2015 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
25 Copyright (c) 2001-2014, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
29 #include <CoreFoundation/CFByteOrder.h>
30 #include "CFInternal.h"
31 #include "CFUniChar.h"
32 #include "CFStringEncodingConverterExt.h"
33 #include "CFUnicodeDecomposition.h"
34 #include "CFUniCharPriv.h"
35 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
37 #include <sys/types.h>
39 #include <sys/param.h>
44 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
45 #include <mach/mach.h>
48 #if DEPLOYMENT_TARGET_WINDOWS
49 extern void _CFGetFrameworkPath(wchar_t *path
, int maxLength
);
52 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
53 #define __kCFCharacterSetDir "/System/Library/CoreServices"
54 #elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD || DEPLOYMENT_TARGET_EMBEDDED_MINI
55 #define __kCFCharacterSetDir "/usr/local/share/CoreFoundation"
56 #elif DEPLOYMENT_TARGET_WINDOWS
57 #define __kCFCharacterSetDir "\\Windows\\CoreFoundation"
60 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
61 #define USE_MACHO_SEGMENT 1
65 kCFUniCharLastExternalSet
= kCFUniCharNewlineCharacterSet
,
66 kCFUniCharFirstInternalSet
= kCFUniCharCompatibilityDecomposableCharacterSet
,
67 kCFUniCharLastInternalSet
= kCFUniCharGraphemeExtendCharacterSet
,
68 kCFUniCharFirstBitmapSet
= kCFUniCharDecimalDigitCharacterSet
71 CF_INLINE
uint32_t __CFUniCharMapExternalSetToInternalIndex(uint32_t cset
) { return ((kCFUniCharFirstInternalSet
<= cset
) ? ((cset
- kCFUniCharFirstInternalSet
) + kCFUniCharLastExternalSet
) : cset
) - kCFUniCharFirstBitmapSet
; }
72 CF_INLINE
uint32_t __CFUniCharMapCompatibilitySetID(uint32_t cset
) { return ((cset
== kCFUniCharControlCharacterSet
) ? kCFUniCharControlAndFormatterCharacterSet
: (((cset
> kCFUniCharLastExternalSet
) && (cset
< kCFUniCharFirstInternalSet
)) ? ((cset
- kCFUniCharLastExternalSet
) + kCFUniCharFirstInternalSet
) : cset
)); }
#if (DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED) && USE_MACHO_SEGMENT
#include <mach-o/getsect.h>
#include <mach-o/dyld.h>
#include <mach-o/ldsyms.h>

/* Symbols bound (via the section$start$ / section$end$ names) to the first and
   one-past-last byte of the three __UNICODE sections.  Only their ADDRESSES are
   meaningful; the declared type is a placeholder. */
extern const void* unicode_csbitmaps_section_start __asm("section$start$__UNICODE$__csbitmaps");
extern const void* unicode_csbitmaps_section_end __asm("section$end$__UNICODE$__csbitmaps");
extern const void* unicode_properties_section_start __asm("section$start$__UNICODE$__properties");
extern const void* unicode_properties_section_end __asm("section$end$__UNICODE$__properties");
extern const void* unicode_data_section_start __asm("section$start$__UNICODE$__data");
extern const void* unicode_data_section_end __asm("section$end$__UNICODE$__data");

/*
 * Returns a pointer to the contents of section `sectname` in segment `segname`
 * of this image, or NULL if it cannot be found.  When `sizep` is non-NULL it
 * receives the section size in bytes (0 on failure).
 */
static const void *__CFGetSectDataPtr(const char *segname, const char *sectname, uint64_t *sizep) {
    // special case three common sections to have fast access
    if ( strcmp(segname, "__UNICODE") == 0 ) {
        // The symbols are declared as `const void *` objects, so subtracting their
        // addresses directly would count pointer-sized units.  Go through
        // `const char *` to get a byte count.
        if ( strcmp(sectname, "__csbitmaps") == 0) {
            if (sizep) *sizep = (uint64_t)((const char *)&unicode_csbitmaps_section_end - (const char *)&unicode_csbitmaps_section_start);
            return &unicode_csbitmaps_section_start;
        }
        else if ( strcmp(sectname, "__properties") == 0 ) {
            if (sizep) *sizep = (uint64_t)((const char *)&unicode_properties_section_end - (const char *)&unicode_properties_section_start);
            return &unicode_properties_section_start;
        }
        else if ( strcmp(sectname, "__data") == 0 ) {
            if (sizep) *sizep = (uint64_t)((const char *)&unicode_data_section_end - (const char *)&unicode_data_section_start);
            return &unicode_data_section_start;
        }
    }

    // Slow path: locate our own image among the loaded images and look the section up.
    uint32_t idx, cnt = _dyld_image_count();
    for (idx = 0; idx < cnt; idx++) {
        void *mh = (void *)_dyld_get_image_header(idx);
        if (mh != &_mh_dylib_header) continue;
#if __LP64__
        const struct section_64 *sect = getsectbynamefromheader_64((struct mach_header_64 *)mh, segname, sectname);
#else
        const struct section *sect = getsectbynamefromheader((struct mach_header *)mh, segname, sectname);
#endif
        if (!sect) break;   // our image has no such section
        if (sizep) *sizep = (uint64_t)sect->size;
        return (char *)sect->addr + _dyld_get_image_vmaddr_slide(idx);
    }
    if (sizep) *sizep = 0ULL;
    return NULL;
}
#endif
121 #if !USE_MACHO_SEGMENT
123 // Memory map the file
125 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
126 CF_INLINE
void __CFUniCharCharacterSetPath(char *cpath
) {
127 #elif DEPLOYMENT_TARGET_WINDOWS
128 CF_INLINE
void __CFUniCharCharacterSetPath(wchar_t *wpath
) {
130 #error Unknown or unspecified DEPLOYMENT_TARGET
132 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
133 strlcpy(cpath
, __kCFCharacterSetDir
, MAXPATHLEN
);
134 #elif DEPLOYMENT_TARGET_LINUX
135 strlcpy(cpath
, __kCFCharacterSetDir
, MAXPATHLEN
);
136 #elif DEPLOYMENT_TARGET_WINDOWS
137 wchar_t frameworkPath
[MAXPATHLEN
];
138 _CFGetFrameworkPath(frameworkPath
, MAXPATHLEN
);
139 wcsncpy(wpath
, frameworkPath
, MAXPATHLEN
);
140 wcsncat(wpath
, L
"\\CoreFoundation.resources\\", MAXPATHLEN
- wcslen(wpath
));
142 strlcpy(cpath
, __kCFCharacterSetDir
, MAXPATHLEN
);
143 strlcat(cpath
, "/CharacterSets/", MAXPATHLEN
);
#if DEPLOYMENT_TARGET_WINDOWS
#define MAX_BITMAP_STATE 512
//
//  If a string is placed into this array, then it has been previously
//  determined that the bitmap-file cannot be found.  Thus, we make
//  the assumption it won't be there in future calls and we avoid
//  hitting the disk unnecessarily.  This assumption isn't 100%
//  correct, as bitmap-files can be added.  We would have to re-start
//  the application in order to pick-up the new bitmap info.
//
//  We should probably re-visit this.
//
static wchar_t *mappedBitmapState[MAX_BITMAP_STATE];
static int __nNumStateEntries = -1;    // index of the newest entry; -1 while the list is empty
CRITICAL_SECTION __bitmapStateLock = {0};

/* Returns true when `bitmapName` was previously recorded as "file not found". */
bool __GetBitmapStateForName(const wchar_t *bitmapName) {
    // NOTE(review): this lazy initialization is itself unsynchronized.
    if (NULL == __bitmapStateLock.DebugInfo)
        InitializeCriticalSection(&__bitmapStateLock);
    EnterCriticalSection(&__bitmapStateLock);
    // __nNumStateEntries is the index of the last entry, so the bound is inclusive;
    // an exclusive bound would never look at the most recently added name.
    for (int i = 0; i <= __nNumStateEntries; i++) {
        if (wcscmp(mappedBitmapState[i], bitmapName) == 0) {
            LeaveCriticalSection(&__bitmapStateLock);
            return true;
        }
    }
    LeaveCriticalSection(&__bitmapStateLock);
    return false;
}

/* Records `bitmapName` as "file not found".  The name is silently dropped when
   the table is full or memory is exhausted; the only cost is another disk probe. */
void __AddBitmapStateForName(const wchar_t *bitmapName) {
    if (NULL == __bitmapStateLock.DebugInfo)
        InitializeCriticalSection(&__bitmapStateLock);
    EnterCriticalSection(&__bitmapStateLock);
    if (__nNumStateEntries + 1 < MAX_BITMAP_STATE) {
        wchar_t *nameCopy = (wchar_t *)malloc((lstrlenW(bitmapName)+1) * sizeof(wchar_t));

        if (nameCopy) {
            lstrcpyW(nameCopy, bitmapName);
            mappedBitmapState[++__nNumStateEntries] = nameCopy;
        }
    }
    LeaveCriticalSection(&__bitmapStateLock);
}
#endif
189 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
190 static bool __CFUniCharLoadBytesFromFile(const char *fileName
, const void **bytes
, int64_t *fileSize
) {
191 #elif DEPLOYMENT_TARGET_WINDOWS
192 static bool __CFUniCharLoadBytesFromFile(const wchar_t *fileName
, const void **bytes
, int64_t *fileSize
) {
194 #error Unknown or unspecified DEPLOYMENT_TARGET
196 #if DEPLOYMENT_TARGET_WINDOWS
197 HANDLE bitmapFileHandle
= NULL
;
198 HANDLE mappingHandle
= NULL
;
200 if (__GetBitmapStateForName(fileName
)) {
201 // The fileName has been tried in the past, so just return false
206 mappingHandle
= OpenFileMappingW(FILE_MAP_READ
, TRUE
, fileName
);
207 if (NULL
== mappingHandle
) {
208 if ((bitmapFileHandle
= CreateFileW(fileName
, GENERIC_READ
, FILE_SHARE_READ
, NULL
, OPEN_EXISTING
, FILE_ATTRIBUTE_NORMAL
, NULL
)) == INVALID_HANDLE_VALUE
) {
209 // We tried to get the bitmap file for mapping, but it's not there. Add to list of non-existant bitmap-files so
210 // we don't have to try this again in the future.
211 __AddBitmapStateForName(fileName
);
214 mappingHandle
= CreateFileMapping(bitmapFileHandle
, NULL
, PAGE_READONLY
, 0, 0, NULL
);
215 CloseHandle(bitmapFileHandle
);
216 if (!mappingHandle
) return false;
219 *bytes
= MapViewOfFileEx(mappingHandle
, FILE_MAP_READ
, 0, 0, 0, 0);
221 if (NULL
!= fileSize
) {
222 MEMORY_BASIC_INFORMATION memoryInfo
;
224 if (0 == VirtualQueryEx(mappingHandle
, *bytes
, &memoryInfo
, sizeof(memoryInfo
))) {
225 *fileSize
= 0; // This indicates no checking. Is it right ?
227 *fileSize
= memoryInfo
.RegionSize
;
231 CloseHandle(mappingHandle
);
233 return (*bytes
? true : false);
238 if ((fd
= open(fileName
, O_RDONLY
, 0)) < 0) {
241 if (fstat(fd
, &statBuf
) < 0 || (*bytes
= mmap(0, statBuf
.st_size
, PROT_READ
, MAP_PRIVATE
, fd
, 0)) == (void *)-1) {
247 if (NULL
!= fileSize
) *fileSize
= statBuf
.st_size
;
253 #endif // USE_MACHO_SEGMENT
255 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
256 static bool __CFUniCharLoadFile(const char *bitmapName
, const void **bytes
, int64_t *fileSize
) {
257 #elif DEPLOYMENT_TARGET_WINDOWS
258 static bool __CFUniCharLoadFile(const wchar_t *bitmapName
, const void **bytes
, int64_t *fileSize
) {
260 #error Unknown or unspecified DEPLOYMENT_TARGET
262 #if USE_MACHO_SEGMENT
263 *bytes
= __CFGetSectDataPtr("__UNICODE", bitmapName
, NULL
);
265 if (NULL
!= fileSize
) *fileSize
= 0;
267 return *bytes
? true : false;
269 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
270 char cpath
[MAXPATHLEN
];
271 __CFUniCharCharacterSetPath(cpath
);
272 strlcat(cpath
, bitmapName
, MAXPATHLEN
);
273 Boolean needToFree
= false;
274 const char *possiblyFrameworkRootedCPath
= CFPathRelativeToAppleFrameworksRoot(cpath
, &needToFree
);
275 bool result
= __CFUniCharLoadBytesFromFile(possiblyFrameworkRootedCPath
, bytes
, fileSize
);
276 if (needToFree
) free((void *)possiblyFrameworkRootedCPath
);
278 #elif DEPLOYMENT_TARGET_WINDOWS
279 wchar_t wpath
[MAXPATHLEN
];
280 __CFUniCharCharacterSetPath(wpath
);
281 wcsncat(wpath
, bitmapName
, MAXPATHLEN
);
282 return __CFUniCharLoadBytesFromFile(wpath
, bytes
, fileSize
);
284 #error Unknown or unspecified DEPLOYMENT_TARGET
/*
    Currently unused but left in for symmetry/informative purposes
CF_INLINE bool isControl(UTF32Char theChar, uint16_t charset, const void *data) { // ISO Control
    return (((theChar <= 0x001F) || (theChar >= 0x007F && theChar <= 0x009F)) ? true : false);
}*/
296 CF_INLINE
bool isWhitespace(UTF32Char theChar
, uint16_t charset
, const void *data
) { // Space
297 return (((theChar
== 0x0020) || (theChar
== 0x0009) || (theChar
== 0x00A0) || (theChar
== 0x1680) || (theChar
>= 0x2000 && theChar
<= 0x200B) || (theChar
== 0x202F) || (theChar
== 0x205F) || (theChar
== 0x3000)) ? true : false);
300 CF_INLINE
bool isNewline(UTF32Char theChar
, uint16_t charset
, const void *data
) { // White space
301 return (((theChar
>= 0x000A && theChar
<= 0x000D) || (theChar
== 0x0085) || (theChar
== 0x2028) || (theChar
== 0x2029)) ? true : false);
304 CF_INLINE
bool isWhitespaceAndNewline(UTF32Char theChar
, uint16_t charset
, const void *data
) { // White space
305 return ((isWhitespace(theChar
, charset
, data
) || isNewline(theChar
, charset
, data
)) ? true : false);
/*
 * Sanity check of a loaded Unicode data file against its size.
 * The first two variants accept everything; the real check in the last
 * variant is currently compiled out by the `#elif 1`.
 */
#if USE_MACHO_SEGMENT
CF_INLINE bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { return true; }
#elif 1
// <rdar://problem/8961744> __CFSimpleFileSizeVerification is broken
static bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { return true; }
#else
static bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) {
    bool result = true;

    // fileSize <= 0 means "size unknown": nothing to verify.
    if (fileSize > 0) {
        if ((int64_t)(sizeof(uint32_t) * 2) > fileSize) {
            result = false;     // too small to hold version + header size
        } else {
            // Big-endian header size stored right after the 4-byte Unicode version.
            uint32_t headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4)));

            if ((headerSize < (sizeof(uint32_t) * 4)) || (headerSize > fileSize)) {
                result = false;
            } else {
                // Last two 32-bit words of the header.
                // NOTE(review): treating them as an (offset, size) pair matches the
                // way __CFUniCharLoadBitmapData reads the bitmap file; confirm the
                // mapping file uses the same layout before re-enabling this check.
                const uint32_t *lastElement = (uint32_t *)(((uint8_t *)bytes) + headerSize) - 2;

                // The end of the last element must lie inside the file.  The sum
                // has to be compared against the file size; comparing it against
                // headerSize rejects every non-empty body.
                if (((uint64_t)headerSize + CFSwapInt32BigToHost(lastElement[0]) + CFSwapInt32BigToHost(lastElement[1])) > (uint64_t)fileSize) result = false;
            }
        }
    }

    if (!result) CFLog(kCFLogLevelCritical, CFSTR("File size verification for Unicode database file failed."));

    return result;
}
#endif // USE_MACHO_SEGMENT
341 const uint8_t **_planes
;
342 } __CFUniCharBitmapData
;
344 static char __CFUniCharUnicodeVersionString
[8] = {0, 0, 0, 0, 0, 0, 0, 0};
346 static uint32_t __CFUniCharNumberOfBitmaps
= 0;
347 static __CFUniCharBitmapData
*__CFUniCharBitmapDataArray
= NULL
;
349 static CFLock_t __CFUniCharBitmapLock
= CFLockInit
;
351 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
352 #if !defined(CF_UNICHAR_BITMAP_FILE)
353 #if USE_MACHO_SEGMENT
354 #define CF_UNICHAR_BITMAP_FILE "__csbitmaps"
356 #define CF_UNICHAR_BITMAP_FILE "/CFCharacterSetBitmaps.bitmap"
359 #elif DEPLOYMENT_TARGET_WINDOWS
360 #if !defined(CF_UNICHAR_BITMAP_FILE)
361 #define CF_UNICHAR_BITMAP_FILE L"CFCharacterSetBitmaps.bitmap"
364 #error Unknown or unspecified DEPLOYMENT_TARGET
367 static bool __CFUniCharLoadBitmapData(void) {
368 __CFUniCharBitmapData
*array
;
372 uint8_t currentPlane
;
374 const void *bitmapBase
;
376 int idx
, bitmapIndex
;
379 __CFLock(&__CFUniCharBitmapLock
);
381 if (__CFUniCharBitmapDataArray
|| !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE
, &bytes
, &fileSize
) || !__CFSimpleFileSizeVerification(bytes
, fileSize
)) {
382 __CFUnlock(&__CFUniCharBitmapLock
);
386 for (idx
= 0;idx
< 4 && ((const uint8_t *)bytes
)[idx
];idx
++) {
387 __CFUniCharUnicodeVersionString
[idx
* 2] = ((const uint8_t *)bytes
)[idx
];
388 __CFUniCharUnicodeVersionString
[idx
* 2 + 1] = '.';
390 __CFUniCharUnicodeVersionString
[(idx
< 4 ? idx
* 2 - 1 : 7)] = '\0';
392 headerSize
= CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes
+ 4)));
394 bitmapBase
= (uint8_t *)bytes
+ headerSize
;
395 bytes
= (uint8_t *)bytes
+ (sizeof(uint32_t) * 2);
396 headerSize
-= (sizeof(uint32_t) * 2);
398 __CFUniCharNumberOfBitmaps
= headerSize
/ (sizeof(uint32_t) * 2);
400 array
= (__CFUniCharBitmapData
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(__CFUniCharBitmapData
) * __CFUniCharNumberOfBitmaps
, 0);
402 for (idx
= 0;idx
< (int)__CFUniCharNumberOfBitmaps
;idx
++) {
403 bitmap
= (uint8_t *)bitmapBase
+ CFSwapInt32BigToHost(*((uint32_t *)bytes
)); bytes
= (uint8_t *)bytes
+ sizeof(uint32_t);
404 bitmapSize
= CFSwapInt32BigToHost(*((uint32_t *)bytes
)); bytes
= (uint8_t *)bytes
+ sizeof(uint32_t);
406 numPlanes
= bitmapSize
/ (8 * 1024);
407 numPlanes
= *(const uint8_t *)((char *)bitmap
+ (((numPlanes
- 1) * ((8 * 1024) + 1)) - 1)) + 1;
408 array
[idx
]._planes
= (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(const void *) * numPlanes
, 0);
409 array
[idx
]._numPlanes
= numPlanes
;
412 for (bitmapIndex
= 0;bitmapIndex
< numPlanes
;bitmapIndex
++) {
413 if (bitmapIndex
== currentPlane
) {
414 array
[idx
]._planes
[bitmapIndex
] = (const uint8_t *)bitmap
;
415 bitmap
= (uint8_t *)bitmap
+ (8 * 1024);
416 #if defined (__cplusplus)
417 currentPlane
= *(((const uint8_t*&)bitmap
)++);
419 currentPlane
= *((const uint8_t *)bitmap
++);
423 array
[idx
]._planes
[bitmapIndex
] = NULL
;
428 __CFUniCharBitmapDataArray
= array
;
430 __CFUnlock(&__CFUniCharBitmapLock
);
435 CF_PRIVATE
const char *__CFUniCharGetUnicodeVersionString(void) {
436 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
437 return __CFUniCharUnicodeVersionString
;
440 bool CFUniCharIsMemberOf(UTF32Char theChar
, uint32_t charset
) {
441 charset
= __CFUniCharMapCompatibilitySetID(charset
);
444 case kCFUniCharWhitespaceCharacterSet
:
445 return isWhitespace(theChar
, charset
, NULL
);
447 case kCFUniCharWhitespaceAndNewlineCharacterSet
:
448 return isWhitespaceAndNewline(theChar
, charset
, NULL
);
450 case kCFUniCharNewlineCharacterSet
:
451 return isNewline(theChar
, charset
, NULL
);
454 uint32_t tableIndex
= __CFUniCharMapExternalSetToInternalIndex(charset
);
456 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
458 if (tableIndex
< __CFUniCharNumberOfBitmaps
) {
459 __CFUniCharBitmapData
*data
= __CFUniCharBitmapDataArray
+ tableIndex
;
460 uint8_t planeNo
= (theChar
>> 16) & 0xFF;
462 // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16
463 if (charset
== kCFUniCharIllegalCharacterSet
) {
464 if (planeNo
== 0x0E) { // Plane 14
466 return (((theChar
== 0x01) || ((theChar
> 0x1F) && (theChar
< 0x80))) ? false : true);
467 } else if (planeNo
== 0x0F || planeNo
== 0x10) { // Plane 15 & 16
468 return ((theChar
& 0xFF) > 0xFFFD ? true : false);
470 return (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] ? !CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) : true);
472 } else if (charset
== kCFUniCharControlAndFormatterCharacterSet
) {
473 if (planeNo
== 0x0E) { // Plane 14
475 return (((theChar
== 0x01) || ((theChar
> 0x1F) && (theChar
< 0x80))) ? true : false);
477 return (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] ? CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) : false);
480 return (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] ? CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) : false);
488 const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset
, uint32_t plane
) {
489 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
491 charset
= __CFUniCharMapCompatibilitySetID(charset
);
493 if ((charset
> kCFUniCharWhitespaceAndNewlineCharacterSet
) && (charset
!= kCFUniCharIllegalCharacterSet
) && (charset
!= kCFUniCharNewlineCharacterSet
)) {
494 uint32_t tableIndex
= __CFUniCharMapExternalSetToInternalIndex(charset
);
496 if (tableIndex
< __CFUniCharNumberOfBitmaps
) {
497 __CFUniCharBitmapData
*data
= __CFUniCharBitmapDataArray
+ tableIndex
;
499 return (plane
< data
->_numPlanes
? data
->_planes
[plane
] : NULL
);
505 CF_PRIVATE
uint8_t CFUniCharGetBitmapForPlane(uint32_t charset
, uint32_t plane
, void *bitmap
, bool isInverted
) {
506 const uint8_t *src
= CFUniCharGetBitmapPtrForPlane(charset
, plane
);
507 int numBytes
= (8 * 1024);
511 #if defined (__cplusplus)
512 while (numBytes
-- > 0) *(((uint8_t *&)bitmap
)++) = ~(*(src
++));
514 while (numBytes
-- > 0) *((uint8_t *)bitmap
++) = ~(*(src
++));
517 #if defined (__cplusplus)
518 while (numBytes
-- > 0) *(((uint8_t *&)bitmap
)++) = *(src
++);
520 while (numBytes
-- > 0) *((uint8_t *)bitmap
++) = *(src
++);
523 return kCFUniCharBitmapFilled
;
524 } else if (charset
== kCFUniCharIllegalCharacterSet
) {
525 __CFUniCharBitmapData
*data
= __CFUniCharBitmapDataArray
+ __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset
));
527 if (plane
< data
->_numPlanes
&& (src
= data
->_planes
[plane
])) {
529 #if defined (__cplusplus)
530 while (numBytes
-- > 0) *(((uint8_t *&)bitmap
)++) = *(src
++);
532 while (numBytes
-- > 0) *((uint8_t *)bitmap
++) = *(src
++);
535 #if defined (__cplusplus)
536 while (numBytes
-- > 0) *(((uint8_t *&)bitmap
)++) = ~(*(src
++));
538 while (numBytes
-- > 0) *((uint8_t *)bitmap
++) = ~(*(src
++));
541 return kCFUniCharBitmapFilled
;
542 } else if (plane
== 0x0E) { // Plane 14
544 uint8_t asciiRange
= (isInverted
? (uint8_t)0xFF : (uint8_t)0);
545 uint8_t otherRange
= (isInverted
? (uint8_t)0 : (uint8_t)0xFF);
547 #if defined (__cplusplus)
548 *(((uint8_t *&)bitmap
)++) = 0x02; // UE0001 LANGUAGE TAG
550 *((uint8_t *)bitmap
++) = 0x02; // UE0001 LANGUAGE TAG
552 for (idx
= 1;idx
< numBytes
;idx
++) {
553 #if defined (__cplusplus)
554 *(((uint8_t *&)bitmap
)++) = ((idx
>= (0x20 / 8) && (idx
< (0x80 / 8))) ? asciiRange
: otherRange
);
556 *((uint8_t *)bitmap
++) = ((idx
>= (0x20 / 8) && (idx
< (0x80 / 8))) ? asciiRange
: otherRange
);
559 return kCFUniCharBitmapFilled
;
560 } else if (plane
== 0x0F || plane
== 0x10) { // Plane 15 & 16
561 uint32_t value
= (isInverted
? ~0 : 0);
562 numBytes
/= 4; // for 32bit
564 while (numBytes
-- > 0) {
565 *((uint32_t *)bitmap
) = value
;
566 #if defined (__cplusplus)
567 bitmap
= (uint8_t *)bitmap
+ sizeof(uint32_t);
569 bitmap
+= sizeof(uint32_t);
572 *(((uint8_t *)bitmap
) - 5) = (isInverted
? 0x3F : 0xC0); // 0xFFFE & 0xFFFF
573 return kCFUniCharBitmapFilled
;
575 return (isInverted
? kCFUniCharBitmapEmpty
: kCFUniCharBitmapAll
);
576 } else if ((charset
< kCFUniCharDecimalDigitCharacterSet
) || (charset
== kCFUniCharNewlineCharacterSet
)) {
577 if (plane
) return (isInverted
? kCFUniCharBitmapAll
: kCFUniCharBitmapEmpty
);
579 uint8_t *bitmapBase
= (uint8_t *)bitmap
;
581 uint8_t nonFillValue
= (isInverted
? (uint8_t)0xFF : (uint8_t)0);
583 #if defined (__cplusplus)
584 while (numBytes
-- > 0) *(((uint8_t *&)bitmap
)++) = nonFillValue
;
586 while (numBytes
-- > 0) *((uint8_t *)bitmap
++) = nonFillValue
;
589 if ((charset
== kCFUniCharWhitespaceAndNewlineCharacterSet
) || (charset
== kCFUniCharNewlineCharacterSet
)) {
590 const UniChar newlines
[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029};
592 for (idx
= 0;idx
< (int)(sizeof(newlines
) / sizeof(*newlines
)); idx
++) {
594 CFUniCharRemoveCharacterFromBitmap(newlines
[idx
], bitmapBase
);
596 CFUniCharAddCharacterToBitmap(newlines
[idx
], bitmapBase
);
600 if (charset
== kCFUniCharNewlineCharacterSet
) return kCFUniCharBitmapFilled
;
604 CFUniCharRemoveCharacterFromBitmap(0x0009, bitmapBase
);
605 CFUniCharRemoveCharacterFromBitmap(0x0020, bitmapBase
);
606 CFUniCharRemoveCharacterFromBitmap(0x00A0, bitmapBase
);
607 CFUniCharRemoveCharacterFromBitmap(0x1680, bitmapBase
);
608 CFUniCharRemoveCharacterFromBitmap(0x202F, bitmapBase
);
609 CFUniCharRemoveCharacterFromBitmap(0x205F, bitmapBase
);
610 CFUniCharRemoveCharacterFromBitmap(0x3000, bitmapBase
);
612 CFUniCharAddCharacterToBitmap(0x0009, bitmapBase
);
613 CFUniCharAddCharacterToBitmap(0x0020, bitmapBase
);
614 CFUniCharAddCharacterToBitmap(0x00A0, bitmapBase
);
615 CFUniCharAddCharacterToBitmap(0x1680, bitmapBase
);
616 CFUniCharAddCharacterToBitmap(0x202F, bitmapBase
);
617 CFUniCharAddCharacterToBitmap(0x205F, bitmapBase
);
618 CFUniCharAddCharacterToBitmap(0x3000, bitmapBase
);
621 for (idx
= 0x2000;idx
<= 0x200B;idx
++) {
623 CFUniCharRemoveCharacterFromBitmap(idx
, bitmapBase
);
625 CFUniCharAddCharacterToBitmap(idx
, bitmapBase
);
628 return kCFUniCharBitmapFilled
;
630 return (isInverted
? kCFUniCharBitmapAll
: kCFUniCharBitmapEmpty
);
633 CF_PRIVATE
uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset
) {
634 if ((charset
== kCFUniCharControlCharacterSet
) || (charset
== kCFUniCharControlAndFormatterCharacterSet
)) {
635 return 15; // 0 to 14
636 } else if (charset
< kCFUniCharDecimalDigitCharacterSet
) {
638 } else if (charset
== kCFUniCharIllegalCharacterSet
) {
643 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
645 numPlanes
= __CFUniCharBitmapDataArray
[__CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset
))]._numPlanes
;
651 // Mapping data loading
652 static const void **__CFUniCharMappingTables
= NULL
;
654 static CFLock_t __CFUniCharMappingTableLock
= CFLockInit
;
656 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
657 #if __CF_BIG_ENDIAN__
658 #if USE_MACHO_SEGMENT
659 #define MAPPING_TABLE_FILE "__data"
661 #define MAPPING_TABLE_FILE "/CFUnicodeData-B.mapping"
664 #if USE_MACHO_SEGMENT
665 #define MAPPING_TABLE_FILE "__data"
667 #define MAPPING_TABLE_FILE "/CFUnicodeData-L.mapping"
670 #elif DEPLOYMENT_TARGET_WINDOWS
671 #if __CF_BIG_ENDIAN__
672 #if USE_MACHO_SEGMENT
673 #define MAPPING_TABLE_FILE "__data"
675 #define MAPPING_TABLE_FILE L"CFUnicodeData-B.mapping"
678 #if USE_MACHO_SEGMENT
679 #define MAPPING_TABLE_FILE "__data"
681 #define MAPPING_TABLE_FILE L"CFUnicodeData-L.mapping"
685 #error Unknown or unspecified DEPLOYMENT_TARGET
688 CF_PRIVATE
const void *CFUniCharGetMappingData(uint32_t type
) {
690 __CFLock(&__CFUniCharMappingTableLock
);
692 if (NULL
== __CFUniCharMappingTables
) {
694 const void *bodyBase
;
699 if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE
, &bytes
, &fileSize
) || !__CFSimpleFileSizeVerification(bytes
, fileSize
)) {
700 __CFUnlock(&__CFUniCharMappingTableLock
);
704 #if defined (__cplusplus)
705 bytes
= (uint8_t *)bytes
+ 4; // Skip Unicode version
706 headerSize
= *((uint8_t *)bytes
); bytes
= (uint8_t *)bytes
+ sizeof(uint32_t);
708 bytes
+= 4; // Skip Unicode version
709 headerSize
= *((uint32_t *)bytes
); bytes
+= sizeof(uint32_t);
711 headerSize
-= (sizeof(uint32_t) * 2);
712 bodyBase
= (char *)bytes
+ headerSize
;
714 count
= headerSize
/ sizeof(uint32_t);
716 __CFUniCharMappingTables
= (const void **)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(const void *) * count
, 0);
718 for (idx
= 0;idx
< count
;idx
++) {
719 #if defined (__cplusplus)
720 __CFUniCharMappingTables
[idx
] = (char *)bodyBase
+ *((uint32_t *)bytes
); bytes
= (uint8_t *)bytes
+ sizeof(uint32_t);
722 __CFUniCharMappingTables
[idx
] = (char *)bodyBase
+ *((uint32_t *)bytes
); bytes
+= sizeof(uint32_t);
727 __CFUnlock(&__CFUniCharMappingTableLock
);
729 return __CFUniCharMappingTables
[type
];
732 // Case mapping functions
733 #define DO_SPECIAL_CASE_MAPPING 1
735 static uint32_t *__CFUniCharCaseMappingTableCounts
= NULL
;
736 static uint32_t **__CFUniCharCaseMappingTable
= NULL
;
737 static const uint32_t **__CFUniCharCaseMappingExtraTable
= NULL
;
742 } __CFUniCharCaseMappings
;
744 /* Binary searches CFStringEncodingUnicodeTo8BitCharMap */
745 static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings
*theTable
, uint32_t numElem
, UTF32Char character
) {
746 const __CFUniCharCaseMappings
*p
, *q
, *divider
;
748 if ((character
< theTable
[0]._key
) || (character
> theTable
[numElem
-1]._key
)) {
754 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
755 if (character
< divider
->_key
) { q
= divider
- 1; }
756 else if (character
> divider
->_key
) { p
= divider
+ 1; }
757 else { return divider
->_value
; }
762 #define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1)
764 static bool __CFUniCharLoadCaseMappingTable(void) {
765 uint32_t *countArray
;
768 if (NULL
== __CFUniCharMappingTables
) (void)CFUniCharGetMappingData(kCFUniCharToLowercase
);
769 if (NULL
== __CFUniCharMappingTables
) return false;
771 __CFLock(&__CFUniCharMappingTableLock
);
773 if (__CFUniCharCaseMappingTableCounts
) {
774 __CFUnlock(&__CFUniCharMappingTableLock
);
778 countArray
= (uint32_t *)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(uint32_t) * NUM_CASE_MAP_DATA
+ sizeof(uint32_t *) * NUM_CASE_MAP_DATA
* 2, 0);
779 __CFUniCharCaseMappingTable
= (uint32_t **)((char *)countArray
+ sizeof(uint32_t) * NUM_CASE_MAP_DATA
);
780 __CFUniCharCaseMappingExtraTable
= (const uint32_t **)__CFUniCharCaseMappingTable
+ NUM_CASE_MAP_DATA
;
782 for (idx
= 0;idx
< NUM_CASE_MAP_DATA
;idx
++) {
783 countArray
[idx
] = *((uint32_t *)__CFUniCharMappingTables
[idx
]) / (sizeof(uint32_t) * 2);
784 __CFUniCharCaseMappingTable
[idx
] = ((uint32_t *)__CFUniCharMappingTables
[idx
]) + 1;
785 __CFUniCharCaseMappingExtraTable
[idx
] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable
[idx
] + *((uint32_t *)__CFUniCharMappingTables
[idx
]));
788 __CFUniCharCaseMappingTableCounts
= countArray
;
790 __CFUnlock(&__CFUniCharMappingTableLock
);
/* Two-letter language codes as they appear when the first two bytes of the
   language string are read as one host-order uint16_t. */
#if __CF_BIG_ENDIAN__
#define TURKISH_LANG_CODE (0x7472) // tr
#define LITHUANIAN_LANG_CODE (0x6C74) // lt
#define AZERI_LANG_CODE (0x617A) // az
#define DUTCH_LANG_CODE (0x6E6C) // nl
#define GREEK_LANG_CODE (0x656C) // el
#else
#define TURKISH_LANG_CODE (0x7274) // tr
#define LITHUANIAN_LANG_CODE (0x746C) // lt
#define AZERI_LANG_CODE (0x7A61) // az
#define DUTCH_LANG_CODE (0x6C6E) // nl
#define GREEK_LANG_CODE (0x6C65) // el
#endif
808 CFIndex
CFUniCharMapCaseTo(UTF32Char theChar
, UTF16Char
*convertedChar
, CFIndex maxLength
, uint32_t ctype
, uint32_t flags
, const uint8_t *langCode
) {
809 __CFUniCharBitmapData
*data
;
810 uint8_t planeNo
= (theChar
>> 16) & 0xFF;
814 #if DO_SPECIAL_CASE_MAPPING
815 if (flags
& kCFUniCharCaseMapFinalSigma
) {
816 if (theChar
== 0x03A3) { // Final sigma
817 *convertedChar
= (ctype
== kCFUniCharToLowercase
? 0x03C2 : 0x03A3);
823 if (flags
& kCFUniCharCaseMapGreekTonos
) { // localized Greek uppercasing
824 if (theChar
== 0x0301) { // GREEK TONOS
826 } else if (theChar
== 0x0344) {// COMBINING GREEK DIALYTIKA TONOS
827 *convertedChar
= 0x0308; // COMBINING GREEK DIALYTIKA
829 } else if (CFUniCharIsMemberOf(theChar
, kCFUniCharDecomposableCharacterSet
)) {
830 UTF32Char buffer
[MAX_DECOMPOSED_LENGTH
];
831 CFIndex length
= CFUniCharDecomposeCharacter(theChar
, buffer
, MAX_DECOMPOSED_LENGTH
);
834 UTF32Char
*characters
= buffer
+ 1;
835 UTF32Char
*tail
= buffer
+ length
;
837 while (characters
< tail
) {
838 if (*characters
== 0x0301) break;
842 if (characters
< tail
) { // found a tonos
843 CFIndex convertedLength
= CFUniCharMapCaseTo(*buffer
, convertedChar
, maxLength
, ctype
, 0, langCode
);
845 if (convertedLength
== 0) {
846 *convertedChar
= (UTF16Char
)*buffer
;
850 characters
= buffer
+ 1;
852 while (characters
< tail
) {
853 if (*characters
!= 0x0301) { // not tonos
854 if (*characters
< 0x10000) { // BMP
855 convertedChar
[convertedLength
] = (UTF16Char
)*characters
;
858 UTF32Char character
= *characters
- 0x10000;
859 convertedChar
[convertedLength
++] = (UTF16Char
)((character
>> 10) + 0xD800UL
);
860 convertedChar
[convertedLength
++] = (UTF16Char
)((character
& 0x3FF) + 0xDC00UL
);
866 return convertedLength
;
871 switch (*(uint16_t *)langCode
) {
872 case LITHUANIAN_LANG_CODE
:
873 if (theChar
== 0x0307 && (flags
& kCFUniCharCaseMapAfter_i
)) {
875 } else if (ctype
== kCFUniCharToLowercase
) {
876 if (flags
& kCFUniCharCaseMapMoreAbove
) {
878 case 0x0049: // LATIN CAPITAL LETTER I
879 *(convertedChar
++) = 0x0069;
880 *(convertedChar
++) = 0x0307;
883 case 0x004A: // LATIN CAPITAL LETTER J
884 *(convertedChar
++) = 0x006A;
885 *(convertedChar
++) = 0x0307;
888 case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK
889 *(convertedChar
++) = 0x012F;
890 *(convertedChar
++) = 0x0307;
897 case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE
898 *(convertedChar
++) = 0x0069;
899 *(convertedChar
++) = 0x0307;
900 *(convertedChar
++) = 0x0300;
903 case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE
904 *(convertedChar
++) = 0x0069;
905 *(convertedChar
++) = 0x0307;
906 *(convertedChar
++) = 0x0301;
909 case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE
910 *(convertedChar
++) = 0x0069;
911 *(convertedChar
++) = 0x0307;
912 *(convertedChar
++) = 0x0303;
920 case TURKISH_LANG_CODE
:
921 case AZERI_LANG_CODE
:
922 if ((theChar
== 0x0049) || (theChar
== 0x0131)) { // LATIN CAPITAL LETTER I & LATIN SMALL LETTER DOTLESS I
923 *convertedChar
= (((ctype
== kCFUniCharToLowercase
) || (ctype
== kCFUniCharCaseFold
)) ? ((kCFUniCharCaseMapMoreAbove
& flags
) ? 0x0069 : 0x0131) : 0x0049);
925 } else if ((theChar
== 0x0069) || (theChar
== 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE
926 *convertedChar
= (((ctype
== kCFUniCharToLowercase
) || (ctype
== kCFUniCharCaseFold
)) ? 0x0069 : 0x0130);
928 } else if (theChar
== 0x0307 && (kCFUniCharCaseMapAfter_i
& flags
)) { // COMBINING DOT ABOVE AFTER_i
929 if (ctype
== kCFUniCharToLowercase
) {
932 *convertedChar
= 0x0307;
938 case DUTCH_LANG_CODE
:
939 if ((theChar
== 0x004A) || (theChar
== 0x006A)) {
940 *convertedChar
= (((ctype
== kCFUniCharToUppercase
) || (ctype
== kCFUniCharToTitlecase
) || (kCFUniCharCaseMapDutchDigraph
& flags
)) ? 0x004A : 0x006A);
948 #endif // DO_SPECIAL_CASE_MAPPING
950 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
952 data
= __CFUniCharBitmapDataArray
+ __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(ctype
+ kCFUniCharHasNonSelfLowercaseCharacterSet
));
954 if (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] && CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) && (__CFUniCharCaseMappingTableCounts
|| __CFUniCharLoadCaseMappingTable())) {
955 uint32_t value
= __CFUniCharGetMappedCase((const __CFUniCharCaseMappings
*)__CFUniCharCaseMappingTable
[ctype
], __CFUniCharCaseMappingTableCounts
[ctype
], theChar
);
957 if (!value
&& ctype
== kCFUniCharToTitlecase
) {
958 value
= __CFUniCharGetMappedCase((const __CFUniCharCaseMappings
*)__CFUniCharCaseMappingTable
[kCFUniCharToUppercase
], __CFUniCharCaseMappingTableCounts
[kCFUniCharToUppercase
], theChar
);
959 if (value
) ctype
= kCFUniCharToUppercase
;
963 CFIndex count
= CFUniCharConvertFlagToCount(value
);
966 if (value
& kCFUniCharNonBmpFlag
) {
968 value
= (value
& 0xFFFFFF) - 0x10000;
969 *(convertedChar
++) = (UTF16Char
)(value
>> 10) + 0xD800UL
;
970 *(convertedChar
++) = (UTF16Char
)(value
& 0x3FF) + 0xDC00UL
;
974 *convertedChar
= (UTF16Char
)value
;
977 } else if (count
< maxLength
) {
978 const uint32_t *extraMapping
= __CFUniCharCaseMappingExtraTable
[ctype
] + (value
& 0xFFFFFF);
980 if (value
& kCFUniCharNonBmpFlag
) {
981 CFIndex copiedLen
= 0;
983 while (count
-- > 0) {
984 value
= *(extraMapping
++);
985 if (value
> 0xFFFF) {
986 if (copiedLen
+ 2 >= maxLength
) break;
987 value
= (value
& 0xFFFFFF) - 0x10000;
988 convertedChar
[copiedLen
++] = (UTF16Char
)(value
>> 10) + 0xD800UL
;
989 convertedChar
[copiedLen
++] = (UTF16Char
)(value
& 0x3FF) + 0xDC00UL
;
991 if (copiedLen
+ 1 >= maxLength
) break;
992 convertedChar
[copiedLen
++] = value
;
995 if (!count
) return copiedLen
;
999 for (idx
= 0;idx
< count
;idx
++) *(convertedChar
++) = (UTF16Char
)*(extraMapping
++);
1004 } else if (ctype
== kCFUniCharCaseFold
) {
1005 ctype
= kCFUniCharToLowercase
;
1009 if (theChar
> 0xFFFF) { // non-BMP
1010 theChar
= (theChar
& 0xFFFFFF) - 0x10000;
1011 *(convertedChar
++) = (UTF16Char
)(theChar
>> 10) + 0xD800UL
;
1012 *(convertedChar
++) = (UTF16Char
)(theChar
& 0x3FF) + 0xDC00UL
;
1015 *convertedChar
= theChar
;
1020 CFIndex
CFUniCharMapTo(UniChar theChar
, UniChar
*convertedChar
, CFIndex maxLength
, uint16_t ctype
, uint32_t flags
) {
1021 if (ctype
== kCFUniCharCaseFold
+ 1) { // kCFUniCharDecompose
1022 if (CFUniCharIsDecomposableCharacter(theChar
, false)) {
1023 UTF32Char buffer
[MAX_DECOMPOSED_LENGTH
];
1024 CFIndex usedLength
= CFUniCharDecomposeCharacter(theChar
, buffer
, MAX_DECOMPOSED_LENGTH
);
1027 for (idx
= 0;idx
< usedLength
;idx
++) *(convertedChar
++) = buffer
[idx
];
1030 *convertedChar
= theChar
;
1034 return CFUniCharMapCaseTo(theChar
, convertedChar
, maxLength
, ctype
, flags
, NULL
);
1038 CF_INLINE
bool __CFUniCharIsMoreAbove(UTF16Char
*buffer
, CFIndex length
) {
1039 UTF32Char currentChar
;
1042 while (length
-- > 0) {
1043 currentChar
= *(buffer
)++;
1044 if (CFUniCharIsSurrogateHighCharacter(currentChar
) && (length
> 0) && CFUniCharIsSurrogateLowCharacter(*(buffer
+ 1))) {
1045 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(currentChar
, *(buffer
++));
1048 if (!CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) break;
1050 property
= CFUniCharGetCombiningPropertyForCharacter(currentChar
, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16) & 0xFF));
1052 if (property
== 230) return true; // Above priority
1057 CF_INLINE
bool __CFUniCharIsAfter_i(UTF16Char
*buffer
, CFIndex length
) {
1058 UTF32Char currentChar
= 0;
1060 UTF32Char decomposed
[MAX_DECOMPOSED_LENGTH
];
1061 CFIndex decompLength
;
1064 if (length
< 1) return 0;
1067 while (length
-- > 1) {
1068 currentChar
= *(--buffer
);
1069 if (CFUniCharIsSurrogateLowCharacter(currentChar
)) {
1070 if ((length
> 1) && CFUniCharIsSurrogateHighCharacter(*(buffer
- 1))) {
1071 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*(--buffer
), currentChar
);
1077 if (!CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) break;
1079 property
= CFUniCharGetCombiningPropertyForCharacter(currentChar
, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16) & 0xFF));
1081 if (property
== 230) return false; // Above priority
1084 currentChar
= *(--buffer
);
1085 } else if (CFUniCharIsSurrogateLowCharacter(currentChar
) && CFUniCharIsSurrogateHighCharacter(*(--buffer
))) {
1086 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*buffer
, currentChar
);
1089 decompLength
= CFUniCharDecomposeCharacter(currentChar
, decomposed
, MAX_DECOMPOSED_LENGTH
);
1090 currentChar
= *decomposed
;
1093 for (idx
= 1;idx
< decompLength
;idx
++) {
1094 currentChar
= decomposed
[idx
];
1095 property
= CFUniCharGetCombiningPropertyForCharacter(currentChar
, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16) & 0xFF));
1097 if (property
== 230) return false; // Above priority
1102 CF_PRIVATE
uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar
, UTF16Char
*buffer
, CFIndex currentIndex
, CFIndex length
, uint32_t type
, const uint8_t *langCode
, uint32_t lastFlags
) {
1103 if (theChar
== 0x03A3) { // GREEK CAPITAL LETTER SIGMA
1104 if ((type
== kCFUniCharToLowercase
) && (currentIndex
> 0)) {
1105 UTF16Char
*start
= buffer
;
1106 UTF16Char
*end
= buffer
+ length
;
1107 UTF32Char otherChar
;
1109 // First check if we're after a cased character
1110 buffer
+= (currentIndex
- 1);
1111 while (start
<= buffer
) {
1112 otherChar
= *(buffer
--);
1113 if (CFUniCharIsSurrogateLowCharacter(otherChar
) && (start
<= buffer
) && CFUniCharIsSurrogateHighCharacter(*buffer
)) {
1114 otherChar
= CFUniCharGetLongCharacterForSurrogatePair(*(buffer
--), otherChar
);
1116 if (!CFUniCharIsMemberOf(otherChar
, kCFUniCharCaseIgnorableCharacterSet
)) {
1117 if (!CFUniCharIsMemberOf(otherChar
, kCFUniCharUppercaseLetterCharacterSet
) && !CFUniCharIsMemberOf(otherChar
, kCFUniCharLowercaseLetterCharacterSet
)) return 0; // Uppercase set contains titlecase
1122 // Next check if we're before a cased character
1123 buffer
= start
+ currentIndex
+ 1;
1124 while (buffer
< end
) {
1125 otherChar
= *(buffer
++);
1126 if (CFUniCharIsSurrogateHighCharacter(otherChar
) && (buffer
< end
) && CFUniCharIsSurrogateLowCharacter(*buffer
)) {
1127 otherChar
= CFUniCharGetLongCharacterForSurrogatePair(otherChar
, *(buffer
++));
1129 if (!CFUniCharIsMemberOf(otherChar
, kCFUniCharCaseIgnorableCharacterSet
)) {
1130 if (CFUniCharIsMemberOf(otherChar
, kCFUniCharUppercaseLetterCharacterSet
) || CFUniCharIsMemberOf(otherChar
, kCFUniCharLowercaseLetterCharacterSet
)) return 0; // Uppercase set contains titlecase
1134 return kCFUniCharCaseMapFinalSigma
;
1136 } else if (langCode
) {
1137 if (*((const uint16_t *)langCode
) == LITHUANIAN_LANG_CODE
) {
1138 if ((theChar
== 0x0307) && ((kCFUniCharCaseMapAfter_i
|kCFUniCharCaseMapMoreAbove
) & lastFlags
) == (kCFUniCharCaseMapAfter_i
|kCFUniCharCaseMapMoreAbove
)) {
1139 return (__CFUniCharIsAfter_i(buffer
, currentIndex
) ? kCFUniCharCaseMapAfter_i
: 0);
1140 } else if (type
== kCFUniCharToLowercase
) {
1141 if ((theChar
== 0x0049) || (theChar
== 0x004A) || (theChar
== 0x012E)) {
1143 return (__CFUniCharIsMoreAbove(buffer
+ currentIndex
, length
- currentIndex
) ? kCFUniCharCaseMapMoreAbove
: 0);
1145 } else if ((theChar
== 'i') || (theChar
== 'j')) {
1147 return (__CFUniCharIsMoreAbove(buffer
+ currentIndex
, length
- currentIndex
) ? (kCFUniCharCaseMapAfter_i
|kCFUniCharCaseMapMoreAbove
) : 0);
1149 } else if ((*((const uint16_t *)langCode
) == TURKISH_LANG_CODE
) || (*((const uint16_t *)langCode
) == AZERI_LANG_CODE
)) {
1150 if (type
== kCFUniCharToLowercase
) {
1151 if (theChar
== 0x0307) {
1152 return (kCFUniCharCaseMapMoreAbove
& lastFlags
? kCFUniCharCaseMapAfter_i
: 0);
1153 } else if (theChar
== 0x0049) {
1154 return (((++currentIndex
< length
) && (buffer
[currentIndex
] == 0x0307)) ? kCFUniCharCaseMapMoreAbove
: 0);
1157 } else if (*((const uint16_t *)langCode
) == DUTCH_LANG_CODE
) {
1158 if (kCFUniCharCaseMapDutchDigraph
& lastFlags
) {
1159 return (((theChar
== 0x006A) || (theChar
== 0x004A)) ? kCFUniCharCaseMapDutchDigraph
: 0);
1161 if ((type
== kCFUniCharToTitlecase
) && ((theChar
== 0x0069) || (theChar
== 0x0049))) {
1162 return (((++currentIndex
< length
) && ((buffer
[currentIndex
] == 0x006A) || (buffer
[currentIndex
] == 0x004A))) ? kCFUniCharCaseMapDutchDigraph
: 0);
1167 if (kCFUniCharCaseMapGreekTonos
& lastFlags
) { // still searching for tonos
1168 if (CFUniCharIsMemberOf(theChar
, kCFUniCharNonBaseCharacterSet
)) {
1169 return kCFUniCharCaseMapGreekTonos
;
1172 if (((theChar
>= 0x0370) && (theChar
< 0x0400)) || ((theChar
>= 0x1F00) && (theChar
< 0x2000))) { // Greek/Coptic & Greek extended ranges
1173 if ((type
== kCFUniCharToUppercase
) && (CFUniCharIsMemberOf(theChar
, kCFUniCharLetterCharacterSet
))) return kCFUniCharCaseMapGreekTonos
;
1179 // Unicode property database
1180 static __CFUniCharBitmapData
*__CFUniCharUnicodePropertyTable
= NULL
;
1181 static int __CFUniCharUnicodePropertyTableCount
= 0;
1183 static CFLock_t __CFUniCharPropTableLock
= CFLockInit
;
1185 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
1186 #if USE_MACHO_SEGMENT
1187 #define PROP_DB_FILE "__properties"
1189 #define PROP_DB_FILE "/CFUniCharPropertyDatabase.data"
1191 #elif DEPLOYMENT_TARGET_WINDOWS
1192 #if USE_MACHO_SEGMENT
1193 #define PROP_DB_FILE "__properties"
1195 #define PROP_DB_FILE L"CFUniCharPropertyDatabase.data"
1198 #error Unknown or unspecified DEPLOYMENT_TARGET
1201 const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType
, uint32_t plane
) {
1203 __CFLock(&__CFUniCharPropTableLock
);
1205 if (NULL
== __CFUniCharUnicodePropertyTable
) {
1206 __CFUniCharBitmapData
*table
;
1208 const void *bodyBase
;
1209 const void *planeBase
;
1212 int planeIndex
, planeCount
;
1216 if (!__CFUniCharLoadFile(PROP_DB_FILE
, &bytes
, &fileSize
) || !__CFSimpleFileSizeVerification(bytes
, fileSize
)) {
1217 __CFUnlock(&__CFUniCharPropTableLock
);
1221 #if defined (__cplusplus)
1222 bytes
= (uint8_t*)bytes
+ 4; // Skip Unicode version
1223 headerSize
= CFSwapInt32BigToHost(*((uint32_t *)bytes
)); bytes
= (uint8_t *)bytes
+ sizeof(uint32_t);
1225 bytes
+= 4; // Skip Unicode version
1226 headerSize
= CFSwapInt32BigToHost(*((uint32_t *)bytes
)); bytes
+= sizeof(uint32_t);
1229 headerSize
-= (sizeof(uint32_t) * 2);
1230 bodyBase
= (char *)bytes
+ headerSize
;
1232 count
= headerSize
/ sizeof(uint32_t);
1233 __CFUniCharUnicodePropertyTableCount
= count
;
1235 table
= (__CFUniCharBitmapData
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(__CFUniCharBitmapData
) * count
, 0);
1237 for (idx
= 0;idx
< count
;idx
++) {
1238 planeCount
= *((const uint8_t *)bodyBase
);
1239 planeBase
= (char *)bodyBase
+ planeCount
+ (planeCount
% 4 ? 4 - (planeCount
% 4) : 0);
1240 table
[idx
]._planes
= (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(const void *) * planeCount
, 0);
1242 for (planeIndex
= 0;planeIndex
< planeCount
;planeIndex
++) {
1243 if ((planeSize
= ((const uint8_t *)bodyBase
)[planeIndex
+ 1])) {
1244 table
[idx
]._planes
[planeIndex
] = (const uint8_t *)planeBase
;
1245 #if defined (__cplusplus)
1246 planeBase
= (char*)planeBase
+ (planeSize
* 256);
1248 planeBase
+= (planeSize
* 256);
1251 table
[idx
]._planes
[planeIndex
] = NULL
;
1255 table
[idx
]._numPlanes
= planeCount
;
1256 #if defined (__cplusplus)
1257 bodyBase
= (const uint8_t *)bodyBase
+ (CFSwapInt32BigToHost(*(uint32_t *)bytes
));
1258 ((uint32_t *&)bytes
) ++;
1260 bodyBase
+= (CFSwapInt32BigToHost(*((uint32_t *)bytes
++)));
1264 __CFUniCharUnicodePropertyTable
= table
;
1267 __CFUnlock(&__CFUniCharPropTableLock
);
1269 return (plane
< __CFUniCharUnicodePropertyTable
[propertyType
]._numPlanes
? __CFUniCharUnicodePropertyTable
[propertyType
]._planes
[plane
] : NULL
);
1272 CF_PRIVATE
uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType
) {
1273 (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType
, 0);
1274 return __CFUniCharUnicodePropertyTable
[propertyType
]._numPlanes
;
1277 CF_PRIVATE
uint32_t CFUniCharGetUnicodeProperty(UTF32Char character
, uint32_t propertyType
) {
1278 if (propertyType
== kCFUniCharCombiningProperty
) {
1279 return CFUniCharGetCombiningPropertyForCharacter(character
, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType
, (character
>> 16) & 0xFF));
1280 } else if (propertyType
== kCFUniCharBidiProperty
) {
1281 return CFUniCharGetBidiPropertyForCharacter(character
, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType
, (character
>> 16) & 0xFF));
1290 The UTF8 conversion in the following function is derived from ConvertUTF.c
1293 * Copyright 2001 Unicode, Inc.
1297 * This source code is provided as is by Unicode, Inc. No claims are
1298 * made as to fitness for any particular purpose. No warranties of any
1299 * kind are expressed or implied. The recipient agrees to determine
1300 * applicability of information provided. If this file has been
1301 * purchased on magnetic or optical media from Unicode, Inc., the
1302 * sole remedy for any claim will be exchange of defective media
1303 * within 90 days of receipt.
1305 * Limitations on Rights to Redistribute This Code
1307 * Unicode, Inc. hereby grants the right to freely use the information
1308 * supplied in this file in the creation of products supporting the
1309 * Unicode Standard, and to make copies of this file in any form
1310 * for internal or external distribution as long as this notice
1313 #define UNI_REPLACEMENT_CHAR (0x0000FFFDUL)
1315 bool CFUniCharFillDestinationBuffer(const UTF32Char
*src
, CFIndex srcLength
, void **dst
, CFIndex dstLength
, CFIndex
*filledLength
, uint32_t dstFormat
) {
1316 UTF32Char currentChar
;
1317 CFIndex usedLength
= *filledLength
;
1319 if (dstFormat
== kCFUniCharUTF16Format
) {
1320 UTF16Char
*dstBuffer
= (UTF16Char
*)*dst
;
1322 while (srcLength
-- > 0) {
1323 currentChar
= *(src
++);
1325 if (currentChar
> 0xFFFF) { // Non-BMP
1328 if (usedLength
> dstLength
) return false;
1329 currentChar
-= 0x10000;
1330 *(dstBuffer
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
1331 *(dstBuffer
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
1336 if (usedLength
> dstLength
) return false;
1337 *(dstBuffer
++) = (UTF16Char
)currentChar
;
1343 } else if (dstFormat
== kCFUniCharUTF8Format
) {
1344 uint8_t *dstBuffer
= (uint8_t *)*dst
;
1345 uint16_t bytesToWrite
= 0;
1346 const UTF32Char byteMask
= 0xBF;
1347 const UTF32Char byteMark
= 0x80;
1348 static const uint8_t firstByteMark
[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
1350 while (srcLength
-- > 0) {
1351 currentChar
= *(src
++);
1353 /* Figure out how many bytes the result will require */
1354 if (currentChar
< (UTF32Char
)0x80) {
1356 } else if (currentChar
< (UTF32Char
)0x800) {
1358 } else if (currentChar
< (UTF32Char
)0x10000) {
1360 } else if (currentChar
< (UTF32Char
)0x200000) {
1364 currentChar
= UNI_REPLACEMENT_CHAR
;
1367 usedLength
+= bytesToWrite
;
1370 if (usedLength
> dstLength
) return false;
1372 dstBuffer
+= bytesToWrite
;
1373 switch (bytesToWrite
) { /* note: everything falls through. */
1374 case 4: *--dstBuffer
= (currentChar
| byteMark
) & byteMask
; currentChar
>>= 6;
1375 case 3: *--dstBuffer
= (currentChar
| byteMark
) & byteMask
; currentChar
>>= 6;
1376 case 2: *--dstBuffer
= (currentChar
| byteMark
) & byteMask
; currentChar
>>= 6;
1377 case 1: *--dstBuffer
= currentChar
| firstByteMark
[bytesToWrite
];
1379 dstBuffer
+= bytesToWrite
;
1385 UTF32Char
*dstBuffer
= (UTF32Char
*)*dst
;
1387 while (srcLength
-- > 0) {
1388 currentChar
= *(src
++);
1392 if (usedLength
> dstLength
) return false;
1393 *(dstBuffer
++) = currentChar
;
1400 *filledLength
= usedLength
;
#if DEPLOYMENT_TARGET_WINDOWS
/*
 * Releases the lazily built character-set bitmap, mapping and property tables
 * and resets their globals.  Each group is torn down while holding the lock
 * that guards it.
 */
void __CFUniCharCleanup(void)
{
    int entry;

    // cleanup memory allocated by __CFUniCharLoadBitmapData()
    __CFLock(&__CFUniCharBitmapLock);

    if (NULL != __CFUniCharBitmapDataArray) {
        // Per-entry plane arrays go first, then the array that holds them.
        entry = 0;
        while (entry < (int)__CFUniCharNumberOfBitmaps) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray[entry]._planes);
            __CFUniCharBitmapDataArray[entry]._planes = NULL;
            ++entry;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray);
        __CFUniCharBitmapDataArray = NULL;
        __CFUniCharNumberOfBitmaps = 0;
    }

    __CFUnlock(&__CFUniCharBitmapLock);

    // cleanup memory allocated by CFUniCharGetMappingData()
    __CFLock(&__CFUniCharMappingTableLock);

    if (NULL != __CFUniCharMappingTables) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharMappingTables);
        __CFUniCharMappingTables = NULL;
    }

    // cleanup memory allocated by __CFUniCharLoadCaseMappingTable()
    if (NULL != __CFUniCharCaseMappingTableCounts) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharCaseMappingTableCounts);
        __CFUniCharCaseMappingTableCounts = NULL;

        // These two are only reset here, not deallocated.
        __CFUniCharCaseMappingTable = NULL;
        __CFUniCharCaseMappingExtraTable = NULL;
    }

    __CFUnlock(&__CFUniCharMappingTableLock);

    // cleanup memory allocated by CFUniCharGetUnicodePropertyDataForPlane()
    __CFLock(&__CFUniCharPropTableLock);

    if (NULL != __CFUniCharUnicodePropertyTable) {
        entry = 0;
        while (entry < __CFUniCharUnicodePropertyTableCount) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable[entry]._planes);
            __CFUniCharUnicodePropertyTable[entry]._planes = NULL;
            ++entry;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable);
        __CFUniCharUnicodePropertyTable = NULL;
        __CFUniCharUnicodePropertyTableCount = 0;
    }

    __CFUnlock(&__CFUniCharPropTableLock);
}
#endif
1463 #undef USE_MACHO_SEGMENT