2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 Copyright 2001-2002, Apple, Inc. All rights reserved.
25 Responsibility: Aki Inoue
28 #include <CoreFoundation/CFByteOrder.h>
29 #include "CFInternal.h"
30 #include "CFUniChar.h"
31 #include "CFStringEncodingConverterExt.h"
32 #include "CFUnicodeDecomposition.h"
33 #include "CFUniCharPriv.h"
34 #if defined(__MACOS8__)
36 #elif defined(__WIN32__)
41 #elif defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
43 #include <mach/mach.h>
46 #include <sys/types.h>
48 #include <sys/param.h>
54 #if defined(__MACOS8__)
55 #define MAXPATHLEN FILENAME_MAX
57 #define MAXPATHLEN MAX_PATH
60 // Memory map the file
61 #if !defined(__MACOS8__)
63 CF_INLINE
void __CFUniCharCharacterSetPath(char *cpath
) {
65 strlcpy(cpath
, __kCFCharacterSetDir
, MAXPATHLEN
);
66 #elif defined(__WIN32__)
67 strlcpy(cpath
, _CFDLLPath(), MAXPATHLEN
);
69 strlcpy(cpath
, __kCFCharacterSetDir
, MAXPATHLEN
);
72 #if defined(__WIN32__)
73 strlcat(cpath
, "\\CharacterSets\\", MAXPATHLEN
);
75 strlcat(cpath
, "/CharacterSets/", MAXPATHLEN
);
/*
 * Maps the file named fileName read-only and stores the start of the mapping
 * in *bytes.  The Windows build uses CreateFile / CreateFileMapping /
 * MapViewOfFileEx; the other build uses open / fstat / mmap.
 * Returns false as soon as the file cannot be opened.
 * NOTE(review): the tail of the non-Windows path is not visible here, so the
 * cleanup on the fstat/mmap failure branch should be confirmed.
 */
79 static bool __CFUniCharLoadBytesFromFile(const char *fileName
, const void **bytes
) {
80 #if defined(__WIN32__)
81 HANDLE bitmapFileHandle
;
/* Open the existing file for shared reading. */
84 if ((bitmapFileHandle
= CreateFile(fileName
, GENERIC_READ
, FILE_SHARE_READ
, NULL
, OPEN_EXISTING
, FILE_ATTRIBUTE_NORMAL
, NULL
)) == INVALID_HANDLE_VALUE
) return false;
/* Create a read-only mapping object; the file handle is closed right after. */
85 mappingHandle
= CreateFileMapping(bitmapFileHandle
, NULL
, PAGE_READONLY
, 0, 0, NULL
);
86 CloseHandle(bitmapFileHandle
);
87 if (!mappingHandle
) return false;
/* Map a view of the whole file (length 0), then close the mapping handle. */
89 *bytes
= MapViewOfFileEx(mappingHandle
, FILE_MAP_READ
, 0, 0, 0, NULL
);
90 CloseHandle(mappingHandle
);
/* Success is reported only when a view address was obtained. */
92 return (*bytes
? true : false);
/* Non-Windows path: open read-only, then size the mapping from fstat. */
97 if ((fd
= open(fileName
, O_RDONLY
, 0)) < 0) return false;
/* Note that *bytes is assigned inside the condition, so on an mmap failure it
   holds the (void *)-1 sentinel. */
99 if (fstat(fd
, &statBuf
) < 0 || (*bytes
= mmap(0, statBuf
.st_size
, PROT_READ
, MAP_PRIVATE
, fd
, 0)) == (void *)-1) {
109 static bool __CFUniCharLoadFile(const char *bitmapName
, const void **bytes
) {
110 char cpath
[MAXPATHLEN
];
112 __CFUniCharCharacterSetPath(cpath
);
113 strlcat(cpath
, bitmapName
, MAXPATHLEN
);
115 return __CFUniCharLoadBytesFromFile(cpath
, bytes
);
117 #endif !defined(__MACOS8__)
120 CF_INLINE
bool isControl(UTF32Char theChar
, uint16_t charset
, const void *data
) { // ISO Control
121 if ((theChar
<= 0x001F) || (theChar
>= 0x007F && theChar
<= 0x009F)) return true;
125 CF_INLINE
bool isWhitespace(UTF32Char theChar
, uint16_t charset
, const void *data
) { // Space
126 if ((theChar
== 0x0020) || (theChar
== 0x0009) || (theChar
== 0x00A0) || (theChar
== 0x1680) || (theChar
>= 0x2000 && theChar
<= 0x200B) || (theChar
== 0x202F) || (theChar
== 0x205F) || (theChar
== 0x3000)) return true;
130 CF_INLINE
bool isWhitespaceAndNewLine(UTF32Char theChar
, uint16_t charset
, const void *data
) { // White space
131 if (isWhitespace(theChar
, charset
, data
) || (theChar
>= 0x000A && theChar
<= 0x000D) || (theChar
== 0x0085) || (theChar
== 0x2028) || (theChar
== 0x2029)) return true;
135 #if defined(__MACOS8__)
/* This structure MUST match the sets in NSRulebook.h.  The "__CFCSetIsMemberSet()" function is a modified version of the one in Text shlib.
*/
typedef struct _CFCharSetPrivateStruct {
    int issorted;	/* 1=sorted or 0=unsorted ; 2=is_property_table */
    int bitrange[4];	/* bitmap (each bit is a 1k range in space of 2^17) */
    int nsingles;	/* number of single elements */
    int nranges;	/* number of ranges */
    int singmin;	/* minimum single element */
    int singmax;	/* maximum single element */
    int array[1];	/* actually bunch of singles followed by ranges */
} CFCharSetPrivateStruct;
148 /* Membership function for complex sets
150 CF_INLINE
bool __CFCSetIsMemberSet(const CFCharSetPrivateStruct
*set
, UTF16Char theChar
) {
155 if (set
->issorted
!= 1) {
158 theChar
&= 0x0001FFFF; /* range 1-131k */
159 if (__CFCSetBitsInRange(theChar
, set
->bitrange
)) {
160 if (theChar
>= set
->singmin
&& theChar
<= set
->singmax
) {
161 tmp
= (int *) &(set
->array
[0]);
162 if ((nel
= set
->nsingles
) < __kCFSetBreakeven
) {
163 for (i
= 0; i
< nel
; i
++) {
164 if (*tmp
== theChar
) return true;
168 else { // this does a binary search
169 p
= tmp
; q
= tmp
+ (nel
-1);
171 wari
= (p
+ ((q
-p
)>>1));
172 if (theChar
< *wari
) q
= wari
- 1;
173 else if (theChar
> *wari
) p
= wari
+ 1;
178 tmp
= (int *) &(set
->array
[0]) + set
->nsingles
;
179 if ((nel
= set
->nranges
) < __kCFSetBreakeven
) {
183 if (theChar
<= *tmp2
) {
184 if (theChar
>= *tmp
) return true;
190 } else { /* binary search the ranges */
191 p
= tmp
; q
= tmp
+ (2*nel
-2);
193 i
= (q
- p
) >> 1; /* >>1 means divide by 2 */
194 wari
= p
+ (i
& 0xFFFFFFFE); /* &fffffffe make it an even num */
195 if (theChar
< *wari
) q
= wari
- 2;
196 else if (theChar
> *(wari
+ 1)) p
= wari
+ 2;
201 /* fall through & return zero */
203 return false; /* not a member */
206 /* Take a private "set" structure and make a bitmap from it. Return the bitmap. THE CALLER MUST RELEASE THE RETURNED MEMORY as necessary.
209 CF_INLINE
void __CFCSetBitmapProcessManyCharacters(unsigned char *map
, unsigned n
, unsigned m
) {
211 for (tmp
= n
; tmp
<= m
; tmp
++) CFUniCharAddCharacterToBitmap(tmp
, map
);
214 CF_INLINE
void __CFCSetMakeSetBitmapFromSet(const CFCharSetPrivateStruct
*theSet
, uint8_t *map
)
220 for (cnt
= 0; cnt
< theSet
->nsingles
; cnt
++) {
221 ctmp
= theSet
->array
[cnt
];
222 CFUniCharAddCharacterToBitmap(tmp
, map
);
224 ip
= (int *) (&(theSet
->array
[0]) + theSet
->nsingles
);
225 cnt
= theSet
->nranges
;
227 /* This could be more efficient: turn on whole bytes at a time
228 when there are such cases as 8 characters in a row... */
229 __CFCSetBitmapProcessManyCharacters((unsigned char *)map
, *ip
, *(ip
+1));
235 extern const CFCharSetPrivateStruct
*_CFdecimalDigitCharacterSetData
;
236 extern const CFCharSetPrivateStruct
*_CFletterCharacterSetData
;
237 extern const CFCharSetPrivateStruct
*_CFlowercaseLetterCharacterSetData
;
238 extern const CFCharSetPrivateStruct
*_CFuppercaseLetterCharacterSetData
;
239 extern const CFCharSetPrivateStruct
*_CFnonBaseCharacterSetData
;
240 extern const CFCharSetPrivateStruct
*_CFdecomposableCharacterSetData
;
241 extern const CFCharSetPrivateStruct
*_CFpunctuationCharacterSetData
;
242 extern const CFCharSetPrivateStruct
*_CFalphanumericCharacterSetData
;
243 extern const CFCharSetPrivateStruct
*_CFillegalCharacterSetData
;
244 extern const CFCharSetPrivateStruct
*_CFhasNonSelfLowercaseMappingData
;
245 extern const CFCharSetPrivateStruct
*_CFhasNonSelfUppercaseMappingData
;
246 extern const CFCharSetPrivateStruct
*_CFhasNonSelfTitlecaseMappingData
;
251 const uint8_t **_planes
;
252 } __CFUniCharBitmapData
;
254 static char __CFUniCharUnicodeVersionString
[8] = {0, 0, 0, 0, 0, 0, 0, 0};
256 static uint32_t __CFUniCharNumberOfBitmaps
= 0;
257 static __CFUniCharBitmapData
*__CFUniCharBitmapDataArray
= NULL
;
259 static CFSpinLock_t __CFUniCharBitmapLock
= 0;
261 #ifndef CF_UNICHAR_BITMAP_FILE
262 #define CF_UNICHAR_BITMAP_FILE "CFCharacterSetBitmaps.bitmap"
263 #endif CF_UNICHAR_BITMAP_FILE
265 static bool __CFUniCharLoadBitmapData(void) {
269 uint8_t currentPlane
;
271 const void *bitmapBase
;
273 int idx
, bitmapIndex
;
275 __CFSpinLock(&__CFUniCharBitmapLock
);
277 if (__CFUniCharBitmapDataArray
|| !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE
, &bytes
)) {
278 __CFSpinUnlock(&__CFUniCharBitmapLock
);
282 for (idx
= 0;idx
< 4 && ((const uint8_t *)bytes
)[idx
];idx
++) {
283 __CFUniCharUnicodeVersionString
[idx
* 2] = ((const uint8_t *)bytes
)[idx
];
284 __CFUniCharUnicodeVersionString
[idx
* 2 + 1] = '.';
286 __CFUniCharUnicodeVersionString
[(idx
< 4 ? idx
* 2 - 1 : 7)] = '\0';
288 headerSize
= CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes
+ 4)));
290 bitmapBase
= (char *)bytes
+ headerSize
;
291 (char *)bytes
+= (sizeof(uint32_t) * 2);
292 headerSize
-= (sizeof(uint32_t) * 2);
294 __CFUniCharNumberOfBitmaps
= headerSize
/ (sizeof(uint32_t) * 2);
296 __CFUniCharBitmapDataArray
= (__CFUniCharBitmapData
*)CFAllocatorAllocate(NULL
, sizeof(__CFUniCharBitmapData
) * __CFUniCharNumberOfBitmaps
, 0);
298 for (idx
= 0;idx
< (int)__CFUniCharNumberOfBitmaps
;idx
++) {
299 bitmap
= (char *)bitmapBase
+ CFSwapInt32BigToHost(*(((uint32_t *)bytes
)++));
300 bitmapSize
= CFSwapInt32BigToHost(*(((uint32_t *)bytes
)++));
302 numPlanes
= bitmapSize
/ (8 * 1024);
303 numPlanes
= *(const uint8_t *)((char *)bitmap
+ (((numPlanes
- 1) * ((8 * 1024) + 1)) - 1)) + 1;
304 __CFUniCharBitmapDataArray
[idx
]._planes
= (const uint8_t **)CFAllocatorAllocate(NULL
, sizeof(const void *) * numPlanes
, 0);
305 __CFUniCharBitmapDataArray
[idx
]._numPlanes
= numPlanes
;
308 for (bitmapIndex
= 0;bitmapIndex
< numPlanes
;bitmapIndex
++) {
309 if (bitmapIndex
== currentPlane
) {
310 __CFUniCharBitmapDataArray
[idx
]._planes
[bitmapIndex
] = bitmap
;
311 (char *)bitmap
+= (8 * 1024);
312 currentPlane
= *(((const uint8_t *)bitmap
)++);
314 __CFUniCharBitmapDataArray
[idx
]._planes
[bitmapIndex
] = NULL
;
319 __CFSpinUnlock(&__CFUniCharBitmapLock
);
324 __private_extern__
const char *__CFUniCharGetUnicodeVersionString(void) {
325 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
326 return __CFUniCharUnicodeVersionString
;
331 #define CONTROLSET_HAS_FORMATTER 1
333 bool CFUniCharIsMemberOf(UTF32Char theChar
, uint32_t charset
) {
334 #if CONTROLSET_HAS_FORMATTER
335 if (charset
== kCFUniCharControlCharacterSet
) charset
= kCFUniCharControlAndFormatterCharacterSet
;
336 #endif CONTROLSET_HAS_FORMATTER
339 case kCFUniCharControlCharacterSet
:
340 return isControl(theChar
, charset
, NULL
);
342 case kCFUniCharWhitespaceCharacterSet
:
343 return isWhitespace(theChar
, charset
, NULL
);
345 case kCFUniCharWhitespaceAndNewlineCharacterSet
:
346 return isWhitespaceAndNewLine(theChar
, charset
, NULL
);
348 #if defined(__MACOS8__)
349 case kCFUniCharDecimalDigitCharacterSet
:
350 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFdecimalDigitCharacterSetData
, theChar
);
351 case kCFUniCharLetterCharacterSet
:
352 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFletterCharacterSetData
, theChar
);
353 case kCFUniCharLowercaseLetterCharacterSet
:
354 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFlowercaseLetterCharacterSetData
, theChar
);
355 case kCFUniCharUppercaseLetterCharacterSet
:
356 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFuppercaseLetterCharacterSetData
, theChar
);
357 case kCFUniCharNonBaseCharacterSet
:
358 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFnonBaseCharacterSetData
, theChar
);
359 case kCFUniCharAlphaNumericCharacterSet
:
360 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFalphanumericCharacterSetData
, theChar
);
361 case kCFUniCharDecomposableCharacterSet
:
362 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFdecomposableCharacterSetData
, theChar
);
363 case kCFUniCharPunctuationCharacterSet
:
364 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFpunctuationCharacterSetData
, theChar
);
365 case kCFUniCharIllegalCharacterSet
:
366 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFillegalCharacterSetData
, theChar
);
367 case kCFUniCharHasNonSelfLowercaseMapping
:
368 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFhasNonSelfLowercaseMappingData
, theChar
);
369 case kCFUniCharHasNonSelfUppercaseMapping
:
370 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFhasNonSelfUppercaseMappingData
, theChar
);
371 case kCFUniCharHasNonSelfTitlecaseMapping
:
372 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFhasNonSelfTitlecaseMappingData
, theChar
);
377 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
379 if ((charset
- kCFUniCharDecimalDigitCharacterSet
) < __CFUniCharNumberOfBitmaps
) {
380 __CFUniCharBitmapData
*data
= __CFUniCharBitmapDataArray
+ (charset
- kCFUniCharDecimalDigitCharacterSet
);
381 uint8_t planeNo
= (theChar
>> 16) & 0xFF;
383 // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16
384 if (charset
== kCFUniCharIllegalCharacterSet
) {
385 if (planeNo
== 0x0E) { // Plane 14
387 return (((theChar
== 0x01) || ((theChar
> 0x1F) && (theChar
< 0x80))) ? false : true);
388 } else if (planeNo
== 0x0F || planeNo
== 0x10) { // Plane 15 & 16
389 return ((theChar
& 0xFF) > 0xFFFD ? true : false);
391 return (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] ? !CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) : true);
393 } else if (charset
== kCFUniCharControlAndFormatterCharacterSet
) {
394 if (planeNo
== 0x0E) { // Plane 14
396 return (((theChar
== 0x01) || ((theChar
> 0x1F) && (theChar
< 0x80))) ? true : false);
398 return (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] ? CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) : false);
401 return (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] ? CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) : false);
409 const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset
, uint32_t plane
) {
410 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
412 #if CONTROLSET_HAS_FORMATTER
413 if (charset
== kCFUniCharControlCharacterSet
) charset
= kCFUniCharControlAndFormatterCharacterSet
;
414 #endif CONTROLSET_HAS_FORMATTER
416 if (charset
> kCFUniCharWhitespaceAndNewlineCharacterSet
&& (charset
- kCFUniCharDecimalDigitCharacterSet
) < __CFUniCharNumberOfBitmaps
&& charset
!= kCFUniCharIllegalCharacterSet
) {
417 __CFUniCharBitmapData
*data
= __CFUniCharBitmapDataArray
+ (charset
- kCFUniCharDecimalDigitCharacterSet
);
419 return (plane
< data
->_numPlanes
? data
->_planes
[plane
] : NULL
);
424 __private_extern__
uint8_t CFUniCharGetBitmapForPlane(uint32_t charset
, uint32_t plane
, void *bitmap
, bool isInverted
) {
425 const uint8_t *src
= CFUniCharGetBitmapPtrForPlane(charset
, plane
);
426 int numBytes
= (8 * 1024);
430 while (numBytes
-- > 0) *(((uint8_t *)bitmap
)++) = ~(*(src
++));
432 while (numBytes
-- > 0) *(((uint8_t *)bitmap
)++) = *(src
++);
434 return kCFUniCharBitmapFilled
;
435 } else if (charset
== kCFUniCharIllegalCharacterSet
) {
436 __CFUniCharBitmapData
*data
= __CFUniCharBitmapDataArray
+ (charset
- kCFUniCharDecimalDigitCharacterSet
);
438 if (plane
< data
->_numPlanes
&& (src
= data
->_planes
[plane
])) {
440 while (numBytes
-- > 0) *(((uint8_t *)bitmap
)++) = *(src
++);
442 while (numBytes
-- > 0) *(((uint8_t *)bitmap
)++) = ~(*(src
++));
444 return kCFUniCharBitmapFilled
;
445 } else if (plane
== 0x0E) { // Plane 14
447 uint8_t asciiRange
= (isInverted
? (uint8_t)0xFF : (uint8_t)0);
448 uint8_t otherRange
= (isInverted
? (uint8_t)0 : (uint8_t)0xFF);
450 *(((uint8_t *)bitmap
)++) = 0x02; // UE0001 LANGUAGE TAG
451 for (idx
= 1;idx
< numBytes
;idx
++) {
452 *(((uint8_t *)bitmap
)++) = ((idx
>= (0x20 / 8) && (idx
< (0x80 / 8))) ? asciiRange
: otherRange
);
454 return kCFUniCharBitmapFilled
;
455 } else if (plane
== 0x0F || plane
== 0x10) { // Plane 15 & 16
456 uint32_t value
= (isInverted
? 0xFFFFFFFF : 0);
457 numBytes
/= 4; // for 32bit
459 while (numBytes
-- > 0) *(((uint32_t *)bitmap
)++) = value
;
460 *(((uint8_t *)bitmap
) - 5) = (isInverted
? 0x3F : 0xC0); // 0xFFFE & 0xFFFF
461 return kCFUniCharBitmapFilled
;
463 return (isInverted
? kCFUniCharBitmapEmpty
: kCFUniCharBitmapAll
);
464 #if CONTROLSET_HAS_FORMATTER
465 } else if ((charset
== kCFUniCharControlCharacterSet
) && (plane
== 0x0E)) { // Language tags
467 uint8_t asciiRange
= (isInverted
? (uint8_t)0 : (uint8_t)0xFF);
468 uint8_t otherRange
= (isInverted
? (uint8_t)0xFF : (uint8_t)0);
470 *(((uint8_t *)bitmap
)++) = 0x02; // UE0001 LANGUAGE TAG
471 for (idx
= 1;idx
< numBytes
;idx
++) {
472 *(((uint8_t *)bitmap
)++) = ((idx
>= (0x20 / 8) && (idx
< (0x80 / 8))) ? asciiRange
: otherRange
);
474 return kCFUniCharBitmapFilled
;
475 #endif CONTROLSET_HAS_FORMATTER
476 } else if (charset
< kCFUniCharDecimalDigitCharacterSet
) {
477 if (plane
) return (isInverted
? kCFUniCharBitmapAll
: kCFUniCharBitmapEmpty
);
479 if (charset
== kCFUniCharControlCharacterSet
) {
481 uint8_t nonFillValue
= (isInverted
? (uint8_t)0xFF : (uint8_t)0);
482 uint8_t fillValue
= (isInverted
? (uint8_t)0 : (uint8_t)0xFF);
483 uint8_t *bitmapP
= (uint8_t *)bitmap
;
485 for (idx
= 0;idx
< numBytes
;idx
++) {
486 *(bitmapP
++) = (idx
< (0x20 / 8) || (idx
>= (0x80 / 8) && idx
< (0xA0 / 8)) ? fillValue
: nonFillValue
);
491 CFUniCharRemoveCharacterFromBitmap(0x007F, bitmap
);
493 CFUniCharAddCharacterToBitmap(0x007F, bitmap
);
496 uint8_t *bitmapBase
= (uint8_t *)bitmap
;
498 uint8_t nonFillValue
= (isInverted
? (uint8_t)0xFF : (uint8_t)0);
500 while (numBytes
-- > 0) *(((uint8_t *)bitmap
)++) = nonFillValue
;
502 if (charset
== kCFUniCharWhitespaceAndNewlineCharacterSet
) {
503 static const UniChar newlines
[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029};
505 for (idx
= 0;idx
< (int)(sizeof(newlines
) / sizeof(*newlines
)); idx
++) {
507 CFUniCharRemoveCharacterFromBitmap(newlines
[idx
], bitmapBase
);
509 CFUniCharAddCharacterToBitmap(newlines
[idx
], bitmapBase
);
515 CFUniCharRemoveCharacterFromBitmap(0x0009, bitmapBase
);
516 CFUniCharRemoveCharacterFromBitmap(0x0020, bitmapBase
);
517 CFUniCharRemoveCharacterFromBitmap(0x00A0, bitmapBase
);
518 CFUniCharRemoveCharacterFromBitmap(0x1680, bitmapBase
);
519 CFUniCharRemoveCharacterFromBitmap(0x202F, bitmapBase
);
520 CFUniCharRemoveCharacterFromBitmap(0x205F, bitmapBase
);
521 CFUniCharRemoveCharacterFromBitmap(0x3000, bitmapBase
);
523 CFUniCharAddCharacterToBitmap(0x0009, bitmapBase
);
524 CFUniCharAddCharacterToBitmap(0x0020, bitmapBase
);
525 CFUniCharAddCharacterToBitmap(0x00A0, bitmapBase
);
526 CFUniCharAddCharacterToBitmap(0x1680, bitmapBase
);
527 CFUniCharAddCharacterToBitmap(0x202F, bitmapBase
);
528 CFUniCharAddCharacterToBitmap(0x205F, bitmapBase
);
529 CFUniCharAddCharacterToBitmap(0x3000, bitmapBase
);
532 for (idx
= 0x2000;idx
<= 0x200B;idx
++) {
534 CFUniCharRemoveCharacterFromBitmap(idx
, bitmapBase
);
536 CFUniCharAddCharacterToBitmap(idx
, bitmapBase
);
540 return kCFUniCharBitmapFilled
;
542 return (isInverted
? kCFUniCharBitmapAll
: kCFUniCharBitmapEmpty
);
545 __private_extern__
uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset
) {
546 #if defined(__MACOS8__)
549 #if CONTROLSET_HAS_FORMATTER
550 if (charset
== kCFUniCharControlCharacterSet
) return 15; // 0 to 14
551 #endif CONTROLSET_HAS_FORMATTER
553 if (charset
< kCFUniCharDecimalDigitCharacterSet
) {
555 } else if (charset
== kCFUniCharIllegalCharacterSet
) {
560 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
562 numPlanes
= __CFUniCharBitmapDataArray
[charset
- kCFUniCharDecimalDigitCharacterSet
]._numPlanes
;
569 // Mapping data loading
570 static const void **__CFUniCharMappingTables
= NULL
;
572 static CFSpinLock_t __CFUniCharMappingTableLock
= 0;
574 #if defined(__BIG_ENDIAN__)
575 #define MAPPING_TABLE_FILE "CFUnicodeData-B.mapping"
577 #define MAPPING_TABLE_FILE "CFUnicodeData-L.mapping"
578 #endif __BIG_ENDIAN__
580 __private_extern__
const void *CFUniCharGetMappingData(uint32_t type
) {
582 __CFSpinLock(&__CFUniCharMappingTableLock
);
584 if (NULL
== __CFUniCharMappingTables
) {
586 const void *bodyBase
;
590 if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE
, &bytes
)) {
591 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
595 (char *)bytes
+= 4; // Skip Unicode version
596 headerSize
= *(((uint32_t *)bytes
)++);
597 headerSize
-= (sizeof(uint32_t) * 2);
598 bodyBase
= (char *)bytes
+ headerSize
;
600 count
= headerSize
/ sizeof(uint32_t);
602 __CFUniCharMappingTables
= (const void **)CFAllocatorAllocate(NULL
, sizeof(const void *) * count
, 0);
604 for (idx
= 0;idx
< count
;idx
++) {
605 __CFUniCharMappingTables
[idx
] = (char *)bodyBase
+ *(((uint32_t *)bytes
)++);
609 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
611 return __CFUniCharMappingTables
[type
];
// Case mapping functions
#define DO_SPECIAL_CASE_MAPPING 1

static uint32_t *__CFUniCharCaseMappingTableCounts = NULL;
static uint32_t **__CFUniCharCaseMappingTable = NULL;
static const uint32_t **__CFUniCharCaseMappingExtraTable = NULL;

/*
 * One entry of a case-mapping table: source character and its encoded
 * mapping value.
 * NOTE(review): the opening of this typedef was not visible in the reviewed
 * text.  Two uint32_t members are used because the tables are uint32_t
 * arrays whose entry count is computed as size / (sizeof(uint32_t) * 2).
 */
typedef struct {
    uint32_t _key;
    uint32_t _value;
} __CFUniCharCaseMappings;
626 /* Binary searches CFStringEncodingUnicodeTo8BitCharMap */
627 static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings
*theTable
, uint32_t numElem
, UTF32Char character
) {
628 const __CFUniCharCaseMappings
*p
, *q
, *divider
;
630 if ((character
< theTable
[0]._key
) || (character
> theTable
[numElem
-1]._key
)) {
636 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
637 if (character
< divider
->_key
) { q
= divider
- 1; }
638 else if (character
> divider
->_key
) { p
= divider
+ 1; }
639 else { return divider
->_value
; }
644 #define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1)
646 static bool __CFUniCharLoadCaseMappingTable(void) {
649 if (NULL
== __CFUniCharMappingTables
) (void)CFUniCharGetMappingData(kCFUniCharToLowercase
);
650 if (NULL
== __CFUniCharMappingTables
) return false;
652 __CFSpinLock(&__CFUniCharMappingTableLock
);
654 if (__CFUniCharCaseMappingTableCounts
) {
655 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
659 __CFUniCharCaseMappingTableCounts
= (uint32_t *)CFAllocatorAllocate(NULL
, sizeof(uint32_t) * NUM_CASE_MAP_DATA
+ sizeof(uint32_t *) * NUM_CASE_MAP_DATA
* 2, 0);
660 __CFUniCharCaseMappingTable
= (uint32_t **)((char *)__CFUniCharCaseMappingTableCounts
+ sizeof(uint32_t) * NUM_CASE_MAP_DATA
);
661 __CFUniCharCaseMappingExtraTable
= (const uint32_t **)__CFUniCharCaseMappingTable
+ NUM_CASE_MAP_DATA
;
663 for (idx
= 0;idx
< NUM_CASE_MAP_DATA
;idx
++) {
664 __CFUniCharCaseMappingTableCounts
[idx
] = *((uint32_t *)__CFUniCharMappingTables
[idx
]) / (sizeof(uint32_t) * 2);
665 __CFUniCharCaseMappingTable
[idx
] = ((uint32_t *)__CFUniCharMappingTables
[idx
]) + 1;
666 __CFUniCharCaseMappingExtraTable
[idx
] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable
[idx
] + *((uint32_t *)__CFUniCharMappingTables
[idx
]));
669 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
674 #define TURKISH_LANG_CODE (0x7472) // tr
675 #define LITHUANIAN_LANG_CODE (0x6C74) // lt
676 #define AZERI_LANG_CODE (0x617A) // az
678 #define TURKISH_LANG_CODE (0x7274) // tr
679 #define LITHUANIAN_LANG_CODE (0x746C) // lt
680 #define AZERI_LANG_CODE (0x7A61) // az
681 #endif __BIG_ENDIAN__
683 uint32_t CFUniCharMapCaseTo(UTF32Char theChar
, UTF16Char
*convertedChar
, uint32_t maxLength
, uint32_t ctype
, uint32_t flags
, const uint8_t *langCode
) {
684 __CFUniCharBitmapData
*data
;
685 uint8_t planeNo
= (theChar
>> 16) & 0xFF;
689 #if DO_SPECIAL_CASE_MAPPING
690 if (flags
& kCFUniCharCaseMapFinalSigma
) {
691 if (theChar
== 0x03A3) { // Final sigma
692 *convertedChar
= (ctype
== kCFUniCharToLowercase
? 0x03C2 : 0x03A3);
698 switch (*(uint16_t *)langCode
) {
699 case LITHUANIAN_LANG_CODE
:
700 if (theChar
== 0x0307 && (flags
& kCFUniCharCaseMapAfter_i
)) {
702 } else if (ctype
== kCFUniCharToLowercase
) {
703 if (flags
& kCFUniCharCaseMapMoreAbove
) {
705 case 0x0049: // LATIN CAPITAL LETTER I
706 *(convertedChar
++) = 0x0069;
707 *(convertedChar
++) = 0x0307;
710 case 0x004A: // LATIN CAPITAL LETTER J
711 *(convertedChar
++) = 0x006A;
712 *(convertedChar
++) = 0x0307;
715 case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK
716 *(convertedChar
++) = 0x012F;
717 *(convertedChar
++) = 0x0307;
724 case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE
725 *(convertedChar
++) = 0x0069;
726 *(convertedChar
++) = 0x0307;
727 *(convertedChar
++) = 0x0300;
730 case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE
731 *(convertedChar
++) = 0x0069;
732 *(convertedChar
++) = 0x0307;
733 *(convertedChar
++) = 0x0301;
736 case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE
737 *(convertedChar
++) = 0x0069;
738 *(convertedChar
++) = 0x0307;
739 *(convertedChar
++) = 0x0303;
747 case TURKISH_LANG_CODE
:
748 case AZERI_LANG_CODE
:
749 if ((theChar
== 0x0049) || (theChar
== 0x0131)) { // LATIN CAPITAL LETTER I & LATIN SMALL LETTER DOTLESS I
750 *convertedChar
= (((ctype
== kCFUniCharToLowercase
) || (ctype
== kCFUniCharCaseFold
)) ? ((kCFUniCharCaseMapMoreAbove
& flags
) ? 0x0069 : 0x0131) : 0x0049);
752 } else if ((theChar
== 0x0069) || (theChar
== 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE
753 *convertedChar
= (((ctype
== kCFUniCharToLowercase
) || (ctype
== kCFUniCharCaseFold
)) ? 0x0069 : 0x0130);
755 } else if (theChar
== 0x0307 && (kCFUniCharCaseMapAfter_i
& flags
)) { // COMBINING DOT ABOVE AFTER_i
756 if (ctype
== kCFUniCharToLowercase
) {
759 *convertedChar
= 0x0307;
768 #endif DO_SPECIAL_CASE_MAPPING
770 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
772 data
= __CFUniCharBitmapDataArray
+ ((ctype
+ kCFUniCharHasNonSelfLowercaseCharacterSet
) - kCFUniCharDecimalDigitCharacterSet
);
774 if (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] && CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) && (__CFUniCharCaseMappingTableCounts
|| __CFUniCharLoadCaseMappingTable())) {
775 uint32_t value
= __CFUniCharGetMappedCase((const __CFUniCharCaseMappings
*)__CFUniCharCaseMappingTable
[ctype
], __CFUniCharCaseMappingTableCounts
[ctype
], theChar
);
777 if (!value
&& ctype
== kCFUniCharToTitlecase
) {
778 value
= __CFUniCharGetMappedCase((const __CFUniCharCaseMappings
*)__CFUniCharCaseMappingTable
[kCFUniCharToUppercase
], __CFUniCharCaseMappingTableCounts
[kCFUniCharToUppercase
], theChar
);
779 if (value
) ctype
= kCFUniCharToUppercase
;
783 int count
= CFUniCharConvertFlagToCount(value
);
786 if (value
& kCFUniCharNonBmpFlag
) {
788 value
= (value
& 0xFFFFFF) - 0x10000;
789 *(convertedChar
++) = (value
>> 10) + 0xD800UL
;
790 *(convertedChar
++) = (value
& 0x3FF) + 0xDC00UL
;
794 *convertedChar
= (UTF16Char
)value
;
797 } else if (count
< (int)maxLength
) {
798 const uint32_t *extraMapping
= __CFUniCharCaseMappingExtraTable
[ctype
] + (value
& 0xFFFFFF);
800 if (value
& kCFUniCharNonBmpFlag
) {
803 while (count
-- > 0) {
804 value
= *(extraMapping
++);
805 if (value
> 0xFFFF) {
806 if (copiedLen
+ 2 >= (int)maxLength
) break;
807 value
= (value
& 0xFFFFFF) - 0x10000;
808 convertedChar
[copiedLen
++] = (value
>> 10) + 0xD800UL
;
809 convertedChar
[copiedLen
++] = (value
& 0x3FF) + 0xDC00UL
;
811 if (copiedLen
+ 1 >= (int)maxLength
) break;
812 convertedChar
[copiedLen
++] = value
;
815 if (!count
) return copiedLen
;
819 for (idx
= 0;idx
< count
;idx
++) *(convertedChar
++) = (UTF16Char
)*(extraMapping
++);
824 } else if (ctype
== kCFUniCharCaseFold
) {
825 ctype
= kCFUniCharToLowercase
;
829 if (theChar
> 0xFFFF) { // non-BMP
830 theChar
= (theChar
& 0xFFFFFF) - 0x10000;
831 *(convertedChar
++) = (theChar
>> 10) + 0xD800UL
;
832 *(convertedChar
++) = (theChar
& 0x3FF) + 0xDC00UL
;
835 *convertedChar
= theChar
;
840 UInt32
CFUniCharMapTo(UniChar theChar
, UniChar
*convertedChar
, UInt32 maxLength
, uint16_t ctype
, UInt32 flags
) {
841 if (ctype
== kCFUniCharCaseFold
+ 1) { // kCFUniCharDecompose
842 if (CFUniCharIsDecomposableCharacter(theChar
, false)) {
843 UTF32Char buffer
[MAX_DECOMPOSED_LENGTH
];
844 CFIndex usedLength
= CFUniCharDecomposeCharacter(theChar
, buffer
, MAX_DECOMPOSED_LENGTH
);
847 for (idx
= 0;idx
< usedLength
;idx
++) *(convertedChar
++) = buffer
[idx
];
850 *convertedChar
= theChar
;
854 return CFUniCharMapCaseTo(theChar
, convertedChar
, maxLength
, ctype
, flags
, NULL
);
858 CF_INLINE
bool __CFUniCharIsMoreAbove(UTF16Char
*buffer
, uint32_t length
) {
859 UTF32Char currentChar
;
862 while (length
-- > 0) {
863 currentChar
= *(buffer
)++;
864 if (CFUniCharIsSurrogateHighCharacter(currentChar
) && (length
> 0) && CFUniCharIsSurrogateLowCharacter(*(buffer
+ 1))) {
865 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(currentChar
, *(buffer
++));
868 if (!CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) break;
870 property
= CFUniCharGetCombiningPropertyForCharacter(currentChar
, CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16) & 0xFF));
872 if (property
== 230) return true; // Above priority
/*
 * Scans backwards through the UTF-16 units in buffer and returns false as
 * soon as a character with combining property 230 is met, either in the text
 * itself or inside the decomposition of the character the scan stops on.
 * NOTE(review): several control lines of this function (pointer set-up,
 * loop/branch closers, final return) are not visible here; the comments
 * below describe only the statements that are.
 */
877 CF_INLINE
bool __CFUniCharIsAfter_i(UTF16Char
*buffer
, uint32_t length
) {
878 UTF32Char currentChar
= 0;
880 UTF32Char decomposed
[MAX_DECOMPOSED_LENGTH
];
881 uint32_t decompLength
;
/* Nothing precedes the current position: report 0 (false). */
884 if (length
< 1) return 0;
/* Walk backwards one unit at a time while more than one unit remains. */
887 while (length
-- > 1) {
888 currentChar
= *(--buffer
);
/* A low surrogate preceded by a high surrogate is combined into one character. */
889 if (CFUniCharIsSurrogateLowCharacter(currentChar
)) {
890 if ((length
> 1) && CFUniCharIsSurrogateHighCharacter(*(buffer
- 1))) {
891 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*(--buffer
), currentChar
);
/* The backwards scan ends at the first character outside the non-base set. */
897 if (!CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) break;
899 property
= CFUniCharGetCombiningPropertyForCharacter(currentChar
, CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16) & 0xFF));
901 if (property
== 230) return false; // Above priority
/* Fetch the preceding unit, pairing it with a high surrogate when needed. */
904 currentChar
= *(--buffer
);
905 } else if (CFUniCharIsSurrogateLowCharacter(currentChar
) && CFUniCharIsSurrogateHighCharacter(*(--buffer
))) {
906 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*buffer
, currentChar
);
/* Decompose that character and inspect every element after the first. */
909 decompLength
= CFUniCharDecomposeCharacter(currentChar
, decomposed
, MAX_DECOMPOSED_LENGTH
);
910 currentChar
= *decomposed
;
913 for (idx
= 1;idx
< decompLength
;idx
++) {
914 currentChar
= decomposed
[idx
];
915 property
= CFUniCharGetCombiningPropertyForCharacter(currentChar
, CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16) & 0xFF));
917 if (property
== 230) return false; // Above priority
/*
 * CFUniCharGetConditionalCaseMappingFlags
 *
 * Returns context-dependent case-mapping flags for theChar within the UTF-16
 * string buffer[0 .. length).  currentIndex is presumably the position of
 * theChar in that string (the scans below start at currentIndex - 1 and
 * currentIndex + 1) -- confirm against callers.
 *
 * Visible cases:
 *  - U+03A3 while lowercasing, with currentIndex > 0:
 *      scans backwards, skipping members of
 *      kCFUniCharCaseIgnorableCharacterSet, and returns 0 when the character
 *      found is neither an uppercase nor a lowercase letter; then scans
 *      forwards the same way and returns 0 when the character found IS an
 *      uppercase or lowercase letter; otherwise returns
 *      kCFUniCharCaseMapFinalSigma.
 *  - langCode matches LITHUANIAN_LANG_CODE:
 *      U+0307 with both After_i and MoreAbove set in lastFlags yields
 *      kCFUniCharCaseMapAfter_i when __CFUniCharIsAfter_i() succeeds;
 *      U+0049 / U+004A / U+012E while lowercasing yield
 *      kCFUniCharCaseMapMoreAbove when __CFUniCharIsMoreAbove() succeeds;
 *      'i' / 'j' yield After_i|MoreAbove when __CFUniCharIsMoreAbove()
 *      succeeds.
 *  - langCode matches TURKISH_LANG_CODE or AZERI_LANG_CODE, lowercasing:
 *      U+0307 yields kCFUniCharCaseMapAfter_i when lastFlags has MoreAbove;
 *      U+0049 yields kCFUniCharCaseMapMoreAbove when the next unit is U+0307.
 *
 * NOTE(review): the statements that leave the two sigma scan loops and the
 * function's fall-through return are not visible in this excerpt.
 * NOTE(review): langCode is read through a (const uint16_t *) cast, so it
 * must reference at least two readable, suitably aligned bytes -- confirm.
 * NOTE(review): in the two __CFUniCharIsMoreAbove() calls, `++currentIndex`
 * appears in one argument and `currentIndex` is read in the other.  Argument
 * evaluations are unsequenced in C, so this is undefined behaviour; consider
 * incrementing before the call.
 */
922 __private_extern__
uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar
, UTF16Char
*buffer
, uint32_t currentIndex
, uint32_t length
, uint32_t type
, const uint8_t *langCode
, uint32_t lastFlags
) {
923 if (theChar
== 0x03A3) { // GREEK CAPITAL LETTER SIGMA
924 if ((type
== kCFUniCharToLowercase
) && (currentIndex
> 0)) {
925 UTF16Char
*start
= buffer
;
926 UTF16Char
*end
= buffer
+ length
;
929 // First check if we're after a cased character
930 buffer
+= (currentIndex
- 1);
931 while (start
<= buffer
) {
932 otherChar
= *(buffer
--);
933 if (CFUniCharIsSurrogateLowCharacter(otherChar
) && (start
<= buffer
) && CFUniCharIsSurrogateHighCharacter(*buffer
)) {
934 otherChar
= CFUniCharGetLongCharacterForSurrogatePair(*(buffer
--), otherChar
);
936 if (!CFUniCharIsMemberOf(otherChar
, kCFUniCharCaseIgnorableCharacterSet
)) {
937 if (!CFUniCharIsMemberOf(otherChar
, kCFUniCharUppercaseLetterCharacterSet
) && !CFUniCharIsMemberOf(otherChar
, kCFUniCharLowercaseLetterCharacterSet
)) return 0; // Uppercase set contains titlecase
942 // Next check if we're before a cased character
943 buffer
= start
+ currentIndex
+ 1;
944 while (buffer
< end
) {
945 otherChar
= *(buffer
++);
946 if (CFUniCharIsSurrogateHighCharacter(otherChar
) && (buffer
< end
) && CFUniCharIsSurrogateLowCharacter(*buffer
)) {
947 otherChar
= CFUniCharGetLongCharacterForSurrogatePair(otherChar
, *(buffer
++));
949 if (!CFUniCharIsMemberOf(otherChar
, kCFUniCharCaseIgnorableCharacterSet
)) {
950 if (CFUniCharIsMemberOf(otherChar
, kCFUniCharUppercaseLetterCharacterSet
) || CFUniCharIsMemberOf(otherChar
, kCFUniCharLowercaseLetterCharacterSet
)) return 0; // Uppercase set contains titlecase
954 return kCFUniCharCaseMapFinalSigma
;
// Language-specific rules apply only when a language code was supplied.
956 } else if (langCode
) {
957 if (*((const uint16_t *)langCode
) == LITHUANIAN_LANG_CODE
) {
958 if ((theChar
== 0x0307) && ((kCFUniCharCaseMapAfter_i
|kCFUniCharCaseMapMoreAbove
) & lastFlags
) == (kCFUniCharCaseMapAfter_i
|kCFUniCharCaseMapMoreAbove
)) {
959 return (__CFUniCharIsAfter_i(buffer
, currentIndex
) ? kCFUniCharCaseMapAfter_i
: 0);
960 } else if (type
== kCFUniCharToLowercase
) {
961 if ((theChar
== 0x0049) || (theChar
== 0x004A) || (theChar
== 0x012E)) {
962 return (__CFUniCharIsMoreAbove(buffer
+ (++currentIndex
), length
- currentIndex
) ? kCFUniCharCaseMapMoreAbove
: 0);
964 } else if ((theChar
== 'i') || (theChar
== 'j')) {
965 return (__CFUniCharIsMoreAbove(buffer
+ (++currentIndex
), length
- currentIndex
) ? (kCFUniCharCaseMapAfter_i
|kCFUniCharCaseMapMoreAbove
) : 0);
967 } else if ((*((const uint16_t *)langCode
) == TURKISH_LANG_CODE
) || (*((const uint16_t *)langCode
) == AZERI_LANG_CODE
)) {
968 if (type
== kCFUniCharToLowercase
) {
969 if (theChar
== 0x0307) {
970 return (kCFUniCharCaseMapMoreAbove
& lastFlags
? kCFUniCharCaseMapAfter_i
: 0);
971 } else if (theChar
== 0x0049) {
// The && guarantees the bounds check runs before buffer[currentIndex] is read.
972 return (((++currentIndex
< length
) && (buffer
[currentIndex
] == 0x0307)) ? kCFUniCharCaseMapMoreAbove
: 0);
980 // Unicode property database
// File-scope state for the property database.  The table starts out NULL and
// is filled in by CFUniCharGetUnicodePropertyDataForPlane() the first time
// that function runs.
981 static __CFUniCharBitmapData
*__CFUniCharUnicodePropertyTable
= NULL
;
// Number of entries allocated in __CFUniCharUnicodePropertyTable.
982 static int __CFUniCharUnicodePropertyTableCount
= 0;
// Spin lock taken around initialisation of the table above.
984 static CFSpinLock_t __CFUniCharPropTableLock
= 0;
// Name of the data file handed to __CFUniCharLoadFile() when building the table.
986 #define PROP_DB_FILE "CFUniCharPropertyDatabase.data"
/*
 * CFUniCharGetUnicodePropertyDataForPlane
 *
 * Returns the data pointer recorded for `plane` of property table
 * `propertyType`, or NULL when `plane` is not below that table's _numPlanes.
 *
 * On first use (table pointer still NULL) the function, while holding
 * __CFUniCharPropTableLock, loads PROP_DB_FILE with __CFUniCharLoadFile()
 * and builds __CFUniCharUnicodePropertyTable from it:
 *   - 4 bytes are skipped (Unicode version);
 *   - a big-endian uint32 header size is read and reduced by
 *     2 * sizeof(uint32_t); the body starts that many bytes after the
 *     current read position;
 *   - the table gets headerSize / sizeof(uint32_t) entries;
 *   - for each entry the first body byte is the plane count, the bytes that
 *     follow are per-plane sizes, and plane data begins at the plane count
 *     rounded up to a multiple of 4; a plane whose size byte is non-zero
 *     occupies size * 256 bytes, a zero size byte yields a NULL plane;
 *   - the body pointer then advances by the next big-endian uint32 read from
 *     the header.
 * Plane pointers refer directly into the loaded file bytes.
 *
 * NOTE(review): `(char *)p += n` and `((uint32_t *)p)++` use a cast as an
 * lvalue, which ISO C does not permit (historical compiler extension).
 * NOTE(review): the CFAllocatorAllocate() results are not checked in the
 * visible code, and propertyType is not range-checked against the table
 * count before indexing -- confirm callers pass valid values.
 * NOTE(review): what is returned after a failed __CFUniCharLoadFile() is not
 * visible in this excerpt.
 */
988 const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType
, uint32_t plane
) {
990 __CFSpinLock(&__CFUniCharPropTableLock
);
992 if (NULL
== __CFUniCharUnicodePropertyTable
) {
994 const void *bodyBase
;
995 const void *planeBase
;
998 int planeIndex
, planeCount
;
// Loading failed: release the lock before leaving.
1001 if (!__CFUniCharLoadFile(PROP_DB_FILE
, &bytes
)) {
1002 __CFSpinUnlock(&__CFUniCharPropTableLock
);
1006 (char *)bytes
+= 4; // Skip Unicode version
1007 headerSize
= CFSwapInt32BigToHost(*(((uint32_t *)bytes
)++));
// Discount the two 32-bit words already consumed from the header.
1008 headerSize
-= (sizeof(uint32_t) * 2);
1009 bodyBase
= (char *)bytes
+ headerSize
;
// The remaining header holds one uint32 per property table.
1011 count
= headerSize
/ sizeof(uint32_t);
1012 __CFUniCharUnicodePropertyTableCount
= count
;
1014 __CFUniCharUnicodePropertyTable
= (__CFUniCharBitmapData
*)CFAllocatorAllocate(NULL
, sizeof(__CFUniCharBitmapData
) * count
, 0);
1016 for (idx
= 0;idx
< count
;idx
++) {
1017 planeCount
= *((const uint8_t *)bodyBase
);
// Plane data starts at planeCount rounded up to the next multiple of 4.
1018 (char *)planeBase
= (char *)bodyBase
+ planeCount
+ (planeCount
% 4 ? 4 - (planeCount
% 4) : 0);
1019 __CFUniCharUnicodePropertyTable
[idx
]._planes
= (const uint8_t **)CFAllocatorAllocate(NULL
, sizeof(const void *) * planeCount
, 0);
1021 for (planeIndex
= 0;planeIndex
< planeCount
;planeIndex
++) {
// The size byte for plane N sits at offset N + 1; zero means the plane has no data.
1022 if ((planeSize
= ((const uint8_t *)bodyBase
)[planeIndex
+ 1])) {
1023 __CFUniCharUnicodePropertyTable
[idx
]._planes
[planeIndex
] = planeBase
;
1024 (char *)planeBase
+= (planeSize
* 256);
1026 __CFUniCharUnicodePropertyTable
[idx
]._planes
[planeIndex
] = NULL
;
1030 __CFUniCharUnicodePropertyTable
[idx
]._numPlanes
= planeCount
;
// Advance to the next table's body using the next header word.
1031 (char *)bodyBase
+= (CFSwapInt32BigToHost(*(((uint32_t *)bytes
)++)));
1035 __CFSpinUnlock(&__CFUniCharPropTableLock
);
1037 return (plane
< __CFUniCharUnicodePropertyTable
[propertyType
]._numPlanes
? __CFUniCharUnicodePropertyTable
[propertyType
]._planes
[plane
] : NULL
);
/*
 * CFUniCharGetNumberOfPlanesForUnicodePropertyData
 *
 * Returns the _numPlanes value recorded for property table `propertyType`.
 * CFUniCharGetUnicodePropertyDataForPlane() is called first with its result
 * discarded; that call builds the table when it has not been built yet.
 *
 * NOTE(review): propertyType is used as an index without a range check in
 * the visible code -- confirm callers pass valid values.
 */
1040 __private_extern__
uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType
) {
1041 (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType
, 0);
1042 return __CFUniCharUnicodePropertyTable
[propertyType
]._numPlanes
;
/*
 * CFUniCharGetUnicodeProperty
 *
 * Looks up a Unicode property value for `character`.
 *
 *  - kCFUniCharCombiningProperty: result of
 *    CFUniCharGetCombiningPropertyForCharacter().
 *  - kCFUniCharBidiProperty: result of
 *    CFUniCharGetBidiPropertyForCharacter().
 *
 * In both cases the plane data comes from
 * CFUniCharGetUnicodePropertyDataForPlane(), with the plane taken from bits
 * 16..23 of the code point.
 *
 * NOTE(review): the handling of any other propertyType is not visible in
 * this excerpt.
 */
1045 __private_extern__
uint32_t CFUniCharGetUnicodeProperty(UTF32Char character
, uint32_t propertyType
) {
1046 if (propertyType
== kCFUniCharCombiningProperty
) {
1047 return CFUniCharGetCombiningPropertyForCharacter(character
, CFUniCharGetUnicodePropertyDataForPlane(propertyType
, (character
>> 16) & 0xFF));
1048 } else if (propertyType
== kCFUniCharBidiProperty
) {
1049 return CFUniCharGetBidiPropertyForCharacter(character
, CFUniCharGetUnicodePropertyDataForPlane(propertyType
, (character
>> 16) & 0xFF));
1058 The UTF8 conversion in the following function is derived from ConvertUTF.c
1061 * Copyright 2001 Unicode, Inc.
1065 * This source code is provided as is by Unicode, Inc. No claims are
1066 * made as to fitness for any particular purpose. No warranties of any
1067 * kind are expressed or implied. The recipient agrees to determine
1068 * applicability of information provided. If this file has been
1069 * purchased on magnetic or optical media from Unicode, Inc., the
1070 * sole remedy for any claim will be exchange of defective media
1071 * within 90 days of receipt.
1073 * Limitations on Rights to Redistribute This Code
1075 * Unicode, Inc. hereby grants the right to freely use the information
1076 * supplied in this file in the creation of products supporting the
1077 * Unicode Standard, and to make copies of this file in any form
1078 * for internal or external distribution as long as this notice
// Code point substituted in the UTF-8 path below for values of 0x200000 and above.
1081 #define UNI_REPLACEMENT_CHAR (0x0000FFFDUL)
/*
 * CFUniCharFillDestinationBuffer
 *
 * Appends srcLength UTF-32 characters from `src` to the buffer referenced by
 * *dst, encoded according to dstFormat.
 *
 *  - kCFUniCharUTF16Format: characters above 0xFFFF are written as a
 *    surrogate pair (0x10000 is subtracted, then the upper bits are added to
 *    0xD800 and the low 10 bits to 0xDC00); others as a single UTF16Char.
 *  - kCFUniCharUTF8Format: the byte count is chosen from the thresholds
 *    0x80 / 0x800 / 0x10000 / 0x200000; larger values are replaced with
 *    UNI_REPLACEMENT_CHAR.  Bytes are stored last-to-first via the
 *    fall-through switch.
 *  - any other format: characters are copied as UTF32Char.
 *
 * usedLength starts from *filledLength and is written back to *filledLength
 * at the end.  The function returns false as soon as usedLength would exceed
 * dstLength.
 *
 * NOTE(review): several statements (the usedLength increments in the UTF-16
 * and UTF-32 paths, the bytesToWrite assignments, the update of *dst and the
 * final return) are not visible in this excerpt -- verify against the
 * complete source before relying on the details above.
 */
1083 bool CFUniCharFillDestinationBuffer(const UTF32Char
*src
, uint32_t srcLength
, void **dst
, uint32_t dstLength
, uint32_t *filledLength
, uint32_t dstFormat
) {
1084 UTF32Char currentChar
;
1085 uint32_t usedLength
= *filledLength
;
1087 if (dstFormat
== kCFUniCharUTF16Format
) {
1088 UTF16Char
*dstBuffer
= (UTF16Char
*)*dst
;
1090 while (srcLength
-- > 0) {
1091 currentChar
= *(src
++);
1093 if (currentChar
> 0xFFFF) { // Non-BMP
1096 if (usedLength
> dstLength
) return false;
// Split the supplementary code point into a high and a low surrogate.
1097 currentChar
-= 0x10000;
1098 *(dstBuffer
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
1099 *(dstBuffer
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
1104 if (usedLength
> dstLength
) return false;
1105 *(dstBuffer
++) = (UTF16Char
)currentChar
;
1111 } else if (dstFormat
== kCFUniCharUTF8Format
) {
1112 uint8_t *dstBuffer
= (uint8_t *)*dst
;
1113 uint16_t bytesToWrite
= 0;
// Continuation bytes are formed as (value | byteMark) & byteMask.
1114 const UTF32Char byteMask
= 0xBF;
1115 const UTF32Char byteMark
= 0x80;
// Lead-byte marker indexed by the total number of bytes in the sequence.
1116 static const uint8_t firstByteMark
[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
1118 while (srcLength
-- > 0) {
1119 currentChar
= *(src
++);
1121 /* Figure out how many bytes the result will require */
1122 if (currentChar
< (UTF32Char
)0x80) {
1124 } else if (currentChar
< (UTF32Char
)0x800) {
1126 } else if (currentChar
< (UTF32Char
)0x10000) {
1128 } else if (currentChar
< (UTF32Char
)0x200000) {
1132 currentChar
= UNI_REPLACEMENT_CHAR
;
1135 usedLength
+= bytesToWrite
;
1138 if (usedLength
> dstLength
) return false;
// Jump to the end of this character's slot; the switch fills it back to front.
1140 dstBuffer
+= bytesToWrite
;
1141 switch (bytesToWrite
) { /* note: everything falls through. */
1142 case 4: *--dstBuffer
= (currentChar
| byteMark
) & byteMask
; currentChar
>>= 6;
1143 case 3: *--dstBuffer
= (currentChar
| byteMark
) & byteMask
; currentChar
>>= 6;
1144 case 2: *--dstBuffer
= (currentChar
| byteMark
) & byteMask
; currentChar
>>= 6;
1145 case 1: *--dstBuffer
= currentChar
| firstByteMark
[bytesToWrite
];
// The switch left dstBuffer on the lead byte; move past the whole sequence again.
1147 dstBuffer
+= bytesToWrite
;
1153 UTF32Char
*dstBuffer
= (UTF32Char
*)*dst
;
1155 while (srcLength
-- > 0) {
1156 currentChar
= *(src
++);
1160 if (usedLength
> dstLength
) return false;
1161 *(dstBuffer
++) = currentChar
;
// Report the running total back to the caller.
1168 *filledLength
= usedLength
;
/*
 * __CFUniCharCleanup (compiled only when __WIN32__ is defined)
 *
 * Releases the heap memory held by this file's lazily built tables and
 * resets the corresponding globals.  Each group is handled while holding its
 * own spin lock:
 *   - __CFUniCharBitmapLock: every _planes array of
 *     __CFUniCharBitmapDataArray, then the array itself; the bitmap count is
 *     reset to 0;
 *   - __CFUniCharMappingTableLock: __CFUniCharMappingTables and
 *     __CFUniCharCaseMappingTableCounts are deallocated;
 *     __CFUniCharCaseMappingTable and __CFUniCharCaseMappingExtraTable are
 *     only set to NULL in the visible code;
 *   - __CFUniCharPropTableLock: every _planes array of
 *     __CFUniCharUnicodePropertyTable, then the table itself; the table
 *     count is reset to 0.
 * All deallocations use CFAllocatorDeallocate() with a NULL allocator.
 *
 * NOTE(review): the end of this function is not visible in this excerpt.
 */
1173 #if defined(__WIN32__)
1174 void __CFUniCharCleanup(void)
1178 // cleanup memory allocated by __CFUniCharLoadBitmapData()
1179 __CFSpinLock(&__CFUniCharBitmapLock
);
1181 if (__CFUniCharBitmapDataArray
!= NULL
) {
1182 for (idx
= 0; idx
< __CFUniCharNumberOfBitmaps
; idx
++) {
1183 CFAllocatorDeallocate(NULL
, __CFUniCharBitmapDataArray
[idx
]._planes
);
1184 __CFUniCharBitmapDataArray
[idx
]._planes
= NULL
;
1187 CFAllocatorDeallocate(NULL
, __CFUniCharBitmapDataArray
);
1188 __CFUniCharBitmapDataArray
= NULL
;
1189 __CFUniCharNumberOfBitmaps
= 0;
1192 __CFSpinUnlock(&__CFUniCharBitmapLock
);
1194 // cleanup memory allocated by CFUniCharGetMappingData()
1195 __CFSpinLock(&__CFUniCharMappingTableLock
);
1197 if (__CFUniCharMappingTables
!= NULL
) {
1198 CFAllocatorDeallocate(NULL
, __CFUniCharMappingTables
);
1199 __CFUniCharMappingTables
= NULL
;
1202 // cleanup memory allocated by __CFUniCharLoadCaseMappingTable()
1203 if (__CFUniCharCaseMappingTableCounts
!= NULL
) {
1204 CFAllocatorDeallocate(NULL
, __CFUniCharCaseMappingTableCounts
);
1205 __CFUniCharCaseMappingTableCounts
= NULL
;
// These two pointers are only reset here; no deallocation of them is visible.
1207 __CFUniCharCaseMappingTable
= NULL
;
1208 __CFUniCharCaseMappingExtraTable
= NULL
;
1211 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
1213 // cleanup memory allocated by CFUniCharGetUnicodePropertyDataForPlane()
1214 __CFSpinLock(&__CFUniCharPropTableLock
);
1216 if (__CFUniCharUnicodePropertyTable
!= NULL
) {
1217 for (idx
= 0; idx
< __CFUniCharUnicodePropertyTableCount
; idx
++) {
1218 CFAllocatorDeallocate(NULL
, __CFUniCharUnicodePropertyTable
[idx
]._planes
);
1219 __CFUniCharUnicodePropertyTable
[idx
]._planes
= NULL
;
1222 CFAllocatorDeallocate(NULL
, __CFUniCharUnicodePropertyTable
);
1223 __CFUniCharUnicodePropertyTable
= NULL
;
1224 __CFUniCharUnicodePropertyTableCount
= 0;
1227 __CFSpinUnlock(&__CFUniCharPropTableLock
);