2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
26 Copyright 2001-2002, Apple, Inc. All rights reserved.
27 Responsibility: Aki Inoue
30 #include <CoreFoundation/CFByteOrder.h>
31 #include "CFInternal.h"
32 #include "CFUniChar.h"
33 #include "CFStringEncodingConverterExt.h"
34 #include "CFUnicodeDecomposition.h"
35 #include "CFUniCharPriv.h"
36 #if defined(__MACOS8__)
38 #elif defined(__WIN32__)
43 #elif defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
45 #include <mach/mach.h>
48 #include <sys/types.h>
50 #include <sys/param.h>
56 #if defined(__MACOS8__)
57 #define MAXPATHLEN FILENAME_MAX
59 #define MAXPATHLEN MAX_PATH
62 // Memory map the file
63 #if !defined(__MACOS8__)
65 CF_INLINE
void __CFUniCharCharacterSetPath(char *cpath
) {
66 strlcpy(cpath
, __kCFCharacterSetDir
, MAXPATHLEN
);
67 strlcat(cpath
, "/CharacterSets/", MAXPATHLEN
);
/*
 * Maps the file named fileName read-only and stores the address of the
 * mapping in *bytes.
 *
 * Three platform variants are visible below:
 *   - Win32: CreateFile + CreateFileMapping + MapViewOfFileEx; both handles
 *     are closed once the view has been requested.
 *   - map_fd(): used with vm_offset_t/vm_size_t after fstat() supplies the size.
 *   - mmap(): PROT_READ / MAP_PRIVATE over the whole file after fstat().
 *
 * Returns false when the file cannot be opened; on Win32 the result is
 * whether the view pointer is non-NULL.
 */
70 static bool __CFUniCharLoadBytesFromFile(const char *fileName
, const void **bytes
) {
71 #if defined(__WIN32__)
72 HANDLE bitmapFileHandle
;
/* Open the existing file for shared reading; give up if it is not there. */
75 if ((bitmapFileHandle
= CreateFile(fileName
, GENERIC_READ
, FILE_SHARE_READ
, NULL
, OPEN_EXISTING
, FILE_ATTRIBUTE_NORMAL
, NULL
)) == INVALID_HANDLE_VALUE
) return false;
76 mappingHandle
= CreateFileMapping(bitmapFileHandle
, NULL
, PAGE_READONLY
, 0, 0, NULL
);
/* The file handle is released before the mapping handle is checked. */
77 CloseHandle(bitmapFileHandle
);
78 if (!mappingHandle
) return false;
80 *bytes
= MapViewOfFileEx(mappingHandle
, FILE_MAP_READ
, 0, 0, 0, NULL
);
81 CloseHandle(mappingHandle
);
83 return (*bytes
? true : false);
/* Non-Windows path: open read-only, then size the mapping with fstat(). */
88 if ((fd
= open(fileName
, O_RDONLY
, 0)) < 0) return false;
/* map_fd() variant: a non-zero result is treated as failure. */
91 if (fstat(fd
, &statBuf
) < 0 || map_fd(fd
, 0, (vm_offset_t
*)bytes
, true, (vm_size_t
)statBuf
.st_size
)) {
/* mmap() variant: (void *)-1 is the failure value. */
/* NOTE(review): confirm fd is closed on both the failure and success paths. */
96 if (fstat(fd
, &statBuf
) < 0 || (*bytes
= mmap(0, statBuf
.st_size
, PROT_READ
, MAP_PRIVATE
, fd
, 0)) == (void *)-1) {
108 static bool __CFUniCharLoadFile(const char *bitmapName
, const void **bytes
) {
109 char cpath
[MAXPATHLEN
];
111 __CFUniCharCharacterSetPath(cpath
);
112 strlcat(cpath
, bitmapName
, MAXPATHLEN
);
114 return __CFUniCharLoadBytesFromFile(cpath
, bytes
);
116 #endif !defined(__MACOS8__)
119 CF_INLINE
bool isControl(UTF32Char theChar
, uint16_t charset
, const void *data
) { // ISO Control
120 if ((theChar
<= 0x001F) || (theChar
>= 0x007F && theChar
<= 0x009F)) return true;
124 CF_INLINE
bool isWhitespace(UTF32Char theChar
, uint16_t charset
, const void *data
) { // Space
125 if ((theChar
== 0x0020) || (theChar
== 0x0009) || (theChar
== 0x00A0) || (theChar
== 0x1680) || (theChar
>= 0x2000 && theChar
<= 0x200B) || (theChar
== 0x202F) || (theChar
== 0x205F) || (theChar
== 0x3000)) return true;
129 CF_INLINE
bool isWhitespaceAndNewLine(UTF32Char theChar
, uint16_t charset
, const void *data
) { // White space
130 if (isWhitespace(theChar
, charset
, data
) || (theChar
>= 0x000A && theChar
<= 0x000D) || (theChar
== 0x0085) || (theChar
== 0x2028) || (theChar
== 0x2029)) return true;
134 #if defined(__MACOS8__)
135 /* This structure MUST match the sets in NSRulebook.h The "__CFCSetIsMemberSet()" function is a modified version of the one in Text shlib.
/*
 * In-memory description of one precompiled character set.
 * array[] is declared with one element but holds nsingles single values
 * followed by the range entries (see the member comments).
 */
137 typedef struct _CFCharSetPrivateStruct
{
138 int issorted
; /* 1=sorted or 0=unsorted ; 2=is_property_table */
139 int bitrange
[4]; /* bitmap (each bit is a 1k range in space of 2^17) */
140 int nsingles
; /* number of single elements */
141 int nranges
; /* number of ranges */
142 int singmin
; /* minimum single element */
143 int singmax
; /* maximum single element */
144 int array
[1]; /* actually bunch of singles followed by ranges */
145 } CFCharSetPrivateStruct
;
147 /* Membership function for complex sets
/*
 * Tests whether theChar is a member of the precompiled set.
 *
 * The lookup first consults set->bitrange through __CFCSetBitsInRange().
 * It then searches the single values (only when theChar lies within
 * singmin..singmax), and finally the ranges that follow the singles in
 * set->array. Each list is scanned linearly when it has fewer than
 * __kCFSetBreakeven entries and binary-searched otherwise.
 */
149 CF_INLINE
bool __CFCSetIsMemberSet(const CFCharSetPrivateStruct
*set
, UTF16Char theChar
) {
/* Only sets flagged as sorted (issorted == 1) take the search path below. */
154 if (set
->issorted
!= 1) {
/* NOTE(review): theChar is a UTF16Char, so this 17-bit mask cannot clear any bit. */
157 theChar
&= 0x0001FFFF; /* range 1-131k */
158 if (__CFCSetBitsInRange(theChar
, set
->bitrange
)) {
159 if (theChar
>= set
->singmin
&& theChar
<= set
->singmax
) {
160 tmp
= (int *) &(set
->array
[0]);
161 if ((nel
= set
->nsingles
) < __kCFSetBreakeven
) {
162 for (i
= 0; i
< nel
; i
++) {
163 if (*tmp
== theChar
) return true;
167 else { // this does a binary search
168 p
= tmp
; q
= tmp
+ (nel
-1);
170 wari
= (p
+ ((q
-p
)>>1));
171 if (theChar
< *wari
) q
= wari
- 1;
172 else if (theChar
> *wari
) p
= wari
+ 1;
/* The ranges start right after the nsingles single values. */
177 tmp
= (int *) &(set
->array
[0]) + set
->nsingles
;
178 if ((nel
= set
->nranges
) < __kCFSetBreakeven
) {
182 if (theChar
<= *tmp2
) {
183 if (theChar
>= *tmp
) return true;
189 } else { /* binary search the ranges */
190 p
= tmp
; q
= tmp
+ (2*nel
-2);
192 i
= (q
- p
) >> 1; /* >>1 means divide by 2 */
193 wari
= p
+ (i
& 0xFFFFFFFE); /* &fffffffe make it an even num */
194 if (theChar
< *wari
) q
= wari
- 2;
195 else if (theChar
> *(wari
+ 1)) p
= wari
+ 2;
200 /* fall through & return zero */
202 return false; /* not a member */
205 /* Take a private "set" structure and make a bitmap from it. Return the bitmap. THE CALLER MUST RELEASE THE RETURNED MEMORY as necessary.
208 CF_INLINE
void __CFCSetBitmapProcessManyCharacters(unsigned char *map
, unsigned n
, unsigned m
) {
210 for (tmp
= n
; tmp
<= m
; tmp
++) CFUniCharAddCharacterToBitmap(tmp
, map
);
213 CF_INLINE
void __CFCSetMakeSetBitmapFromSet(const CFCharSetPrivateStruct
*theSet
, uint8_t *map
)
219 for (cnt
= 0; cnt
< theSet
->nsingles
; cnt
++) {
220 ctmp
= theSet
->array
[cnt
];
221 CFUniCharAddCharacterToBitmap(tmp
, map
);
223 ip
= (int *) (&(theSet
->array
[0]) + theSet
->nsingles
);
224 cnt
= theSet
->nranges
;
226 /* This could be more efficient: turn on whole bytes at a time
227 when there are such cases as 8 characters in a row... */
228 __CFCSetBitmapProcessManyCharacters((unsigned char *)map
, *ip
, *(ip
+1));
234 extern const CFCharSetPrivateStruct
*_CFdecimalDigitCharacterSetData
;
235 extern const CFCharSetPrivateStruct
*_CFletterCharacterSetData
;
236 extern const CFCharSetPrivateStruct
*_CFlowercaseLetterCharacterSetData
;
237 extern const CFCharSetPrivateStruct
*_CFuppercaseLetterCharacterSetData
;
238 extern const CFCharSetPrivateStruct
*_CFnonBaseCharacterSetData
;
239 extern const CFCharSetPrivateStruct
*_CFdecomposableCharacterSetData
;
240 extern const CFCharSetPrivateStruct
*_CFpunctuationCharacterSetData
;
241 extern const CFCharSetPrivateStruct
*_CFalphanumericCharacterSetData
;
242 extern const CFCharSetPrivateStruct
*_CFillegalCharacterSetData
;
243 extern const CFCharSetPrivateStruct
*_CFhasNonSelfLowercaseMappingData
;
244 extern const CFCharSetPrivateStruct
*_CFhasNonSelfUppercaseMappingData
;
245 extern const CFCharSetPrivateStruct
*_CFhasNonSelfTitlecaseMappingData
;
250 const uint8_t **_planes
;
251 } __CFUniCharBitmapData
;
/* Unicode version text filled in by __CFUniCharLoadBitmapData() from the first bytes of the bitmap file. */
253 static char __CFUniCharUnicodeVersionString
[8] = {0, 0, 0, 0, 0, 0, 0, 0};
/* Number of entries in __CFUniCharBitmapDataArray; 0 until the bitmap file is loaded. */
255 static uint32_t __CFUniCharNumberOfBitmaps
= 0;
/* Per-character-set plane tables, indexed by (charset - kCFUniCharDecimalDigitCharacterSet); NULL until loaded. */
256 static __CFUniCharBitmapData
*__CFUniCharBitmapDataArray
= NULL
;
/* Held by __CFUniCharLoadBitmapData() while it builds the tables above. */
258 static CFSpinLock_t __CFUniCharBitmapLock
= 0;
/* File name of the character-set bitmap data; may be predefined by the build. */
#ifndef CF_UNICHAR_BITMAP_FILE
#define CF_UNICHAR_BITMAP_FILE "CFCharacterSetBitmaps.bitmap"
#endif /* CF_UNICHAR_BITMAP_FILE */
264 static bool __CFUniCharLoadBitmapData(void) {
268 uint8_t currentPlane
;
270 const void *bitmapBase
;
272 int idx
, bitmapIndex
;
274 __CFSpinLock(&__CFUniCharBitmapLock
);
276 if (__CFUniCharBitmapDataArray
|| !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE
, &bytes
)) {
277 __CFSpinUnlock(&__CFUniCharBitmapLock
);
281 for (idx
= 0;idx
< 4 && ((const uint8_t *)bytes
)[idx
];idx
++) {
282 __CFUniCharUnicodeVersionString
[idx
* 2] = ((const uint8_t *)bytes
)[idx
];
283 __CFUniCharUnicodeVersionString
[idx
* 2 + 1] = '.';
285 __CFUniCharUnicodeVersionString
[(idx
< 4 ? idx
* 2 - 1 : 7)] = '\0';
287 headerSize
= CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes
+ 4)));
289 bitmapBase
= (char *)bytes
+ headerSize
;
290 (char *)bytes
+= (sizeof(uint32_t) * 2);
291 headerSize
-= (sizeof(uint32_t) * 2);
293 __CFUniCharNumberOfBitmaps
= headerSize
/ (sizeof(uint32_t) * 2);
295 __CFUniCharBitmapDataArray
= (__CFUniCharBitmapData
*)CFAllocatorAllocate(NULL
, sizeof(__CFUniCharBitmapData
) * __CFUniCharNumberOfBitmaps
, 0);
297 for (idx
= 0;idx
< (int)__CFUniCharNumberOfBitmaps
;idx
++) {
298 bitmap
= (char *)bitmapBase
+ CFSwapInt32BigToHost(*(((uint32_t *)bytes
)++));
299 bitmapSize
= CFSwapInt32BigToHost(*(((uint32_t *)bytes
)++));
301 numPlanes
= bitmapSize
/ (8 * 1024);
302 numPlanes
= *(const uint8_t *)((char *)bitmap
+ (((numPlanes
- 1) * ((8 * 1024) + 1)) - 1)) + 1;
303 __CFUniCharBitmapDataArray
[idx
]._planes
= (const uint8_t **)CFAllocatorAllocate(NULL
, sizeof(const void *) * numPlanes
, NULL
);
304 __CFUniCharBitmapDataArray
[idx
]._numPlanes
= numPlanes
;
307 for (bitmapIndex
= 0;bitmapIndex
< numPlanes
;bitmapIndex
++) {
308 if (bitmapIndex
== currentPlane
) {
309 __CFUniCharBitmapDataArray
[idx
]._planes
[bitmapIndex
] = bitmap
;
310 (char *)bitmap
+= (8 * 1024);
311 currentPlane
= *(((const uint8_t *)bitmap
)++);
313 __CFUniCharBitmapDataArray
[idx
]._planes
[bitmapIndex
] = NULL
;
318 __CFSpinUnlock(&__CFUniCharBitmapLock
);
323 __private_extern__
const char *__CFUniCharGetUnicodeVersionString(void) {
324 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
325 return __CFUniCharUnicodeVersionString
;
330 #define CONTROLSET_HAS_FORMATTER 1
332 bool CFUniCharIsMemberOf(UTF32Char theChar
, uint32_t charset
) {
333 #if CONTROLSET_HAS_FORMATTER
334 if (charset
== kCFUniCharControlCharacterSet
) charset
= kCFUniCharControlAndFormatterCharacterSet
;
335 #endif CONTROLSET_HAS_FORMATTER
338 case kCFUniCharControlCharacterSet
:
339 return isControl(theChar
, charset
, NULL
);
341 case kCFUniCharWhitespaceCharacterSet
:
342 return isWhitespace(theChar
, charset
, NULL
);
344 case kCFUniCharWhitespaceAndNewlineCharacterSet
:
345 return isWhitespaceAndNewLine(theChar
, charset
, NULL
);
347 #if defined(__MACOS8__)
348 case kCFUniCharDecimalDigitCharacterSet
:
349 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFdecimalDigitCharacterSetData
, theChar
);
350 case kCFUniCharLetterCharacterSet
:
351 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFletterCharacterSetData
, theChar
);
352 case kCFUniCharLowercaseLetterCharacterSet
:
353 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFlowercaseLetterCharacterSetData
, theChar
);
354 case kCFUniCharUppercaseLetterCharacterSet
:
355 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFuppercaseLetterCharacterSetData
, theChar
);
356 case kCFUniCharNonBaseCharacterSet
:
357 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFnonBaseCharacterSetData
, theChar
);
358 case kCFUniCharAlphaNumericCharacterSet
:
359 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFalphanumericCharacterSetData
, theChar
);
360 case kCFUniCharDecomposableCharacterSet
:
361 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFdecomposableCharacterSetData
, theChar
);
362 case kCFUniCharPunctuationCharacterSet
:
363 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFpunctuationCharacterSetData
, theChar
);
364 case kCFUniCharIllegalCharacterSet
:
365 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFillegalCharacterSetData
, theChar
);
366 case kCFUniCharHasNonSelfLowercaseMapping
:
367 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFhasNonSelfLowercaseMappingData
, theChar
);
368 case kCFUniCharHasNonSelfUppercaseMapping
:
369 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFhasNonSelfUppercaseMappingData
, theChar
);
370 case kCFUniCharHasNonSelfTitlecaseMapping
:
371 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct
*)&_CFhasNonSelfTitlecaseMappingData
, theChar
);
376 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
378 if ((charset
- kCFUniCharDecimalDigitCharacterSet
) < __CFUniCharNumberOfBitmaps
) {
379 __CFUniCharBitmapData
*data
= __CFUniCharBitmapDataArray
+ (charset
- kCFUniCharDecimalDigitCharacterSet
);
380 uint8_t planeNo
= (theChar
>> 16) & 0xFF;
382 // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16
383 if (charset
== kCFUniCharIllegalCharacterSet
) {
384 if (planeNo
== 0x0E) { // Plane 14
386 return (((theChar
== 0x01) || ((theChar
> 0x1F) && (theChar
< 0x80))) ? false : true);
387 } else if (planeNo
== 0x0F || planeNo
== 0x10) { // Plane 15 & 16
388 return ((theChar
& 0xFF) > 0xFFFD ? true : false);
390 return (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] ? !CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) : true);
392 } else if (charset
== kCFUniCharControlAndFormatterCharacterSet
) {
393 if (planeNo
== 0x0E) { // Plane 14
395 return (((theChar
== 0x01) || ((theChar
> 0x1F) && (theChar
< 0x80))) ? true : false);
397 return (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] ? CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) : false);
400 return (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] ? CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) : false);
408 const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset
, uint32_t plane
) {
409 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
411 #if CONTROLSET_HAS_FORMATTER
412 if (charset
== kCFUniCharControlCharacterSet
) charset
= kCFUniCharControlAndFormatterCharacterSet
;
413 #endif CONTROLSET_HAS_FORMATTER
415 if (charset
> kCFUniCharWhitespaceAndNewlineCharacterSet
&& (charset
- kCFUniCharDecimalDigitCharacterSet
) < __CFUniCharNumberOfBitmaps
&& charset
!= kCFUniCharIllegalCharacterSet
) {
416 __CFUniCharBitmapData
*data
= __CFUniCharBitmapDataArray
+ (charset
- kCFUniCharDecimalDigitCharacterSet
);
418 return (plane
< data
->_numPlanes
? data
->_planes
[plane
] : NULL
);
423 __private_extern__
uint8_t CFUniCharGetBitmapForPlane(uint32_t charset
, uint32_t plane
, void *bitmap
, bool isInverted
) {
424 const uint8_t *src
= CFUniCharGetBitmapPtrForPlane(charset
, plane
);
425 int numBytes
= (8 * 1024);
429 while (numBytes
-- > 0) *(((uint8_t *)bitmap
)++) = ~(*(src
++));
431 while (numBytes
-- > 0) *(((uint8_t *)bitmap
)++) = *(src
++);
433 return kCFUniCharBitmapFilled
;
434 } else if (charset
== kCFUniCharIllegalCharacterSet
) {
435 __CFUniCharBitmapData
*data
= __CFUniCharBitmapDataArray
+ (charset
- kCFUniCharDecimalDigitCharacterSet
);
437 if (plane
< data
->_numPlanes
&& (src
= data
->_planes
[plane
])) {
439 while (numBytes
-- > 0) *(((uint8_t *)bitmap
)++) = *(src
++);
441 while (numBytes
-- > 0) *(((uint8_t *)bitmap
)++) = ~(*(src
++));
443 return kCFUniCharBitmapFilled
;
444 } else if (plane
== 0x0E) { // Plane 14
446 uint8_t asciiRange
= (isInverted
? (uint8_t)0xFF : (uint8_t)0);
447 uint8_t otherRange
= (isInverted
? (uint8_t)0 : (uint8_t)0xFF);
449 *(((uint8_t *)bitmap
)++) = 0x02; // UE0001 LANGUAGE TAG
450 for (idx
= 1;idx
< numBytes
;idx
++) {
451 *(((uint8_t *)bitmap
)++) = ((idx
>= (0x20 / 8) && (idx
< (0x80 / 8))) ? asciiRange
: otherRange
);
453 return kCFUniCharBitmapFilled
;
454 } else if (plane
== 0x0F || plane
== 0x10) { // Plane 15 & 16
455 uint32_t value
= (isInverted
? 0xFFFFFFFF : 0);
456 numBytes
/= 4; // for 32bit
458 while (numBytes
-- > 0) *(((uint32_t *)bitmap
)++) = value
;
459 *(((uint8_t *)bitmap
) - 5) = (isInverted
? 0x3F : 0xC0); // 0xFFFE & 0xFFFF
460 return kCFUniCharBitmapFilled
;
462 return (isInverted
? kCFUniCharBitmapEmpty
: kCFUniCharBitmapAll
);
463 #if CONTROLSET_HAS_FORMATTER
464 } else if ((charset
== kCFUniCharControlCharacterSet
) && (plane
== 0x0E)) { // Language tags
466 uint8_t asciiRange
= (isInverted
? (uint8_t)0 : (uint8_t)0xFF);
467 uint8_t otherRange
= (isInverted
? (uint8_t)0xFF : (uint8_t)0);
469 *(((uint8_t *)bitmap
)++) = 0x02; // UE0001 LANGUAGE TAG
470 for (idx
= 1;idx
< numBytes
;idx
++) {
471 *(((uint8_t *)bitmap
)++) = ((idx
>= (0x20 / 8) && (idx
< (0x80 / 8))) ? asciiRange
: otherRange
);
473 return kCFUniCharBitmapFilled
;
474 #endif CONTROLSET_HAS_FORMATTER
475 } else if (charset
< kCFUniCharDecimalDigitCharacterSet
) {
476 if (plane
) return (isInverted
? kCFUniCharBitmapAll
: kCFUniCharBitmapEmpty
);
478 if (charset
== kCFUniCharControlCharacterSet
) {
480 uint8_t nonFillValue
= (isInverted
? (uint8_t)0xFF : (uint8_t)0);
481 uint8_t fillValue
= (isInverted
? (uint8_t)0 : (uint8_t)0xFF);
482 uint8_t *bitmapP
= (uint8_t *)bitmap
;
484 for (idx
= 0;idx
< numBytes
;idx
++) {
485 *(bitmapP
++) = (idx
< (0x20 / 8) || (idx
>= (0x80 / 8) && idx
< (0xA0 / 8)) ? fillValue
: nonFillValue
);
490 CFUniCharRemoveCharacterFromBitmap(0x007F, bitmap
);
492 CFUniCharAddCharacterToBitmap(0x007F, bitmap
);
495 uint8_t *bitmapBase
= (uint8_t *)bitmap
;
497 uint8_t nonFillValue
= (isInverted
? (uint8_t)0xFF : (uint8_t)0);
499 while (numBytes
-- > 0) *(((uint8_t *)bitmap
)++) = nonFillValue
;
501 if (charset
== kCFUniCharWhitespaceAndNewlineCharacterSet
) {
502 static const UniChar newlines
[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029};
504 for (idx
= 0;idx
< (int)(sizeof(newlines
) / sizeof(*newlines
)); idx
++) {
506 CFUniCharRemoveCharacterFromBitmap(newlines
[idx
], bitmapBase
);
508 CFUniCharAddCharacterToBitmap(newlines
[idx
], bitmapBase
);
514 CFUniCharRemoveCharacterFromBitmap(0x0009, bitmapBase
);
515 CFUniCharRemoveCharacterFromBitmap(0x0020, bitmapBase
);
516 CFUniCharRemoveCharacterFromBitmap(0x00A0, bitmapBase
);
517 CFUniCharRemoveCharacterFromBitmap(0x1680, bitmapBase
);
518 CFUniCharRemoveCharacterFromBitmap(0x202F, bitmapBase
);
519 CFUniCharRemoveCharacterFromBitmap(0x205F, bitmapBase
);
520 CFUniCharRemoveCharacterFromBitmap(0x3000, bitmapBase
);
522 CFUniCharAddCharacterToBitmap(0x0009, bitmapBase
);
523 CFUniCharAddCharacterToBitmap(0x0020, bitmapBase
);
524 CFUniCharAddCharacterToBitmap(0x00A0, bitmapBase
);
525 CFUniCharAddCharacterToBitmap(0x1680, bitmapBase
);
526 CFUniCharAddCharacterToBitmap(0x202F, bitmapBase
);
527 CFUniCharAddCharacterToBitmap(0x205F, bitmapBase
);
528 CFUniCharAddCharacterToBitmap(0x3000, bitmapBase
);
531 for (idx
= 0x2000;idx
<= 0x200B;idx
++) {
533 CFUniCharRemoveCharacterFromBitmap(idx
, bitmapBase
);
535 CFUniCharAddCharacterToBitmap(idx
, bitmapBase
);
539 return kCFUniCharBitmapFilled
;
541 return (isInverted
? kCFUniCharBitmapAll
: kCFUniCharBitmapEmpty
);
/*
 * Returns how many planes have to be examined for `charset`.
 *
 * With CONTROLSET_HAS_FORMATTER enabled the control set reports 15 planes
 * (0 to 14). The computed sets (charset below
 * kCFUniCharDecimalDigitCharacterSet) and the illegal set are answered
 * without consulting the bitmap data. Every other set reports the _numPlanes
 * value recorded for it when the bitmap file was loaded.
 */
544 __private_extern__
uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset
) {
545 #if defined(__MACOS8__)
548 #if CONTROLSET_HAS_FORMATTER
549 if (charset
== kCFUniCharControlCharacterSet
) return 15; // 0 to 14
550 #endif CONTROLSET_HAS_FORMATTER
552 if (charset
< kCFUniCharDecimalDigitCharacterSet
) {
554 } else if (charset
== kCFUniCharIllegalCharacterSet
) {
/* Load the bitmap tables on first use. */
559 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
/* NOTE(review): the table is indexed without checking that the load succeeded
   or that charset is below the number of loaded bitmaps - confirm callers
   cannot reach this with missing data. */
561 numPlanes
= __CFUniCharBitmapDataArray
[charset
- kCFUniCharDecimalDigitCharacterSet
]._numPlanes
;
568 // Mapping data loading
/* Start addresses of the tables inside the mapping file, filled in by CFUniCharGetMappingData(); NULL until loaded. */
569 static const void **__CFUniCharMappingTables
= NULL
;
/* Taken while the mapping tables and the case-mapping tables are built. */
571 static CFSpinLock_t __CFUniCharMappingTableLock
= 0;
/* The mapping data is stored in host byte order, so there is one file per
   endianness; exactly one of the two names is selected. */
#if defined(__BIG_ENDIAN__)
#define MAPPING_TABLE_FILE "CFUnicodeData-B.mapping"
#else
#define MAPPING_TABLE_FILE "CFUnicodeData-L.mapping"
#endif /* __BIG_ENDIAN__ */
579 __private_extern__
const void *CFUniCharGetMappingData(uint32_t type
) {
581 __CFSpinLock(&__CFUniCharMappingTableLock
);
583 if (NULL
== __CFUniCharMappingTables
) {
585 const void *bodyBase
;
589 if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE
, &bytes
)) {
590 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
594 (char *)bytes
+= 4; // Skip Unicode version
595 headerSize
= *(((uint32_t *)bytes
)++);
596 headerSize
-= (sizeof(uint32_t) * 2);
597 bodyBase
= (char *)bytes
+ headerSize
;
599 count
= headerSize
/ sizeof(uint32_t);
601 __CFUniCharMappingTables
= (const void **)CFAllocatorAllocate(NULL
, sizeof(const void *) * count
, 0);
603 for (idx
= 0;idx
< count
;idx
++) {
604 __CFUniCharMappingTables
[idx
] = (char *)bodyBase
+ *(((uint32_t *)bytes
)++);
608 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
610 return __CFUniCharMappingTables
[type
];
613 // Case mapping functions
614 #define DO_SPECIAL_CASE_MAPPING 1
/* The three arrays below are carved out of one allocation by __CFUniCharLoadCaseMappingTable(); each has NUM_CASE_MAP_DATA entries. */
/* Number of (key, value) pairs in each case-mapping table; also serves as the "already loaded" flag. */
616 static uint32_t *__CFUniCharCaseMappingTableCounts
= NULL
;
/* First pair of each case-mapping table. */
617 static uint32_t **__CFUniCharCaseMappingTable
= NULL
;
/* Data that follows the pairs of each table; used for mappings that produce more than one character. */
618 static const uint32_t **__CFUniCharCaseMappingExtraTable
= NULL
;
623 } __CFUniCharCaseMappings
;
625 /* Binary searches CFStringEncodingUnicodeTo8BitCharMap */
626 static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings
*theTable
, uint32_t numElem
, UTF32Char character
) {
627 const __CFUniCharCaseMappings
*p
, *q
, *divider
;
629 if ((character
< theTable
[0]._key
) || (character
> theTable
[numElem
-1]._key
)) {
635 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
636 if (character
< divider
->_key
) { q
= divider
- 1; }
637 else if (character
> divider
->_key
) { p
= divider
+ 1; }
638 else { return divider
->_value
; }
643 #define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1)
645 static bool __CFUniCharLoadCaseMappingTable(void) {
648 if (NULL
== __CFUniCharMappingTables
) (void)CFUniCharGetMappingData(kCFUniCharToLowercase
);
649 if (NULL
== __CFUniCharMappingTables
) return false;
651 __CFSpinLock(&__CFUniCharMappingTableLock
);
653 if (__CFUniCharCaseMappingTableCounts
) {
654 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
658 __CFUniCharCaseMappingTableCounts
= (uint32_t *)CFAllocatorAllocate(NULL
, sizeof(uint32_t) * NUM_CASE_MAP_DATA
+ sizeof(uint32_t *) * NUM_CASE_MAP_DATA
* 2, 0);
659 __CFUniCharCaseMappingTable
= (uint32_t **)((char *)__CFUniCharCaseMappingTableCounts
+ sizeof(uint32_t) * NUM_CASE_MAP_DATA
);
660 __CFUniCharCaseMappingExtraTable
= (const uint32_t **)__CFUniCharCaseMappingTable
+ NUM_CASE_MAP_DATA
;
662 for (idx
= 0;idx
< NUM_CASE_MAP_DATA
;idx
++) {
663 __CFUniCharCaseMappingTableCounts
[idx
] = *((uint32_t *)__CFUniCharMappingTables
[idx
]) / (sizeof(uint32_t) * 2);
664 __CFUniCharCaseMappingTable
[idx
] = ((uint32_t *)__CFUniCharMappingTables
[idx
]) + 1;
665 __CFUniCharCaseMappingExtraTable
[idx
] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable
[idx
] + *((uint32_t *)__CFUniCharMappingTables
[idx
]));
668 __CFSpinUnlock(&__CFUniCharMappingTableLock
);
/* Two-letter language codes as they read when the first two bytes of the
   code string are loaded as one uint16_t, which is how CFUniCharMapCaseTo()
   compares them. The value therefore depends on the host byte order. */
#if defined(__BIG_ENDIAN__)
#define TURKISH_LANG_CODE (0x7472) // tr
#define LITHUANIAN_LANG_CODE (0x6C74) // lt
#define AZERI_LANG_CODE (0x617A) // az
#else
#define TURKISH_LANG_CODE (0x7274) // tr
#define LITHUANIAN_LANG_CODE (0x746C) // lt
#define AZERI_LANG_CODE (0x7A61) // az
#endif /* __BIG_ENDIAN__ */
/*
 * Maps theChar to the case selected by ctype and writes the result to
 * convertedChar as UTF-16.
 *
 * theChar        character to map
 * convertedChar  destination buffer
 * maxLength      capacity of convertedChar in UTF-16 units
 * ctype          kCFUniCharToLowercase, kCFUniCharToUppercase,
 *                kCFUniCharToTitlecase or kCFUniCharCaseFold
 * flags          context flags (kCFUniCharCaseMapFinalSigma,
 *                kCFUniCharCaseMapAfter_i, kCFUniCharCaseMapMoreAbove)
 * langCode       language code; its first two bytes select the Lithuanian
 *                and Turkish/Azeri special cases
 *
 * Order of processing: context-dependent special cases (when
 * DO_SPECIAL_CASE_MAPPING is enabled), then the table-driven mapping for
 * characters flagged in the matching "has non-self mapping" bitmap, then a
 * lowercase retry for case folding, and finally the character itself.
 */
682 uint32_t CFUniCharMapCaseTo(UTF32Char theChar
, UTF16Char
*convertedChar
, uint32_t maxLength
, uint32_t ctype
, uint32_t flags
, const uint8_t *langCode
) {
683 __CFUniCharBitmapData
*data
;
684 uint8_t planeNo
= (theChar
>> 16) & 0xFF;
688 #if DO_SPECIAL_CASE_MAPPING
689 if (flags
& kCFUniCharCaseMapFinalSigma
) {
690 if (theChar
== 0x03A3) { // Final sigma
691 *convertedChar
= (ctype
== kCFUniCharToLowercase
? 0x03C2 : 0x03A3);
/* NOTE(review): langCode is dereferenced here and CFUniCharMapTo() passes NULL - confirm a NULL check guards this switch. */
697 switch (*(uint16_t *)langCode
) {
698 case LITHUANIAN_LANG_CODE
:
699 if (theChar
== 0x0307 && (flags
& kCFUniCharCaseMapAfter_i
)) {
701 } else if (ctype
== kCFUniCharToLowercase
) {
/* Lithuanian lowercasing with a following mark above: I, J and I-ogonek gain an explicit U+0307. */
702 if (flags
& kCFUniCharCaseMapMoreAbove
) {
704 case 0x0049: // LATIN CAPITAL LETTER I
705 *(convertedChar
++) = 0x0069;
706 *(convertedChar
++) = 0x0307;
709 case 0x004A: // LATIN CAPITAL LETTER J
710 *(convertedChar
++) = 0x006A;
711 *(convertedChar
++) = 0x0307;
714 case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK
715 *(convertedChar
++) = 0x012F;
716 *(convertedChar
++) = 0x0307;
/* Precomposed I with grave, acute or tilde: i, U+0307, then the accent. */
723 case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE
724 *(convertedChar
++) = 0x0069;
725 *(convertedChar
++) = 0x0307;
726 *(convertedChar
++) = 0x0300;
729 case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE
730 *(convertedChar
++) = 0x0069;
731 *(convertedChar
++) = 0x0307;
732 *(convertedChar
++) = 0x0301;
735 case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE
736 *(convertedChar
++) = 0x0069;
737 *(convertedChar
++) = 0x0307;
738 *(convertedChar
++) = 0x0303;
/* Turkish and Azeri share the dotted / dotless i rules. */
746 case TURKISH_LANG_CODE
:
747 case AZERI_LANG_CODE
:
748 if (theChar
== 0x0049) { // LATIN CAPITAL LETTER I
749 *convertedChar
= (ctype
== kCFUniCharToLowercase
? ((kCFUniCharCaseMapMoreAbove
& flags
) ? 0x0069 : 0x0131) : 0x0049);
751 } else if ((theChar
== 0x0069) || (theChar
== 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE
752 *convertedChar
= (ctype
== kCFUniCharToLowercase
? 0x0069 : 0x0130);
754 } else if (theChar
== 0x0307 && (kCFUniCharCaseMapAfter_i
& flags
)) { // COMBINING DOT ABOVE AFTER_i
755 if (ctype
== kCFUniCharToLowercase
) {
758 *convertedChar
= 0x0307;
767 #endif DO_SPECIAL_CASE_MAPPING
/* Table-driven mapping: make sure the bitmaps are loaded, then pick the "has non-self mapping" bitmap that belongs to ctype. */
769 if (NULL
== __CFUniCharBitmapDataArray
) __CFUniCharLoadBitmapData();
771 data
= __CFUniCharBitmapDataArray
+ ((ctype
+ kCFUniCharHasNonSelfLowercaseCharacterSet
) - kCFUniCharDecimalDigitCharacterSet
);
/* Only characters flagged in that bitmap are looked up; the case-mapping tables are loaded on demand. */
773 if (planeNo
< data
->_numPlanes
&& data
->_planes
[planeNo
] && CFUniCharIsMemberOfBitmap(theChar
, data
->_planes
[planeNo
]) && (__CFUniCharCaseMappingTableCounts
|| __CFUniCharLoadCaseMappingTable())) {
774 uint32_t value
= __CFUniCharGetMappedCase((const __CFUniCharCaseMappings
*)__CFUniCharCaseMappingTable
[ctype
], __CFUniCharCaseMappingTableCounts
[ctype
], theChar
);
/* A character without a titlecase entry falls back to its uppercase mapping. */
776 if (!value
&& ctype
== kCFUniCharToTitlecase
) {
777 value
= __CFUniCharGetMappedCase((const __CFUniCharCaseMappings
*)__CFUniCharCaseMappingTable
[kCFUniCharToUppercase
], __CFUniCharCaseMappingTableCounts
[kCFUniCharToUppercase
], theChar
);
778 if (value
) ctype
= kCFUniCharToUppercase
;
782 int count
= CFUniCharConvertFlagToCount(value
);
/* A result outside the BMP is written as a surrogate pair. */
785 if (value
& kCFUniCharNonBmpFlag
) {
787 value
= (value
& 0xFFFFFF) - 0x10000;
788 *(convertedChar
++) = (value
>> 10) + 0xD800UL
;
789 *(convertedChar
++) = (value
& 0x3FF) + 0xDC00UL
;
793 *convertedChar
= (UTF16Char
)value
;
/* Multi-character result: the low 24 bits of value index into the extra table of the case type. */
796 } else if (count
< (int)maxLength
) {
797 const uint32_t *extraMapping
= __CFUniCharCaseMappingExtraTable
[ctype
] + (value
& 0xFFFFFF);
799 if (value
& kCFUniCharNonBmpFlag
) {
802 while (count
-- > 0) {
803 value
= *(extraMapping
++);
804 if (value
> 0xFFFF) {
805 if (copiedLen
+ 2 >= (int)maxLength
) break;
806 value
= (value
& 0xFFFFFF) - 0x10000;
807 convertedChar
[copiedLen
++] = (value
>> 10) + 0xD800UL
;
808 convertedChar
[copiedLen
++] = (value
& 0x3FF) + 0xDC00UL
;
810 if (copiedLen
+ 1 >= (int)maxLength
) break;
811 convertedChar
[copiedLen
++] = value
;
814 if (!count
) return copiedLen
;
818 for (idx
= 0;idx
< count
;idx
++) *(convertedChar
++) = (UTF16Char
)*(extraMapping
++);
/* Case folding without an entry of its own is retried as lowercasing. */
823 } else if (ctype
== kCFUniCharCaseFold
) {
824 ctype
= kCFUniCharToLowercase
;
/* No mapping applies: the character maps to itself. */
828 *convertedChar
= theChar
;
832 UInt32
CFUniCharMapTo(UniChar theChar
, UniChar
*convertedChar
, UInt32 maxLength
, uint16_t ctype
, UInt32 flags
) {
833 if (ctype
== kCFUniCharCaseFold
+ 1) { // kCFUniCharDecompose
834 if (CFUniCharIsDecomposableCharacter(theChar
, false)) {
835 UTF32Char buffer
[MAX_DECOMPOSED_LENGTH
];
836 CFIndex usedLength
= CFUniCharDecomposeCharacter(theChar
, buffer
, MAX_DECOMPOSED_LENGTH
);
839 for (idx
= 0;idx
< usedLength
;idx
++) *(convertedChar
++) = buffer
[idx
];
842 *convertedChar
= theChar
;
846 return CFUniCharMapCaseTo(theChar
, convertedChar
, maxLength
, ctype
, flags
, NULL
);
850 CF_INLINE
bool __CFUniCharIsMoreAbove(UTF16Char
*buffer
, uint32_t length
) {
851 UTF32Char currentChar
;
854 while (length
-- > 0) {
855 currentChar
= *(buffer
)++;
856 if (CFUniCharIsSurrogateHighCharacter(currentChar
) && (length
> 0) && CFUniCharIsSurrogateLowCharacter(*(buffer
+ 1))) {
857 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(currentChar
, *(buffer
++));
860 if (!CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) break;
862 property
= CFUniCharGetCombiningPropertyForCharacter(currentChar
, CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16) & 0xFF));
864 if (property
== 230) return true; // Above priority
/*
 * Examines the characters that precede the current position, walking
 * backwards through buffer.
 *
 * buffer  UTF-16 text
 * length  number of units available to look back over
 *
 * Combining marks are skipped while walking back. The function returns false
 * as soon as it meets a mark of combining class 230 ("above"), either as a
 * separate character or inside the decomposition of the character the walk
 * stops at.
 */
869 CF_INLINE
bool __CFUniCharIsAfter_i(UTF16Char
*buffer
, uint32_t length
) {
870 UTF32Char currentChar
= 0;
872 UTF32Char decomposed
[MAX_DECOMPOSED_LENGTH
];
873 uint32_t decompLength
;
/* Nothing precedes the current position. */
876 if (length
< 1) return 0;
/* Walk backwards, joining surrogate pairs, for as long as the characters are non-base. */
879 while (length
-- > 1) {
880 currentChar
= *(--buffer
);
881 if (CFUniCharIsSurrogateLowCharacter(currentChar
)) {
882 if ((length
> 1) && CFUniCharIsSurrogateHighCharacter(*(buffer
- 1))) {
883 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*(--buffer
), currentChar
);
889 if (!CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) break;
891 property
= CFUniCharGetCombiningPropertyForCharacter(currentChar
, CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16) & 0xFF));
893 if (property
== 230) return false; // Above priority
896 currentChar
= *(--buffer
);
897 } else if (CFUniCharIsSurrogateLowCharacter(currentChar
) && CFUniCharIsSurrogateHighCharacter(*(--buffer
))) {
898 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*buffer
, currentChar
);
/* Decompose the character the walk stopped at; element 0 is its base, the rest are its marks. */
901 decompLength
= CFUniCharDecomposeCharacter(currentChar
, decomposed
, MAX_DECOMPOSED_LENGTH
);
902 currentChar
= *decomposed
;
905 for (idx
= 1;idx
< decompLength
;idx
++) {
906 currentChar
= decomposed
[idx
];
907 property
= CFUniCharGetCombiningPropertyForCharacter(currentChar
, CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16) & 0xFF));
909 if (property
== 230) return false; // Above priority
914 __private_extern__
uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar
, UTF16Char
*buffer
, uint32_t currentIndex
, uint32_t length
, uint32_t type
, const uint8_t *langCode
, uint32_t lastFlags
) {
915 if (theChar
== 0x03A3) { // GREEK CAPITAL LETTER SIGMA
916 if ((type
== kCFUniCharToLowercase
) && (currentIndex
> 0)) {
917 UTF16Char
*start
= buffer
;
918 UTF16Char
*end
= buffer
+ length
;
921 // First check if we're after a cased character
922 buffer
+= (currentIndex
- 1);
923 while (start
<= buffer
) {
924 otherChar
= *(buffer
--);
925 if (CFUniCharIsSurrogateLowCharacter(otherChar
) && (start
<= buffer
) && CFUniCharIsSurrogateHighCharacter(*buffer
)) {
926 otherChar
= CFUniCharGetLongCharacterForSurrogatePair(*(buffer
--), otherChar
);
928 if (!CFUniCharIsMemberOf(otherChar
, kCFUniCharCaseIgnorableCharacterSet
)) {
929 if (!CFUniCharIsMemberOf(otherChar
, kCFUniCharUppercaseLetterCharacterSet
) && !CFUniCharIsMemberOf(otherChar
, kCFUniCharLowercaseLetterCharacterSet
)) return 0; // Uppercase set contains titlecase
934 // Next check if we're before a cased character
935 buffer
= start
+ currentIndex
+ 1;
936 while (buffer
< end
) {
937 otherChar
= *(buffer
++);
938 if (CFUniCharIsSurrogateHighCharacter(otherChar
) && (buffer
< end
) && CFUniCharIsSurrogateLowCharacter(*buffer
)) {
939 otherChar
= CFUniCharGetLongCharacterForSurrogatePair(otherChar
, *(buffer
++));
941 if (!CFUniCharIsMemberOf(otherChar
, kCFUniCharCaseIgnorableCharacterSet
)) {
942 if (CFUniCharIsMemberOf(otherChar
, kCFUniCharUppercaseLetterCharacterSet
) || CFUniCharIsMemberOf(otherChar
, kCFUniCharLowercaseLetterCharacterSet
)) return 0; // Uppercase set contains titlecase
946 return kCFUniCharCaseMapFinalSigma
;
948 } else if (langCode
) {
949 if (*((const uint16_t *)langCode
) == LITHUANIAN_LANG_CODE
) {
950 if ((theChar
== 0x0307) && ((kCFUniCharCaseMapAfter_i
|kCFUniCharCaseMapMoreAbove
) & lastFlags
) == (kCFUniCharCaseMapAfter_i
|kCFUniCharCaseMapMoreAbove
)) {
951 return (__CFUniCharIsAfter_i(buffer
, currentIndex
) ? kCFUniCharCaseMapAfter_i
: 0);
952 } else if (type
== kCFUniCharToLowercase
) {
953 if ((theChar
== 0x0049) || (theChar
== 0x004A) || (theChar
== 0x012E)) {
954 return (__CFUniCharIsMoreAbove(buffer
+ (++currentIndex
), length
- currentIndex
) ? kCFUniCharCaseMapMoreAbove
: 0);
956 } else if ((theChar
== 'i') || (theChar
== 'j')) {
957 return (__CFUniCharIsMoreAbove(buffer
+ (++currentIndex
), length
- currentIndex
) ? (kCFUniCharCaseMapAfter_i
|kCFUniCharCaseMapMoreAbove
) : 0);
959 } else if ((*((const uint16_t *)langCode
) == TURKISH_LANG_CODE
) || (*((const uint16_t *)langCode
) == AZERI_LANG_CODE
)) {
960 if (type
== kCFUniCharToLowercase
) {
961 if (theChar
== 0x0307) {
962 return (kCFUniCharCaseMapMoreAbove
& lastFlags
? kCFUniCharCaseMapAfter_i
: 0);
963 } else if (theChar
== 0x0049) {
964 return (((++currentIndex
< length
) && (buffer
[currentIndex
] == 0x0307)) ? kCFUniCharCaseMapMoreAbove
: 0);
972 // Unicode property database
973 static __CFUniCharBitmapData
*__CFUniCharUnicodePropertyTable
= NULL
;
975 static CFSpinLock_t __CFUniCharPropTableLock
= 0;
977 #define PROP_DB_FILE "CFUniCharPropertyDatabase.data"
979 const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType
, uint32_t plane
) {
981 __CFSpinLock(&__CFUniCharPropTableLock
);
983 if (NULL
== __CFUniCharUnicodePropertyTable
) {
985 const void *bodyBase
;
986 const void *planeBase
;
989 int planeIndex
, planeCount
;
992 if (!__CFUniCharLoadFile(PROP_DB_FILE
, &bytes
)) {
993 __CFSpinUnlock(&__CFUniCharPropTableLock
);
997 (char *)bytes
+= 4; // Skip Unicode version
998 headerSize
= CFSwapInt32BigToHost(*(((uint32_t *)bytes
)++));
999 headerSize
-= (sizeof(uint32_t) * 2);
1000 bodyBase
= (char *)bytes
+ headerSize
;
1002 count
= headerSize
/ sizeof(uint32_t);
1004 __CFUniCharUnicodePropertyTable
= (__CFUniCharBitmapData
*)CFAllocatorAllocate(NULL
, sizeof(__CFUniCharBitmapData
) * count
, 0);
1006 for (idx
= 0;idx
< count
;idx
++) {
1007 planeCount
= *((const uint8_t *)bodyBase
);
1008 (char *)planeBase
= (char *)bodyBase
+ planeCount
+ (planeCount
% 4 ? 4 - (planeCount
% 4) : 0);
1009 __CFUniCharUnicodePropertyTable
[idx
]._planes
= (const uint8_t **)CFAllocatorAllocate(NULL
, sizeof(const void *) * planeCount
, 0);
1011 for (planeIndex
= 0;planeIndex
< planeCount
;planeIndex
++) {
1012 if ((planeSize
= ((const uint8_t *)bodyBase
)[planeIndex
+ 1])) {
1013 __CFUniCharUnicodePropertyTable
[idx
]._planes
[planeIndex
] = planeBase
;
1014 (char *)planeBase
+= (planeSize
* 256);
1016 __CFUniCharUnicodePropertyTable
[idx
]._planes
[planeIndex
] = NULL
;
1020 __CFUniCharUnicodePropertyTable
[idx
]._numPlanes
= planeCount
;
1021 (char *)bodyBase
+= (CFSwapInt32BigToHost(*(((uint32_t *)bytes
)++)));
1025 __CFSpinUnlock(&__CFUniCharPropTableLock
);
1027 return (plane
< __CFUniCharUnicodePropertyTable
[propertyType
]._numPlanes
? __CFUniCharUnicodePropertyTable
[propertyType
]._planes
[plane
] : NULL
);
1030 __private_extern__
uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType
) {
1031 (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType
, 0);
1032 return __CFUniCharUnicodePropertyTable
[propertyType
]._numPlanes
;
1035 __private_extern__
uint32_t CFUniCharGetUnicodeProperty(UTF32Char character
, uint32_t propertyType
) {
1036 if (propertyType
== kCFUniCharCombiningProperty
) {
1037 return CFUniCharGetCombiningPropertyForCharacter(character
, CFUniCharGetUnicodePropertyDataForPlane(propertyType
, (character
>> 16) & 0xFF));
1038 } else if (propertyType
== kCFUniCharBidiProperty
) {
1039 return CFUniCharGetBidiPropertyForCharacter(character
, CFUniCharGetUnicodePropertyDataForPlane(propertyType
, (character
>> 16) & 0xFF));
1048 The UTF8 conversion in the following function is derived from ConvertUTF.c
1051 * Copyright 2001 Unicode, Inc.
1055 * This source code is provided as is by Unicode, Inc. No claims are
1056 * made as to fitness for any particular purpose. No warranties of any
1057 * kind are expressed or implied. The recipient agrees to determine
1058 * applicability of information provided. If this file has been
1059 * purchased on magnetic or optical media from Unicode, Inc., the
1060 * sole remedy for any claim will be exchange of defective media
1061 * within 90 days of receipt.
1063 * Limitations on Rights to Redistribute This Code
1065 * Unicode, Inc. hereby grants the right to freely use the information
1066 * supplied in this file in the creation of products supporting the
1067 * Unicode Standard, and to make copies of this file in any form
1068 * for internal or external distribution as long as this notice
// U+FFFD; CFUniCharFillDestinationBuffer() substitutes it for values of
// 0x200000 and above when producing UTF-8.
#define UNI_REPLACEMENT_CHAR (0x0000FFFDUL)
1073 bool CFUniCharFillDestinationBuffer(const UTF32Char
*src
, uint32_t srcLength
, void **dst
, uint32_t dstLength
, uint32_t *filledLength
, uint32_t dstFormat
) {
1074 UTF32Char currentChar
;
1075 uint32_t usedLength
= *filledLength
;
1077 if (dstFormat
== kCFUniCharUTF16Format
) {
1078 UTF16Char
*dstBuffer
= (UTF16Char
*)*dst
;
1080 while (srcLength
-- > 0) {
1081 currentChar
= *(src
++);
1083 if (currentChar
> 0xFFFF) { // Non-BMP
1086 if (usedLength
> dstLength
) return false;
1087 currentChar
-= 0x10000;
1088 *(dstBuffer
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
1089 *(dstBuffer
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
1094 if (usedLength
> dstLength
) return false;
1095 *(dstBuffer
++) = (UTF16Char
)currentChar
;
1101 } else if (dstFormat
== kCFUniCharUTF8Format
) {
1102 uint8_t *dstBuffer
= (uint8_t *)*dst
;
1103 uint16_t bytesToWrite
= 0;
1104 const UTF32Char byteMask
= 0xBF;
1105 const UTF32Char byteMark
= 0x80;
1106 static const uint8_t firstByteMark
[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
1108 while (srcLength
-- > 0) {
1109 currentChar
= *(src
++);
1111 /* Figure out how many bytes the result will require */
1112 if (currentChar
< (UTF32Char
)0x80) {
1114 } else if (currentChar
< (UTF32Char
)0x800) {
1116 } else if (currentChar
< (UTF32Char
)0x10000) {
1118 } else if (currentChar
< (UTF32Char
)0x200000) {
1122 currentChar
= UNI_REPLACEMENT_CHAR
;
1125 usedLength
+= bytesToWrite
;
1128 if (usedLength
> dstLength
) return false;
1130 dstBuffer
+= bytesToWrite
;
1131 switch (bytesToWrite
) { /* note: everything falls through. */
1132 case 4: *--dstBuffer
= (currentChar
| byteMark
) & byteMask
; currentChar
>>= 6;
1133 case 3: *--dstBuffer
= (currentChar
| byteMark
) & byteMask
; currentChar
>>= 6;
1134 case 2: *--dstBuffer
= (currentChar
| byteMark
) & byteMask
; currentChar
>>= 6;
1135 case 1: *--dstBuffer
= currentChar
| firstByteMark
[bytesToWrite
];
1137 dstBuffer
+= bytesToWrite
;
1143 UTF32Char
*dstBuffer
= (UTF32Char
*)*dst
;
1145 while (srcLength
-- > 0) {
1146 currentChar
= *(src
++);
1150 if (usedLength
> dstLength
) return false;
1151 *(dstBuffer
++) = currentChar
;
1158 *filledLength
= usedLength
;