2 * Copyright (c) 2008 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 Copyright (c) 1998-2007, Apple Inc. All rights reserved.
27 #if !defined(__COREFOUNDATION_CFUNICHAR__)
28 #define __COREFOUNDATION_CFUNICHAR__ 1
31 #include <CoreFoundation/CFByteOrder.h>
32 #include <CoreFoundation/CFBase.h>
// Bitmap addressing constants used by the inline bitmap helpers in this header:
// the bit for a character c lives in byte (c >> kCFUniCharBitShiftForByte) of
// the bitmap, at bit position (c & kCFUniCharBitShiftForMask) within that byte.
#define kCFUniCharBitShiftForByte	(3)	// right shift turning a character into a byte index (8 bits per byte)
#define kCFUniCharBitShiftForMask	(7)	// despite the name this is a mask, not a shift: selects the bit index 0-7
39 CF_INLINE
bool CFUniCharIsSurrogateHighCharacter(UniChar character
) {
40 return ((character
>= 0xD800UL
) && (character
<= 0xDBFFUL
) ? true : false);
43 CF_INLINE
bool CFUniCharIsSurrogateLowCharacter(UniChar character
) {
44 return ((character
>= 0xDC00UL
) && (character
<= 0xDFFFUL
) ? true : false);
47 CF_INLINE UTF32Char
CFUniCharGetLongCharacterForSurrogatePair(UniChar surrogateHigh
, UniChar surrogateLow
) {
48 return ((surrogateHigh
- 0xD800UL
) << 10) + (surrogateLow
- 0xDC00UL
) + 0x0010000UL
;
// Encoding format identifiers.
// The following values coincide with the TextEncodingFormat defines in TextCommon.h
// (which is why the value 1 is skipped).
enum {
    kCFUniCharUTF16Format = 0,
    kCFUniCharUTF8Format = 2,
    kCFUniCharUTF32Format = 3
};
58 CF_INLINE
bool CFUniCharIsMemberOfBitmap(UTF16Char theChar
, const uint8_t *bitmap
) {
59 return (bitmap
&& (bitmap
[(theChar
) >> kCFUniCharBitShiftForByte
] & (((uint32_t)1) << (theChar
& kCFUniCharBitShiftForMask
))) ? true : false);
62 CF_INLINE
void CFUniCharAddCharacterToBitmap(UTF16Char theChar
, uint8_t *bitmap
) {
63 bitmap
[(theChar
) >> kCFUniCharBitShiftForByte
] |= (((uint32_t)1) << (theChar
& kCFUniCharBitShiftForMask
));
66 CF_INLINE
void CFUniCharRemoveCharacterFromBitmap(UTF16Char theChar
, uint8_t *bitmap
) {
67 bitmap
[(theChar
) >> kCFUniCharBitShiftForByte
] &= ~(((uint32_t)1) << (theChar
& kCFUniCharBitShiftForMask
));
// Character set identifiers.
// NOTE(review): presumably these are the values accepted by the `charset`
// parameters of the functions declared in this header — confirm against the
// implementation.
// Values 1-15 are the regular sets; values from 100 upward are internal sets.
enum {
    kCFUniCharControlCharacterSet = 1,
    kCFUniCharWhitespaceCharacterSet,
    kCFUniCharWhitespaceAndNewlineCharacterSet,
    kCFUniCharDecimalDigitCharacterSet,
    kCFUniCharLetterCharacterSet,
    kCFUniCharLowercaseLetterCharacterSet,
    kCFUniCharUppercaseLetterCharacterSet,
    kCFUniCharNonBaseCharacterSet,
    kCFUniCharCanonicalDecomposableCharacterSet,
    kCFUniCharDecomposableCharacterSet = kCFUniCharCanonicalDecomposableCharacterSet, // alias: same value as the canonical set
    kCFUniCharAlphaNumericCharacterSet,
    kCFUniCharPunctuationCharacterSet,
    kCFUniCharIllegalCharacterSet,
    kCFUniCharTitlecaseLetterCharacterSet,
    kCFUniCharSymbolAndOperatorCharacterSet,
    kCFUniCharNewlineCharacterSet,

    kCFUniCharCompatibilityDecomposableCharacterSet = 100, // internal character sets begin here
    kCFUniCharHFSPlusDecomposableCharacterSet,
    kCFUniCharStrongRightToLeftCharacterSet,
    kCFUniCharHasNonSelfLowercaseCharacterSet,
    kCFUniCharHasNonSelfUppercaseCharacterSet,
    kCFUniCharHasNonSelfTitlecaseCharacterSet,
    kCFUniCharHasNonSelfCaseFoldingCharacterSet,
    kCFUniCharHasNonSelfMirrorMappingCharacterSet,
    kCFUniCharControlAndFormatterCharacterSet,
    kCFUniCharCaseIgnorableCharacterSet,
    kCFUniCharGraphemeExtendCharacterSet
};
101 CF_EXPORT
bool CFUniCharIsMemberOf(UTF32Char theChar
, uint32_t charset
);
103 // This function returns NULL for kCFUniCharControlCharacterSet, kCFUniCharWhitespaceCharacterSet, kCFUniCharWhitespaceAndNewlineCharacterSet, & kCFUniCharIllegalCharacterSet
104 CF_EXPORT
const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset
, uint32_t plane
);
// Bitmap status codes, each fitting in a uint8_t.
// NOTE(review): presumably the values returned by CFUniCharGetBitmapForPlane,
// whose return type is uint8_t — confirm against the implementation.
enum {
    kCFUniCharBitmapFilled = (uint8_t)0,
    kCFUniCharBitmapEmpty = (uint8_t)0xFF,
    kCFUniCharBitmapAll = (uint8_t)1
};
112 CF_EXPORT
uint8_t CFUniCharGetBitmapForPlane(uint32_t charset
, uint32_t plane
, void *bitmap
, bool isInverted
);
114 CF_EXPORT
uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset
);
// Case mapping kinds.
// NOTE(review): presumably the values accepted by the `ctype` parameter of
// CFUniCharMapCaseTo — confirm.
enum {
    kCFUniCharToLowercase = 0,
    kCFUniCharToUppercase,
    kCFUniCharToTitlecase,
    kCFUniCharCaseFold // NOTE(review): final enumerator as found in the upstream CoreFoundation header — confirm
};
// Conditional case-mapping flags. Each value is a distinct bit, so they can be
// OR-ed together into a single uint32_t.
// NOTE(review): presumably used for the `flags` / `lastFlags` parameters of the
// case-mapping functions declared in this header — confirm.
enum {
    kCFUniCharCaseMapFinalSigma = (1),
    kCFUniCharCaseMapAfter_i = (1 << 1),
    kCFUniCharCaseMapMoreAbove = (1 << 2)
};
129 CF_EXPORT CFIndex
CFUniCharMapCaseTo(UTF32Char theChar
, UTF16Char
*convertedChar
, CFIndex maxLength
, uint32_t ctype
, uint32_t flags
, const uint8_t *langCode
);
131 CF_EXPORT
uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar
, UTF16Char
*buffer
, CFIndex currentIndex
, CFIndex length
, uint32_t type
, const uint8_t *langCode
, uint32_t lastFlags
);
// Bidirectional property values, numbered consecutively from 0.
// kCFUniCharBiDiPropertyPDF is the largest value; CFUniCharGetBidiPropertyForCharacter
// relies on that to tell a property value apart from a page index.
enum {
    kCFUniCharBiDiPropertyON = 0,
    kCFUniCharBiDiPropertyL,
    kCFUniCharBiDiPropertyR,
    kCFUniCharBiDiPropertyAN,
    kCFUniCharBiDiPropertyEN,
    kCFUniCharBiDiPropertyAL,
    kCFUniCharBiDiPropertyNSM,
    kCFUniCharBiDiPropertyCS,
    kCFUniCharBiDiPropertyES,
    kCFUniCharBiDiPropertyET,
    kCFUniCharBiDiPropertyBN,
    kCFUniCharBiDiPropertyS,
    kCFUniCharBiDiPropertyWS,
    kCFUniCharBiDiPropertyB,
    kCFUniCharBiDiPropertyRLO,
    kCFUniCharBiDiPropertyRLE,
    kCFUniCharBiDiPropertyLRO,
    kCFUniCharBiDiPropertyLRE,
    kCFUniCharBiDiPropertyPDF
};
// Unicode property table identifiers.
// NOTE(review): presumably the values accepted by the `propertyType` parameters
// of the CFUniCharGetUnicodeProperty... functions — confirm.
enum {
    kCFUniCharCombiningProperty = 0,
    kCFUniCharBidiProperty
};
160 // The second arg 'bitmap' has to be the pointer to a specific plane
161 CF_INLINE
uint8_t CFUniCharGetBidiPropertyForCharacter(UTF16Char character
, const uint8_t *bitmap
) {
163 uint8_t value
= bitmap
[(character
>> 8)];
165 if (value
> kCFUniCharBiDiPropertyPDF
) {
166 bitmap
= bitmap
+ 256 + ((value
- kCFUniCharBiDiPropertyPDF
- 1) * 256);
167 return bitmap
[character
% 256];
172 return kCFUniCharBiDiPropertyL
;
175 CF_INLINE
uint8_t CFUniCharGetCombiningPropertyForCharacter(UTF16Char character
, const uint8_t *bitmap
) {
177 uint8_t value
= bitmap
[(character
>> 8)];
180 bitmap
= bitmap
+ 256 + ((value
- 1) * 256);
181 return bitmap
[character
% 256];
187 CF_EXPORT
const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType
, uint32_t plane
);
188 CF_EXPORT
uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType
);
189 CF_EXPORT
uint32_t CFUniCharGetUnicodeProperty(UTF32Char character
, uint32_t propertyType
);
191 CF_EXPORT
bool CFUniCharFillDestinationBuffer(const UTF32Char
*src
, CFIndex srcLength
, void **dst
, CFIndex dstLength
, CFIndex
*filledLength
, uint32_t dstFormat
);
195 CF_INLINE
bool CFUniCharToUTF32(const UTF16Char
*src
, CFIndex length
, UTF32Char
*dst
, bool allowLossy
, bool isBigEndien
) {
196 const UTF16Char
*limit
= src
+ length
;
199 while (src
< limit
) {
200 character
= *(src
++);
202 if (CFUniCharIsSurrogateHighCharacter(character
)) {
203 if ((src
< limit
) && CFUniCharIsSurrogateLowCharacter(*src
)) {
204 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, *(src
++));
206 if (!allowLossy
) return false;
207 character
= 0xFFFD; // replacement character
209 } else if (CFUniCharIsSurrogateLowCharacter(character
)) {
210 if (!allowLossy
) return false;
211 character
= 0xFFFD; // replacement character
214 *(dst
++) = (isBigEndien
? CFSwapInt32HostToBig(character
) : CFSwapInt32HostToLittle(character
));
220 CF_INLINE
bool CFUniCharFromUTF32(const UTF32Char
*src
, CFIndex length
, UTF16Char
*dst
, bool allowLossy
, bool isBigEndien
) {
221 const UTF32Char
*limit
= src
+ length
;
224 while (src
< limit
) {
225 character
= (isBigEndien
? CFSwapInt32BigToHost(*(src
++)) : CFSwapInt32LittleToHost(*(src
++)));
227 if (character
< 0xFFFF) { // BMP
229 if (CFUniCharIsSurrogateHighCharacter(character
)) {
230 UTF32Char otherCharacter
= 0xFFFD; // replacement character
233 otherCharacter
= (isBigEndien
? CFSwapInt32BigToHost(*src
) : CFSwapInt32LittleToHost(*src
));
236 if ((otherCharacter
< 0x10000) && CFUniCharIsSurrogateLowCharacter(otherCharacter
)) {
237 *(dst
++) = character
; ++src
;
239 otherCharacter
= 0xFFFD; // replacement character
243 character
= otherCharacter
;
244 } else if (CFUniCharIsSurrogateLowCharacter(character
)) {
245 character
= 0xFFFD; // replacement character
248 if (CFUniCharIsSurrogateHighCharacter(character
) || CFUniCharIsSurrogateLowCharacter(character
)) return false;
250 } else if (character
< 0x110000) { // non-BMP
251 character
-= 0x10000;
252 *(dst
++) = (UTF16Char
)((character
>> 10) + 0xD800UL
);
253 character
= (UTF16Char
)((character
& 0x3FF) + 0xDC00UL
);
255 if (!allowLossy
) return false;
256 character
= 0xFFFD; // replacement character
259 *(dst
++) = character
;
266 #endif /* ! __COREFOUNDATION_CFUNICHAR__ */