2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 Copyright (c) 1998-2005, Apple, Inc. All rights reserved.
27 #if !defined(__COREFOUNDATION_CFUNICHAR__)
28 #define __COREFOUNDATION_CFUNICHAR__ 1
31 #include <CoreFoundation/CFByteOrder.h>
32 #include <CoreFoundation/CFBase.h>
34 #if defined(__cplusplus)
// Right shift applied to a character code to obtain the index of the bitmap byte holding its bit.
38 #define kCFUniCharBitShiftForByte (3)
// Mask applied to a character code to obtain the position of its bit inside that byte.
39 #define kCFUniCharBitShiftForMask (7)
41 CF_INLINE Boolean
CFUniCharIsSurrogateHighCharacter(UniChar character
) {
42 return ((character
>= 0xD800UL
) && (character
<= 0xDBFFUL
) ? true : false);
45 CF_INLINE Boolean
CFUniCharIsSurrogateLowCharacter(UniChar character
) {
46 return ((character
>= 0xDC00UL
) && (character
<= 0xDFFFUL
) ? true : false);
49 CF_INLINE UTF32Char
CFUniCharGetLongCharacterForSurrogatePair(UniChar surrogateHigh
, UniChar surrogateLow
) {
50 return ((surrogateHigh
- 0xD800UL
) << 10) + (surrogateLow
- 0xDC00UL
) + 0x0010000UL
;
53 // The following values coincide with the TextEncodingFormat format defines in TextCommon.h
// Encoding format selectors. The value 1 is deliberately not assigned in this list.
// NOTE(review): presumably these are the values accepted as 'dstFormat' by
// CFUniCharFillDestinationBuffer - confirm against the implementation.
55 kCFUniCharUTF16Format
= 0,
56 kCFUniCharUTF8Format
= 2,
57 kCFUniCharUTF32Format
= 3
60 CF_INLINE
bool CFUniCharIsMemberOfBitmap(UTF16Char theChar
, const uint8_t *bitmap
) {
61 return (bitmap
&& (bitmap
[(theChar
) >> kCFUniCharBitShiftForByte
] & (((uint32_t)1) << (theChar
& kCFUniCharBitShiftForMask
))) ? true : false);
64 CF_INLINE
void CFUniCharAddCharacterToBitmap(UTF16Char theChar
, uint8_t *bitmap
) {
65 bitmap
[(theChar
) >> kCFUniCharBitShiftForByte
] |= (((uint32_t)1) << (theChar
& kCFUniCharBitShiftForMask
));
68 CF_INLINE
void CFUniCharRemoveCharacterFromBitmap(UTF16Char theChar
, uint8_t *bitmap
) {
69 bitmap
[(theChar
) >> kCFUniCharBitShiftForByte
] &= ~(((uint32_t)1) << (theChar
& kCFUniCharBitShiftForMask
));
// Character set identifiers. Numbering starts at 1 and increases by one per entry,
// except for the alias noted below.
// NOTE(review): presumably these are the values passed as the 'charset' argument of the
// CFUniChar bitmap/membership functions - confirm against the implementation.
73 kCFUniCharControlCharacterSet
= 1,
74 kCFUniCharWhitespaceCharacterSet
,
75 kCFUniCharWhitespaceAndNewlineCharacterSet
,
76 kCFUniCharDecimalDigitCharacterSet
,
77 kCFUniCharLetterCharacterSet
,
78 kCFUniCharLowercaseLetterCharacterSet
,
79 kCFUniCharUppercaseLetterCharacterSet
,
80 kCFUniCharNonBaseCharacterSet
,
81 kCFUniCharCanonicalDecomposableCharacterSet
,
// Alias: has the same value as kCFUniCharCanonicalDecomposableCharacterSet, so the
// entries that follow continue counting from that shared value.
82 kCFUniCharDecomposableCharacterSet
= kCFUniCharCanonicalDecomposableCharacterSet
,
83 kCFUniCharAlphaNumericCharacterSet
,
84 kCFUniCharPunctuationCharacterSet
,
85 kCFUniCharIllegalCharacterSet
,
86 kCFUniCharTitlecaseLetterCharacterSet
,
87 kCFUniCharSymbolAndOperatorCharacterSet
,
88 kCFUniCharCompatibilityDecomposableCharacterSet
,
89 kCFUniCharHFSPlusDecomposableCharacterSet
,
90 kCFUniCharStrongRightToLeftCharacterSet
,
91 kCFUniCharHasNonSelfLowercaseCharacterSet
,
92 kCFUniCharHasNonSelfUppercaseCharacterSet
,
93 kCFUniCharHasNonSelfTitlecaseCharacterSet
,
94 kCFUniCharHasNonSelfCaseFoldingCharacterSet
,
95 kCFUniCharHasNonSelfMirrorMappingCharacterSet
,
96 kCFUniCharControlAndFormatterCharacterSet
,
97 kCFUniCharCaseIgnorableCharacterSet
// Declaration only; the implementation is not in this header.
// NOTE(review): presumably reports whether 'theChar' belongs to the character set
// identified by 'charset' (one of the kCFUniChar...CharacterSet values) - confirm.
100 CF_EXPORT
bool CFUniCharIsMemberOf(UTF32Char theChar
, uint32_t charset
);
102 // This function returns NULL for kCFUniCharControlCharacterSet, kCFUniCharWhitespaceCharacterSet, kCFUniCharWhitespaceAndNewlineCharacterSet, & kCFUniCharIllegalCharacterSet
// NOTE(review): the returned pointer is presumably a per-plane bitmap usable with
// CFUniCharIsMemberOfBitmap (which accepts NULL) - confirm.
103 CF_EXPORT
const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset
, uint32_t plane
);
// One-byte status codes. Note the values: "Filled" is 0, "Empty" is 0xFF, "All" is 1.
// NOTE(review): presumably the possible return values of CFUniCharGetBitmapForPlane - confirm.
106 kCFUniCharBitmapFilled
= (uint8_t)0,
107 kCFUniCharBitmapEmpty
= (uint8_t)0xFF,
108 kCFUniCharBitmapAll
= (uint8_t)1
// Declaration only; the implementation is not in this header.
// NOTE(review): presumably writes the bitmap of 'charset' for 'plane' into 'bitmap'
// (inverted when 'isInverted' is true) and returns one of the kCFUniCharBitmap... codes.
// The required size of 'bitmap' is not stated here - confirm against the implementation.
111 CF_EXPORT
uint8_t CFUniCharGetBitmapForPlane(uint32_t charset
, uint32_t plane
, void *bitmap
, bool isInverted
);
// Declaration only. NOTE(review): presumably the number of planes for which 'charset'
// has bitmap data - confirm.
113 CF_EXPORT
uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset
);
// Case mapping kinds, numbered from 0.
// NOTE(review): presumably the values accepted as 'ctype' by CFUniCharMapCaseTo - confirm.
116 kCFUniCharToLowercase
= 0,
117 kCFUniCharToUppercase
,
118 kCFUniCharToTitlecase
,
// Flag bits; each constant occupies its own bit so they can be OR-ed together.
// NOTE(review): presumably the 'flags' accepted by CFUniCharMapCaseTo and the value
// returned by CFUniCharGetConditionalCaseMappingFlags - confirm.
123 kCFUniCharCaseMapFinalSigma
= (1),
124 kCFUniCharCaseMapAfter_i
= (1 << 1),
125 kCFUniCharCaseMapMoreAbove
= (1 << 2)
// Declaration only; the implementation is not in this header.
// NOTE(review): presumably maps 'theChar' to the case selected by 'ctype', writes up to
// 'maxLength' UTF-16 units into 'convertedChar' and returns the number written; 'flags'
// and 'langCode' presumably select context/language specific mappings - confirm.
128 CF_EXPORT
uint32_t CFUniCharMapCaseTo(UTF32Char theChar
, UTF16Char
*convertedChar
, uint32_t maxLength
, uint32_t ctype
, uint32_t flags
, const uint8_t *langCode
);
// Declaration only; the implementation is not in this header.
// NOTE(review): presumably inspects 'buffer' (of 'length' units) around 'currentIndex'
// and returns kCFUniCharCaseMap... flag bits for 'theChar', given the flags of the
// previous character in 'lastFlags' - confirm.
130 CF_EXPORT
uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar
, UTF16Char
*buffer
, uint32_t currentIndex
, uint32_t length
, uint32_t type
, const uint8_t *langCode
, uint32_t lastFlags
);
// Bidirectional property values, numbered consecutively from 0.
// kCFUniCharBiDiPropertyPDF is the last (largest) value; CFUniCharGetBidiPropertyForCharacter
// compares table entries against it to tell property values from sub-table indexes.
133 kCFUniCharBiDiPropertyON
= 0,
134 kCFUniCharBiDiPropertyL
,
135 kCFUniCharBiDiPropertyR
,
136 kCFUniCharBiDiPropertyAN
,
137 kCFUniCharBiDiPropertyEN
,
138 kCFUniCharBiDiPropertyAL
,
139 kCFUniCharBiDiPropertyNSM
,
140 kCFUniCharBiDiPropertyCS
,
141 kCFUniCharBiDiPropertyES
,
142 kCFUniCharBiDiPropertyET
,
143 kCFUniCharBiDiPropertyBN
,
144 kCFUniCharBiDiPropertyS
,
145 kCFUniCharBiDiPropertyWS
,
146 kCFUniCharBiDiPropertyB
,
147 kCFUniCharBiDiPropertyRLO
,
148 kCFUniCharBiDiPropertyRLE
,
149 kCFUniCharBiDiPropertyLRO
,
150 kCFUniCharBiDiPropertyLRE
,
151 kCFUniCharBiDiPropertyPDF
// Property type selectors, numbered from 0.
// NOTE(review): presumably the values accepted as 'propertyType' by the
// CFUniCharGet...UnicodeProperty... functions - confirm.
155 kCFUniCharCombiningProperty
= 0,
156 kCFUniCharBidiProperty
159 // The second arg 'bitmap' has to be the pointer to a specific plane
160 CF_INLINE
uint8_t CFUniCharGetBidiPropertyForCharacter(UTF16Char character
, const uint8_t *bitmap
) {
162 uint8_t value
= bitmap
[(character
>> 8)];
164 if (value
> kCFUniCharBiDiPropertyPDF
) {
165 bitmap
= bitmap
+ 256 + ((value
- kCFUniCharBiDiPropertyPDF
- 1) * 256);
166 return bitmap
[character
% 256];
171 return kCFUniCharBiDiPropertyL
;
174 CF_INLINE
uint8_t CFUniCharGetCombiningPropertyForCharacter(UTF16Char character
, const uint8_t *bitmap
) {
176 uint8_t value
= bitmap
[(character
>> 8)];
179 bitmap
= bitmap
+ 256 + ((value
- 1) * 256);
180 return bitmap
[character
% 256];
// Declarations only; the implementations are not in this header.
// NOTE(review): presumably returns the property table of 'propertyType' for 'plane', in the
// layout expected by the CFUniCharGet...PropertyForCharacter inline functions - confirm.
186 CF_EXPORT
const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType
, uint32_t plane
);
// NOTE(review): presumably the number of planes that have data for 'propertyType' - confirm.
187 CF_EXPORT
uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType
);
// NOTE(review): presumably the 'propertyType' property value of 'character' - confirm.
188 CF_EXPORT
uint32_t CFUniCharGetUnicodeProperty(UTF32Char character
, uint32_t propertyType
);
// NOTE(review): presumably converts 'srcLength' UTF-32 characters from 'src' into the
// encoding selected by 'dstFormat', advancing '*dst' and reporting the amount written in
// '*filledLength'; units of 'dstLength'/'filledLength' are not stated here - confirm.
190 CF_EXPORT
bool CFUniCharFillDestinationBuffer(const UTF32Char
*src
, uint32_t srcLength
, void **dst
, uint32_t dstLength
, uint32_t *filledLength
, uint32_t dstFormat
);
194 CF_INLINE
bool CFUniCharToUTF32(const UTF16Char
*src
, CFIndex length
, UTF32Char
*dst
, bool allowLossy
, bool isBigEndien
) {
195 const UTF16Char
*limit
= src
+ length
;
198 while (src
< limit
) {
199 character
= *(src
++);
201 if (CFUniCharIsSurrogateHighCharacter(character
)) {
202 if ((src
< limit
) && CFUniCharIsSurrogateLowCharacter(*src
)) {
203 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, *(src
++));
205 if (!allowLossy
) return false;
206 character
= 0xFFFD; // replacement character
208 } else if (CFUniCharIsSurrogateLowCharacter(character
)) {
209 if (!allowLossy
) return false;
210 character
= 0xFFFD; // replacement character
213 *(dst
++) = (isBigEndien
? CFSwapInt32HostToBig(character
) : CFSwapInt32HostToLittle(character
));
219 CF_INLINE
bool CFUniCharFromUTF32(const UTF32Char
*src
, CFIndex length
, UTF16Char
*dst
, bool allowLossy
, bool isBigEndien
) {
220 const UTF32Char
*limit
= src
+ length
;
223 while (src
< limit
) {
224 character
= (isBigEndien
? CFSwapInt32BigToHost(*(src
++)) : CFSwapInt32LittleToHost(*(src
++)));
226 if (character
< 0xFFFF) { // BMP
228 if (CFUniCharIsSurrogateHighCharacter(character
)) {
229 UTF32Char otherCharacter
= 0xFFFD; // replacement character
232 otherCharacter
= (isBigEndien
? CFSwapInt32BigToHost(*src
) : CFSwapInt32LittleToHost(*src
));
235 if ((otherCharacter
< 0x10000) && CFUniCharIsSurrogateLowCharacter(otherCharacter
)) {
236 *(dst
++) = character
; ++src
;
238 otherCharacter
= 0xFFFD; // replacement character
242 character
= otherCharacter
;
243 } else if (CFUniCharIsSurrogateLowCharacter(character
)) {
244 character
= 0xFFFD; // replacement character
247 if (CFUniCharIsSurrogateHighCharacter(character
) || CFUniCharIsSurrogateLowCharacter(character
)) return false;
249 } else if (character
< 0x110000) { // non-BMP
250 character
-= 0x10000;
251 *(dst
++) = (UTF16Char
)((character
>> 10) + 0xD800UL
);
252 character
= (UTF16Char
)((character
& 0x3FF) + 0xDC00UL
);
254 if (!allowLossy
) return false;
255 character
= 0xFFFD; // replacement character
258 *(dst
++) = character
;
263 #if defined(__cplusplus)
267 #endif /* ! __COREFOUNDATION_CFUNICHAR__ */