2 * Copyright (c) 2014 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
25 Copyright (c) 1998-2013, Apple Inc. All rights reserved.
28 #if !defined(__COREFOUNDATION_CFUNICHAR__)
29 #define __COREFOUNDATION_CFUNICHAR__ 1
32 #include <CoreFoundation/CFByteOrder.h>
33 #include <CoreFoundation/CFBase.h>
// Right shift that turns a 16-bit character into the index of the bitmap byte
// holding its bit (one bit per character, eight characters per byte).
#define kCFUniCharBitShiftForByte (3)

// Mask applied to a character to get its bit position (0-7) inside that byte.
// Despite the "BitShift" in the name, the bitmap helpers use it as an AND mask.
#define kCFUniCharBitShiftForMask (7)
40 CF_INLINE
bool CFUniCharIsSurrogateHighCharacter(UniChar character
) {
41 return ((character
>= 0xD800UL
) && (character
<= 0xDBFFUL
) ? true : false);
44 CF_INLINE
bool CFUniCharIsSurrogateLowCharacter(UniChar character
) {
45 return ((character
>= 0xDC00UL
) && (character
<= 0xDFFFUL
) ? true : false);
48 CF_INLINE UTF32Char
CFUniCharGetLongCharacterForSurrogatePair(UniChar surrogateHigh
, UniChar surrogateLow
) {
49 return ((surrogateHigh
- 0xD800UL
) << 10) + (surrogateLow
- 0xDC00UL
) + 0x0010000UL
;
// Destination encoding formats.
// The following values coincide with the TextEncodingFormat defines in TextCommon.h
// (note that the value 1 is intentionally not used here).
enum {
    kCFUniCharUTF16Format = 0,
    kCFUniCharUTF8Format = 2,
    kCFUniCharUTF32Format = 3
};
59 CF_INLINE
bool CFUniCharIsMemberOfBitmap(UTF16Char theChar
, const uint8_t *bitmap
) {
60 return (bitmap
&& (bitmap
[(theChar
) >> kCFUniCharBitShiftForByte
] & (((uint32_t)1) << (theChar
& kCFUniCharBitShiftForMask
))) ? true : false);
63 CF_INLINE
void CFUniCharAddCharacterToBitmap(UTF16Char theChar
, uint8_t *bitmap
) {
64 bitmap
[(theChar
) >> kCFUniCharBitShiftForByte
] |= (((uint32_t)1) << (theChar
& kCFUniCharBitShiftForMask
));
67 CF_INLINE
void CFUniCharRemoveCharacterFromBitmap(UTF16Char theChar
, uint8_t *bitmap
) {
68 bitmap
[(theChar
) >> kCFUniCharBitShiftForByte
] &= ~(((uint32_t)1) << (theChar
& kCFUniCharBitShiftForMask
));
// Character set identifiers.
// The first group is numbered sequentially starting at 1; the second group
// (internal sets) is numbered sequentially starting at 100.
enum {
    kCFUniCharControlCharacterSet = 1,
    kCFUniCharWhitespaceCharacterSet,
    kCFUniCharWhitespaceAndNewlineCharacterSet,
    kCFUniCharDecimalDigitCharacterSet,
    kCFUniCharLetterCharacterSet,
    kCFUniCharLowercaseLetterCharacterSet,
    kCFUniCharUppercaseLetterCharacterSet,
    kCFUniCharNonBaseCharacterSet,
    kCFUniCharCanonicalDecomposableCharacterSet,
    // Alias: shares its value with the canonical-decomposable set, so the
    // numbering continues from that value on the next line.
    kCFUniCharDecomposableCharacterSet = kCFUniCharCanonicalDecomposableCharacterSet,
    kCFUniCharAlphaNumericCharacterSet,
    kCFUniCharPunctuationCharacterSet,
    kCFUniCharIllegalCharacterSet,
    kCFUniCharTitlecaseLetterCharacterSet,
    kCFUniCharSymbolAndOperatorCharacterSet,
    kCFUniCharNewlineCharacterSet,

    kCFUniCharCompatibilityDecomposableCharacterSet = 100, // internal character sets begin here
    kCFUniCharHFSPlusDecomposableCharacterSet,
    kCFUniCharStrongRightToLeftCharacterSet,
    kCFUniCharHasNonSelfLowercaseCharacterSet,
    kCFUniCharHasNonSelfUppercaseCharacterSet,
    kCFUniCharHasNonSelfTitlecaseCharacterSet,
    kCFUniCharHasNonSelfCaseFoldingCharacterSet,
    kCFUniCharHasNonSelfMirrorMappingCharacterSet,
    kCFUniCharControlAndFormatterCharacterSet,
    kCFUniCharCaseIgnorableCharacterSet,
    kCFUniCharGraphemeExtendCharacterSet
};
102 CF_EXPORT
bool CFUniCharIsMemberOf(UTF32Char theChar
, uint32_t charset
);
104 // This function returns NULL for kCFUniCharControlCharacterSet, kCFUniCharWhitespaceCharacterSet, kCFUniCharWhitespaceAndNewlineCharacterSet, & kCFUniCharIllegalCharacterSet
105 CF_EXPORT
const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset
, uint32_t plane
);
// Bitmap status codes, each expressed as a uint8_t value.
// NOTE(review): presumably the return values of CFUniCharGetBitmapForPlane
// (which returns uint8_t); confirm against the implementation.
enum {
    kCFUniCharBitmapFilled = (uint8_t)0,
    kCFUniCharBitmapEmpty = (uint8_t)0xFF,
    kCFUniCharBitmapAll = (uint8_t)1
};
113 CF_EXPORT
uint8_t CFUniCharGetBitmapForPlane(uint32_t charset
, uint32_t plane
, void *bitmap
, bool isInverted
);
115 CF_EXPORT
uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset
);
// Case-mapping kinds, numbered sequentially from 0.
// NOTE(review): the list ends with a trailing comma, which suggests a further
// enumerator may follow kCFUniCharToTitlecase that is not visible here; verify
// against the authoritative header before relying on this list being complete.
enum {
    kCFUniCharToLowercase = 0,
    kCFUniCharToUppercase,
    kCFUniCharToTitlecase,
};

// Conditional case-mapping flags; each occupies its own bit so they can be
// OR-ed together.
enum {
    kCFUniCharCaseMapFinalSigma = (1UL << 0),
    kCFUniCharCaseMapAfter_i = (1UL << 1),
    kCFUniCharCaseMapMoreAbove = (1UL << 2),
    kCFUniCharCaseMapDutchDigraph = (1UL << 3),
    kCFUniCharCaseMapGreekTonos = (1UL << 4)
};
132 CF_EXPORT CFIndex
CFUniCharMapCaseTo(UTF32Char theChar
, UTF16Char
*convertedChar
, CFIndex maxLength
, uint32_t ctype
, uint32_t flags
, const uint8_t *langCode
);
134 CF_EXPORT
uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar
, UTF16Char
*buffer
, CFIndex currentIndex
, CFIndex length
, uint32_t type
, const uint8_t *langCode
, uint32_t lastFlags
);
// BiDi property values, numbered sequentially from 0.
// kCFUniCharBiDiPropertyPDF is the largest value; CFUniCharGetBidiPropertyForCharacter
// relies on that to tell direct property values from sub-table selectors.
enum {
    kCFUniCharBiDiPropertyON = 0,
    kCFUniCharBiDiPropertyL,
    kCFUniCharBiDiPropertyR,
    kCFUniCharBiDiPropertyAN,
    kCFUniCharBiDiPropertyEN,
    kCFUniCharBiDiPropertyAL,
    kCFUniCharBiDiPropertyNSM,
    kCFUniCharBiDiPropertyCS,
    kCFUniCharBiDiPropertyES,
    kCFUniCharBiDiPropertyET,
    kCFUniCharBiDiPropertyBN,
    kCFUniCharBiDiPropertyS,
    kCFUniCharBiDiPropertyWS,
    kCFUniCharBiDiPropertyB,
    kCFUniCharBiDiPropertyRLO,
    kCFUniCharBiDiPropertyRLE,
    kCFUniCharBiDiPropertyLRO,
    kCFUniCharBiDiPropertyLRE,
    kCFUniCharBiDiPropertyPDF
};

// Unicode property table selectors.
// NOTE(review): presumably the `propertyType` argument of the
// CFUniCharGetUnicodeProperty* functions; confirm.
enum {
    kCFUniCharCombiningProperty = 0,
    kCFUniCharBidiProperty
};
163 // The second arg 'bitmap' has to be the pointer to a specific plane
164 CF_INLINE
uint8_t CFUniCharGetBidiPropertyForCharacter(UTF16Char character
, const uint8_t *bitmap
) {
166 uint8_t value
= bitmap
[(character
>> 8)];
168 if (value
> kCFUniCharBiDiPropertyPDF
) {
169 bitmap
= bitmap
+ 256 + ((value
- kCFUniCharBiDiPropertyPDF
- 1) * 256);
170 return bitmap
[character
% 256];
175 return kCFUniCharBiDiPropertyL
;
178 CF_INLINE
uint8_t CFUniCharGetCombiningPropertyForCharacter(UTF16Char character
, const uint8_t *bitmap
) {
180 uint8_t value
= bitmap
[(character
>> 8)];
183 bitmap
= bitmap
+ 256 + ((value
- 1) * 256);
184 return bitmap
[character
% 256];
190 CF_EXPORT
const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType
, uint32_t plane
);
191 CF_EXPORT
uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType
);
192 CF_EXPORT
uint32_t CFUniCharGetUnicodeProperty(UTF32Char character
, uint32_t propertyType
);
194 CF_EXPORT
bool CFUniCharFillDestinationBuffer(const UTF32Char
*src
, CFIndex srcLength
, void **dst
, CFIndex dstLength
, CFIndex
*filledLength
, uint32_t dstFormat
);
198 CF_INLINE
bool CFUniCharToUTF32(const UTF16Char
*src
, CFIndex length
, UTF32Char
*dst
, bool allowLossy
, bool isBigEndien
) {
199 const UTF16Char
*limit
= src
+ length
;
202 while (src
< limit
) {
203 character
= *(src
++);
205 if (CFUniCharIsSurrogateHighCharacter(character
)) {
206 if ((src
< limit
) && CFUniCharIsSurrogateLowCharacter(*src
)) {
207 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, *(src
++));
209 if (!allowLossy
) return false;
210 character
= 0xFFFD; // replacement character
212 } else if (CFUniCharIsSurrogateLowCharacter(character
)) {
213 if (!allowLossy
) return false;
214 character
= 0xFFFD; // replacement character
217 *(dst
++) = (isBigEndien
? CFSwapInt32HostToBig(character
) : CFSwapInt32HostToLittle(character
));
223 CF_INLINE
bool CFUniCharFromUTF32(const UTF32Char
*src
, CFIndex length
, UTF16Char
*dst
, bool allowLossy
, bool isBigEndien
) {
224 const UTF32Char
*limit
= src
+ length
;
227 while (src
< limit
) {
228 character
= (isBigEndien
? CFSwapInt32BigToHost(*(src
++)) : CFSwapInt32LittleToHost(*(src
++)));
230 if (character
< 0x10000) { // BMP
232 if (CFUniCharIsSurrogateHighCharacter(character
)) {
233 UTF32Char otherCharacter
= 0xFFFD; // replacement character
236 otherCharacter
= (isBigEndien
? CFSwapInt32BigToHost(*src
) : CFSwapInt32LittleToHost(*src
));
239 if ((otherCharacter
< 0x10000) && CFUniCharIsSurrogateLowCharacter(otherCharacter
)) {
240 *(dst
++) = character
; ++src
;
242 otherCharacter
= 0xFFFD; // replacement character
246 character
= otherCharacter
;
247 } else if (CFUniCharIsSurrogateLowCharacter(character
)) {
248 character
= 0xFFFD; // replacement character
251 if (CFUniCharIsSurrogateHighCharacter(character
) || CFUniCharIsSurrogateLowCharacter(character
)) return false;
253 } else if (character
< 0x110000) { // non-BMP
254 character
-= 0x10000;
255 *(dst
++) = (UTF16Char
)((character
>> 10) + 0xD800UL
);
256 character
= (UTF16Char
)((character
& 0x3FF) + 0xDC00UL
);
258 if (!allowLossy
) return false;
259 character
= 0xFFFD; // replacement character
262 *(dst
++) = character
;
269 #endif /* ! __COREFOUNDATION_CFUNICHAR__ */