]> git.saurik.com Git - apple/cf.git/blob - CFUniChar.h
CF-635.tar.gz
[apple/cf.git] / CFUniChar.h
1 /*
2 * Copyright (c) 2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFUniChar.h
25 Copyright (c) 1998-2011, Apple Inc. All rights reserved.
26 */
27
28 #if !defined(__COREFOUNDATION_CFUNICHAR__)
29 #define __COREFOUNDATION_CFUNICHAR__ 1
30
31
32 #include <CoreFoundation/CFByteOrder.h>
33 #include <CoreFoundation/CFBase.h>
34
35 CF_EXTERN_C_BEGIN
36
// Right shift applied to a UTF-16 code unit to get its byte index in a bitmap (one bit per character, 8 per byte)
37 #define kCFUniCharBitShiftForByte (3)
// Mask applied to a UTF-16 code unit to get its bit position (0..7) within that byte
38 #define kCFUniCharBitShiftForMask (7)
39
40 CF_INLINE bool CFUniCharIsSurrogateHighCharacter(UniChar character) {
41 return ((character >= 0xD800UL) && (character <= 0xDBFFUL) ? true : false);
42 }
43
44 CF_INLINE bool CFUniCharIsSurrogateLowCharacter(UniChar character) {
45 return ((character >= 0xDC00UL) && (character <= 0xDFFFUL) ? true : false);
46 }
47
48 CF_INLINE UTF32Char CFUniCharGetLongCharacterForSurrogatePair(UniChar surrogateHigh, UniChar surrogateLow) {
49 return ((surrogateHigh - 0xD800UL) << 10) + (surrogateLow - 0xDC00UL) + 0x0010000UL;
50 }
51
52 // The following values coincide with the TextEncodingFormat defines in TextCommon.h
// NOTE(review): presumably the values accepted as 'dstFormat' by CFUniCharFillDestinationBuffer -- confirm against the implementation
53 enum {
54 kCFUniCharUTF16Format = 0,
55 kCFUniCharUTF8Format = 2,
56 kCFUniCharUTF32Format = 3
57 };
58
59 CF_INLINE bool CFUniCharIsMemberOfBitmap(UTF16Char theChar, const uint8_t *bitmap) {
60 return (bitmap && (bitmap[(theChar) >> kCFUniCharBitShiftForByte] & (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask))) ? true : false);
61 }
62
63 CF_INLINE void CFUniCharAddCharacterToBitmap(UTF16Char theChar, uint8_t *bitmap) {
64 bitmap[(theChar) >> kCFUniCharBitShiftForByte] |= (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
65 }
66
67 CF_INLINE void CFUniCharRemoveCharacterFromBitmap(UTF16Char theChar, uint8_t *bitmap) {
68 bitmap[(theChar) >> kCFUniCharBitShiftForByte] &= ~(((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
69 }
70
// Character set identifiers. Numbering starts at 1; a second range starts at 100.
// NOTE(review): presumably the values passed as the 'charset' argument of the CFUniChar* functions declared in this header -- confirm
71 enum {
72 kCFUniCharControlCharacterSet = 1,
73 kCFUniCharWhitespaceCharacterSet,
74 kCFUniCharWhitespaceAndNewlineCharacterSet,
75 kCFUniCharDecimalDigitCharacterSet,
76 kCFUniCharLetterCharacterSet,
77 kCFUniCharLowercaseLetterCharacterSet,
78 kCFUniCharUppercaseLetterCharacterSet,
79 kCFUniCharNonBaseCharacterSet,
80 kCFUniCharCanonicalDecomposableCharacterSet,
81 kCFUniCharDecomposableCharacterSet = kCFUniCharCanonicalDecomposableCharacterSet, // alias: same value as the canonical set, so numbering continues from it
82 kCFUniCharAlphaNumericCharacterSet,
83 kCFUniCharPunctuationCharacterSet,
84 kCFUniCharIllegalCharacterSet,
85 kCFUniCharTitlecaseLetterCharacterSet,
86 kCFUniCharSymbolAndOperatorCharacterSet,
87 kCFUniCharNewlineCharacterSet,
88
89 kCFUniCharCompatibilityDecomposableCharacterSet = 100, // internal character sets begin here
90 kCFUniCharHFSPlusDecomposableCharacterSet,
91 kCFUniCharStrongRightToLeftCharacterSet,
92 kCFUniCharHasNonSelfLowercaseCharacterSet,
93 kCFUniCharHasNonSelfUppercaseCharacterSet,
94 kCFUniCharHasNonSelfTitlecaseCharacterSet,
95 kCFUniCharHasNonSelfCaseFoldingCharacterSet,
96 kCFUniCharHasNonSelfMirrorMappingCharacterSet,
97 kCFUniCharControlAndFormatterCharacterSet,
98 kCFUniCharCaseIgnorableCharacterSet,
99 kCFUniCharGraphemeExtendCharacterSet
100 };
101
// Membership test for a full UTF-32 code point against a character set (implementation is not in this header).
// NOTE(review): 'charset' is presumably one of the kCFUniChar...CharacterSet values -- confirm
102 CF_EXPORT bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset);
103
104 // This function returns NULL for kCFUniCharControlCharacterSet, kCFUniCharWhitespaceCharacterSet, kCFUniCharWhitespaceAndNewlineCharacterSet, & kCFUniCharIllegalCharacterSet
// NOTE(review): the returned pointer presumably has the one-bit-per-character layout that CFUniCharIsMemberOfBitmap reads -- confirm
105 CF_EXPORT const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane);
106
// Note the numeric values: Filled is 0, Empty is 0xFF, All is 1.
// NOTE(review): presumably the uint8_t status returned by CFUniCharGetBitmapForPlane -- confirm the exact meaning of each value in the implementation
107 enum {
108 kCFUniCharBitmapFilled = (uint8_t)0,
109 kCFUniCharBitmapEmpty = (uint8_t)0xFF,
110 kCFUniCharBitmapAll = (uint8_t)1
111 };
112
// NOTE(review): presumably writes the plane's bitmap into the caller-supplied 'bitmap' buffer, inverted when 'isInverted' is true; required buffer size is not visible here -- confirm
113 CF_EXPORT uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted);
114
115 CF_EXPORT uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset);
116
// Case-mapping kinds.
// NOTE(review): presumably the 'ctype' argument of CFUniCharMapCaseTo and the 'type' argument of CFUniCharGetConditionalCaseMappingFlags -- confirm
117 enum {
118 kCFUniCharToLowercase = 0,
119 kCFUniCharToUppercase,
120 kCFUniCharToTitlecase,
121 kCFUniCharCaseFold
122 };
123
// Conditional case-mapping flags. Each value is a distinct single bit, so they can be OR-ed together.
// NOTE(review): presumably the 'flags' / 'lastFlags' arguments and the return value of the functions below -- confirm
124 enum {
125 kCFUniCharCaseMapFinalSigma = (1UL << 0),
126 kCFUniCharCaseMapAfter_i = (1UL << 1),
127 kCFUniCharCaseMapMoreAbove = (1UL << 2),
128 kCFUniCharCaseMapDutchDigraph = (1UL << 3)
129 };
130
// NOTE(review): presumably writes up to 'maxLength' UTF-16 units to 'convertedChar' and returns the count written -- confirm
131 CF_EXPORT CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode);
132
133 CF_EXPORT uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags);
134
// Bidirectional property values, as returned by CFUniCharGetBidiPropertyForCharacter.
// kCFUniCharBiDiPropertyPDF must stay the largest value: that function treats any first-level
// table entry greater than kCFUniCharBiDiPropertyPDF as a sub-table selector rather than a property.
135 enum {
136 kCFUniCharBiDiPropertyON = 0,
137 kCFUniCharBiDiPropertyL,
138 kCFUniCharBiDiPropertyR,
139 kCFUniCharBiDiPropertyAN,
140 kCFUniCharBiDiPropertyEN,
141 kCFUniCharBiDiPropertyAL,
142 kCFUniCharBiDiPropertyNSM,
143 kCFUniCharBiDiPropertyCS,
144 kCFUniCharBiDiPropertyES,
145 kCFUniCharBiDiPropertyET,
146 kCFUniCharBiDiPropertyBN,
147 kCFUniCharBiDiPropertyS,
148 kCFUniCharBiDiPropertyWS,
149 kCFUniCharBiDiPropertyB,
150 kCFUniCharBiDiPropertyRLO,
151 kCFUniCharBiDiPropertyRLE,
152 kCFUniCharBiDiPropertyLRO,
153 kCFUniCharBiDiPropertyLRE,
154 kCFUniCharBiDiPropertyPDF
155 };
156
// Unicode property kinds.
// NOTE(review): presumably the 'propertyType' argument of the CFUniCharGetUnicodeProperty* functions -- confirm
157 enum {
158 kCFUniCharCombiningProperty = 0,
159 kCFUniCharBidiProperty
160 };
161
162 // The second arg 'bitmap' has to be the pointer to a specific plane
163 CF_INLINE uint8_t CFUniCharGetBidiPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
164 if (bitmap) {
165 uint8_t value = bitmap[(character >> 8)];
166
167 if (value > kCFUniCharBiDiPropertyPDF) {
168 bitmap = bitmap + 256 + ((value - kCFUniCharBiDiPropertyPDF - 1) * 256);
169 return bitmap[character % 256];
170 } else {
171 return value;
172 }
173 }
174 return kCFUniCharBiDiPropertyL;
175 }
176
177 CF_INLINE uint8_t CFUniCharGetCombiningPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
178 if (bitmap) {
179 uint8_t value = bitmap[(character >> 8)];
180
181 if (value) {
182 bitmap = bitmap + 256 + ((value - 1) * 256);
183 return bitmap[character % 256];
184 }
185 }
186 return 0;
187 }
188
// NOTE(review): the returned data is presumably the per-plane table expected as 'bitmap' by
// CFUniCharGetBidiPropertyForCharacter / CFUniCharGetCombiningPropertyForCharacter -- confirm
189 CF_EXPORT const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane);
190 CF_EXPORT uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType);
191 CF_EXPORT uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType);
192
// NOTE(review): 'dst' is passed as void **, so it is presumably advanced by the callee; 'dstFormat' is
// presumably one of kCFUniCharUTF16Format / kCFUniCharUTF8Format / kCFUniCharUTF32Format -- confirm
193 CF_EXPORT bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat);
194
195 // UTF32 support
196
197 CF_INLINE bool CFUniCharToUTF32(const UTF16Char *src, CFIndex length, UTF32Char *dst, bool allowLossy, bool isBigEndien) {
198 const UTF16Char *limit = src + length;
199 UTF32Char character;
200
201 while (src < limit) {
202 character = *(src++);
203
204 if (CFUniCharIsSurrogateHighCharacter(character)) {
205 if ((src < limit) && CFUniCharIsSurrogateLowCharacter(*src)) {
206 character = CFUniCharGetLongCharacterForSurrogatePair(character, *(src++));
207 } else {
208 if (!allowLossy) return false;
209 character = 0xFFFD; // replacement character
210 }
211 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
212 if (!allowLossy) return false;
213 character = 0xFFFD; // replacement character
214 }
215
216 *(dst++) = (isBigEndien ? CFSwapInt32HostToBig(character) : CFSwapInt32HostToLittle(character));
217 }
218
219 return true;
220 }
221
222 CF_INLINE bool CFUniCharFromUTF32(const UTF32Char *src, CFIndex length, UTF16Char *dst, bool allowLossy, bool isBigEndien) {
223 const UTF32Char *limit = src + length;
224 UTF32Char character;
225
226 while (src < limit) {
227 character = (isBigEndien ? CFSwapInt32BigToHost(*(src++)) : CFSwapInt32LittleToHost(*(src++)));
228
229 if (character < 0xFFFF) { // BMP
230 if (allowLossy) {
231 if (CFUniCharIsSurrogateHighCharacter(character)) {
232 UTF32Char otherCharacter = 0xFFFD; // replacement character
233
234 if (src < limit) {
235 otherCharacter = (isBigEndien ? CFSwapInt32BigToHost(*src) : CFSwapInt32LittleToHost(*src));
236
237
238 if ((otherCharacter < 0x10000) && CFUniCharIsSurrogateLowCharacter(otherCharacter)) {
239 *(dst++) = character; ++src;
240 } else {
241 otherCharacter = 0xFFFD; // replacement character
242 }
243 }
244
245 character = otherCharacter;
246 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
247 character = 0xFFFD; // replacement character
248 }
249 } else {
250 if (CFUniCharIsSurrogateHighCharacter(character) || CFUniCharIsSurrogateLowCharacter(character)) return false;
251 }
252 } else if (character < 0x110000) { // non-BMP
253 character -= 0x10000;
254 *(dst++) = (UTF16Char)((character >> 10) + 0xD800UL);
255 character = (UTF16Char)((character & 0x3FF) + 0xDC00UL);
256 } else {
257 if (!allowLossy) return false;
258 character = 0xFFFD; // replacement character
259 }
260
261 *(dst++) = character;
262 }
263 return true;
264 }
265
266 CF_EXTERN_C_END
267
268 #endif /* ! __COREFOUNDATION_CFUNICHAR__ */
269