]> git.saurik.com Git - apple/cf.git/blob - CFUniChar.h
CF-476.13.tar.gz
[apple/cf.git] / CFUniChar.h
1 /*
2 * Copyright (c) 2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFUniChar.h
24 Copyright (c) 1998-2007, Apple Inc. All rights reserved.
25 */
26
27 #if !defined(__COREFOUNDATION_CFUNICHAR__)
28 #define __COREFOUNDATION_CFUNICHAR__ 1
29
30
31 #include <CoreFoundation/CFByteOrder.h>
32 #include <CoreFoundation/CFBase.h>
33
34 CF_EXTERN_C_BEGIN
35
// Right-shift applied to a 16-bit character to obtain the index of the bitmap byte that holds
// its membership bit (each byte covers 8 consecutive characters).
#define kCFUniCharBitShiftForByte   (3)
// Mask that extracts a character's bit position (0-7) inside that byte. Despite the "BitShift"
// in the name, the bitmap helpers in this header apply it with '&', not as a shift count.
#define kCFUniCharBitShiftForMask   (7)
38
39 CF_INLINE bool CFUniCharIsSurrogateHighCharacter(UniChar character) {
40 return ((character >= 0xD800UL) && (character <= 0xDBFFUL) ? true : false);
41 }
42
43 CF_INLINE bool CFUniCharIsSurrogateLowCharacter(UniChar character) {
44 return ((character >= 0xDC00UL) && (character <= 0xDFFFUL) ? true : false);
45 }
46
47 CF_INLINE UTF32Char CFUniCharGetLongCharacterForSurrogatePair(UniChar surrogateHigh, UniChar surrogateLow) {
48 return ((surrogateHigh - 0xD800UL) << 10) + (surrogateLow - 0xDC00UL) + 0x0010000UL;
49 }
50
// Destination format selectors. These values coincide with the TextEncodingFormat
// definitions in TextCommon.h, so they must not be renumbered.
enum {
    kCFUniCharUTF16Format = 0,
    kCFUniCharUTF8Format  = 2,
    kCFUniCharUTF32Format = 3
};
57
58 CF_INLINE bool CFUniCharIsMemberOfBitmap(UTF16Char theChar, const uint8_t *bitmap) {
59 return (bitmap && (bitmap[(theChar) >> kCFUniCharBitShiftForByte] & (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask))) ? true : false);
60 }
61
62 CF_INLINE void CFUniCharAddCharacterToBitmap(UTF16Char theChar, uint8_t *bitmap) {
63 bitmap[(theChar) >> kCFUniCharBitShiftForByte] |= (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
64 }
65
66 CF_INLINE void CFUniCharRemoveCharacterFromBitmap(UTF16Char theChar, uint8_t *bitmap) {
67 bitmap[(theChar) >> kCFUniCharBitShiftForByte] &= ~(((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
68 }
69
// Character set identifiers. The numeric values are spelled out so that the numbering is
// visible at a glance; they are the same values the implicit enumeration produced.
enum {
    kCFUniCharControlCharacterSet = 1,
    kCFUniCharWhitespaceCharacterSet = 2,
    kCFUniCharWhitespaceAndNewlineCharacterSet = 3,
    kCFUniCharDecimalDigitCharacterSet = 4,
    kCFUniCharLetterCharacterSet = 5,
    kCFUniCharLowercaseLetterCharacterSet = 6,
    kCFUniCharUppercaseLetterCharacterSet = 7,
    kCFUniCharNonBaseCharacterSet = 8,
    kCFUniCharCanonicalDecomposableCharacterSet = 9,
    kCFUniCharDecomposableCharacterSet = kCFUniCharCanonicalDecomposableCharacterSet, // alias, same value
    kCFUniCharAlphaNumericCharacterSet = 10,
    kCFUniCharPunctuationCharacterSet = 11,
    kCFUniCharIllegalCharacterSet = 12,
    kCFUniCharTitlecaseLetterCharacterSet = 13,
    kCFUniCharSymbolAndOperatorCharacterSet = 14,
    kCFUniCharNewlineCharacterSet = 15,

    // Internal character sets begin here.
    kCFUniCharCompatibilityDecomposableCharacterSet = 100,
    kCFUniCharHFSPlusDecomposableCharacterSet = 101,
    kCFUniCharStrongRightToLeftCharacterSet = 102,
    kCFUniCharHasNonSelfLowercaseCharacterSet = 103,
    kCFUniCharHasNonSelfUppercaseCharacterSet = 104,
    kCFUniCharHasNonSelfTitlecaseCharacterSet = 105,
    kCFUniCharHasNonSelfCaseFoldingCharacterSet = 106,
    kCFUniCharHasNonSelfMirrorMappingCharacterSet = 107,
    kCFUniCharControlAndFormatterCharacterSet = 108,
    kCFUniCharCaseIgnorableCharacterSet = 109,
    kCFUniCharGraphemeExtendCharacterSet = 110
};
100
101 CF_EXPORT bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset);
102
103 // This function returns NULL for kCFUniCharControlCharacterSet, kCFUniCharWhitespaceCharacterSet, kCFUniCharWhitespaceAndNewlineCharacterSet, & kCFUniCharIllegalCharacterSet
104 CF_EXPORT const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane);
105
// Bitmap status codes, each cast to uint8_t.
// NOTE(review): presumably these are the values returned by CFUniCharGetBitmapForPlane, whose
// return type is uint8_t - confirm against the implementation.
enum {
    kCFUniCharBitmapFilled = (uint8_t)0,
    kCFUniCharBitmapAll = (uint8_t)1,
    kCFUniCharBitmapEmpty = (uint8_t)0xFF
};
111
112 CF_EXPORT uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted);
113
114 CF_EXPORT uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset);
115
// Case-mapping kinds, numbered consecutively from zero.
// NOTE(review): presumably the 'ctype' argument of CFUniCharMapCaseTo - confirm.
enum {
    kCFUniCharToLowercase = 0,
    kCFUniCharToUppercase = 1,
    kCFUniCharToTitlecase = 2,
    kCFUniCharCaseFold = 3
};
122
// Single-bit flags that can be OR-ed together.
// NOTE(review): presumably the 'flags' values used by the case-mapping functions - confirm.
enum {
    kCFUniCharCaseMapFinalSigma = (1 << 0),
    kCFUniCharCaseMapAfter_i = (1 << 1),
    kCFUniCharCaseMapMoreAbove = (1 << 2)
};
128
129 CF_EXPORT CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode);
130
131 CF_EXPORT uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags);
132
// Bidirectional property values, numbered consecutively from zero.
// kCFUniCharBiDiPropertyPDF is the largest value; CFUniCharGetBidiPropertyForCharacter relies
// on that to tell a direct property value apart from a sub-table reference.
// NOTE(review): the suffixes look like the Unicode Bidi_Class abbreviations (UAX #9) - confirm.
enum {
    kCFUniCharBiDiPropertyON = 0,
    kCFUniCharBiDiPropertyL = 1,
    kCFUniCharBiDiPropertyR = 2,
    kCFUniCharBiDiPropertyAN = 3,
    kCFUniCharBiDiPropertyEN = 4,
    kCFUniCharBiDiPropertyAL = 5,
    kCFUniCharBiDiPropertyNSM = 6,
    kCFUniCharBiDiPropertyCS = 7,
    kCFUniCharBiDiPropertyES = 8,
    kCFUniCharBiDiPropertyET = 9,
    kCFUniCharBiDiPropertyBN = 10,
    kCFUniCharBiDiPropertyS = 11,
    kCFUniCharBiDiPropertyWS = 12,
    kCFUniCharBiDiPropertyB = 13,
    kCFUniCharBiDiPropertyRLO = 14,
    kCFUniCharBiDiPropertyRLE = 15,
    kCFUniCharBiDiPropertyLRO = 16,
    kCFUniCharBiDiPropertyLRE = 17,
    kCFUniCharBiDiPropertyPDF = 18
};
154
// Unicode property kinds.
// NOTE(review): presumably the 'propertyType' argument of the CFUniCharGetUnicodeProperty...
// functions - confirm.
enum {
    kCFUniCharCombiningProperty = 0,
    kCFUniCharBidiProperty = 1
};
159
160 // The second arg 'bitmap' has to be the pointer to a specific plane
161 CF_INLINE uint8_t CFUniCharGetBidiPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
162 if (bitmap) {
163 uint8_t value = bitmap[(character >> 8)];
164
165 if (value > kCFUniCharBiDiPropertyPDF) {
166 bitmap = bitmap + 256 + ((value - kCFUniCharBiDiPropertyPDF - 1) * 256);
167 return bitmap[character % 256];
168 } else {
169 return value;
170 }
171 }
172 return kCFUniCharBiDiPropertyL;
173 }
174
175 CF_INLINE uint8_t CFUniCharGetCombiningPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
176 if (bitmap) {
177 uint8_t value = bitmap[(character >> 8)];
178
179 if (value) {
180 bitmap = bitmap + 256 + ((value - 1) * 256);
181 return bitmap[character % 256];
182 }
183 }
184 return 0;
185 }
186
187 CF_EXPORT const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane);
188 CF_EXPORT uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType);
189 CF_EXPORT uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType);
190
191 CF_EXPORT bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat);
192
193 // UTF32 support
194
195 CF_INLINE bool CFUniCharToUTF32(const UTF16Char *src, CFIndex length, UTF32Char *dst, bool allowLossy, bool isBigEndien) {
196 const UTF16Char *limit = src + length;
197 UTF32Char character;
198
199 while (src < limit) {
200 character = *(src++);
201
202 if (CFUniCharIsSurrogateHighCharacter(character)) {
203 if ((src < limit) && CFUniCharIsSurrogateLowCharacter(*src)) {
204 character = CFUniCharGetLongCharacterForSurrogatePair(character, *(src++));
205 } else {
206 if (!allowLossy) return false;
207 character = 0xFFFD; // replacement character
208 }
209 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
210 if (!allowLossy) return false;
211 character = 0xFFFD; // replacement character
212 }
213
214 *(dst++) = (isBigEndien ? CFSwapInt32HostToBig(character) : CFSwapInt32HostToLittle(character));
215 }
216
217 return true;
218 }
219
220 CF_INLINE bool CFUniCharFromUTF32(const UTF32Char *src, CFIndex length, UTF16Char *dst, bool allowLossy, bool isBigEndien) {
221 const UTF32Char *limit = src + length;
222 UTF32Char character;
223
224 while (src < limit) {
225 character = (isBigEndien ? CFSwapInt32BigToHost(*(src++)) : CFSwapInt32LittleToHost(*(src++)));
226
227 if (character < 0xFFFF) { // BMP
228 if (allowLossy) {
229 if (CFUniCharIsSurrogateHighCharacter(character)) {
230 UTF32Char otherCharacter = 0xFFFD; // replacement character
231
232 if (src < limit) {
233 otherCharacter = (isBigEndien ? CFSwapInt32BigToHost(*src) : CFSwapInt32LittleToHost(*src));
234
235
236 if ((otherCharacter < 0x10000) && CFUniCharIsSurrogateLowCharacter(otherCharacter)) {
237 *(dst++) = character; ++src;
238 } else {
239 otherCharacter = 0xFFFD; // replacement character
240 }
241 }
242
243 character = otherCharacter;
244 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
245 character = 0xFFFD; // replacement character
246 }
247 } else {
248 if (CFUniCharIsSurrogateHighCharacter(character) || CFUniCharIsSurrogateLowCharacter(character)) return false;
249 }
250 } else if (character < 0x110000) { // non-BMP
251 character -= 0x10000;
252 *(dst++) = (UTF16Char)((character >> 10) + 0xD800UL);
253 character = (UTF16Char)((character & 0x3FF) + 0xDC00UL);
254 } else {
255 if (!allowLossy) return false;
256 character = 0xFFFD; // replacement character
257 }
258
259 *(dst++) = character;
260 }
261 return true;
262 }
263
264 CF_EXTERN_C_END
265
266 #endif /* ! __COREFOUNDATION_CFUNICHAR__ */
267