]>
Commit | Line | Data |
---|---|---|
9ce05555 | 1 | /* |
8ca704e1 | 2 | * Copyright (c) 2011 Apple Inc. All rights reserved. |
9ce05555 A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
9ce05555 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
f64f9b69 | 23 | |
9ce05555 | 24 | /* CFUniChar.h |
8ca704e1 | 25 | Copyright (c) 1998-2011, Apple Inc. All rights reserved. |
9ce05555 A |
26 | */ |
27 | ||
28 | #if !defined(__COREFOUNDATION_CFUNICHAR__) | |
29 | #define __COREFOUNDATION_CFUNICHAR__ 1 | |
30 | ||
d8925383 A |
31 | |
32 | #include <CoreFoundation/CFByteOrder.h> | |
9ce05555 A |
33 | #include <CoreFoundation/CFBase.h> |
34 | ||
bd5b749c | 35 | CF_EXTERN_C_BEGIN |
9ce05555 A |
36 | |
// Right-shift applied to a 16-bit character value to obtain the index of the byte that holds its bit in a plane bitmap (8 characters per byte).
#define kCFUniCharBitShiftForByte (3)
// Despite the "Shift" in the name, the bitmap helpers in this header use this value as a mask (theChar & 7) to select the bit position inside that byte.
#define kCFUniCharBitShiftForMask (7)
39 | ||
bd5b749c | 40 | CF_INLINE bool CFUniCharIsSurrogateHighCharacter(UniChar character) { |
9ce05555 A |
41 | return ((character >= 0xD800UL) && (character <= 0xDBFFUL) ? true : false); |
42 | } | |
43 | ||
bd5b749c | 44 | CF_INLINE bool CFUniCharIsSurrogateLowCharacter(UniChar character) { |
9ce05555 A |
45 | return ((character >= 0xDC00UL) && (character <= 0xDFFFUL) ? true : false); |
46 | } | |
47 | ||
48 | CF_INLINE UTF32Char CFUniCharGetLongCharacterForSurrogatePair(UniChar surrogateHigh, UniChar surrogateLow) { | |
49 | return ((surrogateHigh - 0xD800UL) << 10) + (surrogateLow - 0xDC00UL) + 0x0010000UL; | |
50 | } | |
51 | ||
// Destination encoding formats.
// The following values coincide with the TextEncodingFormat defines in TextCommon.h
enum {
    kCFUniCharUTF16Format = 0,
    kCFUniCharUTF8Format  = 2,
    kCFUniCharUTF32Format = 3
};
58 | ||
59 | CF_INLINE bool CFUniCharIsMemberOfBitmap(UTF16Char theChar, const uint8_t *bitmap) { | |
60 | return (bitmap && (bitmap[(theChar) >> kCFUniCharBitShiftForByte] & (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask))) ? true : false); | |
61 | } | |
62 | ||
63 | CF_INLINE void CFUniCharAddCharacterToBitmap(UTF16Char theChar, uint8_t *bitmap) { | |
64 | bitmap[(theChar) >> kCFUniCharBitShiftForByte] |= (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask)); | |
65 | } | |
66 | ||
67 | CF_INLINE void CFUniCharRemoveCharacterFromBitmap(UTF16Char theChar, uint8_t *bitmap) { | |
68 | bitmap[(theChar) >> kCFUniCharBitShiftForByte] &= ~(((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask)); | |
69 | } | |
70 | ||
// Character set identifiers, passed as the 'charset' argument of the functions declared in this header.
// Values are spelled out explicitly; they match what the implicit numbering would produce.
enum {
    kCFUniCharControlCharacterSet = 1,
    kCFUniCharWhitespaceCharacterSet = 2,
    kCFUniCharWhitespaceAndNewlineCharacterSet = 3,
    kCFUniCharDecimalDigitCharacterSet = 4,
    kCFUniCharLetterCharacterSet = 5,
    kCFUniCharLowercaseLetterCharacterSet = 6,
    kCFUniCharUppercaseLetterCharacterSet = 7,
    kCFUniCharNonBaseCharacterSet = 8,
    kCFUniCharCanonicalDecomposableCharacterSet = 9,
    kCFUniCharDecomposableCharacterSet = kCFUniCharCanonicalDecomposableCharacterSet, // alias, same value
    kCFUniCharAlphaNumericCharacterSet = 10,
    kCFUniCharPunctuationCharacterSet = 11,
    kCFUniCharIllegalCharacterSet = 12,
    kCFUniCharTitlecaseLetterCharacterSet = 13,
    kCFUniCharSymbolAndOperatorCharacterSet = 14,
    kCFUniCharNewlineCharacterSet = 15,

    kCFUniCharCompatibilityDecomposableCharacterSet = 100, // internal character sets begin here
    kCFUniCharHFSPlusDecomposableCharacterSet = 101,
    kCFUniCharStrongRightToLeftCharacterSet = 102,
    kCFUniCharHasNonSelfLowercaseCharacterSet = 103,
    kCFUniCharHasNonSelfUppercaseCharacterSet = 104,
    kCFUniCharHasNonSelfTitlecaseCharacterSet = 105,
    kCFUniCharHasNonSelfCaseFoldingCharacterSet = 106,
    kCFUniCharHasNonSelfMirrorMappingCharacterSet = 107,
    kCFUniCharControlAndFormatterCharacterSet = 108,
    kCFUniCharCaseIgnorableCharacterSet = 109,
    kCFUniCharGraphemeExtendCharacterSet = 110
};
101 | ||
102 | CF_EXPORT bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset); | |
103 | ||
104 | // This function returns NULL for kCFUniCharControlCharacterSet, kCFUniCharWhitespaceCharacterSet, kCFUniCharWhitespaceAndNewlineCharacterSet, & kCFUniCharIllegalCharacterSet | |
105 | CF_EXPORT const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane); | |
106 | ||
// Bitmap status codes.
// NOTE(review): presumably these are the values returned by CFUniCharGetBitmapForPlane; confirm against the implementation.
enum {
    kCFUniCharBitmapFilled = (uint8_t)0,
    kCFUniCharBitmapEmpty  = (uint8_t)0xFF,
    kCFUniCharBitmapAll    = (uint8_t)1
};
112 | ||
113 | CF_EXPORT uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted); | |
114 | ||
115 | CF_EXPORT uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset); | |
116 | ||
// Case mapping kinds (presumably the 'ctype' / 'type' arguments of the case mapping functions declared in this header).
enum {
    kCFUniCharToLowercase = 0,
    kCFUniCharToUppercase = 1,
    kCFUniCharToTitlecase = 2,
    kCFUniCharCaseFold = 3
};
123 | ||
// Conditional case mapping flags; each constant occupies its own bit so they can be OR-ed together.
enum {
    kCFUniCharCaseMapFinalSigma   = (1UL << 0),
    kCFUniCharCaseMapAfter_i      = (1UL << 1),
    kCFUniCharCaseMapMoreAbove    = (1UL << 2),
    kCFUniCharCaseMapDutchDigraph = (1UL << 3)
};
130 | ||
bd5b749c | 131 | CF_EXPORT CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode); |
9ce05555 | 132 | |
bd5b749c | 133 | CF_EXPORT uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags); |
9ce05555 A |
134 | |
// Bidirectional property values.
// kCFUniCharBiDiPropertyPDF is the largest value; CFUniCharGetBidiPropertyForCharacter treats first-level table entries above it as indexes of second-level pages.
enum {
    kCFUniCharBiDiPropertyON = 0,
    kCFUniCharBiDiPropertyL = 1,
    kCFUniCharBiDiPropertyR = 2,
    kCFUniCharBiDiPropertyAN = 3,
    kCFUniCharBiDiPropertyEN = 4,
    kCFUniCharBiDiPropertyAL = 5,
    kCFUniCharBiDiPropertyNSM = 6,
    kCFUniCharBiDiPropertyCS = 7,
    kCFUniCharBiDiPropertyES = 8,
    kCFUniCharBiDiPropertyET = 9,
    kCFUniCharBiDiPropertyBN = 10,
    kCFUniCharBiDiPropertyS = 11,
    kCFUniCharBiDiPropertyWS = 12,
    kCFUniCharBiDiPropertyB = 13,
    kCFUniCharBiDiPropertyRLO = 14,
    kCFUniCharBiDiPropertyRLE = 15,
    kCFUniCharBiDiPropertyLRO = 16,
    kCFUniCharBiDiPropertyLRE = 17,
    kCFUniCharBiDiPropertyPDF = 18
};
156 | ||
// Unicode property kinds (presumably the 'propertyType' argument of the CFUniCharGetUnicodeProperty... functions).
enum {
    kCFUniCharCombiningProperty = 0,
    kCFUniCharBidiProperty = 1
};
161 | ||
162 | // The second arg 'bitmap' has to be the pointer to a specific plane | |
163 | CF_INLINE uint8_t CFUniCharGetBidiPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) { | |
164 | if (bitmap) { | |
165 | uint8_t value = bitmap[(character >> 8)]; | |
166 | ||
d8925383 | 167 | if (value > kCFUniCharBiDiPropertyPDF) { |
9ce05555 A |
168 | bitmap = bitmap + 256 + ((value - kCFUniCharBiDiPropertyPDF - 1) * 256); |
169 | return bitmap[character % 256]; | |
d8925383 A |
170 | } else { |
171 | return value; | |
9ce05555 A |
172 | } |
173 | } | |
174 | return kCFUniCharBiDiPropertyL; | |
175 | } | |
176 | ||
177 | CF_INLINE uint8_t CFUniCharGetCombiningPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) { | |
178 | if (bitmap) { | |
179 | uint8_t value = bitmap[(character >> 8)]; | |
180 | ||
181 | if (value) { | |
182 | bitmap = bitmap + 256 + ((value - 1) * 256); | |
183 | return bitmap[character % 256]; | |
184 | } | |
185 | } | |
186 | return 0; | |
187 | } | |
188 | ||
189 | CF_EXPORT const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane); | |
190 | CF_EXPORT uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType); | |
191 | CF_EXPORT uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType); | |
192 | ||
bd5b749c | 193 | CF_EXPORT bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat); |
9ce05555 | 194 | |
d8925383 A |
195 | // UTF32 support |
196 | ||
197 | CF_INLINE bool CFUniCharToUTF32(const UTF16Char *src, CFIndex length, UTF32Char *dst, bool allowLossy, bool isBigEndien) { | |
198 | const UTF16Char *limit = src + length; | |
199 | UTF32Char character; | |
200 | ||
201 | while (src < limit) { | |
202 | character = *(src++); | |
203 | ||
204 | if (CFUniCharIsSurrogateHighCharacter(character)) { | |
205 | if ((src < limit) && CFUniCharIsSurrogateLowCharacter(*src)) { | |
206 | character = CFUniCharGetLongCharacterForSurrogatePair(character, *(src++)); | |
207 | } else { | |
208 | if (!allowLossy) return false; | |
209 | character = 0xFFFD; // replacement character | |
210 | } | |
211 | } else if (CFUniCharIsSurrogateLowCharacter(character)) { | |
212 | if (!allowLossy) return false; | |
213 | character = 0xFFFD; // replacement character | |
214 | } | |
215 | ||
216 | *(dst++) = (isBigEndien ? CFSwapInt32HostToBig(character) : CFSwapInt32HostToLittle(character)); | |
217 | } | |
218 | ||
219 | return true; | |
220 | } | |
221 | ||
222 | CF_INLINE bool CFUniCharFromUTF32(const UTF32Char *src, CFIndex length, UTF16Char *dst, bool allowLossy, bool isBigEndien) { | |
223 | const UTF32Char *limit = src + length; | |
224 | UTF32Char character; | |
225 | ||
226 | while (src < limit) { | |
227 | character = (isBigEndien ? CFSwapInt32BigToHost(*(src++)) : CFSwapInt32LittleToHost(*(src++))); | |
228 | ||
229 | if (character < 0xFFFF) { // BMP | |
230 | if (allowLossy) { | |
231 | if (CFUniCharIsSurrogateHighCharacter(character)) { | |
232 | UTF32Char otherCharacter = 0xFFFD; // replacement character | |
233 | ||
234 | if (src < limit) { | |
235 | otherCharacter = (isBigEndien ? CFSwapInt32BigToHost(*src) : CFSwapInt32LittleToHost(*src)); | |
236 | ||
237 | ||
238 | if ((otherCharacter < 0x10000) && CFUniCharIsSurrogateLowCharacter(otherCharacter)) { | |
239 | *(dst++) = character; ++src; | |
240 | } else { | |
241 | otherCharacter = 0xFFFD; // replacement character | |
242 | } | |
243 | } | |
244 | ||
245 | character = otherCharacter; | |
246 | } else if (CFUniCharIsSurrogateLowCharacter(character)) { | |
247 | character = 0xFFFD; // replacement character | |
248 | } | |
249 | } else { | |
250 | if (CFUniCharIsSurrogateHighCharacter(character) || CFUniCharIsSurrogateLowCharacter(character)) return false; | |
251 | } | |
252 | } else if (character < 0x110000) { // non-BMP | |
253 | character -= 0x10000; | |
254 | *(dst++) = (UTF16Char)((character >> 10) + 0xD800UL); | |
255 | character = (UTF16Char)((character & 0x3FF) + 0xDC00UL); | |
256 | } else { | |
257 | if (!allowLossy) return false; | |
258 | character = 0xFFFD; // replacement character | |
259 | } | |
260 | ||
261 | *(dst++) = character; | |
262 | } | |
263 | return true; | |
264 | } | |
265 | ||
bd5b749c | 266 | CF_EXTERN_C_END |
9ce05555 A |
267 | |
268 | #endif /* ! __COREFOUNDATION_CFUNICHAR__ */ | |
269 |