]>
Commit | Line | Data |
---|---|---|
9ce05555 | 1 | /* |
bd5b749c | 2 | * Copyright (c) 2008 Apple Inc. All rights reserved. |
9ce05555 A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
9ce05555 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
23 | /* CFUniChar.h | |
bd5b749c | 24 | Copyright (c) 1998-2007, Apple Inc. All rights reserved. |
9ce05555 A |
25 | */ |
26 | ||
27 | #if !defined(__COREFOUNDATION_CFUNICHAR__) | |
28 | #define __COREFOUNDATION_CFUNICHAR__ 1 | |
29 | ||
d8925383 A |
30 | |
31 | #include <CoreFoundation/CFByteOrder.h> | |
9ce05555 A |
32 | #include <CoreFoundation/CFBase.h> |
33 | ||
bd5b749c | 34 | CF_EXTERN_C_BEGIN |
9ce05555 A |
35 | |
/* Bit-addressing constants for the per-character bitmaps handled by the
 * inline bitmap helpers in this header: a character's byte index is
 * (theChar >> kCFUniCharBitShiftForByte) and its bit inside that byte is
 * selected with (theChar & kCFUniCharBitShiftForMask).
 * Despite its name, the second constant is applied as a mask (the low three
 * bits of the character), not as a shift count. */
#define kCFUniCharBitShiftForByte (3)
#define kCFUniCharBitShiftForMask (7)
38 | ||
bd5b749c | 39 | CF_INLINE bool CFUniCharIsSurrogateHighCharacter(UniChar character) { |
9ce05555 A |
40 | return ((character >= 0xD800UL) && (character <= 0xDBFFUL) ? true : false); |
41 | } | |
42 | ||
bd5b749c | 43 | CF_INLINE bool CFUniCharIsSurrogateLowCharacter(UniChar character) { |
9ce05555 A |
44 | return ((character >= 0xDC00UL) && (character <= 0xDFFFUL) ? true : false); |
45 | } | |
46 | ||
47 | CF_INLINE UTF32Char CFUniCharGetLongCharacterForSurrogatePair(UniChar surrogateHigh, UniChar surrogateLow) { | |
48 | return ((surrogateHigh - 0xD800UL) << 10) + (surrogateLow - 0xDC00UL) + 0x0010000UL; | |
49 | } | |
50 | ||
/* Destination encoding format selectors.
 * The following values coincide with the TextEncodingFormat defines in
 * TextCommon.h (note that the value 1 is not used here). */
enum {
    kCFUniCharUTF16Format = 0,
    kCFUniCharUTF8Format  = 2,
    kCFUniCharUTF32Format = 3
};
57 | ||
58 | CF_INLINE bool CFUniCharIsMemberOfBitmap(UTF16Char theChar, const uint8_t *bitmap) { | |
59 | return (bitmap && (bitmap[(theChar) >> kCFUniCharBitShiftForByte] & (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask))) ? true : false); | |
60 | } | |
61 | ||
62 | CF_INLINE void CFUniCharAddCharacterToBitmap(UTF16Char theChar, uint8_t *bitmap) { | |
63 | bitmap[(theChar) >> kCFUniCharBitShiftForByte] |= (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask)); | |
64 | } | |
65 | ||
66 | CF_INLINE void CFUniCharRemoveCharacterFromBitmap(UTF16Char theChar, uint8_t *bitmap) { | |
67 | bitmap[(theChar) >> kCFUniCharBitShiftForByte] &= ~(((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask)); | |
68 | } | |
69 | ||
/* Character set identifiers.  Numbering starts at 1; values from 100 upward
 * are the internal character sets.  The numeric values are spelled out
 * explicitly so they can be read without counting enumerators. */
enum {
    kCFUniCharControlCharacterSet                   = 1,
    kCFUniCharWhitespaceCharacterSet                = 2,
    kCFUniCharWhitespaceAndNewlineCharacterSet      = 3,
    kCFUniCharDecimalDigitCharacterSet              = 4,
    kCFUniCharLetterCharacterSet                    = 5,
    kCFUniCharLowercaseLetterCharacterSet           = 6,
    kCFUniCharUppercaseLetterCharacterSet           = 7,
    kCFUniCharNonBaseCharacterSet                   = 8,
    kCFUniCharCanonicalDecomposableCharacterSet     = 9,
    kCFUniCharDecomposableCharacterSet              = kCFUniCharCanonicalDecomposableCharacterSet, // alias of the canonical set
    kCFUniCharAlphaNumericCharacterSet              = 10,
    kCFUniCharPunctuationCharacterSet               = 11,
    kCFUniCharIllegalCharacterSet                   = 12,
    kCFUniCharTitlecaseLetterCharacterSet           = 13,
    kCFUniCharSymbolAndOperatorCharacterSet         = 14,
    kCFUniCharNewlineCharacterSet                   = 15,

    // internal character sets begin here
    kCFUniCharCompatibilityDecomposableCharacterSet = 100,
    kCFUniCharHFSPlusDecomposableCharacterSet       = 101,
    kCFUniCharStrongRightToLeftCharacterSet         = 102,
    kCFUniCharHasNonSelfLowercaseCharacterSet       = 103,
    kCFUniCharHasNonSelfUppercaseCharacterSet       = 104,
    kCFUniCharHasNonSelfTitlecaseCharacterSet       = 105,
    kCFUniCharHasNonSelfCaseFoldingCharacterSet     = 106,
    kCFUniCharHasNonSelfMirrorMappingCharacterSet   = 107,
    kCFUniCharControlAndFormatterCharacterSet       = 108,
    kCFUniCharCaseIgnorableCharacterSet             = 109,
    kCFUniCharGraphemeExtendCharacterSet            = 110
};
100 | ||
101 | CF_EXPORT bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset); | |
102 | ||
103 | // This function returns NULL for kCFUniCharControlCharacterSet, kCFUniCharWhitespaceCharacterSet, kCFUniCharWhitespaceAndNewlineCharacterSet, & kCFUniCharIllegalCharacterSet | |
104 | CF_EXPORT const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane); | |
105 | ||
/* Bitmap status codes, each expressed as a uint8_t value.
 * NOTE(review): presumably the possible results of
 * CFUniCharGetBitmapForPlane() -- confirm against the implementation. */
enum {
    kCFUniCharBitmapFilled = (uint8_t)0x00,
    kCFUniCharBitmapEmpty  = (uint8_t)0xFF,
    kCFUniCharBitmapAll    = (uint8_t)0x01
};
111 | ||
112 | CF_EXPORT uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted); | |
113 | ||
114 | CF_EXPORT uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset); | |
115 | ||
/* Case-mapping type selectors (consecutive values starting at 0).
 * NOTE(review): presumably the `ctype` / `type` argument of the case-mapping
 * functions declared in this header -- confirm. */
enum {
    kCFUniCharToLowercase = 0,
    kCFUniCharToUppercase = 1,
    kCFUniCharToTitlecase = 2,
    kCFUniCharCaseFold    = 3
};
122 | ||
/* Case-mapping condition flags; each occupies its own bit so they can be
 * OR-ed together.
 * NOTE(review): presumably the `flags` / `lastFlags` values of the
 * case-mapping functions declared in this header -- confirm. */
enum {
    kCFUniCharCaseMapFinalSigma = (1 << 0),
    kCFUniCharCaseMapAfter_i    = (1 << 1),
    kCFUniCharCaseMapMoreAbove  = (1 << 2)
};
128 | ||
bd5b749c | 129 | CF_EXPORT CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode); |
9ce05555 | 130 | |
bd5b749c | 131 | CF_EXPORT uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags); |
9ce05555 A |
132 | |
/* BiDi property values (consecutive, starting at 0).
 * kCFUniCharBiDiPropertyPDF is the largest value;
 * CFUniCharGetBidiPropertyForCharacter() relies on that to tell direct
 * property values apart from secondary-table indices. */
enum {
    kCFUniCharBiDiPropertyON  = 0,
    kCFUniCharBiDiPropertyL   = 1,
    kCFUniCharBiDiPropertyR   = 2,
    kCFUniCharBiDiPropertyAN  = 3,
    kCFUniCharBiDiPropertyEN  = 4,
    kCFUniCharBiDiPropertyAL  = 5,
    kCFUniCharBiDiPropertyNSM = 6,
    kCFUniCharBiDiPropertyCS  = 7,
    kCFUniCharBiDiPropertyES  = 8,
    kCFUniCharBiDiPropertyET  = 9,
    kCFUniCharBiDiPropertyBN  = 10,
    kCFUniCharBiDiPropertyS   = 11,
    kCFUniCharBiDiPropertyWS  = 12,
    kCFUniCharBiDiPropertyB   = 13,
    kCFUniCharBiDiPropertyRLO = 14,
    kCFUniCharBiDiPropertyRLE = 15,
    kCFUniCharBiDiPropertyLRO = 16,
    kCFUniCharBiDiPropertyLRE = 17,
    kCFUniCharBiDiPropertyPDF = 18
};
154 | ||
/* Unicode property type selectors.
 * NOTE(review): presumably the `propertyType` argument of the
 * CFUniCharGetUnicodeProperty... functions -- confirm. */
enum {
    kCFUniCharCombiningProperty = 0,
    kCFUniCharBidiProperty      = 1
};
159 | ||
160 | // The second arg 'bitmap' has to be the pointer to a specific plane | |
161 | CF_INLINE uint8_t CFUniCharGetBidiPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) { | |
162 | if (bitmap) { | |
163 | uint8_t value = bitmap[(character >> 8)]; | |
164 | ||
d8925383 | 165 | if (value > kCFUniCharBiDiPropertyPDF) { |
9ce05555 A |
166 | bitmap = bitmap + 256 + ((value - kCFUniCharBiDiPropertyPDF - 1) * 256); |
167 | return bitmap[character % 256]; | |
d8925383 A |
168 | } else { |
169 | return value; | |
9ce05555 A |
170 | } |
171 | } | |
172 | return kCFUniCharBiDiPropertyL; | |
173 | } | |
174 | ||
175 | CF_INLINE uint8_t CFUniCharGetCombiningPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) { | |
176 | if (bitmap) { | |
177 | uint8_t value = bitmap[(character >> 8)]; | |
178 | ||
179 | if (value) { | |
180 | bitmap = bitmap + 256 + ((value - 1) * 256); | |
181 | return bitmap[character % 256]; | |
182 | } | |
183 | } | |
184 | return 0; | |
185 | } | |
186 | ||
187 | CF_EXPORT const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane); | |
188 | CF_EXPORT uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType); | |
189 | CF_EXPORT uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType); | |
190 | ||
bd5b749c | 191 | CF_EXPORT bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat); |
9ce05555 | 192 | |
d8925383 A |
193 | // UTF32 support |
194 | ||
195 | CF_INLINE bool CFUniCharToUTF32(const UTF16Char *src, CFIndex length, UTF32Char *dst, bool allowLossy, bool isBigEndien) { | |
196 | const UTF16Char *limit = src + length; | |
197 | UTF32Char character; | |
198 | ||
199 | while (src < limit) { | |
200 | character = *(src++); | |
201 | ||
202 | if (CFUniCharIsSurrogateHighCharacter(character)) { | |
203 | if ((src < limit) && CFUniCharIsSurrogateLowCharacter(*src)) { | |
204 | character = CFUniCharGetLongCharacterForSurrogatePair(character, *(src++)); | |
205 | } else { | |
206 | if (!allowLossy) return false; | |
207 | character = 0xFFFD; // replacement character | |
208 | } | |
209 | } else if (CFUniCharIsSurrogateLowCharacter(character)) { | |
210 | if (!allowLossy) return false; | |
211 | character = 0xFFFD; // replacement character | |
212 | } | |
213 | ||
214 | *(dst++) = (isBigEndien ? CFSwapInt32HostToBig(character) : CFSwapInt32HostToLittle(character)); | |
215 | } | |
216 | ||
217 | return true; | |
218 | } | |
219 | ||
220 | CF_INLINE bool CFUniCharFromUTF32(const UTF32Char *src, CFIndex length, UTF16Char *dst, bool allowLossy, bool isBigEndien) { | |
221 | const UTF32Char *limit = src + length; | |
222 | UTF32Char character; | |
223 | ||
224 | while (src < limit) { | |
225 | character = (isBigEndien ? CFSwapInt32BigToHost(*(src++)) : CFSwapInt32LittleToHost(*(src++))); | |
226 | ||
227 | if (character < 0xFFFF) { // BMP | |
228 | if (allowLossy) { | |
229 | if (CFUniCharIsSurrogateHighCharacter(character)) { | |
230 | UTF32Char otherCharacter = 0xFFFD; // replacement character | |
231 | ||
232 | if (src < limit) { | |
233 | otherCharacter = (isBigEndien ? CFSwapInt32BigToHost(*src) : CFSwapInt32LittleToHost(*src)); | |
234 | ||
235 | ||
236 | if ((otherCharacter < 0x10000) && CFUniCharIsSurrogateLowCharacter(otherCharacter)) { | |
237 | *(dst++) = character; ++src; | |
238 | } else { | |
239 | otherCharacter = 0xFFFD; // replacement character | |
240 | } | |
241 | } | |
242 | ||
243 | character = otherCharacter; | |
244 | } else if (CFUniCharIsSurrogateLowCharacter(character)) { | |
245 | character = 0xFFFD; // replacement character | |
246 | } | |
247 | } else { | |
248 | if (CFUniCharIsSurrogateHighCharacter(character) || CFUniCharIsSurrogateLowCharacter(character)) return false; | |
249 | } | |
250 | } else if (character < 0x110000) { // non-BMP | |
251 | character -= 0x10000; | |
252 | *(dst++) = (UTF16Char)((character >> 10) + 0xD800UL); | |
253 | character = (UTF16Char)((character & 0x3FF) + 0xDC00UL); | |
254 | } else { | |
255 | if (!allowLossy) return false; | |
256 | character = 0xFFFD; // replacement character | |
257 | } | |
258 | ||
259 | *(dst++) = character; | |
260 | } | |
261 | return true; | |
262 | } | |
263 | ||
bd5b749c | 264 | CF_EXTERN_C_END |
9ce05555 A |
265 | |
266 | #endif /* ! __COREFOUNDATION_CFUNICHAR__ */ | |
267 |