]> git.saurik.com Git - apple/cf.git/blob - CFUniChar.h
CF-1152.14.tar.gz
[apple/cf.git] / CFUniChar.h
1 /*
2 * Copyright (c) 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFUniChar.h
25 Copyright (c) 1998-2014, Apple Inc. All rights reserved.
26 */
27
28 #if !defined(__COREFOUNDATION_CFUNICHAR__)
29 #define __COREFOUNDATION_CFUNICHAR__ 1
30
31
32 #include <CoreFoundation/CFByteOrder.h>
33 #include <CoreFoundation/CFBase.h>
34
35 CF_EXTERN_C_BEGIN
36
// Right shift that turns a 16-bit character code into the index of the byte
// holding its bit inside a plane bitmap (one bit per character, 8 per byte).
#define kCFUniCharBitShiftForByte (3)
// Despite the "Shift" in its name, this value is applied as an AND mask: it
// keeps the low three bits of the character, i.e. its bit position (0-7)
// within the bitmap byte.
#define kCFUniCharBitShiftForMask (7)
39
40 CF_INLINE bool CFUniCharIsSurrogateHighCharacter(UniChar character) {
41 return ((character >= 0xD800UL) && (character <= 0xDBFFUL) ? true : false);
42 }
43
44 CF_INLINE bool CFUniCharIsSurrogateLowCharacter(UniChar character) {
45 return ((character >= 0xDC00UL) && (character <= 0xDFFFUL) ? true : false);
46 }
47
48 CF_INLINE UTF32Char CFUniCharGetLongCharacterForSurrogatePair(UniChar surrogateHigh, UniChar surrogateLow) {
49 return ((surrogateHigh - 0xD800UL) << 10) + (surrogateLow - 0xDC00UL) + 0x0010000UL;
50 }
51
// The following values coincide with the TextEncodingFormat defines in TextCommon.h
// (which is why the numbering skips 1).
// NOTE(review): presumably these are the values accepted as 'dstFormat' by
// CFUniCharFillDestinationBuffer -- confirm against the implementation.
enum {
    kCFUniCharUTF16Format = 0,
    kCFUniCharUTF8Format = 2,
    kCFUniCharUTF32Format = 3
};
58
59 CF_INLINE bool CFUniCharIsMemberOfBitmap(UTF16Char theChar, const uint8_t *bitmap) {
60 return (bitmap && (bitmap[(theChar) >> kCFUniCharBitShiftForByte] & (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask))) ? true : false);
61 }
62
63 CF_INLINE void CFUniCharAddCharacterToBitmap(UTF16Char theChar, uint8_t *bitmap) {
64 bitmap[(theChar) >> kCFUniCharBitShiftForByte] |= (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
65 }
66
67 CF_INLINE void CFUniCharRemoveCharacterFromBitmap(UTF16Char theChar, uint8_t *bitmap) {
68 bitmap[(theChar) >> kCFUniCharBitShiftForByte] &= ~(((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
69 }
70
// Character set identifiers.
// NOTE(review): presumably these are the values passed as the 'charset'
// argument of CFUniCharIsMemberOf, CFUniCharGetBitmapPtrForPlane,
// CFUniCharGetBitmapForPlane and CFUniCharGetNumberOfPlanes -- confirm
// against the implementation.
// The first group is numbered consecutively from 1; the second group starts
// at 100.
enum {
    kCFUniCharControlCharacterSet = 1,
    kCFUniCharWhitespaceCharacterSet,
    kCFUniCharWhitespaceAndNewlineCharacterSet,
    kCFUniCharDecimalDigitCharacterSet,
    kCFUniCharLetterCharacterSet,
    kCFUniCharLowercaseLetterCharacterSet,
    kCFUniCharUppercaseLetterCharacterSet,
    kCFUniCharNonBaseCharacterSet,
    kCFUniCharCanonicalDecomposableCharacterSet,
    // Alias: same value as kCFUniCharCanonicalDecomposableCharacterSet.
    kCFUniCharDecomposableCharacterSet = kCFUniCharCanonicalDecomposableCharacterSet,
    kCFUniCharAlphaNumericCharacterSet,
    kCFUniCharPunctuationCharacterSet,
    kCFUniCharIllegalCharacterSet,
    kCFUniCharTitlecaseLetterCharacterSet,
    kCFUniCharSymbolAndOperatorCharacterSet,
    kCFUniCharNewlineCharacterSet,

    kCFUniCharCompatibilityDecomposableCharacterSet = 100, // internal character sets begin here
    kCFUniCharHFSPlusDecomposableCharacterSet,
    kCFUniCharStrongRightToLeftCharacterSet,
    kCFUniCharHasNonSelfLowercaseCharacterSet,
    kCFUniCharHasNonSelfUppercaseCharacterSet,
    kCFUniCharHasNonSelfTitlecaseCharacterSet,
    kCFUniCharHasNonSelfCaseFoldingCharacterSet,
    kCFUniCharHasNonSelfMirrorMappingCharacterSet,
    kCFUniCharControlAndFormatterCharacterSet,
    kCFUniCharCaseIgnorableCharacterSet,
    kCFUniCharGraphemeExtendCharacterSet
};
101
// Membership test for a full 32-bit code point against a character set.
// NOTE(review): 'charset' is presumably one of the kCFUniChar...CharacterSet
// constants; the implementation is not visible from this header -- confirm.
CF_EXPORT bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset);
103
// Returns a pointer to the bitmap of the given character set for one plane.
// This function returns NULL for kCFUniCharControlCharacterSet, kCFUniCharWhitespaceCharacterSet, kCFUniCharWhitespaceAndNewlineCharacterSet, & kCFUniCharIllegalCharacterSet
// NOTE(review): the result is presumably suitable for CFUniCharIsMemberOfBitmap,
// which treats a NULL bitmap as "not a member" -- confirm against the implementation.
CF_EXPORT const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane);
106
// 8-bit status codes describing a plane bitmap.
// NOTE(review): presumably the possible return values of
// CFUniCharGetBitmapForPlane (whose return type is uint8_t); the exact meaning
// of each code is defined by the implementation, not by this header -- confirm.
enum {
    kCFUniCharBitmapFilled = (uint8_t)0,
    kCFUniCharBitmapEmpty = (uint8_t)0xFF,
    kCFUniCharBitmapAll = (uint8_t)1
};
112
// Produces the bitmap of a character set for one plane into caller-provided
// storage and returns an 8-bit status.
// NOTE(review): the required size of 'bitmap', the effect of 'isInverted', and
// the mapping of the return value onto the kCFUniCharBitmap... constants are
// not visible from this header -- confirm against the implementation.
CF_EXPORT uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted);
114
// Returns a plane count for the given character set.
// NOTE(review): presumably the number of planes for which bitmap data exists,
// i.e. the exclusive upper bound for the 'plane' argument of the bitmap
// accessors -- confirm against the implementation.
CF_EXPORT uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset);
116
// Case mapping kinds, numbered consecutively from 0.
// NOTE(review): presumably the values accepted as 'ctype' by CFUniCharMapCaseTo
// and as 'type' by CFUniCharGetConditionalCaseMappingFlags -- confirm.
enum {
    kCFUniCharToLowercase = 0,
    kCFUniCharToUppercase,
    kCFUniCharToTitlecase,
    kCFUniCharCaseFold
};
123
// Case mapping context flags. Each constant occupies its own bit, so the
// values can be OR-ed together.
// NOTE(review): presumably these are the bits carried in the 'flags' argument
// of CFUniCharMapCaseTo and in the 'lastFlags' argument / return value of
// CFUniCharGetConditionalCaseMappingFlags -- confirm.
enum {
    kCFUniCharCaseMapFinalSigma = (1UL << 0),
    kCFUniCharCaseMapAfter_i = (1UL << 1),
    kCFUniCharCaseMapMoreAbove = (1UL << 2),
    kCFUniCharCaseMapDutchDigraph = (1UL << 3),
    kCFUniCharCaseMapGreekTonos = (1UL << 4)
};
131
// Case-maps a single code point, writing UTF-16 output into 'convertedChar'.
// NOTE(review): presumably 'maxLength' is the capacity of 'convertedChar' in
// UTF16Char units, the return value is the number of units written, 'ctype' is
// one of kCFUniCharToLowercase..kCFUniCharCaseFold and 'flags' is a combination
// of the kCFUniCharCaseMap... bits -- none of this is visible from the header;
// confirm against the implementation.
CF_EXPORT CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode);
133
// Computes context-dependent case mapping flags for the character at
// 'currentIndex' of a UTF-16 buffer.
// NOTE(review): presumably the result is a combination of the
// kCFUniCharCaseMap... bits intended to be fed to CFUniCharMapCaseTo, and
// 'lastFlags' carries the flags computed for the previous character -- confirm
// against the implementation.
CF_EXPORT uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags);
135
// Bidirectional property values, numbered consecutively from 0.
// kCFUniCharBiDiPropertyPDF must remain the last (largest) value:
// CFUniCharGetBidiPropertyForCharacter treats any table entry greater than it
// as a reference to a detail table rather than as a property value.
enum {
    kCFUniCharBiDiPropertyON = 0,
    kCFUniCharBiDiPropertyL,
    kCFUniCharBiDiPropertyR,
    kCFUniCharBiDiPropertyAN,
    kCFUniCharBiDiPropertyEN,
    kCFUniCharBiDiPropertyAL,
    kCFUniCharBiDiPropertyNSM,
    kCFUniCharBiDiPropertyCS,
    kCFUniCharBiDiPropertyES,
    kCFUniCharBiDiPropertyET,
    kCFUniCharBiDiPropertyBN,
    kCFUniCharBiDiPropertyS,
    kCFUniCharBiDiPropertyWS,
    kCFUniCharBiDiPropertyB,
    kCFUniCharBiDiPropertyRLO,
    kCFUniCharBiDiPropertyRLE,
    kCFUniCharBiDiPropertyLRO,
    kCFUniCharBiDiPropertyLRE,
    kCFUniCharBiDiPropertyPDF
};
157
// Unicode property table selectors.
// NOTE(review): presumably the values accepted as 'propertyType' by
// CFUniCharGetUnicodePropertyDataForPlane,
// CFUniCharGetNumberOfPlanesForUnicodePropertyData and
// CFUniCharGetUnicodeProperty -- confirm.
enum {
    kCFUniCharCombiningProperty = 0,
    kCFUniCharBidiProperty
};
162
163 // The second arg 'bitmap' has to be the pointer to a specific plane
164 CF_INLINE uint8_t CFUniCharGetBidiPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
165 if (bitmap) {
166 uint8_t value = bitmap[(character >> 8)];
167
168 if (value > kCFUniCharBiDiPropertyPDF) {
169 bitmap = bitmap + 256 + ((value - kCFUniCharBiDiPropertyPDF - 1) * 256);
170 return bitmap[character % 256];
171 } else {
172 return value;
173 }
174 }
175 return kCFUniCharBiDiPropertyL;
176 }
177
178 CF_INLINE uint8_t CFUniCharGetCombiningPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
179 if (bitmap) {
180 uint8_t value = bitmap[(character >> 8)];
181
182 if (value) {
183 bitmap = bitmap + 256 + ((value - 1) * 256);
184 return bitmap[character % 256];
185 }
186 }
187 return 0;
188 }
189
// Unicode property table access.
// NOTE(review): presumably 'propertyType' is kCFUniCharCombiningProperty or
// kCFUniCharBidiProperty, and the pointer returned by
// CFUniCharGetUnicodePropertyDataForPlane is the per-plane table expected by
// CFUniCharGetCombiningPropertyForCharacter / CFUniCharGetBidiPropertyForCharacter
// -- confirm against the implementation.
CF_EXPORT const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane);
CF_EXPORT uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType);
CF_EXPORT uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType);
193
// Writes a run of UTF-32 code points into a destination buffer in the encoding
// selected by 'dstFormat'.
// NOTE(review): presumably 'dstFormat' is one of kCFUniCharUTF16Format /
// kCFUniCharUTF8Format / kCFUniCharUTF32Format, '*dst' is advanced past the
// written data, and '*filledLength' receives the amount written -- none of
// this is visible from the header; confirm against the implementation.
CF_EXPORT bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat);
195
196 // UTF32 support
197
198 CF_INLINE bool CFUniCharToUTF32(const UTF16Char *src, CFIndex length, UTF32Char *dst, bool allowLossy, bool isBigEndien) {
199 const UTF16Char *limit = src + length;
200 UTF32Char character;
201
202 while (src < limit) {
203 character = *(src++);
204
205 if (CFUniCharIsSurrogateHighCharacter(character)) {
206 if ((src < limit) && CFUniCharIsSurrogateLowCharacter(*src)) {
207 character = CFUniCharGetLongCharacterForSurrogatePair(character, *(src++));
208 } else {
209 if (!allowLossy) return false;
210 character = 0xFFFD; // replacement character
211 }
212 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
213 if (!allowLossy) return false;
214 character = 0xFFFD; // replacement character
215 }
216
217 *(dst++) = (isBigEndien ? CFSwapInt32HostToBig(character) : CFSwapInt32HostToLittle(character));
218 }
219
220 return true;
221 }
222
223 CF_INLINE bool CFUniCharFromUTF32(const UTF32Char *src, CFIndex length, UTF16Char *dst, bool allowLossy, bool isBigEndien) {
224 const UTF32Char *limit = src + length;
225 UTF32Char character;
226
227 while (src < limit) {
228 character = (isBigEndien ? CFSwapInt32BigToHost(*(src++)) : CFSwapInt32LittleToHost(*(src++)));
229
230 if (character < 0x10000) { // BMP
231 if (allowLossy) {
232 if (CFUniCharIsSurrogateHighCharacter(character)) {
233 UTF32Char otherCharacter = 0xFFFD; // replacement character
234
235 if (src < limit) {
236 otherCharacter = (isBigEndien ? CFSwapInt32BigToHost(*src) : CFSwapInt32LittleToHost(*src));
237
238
239 if ((otherCharacter < 0x10000) && CFUniCharIsSurrogateLowCharacter(otherCharacter)) {
240 *(dst++) = character; ++src;
241 } else {
242 otherCharacter = 0xFFFD; // replacement character
243 }
244 }
245
246 character = otherCharacter;
247 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
248 character = 0xFFFD; // replacement character
249 }
250 } else {
251 if (CFUniCharIsSurrogateHighCharacter(character) || CFUniCharIsSurrogateLowCharacter(character)) return false;
252 }
253 } else if (character < 0x110000) { // non-BMP
254 character -= 0x10000;
255 *(dst++) = (UTF16Char)((character >> 10) + 0xD800UL);
256 character = (UTF16Char)((character & 0x3FF) + 0xDC00UL);
257 } else {
258 if (!allowLossy) return false;
259 character = 0xFFFD; // replacement character
260 }
261
262 *(dst++) = character;
263 }
264 return true;
265 }
266
267 CF_EXTERN_C_END
268
269 #endif /* ! __COREFOUNDATION_CFUNICHAR__ */
270