]> git.saurik.com Git - apple/cf.git/blame - CFUniChar.h
CF-635.15.tar.gz
[apple/cf.git] / CFUniChar.h
CommitLineData
9ce05555 1/*
8ca704e1 2 * Copyright (c) 2011 Apple Inc. All rights reserved.
9ce05555
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
9ce05555
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
f64f9b69 23
9ce05555 24/* CFUniChar.h
8ca704e1 25 Copyright (c) 1998-2011, Apple Inc. All rights reserved.
9ce05555
A
26*/
27
28#if !defined(__COREFOUNDATION_CFUNICHAR__)
29#define __COREFOUNDATION_CFUNICHAR__ 1
30
d8925383
A
31
32#include <CoreFoundation/CFByteOrder.h>
9ce05555
A
33#include <CoreFoundation/CFBase.h>
34
bd5b749c 35CF_EXTERN_C_BEGIN
9ce05555
A
36
37#define kCFUniCharBitShiftForByte (3)
38#define kCFUniCharBitShiftForMask (7)
39
bd5b749c 40CF_INLINE bool CFUniCharIsSurrogateHighCharacter(UniChar character) {
9ce05555
A
41 return ((character >= 0xD800UL) && (character <= 0xDBFFUL) ? true : false);
42}
43
bd5b749c 44CF_INLINE bool CFUniCharIsSurrogateLowCharacter(UniChar character) {
9ce05555
A
45 return ((character >= 0xDC00UL) && (character <= 0xDFFFUL) ? true : false);
46}
47
48CF_INLINE UTF32Char CFUniCharGetLongCharacterForSurrogatePair(UniChar surrogateHigh, UniChar surrogateLow) {
49 return ((surrogateHigh - 0xD800UL) << 10) + (surrogateLow - 0xDC00UL) + 0x0010000UL;
50}
51
// The following values coincide with the TextEncodingFormat defines in TextCommon.h
enum {
    kCFUniCharUTF16Format = 0,
    kCFUniCharUTF8Format  = 2,
    kCFUniCharUTF32Format = 3
};
58
59CF_INLINE bool CFUniCharIsMemberOfBitmap(UTF16Char theChar, const uint8_t *bitmap) {
60 return (bitmap && (bitmap[(theChar) >> kCFUniCharBitShiftForByte] & (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask))) ? true : false);
61}
62
63CF_INLINE void CFUniCharAddCharacterToBitmap(UTF16Char theChar, uint8_t *bitmap) {
64 bitmap[(theChar) >> kCFUniCharBitShiftForByte] |= (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
65}
66
67CF_INLINE void CFUniCharRemoveCharacterFromBitmap(UTF16Char theChar, uint8_t *bitmap) {
68 bitmap[(theChar) >> kCFUniCharBitShiftForByte] &= ~(((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
69}
70
/*
 * Character set identifiers, passed as the `charset` argument of the
 * CFUniChar bitmap/membership functions declared in this header.
 * Values are spelled out explicitly; they match the implicit numbering
 * of the original declaration.
 */
enum {
    kCFUniCharControlCharacterSet                   = 1,
    kCFUniCharWhitespaceCharacterSet                = 2,
    kCFUniCharWhitespaceAndNewlineCharacterSet      = 3,
    kCFUniCharDecimalDigitCharacterSet              = 4,
    kCFUniCharLetterCharacterSet                    = 5,
    kCFUniCharLowercaseLetterCharacterSet           = 6,
    kCFUniCharUppercaseLetterCharacterSet           = 7,
    kCFUniCharNonBaseCharacterSet                   = 8,
    kCFUniCharCanonicalDecomposableCharacterSet     = 9,
    kCFUniCharDecomposableCharacterSet              = kCFUniCharCanonicalDecomposableCharacterSet, // alias, same value
    kCFUniCharAlphaNumericCharacterSet              = 10,
    kCFUniCharPunctuationCharacterSet               = 11,
    kCFUniCharIllegalCharacterSet                   = 12,
    kCFUniCharTitlecaseLetterCharacterSet           = 13,
    kCFUniCharSymbolAndOperatorCharacterSet         = 14,
    kCFUniCharNewlineCharacterSet                   = 15,

    kCFUniCharCompatibilityDecomposableCharacterSet = 100, // internal character sets begin here
    kCFUniCharHFSPlusDecomposableCharacterSet       = 101,
    kCFUniCharStrongRightToLeftCharacterSet         = 102,
    kCFUniCharHasNonSelfLowercaseCharacterSet       = 103,
    kCFUniCharHasNonSelfUppercaseCharacterSet       = 104,
    kCFUniCharHasNonSelfTitlecaseCharacterSet       = 105,
    kCFUniCharHasNonSelfCaseFoldingCharacterSet     = 106,
    kCFUniCharHasNonSelfMirrorMappingCharacterSet   = 107,
    kCFUniCharControlAndFormatterCharacterSet       = 108,
    kCFUniCharCaseIgnorableCharacterSet             = 109,
    kCFUniCharGraphemeExtendCharacterSet            = 110
};
101
102CF_EXPORT bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset);
103
104// This function returns NULL for kCFUniCharControlCharacterSet, kCFUniCharWhitespaceCharacterSet, kCFUniCharWhitespaceAndNewlineCharacterSet, & kCFUniCharIllegalCharacterSet
105CF_EXPORT const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane);
106
/*
 * Byte-sized bitmap status constants.
 * NOTE(review): presumably the values returned by CFUniCharGetBitmapForPlane
 * (which returns uint8_t) -- confirm against the implementation.
 */
enum {
    kCFUniCharBitmapFilled = 0,
    kCFUniCharBitmapEmpty  = 0xFF,
    kCFUniCharBitmapAll    = 1
};
112
113CF_EXPORT uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted);
114
115CF_EXPORT uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset);
116
/*
 * Case-mapping operation selectors.
 * NOTE(review): presumably the `ctype` / `type` arguments of CFUniCharMapCaseTo and
 * CFUniCharGetConditionalCaseMappingFlags -- confirm against the implementation.
 */
enum {
    kCFUniCharToLowercase = 0,
    kCFUniCharToUppercase = 1,
    kCFUniCharToTitlecase = 2,
    kCFUniCharCaseFold    = 3
};
123
/*
 * Conditional case-mapping flags; each constant occupies its own bit so they can be OR-ed together.
 * NOTE(review): presumably the `flags` / `lastFlags` values used by CFUniCharMapCaseTo and
 * CFUniCharGetConditionalCaseMappingFlags -- confirm against the implementation.
 */
enum {
    kCFUniCharCaseMapFinalSigma   = 0x1UL, // bit 0
    kCFUniCharCaseMapAfter_i      = 0x2UL, // bit 1
    kCFUniCharCaseMapMoreAbove    = 0x4UL, // bit 2
    kCFUniCharCaseMapDutchDigraph = 0x8UL  // bit 3
};
130
bd5b749c 131CF_EXPORT CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode);
9ce05555 132
bd5b749c 133CF_EXPORT uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags);
9ce05555
A
134
/*
 * Bidirectional property values. The suffixes appear to follow the Unicode
 * Bidi_Class abbreviations (ON, L, R, AN, ...).
 * kCFUniCharBiDiPropertyPDF must remain the largest value:
 * CFUniCharGetBidiPropertyForCharacter treats any table entry above it as an
 * index into a secondary table rather than as a property value.
 */
enum {
    kCFUniCharBiDiPropertyON  = 0,
    kCFUniCharBiDiPropertyL   = 1,
    kCFUniCharBiDiPropertyR   = 2,
    kCFUniCharBiDiPropertyAN  = 3,
    kCFUniCharBiDiPropertyEN  = 4,
    kCFUniCharBiDiPropertyAL  = 5,
    kCFUniCharBiDiPropertyNSM = 6,
    kCFUniCharBiDiPropertyCS  = 7,
    kCFUniCharBiDiPropertyES  = 8,
    kCFUniCharBiDiPropertyET  = 9,
    kCFUniCharBiDiPropertyBN  = 10,
    kCFUniCharBiDiPropertyS   = 11,
    kCFUniCharBiDiPropertyWS  = 12,
    kCFUniCharBiDiPropertyB   = 13,
    kCFUniCharBiDiPropertyRLO = 14,
    kCFUniCharBiDiPropertyRLE = 15,
    kCFUniCharBiDiPropertyLRO = 16,
    kCFUniCharBiDiPropertyLRE = 17,
    kCFUniCharBiDiPropertyPDF = 18
};
156
/*
 * Unicode property table selectors.
 * NOTE(review): presumably the `propertyType` argument of the
 * CFUniCharGetUnicodeProperty* functions -- confirm against the implementation.
 */
enum {
    kCFUniCharCombiningProperty = 0,
    kCFUniCharBidiProperty      = 1
};
161
162// The second arg 'bitmap' has to be the pointer to a specific plane
163CF_INLINE uint8_t CFUniCharGetBidiPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
164 if (bitmap) {
165 uint8_t value = bitmap[(character >> 8)];
166
d8925383 167 if (value > kCFUniCharBiDiPropertyPDF) {
9ce05555
A
168 bitmap = bitmap + 256 + ((value - kCFUniCharBiDiPropertyPDF - 1) * 256);
169 return bitmap[character % 256];
d8925383
A
170 } else {
171 return value;
9ce05555
A
172 }
173 }
174 return kCFUniCharBiDiPropertyL;
175}
176
177CF_INLINE uint8_t CFUniCharGetCombiningPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
178 if (bitmap) {
179 uint8_t value = bitmap[(character >> 8)];
180
181 if (value) {
182 bitmap = bitmap + 256 + ((value - 1) * 256);
183 return bitmap[character % 256];
184 }
185 }
186 return 0;
187}
188
189CF_EXPORT const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane);
190CF_EXPORT uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType);
191CF_EXPORT uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType);
192
bd5b749c 193CF_EXPORT bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat);
9ce05555 194
d8925383
A
195// UTF32 support
196
197CF_INLINE bool CFUniCharToUTF32(const UTF16Char *src, CFIndex length, UTF32Char *dst, bool allowLossy, bool isBigEndien) {
198 const UTF16Char *limit = src + length;
199 UTF32Char character;
200
201 while (src < limit) {
202 character = *(src++);
203
204 if (CFUniCharIsSurrogateHighCharacter(character)) {
205 if ((src < limit) && CFUniCharIsSurrogateLowCharacter(*src)) {
206 character = CFUniCharGetLongCharacterForSurrogatePair(character, *(src++));
207 } else {
208 if (!allowLossy) return false;
209 character = 0xFFFD; // replacement character
210 }
211 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
212 if (!allowLossy) return false;
213 character = 0xFFFD; // replacement character
214 }
215
216 *(dst++) = (isBigEndien ? CFSwapInt32HostToBig(character) : CFSwapInt32HostToLittle(character));
217 }
218
219 return true;
220}
221
222CF_INLINE bool CFUniCharFromUTF32(const UTF32Char *src, CFIndex length, UTF16Char *dst, bool allowLossy, bool isBigEndien) {
223 const UTF32Char *limit = src + length;
224 UTF32Char character;
225
226 while (src < limit) {
227 character = (isBigEndien ? CFSwapInt32BigToHost(*(src++)) : CFSwapInt32LittleToHost(*(src++)));
228
229 if (character < 0xFFFF) { // BMP
230 if (allowLossy) {
231 if (CFUniCharIsSurrogateHighCharacter(character)) {
232 UTF32Char otherCharacter = 0xFFFD; // replacement character
233
234 if (src < limit) {
235 otherCharacter = (isBigEndien ? CFSwapInt32BigToHost(*src) : CFSwapInt32LittleToHost(*src));
236
237
238 if ((otherCharacter < 0x10000) && CFUniCharIsSurrogateLowCharacter(otherCharacter)) {
239 *(dst++) = character; ++src;
240 } else {
241 otherCharacter = 0xFFFD; // replacement character
242 }
243 }
244
245 character = otherCharacter;
246 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
247 character = 0xFFFD; // replacement character
248 }
249 } else {
250 if (CFUniCharIsSurrogateHighCharacter(character) || CFUniCharIsSurrogateLowCharacter(character)) return false;
251 }
252 } else if (character < 0x110000) { // non-BMP
253 character -= 0x10000;
254 *(dst++) = (UTF16Char)((character >> 10) + 0xD800UL);
255 character = (UTF16Char)((character & 0x3FF) + 0xDC00UL);
256 } else {
257 if (!allowLossy) return false;
258 character = 0xFFFD; // replacement character
259 }
260
261 *(dst++) = character;
262 }
263 return true;
264}
265
bd5b749c 266CF_EXTERN_C_END
9ce05555
A
267
268#endif /* ! __COREFOUNDATION_CFUNICHAR__ */
269