]> git.saurik.com Git - apple/cf.git/blame - CFUniChar.h
CF-476.19.tar.gz
[apple/cf.git] / CFUniChar.h
CommitLineData
/*
 * Copyright (c) 2008 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*	CFUniChar.h
	Copyright (c) 1998-2007, Apple Inc. All rights reserved.
*/
26
27#if !defined(__COREFOUNDATION_CFUNICHAR__)
28#define __COREFOUNDATION_CFUNICHAR__ 1
29
d8925383
A
30
31#include <CoreFoundation/CFByteOrder.h>
9ce05555
A
32#include <CoreFoundation/CFBase.h>
33
bd5b749c 34CF_EXTERN_C_BEGIN
9ce05555
A
35
// Bit-addressing constants for the one-bit-per-character bitmaps handled by the inline
// bitmap helpers in this header: the byte holding a character's bit is found with
// (theChar >> kCFUniCharBitShiftForByte), and the bit inside that byte with
// (theChar & kCFUniCharBitShiftForMask).
#define kCFUniCharBitShiftForByte	(3)
#define kCFUniCharBitShiftForMask	(0x7)
38
bd5b749c 39CF_INLINE bool CFUniCharIsSurrogateHighCharacter(UniChar character) {
9ce05555
A
40 return ((character >= 0xD800UL) && (character <= 0xDBFFUL) ? true : false);
41}
42
bd5b749c 43CF_INLINE bool CFUniCharIsSurrogateLowCharacter(UniChar character) {
9ce05555
A
44 return ((character >= 0xDC00UL) && (character <= 0xDFFFUL) ? true : false);
45}
46
47CF_INLINE UTF32Char CFUniCharGetLongCharacterForSurrogatePair(UniChar surrogateHigh, UniChar surrogateLow) {
48 return ((surrogateHigh - 0xD800UL) << 10) + (surrogateLow - 0xDC00UL) + 0x0010000UL;
49}
50
// The following values coincide with the TextEncodingFormat format defines in TextCommon.h.
// Note that the value 1 is not assigned to any constant here.
enum {
    kCFUniCharUTF16Format = 0,
    kCFUniCharUTF8Format  = 2,
    kCFUniCharUTF32Format = 3
};
57
58CF_INLINE bool CFUniCharIsMemberOfBitmap(UTF16Char theChar, const uint8_t *bitmap) {
59 return (bitmap && (bitmap[(theChar) >> kCFUniCharBitShiftForByte] & (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask))) ? true : false);
60}
61
62CF_INLINE void CFUniCharAddCharacterToBitmap(UTF16Char theChar, uint8_t *bitmap) {
63 bitmap[(theChar) >> kCFUniCharBitShiftForByte] |= (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
64}
65
66CF_INLINE void CFUniCharRemoveCharacterFromBitmap(UTF16Char theChar, uint8_t *bitmap) {
67 bitmap[(theChar) >> kCFUniCharBitShiftForByte] &= ~(((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
68}
69
// Character set identifiers. Numbering starts at 1; the values are written out explicitly
// so the numbering is visible at a glance.
// kCFUniCharDecomposableCharacterSet is an alias of kCFUniCharCanonicalDecomposableCharacterSet.
enum {
    kCFUniCharControlCharacterSet = 1,
    kCFUniCharWhitespaceCharacterSet = 2,
    kCFUniCharWhitespaceAndNewlineCharacterSet = 3,
    kCFUniCharDecimalDigitCharacterSet = 4,
    kCFUniCharLetterCharacterSet = 5,
    kCFUniCharLowercaseLetterCharacterSet = 6,
    kCFUniCharUppercaseLetterCharacterSet = 7,
    kCFUniCharNonBaseCharacterSet = 8,
    kCFUniCharCanonicalDecomposableCharacterSet = 9,
    kCFUniCharDecomposableCharacterSet = kCFUniCharCanonicalDecomposableCharacterSet,
    kCFUniCharAlphaNumericCharacterSet = 10,
    kCFUniCharPunctuationCharacterSet = 11,
    kCFUniCharIllegalCharacterSet = 12,
    kCFUniCharTitlecaseLetterCharacterSet = 13,
    kCFUniCharSymbolAndOperatorCharacterSet = 14,
    kCFUniCharNewlineCharacterSet = 15,

    kCFUniCharCompatibilityDecomposableCharacterSet = 100, // internal character sets begin here
    kCFUniCharHFSPlusDecomposableCharacterSet = 101,
    kCFUniCharStrongRightToLeftCharacterSet = 102,
    kCFUniCharHasNonSelfLowercaseCharacterSet = 103,
    kCFUniCharHasNonSelfUppercaseCharacterSet = 104,
    kCFUniCharHasNonSelfTitlecaseCharacterSet = 105,
    kCFUniCharHasNonSelfCaseFoldingCharacterSet = 106,
    kCFUniCharHasNonSelfMirrorMappingCharacterSet = 107,
    kCFUniCharControlAndFormatterCharacterSet = 108,
    kCFUniCharCaseIgnorableCharacterSet = 109,
    kCFUniCharGraphemeExtendCharacterSet = 110
};
100
101CF_EXPORT bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset);
102
103// This function returns NULL for kCFUniCharControlCharacterSet, kCFUniCharWhitespaceCharacterSet, kCFUniCharWhitespaceAndNewlineCharacterSet, & kCFUniCharIllegalCharacterSet
104CF_EXPORT const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane);
105
// Bitmap status values, each expressed as a uint8_t-range constant.
// NOTE(review): presumably these are the possible results of CFUniCharGetBitmapForPlane(),
// which is declared in this header with a uint8_t return type -- confirm against the implementation.
enum {
    kCFUniCharBitmapFilled = (uint8_t)0x00,
    kCFUniCharBitmapEmpty  = (uint8_t)0xFF,
    kCFUniCharBitmapAll    = (uint8_t)0x01
};
111
112CF_EXPORT uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted);
113
114CF_EXPORT uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset);
115
// Case-mapping kinds, numbered consecutively from 0.
// NOTE(review): presumably the values accepted by the 'ctype' parameter of
// CFUniCharMapCaseTo() -- confirm against the implementation.
enum {
    kCFUniCharToLowercase = 0,
    kCFUniCharToUppercase = 1,
    kCFUniCharToTitlecase = 2,
    kCFUniCharCaseFold    = 3
};
122
// Case-mapping condition flags; each constant occupies its own bit so they can be OR-ed together.
enum {
    kCFUniCharCaseMapFinalSigma = (1 << 0),
    kCFUniCharCaseMapAfter_i    = (1 << 1),
    kCFUniCharCaseMapMoreAbove  = (1 << 2)
};
128
bd5b749c 129CF_EXPORT CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode);
9ce05555 130
bd5b749c 131CF_EXPORT uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags);
9ce05555
A
132
// Bidirectional property codes, numbered consecutively from 0.
// kCFUniCharBiDiPropertyPDF is the largest code; CFUniCharGetBidiPropertyForCharacter()
// relies on that to tell a direct property value from a sub-table selector.
enum {
    kCFUniCharBiDiPropertyON  = 0,
    kCFUniCharBiDiPropertyL   = 1,
    kCFUniCharBiDiPropertyR   = 2,
    kCFUniCharBiDiPropertyAN  = 3,
    kCFUniCharBiDiPropertyEN  = 4,
    kCFUniCharBiDiPropertyAL  = 5,
    kCFUniCharBiDiPropertyNSM = 6,
    kCFUniCharBiDiPropertyCS  = 7,
    kCFUniCharBiDiPropertyES  = 8,
    kCFUniCharBiDiPropertyET  = 9,
    kCFUniCharBiDiPropertyBN  = 10,
    kCFUniCharBiDiPropertyS   = 11,
    kCFUniCharBiDiPropertyWS  = 12,
    kCFUniCharBiDiPropertyB   = 13,
    kCFUniCharBiDiPropertyRLO = 14,
    kCFUniCharBiDiPropertyRLE = 15,
    kCFUniCharBiDiPropertyLRO = 16,
    kCFUniCharBiDiPropertyLRE = 17,
    kCFUniCharBiDiPropertyPDF = 18
};
154
// Unicode property type identifiers.
// NOTE(review): presumably the values accepted by the 'propertyType' parameters of the
// CFUniCharGetUnicodeProperty...() functions declared in this header -- confirm.
enum {
    kCFUniCharCombiningProperty = 0,
    kCFUniCharBidiProperty      = 1
};
159
160// The second arg 'bitmap' has to be the pointer to a specific plane
161CF_INLINE uint8_t CFUniCharGetBidiPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
162 if (bitmap) {
163 uint8_t value = bitmap[(character >> 8)];
164
d8925383 165 if (value > kCFUniCharBiDiPropertyPDF) {
9ce05555
A
166 bitmap = bitmap + 256 + ((value - kCFUniCharBiDiPropertyPDF - 1) * 256);
167 return bitmap[character % 256];
d8925383
A
168 } else {
169 return value;
9ce05555
A
170 }
171 }
172 return kCFUniCharBiDiPropertyL;
173}
174
175CF_INLINE uint8_t CFUniCharGetCombiningPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
176 if (bitmap) {
177 uint8_t value = bitmap[(character >> 8)];
178
179 if (value) {
180 bitmap = bitmap + 256 + ((value - 1) * 256);
181 return bitmap[character % 256];
182 }
183 }
184 return 0;
185}
186
187CF_EXPORT const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane);
188CF_EXPORT uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType);
189CF_EXPORT uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType);
190
bd5b749c 191CF_EXPORT bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat);
9ce05555 192
d8925383
A
193// UTF32 support
194
195CF_INLINE bool CFUniCharToUTF32(const UTF16Char *src, CFIndex length, UTF32Char *dst, bool allowLossy, bool isBigEndien) {
196 const UTF16Char *limit = src + length;
197 UTF32Char character;
198
199 while (src < limit) {
200 character = *(src++);
201
202 if (CFUniCharIsSurrogateHighCharacter(character)) {
203 if ((src < limit) && CFUniCharIsSurrogateLowCharacter(*src)) {
204 character = CFUniCharGetLongCharacterForSurrogatePair(character, *(src++));
205 } else {
206 if (!allowLossy) return false;
207 character = 0xFFFD; // replacement character
208 }
209 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
210 if (!allowLossy) return false;
211 character = 0xFFFD; // replacement character
212 }
213
214 *(dst++) = (isBigEndien ? CFSwapInt32HostToBig(character) : CFSwapInt32HostToLittle(character));
215 }
216
217 return true;
218}
219
220CF_INLINE bool CFUniCharFromUTF32(const UTF32Char *src, CFIndex length, UTF16Char *dst, bool allowLossy, bool isBigEndien) {
221 const UTF32Char *limit = src + length;
222 UTF32Char character;
223
224 while (src < limit) {
225 character = (isBigEndien ? CFSwapInt32BigToHost(*(src++)) : CFSwapInt32LittleToHost(*(src++)));
226
227 if (character < 0xFFFF) { // BMP
228 if (allowLossy) {
229 if (CFUniCharIsSurrogateHighCharacter(character)) {
230 UTF32Char otherCharacter = 0xFFFD; // replacement character
231
232 if (src < limit) {
233 otherCharacter = (isBigEndien ? CFSwapInt32BigToHost(*src) : CFSwapInt32LittleToHost(*src));
234
235
236 if ((otherCharacter < 0x10000) && CFUniCharIsSurrogateLowCharacter(otherCharacter)) {
237 *(dst++) = character; ++src;
238 } else {
239 otherCharacter = 0xFFFD; // replacement character
240 }
241 }
242
243 character = otherCharacter;
244 } else if (CFUniCharIsSurrogateLowCharacter(character)) {
245 character = 0xFFFD; // replacement character
246 }
247 } else {
248 if (CFUniCharIsSurrogateHighCharacter(character) || CFUniCharIsSurrogateLowCharacter(character)) return false;
249 }
250 } else if (character < 0x110000) { // non-BMP
251 character -= 0x10000;
252 *(dst++) = (UTF16Char)((character >> 10) + 0xD800UL);
253 character = (UTF16Char)((character & 0x3FF) + 0xDC00UL);
254 } else {
255 if (!allowLossy) return false;
256 character = 0xFFFD; // replacement character
257 }
258
259 *(dst++) = character;
260 }
261 return true;
262}
263
bd5b749c 264CF_EXTERN_C_END
9ce05555
A
265
266#endif /* ! __COREFOUNDATION_CFUNICHAR__ */
267