]> git.saurik.com Git - apple/cf.git/blob - StringEncodings.subproj/CFUniChar.h
CF-299.35.tar.gz
[apple/cf.git] / StringEncodings.subproj / CFUniChar.h
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* CFUniChar.h
26 Copyright (c) 1998-2003, Apple, Inc. All rights reserved.
27 */
28
29 #if !defined(__COREFOUNDATION_CFUNICHAR__)
30 #define __COREFOUNDATION_CFUNICHAR__ 1
31
32 #include <CoreFoundation/CFBase.h>
33
34 #if defined(__cplusplus)
35 extern "C" {
36 #endif
37
/* Right-shift applied to a 16-bit character to obtain the index of the
 * bitmap byte that holds its membership bit (eight characters per byte). */
#define kCFUniCharBitShiftForByte (3)
/* Despite the "BitShift" in its name, this value is AND-ed with a character
 * to obtain the position (0-7) of its bit inside that bitmap byte. */
#define kCFUniCharBitShiftForMask (0x7)
40
41 CF_INLINE Boolean CFUniCharIsSurrogateHighCharacter(UniChar character) {
42 return ((character >= 0xD800UL) && (character <= 0xDBFFUL) ? true : false);
43 }
44
45 CF_INLINE Boolean CFUniCharIsSurrogateLowCharacter(UniChar character) {
46 return ((character >= 0xDC00UL) && (character <= 0xDFFFUL) ? true : false);
47 }
48
49 CF_INLINE UTF32Char CFUniCharGetLongCharacterForSurrogatePair(UniChar surrogateHigh, UniChar surrogateLow) {
50 return ((surrogateHigh - 0xD800UL) << 10) + (surrogateLow - 0xDC00UL) + 0x0010000UL;
51 }
52
// The following values coincide with the TextEncodingFormat constants defined in TextCommon.h
enum {
    kCFUniCharUTF16Format = 0, // UTF-16
    kCFUniCharUTF8Format  = 2, // UTF-8 (note: the value 1 is not used here)
    kCFUniCharUTF32Format = 3  // UTF-32
};
59
60 CF_INLINE bool CFUniCharIsMemberOfBitmap(UTF16Char theChar, const uint8_t *bitmap) {
61 return (bitmap && (bitmap[(theChar) >> kCFUniCharBitShiftForByte] & (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask))) ? true : false);
62 }
63
64 CF_INLINE void CFUniCharAddCharacterToBitmap(UTF16Char theChar, uint8_t *bitmap) {
65 bitmap[(theChar) >> kCFUniCharBitShiftForByte] |= (((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
66 }
67
68 CF_INLINE void CFUniCharRemoveCharacterFromBitmap(UTF16Char theChar, uint8_t *bitmap) {
69 bitmap[(theChar) >> kCFUniCharBitShiftForByte] &= ~(((uint32_t)1) << (theChar & kCFUniCharBitShiftForMask));
70 }
71
// Character set identifiers. Values start at 1 and are consecutive;
// kCFUniCharDecomposableCharacterSet is an alias for the canonical set.
enum {
    kCFUniCharControlCharacterSet                   = 1,
    kCFUniCharWhitespaceCharacterSet                = 2,
    kCFUniCharWhitespaceAndNewlineCharacterSet      = 3,
    kCFUniCharDecimalDigitCharacterSet              = 4,
    kCFUniCharLetterCharacterSet                    = 5,
    kCFUniCharLowercaseLetterCharacterSet           = 6,
    kCFUniCharUppercaseLetterCharacterSet           = 7,
    kCFUniCharNonBaseCharacterSet                   = 8,
    kCFUniCharCanonicalDecomposableCharacterSet     = 9,
    kCFUniCharDecomposableCharacterSet              = kCFUniCharCanonicalDecomposableCharacterSet,
    kCFUniCharAlphaNumericCharacterSet              = 10,
    kCFUniCharPunctuationCharacterSet               = 11,
    kCFUniCharIllegalCharacterSet                   = 12,
    kCFUniCharTitlecaseLetterCharacterSet           = 13,
    kCFUniCharSymbolAndOperatorCharacterSet         = 14,
    kCFUniCharCompatibilityDecomposableCharacterSet = 15,
    kCFUniCharHFSPlusDecomposableCharacterSet       = 16,
    kCFUniCharStrongRightToLeftCharacterSet         = 17,
    kCFUniCharHasNonSelfLowercaseCharacterSet       = 18,
    kCFUniCharHasNonSelfUppercaseCharacterSet       = 19,
    kCFUniCharHasNonSelfTitlecaseCharacterSet       = 20,
    kCFUniCharHasNonSelfCaseFoldingCharacterSet     = 21,
    kCFUniCharHasNonSelfMirrorMappingCharacterSet   = 22,
    kCFUniCharControlAndFormatterCharacterSet       = 23,
    kCFUniCharCaseIgnorableCharacterSet             = 24
};
99
100 CF_EXPORT bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset);
101
102 // This function returns NULL for kCFUniCharControlCharacterSet, kCFUniCharWhitespaceCharacterSet, kCFUniCharWhitespaceAndNewlineCharacterSet, & kCFUniCharIllegalCharacterSet
103 CF_EXPORT const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane);
104
// NOTE(review): presumably the status values returned by
// CFUniCharGetBitmapForPlane -- confirm against the implementation.
enum {
    kCFUniCharBitmapFilled = (uint8_t)0x00,
    kCFUniCharBitmapEmpty  = (uint8_t)0xFF,
    kCFUniCharBitmapAll    = (uint8_t)0x01
};
110
111 CF_EXPORT uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted);
112
113 CF_EXPORT uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset);
114
// Case-mapping kinds.
// NOTE(review): presumably the values accepted as the `ctype` argument of
// CFUniCharMapCaseTo -- confirm against the implementation.
enum {
    kCFUniCharToLowercase = 0,
    kCFUniCharToUppercase = 1,
    kCFUniCharToTitlecase = 2,
    kCFUniCharCaseFold    = 3
};
121
// Case-mapping condition flags; each occupies its own bit so they can be OR-ed.
enum {
    kCFUniCharCaseMapFinalSigma = (1 << 0),
    kCFUniCharCaseMapAfter_i    = (1 << 1),
    kCFUniCharCaseMapMoreAbove  = (1 << 2)
};
127
128 CF_EXPORT uint32_t CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, uint32_t maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode);
129
130 CF_EXPORT uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, uint32_t currentIndex, uint32_t length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags);
131
// Bidirectional property values, numbered consecutively from 0.
// kCFUniCharBiDiPropertyPDF is the largest value; the BiDi lookup function
// in this header relies on that when decoding its table index.
enum {
    kCFUniCharBiDiPropertyON  = 0,
    kCFUniCharBiDiPropertyL   = 1,
    kCFUniCharBiDiPropertyR   = 2,
    kCFUniCharBiDiPropertyAN  = 3,
    kCFUniCharBiDiPropertyEN  = 4,
    kCFUniCharBiDiPropertyAL  = 5,
    kCFUniCharBiDiPropertyNSM = 6,
    kCFUniCharBiDiPropertyCS  = 7,
    kCFUniCharBiDiPropertyES  = 8,
    kCFUniCharBiDiPropertyET  = 9,
    kCFUniCharBiDiPropertyBN  = 10,
    kCFUniCharBiDiPropertyS   = 11,
    kCFUniCharBiDiPropertyWS  = 12,
    kCFUniCharBiDiPropertyB   = 13,
    kCFUniCharBiDiPropertyRLO = 14,
    kCFUniCharBiDiPropertyRLE = 15,
    kCFUniCharBiDiPropertyLRO = 16,
    kCFUniCharBiDiPropertyLRE = 17,
    kCFUniCharBiDiPropertyPDF = 18
};
153
// Unicode property kinds.
// NOTE(review): presumably the `propertyType` argument of the
// CFUniCharGetUnicodeProperty... functions -- confirm against the implementation.
enum {
    kCFUniCharCombiningProperty = 0,
    kCFUniCharBidiProperty      = 1
};
158
159 // The second arg 'bitmap' has to be the pointer to a specific plane
160 CF_INLINE uint8_t CFUniCharGetBidiPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
161 if (bitmap) {
162 uint8_t value = bitmap[(character >> 8)];
163
164 if (value != kCFUniCharBiDiPropertyL) {
165 bitmap = bitmap + 256 + ((value - kCFUniCharBiDiPropertyPDF - 1) * 256);
166 return bitmap[character % 256];
167 }
168 }
169 return kCFUniCharBiDiPropertyL;
170 }
171
172 CF_INLINE uint8_t CFUniCharGetCombiningPropertyForCharacter(UTF16Char character, const uint8_t *bitmap) {
173 if (bitmap) {
174 uint8_t value = bitmap[(character >> 8)];
175
176 if (value) {
177 bitmap = bitmap + 256 + ((value - 1) * 256);
178 return bitmap[character % 256];
179 }
180 }
181 return 0;
182 }
183
184 CF_EXPORT const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane);
185 CF_EXPORT uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType);
186 CF_EXPORT uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType);
187
188 CF_EXPORT bool CFUniCharFillDestinationBuffer(const UTF32Char *src, uint32_t srcLength, void **dst, uint32_t dstLength, uint32_t *filledLength, uint32_t dstFormat);
189
190 #if defined(__cplusplus)
191 }
192 #endif
193
194 #endif /* ! __COREFOUNDATION_CFUNICHAR__ */
195