]>
Commit | Line | Data |
---|---|---|
9ce05555 | 1 | /* |
d8925383 | 2 | * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. |
9ce05555 A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
9ce05555 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
23 | /* CFCharacterSet.h | |
d8925383 | 24 | Copyright (c) 1999-2005, Apple, Inc. All rights reserved. |
9ce05555 A |
25 | */ |
26 | ||
27 | /*! | |
28 | @header CFCharacterSet | |
29 | CFCharacterSet represents a set, or a bag, of Unicode characters. | |
30 | The API consists of 3 groups: | |
31 | 1) creation/manipulation of CFCharacterSet instances, | |
32 | 2) query of a single Unicode character membership, | |
33 | and 3) bitmap representation related (reading/writing). | |
34 | Conceptually, CFCharacterSet is a 136K byte bitmap array of | |
35 | which each bit represents a Unicode code point. It could | |
36 | contain the Unicode characters in ISO 10646 Basic Multilingual | |
37 | Plane (BMP) and characters in Plane 1 through Plane 16 | |
38 | accessible via surrogate pairs in the Unicode Transformation | |
39 | Format, 16-bit encoding form (UTF-16). In other words, it can | |
40 | store values from 0x00000 to 0x10FFFF in the Unicode | |
41 | Transformation Format, 32-bit encoding form (UTF-32). However, | |
42 | in general, how CFCharacterSet stores the information is an | |
43 | implementation detail. Note that even the CFData used for the external | |
44 | bitmap representation rarely occupies the full 136K bytes. For detailed | |
45 | discussion of the external bitmap representation, refer to the | |
46 | comments for CFCharacterSetCreateWithBitmapRepresentation below. | |
47 | Note that the existence of non-BMP characters in a character set | |
48 | does not imply the membership of the corresponding surrogate | |
49 | characters. For example, a character set with U+10000 does not | |
50 | match with U+D800. | |
51 | */ | |
52 | ||
53 | #if !defined(__COREFOUNDATION_CFCHARACTERSET__) | |
54 | #define __COREFOUNDATION_CFCHARACTERSET__ 1 | |
55 | ||
56 | #include <CoreFoundation/CFBase.h> | |
57 | #include <CoreFoundation/CFData.h> | |
58 | ||
59 | #if defined(__cplusplus) | |
60 | extern "C" { | |
61 | #endif | |
62 | ||
63 | /*! | |
64 | @typedef CFCharacterSetRef | |
65 | This is the type of a reference to immutable CFCharacterSets. | |
66 | */ | |
67 | typedef const struct __CFCharacterSet * CFCharacterSetRef; | |
68 | ||
69 | /*! | |
70 | @typedef CFMutableCharacterSetRef | |
71 | This is the type of a reference to mutable CFCharacterSets. | |
72 | */ | |
73 | typedef struct __CFCharacterSet * CFMutableCharacterSetRef; | |
74 | ||
75 | /*! | |
76 | @typedef CFCharacterSetPredefinedSet | |
77 | Type of the predefined CFCharacterSet selector values. | |
78 | */ | |
79 | typedef enum { | |
80 | kCFCharacterSetControl = 1, /* Control character set (Unicode General Category Cc and Cf) */ | |
81 | kCFCharacterSetWhitespace, /* Whitespace character set (Unicode General Category Zs and U+0009 CHARACTER TABULATION) */ | |
82 | kCFCharacterSetWhitespaceAndNewline, /* Whitespace and Newline character set (Unicode General Category Z*, U+000A through U+000D, and U+0085) */ | |
83 | kCFCharacterSetDecimalDigit, /* Decimal digit character set */ | |
84 | kCFCharacterSetLetter, /* Letter character set (Unicode General Category L* & M*) */ | |
85 | kCFCharacterSetLowercaseLetter, /* Lowercase character set (Unicode General Category Ll) */ | |
86 | kCFCharacterSetUppercaseLetter, /* Uppercase character set (Unicode General Category Lu and Lt) */ | |
87 | kCFCharacterSetNonBase, /* Non-base character set (Unicode General Category M*) */ | |
88 | kCFCharacterSetDecomposable, /* Canonically decomposable character set */ | |
89 | kCFCharacterSetAlphaNumeric, /* Alpha Numeric character set (Unicode General Category L*, M*, & N*) */ | |
90 | kCFCharacterSetPunctuation, /* Punctuation character set (Unicode General Category P*) */ | |
91 | kCFCharacterSetIllegal /* Illegal character set */ | |
92 | #if MAC_OS_X_VERSION_10_2 <= MAC_OS_X_VERSION_MAX_ALLOWED | |
93 | , kCFCharacterSetCapitalizedLetter /* Titlecase character set (Unicode General Category Lt) */ | |
94 | #endif | |
95 | #if MAC_OS_X_VERSION_10_3 <= MAC_OS_X_VERSION_MAX_ALLOWED | |
96 | , kCFCharacterSetSymbol /* Symbol character set (Unicode General Category S*) */ | |
97 | #endif | |
98 | } CFCharacterSetPredefinedSet; | |
99 | ||
100 | /*! | |
101 | @function CFCharacterSetGetTypeID | |
102 | Returns the type identifier of all CFCharacterSet instances. | |
103 | */ | |
104 | CF_EXPORT | |
105 | CFTypeID CFCharacterSetGetTypeID(void); | |
106 | ||
107 | /*! | |
108 | @function CFCharacterSetGetPredefined | |
109 | Returns a predefined CFCharacterSet instance. | |
110 | @param theSetIdentifier The CFCharacterSetPredefinedSet selector | |
111 | which specifies the predefined character set. If the | |
112 | value is not in CFCharacterSetPredefinedSet, the behavior | |
113 | is undefined. | |
114 | @result A reference to the predefined immutable CFCharacterSet. | |
115 | This instance is owned by CF. | |
116 | */ | |
117 | CF_EXPORT | |
118 | CFCharacterSetRef CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier); | |
119 | ||
120 | /*! | |
121 | @function CFCharacterSetCreateWithCharactersInRange | |
122 | Creates a new immutable character set with the values from the given range. | |
123 | @param alloc The CFAllocator which should be used to allocate | |
124 | memory for the character set and its storage. This | |
125 | parameter may be NULL in which case the current default | |
126 | CFAllocator is used. If this reference is not a valid | |
127 | CFAllocator, the behavior is undefined. | |
128 | @param theRange The CFRange which should be used to specify the | |
129 | Unicode range the character set is filled with. The | |
130 | range is given as 32-bit UTF-32 code point values. The | |
131 | valid code point range is from 0x00000 to 0x10FFFF. | |
132 | If the range falls outside the valid Unicode code point | |
133 | range, the behavior is undefined. | |
134 | @result A reference to the new immutable CFCharacterSet. | |
135 | */ | |
136 | CF_EXPORT | |
137 | CFCharacterSetRef CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef alloc, CFRange theRange); | |
138 | ||
139 | /*! | |
140 | @function CFCharacterSetCreateWithCharactersInString | |
141 | Creates a new immutable character set with the values in the given string. | |
142 | @param alloc The CFAllocator which should be used to allocate | |
143 | memory for the character set and its storage. This | |
144 | parameter may be NULL in which case the current default | |
145 | CFAllocator is used. If this reference is not a valid | |
146 | CFAllocator, the behavior is undefined. | |
147 | @param theString The CFString which should be used to specify | |
148 | the Unicode characters the character set is filled with. | |
149 | If this parameter is not a valid CFString, the behavior | |
150 | is undefined. | |
151 | @result A reference to the new immutable CFCharacterSet. | |
152 | */ | |
153 | CF_EXPORT | |
154 | CFCharacterSetRef CFCharacterSetCreateWithCharactersInString(CFAllocatorRef alloc, CFStringRef theString); | |
155 | ||
156 | /*! | |
157 | @function CFCharacterSetCreateWithBitmapRepresentation | |
158 | Creates a new immutable character set with the bitmap representation in the given data. | |
159 | @param alloc The CFAllocator which should be used to allocate | |
160 | memory for the character set and its storage. This | |
161 | parameter may be NULL in which case the current default | |
162 | CFAllocator is used. If this reference is not a valid | |
163 | CFAllocator, the behavior is undefined. | |
164 | @param theData The CFData which should be used to specify the | |
165 | bitmap representation of the Unicode character points | |
166 | the character set is filled with. The bitmap | |
167 | representation could contain all the Unicode character | |
168 | range starting from BMP to Plane 16. The first 8K bytes | |
169 | of the data represent the BMP range. The BMP range 8K | |
170 | bytes can be followed by zero to sixteen 8K byte | |
171 | bitmaps, each one with the plane index byte prepended. | |
172 | For example, the bitmap representing the BMP and Plane 2 | |
173 | has the size of 16385 bytes (8K bytes for BMP, 1 byte | |
174 | index + 8K bytes bitmap for Plane 2). The plane index | |
175 | byte, in this case, contains the integer value two. If | |
176 | this parameter is not a valid CFData or it contains a | |
177 | Plane index byte outside of the valid Plane range | |
178 | (1 to 16), the behavior is undefined. | |
179 | @result A reference to the new immutable CFCharacterSet. | |
180 | */ | |
181 | CF_EXPORT | |
182 | CFCharacterSetRef CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef alloc, CFDataRef theData); | |
183 | ||
184 | #if MAC_OS_X_VERSION_10_2 <= MAC_OS_X_VERSION_MAX_ALLOWED | |
185 | /*! | |
186 | @function CFCharacterSetCreateInvertedSet | |
187 | Creates a new immutable character set that is the inverse of the specified character set. | |
188 | @param alloc The CFAllocator which should be used to allocate | |
189 | memory for the character set and its storage. This | |
190 | parameter may be NULL in which case the current default | |
191 | CFAllocator is used. If this reference is not a valid | |
192 | CFAllocator, the behavior is undefined. | |
193 | @param theSet The CFCharacterSet which is to be inverted. If this | |
194 | parameter is not a valid CFCharacterSet, the behavior is | |
195 | undefined. | |
196 | @result A reference to the new immutable CFCharacterSet. | |
197 | */ | |
198 | CF_EXPORT CFCharacterSetRef CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc, CFCharacterSetRef theSet); | |
199 | ||
200 | /*! | |
201 | @function CFCharacterSetIsSupersetOfSet | |
202 | Reports whether or not the character set is a superset of the character set specified as the second parameter. | |
203 | @param theSet The character set to be checked for the membership of theOtherset. | |
204 | If this parameter is not a valid CFCharacterSet, the behavior is undefined. | |
205 | @param theOtherset The character set to be checked whether or not it is a subset of theSet. | |
206 | If this parameter is not a valid CFCharacterSet, the behavior is undefined. | |
207 | */ | |
208 | CF_EXPORT Boolean CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet, CFCharacterSetRef theOtherset); | |
209 | ||
210 | /*! | |
211 | @function CFCharacterSetHasMemberInPlane | |
212 | Reports whether or not the character set contains at least one member character in the specified plane. | |
213 | @param theSet The character set to be checked for the membership. If this | |
214 | parameter is not a valid CFCharacterSet, the behavior is undefined. | |
215 | @param thePlane The plane number to be checked for the membership. | |
216 | The valid value range is from 0 to 16. If the value is outside of the valid | |
217 | plane number range, the behavior is undefined. | |
218 | */ | |
219 | CF_EXPORT Boolean CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet, CFIndex thePlane); | |
220 | #endif | |
221 | ||
222 | /*! | |
223 | @function CFCharacterSetCreateMutable | |
224 | Creates a new empty mutable character set. | |
225 | @param alloc The CFAllocator which should be used to allocate | |
226 | memory for the character set and its storage. This | |
227 | parameter may be NULL in which case the current default | |
228 | CFAllocator is used. If this reference is not a valid | |
229 | CFAllocator, the behavior is undefined. | |
230 | @result A reference to the new mutable CFCharacterSet. | |
231 | */ | |
232 | CF_EXPORT | |
233 | CFMutableCharacterSetRef CFCharacterSetCreateMutable(CFAllocatorRef alloc); | |
234 | ||
235 | #if MAC_OS_X_VERSION_10_3 <= MAC_OS_X_VERSION_MAX_ALLOWED | |
236 | /*! | |
237 | @function CFCharacterSetCreateCopy | |
238 | Creates a new character set with the values from the given character set. This function tries to compact the backing store where applicable. | |
239 | @param alloc The CFAllocator which should be used to allocate | |
240 | memory for the character set and its storage. This | |
241 | parameter may be NULL in which case the current default | |
242 | CFAllocator is used. If this reference is not a valid | |
243 | CFAllocator, the behavior is undefined. | |
244 | @param theSet The CFCharacterSet which is to be copied. If this | |
245 | parameter is not a valid CFCharacterSet, the behavior is | |
246 | undefined. | |
247 | @result A reference to the new CFCharacterSet. | |
248 | */ | |
249 | CF_EXPORT | |
250 | CFCharacterSetRef CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER; | |
251 | #endif /* MAC_OS_X_VERSION_10_3 <= MAC_OS_X_VERSION_MAX_ALLOWED */ | |
252 | ||
253 | /*! | |
254 | @function CFCharacterSetCreateMutableCopy | |
255 | Creates a new mutable character set with the values from the given character set. | |
256 | @param alloc The CFAllocator which should be used to allocate | |
257 | memory for the character set and its storage. This | |
258 | parameter may be NULL in which case the current default | |
259 | CFAllocator is used. If this reference is not a valid | |
260 | CFAllocator, the behavior is undefined. | |
261 | @param theSet The CFCharacterSet which is to be copied. If this | |
262 | parameter is not a valid CFCharacterSet, the behavior is | |
263 | undefined. | |
264 | @result A reference to the new mutable CFCharacterSet. | |
265 | */ | |
266 | CF_EXPORT | |
267 | CFMutableCharacterSetRef CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet); | |
268 | ||
269 | /*! | |
270 | @function CFCharacterSetIsCharacterMember | |
271 | Reports whether or not the Unicode character is in the character set. | |
272 | @param theSet The character set to be searched. If this parameter | |
273 | is not a valid CFCharacterSet, the behavior is undefined. | |
274 | @param theChar The Unicode character to test against the | |
275 | character set. Note that this function takes a 16-bit Unicode | |
276 | character value; hence, it does not support access to the | |
277 | non-BMP planes. | |
278 | @result true, if the value is in the character set, otherwise false. | |
279 | */ | |
280 | CF_EXPORT | |
281 | Boolean CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet, UniChar theChar); | |
282 | ||
283 | #if MAC_OS_X_VERSION_10_2 <= MAC_OS_X_VERSION_MAX_ALLOWED | |
284 | /*! | |
285 | @function CFCharacterSetIsLongCharacterMember | |
286 | Reports whether or not the UTF-32 character is in the character set. | |
287 | @param theSet The character set to be searched. If this parameter | |
288 | is not a valid CFCharacterSet, the behavior is undefined. | |
289 | @param theChar The UTF-32 character to test against the | |
290 | character set. | |
291 | @result true, if the value is in the character set, otherwise false. | |
292 | */ | |
293 | CF_EXPORT Boolean CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet, UTF32Char theChar); | |
294 | #endif | |
295 | ||
296 | /*! | |
297 | @function CFCharacterSetCreateBitmapRepresentation | |
298 | Creates a new immutable data with the bitmap representation from the given character set. | |
299 | @param alloc The CFAllocator which should be used to allocate | |
300 | memory for the data and its storage. This | |
301 | parameter may be NULL in which case the current default | |
302 | CFAllocator is used. If this reference is not a valid | |
303 | CFAllocator, the behavior is undefined. | |
304 | @param theSet The CFCharacterSet which is to be used to create the | |
305 | bitmap representation from. Refer to the comments for | |
306 | CFCharacterSetCreateWithBitmapRepresentation for the | |
307 | detailed discussion of the bitmap representation format. | |
308 | If this parameter is not a valid CFCharacterSet, the | |
309 | behavior is undefined. | |
310 | @result A reference to the new immutable CFData. | |
311 | */ | |
312 | CF_EXPORT | |
313 | CFDataRef CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc, CFCharacterSetRef theSet); | |
314 | ||
315 | /*! | |
316 | @function CFCharacterSetAddCharactersInRange | |
317 | Adds the given range to the character set. | |
318 | @param theSet The character set to which the range is to be added. | |
319 | If this parameter is not a valid mutable CFCharacterSet, | |
320 | the behavior is undefined. | |
321 | @param theRange The range to add to the character set. The range | |
322 | is given as 32-bit UTF-32 code point values. The valid | |
323 | code point range is from 0x00000 to 0x10FFFF. If the | |
324 | range falls outside the valid Unicode code point range, | |
325 | the behavior is undefined. | |
326 | */ | |
327 | CF_EXPORT | |
328 | void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange); | |
329 | ||
330 | /*! | |
331 | @function CFCharacterSetRemoveCharactersInRange | |
332 | Removes the given range from the character set. | |
333 | @param theSet The character set from which the range is to be | |
334 | removed. If this parameter is not a valid mutable | |
335 | CFCharacterSet, the behavior is undefined. | |
336 | @param theRange The range to remove from the character set. | |
337 | The range is given as 32-bit UTF-32 code point values. | |
338 | The valid code point range is from 0x00000 to 0x10FFFF. | |
339 | If the range falls outside the valid Unicode code point range, | |
340 | the behavior is undefined. | |
341 | */ | |
342 | CF_EXPORT | |
343 | void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange); | |
344 | ||
345 | /*! | |
346 | @function CFCharacterSetAddCharactersInString | |
347 | Adds the characters in the given string to the character set. | |
348 | @param theSet The character set to which the characters in the | |
349 | string are to be added. If this parameter is not a | |
350 | valid mutable CFCharacterSet, the behavior is undefined. | |
351 | @param theString The string to add to the character set. | |
352 | If this parameter is not a valid CFString, the behavior | |
353 | is undefined. | |
354 | */ | |
355 | CF_EXPORT | |
356 | void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString); | |
357 | ||
358 | /*! | |
359 | @function CFCharacterSetRemoveCharactersInString | |
360 | Removes the characters in the given string from the character set. | |
361 | @param theSet The character set from which the characters in the | |
362 | string are to be removed. If this parameter is not a | |
363 | valid mutable CFCharacterSet, the behavior is undefined. | |
364 | @param theString The string to remove from the character set. | |
365 | If this parameter is not a valid CFString, the behavior | |
366 | is undefined. | |
367 | */ | |
368 | CF_EXPORT | |
369 | void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString); | |
370 | ||
371 | /*! | |
372 | @function CFCharacterSetUnion | |
373 | Forms the union with the given character set. | |
374 | @param theSet The destination character set into which the | |
375 | union of the two character sets is stored. If this | |
376 | parameter is not a valid mutable CFCharacterSet, the | |
377 | behavior is undefined. | |
378 | @param theOtherSet The character set with which the union is | |
379 | formed. If this parameter is not a valid CFCharacterSet, | |
380 | the behavior is undefined. | |
381 | */ | |
382 | CF_EXPORT | |
383 | void CFCharacterSetUnion(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet); | |
384 | ||
385 | /*! | |
386 | @function CFCharacterSetIntersect | |
387 | Forms the intersection with the given character set. | |
388 | @param theSet The destination character set into which the | |
389 | intersection of the two character sets is stored. | |
390 | If this parameter is not a valid mutable CFCharacterSet, | |
391 | the behavior is undefined. | |
392 | @param theOtherSet The character set with which the intersection | |
393 | is formed. If this parameter is not a valid CFCharacterSet, | |
394 | the behavior is undefined. | |
395 | */ | |
396 | CF_EXPORT | |
397 | void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet); | |
398 | ||
399 | /*! | |
400 | @function CFCharacterSetInvert | |
401 | Inverts the content of the given character set. | |
402 | @param theSet The character set to be inverted. | |
403 | If this parameter is not a valid mutable CFCharacterSet, | |
404 | the behavior is undefined. | |
405 | */ | |
406 | CF_EXPORT | |
407 | void CFCharacterSetInvert(CFMutableCharacterSetRef theSet); | |
408 | ||
409 | #if defined(__cplusplus) | |
410 | } | |
411 | #endif | |
412 | ||
413 | #endif /* !__COREFOUNDATION_CFCHARACTERSET__ */ | |
414 |