]> git.saurik.com Git - apple/cf.git/blob - String.subproj/CFCharacterSet.h
CF-299.35.tar.gz
[apple/cf.git] / String.subproj / CFCharacterSet.h
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* CFCharacterSet.h
26 Copyright (c) 1999-2003, Apple, Inc. All rights reserved.
27 */
28
29 /*!
30 @header CFCharacterSet
31 CFCharacterSet represents a set, or a bag, of Unicode characters.
32 The API consists of 3 groups:
33 1) creation/manipulation of CFCharacterSet instances,
34 2) query of a single Unicode character membership,
35 and 3) bitmap representation related (reading/writing).
36 Conceptually, CFCharacterSet is a 136K byte bitmap array of
37 which each bit represents a Unicode code point. It could
38 contain the Unicode characters in ISO 10646 Basic Multilingual
39 Plane (BMP) and characters in Plane 1 through Plane 16
40 accessible via surrogate pairs in the Unicode Transformation
41 Format, 16-bit encoding form (UTF-16). In other words, it can
42 store values from 0x00000 to 0x10FFFF in the Unicode
43 Transformation Format, 32-bit encoding form (UTF-32). However,
44 in general, how CFCharacterSet stores the information is an
45 implementation detail. Note that even the CFData used for the external
46 bitmap representation rarely has 136K bytes. For detailed
47 discussion of the external bitmap representation, refer to the
48 comments for CFCharacterSetCreateWithBitmapRepresentation below.
49 Note that the existence of non-BMP characters in a character set
50 does not imply the membership of the corresponding surrogate
51 characters. For example, a character set with U+10000 does not
52 match with U+D800.
53 */
54
55 #if !defined(__COREFOUNDATION_CFCHARACTERSET__)
56 #define __COREFOUNDATION_CFCHARACTERSET__ 1
57
58 #include <CoreFoundation/CFBase.h>
59 #include <CoreFoundation/CFData.h>
60
61 #if defined(__cplusplus)
62 extern "C" {
63 #endif
64
65 /*!
66 @typedef CFCharacterSetRef
67 This is the type of a reference to immutable CFCharacterSets.
68 */
69 typedef const struct __CFCharacterSet * CFCharacterSetRef;
70
71 /*!
72 @typedef CFMutableCharacterSetRef
73 This is the type of a reference to mutable CFCharacterSets.
74 */
75 typedef struct __CFCharacterSet * CFMutableCharacterSetRef;
76
77 /*!
78 @typedef CFCharacterSetPredefinedSet
79 Type of the predefined CFCharacterSet selector values.
80 */
81 typedef enum {
82 kCFCharacterSetControl = 1, /* Control character set (Unicode General Category Cc and Cf) */
83 kCFCharacterSetWhitespace, /* Whitespace character set (Unicode General Category Zs and U+0009 CHARACTER TABULATION) */
84 kCFCharacterSetWhitespaceAndNewline, /* Whitespace and Newline character set (Unicode General Category Z*, U+000A ~ U+000D, and U+0085) */
85 kCFCharacterSetDecimalDigit, /* Decimal digit character set */
86 kCFCharacterSetLetter, /* Letter character set (Unicode General Category L* & M*) */
87 kCFCharacterSetLowercaseLetter, /* Lowercase character set (Unicode General Category Ll) */
88 kCFCharacterSetUppercaseLetter, /* Uppercase character set (Unicode General Category Lu and Lt) */
89 kCFCharacterSetNonBase, /* Non-base character set (Unicode General Category M*) */
90 kCFCharacterSetDecomposable, /* Canonically decomposable character set */
91 kCFCharacterSetAlphaNumeric, /* Alpha Numeric character set (Unicode General Category L*, M*, & N*) */
92 kCFCharacterSetPunctuation, /* Punctuation character set (Unicode General Category P*) */
93 kCFCharacterSetIllegal /* Illegal character set */
94 #if MAC_OS_X_VERSION_10_2 <= MAC_OS_X_VERSION_MAX_ALLOWED
95 , kCFCharacterSetCapitalizedLetter /* Titlecase character set (Unicode General Category Lt) */
96 #endif
97 #if MAC_OS_X_VERSION_10_3 <= MAC_OS_X_VERSION_MAX_ALLOWED
98 , kCFCharacterSetSymbol /* Symbol character set (Unicode General Category S*) */
99 #endif
100 } CFCharacterSetPredefinedSet;
101
102 /*!
103 @function CFCharacterSetGetTypeID
104 Returns the type identifier of all CFCharacterSet instances.
105 */
106 CF_EXPORT
107 CFTypeID CFCharacterSetGetTypeID(void);
108
109 /*!
110 @function CFCharacterSetGetPredefined
111 Returns a predefined CFCharacterSet instance.
112 @param theSetIdentifier The CFCharacterSetPredefinedSet selector
113 which specifies the predefined character set. If the
114 value is not in CFCharacterSetPredefinedSet, the behavior
115 is undefined.
116 @result A reference to the predefined immutable CFCharacterSet.
117 This instance is owned by CF.
118 */
119 CF_EXPORT
120 CFCharacterSetRef CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier);
121
122 /*!
123 @function CFCharacterSetCreateWithCharactersInRange
124 Creates a new immutable character set with the values from the given range.
125 @param alloc The CFAllocator which should be used to allocate
126 memory for the character set and its storage for values. This
127 parameter may be NULL in which case the current default
128 CFAllocator is used. If this reference is not a valid
129 CFAllocator, the behavior is undefined.
130 @param theRange The CFRange which should be used to specify the
131 Unicode range the character set is filled with. It
132 accepts the range in 32-bit in the UTF-32 format. The
133 valid character point range is from 0x00000 to 0x10FFFF.
134 If the range is outside of the valid Unicode character
135 point, the behavior is undefined.
136 @result A reference to the new immutable CFCharacterSet.
137 */
138 CF_EXPORT
139 CFCharacterSetRef CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef alloc, CFRange theRange);
140
141 /*!
142 @function CFCharacterSetCreateWithCharactersInString
143 Creates a new immutable character set with the values in the given string.
144 @param alloc The CFAllocator which should be used to allocate
145 memory for the character set and its storage for values. This
146 parameter may be NULL in which case the current default
147 CFAllocator is used. If this reference is not a valid
148 CFAllocator, the behavior is undefined.
149 @param theString The CFString which should be used to specify
150 the Unicode characters the character set is filled with.
151 If this parameter is not a valid CFString, the behavior
152 is undefined.
153 @result A reference to the new immutable CFCharacterSet.
154 */
155 CF_EXPORT
156 CFCharacterSetRef CFCharacterSetCreateWithCharactersInString(CFAllocatorRef alloc, CFStringRef theString);
157
158 /*!
159 @function CFCharacterSetCreateWithBitmapRepresentation
160 Creates a new immutable character set with the bitmap representation in the given data.
161 @param alloc The CFAllocator which should be used to allocate
162 memory for the character set and its storage for values. This
163 parameter may be NULL in which case the current default
164 CFAllocator is used. If this reference is not a valid
165 CFAllocator, the behavior is undefined.
166 @param theData The CFData which should be used to specify the
167 bitmap representation of the Unicode character points
168 the character set is filled with. The bitmap
169 representation could contain all the Unicode character
170 range starting from BMP to Plane 16. The first 8K bytes
171 of the data represents the BMP range. The BMP range 8K
172 bytes can be followed by zero to sixteen 8K byte
173 bitmaps, each one with the plane index byte prepended.
174 For example, the bitmap representing the BMP and Plane 2
175 has the size of 16385 bytes (8K bytes for BMP, 1 byte
176 index + 8K bytes bitmap for Plane 2). The plane index
177 byte, in this case, contains the integer value two. If
178 this parameter is not a valid CFData or it contains a
179 Plane index byte outside of the valid Plane range
180 (1 to 16), the behavior is undefined.
181 @result A reference to the new immutable CFCharacterSet.
182 */
183 CF_EXPORT
184 CFCharacterSetRef CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef alloc, CFDataRef theData);
185
186 #if MAC_OS_X_VERSION_10_2 <= MAC_OS_X_VERSION_MAX_ALLOWED
187 /*!
188 @function CFCharacterSetCreateInvertedSet
189 Creates a new immutable character set that is the inverse of the specified character set.
190 @param alloc The CFAllocator which should be used to allocate
191 memory for the character set and its storage for values. This
192 parameter may be NULL in which case the current default
193 CFAllocator is used. If this reference is not a valid
194 CFAllocator, the behavior is undefined.
195 @param theSet The CFCharacterSet which is to be inverted. If this
196 parameter is not a valid CFCharacterSet, the behavior is
197 undefined.
198 @result A reference to the new immutable CFCharacterSet.
199 */
200 CF_EXPORT CFCharacterSetRef CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc, CFCharacterSetRef theSet);
201
202 /*!
203 @function CFCharacterSetIsSupersetOfSet
204 Reports whether or not the character set is a superset of the character set specified as the second parameter.
205 @param theSet The character set to be checked for the membership of theOtherset.
206 If this parameter is not a valid CFCharacterSet, the behavior is undefined.
207 @param theOtherset The character set to be checked whether or not it is a subset of theSet.
208 If this parameter is not a valid CFCharacterSet, the behavior is undefined.
209 */
210 CF_EXPORT Boolean CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet, CFCharacterSetRef theOtherset);
211
212 /*!
213 @function CFCharacterSetHasMemberInPlane
214 Reports whether or not the character set contains at least one member character in the specified plane.
215 @param theSet The character set to be checked for the membership. If this
216 parameter is not a valid CFCharacterSet, the behavior is undefined.
217 @param thePlane The plane number to be checked for the membership.
218 The valid value range is from 0 to 16. If the value is outside of the valid
219 plane number range, the behavior is undefined.
220 */
221 CF_EXPORT Boolean CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet, CFIndex thePlane);
222 #endif
223
224 /*!
225 @function CFCharacterSetCreateMutable
226 Creates a new empty mutable character set.
227 @param alloc The CFAllocator which should be used to allocate
228 memory for the character set and its storage for values. This
229 parameter may be NULL in which case the current default
230 CFAllocator is used. If this reference is not a valid
231 CFAllocator, the behavior is undefined.
232 @result A reference to the new mutable CFCharacterSet.
233 */
234 CF_EXPORT
235 CFMutableCharacterSetRef CFCharacterSetCreateMutable(CFAllocatorRef alloc);
236
237 #if MAC_OS_X_VERSION_10_3 <= MAC_OS_X_VERSION_MAX_ALLOWED
238 /*!
239 @function CFCharacterSetCreateCopy
240 Creates a new character set with the values from the given character set. This function tries to compact the backing store where applicable.
241 @param alloc The CFAllocator which should be used to allocate
242 memory for the character set and its storage for values. This
243 parameter may be NULL in which case the current default
244 CFAllocator is used. If this reference is not a valid
245 CFAllocator, the behavior is undefined.
246 @param theSet The CFCharacterSet which is to be copied. If this
247 parameter is not a valid CFCharacterSet, the behavior is
248 undefined.
249 @result A reference to the new CFCharacterSet.
250 */
251 CF_EXPORT
252 CFCharacterSetRef CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
253 #endif /* MAC_OS_X_VERSION_10_3 <= MAC_OS_X_VERSION_MAX_ALLOWED */
254
255 /*!
256 @function CFCharacterSetCreateMutableCopy
257 Creates a new mutable character set with the values from the given character set.
258 @param alloc The CFAllocator which should be used to allocate
259 memory for the character set and its storage for values. This
260 parameter may be NULL in which case the current default
261 CFAllocator is used. If this reference is not a valid
262 CFAllocator, the behavior is undefined.
263 @param theSet The CFCharacterSet which is to be copied. If this
264 parameter is not a valid CFCharacterSet, the behavior is
265 undefined.
266 @result A reference to the new mutable CFCharacterSet.
267 */
268 CF_EXPORT
269 CFMutableCharacterSetRef CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet);
270
271 /*!
272 @function CFCharacterSetIsCharacterMember
273 Reports whether or not the Unicode character is in the character set.
274 @param theSet The character set to be searched. If this parameter
275 is not a valid CFCharacterSet, the behavior is undefined.
276 @param theChar The Unicode character to test against the
277 character set. Note that this function takes a 16-bit Unicode
278 character value; hence, it does not support access to the
279 non-BMP planes.
280 @result true, if the value is in the character set, otherwise false.
281 */
282 CF_EXPORT
283 Boolean CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet, UniChar theChar);
284
285 #if MAC_OS_X_VERSION_10_2 <= MAC_OS_X_VERSION_MAX_ALLOWED
286 /*!
287 @function CFCharacterSetIsLongCharacterMember
288 Reports whether or not the UTF-32 character is in the character set.
289 @param theSet The character set to be searched. If this parameter
290 is not a valid CFCharacterSet, the behavior is undefined.
291 @param theChar The UTF-32 character to test against the
292 character set.
293 @result true, if the value is in the character set, otherwise false.
294 */
295 CF_EXPORT Boolean CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet, UTF32Char theChar);
296 #endif
297
298 /*!
299 @function CFCharacterSetCreateBitmapRepresentation
300 Creates a new immutable data with the bitmap representation from the given character set.
301 @param alloc The CFAllocator which should be used to allocate
302 memory for the new CFData and its byte storage. This
303 parameter may be NULL in which case the current default
304 CFAllocator is used. If this reference is not a valid
305 CFAllocator, the behavior is undefined.
306 @param theSet The CFCharacterSet which is to be used to create the
307 bitmap representation from. Refer to the comments for
308 CFCharacterSetCreateWithBitmapRepresentation for the
309 detailed discussion of the bitmap representation format.
310 If this parameter is not a valid CFCharacterSet, the
311 behavior is undefined.
312 @result A reference to the new immutable CFData.
313 */
314 CF_EXPORT
315 CFDataRef CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc, CFCharacterSetRef theSet);
316
317 /*!
318 @function CFCharacterSetAddCharactersInRange
319 Adds the given range to the character set.
320 @param theSet The character set to which the range is to be added.
321 If this parameter is not a valid mutable CFCharacterSet,
322 the behavior is undefined.
323 @param theRange The range to add to the character set. It accepts
324 the range in 32-bit in the UTF-32 format. The valid
325 character point range is from 0x00000 to 0x10FFFF. If the
326 range is outside of the valid Unicode character point,
327 the behavior is undefined.
328 */
329 CF_EXPORT
330 void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange);
331
332 /*!
333 @function CFCharacterSetRemoveCharactersInRange
334 Removes the given range from the character set.
335 @param theSet The character set from which the range is to be
336 removed. If this parameter is not a valid mutable
337 CFCharacterSet, the behavior is undefined.
338 @param theRange The range to remove from the character set.
339 It accepts the range in 32-bit in the UTF-32 format.
340 The valid character point range is from 0x00000 to 0x10FFFF.
341 If the range is outside of the valid Unicode character point,
342 the behavior is undefined.
343 */
344 CF_EXPORT
345 void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange);
346
347 /*!
348 @function CFCharacterSetAddCharactersInString
349 Adds the characters in the given string to the character set.
350 @param theSet The character set to which the characters in the
351 string are to be added. If this parameter is not a
352 valid mutable CFCharacterSet, the behavior is undefined.
353 @param theString The string to add to the character set.
354 If this parameter is not a valid CFString, the behavior
355 is undefined.
356 */
357 CF_EXPORT
358 void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString);
359
360 /*!
361 @function CFCharacterSetRemoveCharactersInString
362 Removes the characters in the given string from the character set.
363 @param theSet The character set from which the characters in the
364 string are to be removed. If this parameter is not a
365 valid mutable CFCharacterSet, the behavior is undefined.
366 @param theString The string to remove from the character set.
367 If this parameter is not a valid CFString, the behavior
368 is undefined.
369 */
370 CF_EXPORT
371 void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString);
372
373 /*!
374 @function CFCharacterSetUnion
375 Forms the union with the given character set.
376 @param theSet The destination character set into which the
377 union of the two character sets is stored. If this
378 parameter is not a valid mutable CFCharacterSet, the
379 behavior is undefined.
380 @param theOtherSet The character set with which the union is
381 formed. If this parameter is not a valid CFCharacterSet,
382 the behavior is undefined.
383 */
384 CF_EXPORT
385 void CFCharacterSetUnion(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet);
386
387 /*!
388 @function CFCharacterSetIntersect
389 Forms the intersection with the given character set.
390 @param theSet The destination character set into which the
391 intersection of the two character sets is stored.
392 If this parameter is not a valid mutable CFCharacterSet,
393 the behavior is undefined.
394 @param theOtherSet The character set with which the intersection
395 is formed. If this parameter is not a valid CFCharacterSet,
396 the behavior is undefined.
397 */
398 CF_EXPORT
399 void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet);
400
401 /*!
402 @function CFCharacterSetInvert
403 Inverts the content of the given character set.
404 @param theSet The character set to be inverted.
405 If this parameter is not a valid mutable CFCharacterSet,
406 the behavior is undefined.
407 */
408 CF_EXPORT
409 void CFCharacterSetInvert(CFMutableCharacterSetRef theSet);
410
411 #if defined(__cplusplus)
412 }
413 #endif
414
415 #endif /* !__COREFOUNDATION_CFCHARACTERSET__ */
416