2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
26 Copyright (c) 1999-2003, Apple, Inc. All rights reserved.
30 @header CFCharacterSet
31 CFCharacterSet represents a set, or a bag, of Unicode characters.
32 The API consists of 3 groups:
33 1) creation/manipulation of CFCharacterSet instances,
34 2) query of a single Unicode character membership,
35 and 3) bitmap representation related (reading/writing).
36 Conceptually, CFCharacterSet is a 136K byte bitmap array of
37 which each bit represents a Unicode code point. It could
38 contain the Unicode characters in ISO 10646 Basic Multilingual
39 Plane (BMP) and characters in Plane 1 through Plane 16
40 accessible via surrogate pairs in the Unicode Transformation
41 Format, 16-bit encoding form (UTF-16). In other words, it can
42 store values from 0x00000 to 0x10FFFF in the Unicode
43 Transformation Format, 32-bit encoding form (UTF-32). However,
44 in general, how CFCharacterSet stores the information is an
45 implementation detail. Note even CFData used for the external
46 bitmap representation rarely has 136K bytes. For detailed
47 discussion of the external bitmap representation, refer to the
48 comments for CFCharacterSetCreateWithBitmapRepresentation below.
49 Note that the existence of non-BMP characters in a character set
50 does not imply the membership of the corresponding surrogate
51 characters. For example, a character set with U+10000 does not match U+D800.
55 #if !defined(__COREFOUNDATION_CFCHARACTERSET__)
56 #define __COREFOUNDATION_CFCHARACTERSET__ 1
58 #include <CoreFoundation/CFBase.h>
59 #include <CoreFoundation/CFData.h>
61 #if defined(__cplusplus)
66 @typedef CFCharacterSetRef
67 This is the type of a reference to immutable CFCharacterSets.
69 typedef const struct __CFCharacterSet
* CFCharacterSetRef
;
72 @typedef CFMutableCharacterSetRef
73 This is the type of a reference to mutable CFMutableCharacterSets.
75 typedef struct __CFCharacterSet
* CFMutableCharacterSetRef
;
78 @typedef CFCharacterSetPredefinedSet
79 Type of the predefined CFCharacterSet selector values.
82 kCFCharacterSetControl
= 1, /* Control character set (Unicode General Category Cc and Cf) */
83 kCFCharacterSetWhitespace
, /* Whitespace character set (Unicode General Category Zs and U0009 CHARACTER TABULATION) */
84 kCFCharacterSetWhitespaceAndNewline
, /* Whitespace and Newline character set (Unicode General Category Z*, U000A ~ U000D, and U0085) */
85 kCFCharacterSetDecimalDigit
, /* Decimal digit character set */
86 kCFCharacterSetLetter
, /* Letter character set (Unicode General Category L* & M*) */
87 kCFCharacterSetLowercaseLetter
, /* Lowercase character set (Unicode General Category Ll) */
88 kCFCharacterSetUppercaseLetter
, /* Uppercase character set (Unicode General Category Lu and Lt) */
89 kCFCharacterSetNonBase
, /* Non-base character set (Unicode General Category M*) */
90 kCFCharacterSetDecomposable
, /* Canonically decomposable character set */
91 kCFCharacterSetAlphaNumeric
, /* Alpha Numeric character set (Unicode General Category L*, M*, & N*) */
92 kCFCharacterSetPunctuation
, /* Punctuation character set (Unicode General Category P*) */
93 kCFCharacterSetIllegal
/* Illegal character set */
94 #if MAC_OS_X_VERSION_10_2 <= MAC_OS_X_VERSION_MAX_ALLOWED
95 , kCFCharacterSetCapitalizedLetter
/* Titlecase character set (Unicode General Category Lt) */
97 #if MAC_OS_X_VERSION_10_3 <= MAC_OS_X_VERSION_MAX_ALLOWED
98 , kCFCharacterSetSymbol
/* Symbol character set (Unicode General Category S*) */
100 } CFCharacterSetPredefinedSet
;
103 @function CFCharacterSetGetTypeID
104 Returns the type identifier of all CFCharacterSet instances.
107 CFTypeID
CFCharacterSetGetTypeID(void);
110 @function CFCharacterSetGetPredefined
111 Returns a predefined CFCharacterSet instance.
112 @param theSetIdentifier The CFCharacterSetPredefinedSet selector
113 which specifies the predefined character set. If the
114 value is not in CFCharacterSetPredefinedSet, the behavior is undefined.
116 @result A reference to the predefined immutable CFCharacterSet.
117 This instance is owned by CF.
120 CFCharacterSetRef
CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier
);
123 @function CFCharacterSetCreateWithCharactersInRange
124 Creates a new immutable character set with the values from the given range.
125 @param alloc The CFAllocator which should be used to allocate
126 memory for the array and its storage for values. This
127 parameter may be NULL in which case the current default
128 CFAllocator is used. If this reference is not a valid
129 CFAllocator, the behavior is undefined.
130 @param theRange The CFRange which should be used to specify the
131 Unicode range the character set is filled with. It
132 accepts the range in 32-bit in the UTF-32 format. The
133 valid character point range is from 0x00000 to 0x10FFFF.
134 If the range is outside of the valid Unicode character
135 point, the behavior is undefined.
136 @result A reference to the new immutable CFCharacterSet.
139 CFCharacterSetRef
CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef alloc
, CFRange theRange
);
142 @function CFCharacterSetCreateWithCharactersInString
143 Creates a new immutable character set with the values in the given string.
144 @param alloc The CFAllocator which should be used to allocate
145 memory for the array and its storage for values. This
146 parameter may be NULL in which case the current default
147 CFAllocator is used. If this reference is not a valid
148 CFAllocator, the behavior is undefined.
149 @param theString The CFString which should be used to specify
150 the Unicode characters the character set is filled with.
151 If this parameter is not a valid CFString, the behavior is undefined.
153 @result A reference to the new immutable CFCharacterSet.
156 CFCharacterSetRef
CFCharacterSetCreateWithCharactersInString(CFAllocatorRef alloc
, CFStringRef theString
);
159 @function CFCharacterSetCreateWithBitmapRepresentation
160 Creates a new immutable character set with the bitmap representation in the given data.
161 @param alloc The CFAllocator which should be used to allocate
162 memory for the array and its storage for values. This
163 parameter may be NULL in which case the current default
164 CFAllocator is used. If this reference is not a valid
165 CFAllocator, the behavior is undefined.
166 @param theData The CFData which should be used to specify the
167 bitmap representation of the Unicode character points
168 the character set is filled with. The bitmap
169 representation could contain all the Unicode character
170 range starting from BMP to Plane 16. The first 8K bytes
171 of the data represents the BMP range. The BMP range 8K
172 bytes can be followed by zero to sixteen 8K byte
173 bitmaps, each one with the plane index byte prepended.
174 For example, the bitmap representing the BMP and Plane 2
175 has the size of 16385 bytes (8K bytes for BMP, 1 byte
176 index + 8K bytes bitmap for Plane 2). The plane index
177 byte, in this case, contains the integer value two. If
178 this parameter is not a valid CFData or it contains a
179 Plane index byte outside of the valid Plane range
180 (1 to 16), the behavior is undefined.
181 @result A reference to the new immutable CFCharacterSet.
184 CFCharacterSetRef
CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef alloc
, CFDataRef theData
);
186 #if MAC_OS_X_VERSION_10_2 <= MAC_OS_X_VERSION_MAX_ALLOWED
188 @function CFCharacterSetCreateInvertedSet
189 Creates a new immutable character set that is the inverse of the specified character set.
190 @param alloc The CFAllocator which should be used to allocate
191 memory for the array and its storage for values. This
192 parameter may be NULL in which case the current default
193 CFAllocator is used. If this reference is not a valid
194 CFAllocator, the behavior is undefined.
195 @param theSet The CFCharacterSet which is to be inverted. If this
196 parameter is not a valid CFCharacterSet, the behavior is undefined.
198 @result A reference to the new immutable CFCharacterSet.
200 CF_EXPORT CFCharacterSetRef
CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc
, CFCharacterSetRef theSet
);
203 @function CFCharacterSetIsSupersetOfSet
204 Reports whether or not the character set is a superset of the character set specified as the second parameter.
205 @param theSet The character set to be checked for the membership of theOtherSet.
206 If this parameter is not a valid CFCharacterSet, the behavior is undefined.
207 @param theOtherset The character set to be checked whether or not it is a subset of theSet.
208 If this parameter is not a valid CFCharacterSet, the behavior is undefined.
210 CF_EXPORT Boolean
CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet
, CFCharacterSetRef theOtherset
);
213 @function CFCharacterSetHasMemberInPlane
214 Reports whether or not the character set contains at least one member character in the specified plane.
215 @param theSet The character set to be checked for the membership. If this
216 parameter is not a valid CFCharacterSet, the behavior is undefined.
217 @param thePlane The plane number to be checked for the membership.
218 The valid value range is from 0 to 16. If the value is outside of the valid
219 plane number range, the behavior is undefined.
221 CF_EXPORT Boolean
CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet
, CFIndex thePlane
);
225 @function CFCharacterSetCreateMutable
226 Creates a new empty mutable character set.
227 @param alloc The CFAllocator which should be used to allocate
228 memory for the array and its storage for values. This
229 parameter may be NULL in which case the current default
230 CFAllocator is used. If this reference is not a valid
231 CFAllocator, the behavior is undefined.
232 @result A reference to the new mutable CFCharacterSet.
235 CFMutableCharacterSetRef
CFCharacterSetCreateMutable(CFAllocatorRef alloc
);
237 #if MAC_OS_X_VERSION_10_3 <= MAC_OS_X_VERSION_MAX_ALLOWED
239 @function CFCharacterSetCreateCopy
240 Creates a new character set with the values from the given character set. This function tries to compact the backing store where applicable.
241 @param alloc The CFAllocator which should be used to allocate
242 memory for the array and its storage for values. This
243 parameter may be NULL in which case the current default
244 CFAllocator is used. If this reference is not a valid
245 CFAllocator, the behavior is undefined.
246 @param theSet The CFCharacterSet which is to be copied. If this
247 parameter is not a valid CFCharacterSet, the behavior is undefined.
249 @result A reference to the new CFCharacterSet.
252 CFCharacterSetRef
CFCharacterSetCreateCopy(CFAllocatorRef alloc
, CFCharacterSetRef theSet
) AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER
;
253 #endif /* MAC_OS_X_VERSION_10_3 <= MAC_OS_X_VERSION_MAX_ALLOWED */
256 @function CFCharacterSetCreateMutableCopy
257 Creates a new mutable character set with the values from the given character set.
258 @param alloc The CFAllocator which should be used to allocate
259 memory for the array and its storage for values. This
260 parameter may be NULL in which case the current default
261 CFAllocator is used. If this reference is not a valid
262 CFAllocator, the behavior is undefined.
263 @param theSet The CFCharacterSet which is to be copied. If this
264 parameter is not a valid CFCharacterSet, the behavior is undefined.
266 @result A reference to the new mutable CFCharacterSet.
269 CFMutableCharacterSetRef
CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc
, CFCharacterSetRef theSet
);
272 @function CFCharacterSetIsCharacterMember
273 Reports whether or not the Unicode character is in the character set.
274 @param theSet The character set to be searched. If this parameter
275 is not a valid CFCharacterSet, the behavior is undefined.
276 @param theChar The Unicode character for which to test against the
277 character set. Note that this function takes 16-bit Unicode
278 character value; hence, it does not support access to the non-BMP planes.
280 @result true, if the value is in the character set, otherwise false.
283 Boolean
CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet
, UniChar theChar
);
285 #if MAC_OS_X_VERSION_10_2 <= MAC_OS_X_VERSION_MAX_ALLOWED
287 @function CFCharacterSetIsLongCharacterMember
288 Reports whether or not the UTF-32 character is in the character set.
289 @param theSet The character set to be searched. If this parameter
290 is not a valid CFCharacterSet, the behavior is undefined.
291 @param theChar The UTF-32 character for which to test against the character set.
293 @result true, if the value is in the character set, otherwise false.
295 CF_EXPORT Boolean
CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet
, UTF32Char theChar
);
299 @function CFCharacterSetCreateBitmapRepresentation
300 Creates a new immutable data with the bitmap representation from the given character set.
301 @param alloc The CFAllocator which should be used to allocate
302 memory for the array and its storage for values. This
303 parameter may be NULL in which case the current default
304 CFAllocator is used. If this reference is not a valid
305 CFAllocator, the behavior is undefined.
306 @param theSet The CFCharacterSet which is to be used to create the
307 bitmap representation from. Refer to the comments for
308 CFCharacterSetCreateWithBitmapRepresentation for the
309 detailed discussion of the bitmap representation format.
310 If this parameter is not a valid CFCharacterSet, the
311 behavior is undefined.
312 @result A reference to the new immutable CFData.
315 CFDataRef
CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc
, CFCharacterSetRef theSet
);
318 @function CFCharacterSetAddCharactersInRange
319 Adds the given range to the character set.
320 @param theSet The character set to which the range is to be added.
321 If this parameter is not a valid mutable CFCharacterSet,
322 the behavior is undefined.
323 @param theRange The range to add to the character set. It accepts
324 the range in 32-bit in the UTF-32 format. The valid
325 character point range is from 0x00000 to 0x10FFFF. If the
326 range is outside of the valid Unicode character point,
327 the behavior is undefined.
330 void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet
, CFRange theRange
);
333 @function CFCharacterSetRemoveCharactersInRange
334 Removes the given range from the character set.
335 @param theSet The character set from which the range is to be
336 removed. If this parameter is not a valid mutable
337 CFCharacterSet, the behavior is undefined.
338 @param theRange The range to remove from the character set.
339 It accepts the range in 32-bit in the UTF-32 format.
340 The valid character point range is from 0x00000 to 0x10FFFF.
341 If the range is outside of the valid Unicode character point,
342 the behavior is undefined.
345 void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet
, CFRange theRange
);
348 @function CFCharacterSetAddCharactersInString
349 Adds the characters in the given string to the character set.
350 @param theSet The character set to which the characters in the
351 string are to be added. If this parameter is not a
352 valid mutable CFCharacterSet, the behavior is undefined.
353 @param theString The string to add to the character set.
354 If this parameter is not a valid CFString, the behavior is undefined.
358 void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet
, CFStringRef theString
);
361 @function CFCharacterSetRemoveCharactersInString
362 Removes the characters in the given string from the character set.
363 @param theSet The character set from which the characters in the
364 string are to be removed. If this parameter is not a
365 valid mutable CFCharacterSet, the behavior is undefined.
366 @param theString The string to remove from the character set.
367 If this parameter is not a valid CFString, the behavior is undefined.
371 void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet
, CFStringRef theString
);
374 @function CFCharacterSetUnion
375 Forms the union with the given character set.
376 @param theSet The destination character set into which the
377 union of the two character sets is stored. If this
378 parameter is not a valid mutable CFCharacterSet, the
379 behavior is undefined.
380 @param theOtherSet The character set with which the union is
381 formed. If this parameter is not a valid CFCharacterSet,
382 the behavior is undefined.
385 void CFCharacterSetUnion(CFMutableCharacterSetRef theSet
, CFCharacterSetRef theOtherSet
);
388 @function CFCharacterSetIntersect
389 Forms the intersection with the given character set.
390 @param theSet The destination character set into which the
391 intersection of the two character sets is stored.
392 If this parameter is not a valid mutable CFCharacterSet,
393 the behavior is undefined.
394 @param theOtherSet The character set with which the intersection
395 is formed. If this parameter is not a valid CFCharacterSet,
396 the behavior is undefined.
399 void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet
, CFCharacterSetRef theOtherSet
);
402 @function CFCharacterSetInvert
403 Inverts the content of the given character set.
404 @param theSet The character set to be inverted.
405 If this parameter is not a valid mutable CFCharacterSet,
406 the behavior is undefined.
409 void CFCharacterSetInvert(CFMutableCharacterSetRef theSet
);
411 #if defined(__cplusplus)
415 #endif /* !__COREFOUNDATION_CFCHARACTERSET__ */