2 * Copyright (c) 2014 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
25 Copyright (c) 1999-2014, Apple Inc. All rights reserved.
/*!
    @header CFCharacterSet
    CFCharacterSet represents a set, or a bag, of Unicode characters.
    The API consists of 3 groups:
    1) creation/manipulation of CFCharacterSet instances,
    2) query of a single Unicode character membership,
    and 3) bitmap representation related (reading/writing).
    Conceptually, CFCharacterSet is a 136K-byte bitmap array in
    which each bit represents a Unicode code point. It can
    contain the Unicode characters in the ISO 10646 Basic Multilingual
    Plane (BMP) and characters in Plane 1 through Plane 16,
    accessible via surrogate pairs in the Unicode Transformation
    Format, 16-bit encoding form (UTF-16). In other words, it can
    store values from 0x00000 to 0x10FFFF in the Unicode
    Transformation Format, 32-bit encoding form (UTF-32). However,
    in general, how CFCharacterSet stores the information is an
    implementation detail. Note that even the CFData used for the
    external bitmap representation rarely has 136K bytes. For a detailed
    discussion of the external bitmap representation, refer to the
    comments for CFCharacterSetCreateWithBitmapRepresentation below.
    Note that the existence of non-BMP characters in a character set
    does not imply the membership of the corresponding surrogate
    characters. For example, a character set with U+10000 does not
    thereby contain the surrogates U+D800 or U+DC00 that encode it
    in UTF-16.
*/
54 #if !defined(__COREFOUNDATION_CFCHARACTERSET__)
55 #define __COREFOUNDATION_CFCHARACTERSET__ 1
57 #include <CoreFoundation/CFBase.h>
58 #include <CoreFoundation/CFData.h>
60 CF_IMPLICIT_BRIDGING_ENABLED
64 @typedef CFCharacterSetRef
65 This is the type of a reference to immutable CFCharacterSets.
67 typedef const struct CF_BRIDGED_TYPE(NSCharacterSet
) __CFCharacterSet
* CFCharacterSetRef
;
70 @typedef CFMutableCharacterSetRef
71 This is the type of a reference to mutable CFMutableCharacterSets.
73 typedef struct CF_BRIDGED_MUTABLE_TYPE(NSMutableCharacterSet
) __CFCharacterSet
* CFMutableCharacterSetRef
;
76 @typedef CFCharacterSetPredefinedSet
77 Type of the predefined CFCharacterSet selector values.
80 typedef CF_ENUM(CFIndex
, CFCharacterSetPredefinedSet
) {
81 kCFCharacterSetControl
= 1, /* Control character set (Unicode General Category Cc and Cf) */
82 kCFCharacterSetWhitespace
, /* Whitespace character set (Unicode General Category Zs and U0009 CHARACTER TABULATION) */
83 kCFCharacterSetWhitespaceAndNewline
, /* Whitespace and Newline character set (Unicode General Category Z*, U000A ~ U000D, and U0085) */
84 kCFCharacterSetDecimalDigit
, /* Decimal digit character set */
85 kCFCharacterSetLetter
, /* Letter character set (Unicode General Category L* & M*) */
86 kCFCharacterSetLowercaseLetter
, /* Lowercase character set (Unicode General Category Ll) */
87 kCFCharacterSetUppercaseLetter
, /* Uppercase character set (Unicode General Category Lu and Lt) */
88 kCFCharacterSetNonBase
, /* Non-base character set (Unicode General Category M*) */
89 kCFCharacterSetDecomposable
, /* Canonically decomposable character set */
90 kCFCharacterSetAlphaNumeric
, /* Alpha Numeric character set (Unicode General Category L*, M*, & N*) */
91 kCFCharacterSetPunctuation
, /* Punctuation character set (Unicode General Category P*) */
92 kCFCharacterSetCapitalizedLetter
= 13, /* Titlecase character set (Unicode General Category Lt) */
93 kCFCharacterSetSymbol
= 14, /* Symbol character set (Unicode General Category S*) */
94 kCFCharacterSetNewline
CF_ENUM_AVAILABLE(10_5
, 2_0
) = 15, /* Newline character set (U000A ~ U000D, U0085, U2028, and U2029) */
95 kCFCharacterSetIllegal
= 12/* Illegal character set */
99 @function CFCharacterSetGetTypeID
100 Returns the type identifier of all CFCharacterSet instances.
103 CFTypeID
CFCharacterSetGetTypeID(void);
106 @function CFCharacterSetGetPredefined
107 Returns a predefined CFCharacterSet instance.
108 @param theSetIdentifier The CFCharacterSetPredefinedSet selector
109 which specifies the predefined character set. If the
110 value is not in CFCharacterSetPredefinedSet, the behavior
112 @result A reference to the predefined immutable CFCharacterSet.
113 This instance is owned by CF.
116 CFCharacterSetRef
CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier
);
119 @function CFCharacterSetCreateWithCharactersInRange
120 Creates a new immutable character set with the values from the given range.
121 @param alloc The CFAllocator which should be used to allocate
122 memory for the array and its storage for values. This
123 parameter may be NULL in which case the current default
124 CFAllocator is used. If this reference is not a valid
125 CFAllocator, the behavior is undefined.
126 @param theRange The CFRange which should be used to specify the
127 Unicode range the character set is filled with. It
128 accepts the range in 32-bit in the UTF-32 format. The
129 valid character point range is from 0x00000 to 0x10FFFF.
130 If the range is outside of the valid Unicode character
131 point, the behavior is undefined.
132 @result A reference to the new immutable CFCharacterSet.
135 CFCharacterSetRef
CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef alloc
, CFRange theRange
);
138 @function CFCharacterSetCreateWithCharactersInString
139 Creates a new immutable character set with the values in the given string.
140 @param alloc The CFAllocator which should be used to allocate
141 memory for the array and its storage for values. This
142 parameter may be NULL in which case the current default
143 CFAllocator is used. If this reference is not a valid
144 CFAllocator, the behavior is undefined.
145 @param theString The CFString which should be used to specify
146 the Unicode characters the character set is filled with.
147 If this parameter is not a valid CFString, the behavior
149 @result A reference to the new immutable CFCharacterSet.
152 CFCharacterSetRef
CFCharacterSetCreateWithCharactersInString(CFAllocatorRef alloc
, CFStringRef theString
);
155 @function CFCharacterSetCreateWithBitmapRepresentation
156 Creates a new immutable character set with the bitmap representtion in the given data.
157 @param alloc The CFAllocator which should be used to allocate
158 memory for the array and its storage for values. This
159 parameter may be NULL in which case the current default
160 CFAllocator is used. If this reference is not a valid
161 CFAllocator, the behavior is undefined.
162 @param theData The CFData which should be used to specify the
163 bitmap representation of the Unicode character points
164 the character set is filled with. The bitmap
165 representation could contain all the Unicode character
166 range starting from BMP to Plane 16. The first 8192 bytes
167 of the data represent the BMP range. The BMP range 8192
168 bytes can be followed by zero to sixteen 8192 byte
169 bitmaps, each one with the plane index byte prepended.
170 For example, the bitmap representing the BMP and Plane 2
171 has the size of 16385 bytes (8192 bytes for BMP, 1 byte
172 index + 8192 bytes bitmap for Plane 2). The plane index
173 byte, in this case, contains the integer value two. If
174 this parameter is not a valid CFData or it contains a
175 Plane index byte outside of the valid Plane range
176 (1 to 16), the behavior is undefined.
177 @result A reference to the new immutable CFCharacterSet.
180 CFCharacterSetRef
CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef alloc
, CFDataRef theData
);
183 @function CFCharacterSetCreateInvertedSet
184 Creates a new immutable character set that is the invert of the specified character set.
185 @param alloc The CFAllocator which should be used to allocate
186 memory for the array and its storage for values. This
187 parameter may be NULL in which case the current default
188 CFAllocator is used. If this reference is not a valid
189 CFAllocator, the behavior is undefined.
190 @param theSet The CFCharacterSet which is to be inverted. If this
191 parameter is not a valid CFCharacterSet, the behavior is
193 @result A reference to the new immutable CFCharacterSet.
195 CF_EXPORT CFCharacterSetRef
CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc
, CFCharacterSetRef theSet
);
198 @function CFCharacterSetIsSupersetOfSet
199 Reports whether or not the character set is a superset of the character set specified as the second parameter.
200 @param theSet The character set to be checked for the membership of theOtherSet.
201 If this parameter is not a valid CFCharacterSet, the behavior is undefined.
202 @param theOtherset The character set to be checked whether or not it is a subset of theSet.
203 If this parameter is not a valid CFCharacterSet, the behavior is undefined.
205 CF_EXPORT Boolean
CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet
, CFCharacterSetRef theOtherset
);
208 @function CFCharacterSetHasMemberInPlane
209 Reports whether or not the character set contains at least one member character in the specified plane.
210 @param theSet The character set to be checked for the membership. If this
211 parameter is not a valid CFCharacterSet, the behavior is undefined.
212 @param thePlane The plane number to be checked for the membership.
213 The valid value range is from 0 to 16. If the value is outside of the valid
214 plane number range, the behavior is undefined.
216 CF_EXPORT Boolean
CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet
, CFIndex thePlane
);
219 @function CFCharacterSetCreateMutable
220 Creates a new empty mutable character set.
221 @param allocator The CFAllocator which should be used to allocate
222 memory for the array and its storage for values. This
223 parameter may be NULL in which case the current default
224 CFAllocator is used. If this reference is not a valid
225 CFAllocator, the behavior is undefined.
226 @result A reference to the new mutable CFCharacterSet.
229 CFMutableCharacterSetRef
CFCharacterSetCreateMutable(CFAllocatorRef alloc
);
232 @function CFCharacterSetCreateCopy
233 Creates a new character set with the values from the given character set. This function tries to compact the backing store where applicable.
234 @param allocator The CFAllocator which should be used to allocate
235 memory for the array and its storage for values. This
236 parameter may be NULL in which case the current default
237 CFAllocator is used. If this reference is not a valid
238 CFAllocator, the behavior is undefined.
239 @param theSet The CFCharacterSet which is to be copied. If this
240 parameter is not a valid CFCharacterSet, the behavior is
242 @result A reference to the new CFCharacterSet.
245 CFCharacterSetRef
CFCharacterSetCreateCopy(CFAllocatorRef alloc
, CFCharacterSetRef theSet
);
248 @function CFCharacterSetCreateMutableCopy
249 Creates a new mutable character set with the values from the given character set.
250 @param allocator The CFAllocator which should be used to allocate
251 memory for the array and its storage for values. This
252 parameter may be NULL in which case the current default
253 CFAllocator is used. If this reference is not a valid
254 CFAllocator, the behavior is undefined.
255 @param theSet The CFCharacterSet which is to be copied. If this
256 parameter is not a valid CFCharacterSet, the behavior is
258 @result A reference to the new mutable CFCharacterSet.
261 CFMutableCharacterSetRef
CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc
, CFCharacterSetRef theSet
);
264 @function CFCharacterSetIsCharacterMember
265 Reports whether or not the Unicode character is in the character set.
266 @param theSet The character set to be searched. If this parameter
267 is not a valid CFCharacterSet, the behavior is undefined.
268 @param theChar The Unicode character for which to test against the
269 character set. Note that this function takes 16-bit Unicode
270 character value; hence, it does not support access to the
272 @result true, if the value is in the character set, otherwise false.
275 Boolean
CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet
, UniChar theChar
);
278 @function CFCharacterSetIsLongCharacterMember
279 Reports whether or not the UTF-32 character is in the character set.
280 @param theSet The character set to be searched. If this parameter
281 is not a valid CFCharacterSet, the behavior is undefined.
282 @param theChar The UTF-32 character for which to test against the
284 @result true, if the value is in the character set, otherwise false.
286 CF_EXPORT Boolean
CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet
, UTF32Char theChar
);
289 @function CFCharacterSetCreateBitmapRepresentation
290 Creates a new immutable data with the bitmap representation from the given character set.
291 @param allocator The CFAllocator which should be used to allocate
292 memory for the array and its storage for values. This
293 parameter may be NULL in which case the current default
294 CFAllocator is used. If this reference is not a valid
295 CFAllocator, the behavior is undefined.
296 @param theSet The CFCharacterSet which is to be used create the
297 bitmap representation from. Refer to the comments for
298 CFCharacterSetCreateWithBitmapRepresentation for the
299 detailed discussion of the bitmap representation format.
300 If this parameter is not a valid CFCharacterSet, the
301 behavior is undefined.
302 @result A reference to the new immutable CFData.
305 CFDataRef
CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc
, CFCharacterSetRef theSet
);
308 @function CFCharacterSetAddCharactersInRange
309 Adds the given range to the charaacter set.
310 @param theSet The character set to which the range is to be added.
311 If this parameter is not a valid mutable CFCharacterSet,
312 the behavior is undefined.
313 @param theRange The range to add to the character set. It accepts
314 the range in 32-bit in the UTF-32 format. The valid
315 character point range is from 0x00000 to 0x10FFFF. If the
316 range is outside of the valid Unicode character point,
317 the behavior is undefined.
320 void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet
, CFRange theRange
);
323 @function CFCharacterSetRemoveCharactersInRange
324 Removes the given range from the charaacter set.
325 @param theSet The character set from which the range is to be
326 removed. If this parameter is not a valid mutable
327 CFCharacterSet, the behavior is undefined.
328 @param theRange The range to remove from the character set.
329 It accepts the range in 32-bit in the UTF-32 format.
330 The valid character point range is from 0x00000 to 0x10FFFF.
331 If the range is outside of the valid Unicode character point,
332 the behavior is undefined.
335 void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet
, CFRange theRange
);
338 @function CFCharacterSetAddCharactersInString
339 Adds the characters in the given string to the charaacter set.
340 @param theSet The character set to which the characters in the
341 string are to be added. If this parameter is not a
342 valid mutable CFCharacterSet, the behavior is undefined.
343 @param theString The string to add to the character set.
344 If this parameter is not a valid CFString, the behavior
348 void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet
, CFStringRef theString
);
351 @function CFCharacterSetRemoveCharactersInString
352 Removes the characters in the given string from the charaacter set.
353 @param theSet The character set from which the characters in the
354 string are to be remove. If this parameter is not a
355 valid mutable CFCharacterSet, the behavior is undefined.
356 @param theString The string to remove from the character set.
357 If this parameter is not a valid CFString, the behavior
361 void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet
, CFStringRef theString
);
364 @function CFCharacterSetUnion
365 Forms the union with the given character set.
366 @param theSet The destination character set into which the
367 union of the two character sets is stored. If this
368 parameter is not a valid mutable CFCharacterSet, the
369 behavior is undefined.
370 @param theOtherSet The character set with which the union is
371 formed. If this parameter is not a valid CFCharacterSet,
372 the behavior is undefined.
375 void CFCharacterSetUnion(CFMutableCharacterSetRef theSet
, CFCharacterSetRef theOtherSet
);
378 @function CFCharacterSetIntersect
379 Forms the intersection with the given character set.
380 @param theSet The destination character set into which the
381 intersection of the two character sets is stored.
382 If this parameter is not a valid mutable CFCharacterSet,
383 the behavior is undefined.
384 @param theOtherSet The character set with which the intersection
385 is formed. If this parameter is not a valid CFCharacterSet,
386 the behavior is undefined.
389 void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet
, CFCharacterSetRef theOtherSet
);
392 @function CFCharacterSetInvert
393 Inverts the content of the given character set.
394 @param theSet The character set to be inverted.
395 If this parameter is not a valid mutable CFCharacterSet,
396 the behavior is undefined.
399 void CFCharacterSetInvert(CFMutableCharacterSetRef theSet
);
402 CF_IMPLICIT_BRIDGING_DISABLED
404 #endif /* ! __COREFOUNDATION_CFCHARACTERSET__ */