]> git.saurik.com Git - apple/cf.git/blob - CFCharacterSet.h
cf6259158d6d1e3ea04bd3cdc7c37ac538631d08
[apple/cf.git] / CFCharacterSet.h
1 /*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFCharacterSet.h
25 Copyright (c) 1999-2014, Apple Inc. All rights reserved.
26 */
27
28 /*!
29 @header CFCharacterSet
30 CFCharacterSet represents a set, or a bag, of Unicode characters.
31 The API consists of 3 groups:
32 1) creation/manipulation of CFCharacterSet instances,
33 2) query of a single Unicode character membership,
34 and 3) bitmap representation related (reading/writing).
35 Conceptually, CFCharacterSet is a 136K byte bitmap array of
36 which each bit represents a Unicode code point. It could
37 contain the Unicode characters in ISO 10646 Basic Multilingual
38 Plane (BMP) and characters in Plane 1 through Plane 16
39 accessible via surrogate pairs in the Unicode Transformation
40 Format, 16-bit encoding form (UTF-16). In other words, it can
41 store values from 0x00000 to 0x10FFFF in the Unicode
42 Transformation Format, 32-bit encoding form (UTF-32). However,
43 in general, how CFCharacterSet stores the information is an
44 implementation detail. Note that even the CFData used for the external
45 bitmap representation rarely has 136K bytes. For detailed
46 discussion of the external bitmap representation, refer to the
47 comments for CFCharacterSetCreateWithBitmapRepresentation below.
48 Note that the existence of non-BMP characters in a character set
49 does not imply the membership of the corresponding surrogate
50 characters. For example, a character set with U+10000 does not
51 match with U+D800.
52 */
53
54 #if !defined(__COREFOUNDATION_CFCHARACTERSET__)
55 #define __COREFOUNDATION_CFCHARACTERSET__ 1
56
57 #include <CoreFoundation/CFBase.h>
58 #include <CoreFoundation/CFData.h>
59
60 CF_IMPLICIT_BRIDGING_ENABLED
61 CF_EXTERN_C_BEGIN
62
63 /*!
64 @typedef CFCharacterSetRef
65 This is the type of a reference to an immutable CFCharacterSet (a pointer to a const struct __CFCharacterSet).
66 */
67 typedef const struct CF_BRIDGED_TYPE(NSCharacterSet) __CFCharacterSet * CFCharacterSetRef;
68
69 /*!
70 @typedef CFMutableCharacterSetRef
71 This is the type of a reference to a mutable CFCharacterSet (a pointer to a non-const struct __CFCharacterSet).
72 */
73 typedef struct CF_BRIDGED_MUTABLE_TYPE(NSMutableCharacterSet) __CFCharacterSet * CFMutableCharacterSetRef;
74
75 /*!
76 @typedef CFCharacterSetPredefinedSet
77 Type of the predefined CFCharacterSet selector values. The selectors cover the values 1 through 15.
78 */
79
80 typedef CF_ENUM(CFIndex, CFCharacterSetPredefinedSet) {
81 kCFCharacterSetControl = 1, /* Control character set (Unicode General Category Cc and Cf) */
82 kCFCharacterSetWhitespace, /* Whitespace character set (Unicode General Category Zs and U+0009 CHARACTER TABULATION) */
83 kCFCharacterSetWhitespaceAndNewline, /* Whitespace and Newline character set (Unicode General Category Z*, U+000A ~ U+000D, and U+0085) */
84 kCFCharacterSetDecimalDigit, /* Decimal digit character set */
85 kCFCharacterSetLetter, /* Letter character set (Unicode General Category L* & M*) */
86 kCFCharacterSetLowercaseLetter, /* Lowercase character set (Unicode General Category Ll) */
87 kCFCharacterSetUppercaseLetter, /* Uppercase character set (Unicode General Category Lu and Lt) */
88 kCFCharacterSetNonBase, /* Non-base character set (Unicode General Category M*) */
89 kCFCharacterSetDecomposable, /* Canonically decomposable character set */
90 kCFCharacterSetAlphaNumeric, /* Alpha Numeric character set (Unicode General Category L*, M*, & N*) */
91 kCFCharacterSetPunctuation, /* Punctuation character set (Unicode General Category P*); implicit value 11 */
92 kCFCharacterSetCapitalizedLetter = 13, /* Titlecase character set (Unicode General Category Lt) */
93 kCFCharacterSetSymbol = 14, /* Symbol character set (Unicode General Category S*) */
94 kCFCharacterSetNewline CF_ENUM_AVAILABLE(10_5, 2_0) = 15, /* Newline character set (U+000A ~ U+000D, U+0085, U+2028, and U+2029) */
95 kCFCharacterSetIllegal = 12/* Illegal character set; declared last, but its value 12 sits between kCFCharacterSetPunctuation (11) and kCFCharacterSetCapitalizedLetter (13) */
96 };
97
98 /*!
99 @function CFCharacterSetGetTypeID
100 Returns the CFTypeID that identifies all CFCharacterSet instances.
101 */
102 CF_EXPORT
103 CFTypeID CFCharacterSetGetTypeID(void);
104
105 /*!
106 @function CFCharacterSetGetPredefined
107 Returns a predefined CFCharacterSet instance.
108 @param theSetIdentifier The CFCharacterSetPredefinedSet selector
109 which specifies the predefined character set. If the
110 value is not one of the CFCharacterSetPredefinedSet
111 constants, the behavior is undefined.
112 @result A reference to the predefined immutable CFCharacterSet.
113 This instance is owned by CF.
114 */
115 CF_EXPORT
116 CFCharacterSetRef CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier);
117
118 /*!
119 @function CFCharacterSetCreateWithCharactersInRange
120 Creates a new immutable character set with the values from the given range.
121 @param alloc The CFAllocator which should be used to allocate
122 memory for the character set and its storage. This
123 parameter may be NULL in which case the current default
124 CFAllocator is used. If this reference is not a valid
125 CFAllocator, the behavior is undefined.
126 @param theRange The CFRange which should be used to specify the
127 Unicode range the character set is filled with. It
128 accepts the range as 32-bit values in the UTF-32 format. The
129 valid character point range is from 0x00000 to 0x10FFFF.
130 If the range is outside of the valid Unicode character
131 point range, the behavior is undefined.
132 @result A reference to the new immutable CFCharacterSet.
133 */
134 CF_EXPORT
135 CFCharacterSetRef CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef alloc, CFRange theRange);
136
137 /*!
138 @function CFCharacterSetCreateWithCharactersInString
139 Creates a new immutable character set with the values in the given string.
140 @param alloc The CFAllocator which should be used to allocate
141 memory for the character set and its storage. This
142 parameter may be NULL in which case the current default
143 CFAllocator is used. If this reference is not a valid
144 CFAllocator, the behavior is undefined.
145 @param theString The CFString which should be used to specify
146 the Unicode characters the character set is filled with.
147 If this parameter is not a valid CFString, the behavior
148 is undefined.
149 @result A reference to the new immutable CFCharacterSet.
150 */
151 CF_EXPORT
152 CFCharacterSetRef CFCharacterSetCreateWithCharactersInString(CFAllocatorRef alloc, CFStringRef theString);
153
154 /*!
155 @function CFCharacterSetCreateWithBitmapRepresentation
156 Creates a new immutable character set with the bitmap representation in the given data.
157 @param alloc The CFAllocator which should be used to allocate
158 memory for the character set and its storage. This
159 parameter may be NULL in which case the current default
160 CFAllocator is used. If this reference is not a valid
161 CFAllocator, the behavior is undefined.
162 @param theData The CFData which should be used to specify the
163 bitmap representation of the Unicode character points
164 the character set is filled with. The bitmap
165 representation could contain all the Unicode character
166 range starting from BMP to Plane 16. The first 8192 bytes
167 of the data represent the BMP range. The BMP range 8192
168 bytes can be followed by zero to sixteen 8192 byte
169 bitmaps, each one with the plane index byte prepended.
170 For example, the bitmap representing the BMP and Plane 2
171 has the size of 16385 bytes (8192 bytes for BMP, 1 byte
172 index + 8192 bytes bitmap for Plane 2). The plane index
173 byte, in this case, contains the integer value two. If
174 this parameter is not a valid CFData or it contains a
175 Plane index byte outside of the valid Plane range
176 (1 to 16), the behavior is undefined.
177 @result A reference to the new immutable CFCharacterSet.
178 */
179 CF_EXPORT
180 CFCharacterSetRef CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef alloc, CFDataRef theData);
181
182 /*!
183 @function CFCharacterSetCreateInvertedSet
184 Creates a new immutable character set that is the inverse of the specified character set.
185 @param alloc The CFAllocator which should be used to allocate
186 memory for the character set and its storage. This
187 parameter may be NULL in which case the current default
188 CFAllocator is used. If this reference is not a valid
189 CFAllocator, the behavior is undefined.
190 @param theSet The CFCharacterSet which is to be inverted. If this
191 parameter is not a valid CFCharacterSet, the behavior is
192 undefined.
193 @result A reference to the new immutable CFCharacterSet.
194 */
195 CF_EXPORT CFCharacterSetRef CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc, CFCharacterSetRef theSet);
196
197 /*!
198 @function CFCharacterSetIsSupersetOfSet
199 Reports whether or not the character set is a superset of the character set specified as the second parameter.
200 @param theSet The character set to be checked for the membership of theOtherset.
201 If this parameter is not a valid CFCharacterSet, the behavior is undefined.
202 @param theOtherset The character set to be checked whether or not it is a subset of theSet.
203 If this parameter is not a valid CFCharacterSet, the behavior is undefined.
204 */
205 CF_EXPORT Boolean CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet, CFCharacterSetRef theOtherset);
206
207 /*!
208 @function CFCharacterSetHasMemberInPlane
209 Reports whether or not the character set contains at least one member character in the specified plane.
210 @param theSet The character set to be checked for membership. If this
211 parameter is not a valid CFCharacterSet, the behavior is undefined.
212 @param thePlane The plane number to be checked for membership.
213 The valid value range is from 0 to 16. If the value is outside of the valid
214 plane number range, the behavior is undefined.
215 */
216 CF_EXPORT Boolean CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet, CFIndex thePlane);
217
218 /*!
219 @function CFCharacterSetCreateMutable
220 Creates a new empty mutable character set.
221 @param alloc The CFAllocator which should be used to allocate
222 memory for the character set and its storage. This
223 parameter may be NULL in which case the current default
224 CFAllocator is used. If this reference is not a valid
225 CFAllocator, the behavior is undefined.
226 @result A reference to the new mutable CFCharacterSet.
227 */
228 CF_EXPORT
229 CFMutableCharacterSetRef CFCharacterSetCreateMutable(CFAllocatorRef alloc);
230
231 /*!
232 @function CFCharacterSetCreateCopy
233 Creates a new character set with the values from the given character set. This function tries to compact the backing store where applicable.
234 @param alloc The CFAllocator which should be used to allocate
235 memory for the character set and its storage. This
236 parameter may be NULL in which case the current default
237 CFAllocator is used. If this reference is not a valid
238 CFAllocator, the behavior is undefined.
239 @param theSet The CFCharacterSet which is to be copied. If this
240 parameter is not a valid CFCharacterSet, the behavior is
241 undefined.
242 @result A reference to the new CFCharacterSet.
243 */
244 CF_EXPORT
245 CFCharacterSetRef CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet);
246
247 /*!
248 @function CFCharacterSetCreateMutableCopy
249 Creates a new mutable character set with the values from the given character set.
250 @param alloc The CFAllocator which should be used to allocate
251 memory for the character set and its storage. This
252 parameter may be NULL in which case the current default
253 CFAllocator is used. If this reference is not a valid
254 CFAllocator, the behavior is undefined.
255 @param theSet The CFCharacterSet which is to be copied. If this
256 parameter is not a valid CFCharacterSet, the behavior is
257 undefined.
258 @result A reference to the new mutable CFCharacterSet.
259 */
260 CF_EXPORT
261 CFMutableCharacterSetRef CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet);
262
263 /*!
264 @function CFCharacterSetIsCharacterMember
265 Reports whether or not the Unicode character is in the character set.
266 @param theSet The character set to be searched. If this parameter
267 is not a valid CFCharacterSet, the behavior is undefined.
268 @param theChar The Unicode character to test against the
269 character set. Note that this function takes a 16-bit Unicode
270 character value (UniChar); hence, it does not support access to the
271 non-BMP planes.
272 @result true, if the value is in the character set, otherwise false.
273 */
274 CF_EXPORT
275 Boolean CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet, UniChar theChar);
276
277 /*!
278 @function CFCharacterSetIsLongCharacterMember
279 Reports whether or not the UTF-32 character is in the character set.
280 @param theSet The character set to be searched. If this parameter
281 is not a valid CFCharacterSet, the behavior is undefined.
282 @param theChar The UTF-32 character to test against the
283 character set.
284 @result true, if the value is in the character set, otherwise false.
285 */
286 CF_EXPORT Boolean CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet, UTF32Char theChar);
287
288 /*!
289 @function CFCharacterSetCreateBitmapRepresentation
290 Creates a new immutable data with the bitmap representation from the given character set.
291 @param alloc The CFAllocator which should be used to allocate
292 memory for the CFData and its storage. This
293 parameter may be NULL in which case the current default
294 CFAllocator is used. If this reference is not a valid
295 CFAllocator, the behavior is undefined.
296 @param theSet The CFCharacterSet which is to be used to create the
297 bitmap representation from. Refer to the comments for
298 CFCharacterSetCreateWithBitmapRepresentation for the
299 detailed discussion of the bitmap representation format.
300 If this parameter is not a valid CFCharacterSet, the
301 behavior is undefined.
302 @result A reference to the new immutable CFData.
303 */
304 CF_EXPORT
305 CFDataRef CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc, CFCharacterSetRef theSet);
306
307 /*!
308 @function CFCharacterSetAddCharactersInRange
309 Adds the given range to the character set.
310 @param theSet The character set to which the range is to be added.
311 If this parameter is not a valid mutable CFCharacterSet,
312 the behavior is undefined.
313 @param theRange The range to add to the character set. It accepts
314 the range as 32-bit values in the UTF-32 format. The valid
315 character point range is from 0x00000 to 0x10FFFF. If the
316 range is outside of the valid Unicode character point range,
317 the behavior is undefined.
318 */
319 CF_EXPORT
320 void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange);
321
322 /*!
323 @function CFCharacterSetRemoveCharactersInRange
324 Removes the given range from the character set.
325 @param theSet The character set from which the range is to be
326 removed. If this parameter is not a valid mutable
327 CFCharacterSet, the behavior is undefined.
328 @param theRange The range to remove from the character set.
329 It accepts the range as 32-bit values in the UTF-32 format.
330 The valid character point range is from 0x00000 to 0x10FFFF.
331 If the range is outside of the valid Unicode character point range,
332 the behavior is undefined.
333 */
334 CF_EXPORT
335 void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange);
336
337 /*!
338 @function CFCharacterSetAddCharactersInString
339 Adds the characters in the given string to the character set.
340 @param theSet The character set to which the characters in the
341 string are to be added. If this parameter is not a
342 valid mutable CFCharacterSet, the behavior is undefined.
343 @param theString The string to add to the character set.
344 If this parameter is not a valid CFString, the behavior
345 is undefined.
346 */
347 CF_EXPORT
348 void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString);
349
350 /*!
351 @function CFCharacterSetRemoveCharactersInString
352 Removes the characters in the given string from the character set.
353 @param theSet The character set from which the characters in the
354 string are to be removed. If this parameter is not a
355 valid mutable CFCharacterSet, the behavior is undefined.
356 @param theString The string to remove from the character set.
357 If this parameter is not a valid CFString, the behavior
358 is undefined.
359 */
360 CF_EXPORT
361 void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString);
362
363 /*!
364 @function CFCharacterSetUnion
365 Forms the union of theSet and theOtherSet, storing the result in theSet.
366 @param theSet The destination character set into which the
367 union of the two character sets is stored. If this
368 parameter is not a valid mutable CFCharacterSet, the
369 behavior is undefined.
370 @param theOtherSet The character set with which the union is
371 formed. If this parameter is not a valid CFCharacterSet,
372 the behavior is undefined.
373 */
374 CF_EXPORT
375 void CFCharacterSetUnion(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet);
376
377 /*!
378 @function CFCharacterSetIntersect
379 Forms the intersection of theSet and theOtherSet, storing the result in theSet.
380 @param theSet The destination character set into which the
381 intersection of the two character sets is stored.
382 If this parameter is not a valid mutable CFCharacterSet,
383 the behavior is undefined.
384 @param theOtherSet The character set with which the intersection
385 is formed. If this parameter is not a valid CFCharacterSet,
386 the behavior is undefined.
387 */
388 CF_EXPORT
389 void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet);
390
391 /*!
392 @function CFCharacterSetInvert
393 Inverts the content of the given mutable character set.
394 @param theSet The mutable character set to be inverted.
395 If this parameter is not a valid mutable CFCharacterSet,
396 the behavior is undefined.
397 */
398 CF_EXPORT
399 void CFCharacterSetInvert(CFMutableCharacterSetRef theSet);
400
401 CF_EXTERN_C_END
402 CF_IMPLICIT_BRIDGING_DISABLED
403
404 #endif /* ! __COREFOUNDATION_CFCHARACTERSET__ */
405