]>
Commit | Line | Data |
---|---|---|
9ce05555 | 1 | /* |
e29e285d | 2 | * Copyright (c) 2015 Apple Inc. All rights reserved. |
9ce05555 A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
d7384798 | 5 | * |
9ce05555 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
d7384798 | 12 | * |
9ce05555 A |
13 | * The Original Code and all software distributed under the License are |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
d7384798 | 20 | * |
9ce05555 A |
21 | * @APPLE_LICENSE_HEADER_END@ |
22 | */ | |
f64f9b69 | 23 | |
9ce05555 | 24 | /* CFCharacterSet.h |
d7384798 | 25 | Copyright (c) 1999-2014, Apple Inc. All rights reserved. |
9ce05555 A |
26 | */ |
27 | ||
28 | /*! | |
29 | @header CFCharacterSet | |
30 | CFCharacterSet represents a set, or a bag, of Unicode characters. | |
31 | The API consists of 3 groups: | |
32 | 1) creation/manipulation of CFCharacterSet instances, | |
33 | 2) query of a single Unicode character membership, | |
34 | and 3) bitmap representation related (reading/writing). | |
35 | Conceptually, CFCharacterSet is a 136K byte bitmap array of | |
36 | which each bit represents a Unicode code point. It could | |
37 | contain the Unicode characters in ISO 10646 Basic Multilingual | |
38 | Plane (BMP) and characters in Plane 1 through Plane 16 | |
39 | accessible via surrogate pairs in the Unicode Transformation | |
40 | Format, 16-bit encoding form (UTF-16). In other words, it can | |
41 | store values from 0x00000 to 0x10FFFF in the Unicode | |
42 | Transformation Format, 32-bit encoding form (UTF-32). However, | |
43 | in general, how CFCharacterSet stores the information is an | |
44 | implementation detail. Note even CFData used for the external | |
45 | bitmap representation rarely has 136K bytes. For detailed | |
46 | discussion of the external bitmap representation, refer to the | |
47 | comments for CFCharacterSetCreateWithBitmapRepresentation below. | |
48 | Note that the existence of non-BMP characters in a character set | |
49 | does not imply the membership of the corresponding surrogate | |
50 | characters. For example, a character set with U+10000 does not | |
51 | match with U+D800. | |
52 | */ | |
53 | ||
54 | #if !defined(__COREFOUNDATION_CFCHARACTERSET__) | |
55 | #define __COREFOUNDATION_CFCHARACTERSET__ 1 | |
56 | ||
57 | #include <CoreFoundation/CFBase.h> | |
58 | #include <CoreFoundation/CFData.h> | |
59 | ||
856091c5 | 60 | CF_IMPLICIT_BRIDGING_ENABLED |
bd5b749c | 61 | CF_EXTERN_C_BEGIN |
9ce05555 A |
62 | |
63 | /*! | |
64 | @typedef CFCharacterSetRef | |
65 | This is the type of a reference to immutable CFCharacterSets. | |
66 | */ | |
d7384798 | 67 | typedef const struct CF_BRIDGED_TYPE(NSCharacterSet) __CFCharacterSet * CFCharacterSetRef; |
9ce05555 A |
68 | |
69 | /*! | |
70 | @typedef CFMutableCharacterSetRef | |
71 | This is the type of a reference to mutable CFMutableCharacterSets. | |
72 | */ | |
d7384798 | 73 | typedef struct CF_BRIDGED_MUTABLE_TYPE(NSMutableCharacterSet) __CFCharacterSet * CFMutableCharacterSetRef; |
9ce05555 A |
74 | |
75 | /*! | |
76 | @typedef CFCharacterSetPredefinedSet | |
77 | Type of the predefined CFCharacterSet selector values. | |
78 | */ | |
bd5b749c | 79 | |
856091c5 | 80 | typedef CF_ENUM(CFIndex, CFCharacterSetPredefinedSet) { |
9ce05555 A |
81 | kCFCharacterSetControl = 1, /* Control character set (Unicode General Category Cc and Cf) */ |
82 | kCFCharacterSetWhitespace, /* Whitespace character set (Unicode General Category Zs and U0009 CHARACTER TABULATION) */ | |
83 | kCFCharacterSetWhitespaceAndNewline, /* Whitespace and Newline character set (Unicode General Category Z*, U000A ~ U000D, and U0085) */ | |
84 | kCFCharacterSetDecimalDigit, /* Decimal digit character set */ | |
85 | kCFCharacterSetLetter, /* Letter character set (Unicode General Category L* & M*) */ | |
86 | kCFCharacterSetLowercaseLetter, /* Lowercase character set (Unicode General Category Ll) */ | |
87 | kCFCharacterSetUppercaseLetter, /* Uppercase character set (Unicode General Category Lu and Lt) */ | |
88 | kCFCharacterSetNonBase, /* Non-base character set (Unicode General Category M*) */ | |
89 | kCFCharacterSetDecomposable, /* Canonically decomposable character set */ | |
90 | kCFCharacterSetAlphaNumeric, /* Alpha Numeric character set (Unicode General Category L*, M*, & N*) */ | |
91 | kCFCharacterSetPunctuation, /* Punctuation character set (Unicode General Category P*) */ | |
bd5b749c | 92 | kCFCharacterSetCapitalizedLetter = 13, /* Titlecase character set (Unicode General Category Lt) */ |
bd5b749c | 93 | kCFCharacterSetSymbol = 14, /* Symbol character set (Unicode General Category S*) */ |
856091c5 | 94 | kCFCharacterSetNewline CF_ENUM_AVAILABLE(10_5, 2_0) = 15, /* Newline character set (U000A ~ U000D, U0085, U2028, and U2029) */ |
bd5b749c A |
95 | kCFCharacterSetIllegal = 12/* Illegal character set */ |
96 | }; | |
9ce05555 A |
97 | |
98 | /*! | |
99 | @function CFCharacterSetGetTypeID | |
100 | Returns the type identifier of all CFCharacterSet instances. | |
101 | */ | |
102 | CF_EXPORT | |
103 | CFTypeID CFCharacterSetGetTypeID(void); | |
104 | ||
105 | /*! | |
106 | @function CFCharacterSetGetPredefined | |
107 | Returns a predefined CFCharacterSet instance. | |
108 | @param theSetIdentifier The CFCharacterSetPredefinedSet selector | |
109 | which specifies the predefined character set. If the | |
110 | value is not in CFCharacterSetPredefinedSet, the behavior | |
111 | is undefined. | |
112 | @result A reference to the predefined immutable CFCharacterSet. | |
113 | This instance is owned by CF. | |
114 | */ | |
115 | CF_EXPORT | |
116 | CFCharacterSetRef CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier); | |
117 | ||
118 | /*! | |
119 | @function CFCharacterSetCreateWithCharactersInRange | |
120 | Creates a new immutable character set with the values from the given range. | |
121 | @param alloc The CFAllocator which should be used to allocate | |
122 | memory for the array and its storage for values. This | |
123 | parameter may be NULL in which case the current default | |
124 | CFAllocator is used. If this reference is not a valid | |
125 | CFAllocator, the behavior is undefined. | |
126 | @param theRange The CFRange which should be used to specify the | |
127 | Unicode range the character set is filled with. It | |
128 | accepts the range in 32-bit in the UTF-32 format. The | |
129 | valid character point range is from 0x00000 to 0x10FFFF. | |
130 | If the range is outside of the valid Unicode character | |
131 | point, the behavior is undefined. | |
132 | @result A reference to the new immutable CFCharacterSet. | |
133 | */ | |
134 | CF_EXPORT | |
135 | CFCharacterSetRef CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef alloc, CFRange theRange); | |
136 | ||
137 | /*! | |
138 | @function CFCharacterSetCreateWithCharactersInString | |
139 | Creates a new immutable character set with the values in the given string. | |
140 | @param alloc The CFAllocator which should be used to allocate | |
141 | memory for the array and its storage for values. This | |
142 | parameter may be NULL in which case the current default | |
143 | CFAllocator is used. If this reference is not a valid | |
144 | CFAllocator, the behavior is undefined. | |
145 | @param theString The CFString which should be used to specify | |
146 | the Unicode characters the character set is filled with. | |
147 | If this parameter is not a valid CFString, the behavior | |
148 | is undefined. | |
149 | @result A reference to the new immutable CFCharacterSet. | |
150 | */ | |
151 | CF_EXPORT | |
152 | CFCharacterSetRef CFCharacterSetCreateWithCharactersInString(CFAllocatorRef alloc, CFStringRef theString); | |
153 | ||
154 | /*! | |
155 | @function CFCharacterSetCreateWithBitmapRepresentation | |
156 | Creates a new immutable character set with the bitmap representation in the given data. | |
157 | @param alloc The CFAllocator which should be used to allocate | |
158 | memory for the array and its storage for values. This | |
159 | parameter may be NULL in which case the current default | |
160 | CFAllocator is used. If this reference is not a valid | |
161 | CFAllocator, the behavior is undefined. | |
162 | @param theData The CFData which should be used to specify the | |
163 | bitmap representation of the Unicode character points | |
164 | the character set is filled with. The bitmap | |
165 | representation could contain all the Unicode character | |
bd5b749c A |
166 | range starting from BMP to Plane 16. The first 8192 bytes |
167 | of the data represent the BMP range. The BMP range 8192 | |
168 | bytes can be followed by zero to sixteen 8192 byte | |
9ce05555 A |
169 | bitmaps, each one with the plane index byte prepended. |
170 | For example, the bitmap representing the BMP and Plane 2 | |
bd5b749c A |
171 | has the size of 16385 bytes (8192 bytes for BMP, 1 byte |
172 | index + 8192 bytes bitmap for Plane 2). The plane index | |
9ce05555 A |
173 | byte, in this case, contains the integer value two. If |
174 | this parameter is not a valid CFData or it contains a | |
175 | Plane index byte outside of the valid Plane range | |
176 | (1 to 16), the behavior is undefined. | |
177 | @result A reference to the new immutable CFCharacterSet. | |
178 | */ | |
179 | CF_EXPORT | |
180 | CFCharacterSetRef CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef alloc, CFDataRef theData); | |
181 | ||
9ce05555 A |
182 | /*! |
183 | @function CFCharacterSetCreateInvertedSet | |
184 | Creates a new immutable character set that is the inverse of the specified character set. | |
185 | @param alloc The CFAllocator which should be used to allocate | |
186 | memory for the array and its storage for values. This | |
187 | parameter may be NULL in which case the current default | |
188 | CFAllocator is used. If this reference is not a valid | |
189 | CFAllocator, the behavior is undefined. | |
190 | @param theSet The CFCharacterSet which is to be inverted. If this | |
191 | parameter is not a valid CFCharacterSet, the behavior is | |
192 | undefined. | |
193 | @result A reference to the new immutable CFCharacterSet. | |
194 | */ | |
195 | CF_EXPORT CFCharacterSetRef CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc, CFCharacterSetRef theSet); | |
196 | ||
197 | /*! | |
198 | @function CFCharacterSetIsSupersetOfSet | |
199 | Reports whether or not the character set is a superset of the character set specified as the second parameter. | |
200 | @param theSet The character set to be checked for the membership of theOtherset. | |
201 | If this parameter is not a valid CFCharacterSet, the behavior is undefined. | |
202 | @param theOtherset The character set to be checked whether or not it is a subset of theSet. | |
203 | If this parameter is not a valid CFCharacterSet, the behavior is undefined. | |
204 | */ | |
205 | CF_EXPORT Boolean CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet, CFCharacterSetRef theOtherset); | |
206 | ||
207 | /*! | |
208 | @function CFCharacterSetHasMemberInPlane | |
209 | Reports whether or not the character set contains at least one member character in the specified plane. | |
210 | @param theSet The character set to be checked for the membership. If this | |
211 | parameter is not a valid CFCharacterSet, the behavior is undefined. | |
212 | @param thePlane The plane number to be checked for the membership. | |
213 | The valid value range is from 0 to 16. If the value is outside of the valid | |
214 | plane number range, the behavior is undefined. | |
215 | */ | |
216 | CF_EXPORT Boolean CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet, CFIndex thePlane); | |
9ce05555 A |
217 | |
218 | /*! | |
219 | @function CFCharacterSetCreateMutable | |
220 | Creates a new empty mutable character set. | |
221 | @param alloc The CFAllocator which should be used to allocate | |
222 | memory for the array and its storage for values. This | |
223 | parameter may be NULL in which case the current default | |
224 | CFAllocator is used. If this reference is not a valid | |
225 | CFAllocator, the behavior is undefined. | |
226 | @result A reference to the new mutable CFCharacterSet. | |
227 | */ | |
228 | CF_EXPORT | |
229 | CFMutableCharacterSetRef CFCharacterSetCreateMutable(CFAllocatorRef alloc); | |
230 | ||
9ce05555 A |
231 | /*! |
232 | @function CFCharacterSetCreateCopy | |
233 | Creates a new character set with the values from the given character set. This function tries to compact the backing store where applicable. | |
234 | @param alloc The CFAllocator which should be used to allocate | |
235 | memory for the array and its storage for values. This | |
236 | parameter may be NULL in which case the current default | |
237 | CFAllocator is used. If this reference is not a valid | |
238 | CFAllocator, the behavior is undefined. | |
239 | @param theSet The CFCharacterSet which is to be copied. If this | |
240 | parameter is not a valid CFCharacterSet, the behavior is | |
241 | undefined. | |
242 | @result A reference to the new CFCharacterSet. | |
243 | */ | |
244 | CF_EXPORT | |
8ca704e1 | 245 | CFCharacterSetRef CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet); |
9ce05555 A |
246 | |
247 | /*! | |
248 | @function CFCharacterSetCreateMutableCopy | |
249 | Creates a new mutable character set with the values from the given character set. | |
250 | @param alloc The CFAllocator which should be used to allocate | |
251 | memory for the array and its storage for values. This | |
252 | parameter may be NULL in which case the current default | |
253 | CFAllocator is used. If this reference is not a valid | |
254 | CFAllocator, the behavior is undefined. | |
255 | @param theSet The CFCharacterSet which is to be copied. If this | |
256 | parameter is not a valid CFCharacterSet, the behavior is | |
257 | undefined. | |
258 | @result A reference to the new mutable CFCharacterSet. | |
259 | */ | |
260 | CF_EXPORT | |
261 | CFMutableCharacterSetRef CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet); | |
262 | ||
263 | /*! | |
264 | @function CFCharacterSetIsCharacterMember | |
265 | Reports whether or not the Unicode character is in the character set. | |
266 | @param theSet The character set to be searched. If this parameter | |
267 | is not a valid CFCharacterSet, the behavior is undefined. | |
268 | @param theChar The Unicode character for which to test against the | |
269 | character set. Note that this function takes 16-bit Unicode | |
270 | character value; hence, it does not support access to the | |
271 | non-BMP planes. | |
272 | @result true, if the value is in the character set, otherwise false. | |
273 | */ | |
274 | CF_EXPORT | |
275 | Boolean CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet, UniChar theChar); | |
276 | ||
9ce05555 A |
277 | /*! |
278 | @function CFCharacterSetIsLongCharacterMember | |
279 | Reports whether or not the UTF-32 character is in the character set. | |
280 | @param theSet The character set to be searched. If this parameter | |
281 | is not a valid CFCharacterSet, the behavior is undefined. | |
282 | @param theChar The UTF-32 character for which to test against the | |
283 | character set. | |
284 | @result true, if the value is in the character set, otherwise false. | |
285 | */ | |
286 | CF_EXPORT Boolean CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet, UTF32Char theChar); | |
9ce05555 A |
287 | |
288 | /*! | |
289 | @function CFCharacterSetCreateBitmapRepresentation | |
290 | Creates a new immutable data with the bitmap representation from the given character set. | |
291 | @param alloc The CFAllocator which should be used to allocate | |
292 | memory for the array and its storage for values. This | |
293 | parameter may be NULL in which case the current default | |
294 | CFAllocator is used. If this reference is not a valid | |
295 | CFAllocator, the behavior is undefined. | |
296 | @param theSet The CFCharacterSet which is to be used to create the | |
297 | bitmap representation from. Refer to the comments for | |
298 | CFCharacterSetCreateWithBitmapRepresentation for the | |
299 | detailed discussion of the bitmap representation format. | |
300 | If this parameter is not a valid CFCharacterSet, the | |
301 | behavior is undefined. | |
302 | @result A reference to the new immutable CFData. | |
303 | */ | |
304 | CF_EXPORT | |
305 | CFDataRef CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc, CFCharacterSetRef theSet); | |
306 | ||
307 | /*! | |
308 | @function CFCharacterSetAddCharactersInRange | |
309 | Adds the given range to the character set. | |
310 | @param theSet The character set to which the range is to be added. | |
311 | If this parameter is not a valid mutable CFCharacterSet, | |
312 | the behavior is undefined. | |
313 | @param theRange The range to add to the character set. It accepts | |
314 | the range in 32-bit in the UTF-32 format. The valid | |
315 | character point range is from 0x00000 to 0x10FFFF. If the | |
316 | range is outside of the valid Unicode character point, | |
317 | the behavior is undefined. | |
318 | */ | |
319 | CF_EXPORT | |
320 | void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange); | |
321 | ||
322 | /*! | |
323 | @function CFCharacterSetRemoveCharactersInRange | |
324 | Removes the given range from the character set. | |
325 | @param theSet The character set from which the range is to be | |
326 | removed. If this parameter is not a valid mutable | |
327 | CFCharacterSet, the behavior is undefined. | |
328 | @param theRange The range to remove from the character set. | |
329 | It accepts the range in 32-bit in the UTF-32 format. | |
330 | The valid character point range is from 0x00000 to 0x10FFFF. | |
331 | If the range is outside of the valid Unicode character point, | |
332 | the behavior is undefined. | |
333 | */ | |
334 | CF_EXPORT | |
335 | void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange); | |
336 | ||
337 | /*! | |
338 | @function CFCharacterSetAddCharactersInString | |
339 | Adds the characters in the given string to the character set. | |
340 | @param theSet The character set to which the characters in the | |
341 | string are to be added. If this parameter is not a | |
342 | valid mutable CFCharacterSet, the behavior is undefined. | |
343 | @param theString The string to add to the character set. | |
344 | If this parameter is not a valid CFString, the behavior | |
345 | is undefined. | |
346 | */ | |
347 | CF_EXPORT | |
348 | void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString); | |
349 | ||
350 | /*! | |
351 | @function CFCharacterSetRemoveCharactersInString | |
352 | Removes the characters in the given string from the character set. | |
353 | @param theSet The character set from which the characters in the | |
354 | string are to be removed. If this parameter is not a | |
355 | valid mutable CFCharacterSet, the behavior is undefined. | |
356 | @param theString The string to remove from the character set. | |
357 | If this parameter is not a valid CFString, the behavior | |
358 | is undefined. | |
359 | */ | |
360 | CF_EXPORT | |
361 | void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString); | |
362 | ||
363 | /*! | |
364 | @function CFCharacterSetUnion | |
365 | Forms the union with the given character set. | |
366 | @param theSet The destination character set into which the | |
367 | union of the two character sets is stored. If this | |
368 | parameter is not a valid mutable CFCharacterSet, the | |
369 | behavior is undefined. | |
370 | @param theOtherSet The character set with which the union is | |
371 | formed. If this parameter is not a valid CFCharacterSet, | |
372 | the behavior is undefined. | |
373 | */ | |
374 | CF_EXPORT | |
375 | void CFCharacterSetUnion(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet); | |
376 | ||
377 | /*! | |
378 | @function CFCharacterSetIntersect | |
379 | Forms the intersection with the given character set. | |
380 | @param theSet The destination character set into which the | |
381 | intersection of the two character sets is stored. | |
382 | If this parameter is not a valid mutable CFCharacterSet, | |
383 | the behavior is undefined. | |
384 | @param theOtherSet The character set with which the intersection | |
385 | is formed. If this parameter is not a valid CFCharacterSet, | |
386 | the behavior is undefined. | |
387 | */ | |
388 | CF_EXPORT | |
389 | void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet); | |
390 | ||
391 | /*! | |
392 | @function CFCharacterSetInvert | |
393 | Inverts the content of the given character set. | |
394 | @param theSet The character set to be inverted. | |
395 | If this parameter is not a valid mutable CFCharacterSet, | |
396 | the behavior is undefined. | |
397 | */ | |
398 | CF_EXPORT | |
399 | void CFCharacterSetInvert(CFMutableCharacterSetRef theSet); | |
400 | ||
bd5b749c | 401 | CF_EXTERN_C_END |
856091c5 | 402 | CF_IMPLICIT_BRIDGING_DISABLED |
9ce05555 | 403 | |
bd5b749c | 404 | #endif /* ! __COREFOUNDATION_CFCHARACTERSET__ */ |
9ce05555 | 405 |