]> git.saurik.com Git - apple/cf.git/blob - CFCharacterSet.c
9b4ab0b60dcf3e7087d416038b4acd9cc922753c
[apple/cf.git] / CFCharacterSet.c
1 /*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFCharacterSet.c
25 Copyright (c) 1999-2013, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include <CoreFoundation/CFCharacterSet.h>
30 #include <CoreFoundation/CFByteOrder.h>
31 #include "CFCharacterSetPriv.h"
32 #include <CoreFoundation/CFData.h>
33 #include <CoreFoundation/CFString.h>
34 #include "CFInternal.h"
35 #include <CoreFoundation/CFUniChar.h>
36 #include "CFUniCharPriv.h"
37 #include <stdlib.h>
38 #include <string.h>
39
40
41 #define BITSPERBYTE 8 /* (CHAR_BIT * sizeof(unsigned char)) */
42 #define LOG_BPB 3
43 #define LOG_BPLW 5
44 #define NUMCHARACTERS 65536
45
46 #define MAX_ANNEX_PLANE (16)
47
48 /* Number of things in the array keeping the bits.
49 */
50 #define __kCFBitmapSize (NUMCHARACTERS / BITSPERBYTE)
51
52 /* How many elements max can be in an __kCFCharSetClassString CFCharacterSet
53 */
54 #define __kCFStringCharSetMax 64
55
56 /* The last builtin set ID number
57 */
58 #define __kCFLastBuiltinSetID kCFCharacterSetNewline
59
60 /* How many elements in the "singles" array before we use binary search.
61 */
62 #define __kCFSetBreakeven 10
63
64 /* This tells us, within 1k or so, whether a thing is POTENTIALLY in the set (in the bitmap blob of the private structure) before we bother to do specific checking.
65 */
66 #define __CFCSetBitsInRange(n, i) (i[n>>15] & (1L << ((n>>10) % 32)))
67
68 /* Compact bitmap params
69 */
70 #define __kCFCompactBitmapNumPages (256)
71
72 #define __kCFCompactBitmapMaxPages (128) // the max pages allocated
73
74 #define __kCFCompactBitmapPageSize (__kCFBitmapSize / __kCFCompactBitmapNumPages)
75
76 typedef struct {
77 CFCharacterSetRef *_nonBMPPlanes;
78 unsigned int _validEntriesBitmap;
79 unsigned char _numOfAllocEntries;
80 unsigned char _isAnnexInverted;
81 uint16_t _padding;
82 } CFCharSetAnnexStruct;
83
84 struct __CFCharacterSet {
85 CFRuntimeBase _base;
86 CFHashCode _hashValue;
87 union {
88 struct {
89 CFIndex _type;
90 } _builtin;
91 struct {
92 UInt32 _firstChar;
93 CFIndex _length;
94 } _range;
95 struct {
96 UniChar *_buffer;
97 CFIndex _length;
98 } _string;
99 struct {
100 uint8_t *_bits;
101 } _bitmap;
102 struct {
103 uint8_t *_cBits;
104 } _compactBitmap;
105 } _variants;
106 CFCharSetAnnexStruct *_annex;
107 };
108
109 /* _base._info values interesting for CFCharacterSet
110 */
111 enum {
112 __kCFCharSetClassTypeMask = 0x0070,
113 __kCFCharSetClassBuiltin = 0x0000,
114 __kCFCharSetClassRange = 0x0010,
115 __kCFCharSetClassString = 0x0020,
116 __kCFCharSetClassBitmap = 0x0030,
117 __kCFCharSetClassSet = 0x0040,
118 __kCFCharSetClassCompactBitmap = 0x0040,
119
120 __kCFCharSetIsInvertedMask = 0x0008,
121 __kCFCharSetIsInverted = 0x0008,
122
123 __kCFCharSetHasHashValueMask = 0x00004,
124 __kCFCharSetHasHashValue = 0x0004,
125
126 /* Generic CFBase values */
127 __kCFCharSetIsMutableMask = 0x0001,
128 __kCFCharSetIsMutable = 0x0001,
129 };
130
131 /* Inline accessor macros for _base._info
132 */
133 CF_INLINE Boolean __CFCSetIsMutable(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetIsMutableMask) == __kCFCharSetIsMutable;}
134 CF_INLINE Boolean __CFCSetIsBuiltin(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassBuiltin;}
135 CF_INLINE Boolean __CFCSetIsRange(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassRange;}
136 CF_INLINE Boolean __CFCSetIsString(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassString;}
137 CF_INLINE Boolean __CFCSetIsBitmap(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassBitmap;}
138 CF_INLINE Boolean __CFCSetIsCompactBitmap(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassCompactBitmap;}
139 CF_INLINE Boolean __CFCSetIsInverted(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetIsInvertedMask) == __kCFCharSetIsInverted;}
140 CF_INLINE Boolean __CFCSetHasHashValue(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetHasHashValueMask) == __kCFCharSetHasHashValue;}
141 CF_INLINE UInt32 __CFCSetClassType(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask);}
142
143 CF_INLINE void __CFCSetPutIsMutable(CFMutableCharacterSetRef cset, Boolean isMutable) {(isMutable ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetIsMutable) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~ __kCFCharSetIsMutable));}
144 CF_INLINE void __CFCSetPutIsInverted(CFMutableCharacterSetRef cset, Boolean isInverted) {(isInverted ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetIsInverted) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetIsInverted));}
145 CF_INLINE void __CFCSetPutHasHashValue(CFMutableCharacterSetRef cset, Boolean hasHash) {(hasHash ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetHasHashValue) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetHasHashValue));}
146 CF_INLINE void __CFCSetPutClassType(CFMutableCharacterSetRef cset, UInt32 classType) {cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetClassTypeMask; cset->_base._cfinfo[CF_INFO_BITS] |= classType;}
147
148 CF_PRIVATE Boolean __CFCharacterSetIsMutable(CFCharacterSetRef cset) {return __CFCSetIsMutable(cset);}
149
150 /* Inline contents accessor macros
151 */
152 CF_INLINE CFCharacterSetPredefinedSet __CFCSetBuiltinType(CFCharacterSetRef cset) {return cset->_variants._builtin._type;}
153 CF_INLINE UInt32 __CFCSetRangeFirstChar(CFCharacterSetRef cset) {return cset->_variants._range._firstChar;}
154 CF_INLINE CFIndex __CFCSetRangeLength(CFCharacterSetRef cset) {return cset->_variants._range._length;}
155 CF_INLINE UniChar *__CFCSetStringBuffer(CFCharacterSetRef cset) {return (UniChar*)(cset->_variants._string._buffer);}
156 CF_INLINE CFIndex __CFCSetStringLength(CFCharacterSetRef cset) {return cset->_variants._string._length;}
157 CF_INLINE uint8_t *__CFCSetBitmapBits(CFCharacterSetRef cset) {return cset->_variants._bitmap._bits;}
158 CF_INLINE uint8_t *__CFCSetCompactBitmapBits(CFCharacterSetRef cset) {return cset->_variants._compactBitmap._cBits;}
159
160 CF_INLINE void __CFCSetPutBuiltinType(CFMutableCharacterSetRef cset, CFCharacterSetPredefinedSet type) {cset->_variants._builtin._type = type;}
161 CF_INLINE void __CFCSetPutRangeFirstChar(CFMutableCharacterSetRef cset, UInt32 first) {cset->_variants._range._firstChar = first;}
162 CF_INLINE void __CFCSetPutRangeLength(CFMutableCharacterSetRef cset, CFIndex length) {cset->_variants._range._length = length;}
163 CF_INLINE void __CFCSetPutStringBuffer(CFMutableCharacterSetRef cset, UniChar *theBuffer) {cset->_variants._string._buffer = theBuffer;}
164 CF_INLINE void __CFCSetPutStringLength(CFMutableCharacterSetRef cset, CFIndex length) {cset->_variants._string._length = length;}
165 CF_INLINE void __CFCSetPutBitmapBits(CFMutableCharacterSetRef cset, uint8_t *bits) {cset->_variants._bitmap._bits = bits;}
166 CF_INLINE void __CFCSetPutCompactBitmapBits(CFMutableCharacterSetRef cset, uint8_t *bits) {cset->_variants._compactBitmap._cBits = bits;}
167
168 /* Validation funcs
169 */
170 #if defined(CF_ENABLE_ASSERTIONS)
171 CF_INLINE void __CFCSetValidateBuiltinType(CFCharacterSetPredefinedSet type, const char *func) {
172 CFAssert2(type > 0 && type <= __kCFLastBuiltinSetID, __kCFLogAssertion, "%s: Unknowen builtin type %d", func, type);
173 }
174 CF_INLINE void __CFCSetValidateRange(CFRange theRange, const char *func) {
175 CFAssert3(theRange.location >= 0 && theRange.location + theRange.length <= 0x1FFFFF, __kCFLogAssertion, "%s: Range out of Unicode range (location -> %d length -> %d)", func, theRange.location, theRange.length);
176 }
177 CF_INLINE void __CFCSetValidateTypeAndMutability(CFCharacterSetRef cset, const char *func) {
178 __CFGenericValidateType(cset, __kCFCharacterSetTypeID);
179 CFAssert1(__CFCSetIsMutable(cset), __kCFLogAssertion, "%s: Immutable character set passed to mutable function", func);
180 }
181 #else
182 #define __CFCSetValidateBuiltinType(t,f)
183 #define __CFCSetValidateRange(r,f)
184 #define __CFCSetValidateTypeAndMutability(r,f)
185 #endif
186
187 /* Inline utility funcs
188 */
189 static Boolean __CFCSetIsEqualBitmap(const UInt32 *bits1, const UInt32 *bits2) {
190 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
191
192 if (bits1 == bits2) {
193 return true;
194 } else if (bits1 && bits2) {
195 if (bits1 == (const UInt32 *)-1) {
196 while (length--) if ((UInt32)-1 != *bits2++) return false;
197 } else if (bits2 == (const UInt32 *)-1) {
198 while (length--) if ((UInt32)-1 != *bits1++) return false;
199 } else {
200 while (length--) if (*bits1++ != *bits2++) return false;
201 }
202 return true;
203 } else if (!bits1 && !bits2) { // empty set
204 return true;
205 } else {
206 if (bits2) bits1 = bits2;
207 if (bits1 == (const UInt32 *)-1) return false;
208 while (length--) if (*bits1++) return false;
209 return true;
210 }
211 }
212
213 CF_INLINE Boolean __CFCSetIsEqualBitmapInverted(const UInt32 *bits1, const UInt32 *bits2) {
214 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
215
216 while (length--) if (*bits1++ != ~(*(bits2++))) return false;
217 return true;
218 }
219
220 static Boolean __CFCSetIsBitmapEqualToRange(const UInt32 *bits, UniChar firstChar, UniChar lastChar, Boolean isInverted) {
221 CFIndex firstCharIndex = firstChar >> LOG_BPB;
222 CFIndex lastCharIndex = lastChar >> LOG_BPB;
223 CFIndex length;
224 UInt32 value;
225
226 if (firstCharIndex == lastCharIndex) {
227 value = ((((UInt32)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((UInt32)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))))) << (((sizeof(UInt32) - 1) - (firstCharIndex % sizeof(UInt32))) * BITSPERBYTE);
228 value = CFSwapInt32HostToBig(value);
229 firstCharIndex = lastCharIndex = firstChar >> LOG_BPLW;
230 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
231 } else {
232 UInt32 firstCharMask;
233 UInt32 lastCharMask;
234
235 length = firstCharIndex % sizeof(UInt32);
236 firstCharMask = (((((UInt32)0xFF) << (firstChar & (BITSPERBYTE - 1))) & 0xFF) << (((sizeof(UInt32) - 1) - length) * BITSPERBYTE)) | (((UInt32)0xFFFFFFFF) >> ((length + 1) * BITSPERBYTE));
237
238 length = lastCharIndex % sizeof(UInt32);
239 lastCharMask = ((((UInt32)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))) << (((sizeof(UInt32) - 1) - length) * BITSPERBYTE)) | (((UInt32)0xFFFFFFFF) << ((sizeof(UInt32) - length) * BITSPERBYTE));
240
241 firstCharIndex = firstChar >> LOG_BPLW;
242 lastCharIndex = lastChar >> LOG_BPLW;
243
244 if (firstCharIndex == lastCharIndex) {
245 firstCharMask &= lastCharMask;
246 value = CFSwapInt32HostToBig(firstCharMask & lastCharMask);
247 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
248 } else {
249 value = CFSwapInt32HostToBig(firstCharMask);
250 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
251
252 value = CFSwapInt32HostToBig(lastCharMask);
253 if (*(bits + lastCharIndex) != (isInverted ? ~value : value)) return FALSE;
254 }
255 }
256
257 length = firstCharIndex;
258 value = (isInverted ? ((UInt32)0xFFFFFFFF) : 0);
259 while (length--) {
260 if (*(bits++) != value) return FALSE;
261 }
262
263 ++bits; // Skip firstCharIndex
264 length = (lastCharIndex - (firstCharIndex + 1));
265 value = (isInverted ? 0 : ((UInt32)0xFFFFFFFF));
266 while (length-- > 0) {
267 if (*(bits++) != value) return FALSE;
268 }
269 if (firstCharIndex != lastCharIndex) ++bits;
270
271 length = (0xFFFF >> LOG_BPLW) - lastCharIndex;
272 value = (isInverted ? ((UInt32)0xFFFFFFFF) : 0);
273 while (length--) {
274 if (*(bits++) != value) return FALSE;
275 }
276
277 return TRUE;
278 }
279
280 CF_INLINE Boolean __CFCSetIsBitmapSupersetOfBitmap(const UInt32 *bits1, const UInt32 *bits2, Boolean isInverted1, Boolean isInverted2) {
281 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
282 UInt32 val1, val2;
283
284 while (length--) {
285 val2 = (isInverted2 ? ~(*(bits2++)) : *(bits2++));
286 val1 = (isInverted1 ? ~(*(bits1++)) : *(bits1++)) & val2;
287 if (val1 != val2) return false;
288 }
289
290 return true;
291 }
292
293 CF_INLINE Boolean __CFCSetHasNonBMPPlane(CFCharacterSetRef cset) { return ((cset)->_annex && (cset)->_annex->_validEntriesBitmap ? true : false); }
294 CF_INLINE Boolean __CFCSetAnnexIsInverted (CFCharacterSetRef cset) { return ((cset)->_annex && (cset)->_annex->_isAnnexInverted ? true : false); }
295 CF_INLINE UInt32 __CFCSetAnnexValidEntriesBitmap(CFCharacterSetRef cset) { return ((cset)->_annex ? (cset)->_annex->_validEntriesBitmap : 0); }
296
297 CF_INLINE Boolean __CFCSetIsEmpty(CFCharacterSetRef cset) {
298 if (__CFCSetHasNonBMPPlane(cset) || __CFCSetAnnexIsInverted(cset)) return false;
299
300 switch (__CFCSetClassType(cset)) {
301 case __kCFCharSetClassRange: if (!__CFCSetRangeLength(cset)) return true; break;
302 case __kCFCharSetClassString: if (!__CFCSetStringLength(cset)) return true; break;
303 case __kCFCharSetClassBitmap: if (!__CFCSetBitmapBits(cset)) return true; break;
304 case __kCFCharSetClassCompactBitmap: if (!__CFCSetCompactBitmapBits(cset)) return true; break;
305 }
306 return false;
307 }
308
309 CF_INLINE void __CFCSetBitmapAddCharacter(uint8_t *bitmap, UniChar theChar) {
310 bitmap[(theChar) >> LOG_BPB] |= (((unsigned)1) << (theChar & (BITSPERBYTE - 1)));
311 }
312
313 CF_INLINE void __CFCSetBitmapRemoveCharacter(uint8_t *bitmap, UniChar theChar) {
314 bitmap[(theChar) >> LOG_BPB] &= ~(((unsigned)1) << (theChar & (BITSPERBYTE - 1)));
315 }
316
317 CF_INLINE Boolean __CFCSetIsMemberBitmap(const uint8_t *bitmap, UniChar theChar) {
318 return ((bitmap[(theChar) >> LOG_BPB] & (((unsigned)1) << (theChar & (BITSPERBYTE - 1)))) ? true : false);
319 }
320
321 #define NUM_32BIT_SLOTS (NUMCHARACTERS / 32)
322
323 CF_INLINE void __CFCSetBitmapFastFillWithValue(UInt32 *bitmap, uint8_t value) {
324 UInt32 mask = (value << 24) | (value << 16) | (value << 8) | value;
325 UInt32 numSlots = NUMCHARACTERS / 32;
326
327 while (numSlots--) *(bitmap++) = mask;
328 }
329
330 CF_INLINE void __CFCSetBitmapAddCharactersInRange(uint8_t *bitmap, UniChar firstChar, UniChar lastChar) {
331 if (firstChar == lastChar) {
332 bitmap[firstChar >> LOG_BPB] |= (((unsigned)1) << (firstChar & (BITSPERBYTE - 1)));
333 } else {
334 UInt32 idx = firstChar >> LOG_BPB;
335 UInt32 max = lastChar >> LOG_BPB;
336
337 if (idx == max) {
338 bitmap[idx] |= (((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
339 } else {
340 bitmap[idx] |= (((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1)));
341 bitmap[max] |= (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
342
343 ++idx;
344 while (idx < max) bitmap[idx++] = 0xFF;
345 }
346 }
347 }
348
349 CF_INLINE void __CFCSetBitmapRemoveCharactersInRange(uint8_t *bitmap, UniChar firstChar, UniChar lastChar) {
350 UInt32 idx = firstChar >> LOG_BPB;
351 UInt32 max = lastChar >> LOG_BPB;
352
353 if (idx == max) {
354 bitmap[idx] &= ~((((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))));
355 } else {
356 bitmap[idx] &= ~(((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1)));
357 bitmap[max] &= ~(((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
358
359 ++idx;
360 while (idx < max) bitmap[idx++] = 0;
361 }
362 }
363
364 #define __CFCSetAnnexBitmapSetPlane(bitmap,plane) ((bitmap) |= (1 << (plane)))
365 #define __CFCSetAnnexBitmapClearPlane(bitmap,plane) ((bitmap) &= (~(1 << (plane))))
366 #define __CFCSetAnnexBitmapGetPlane(bitmap,plane) ((bitmap) & (1 << (plane)))
367
368 CF_INLINE void __CFCSetAllocateAnnexForPlane(CFCharacterSetRef cset, int plane) {
369 if (cset->_annex == NULL) {
370 ((CFMutableCharacterSetRef)cset)->_annex = (CFCharSetAnnexStruct *)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharSetAnnexStruct), 0);
371 cset->_annex->_numOfAllocEntries = plane;
372 cset->_annex->_isAnnexInverted = false;
373 cset->_annex->_validEntriesBitmap = 0;
374 cset->_annex->_nonBMPPlanes = ((plane > 0) ? (CFCharacterSetRef*)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharacterSetRef) * plane, 0) : NULL);
375 } else if (cset->_annex->_numOfAllocEntries < plane) {
376 cset->_annex->_numOfAllocEntries = plane;
377 if (NULL == cset->_annex->_nonBMPPlanes) {
378 cset->_annex->_nonBMPPlanes = (CFCharacterSetRef*)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharacterSetRef) * plane, 0);
379 } else {
380 cset->_annex->_nonBMPPlanes = (CFCharacterSetRef*)CFAllocatorReallocate(CFGetAllocator(cset), (void *)cset->_annex->_nonBMPPlanes, sizeof(CFCharacterSetRef) * plane, 0);
381 }
382 }
383 }
384
385 CF_INLINE void __CFCSetAnnexSetIsInverted(CFCharacterSetRef cset, Boolean flag) {
386 if (flag) __CFCSetAllocateAnnexForPlane(cset, 0);
387 if (cset->_annex) ((CFMutableCharacterSetRef)cset)->_annex->_isAnnexInverted = flag;
388 }
389
390 CF_INLINE void __CFCSetPutCharacterSetToAnnexPlane(CFCharacterSetRef cset, CFCharacterSetRef annexCSet, int plane) {
391 __CFCSetAllocateAnnexForPlane(cset, plane);
392 if (__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane)) CFRelease(cset->_annex->_nonBMPPlanes[plane - 1]);
393 if (annexCSet) {
394 cset->_annex->_nonBMPPlanes[plane - 1] = (CFCharacterSetRef)CFRetain(annexCSet);
395 __CFCSetAnnexBitmapSetPlane(cset->_annex->_validEntriesBitmap, plane);
396 } else {
397 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, plane);
398 }
399 }
400
401 CF_INLINE CFCharacterSetRef __CFCSetGetAnnexPlaneCharacterSet(CFCharacterSetRef cset, int plane) {
402 __CFCSetAllocateAnnexForPlane(cset, plane);
403 if (!__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane)) {
404 cset->_annex->_nonBMPPlanes[plane - 1] = (CFCharacterSetRef)CFCharacterSetCreateMutable(CFGetAllocator(cset));
405 __CFCSetAnnexBitmapSetPlane(cset->_annex->_validEntriesBitmap, plane);
406 }
407 return cset->_annex->_nonBMPPlanes[plane - 1];
408 }
409
410 CF_INLINE CFCharacterSetRef __CFCSetGetAnnexPlaneCharacterSetNoAlloc(CFCharacterSetRef cset, int plane) {
411 return (cset->_annex && __CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane) ? cset->_annex->_nonBMPPlanes[plane - 1] : NULL);
412 }
413
414 CF_INLINE void __CFCSetDeallocateAnnexPlane(CFCharacterSetRef cset) {
415 if (cset->_annex) {
416 int idx;
417
418 for (idx = 0;idx < MAX_ANNEX_PLANE;idx++) {
419 if (__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, idx + 1)) {
420 CFRelease(cset->_annex->_nonBMPPlanes[idx]);
421 }
422 }
423 CFAllocatorDeallocate(CFGetAllocator(cset), cset->_annex->_nonBMPPlanes);
424 CFAllocatorDeallocate(CFGetAllocator(cset), cset->_annex);
425 ((CFMutableCharacterSetRef)cset)->_annex = NULL;
426 }
427 }
428
429 CF_INLINE uint8_t __CFCSetGetHeaderValue(const uint8_t *bitmap, int *numPages) {
430 uint8_t value = *bitmap;
431
432 if ((value == 0) || (value == UINT8_MAX)) {
433 int numBytes = __kCFCompactBitmapPageSize - 1;
434
435 while (numBytes > 0) {
436 if (*(++bitmap) != value) break;
437 --numBytes;
438 }
439 if (numBytes == 0) return value;
440 }
441 return (uint8_t)(++(*numPages));
442 }
443
444 CF_INLINE bool __CFCSetIsMemberInCompactBitmap(const uint8_t *compactBitmap, UTF16Char character) {
445 uint8_t value = compactBitmap[(character >> 8)]; // Assuming __kCFCompactBitmapNumPages == 256
446
447 if (value == 0) {
448 return false;
449 } else if (value == UINT8_MAX) {
450 return true;
451 } else {
452 compactBitmap += (__kCFCompactBitmapNumPages + (__kCFCompactBitmapPageSize * (value - 1)));
453 character &= 0xFF; // Assuming __kCFCompactBitmapNumPages == 256
454 return ((compactBitmap[(character / BITSPERBYTE)] & (1 << (character % BITSPERBYTE))) ? true : false);
455 }
456 }
457
458 CF_INLINE uint32_t __CFCSetGetCompactBitmapSize(const uint8_t *compactBitmap) {
459 uint32_t length = __kCFCompactBitmapNumPages;
460 uint32_t size = __kCFCompactBitmapNumPages;
461 uint8_t value;
462
463 while (length-- > 0) {
464 value = *(compactBitmap++);
465 if ((value != 0) && (value != UINT8_MAX)) size += __kCFCompactBitmapPageSize;
466 }
467 return size;
468 }
469
470 /* Take a private "set" structure and make a bitmap from it. Return the bitmap. THE CALLER MUST RELEASE THE RETURNED MEMORY as necessary.
471 */
472
473 CF_INLINE void __CFCSetBitmapProcessManyCharacters(unsigned char *map, unsigned n, unsigned m, Boolean isInverted) {
474 if (isInverted) {
475 __CFCSetBitmapRemoveCharactersInRange(map, n, m);
476 } else {
477 __CFCSetBitmapAddCharactersInRange(map, n, m);
478 }
479 }
480
481 CF_INLINE void __CFExpandCompactBitmap(const uint8_t *src, uint8_t *dst) {
482 const uint8_t *srcBody = src + __kCFCompactBitmapNumPages;
483 int i;
484 uint8_t value;
485
486 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
487 value = *(src++);
488 if ((value == 0) || (value == UINT8_MAX)) {
489 memset(dst, value, __kCFCompactBitmapPageSize);
490 } else {
491 memmove(dst, srcBody, __kCFCompactBitmapPageSize);
492 srcBody += __kCFCompactBitmapPageSize;
493 }
494 dst += __kCFCompactBitmapPageSize;
495 }
496 }
497
498
499 static void __CFCheckForExpandedSet(CFCharacterSetRef cset) {
500 static int8_t __CFNumberOfPlanesForLogging = -1;
501 static bool warnedOnce = false;
502
503 if (0 > __CFNumberOfPlanesForLogging) {
504 const char *envVar = __CFgetenv("CFCharacterSetCheckForExpandedSet");
505 long value = (envVar ? strtol_l(envVar, NULL, 0, NULL) : 0);
506 __CFNumberOfPlanesForLogging = (int8_t)(((value > 0) && (value <= 16)) ? value : 0);
507 }
508
509 if (__CFNumberOfPlanesForLogging) {
510 uint32_t entries = __CFCSetAnnexValidEntriesBitmap(cset);
511 int count = 0;
512
513 while (entries) {
514 if ((entries & 1) && (++count >= __CFNumberOfPlanesForLogging)) {
515 if (!warnedOnce) {
516 CFLog(kCFLogLevelWarning, CFSTR("An expanded CFMutableCharacter has been detected. Recommend to compact with CFCharacterSetCreateCopy"));
517 warnedOnce = true;
518 }
519 break;
520 }
521 entries >>= 1;
522 }
523 }
524 }
525
526 static void __CFCSetGetBitmap(CFCharacterSetRef cset, uint8_t *bits) {
527 uint8_t *bitmap;
528 CFIndex length = __kCFBitmapSize;
529
530 if (__CFCSetIsBitmap(cset) && (bitmap = __CFCSetBitmapBits(cset))) {
531 memmove(bits, bitmap, __kCFBitmapSize);
532 } else {
533 Boolean isInverted = __CFCSetIsInverted(cset);
534 uint8_t value = (isInverted ? (uint8_t)-1 : 0);
535
536 bitmap = bits;
537 while (length--) *bitmap++ = value; // Initialize the buffer
538
539 if (!__CFCSetIsEmpty(cset)) {
540 switch (__CFCSetClassType(cset)) {
541 case __kCFCharSetClassBuiltin: {
542 UInt8 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset), 0, bits, (isInverted != 0));
543 if (result == kCFUniCharBitmapEmpty && isInverted) {
544 length = __kCFBitmapSize;
545 bitmap = bits;
546 while (length--) *bitmap++ = 0;
547 } else if (result == kCFUniCharBitmapAll && !isInverted) {
548 length = __kCFBitmapSize;
549 bitmap = bits;
550 while (length--) *bitmap++ = (UInt8)0xFF;
551 }
552 }
553 break;
554
555 case __kCFCharSetClassRange: {
556 UInt32 theChar = __CFCSetRangeFirstChar(cset);
557 if (theChar < NUMCHARACTERS) { // the range starts in BMP
558 length = __CFCSetRangeLength(cset);
559 if (theChar + length >= NUMCHARACTERS) length = NUMCHARACTERS - theChar;
560 if (isInverted) {
561 __CFCSetBitmapRemoveCharactersInRange(bits, theChar, (UniChar)(theChar + length) - 1);
562 } else {
563 __CFCSetBitmapAddCharactersInRange(bits, theChar, (UniChar)(theChar + length) - 1);
564 }
565 }
566 }
567 break;
568
569 case __kCFCharSetClassString: {
570 const UniChar *buffer = __CFCSetStringBuffer(cset);
571 length = __CFCSetStringLength(cset);
572 while (length--) (isInverted ? __CFCSetBitmapRemoveCharacter(bits, *buffer++) : __CFCSetBitmapAddCharacter(bits, *buffer++));
573 }
574 break;
575
576 case __kCFCharSetClassCompactBitmap:
577 __CFExpandCompactBitmap(__CFCSetCompactBitmapBits(cset), bits);
578 break;
579 }
580 }
581 }
582 }
583
584 static Boolean __CFCharacterSetEqual(CFTypeRef cf1, CFTypeRef cf2);
585
586 static Boolean __CFCSetIsEqualAnnex(CFCharacterSetRef cf1, CFCharacterSetRef cf2) {
587 CFCharacterSetRef subSet1;
588 CFCharacterSetRef subSet2;
589 Boolean isAnnexInvertStateIdentical = (__CFCSetAnnexIsInverted(cf1) == __CFCSetAnnexIsInverted(cf2) ? true: false);
590 int idx;
591
592 if (isAnnexInvertStateIdentical) {
593 if (__CFCSetAnnexValidEntriesBitmap(cf1) != __CFCSetAnnexValidEntriesBitmap(cf2)) return false;
594 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
595 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1, idx);
596 subSet2 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2, idx);
597
598 if (subSet1 && !__CFCharacterSetEqual(subSet1, subSet2)) return false;
599 }
600 } else {
601 uint8_t bitsBuf[__kCFBitmapSize];
602 uint8_t bitsBuf2[__kCFBitmapSize];
603
604 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
605 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1, idx);
606 subSet2 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2, idx);
607
608 if (subSet1 == NULL && subSet2 == NULL) {
609 return false;
610 } else if (subSet1 == NULL) {
611 if (__CFCSetIsBitmap(subSet2)) {
612 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(subSet2), (const UInt32 *)-1)) {
613 return false;
614 }
615 } else {
616 __CFCSetGetBitmap(subSet2, bitsBuf);
617 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1)) {
618 return false;
619 }
620 }
621 } else if (subSet2 == NULL) {
622 if (__CFCSetIsBitmap(subSet1)) {
623 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)-1)) {
624 return false;
625 }
626 } else {
627 __CFCSetGetBitmap(subSet1, bitsBuf);
628 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1)) {
629 return false;
630 }
631 }
632 } else {
633 Boolean isBitmap1 = __CFCSetIsBitmap(subSet1);
634 Boolean isBitmap2 = __CFCSetIsBitmap(subSet2);
635
636 if (isBitmap1 && isBitmap2) {
637 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)__CFCSetBitmapBits(subSet2))) {
638 return false;
639 }
640 } else if (!isBitmap1 && !isBitmap2) {
641 __CFCSetGetBitmap(subSet1, bitsBuf);
642 __CFCSetGetBitmap(subSet2, bitsBuf2);
643 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)bitsBuf, (const UInt32 *)bitsBuf2)) {
644 return false;
645 }
646 } else {
647 if (isBitmap2) {
648 CFCharacterSetRef tmp = subSet2;
649 subSet2 = subSet1;
650 subSet1 = tmp;
651 }
652 __CFCSetGetBitmap(subSet2, bitsBuf);
653 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)bitsBuf)) {
654 return false;
655 }
656 }
657 }
658 }
659 }
660 return true;
661 }
662
663 /* Compact bitmap
664 */
665 static uint8_t *__CFCreateCompactBitmap(CFAllocatorRef allocator, const uint8_t *bitmap) {
666 const uint8_t *src;
667 uint8_t *dst;
668 int i;
669 int numPages = 0;
670 uint8_t header[__kCFCompactBitmapNumPages];
671
672 src = bitmap;
673 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
674 header[i] = __CFCSetGetHeaderValue(src, &numPages);
675
676 // Allocating more pages is probably not interesting enough to be compact
677 if (numPages > __kCFCompactBitmapMaxPages) return NULL;
678 src += __kCFCompactBitmapPageSize;
679 }
680
681 dst = (uint8_t *)CFAllocatorAllocate(allocator, __kCFCompactBitmapNumPages + (__kCFCompactBitmapPageSize * numPages), 0);
682
683 if (numPages > 0) {
684 uint8_t *dstBody = dst + __kCFCompactBitmapNumPages;
685
686 src = bitmap;
687 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
688 dst[i] = header[i];
689
690 if ((dst[i] != 0) && (dst[i] != UINT8_MAX)) {
691 memmove(dstBody, src, __kCFCompactBitmapPageSize);
692 dstBody += __kCFCompactBitmapPageSize;
693 }
694 src += __kCFCompactBitmapPageSize;
695 }
696 } else {
697 memmove(dst, header, __kCFCompactBitmapNumPages);
698 }
699
700 return dst;
701 }
702
703 static void __CFCSetMakeCompact(CFMutableCharacterSetRef cset) {
704 if (__CFCSetIsBitmap(cset) && __CFCSetBitmapBits(cset)) {
705 uint8_t *bitmap = __CFCSetBitmapBits(cset);
706 uint8_t *cBitmap = __CFCreateCompactBitmap(CFGetAllocator(cset), bitmap);
707
708 if (cBitmap) {
709 CFAllocatorDeallocate(CFGetAllocator(cset), bitmap);
710 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
711 __CFCSetPutCompactBitmapBits(cset, cBitmap);
712 }
713 }
714 }
715
716 static void __CFCSetAddNonBMPPlanesInRange(CFMutableCharacterSetRef cset, CFRange range) {
717 int firstChar = (range.location & 0xFFFF);
718 int maxChar = range.location + range.length;
719 int idx = range.location >> 16; // first plane
720 int maxPlane = (maxChar - 1) >> 16; // last plane
721 CFRange planeRange;
722 CFMutableCharacterSetRef annexPlane;
723
724 maxChar &= 0xFFFF;
725
726 for (idx = (idx ? idx : 1);idx <= maxPlane;idx++) {
727 planeRange.location = __CFMax(firstChar, 0);
728 planeRange.length = (idx == maxPlane && maxChar ? maxChar : 0x10000) - planeRange.location;
729 if (__CFCSetAnnexIsInverted(cset)) {
730 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, idx))) {
731 CFCharacterSetRemoveCharactersInRange(annexPlane, planeRange);
732 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) {
733 CFRelease(annexPlane);
734 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, idx);
735 }
736 }
737 } else {
738 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx), planeRange);
739 }
740 }
741 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) __CFCSetDeallocateAnnexPlane(cset);
742 }
743
744 static void __CFCSetRemoveNonBMPPlanesInRange(CFMutableCharacterSetRef cset, CFRange range) {
745 int firstChar = (range.location & 0xFFFF);
746 int maxChar = range.location + range.length;
747 int idx = range.location >> 16; // first plane
748 int maxPlane = (maxChar - 1) >> 16; // last plane
749 CFRange planeRange;
750 CFMutableCharacterSetRef annexPlane;
751
752 maxChar &= 0xFFFF;
753
754 for (idx = (idx ? idx : 1);idx <= maxPlane;idx++) {
755 planeRange.location = __CFMax(firstChar, 0);
756 planeRange.length = (idx == maxPlane && maxChar ? maxChar : 0x10000) - planeRange.location;
757 if (__CFCSetAnnexIsInverted(cset)) {
758 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx), planeRange);
759 } else {
760 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, idx))) {
761 CFCharacterSetRemoveCharactersInRange(annexPlane, planeRange);
762 if(__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) {
763 CFRelease(annexPlane);
764 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, idx);
765 }
766 }
767 }
768 }
769 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) __CFCSetDeallocateAnnexPlane(cset);
770 }
771
772 static void __CFCSetMakeBitmap(CFMutableCharacterSetRef cset) {
773 if (!__CFCSetIsBitmap(cset) || !__CFCSetBitmapBits(cset)) {
774 CFAllocatorRef allocator = CFGetAllocator(cset);
775 uint8_t *bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
776 __CFCSetGetBitmap(cset, bitmap);
777
778 if (__CFCSetIsBuiltin(cset)) {
779 CFIndex numPlanes = CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(cset));
780
781 if (numPlanes > 1) {
782 CFMutableCharacterSetRef annexSet;
783 uint8_t *annexBitmap = NULL;
784 int idx;
785 UInt8 result;
786
787 __CFCSetAllocateAnnexForPlane(cset, numPlanes - 1);
788 for (idx = 1;idx < numPlanes;idx++) {
789 if (NULL == annexBitmap) {
790 annexBitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
791 }
792 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset), idx, annexBitmap, false);
793 if (result == kCFUniCharBitmapEmpty) continue;
794 if (result == kCFUniCharBitmapAll) {
795 CFIndex bitmapLength = __kCFBitmapSize;
796 uint8_t *bytes = annexBitmap;
797 while (bitmapLength-- > 0) *(bytes++) = (uint8_t)0xFF;
798 }
799 annexSet = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx);
800 __CFCSetPutClassType(annexSet, __kCFCharSetClassBitmap);
801 __CFCSetPutBitmapBits(annexSet, annexBitmap);
802 __CFCSetPutIsInverted(annexSet, false);
803 __CFCSetPutHasHashValue(annexSet, false);
804 annexBitmap = NULL;
805 }
806 if (annexBitmap) CFAllocatorDeallocate(allocator, annexBitmap);
807 }
808 } else if (__CFCSetIsCompactBitmap(cset) && __CFCSetCompactBitmapBits(cset)) {
809 CFAllocatorDeallocate(allocator, __CFCSetCompactBitmapBits(cset));
810 __CFCSetPutCompactBitmapBits(cset, NULL);
811 } else if (__CFCSetIsString(cset) && __CFCSetStringBuffer(cset)) {
812 CFAllocatorDeallocate(allocator, __CFCSetStringBuffer(cset));
813 __CFCSetPutStringBuffer(cset, NULL);
814 } else if (__CFCSetIsRange(cset)) { // We may have to allocate annex here
815 Boolean needsToInvert = (!__CFCSetHasNonBMPPlane(cset) && __CFCSetIsInverted(cset) ? true : false);
816 __CFCSetAddNonBMPPlanesInRange(cset, CFRangeMake(__CFCSetRangeFirstChar(cset), __CFCSetRangeLength(cset)));
817 if (needsToInvert) __CFCSetAnnexSetIsInverted(cset, true);
818 }
819 __CFCSetPutClassType(cset, __kCFCharSetClassBitmap);
820 __CFCSetPutBitmapBits(cset, bitmap);
821 __CFCSetPutIsInverted(cset, false);
822 }
823 }
824
825 CF_INLINE CFMutableCharacterSetRef __CFCSetGenericCreate(CFAllocatorRef allocator, UInt32 flags) {
826 CFMutableCharacterSetRef cset;
827 CFIndex size = sizeof(struct __CFCharacterSet) - sizeof(CFRuntimeBase);
828
829 cset = (CFMutableCharacterSetRef)_CFRuntimeCreateInstance(allocator, CFCharacterSetGetTypeID(), size, NULL);
830 if (NULL == cset) return NULL;
831
832 cset->_base._cfinfo[CF_INFO_BITS] |= flags;
833 cset->_hashValue = 0;
834 cset->_annex = NULL;
835
836 return cset;
837 }
838
839 static void __CFApplySurrogatesInString(CFMutableCharacterSetRef cset, CFStringRef string, void (*applyer)(CFMutableCharacterSetRef, CFRange)) {
840 CFStringInlineBuffer buffer;
841 CFIndex index, length = CFStringGetLength(string);
842 CFRange range = CFRangeMake(0, 0);
843 UTF32Char character;
844
845 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
846
847 for (index = 0;index < length;index++) {
848 character = __CFStringGetCharacterFromInlineBufferQuick(&buffer, index);
849
850 if (CFStringIsSurrogateHighCharacter(character) && ((index + 1) < length)) {
851 UTF16Char other = __CFStringGetCharacterFromInlineBufferQuick(&buffer, index + 1);
852
853 if (CFStringIsSurrogateLowCharacter(other)) {
854 character = CFStringGetLongCharacterForSurrogatePair(character, other);
855
856 if ((range.length + range.location) == character) {
857 ++range.length;
858 } else {
859 if (range.length > 0) applyer(cset, range);
860 range.location = character;
861 range.length = 1;
862 }
863 }
864
865 ++index; // skip the low surrogate
866 }
867 }
868
869 if (range.length > 0) applyer(cset, range);
870 }
871
872
873 /* Bsearch theChar for __kCFCharSetClassString
874 */
875 CF_INLINE Boolean __CFCSetBsearchUniChar(const UniChar *theTable, CFIndex length, UniChar theChar) {
876 const UniChar *p, *q, *divider;
877
878 if ((theChar < theTable[0]) || (theChar > theTable[length - 1])) return false;
879
880 p = theTable;
881 q = p + (length - 1);
882 while (p <= q) {
883 divider = p + ((q - p) >> 1); /* divide by 2 */
884 if (theChar < *divider) q = divider - 1;
885 else if (theChar > *divider) p = divider + 1;
886 else return true;
887 }
888 return false;
889 }
890
891 /* Array of instantiated builtin set. Note builtin set ID starts with 1 so the array index is ID - 1
892 */
893 static CFCharacterSetRef *__CFBuiltinSets = NULL;
894
895 /* Global lock for character set
896 */
897 static CFSpinLock_t __CFCharacterSetLock = CFSpinLockInit;
898
899 /* CFBase API functions
900 */
901 static Boolean __CFCharacterSetEqual(CFTypeRef cf1, CFTypeRef cf2) {
902 Boolean isInvertStateIdentical = (__CFCSetIsInverted((CFCharacterSetRef)cf1) == __CFCSetIsInverted((CFCharacterSetRef)cf2) ? true: false);
903 Boolean isAnnexInvertStateIdentical = (__CFCSetAnnexIsInverted((CFCharacterSetRef)cf1) == __CFCSetAnnexIsInverted((CFCharacterSetRef)cf2) ? true: false);
904 CFIndex idx;
905 CFCharacterSetRef subSet1;
906 uint8_t bitsBuf[__kCFBitmapSize];
907 uint8_t *bits;
908 Boolean isBitmap1;
909 Boolean isBitmap2;
910
911 if (__CFCSetHasHashValue((CFCharacterSetRef)cf1) && __CFCSetHasHashValue((CFCharacterSetRef)cf2) && ((CFCharacterSetRef)cf1)->_hashValue != ((CFCharacterSetRef)cf2)->_hashValue) return false;
912 if (__CFCSetIsEmpty((CFCharacterSetRef)cf1) && __CFCSetIsEmpty((CFCharacterSetRef)cf2) && !isInvertStateIdentical) return false;
913
914 if ((__CFCSetClassType((CFCharacterSetRef)cf1) == __CFCSetClassType((CFCharacterSetRef)cf2)) && !__CFCSetIsCompactBitmap((CFCharacterSetRef)cf1)) { // Types are identical, we can do it fast
915 switch (__CFCSetClassType((CFCharacterSetRef)cf1)) {
916 case __kCFCharSetClassBuiltin:
917 return (__CFCSetBuiltinType((CFCharacterSetRef)cf1) == __CFCSetBuiltinType((CFCharacterSetRef)cf2) && isInvertStateIdentical ? true : false);
918
919 case __kCFCharSetClassRange:
920 return (__CFCSetRangeFirstChar((CFCharacterSetRef)cf1) == __CFCSetRangeFirstChar((CFCharacterSetRef)cf2) && __CFCSetRangeLength((CFCharacterSetRef)cf1) && __CFCSetRangeLength((CFCharacterSetRef)cf2) && isInvertStateIdentical ? true : false);
921
922 case __kCFCharSetClassString:
923 if (isInvertStateIdentical) {
924 const UniChar *buf1 = __CFCSetStringBuffer((CFCharacterSetRef)cf1);
925 const UniChar *buf1End = buf1 + __CFCSetStringLength((CFCharacterSetRef)cf1);
926 const UniChar *buf2 = __CFCSetStringBuffer((CFCharacterSetRef)cf2);
927 const UniChar *buf2End = buf2 + __CFCSetStringLength((CFCharacterSetRef)cf2);
928
929 while ((buf1 < buf1End) && (buf2 < buf2End)) {
930 UniChar char1 = *buf1;
931 UniChar char2 = *buf2;
932
933 if (char1 != char2) return false;
934
935 do { ++buf1; } while ((buf1 < buf1End) && (char1 == *buf1));
936 do { ++buf2; } while ((buf2 < buf2End) && (char2 == *buf2));
937 }
938 } else {
939 return false;
940 }
941 break;
942
943 case __kCFCharSetClassBitmap:
944 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf2))) return false;
945 break;
946 }
947 return __CFCSetIsEqualAnnex((CFCharacterSetRef)cf1, (CFCharacterSetRef)cf2);
948 }
949
950 // Check for easy empty cases
951 if (__CFCSetIsEmpty((CFCharacterSetRef)cf1) || __CFCSetIsEmpty((CFCharacterSetRef)cf2)) {
952 CFCharacterSetRef emptySet = (__CFCSetIsEmpty((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
953 CFCharacterSetRef nonEmptySet = (emptySet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
954
955 if (__CFCSetIsBuiltin(nonEmptySet)) {
956 return false;
957 } else if (__CFCSetIsRange(nonEmptySet)) {
958 if (isInvertStateIdentical) {
959 return (__CFCSetRangeLength(nonEmptySet) ? false : true);
960 } else {
961 return (__CFCSetRangeLength(nonEmptySet) == 0x110000 ? true : false);
962 }
963 } else {
964 if (__CFCSetAnnexIsInverted(nonEmptySet)) {
965 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet) != 0x1FFFE) return false;
966 } else {
967 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet)) return false;
968 }
969
970 if (__CFCSetIsBitmap(nonEmptySet)) {
971 bits = __CFCSetBitmapBits(nonEmptySet);
972 } else {
973 bits = bitsBuf;
974 __CFCSetGetBitmap(nonEmptySet, bitsBuf);
975 }
976
977 if (__CFCSetIsEqualBitmap(NULL, (const UInt32 *)bits)) {
978 if (!__CFCSetAnnexIsInverted(nonEmptySet)) return true;
979 } else {
980 return false;
981 }
982
983 // Annex set has to be CFRangeMake(0x10000, 0xfffff)
984 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
985 if (__CFCSetIsBitmap(nonEmptySet)) {
986 if (!__CFCSetIsEqualBitmap((__CFCSetAnnexIsInverted(nonEmptySet) ? NULL : (const UInt32 *)-1), (const UInt32 *)bitsBuf)) return false;
987 } else {
988 __CFCSetGetBitmap(__CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonEmptySet, idx), bitsBuf);
989 if (!__CFCSetIsEqualBitmap((const UInt32 *)-1, (const UInt32 *)bitsBuf)) return false;
990 }
991 }
992 return true;
993 }
994 }
995
996 if (__CFCSetIsBuiltin((CFCharacterSetRef)cf1) || __CFCSetIsBuiltin((CFCharacterSetRef)cf2)) {
997 CFCharacterSetRef builtinSet = (__CFCSetIsBuiltin((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
998 CFCharacterSetRef nonBuiltinSet = (builtinSet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
999
1000
1001 if (__CFCSetIsRange(nonBuiltinSet)) {
1002 UTF32Char firstChar = __CFCSetRangeFirstChar(nonBuiltinSet);
1003 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(nonBuiltinSet) - 1);
1004 uint8_t firstPlane = (firstChar >> 16) & 0xFF;
1005 uint8_t lastPlane = (lastChar >> 16) & 0xFF;
1006 uint8_t result;
1007
1008 for (idx = 0;idx < MAX_ANNEX_PLANE;idx++) {
1009 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), idx, bitsBuf, (isInvertStateIdentical != 0));
1010
1011 if (idx < firstPlane || idx > lastPlane) {
1012 if (result == kCFUniCharBitmapAll) {
1013 return false;
1014 } else if (result == kCFUniCharBitmapFilled) {
1015 if (!__CFCSetIsEqualBitmap(NULL, (const UInt32 *)bitsBuf)) return false;
1016 }
1017 } else if (idx > firstPlane && idx < lastPlane) {
1018 if (result == kCFUniCharBitmapEmpty) {
1019 return false;
1020 } else if (result == kCFUniCharBitmapFilled) {
1021 if (!__CFCSetIsEqualBitmap((const UInt32 *)-1, (const UInt32 *)bitsBuf)) return false;
1022 }
1023 } else {
1024 if (result == kCFUniCharBitmapEmpty) {
1025 return false;
1026 } else if (result == kCFUniCharBitmapAll) {
1027 if (idx == firstPlane) {
1028 if (((firstChar & 0xFFFF) != 0) || (firstPlane == lastPlane && ((lastChar & 0xFFFF) != 0xFFFF))) return false;
1029 } else {
1030 if (((lastChar & 0xFFFF) != 0xFFFF) || (firstPlane == lastPlane && ((firstChar & 0xFFFF) != 0))) return false;
1031 }
1032 } else {
1033 if (idx == firstPlane) {
1034 if (!__CFCSetIsBitmapEqualToRange((const UInt32 *)bitsBuf, firstChar & 0xFFFF, (firstPlane == lastPlane ? lastChar & 0xFFFF : 0xFFFF), false)) return false;
1035 } else {
1036 if (!__CFCSetIsBitmapEqualToRange((const UInt32 *)bitsBuf, (firstPlane == lastPlane ? firstChar & 0xFFFF : 0), lastChar & 0xFFFF, false)) return false;
1037 }
1038 }
1039 }
1040 }
1041 return true;
1042 } else {
1043 uint8_t bitsBuf2[__kCFBitmapSize];
1044 uint8_t result;
1045
1046 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), 0, bitsBuf, (__CFCSetIsInverted(builtinSet) != 0));
1047 if (result == kCFUniCharBitmapFilled) {
1048 if (__CFCSetIsBitmap(nonBuiltinSet)) {
1049 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)__CFCSetBitmapBits(nonBuiltinSet))) return false;
1050 } else {
1051
1052 __CFCSetGetBitmap(nonBuiltinSet, bitsBuf2);
1053 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)bitsBuf2)) {
1054 return false;
1055 }
1056 }
1057 } else {
1058 if (__CFCSetIsBitmap(nonBuiltinSet)) {
1059 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1 : NULL), (const UInt32 *)__CFCSetBitmapBits(nonBuiltinSet))) return false;
1060 } else {
1061 __CFCSetGetBitmap(nonBuiltinSet, bitsBuf);
1062 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32 *)bitsBuf)) return false;
1063 }
1064 }
1065
1066 isInvertStateIdentical = (__CFCSetIsInverted(builtinSet) == __CFCSetAnnexIsInverted(nonBuiltinSet) ? true : false);
1067
1068 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
1069 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), idx, bitsBuf, !isInvertStateIdentical);
1070 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonBuiltinSet, idx);
1071
1072 if (result == kCFUniCharBitmapFilled) {
1073 if (NULL == subSet1) {
1074 return false;
1075 } else if (__CFCSetIsBitmap(subSet1)) {
1076 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)__CFCSetBitmapBits(subSet1))) {
1077 return false;
1078 }
1079 } else {
1080
1081 __CFCSetGetBitmap(subSet1, bitsBuf2);
1082 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)bitsBuf2)) {
1083 return false;
1084 }
1085 }
1086 } else {
1087 if (NULL == subSet1) {
1088 if (result == kCFUniCharBitmapAll) {
1089 return false;
1090 }
1091 } else if (__CFCSetIsBitmap(subSet1)) {
1092 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32*)__CFCSetBitmapBits(subSet1))) {
1093 return false;
1094 }
1095 } else {
1096 __CFCSetGetBitmap(subSet1, bitsBuf);
1097 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32*)bitsBuf)) {
1098 return false;
1099 }
1100 }
1101 }
1102 }
1103 return true;
1104 }
1105 }
1106
1107 if (__CFCSetIsRange((CFCharacterSetRef)cf1) || __CFCSetIsRange((CFCharacterSetRef)cf2)) {
1108 CFCharacterSetRef rangeSet = (__CFCSetIsRange((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
1109 CFCharacterSetRef nonRangeSet = (rangeSet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
1110 UTF32Char firstChar = __CFCSetRangeFirstChar(rangeSet);
1111 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(rangeSet) - 1);
1112 uint8_t firstPlane = (firstChar >> 16) & 0xFF;
1113 uint8_t lastPlane = (lastChar >> 16) & 0xFF;
1114 Boolean isRangeSetInverted = __CFCSetIsInverted(rangeSet);
1115
1116 if (__CFCSetIsBitmap(nonRangeSet)) {
1117 bits = __CFCSetBitmapBits(nonRangeSet);
1118 } else {
1119 bits = bitsBuf;
1120 __CFCSetGetBitmap(nonRangeSet, bitsBuf);
1121 }
1122 if (firstPlane == 0) {
1123 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, firstChar, (lastPlane == 0 ? lastChar : 0xFFFF), isRangeSetInverted)) return false;
1124 firstPlane = 1;
1125 firstChar = 0;
1126 } else {
1127 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isRangeSetInverted ? (const UInt32 *)-1 : NULL))) return false;
1128 firstChar &= 0xFFFF;
1129 }
1130
1131 lastChar &= 0xFFFF;
1132
1133 isAnnexInvertStateIdentical = (isRangeSetInverted == __CFCSetAnnexIsInverted(nonRangeSet) ? true : false);
1134
1135 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
1136 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonRangeSet, idx);
1137 if (NULL == subSet1) {
1138 if (idx < firstPlane || idx > lastPlane) {
1139 if (!isAnnexInvertStateIdentical) return false;
1140 } else if (idx > firstPlane && idx < lastPlane) {
1141 if (isAnnexInvertStateIdentical) return false;
1142 } else if (idx == firstPlane) {
1143 if (isAnnexInvertStateIdentical || firstChar || (idx == lastPlane && lastChar != 0xFFFF)) return false;
1144 } else if (idx == lastPlane) {
1145 if (isAnnexInvertStateIdentical || (idx == firstPlane && firstChar) || (lastChar != 0xFFFF)) return false;
1146 }
1147 } else {
1148 if (__CFCSetIsBitmap(subSet1)) {
1149 bits = __CFCSetBitmapBits(subSet1);
1150 } else {
1151 __CFCSetGetBitmap(subSet1, bitsBuf);
1152 bits = bitsBuf;
1153 }
1154
1155 if (idx < firstPlane || idx > lastPlane) {
1156 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isAnnexInvertStateIdentical ? NULL : (const UInt32 *)-1))) return false;
1157 } else if (idx > firstPlane && idx < lastPlane) {
1158 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isAnnexInvertStateIdentical ? (const UInt32 *)-1 : NULL))) return false;
1159 } else if (idx == firstPlane) {
1160 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, firstChar, (idx == lastPlane ? lastChar : 0xFFFF), !isAnnexInvertStateIdentical)) return false;
1161 } else if (idx == lastPlane) {
1162 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, (idx == firstPlane ? firstChar : 0), lastChar, !isAnnexInvertStateIdentical)) return false;
1163 }
1164 }
1165 }
1166 return true;
1167 }
1168
1169 isBitmap1 = __CFCSetIsBitmap((CFCharacterSetRef)cf1);
1170 isBitmap2 = __CFCSetIsBitmap((CFCharacterSetRef)cf2);
1171
1172 if (isBitmap1 && isBitmap2) {
1173 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf2))) return false;
1174 } else if (!isBitmap1 && !isBitmap2) {
1175 uint8_t bitsBuf2[__kCFBitmapSize];
1176
1177 __CFCSetGetBitmap((CFCharacterSetRef)cf1, bitsBuf);
1178 __CFCSetGetBitmap((CFCharacterSetRef)cf2, bitsBuf2);
1179
1180 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)bitsBuf2)) {
1181 return false;
1182 }
1183 } else {
1184 if (isBitmap2) {
1185 CFCharacterSetRef tmp = (CFCharacterSetRef)cf2;
1186 cf2 = cf1;
1187 cf1 = tmp;
1188 }
1189
1190 __CFCSetGetBitmap((CFCharacterSetRef)cf2, bitsBuf);
1191
1192 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)bitsBuf)) return false;
1193 }
1194 return __CFCSetIsEqualAnnex((CFCharacterSetRef)cf1, (CFCharacterSetRef)cf2);
1195 }
1196
1197 static CFHashCode __CFCharacterSetHash(CFTypeRef cf) {
1198 if (!__CFCSetHasHashValue((CFCharacterSetRef)cf)) {
1199 if (__CFCSetIsEmpty((CFCharacterSetRef)cf)) {
1200 ((CFMutableCharacterSetRef)cf)->_hashValue = (__CFCSetIsInverted((CFCharacterSetRef)cf) ? ((UInt32)0xFFFFFFFF) : 0);
1201 } else if (__CFCSetIsBitmap( (CFCharacterSetRef) cf )) {
1202 ((CFMutableCharacterSetRef)cf)->_hashValue = CFHashBytes(__CFCSetBitmapBits((CFCharacterSetRef)cf), __kCFBitmapSize);
1203 } else {
1204 uint8_t bitsBuf[__kCFBitmapSize];
1205 __CFCSetGetBitmap((CFCharacterSetRef)cf, bitsBuf);
1206 ((CFMutableCharacterSetRef)cf)->_hashValue = CFHashBytes(bitsBuf, __kCFBitmapSize);
1207 }
1208 __CFCSetPutHasHashValue((CFMutableCharacterSetRef)cf, true);
1209 }
1210 return ((CFCharacterSetRef)cf)->_hashValue;
1211 }
1212
1213 static CFStringRef __CFCharacterSetCopyDescription(CFTypeRef cf) {
1214 CFMutableStringRef string;
1215 CFIndex idx;
1216 CFIndex length;
1217
1218 if (__CFCSetIsEmpty((CFCharacterSetRef)cf)) {
1219 return (CFStringRef)(__CFCSetIsInverted((CFCharacterSetRef)cf) ? CFRetain(CFSTR("<CFCharacterSet All>")) : CFRetain(CFSTR("<CFCharacterSet Empty>")));
1220 }
1221
1222 switch (__CFCSetClassType((CFCharacterSetRef)cf)) {
1223 case __kCFCharSetClassBuiltin:
1224 switch (__CFCSetBuiltinType((CFCharacterSetRef)cf)) {
1225 case kCFCharacterSetControl: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Control Set>"));
1226 case kCFCharacterSetWhitespace : return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Whitespace Set>"));
1227 case kCFCharacterSetWhitespaceAndNewline: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined WhitespaceAndNewline Set>"));
1228 case kCFCharacterSetDecimalDigit: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined DecimalDigit Set>"));
1229 case kCFCharacterSetLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Letter Set>"));
1230 case kCFCharacterSetLowercaseLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined LowercaseLetter Set>"));
1231 case kCFCharacterSetUppercaseLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined UppercaseLetter Set>"));
1232 case kCFCharacterSetNonBase: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined NonBase Set>"));
1233 case kCFCharacterSetDecomposable: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Decomposable Set>"));
1234 case kCFCharacterSetAlphaNumeric: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined AlphaNumeric Set>"));
1235 case kCFCharacterSetPunctuation: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Punctuation Set>"));
1236 case kCFCharacterSetIllegal: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Illegal Set>"));
1237 case kCFCharacterSetCapitalizedLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined CapitalizedLetter Set>"));
1238 case kCFCharacterSetSymbol: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Symbol Set>"));
1239 case kCFCharacterSetNewline: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Newline Set>"));
1240 }
1241 break;
1242
1243 case __kCFCharSetClassRange:
1244 return CFStringCreateWithFormat(CFGetAllocator((CFCharacterSetRef)cf), NULL, CFSTR("<CFCharacterSet Range(%u, %ld)>"), (unsigned int)__CFCSetRangeFirstChar((CFCharacterSetRef)cf), (long)__CFCSetRangeLength((CFCharacterSetRef)cf));
1245
1246 case __kCFCharSetClassString: {
1247 CFStringRef format = CFSTR("<CFCharacterSet Items(");
1248
1249 length = __CFCSetStringLength((CFCharacterSetRef)cf);
1250 string = CFStringCreateMutable(CFGetAllocator(cf), CFStringGetLength(format) + 7 * length + 2); // length of format + "U+XXXX "(7) * length + ")>"(2)
1251 CFStringAppend(string, format);
1252 for (idx = 0;idx < length;idx++) {
1253 CFStringAppendFormat(string, NULL, CFSTR("%sU+%04X"), (idx > 0 ? " " : ""), (unsigned int)((__CFCSetStringBuffer((CFCharacterSetRef)cf))[idx]));
1254 }
1255 CFStringAppend(string, CFSTR(")>"));
1256 return string;
1257 }
1258
1259 case __kCFCharSetClassBitmap:
1260 case __kCFCharSetClassCompactBitmap:
1261 return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Bitmap>")); // ??? Should generate description for 8k bitmap ?
1262 }
1263 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1264 return NULL;
1265 }
1266
1267 static void __CFCharacterSetDeallocate(CFTypeRef cf) {
1268 CFAllocatorRef allocator = CFGetAllocator(cf);
1269
1270 if (__CFCSetIsBuiltin((CFCharacterSetRef)cf) && !__CFCSetIsMutable((CFCharacterSetRef)cf) && !__CFCSetIsInverted((CFCharacterSetRef)cf)) {
1271 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)cf));
1272 if (sharedSet == cf) { // We're trying to dealloc the builtin set
1273 CFAssert1(0, __kCFLogAssertion, "%s: Trying to deallocate predefined set. The process is likely to crash.", __PRETTY_FUNCTION__);
1274 return; // We never deallocate builtin set
1275 }
1276 }
1277
1278 if (__CFCSetIsString((CFCharacterSetRef)cf) && __CFCSetStringBuffer((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetStringBuffer((CFCharacterSetRef)cf));
1279 else if (__CFCSetIsBitmap((CFCharacterSetRef)cf) && __CFCSetBitmapBits((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetBitmapBits((CFCharacterSetRef)cf));
1280 else if (__CFCSetIsCompactBitmap((CFCharacterSetRef)cf) && __CFCSetCompactBitmapBits((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetCompactBitmapBits((CFCharacterSetRef)cf));
1281 __CFCSetDeallocateAnnexPlane((CFCharacterSetRef)cf);
1282 }
1283
1284 static CFTypeID __kCFCharacterSetTypeID = _kCFRuntimeNotATypeID;
1285
1286 static const CFRuntimeClass __CFCharacterSetClass = {
1287 0,
1288 "CFCharacterSet",
1289 NULL, // init
1290 NULL, // copy
1291 __CFCharacterSetDeallocate,
1292 __CFCharacterSetEqual,
1293 __CFCharacterSetHash,
1294 NULL, //
1295 __CFCharacterSetCopyDescription
1296 };
1297
1298 static bool __CFCheckForExapendedSet = false;
1299
1300 CF_PRIVATE void __CFCharacterSetInitialize(void) {
1301 const char *checkForExpandedSet = __CFgetenv("__CF_DEBUG_EXPANDED_SET");
1302
1303 __kCFCharacterSetTypeID = _CFRuntimeRegisterClass(&__CFCharacterSetClass);
1304
1305 if (checkForExpandedSet && (*checkForExpandedSet == 'Y')) __CFCheckForExapendedSet = true;
1306 }
1307
1308 /* Public functions
1309 */
1310
1311 CFTypeID CFCharacterSetGetTypeID(void) {
1312 return __kCFCharacterSetTypeID;
1313 }
1314
1315 /*** CharacterSet creation ***/
1316 /* Functions to create basic immutable characterset.
1317 */
1318 CFCharacterSetRef CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier) {
1319 CFCharacterSetRef cset;
1320
1321 __CFCSetValidateBuiltinType(theSetIdentifier, __PRETTY_FUNCTION__);
1322
1323 __CFSpinLock(&__CFCharacterSetLock);
1324 cset = ((NULL != __CFBuiltinSets) ? __CFBuiltinSets[theSetIdentifier - 1] : NULL);
1325 __CFSpinUnlock(&__CFCharacterSetLock);
1326
1327 if (NULL != cset) return cset;
1328
1329 if (!(cset = __CFCSetGenericCreate(kCFAllocatorSystemDefault, __kCFCharSetClassBuiltin))) return NULL;
1330 __CFCSetPutBuiltinType((CFMutableCharacterSetRef)cset, theSetIdentifier);
1331
1332 __CFSpinLock(&__CFCharacterSetLock);
1333 if (!__CFBuiltinSets) {
1334 __CFBuiltinSets = (CFCharacterSetRef *)CFAllocatorAllocate((CFAllocatorRef)CFRetain(__CFGetDefaultAllocator()), sizeof(CFCharacterSetRef) * __kCFLastBuiltinSetID, 0);
1335 memset(__CFBuiltinSets, 0, sizeof(CFCharacterSetRef) * __kCFLastBuiltinSetID);
1336 }
1337
1338 __CFBuiltinSets[theSetIdentifier - 1] = cset;
1339 __CFSpinUnlock(&__CFCharacterSetLock);
1340
1341 return cset;
1342 }
1343
1344 CFCharacterSetRef CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef allocator, CFRange theRange) {
1345 CFMutableCharacterSetRef cset;
1346
1347 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
1348
1349 if (theRange.length) {
1350 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassRange))) return NULL;
1351 __CFCSetPutRangeFirstChar(cset, theRange.location);
1352 __CFCSetPutRangeLength(cset, theRange.length);
1353 } else {
1354 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap))) return NULL;
1355 __CFCSetPutBitmapBits(cset, NULL);
1356 __CFCSetPutHasHashValue(cset, true); // _hashValue is 0
1357 }
1358
1359 return cset;
1360 }
1361
1362 static int chcompar(const void *a, const void *b) {
1363 return -(int)(*(UniChar *)b - *(UniChar *)a);
1364 }
1365
1366 CFCharacterSetRef CFCharacterSetCreateWithCharactersInString(CFAllocatorRef allocator, CFStringRef theString) {
1367 CFIndex length;
1368
1369 length = CFStringGetLength(theString);
1370 if (length < __kCFStringCharSetMax) {
1371 CFMutableCharacterSetRef cset;
1372
1373 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassString))) return NULL;
1374 __CFCSetPutStringBuffer(cset, (UniChar *)CFAllocatorAllocate(CFGetAllocator(cset), __kCFStringCharSetMax * sizeof(UniChar), 0));
1375 __CFCSetPutStringLength(cset, length);
1376 CFStringGetCharacters(theString, CFRangeMake(0, length), __CFCSetStringBuffer(cset));
1377 qsort(__CFCSetStringBuffer(cset), length, sizeof(UniChar), chcompar);
1378
1379 if (0 == length) {
1380 __CFCSetPutHasHashValue(cset, true); // _hashValue is 0
1381 } else if (length > 1) { // Check for surrogate
1382 const UTF16Char *characters = __CFCSetStringBuffer(cset);
1383 const UTF16Char *charactersLimit = characters + length;
1384
1385 if ((*characters < 0xDC00UL) && (*(charactersLimit - 1) > 0xDBFFUL)) { // might have surrogate chars
1386 while (characters < charactersLimit) {
1387 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) {
1388 CFRelease(cset);
1389 cset = NULL;
1390 break;
1391 }
1392 ++characters;
1393 }
1394 }
1395 }
1396 if (NULL != cset) return cset;
1397 }
1398
1399 CFMutableCharacterSetRef mcset = CFCharacterSetCreateMutable(allocator);
1400 CFCharacterSetAddCharactersInString(mcset, theString);
1401 __CFCSetMakeCompact(mcset);
1402 __CFCSetPutIsMutable(mcset, false);
1403 return mcset;
1404 }
1405
1406 CFCharacterSetRef CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef allocator, CFDataRef theData) {
1407 CFMutableCharacterSetRef cset;
1408 CFIndex length;
1409
1410 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap))) return NULL;
1411
1412 if (theData && (length = CFDataGetLength(theData)) > 0) {
1413 uint8_t *bitmap;
1414 uint8_t *cBitmap;
1415
1416 if (length < __kCFBitmapSize) {
1417 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1418 memmove(bitmap, CFDataGetBytePtr(theData), length);
1419 memset(bitmap + length, 0, __kCFBitmapSize - length);
1420
1421 cBitmap = __CFCreateCompactBitmap(allocator, bitmap);
1422
1423 if (cBitmap == NULL) {
1424 __CFCSetPutBitmapBits(cset, bitmap);
1425 } else {
1426 CFAllocatorDeallocate(allocator, bitmap);
1427 __CFCSetPutCompactBitmapBits(cset, cBitmap);
1428 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1429 }
1430 } else {
1431 cBitmap = __CFCreateCompactBitmap(allocator, CFDataGetBytePtr(theData));
1432
1433 if (cBitmap == NULL) {
1434 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1435 memmove(bitmap, CFDataGetBytePtr(theData), __kCFBitmapSize);
1436
1437 __CFCSetPutBitmapBits(cset, bitmap);
1438 } else {
1439 __CFCSetPutCompactBitmapBits(cset, cBitmap);
1440 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1441 }
1442
1443 if (length > __kCFBitmapSize) {
1444 CFMutableCharacterSetRef annexSet;
1445 const uint8_t *bytes = CFDataGetBytePtr(theData) + __kCFBitmapSize;
1446
1447 length -= __kCFBitmapSize;
1448
1449 while (length > 1) {
1450 annexSet = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, *(bytes++));
1451 --length; // Decrement the plane no byte
1452
1453 if (length < __kCFBitmapSize) {
1454 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1455 memmove(bitmap, bytes, length);
1456 memset(bitmap + length, 0, __kCFBitmapSize - length);
1457
1458 cBitmap = __CFCreateCompactBitmap(allocator, bitmap);
1459
1460 if (cBitmap == NULL) {
1461 __CFCSetPutBitmapBits(annexSet, bitmap);
1462 } else {
1463 CFAllocatorDeallocate(allocator, bitmap);
1464 __CFCSetPutCompactBitmapBits(annexSet, cBitmap);
1465 __CFCSetPutClassType(annexSet, __kCFCharSetClassCompactBitmap);
1466 }
1467 } else {
1468 cBitmap = __CFCreateCompactBitmap(allocator, bytes);
1469
1470 if (cBitmap == NULL) {
1471 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1472 memmove(bitmap, bytes, __kCFBitmapSize);
1473
1474 __CFCSetPutBitmapBits(annexSet, bitmap);
1475 } else {
1476 __CFCSetPutCompactBitmapBits(annexSet, cBitmap);
1477 __CFCSetPutClassType(annexSet, __kCFCharSetClassCompactBitmap);
1478 }
1479 }
1480 length -= __kCFBitmapSize;
1481 bytes += __kCFBitmapSize;
1482 }
1483 }
1484 }
1485 } else {
1486 __CFCSetPutBitmapBits(cset, NULL);
1487 __CFCSetPutHasHashValue(cset, true); // Hash value is 0
1488 }
1489
1490 return cset;
1491 }
1492
1493 CFCharacterSetRef CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1494 CFMutableCharacterSetRef cset;
1495
1496 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, CFCharacterSetRef , (NSCharacterSet *)theSet, invertedSet);
1497
1498 cset = CFCharacterSetCreateMutableCopy(alloc, theSet);
1499 CFCharacterSetInvert(cset);
1500 __CFCSetPutIsMutable(cset, false);
1501
1502 return cset;
1503 }
1504
1505 /* Functions to create mutable characterset.
1506 */
1507 CFMutableCharacterSetRef CFCharacterSetCreateMutable(CFAllocatorRef allocator) {
1508 CFMutableCharacterSetRef cset;
1509
1510 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap| __kCFCharSetIsMutable))) return NULL;
1511 __CFCSetPutBitmapBits(cset, NULL);
1512 __CFCSetPutHasHashValue(cset, true); // Hash value is 0
1513
1514 return cset;
1515 }
1516
1517 static CFMutableCharacterSetRef __CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet, bool isMutable) {
1518 CFMutableCharacterSetRef cset;
1519
1520 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, CFMutableCharacterSetRef , (NSCharacterSet *)theSet, mutableCopy);
1521
1522 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1523
1524 if (!isMutable && !__CFCSetIsMutable(theSet)) {
1525 return (CFMutableCharacterSetRef)CFRetain(theSet);
1526 }
1527
1528 cset = CFCharacterSetCreateMutable(alloc);
1529
1530 __CFCSetPutClassType(cset, __CFCSetClassType(theSet));
1531 __CFCSetPutHasHashValue(cset, __CFCSetHasHashValue(theSet));
1532 __CFCSetPutIsInverted(cset, __CFCSetIsInverted(theSet));
1533 cset->_hashValue = theSet->_hashValue;
1534
1535 switch (__CFCSetClassType(theSet)) {
1536 case __kCFCharSetClassBuiltin:
1537 __CFCSetPutBuiltinType(cset, __CFCSetBuiltinType(theSet));
1538 break;
1539
1540 case __kCFCharSetClassRange:
1541 __CFCSetPutRangeFirstChar(cset, __CFCSetRangeFirstChar(theSet));
1542 __CFCSetPutRangeLength(cset, __CFCSetRangeLength(theSet));
1543 break;
1544
1545 case __kCFCharSetClassString:
1546 __CFCSetPutStringBuffer(cset, (UniChar *)CFAllocatorAllocate(alloc, __kCFStringCharSetMax * sizeof(UniChar), 0));
1547
1548 __CFCSetPutStringLength(cset, __CFCSetStringLength(theSet));
1549 memmove(__CFCSetStringBuffer(cset), __CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet) * sizeof(UniChar));
1550 break;
1551
1552 case __kCFCharSetClassBitmap:
1553 if (__CFCSetBitmapBits(theSet)) {
1554 uint8_t * bitmap = (isMutable ? NULL : __CFCreateCompactBitmap(alloc, __CFCSetBitmapBits(theSet)));
1555
1556 if (bitmap == NULL) {
1557 bitmap = (uint8_t *)CFAllocatorAllocate(alloc, sizeof(uint8_t) * __kCFBitmapSize, 0);
1558 memmove(bitmap, __CFCSetBitmapBits(theSet), __kCFBitmapSize);
1559 __CFCSetPutBitmapBits(cset, bitmap);
1560 } else {
1561 __CFCSetPutCompactBitmapBits(cset, bitmap);
1562 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1563 }
1564 } else {
1565 __CFCSetPutBitmapBits(cset, NULL);
1566 }
1567 break;
1568
1569 case __kCFCharSetClassCompactBitmap: {
1570 const uint8_t *compactBitmap = __CFCSetCompactBitmapBits(theSet);
1571
1572 if (compactBitmap) {
1573 uint32_t size = __CFCSetGetCompactBitmapSize(compactBitmap);
1574 uint8_t *newBitmap = (uint8_t *)CFAllocatorAllocate(alloc, size, 0);
1575
1576 memmove(newBitmap, compactBitmap, size);
1577 __CFCSetPutCompactBitmapBits(cset, newBitmap);
1578 }
1579 }
1580 break;
1581
1582 default:
1583 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1584 }
1585 if (__CFCSetHasNonBMPPlane(theSet)) {
1586 CFMutableCharacterSetRef annexPlane;
1587 int idx;
1588
1589 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
1590 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx))) {
1591 annexPlane = __CFCharacterSetCreateCopy(alloc, annexPlane, isMutable);
1592 __CFCSetPutCharacterSetToAnnexPlane(cset, annexPlane, idx);
1593 CFRelease(annexPlane);
1594 }
1595 }
1596 __CFCSetAnnexSetIsInverted(cset, __CFCSetAnnexIsInverted(theSet));
1597 } else if (__CFCSetAnnexIsInverted(theSet)) {
1598 __CFCSetAnnexSetIsInverted(cset, true);
1599 }
1600
1601 return cset;
1602 }
1603
1604 CFCharacterSetRef CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1605 return __CFCharacterSetCreateCopy(alloc, theSet, false);
1606 }
1607
1608 CFMutableCharacterSetRef CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1609 return __CFCharacterSetCreateCopy(alloc, theSet, true);
1610 }
1611
1612 /*** Basic accessors ***/
1613 Boolean CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet, UniChar theChar) {
1614 CFIndex length;
1615 Boolean isInverted;
1616 Boolean result = false;
1617
1618 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, Boolean, (NSCharacterSet *)theSet, longCharacterIsMember:(UTF32Char)theChar);
1619
1620 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1621
1622 isInverted = __CFCSetIsInverted(theSet);
1623
1624 switch (__CFCSetClassType(theSet)) {
1625 case __kCFCharSetClassBuiltin:
1626 result = (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1627 break;
1628
1629 case __kCFCharSetClassRange:
1630 length = __CFCSetRangeLength(theSet);
1631 result = (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1632 break;
1633
1634 case __kCFCharSetClassString:
1635 result = ((length = __CFCSetStringLength(theSet)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet), length, theChar) ? !isInverted : isInverted) : isInverted);
1636 break;
1637
1638 case __kCFCharSetClassBitmap:
1639 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet), theChar) ? true : false) : isInverted);
1640 break;
1641
1642 case __kCFCharSetClassCompactBitmap:
1643 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet), theChar) ? true : false) : isInverted);
1644 break;
1645
1646 default:
1647 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1648 break;
1649 }
1650
1651 return result;
1652 }
1653
1654 Boolean CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet, UTF32Char theChar) {
1655 CFIndex length;
1656 UInt32 plane = (theChar >> 16);
1657 Boolean isAnnexInverted = false;
1658 Boolean isInverted;
1659 Boolean result = false;
1660
1661 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, Boolean, (NSCharacterSet *)theSet, longCharacterIsMember:(UTF32Char)theChar);
1662
1663 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1664
1665 if (plane) {
1666 CFCharacterSetRef annexPlane;
1667
1668 if (__CFCSetIsBuiltin(theSet)) {
1669 isInverted = __CFCSetIsInverted(theSet);
1670 return (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1671 }
1672
1673 isAnnexInverted = __CFCSetAnnexIsInverted(theSet);
1674
1675 if ((annexPlane = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, plane)) == NULL) {
1676 if (!__CFCSetHasNonBMPPlane(theSet) && __CFCSetIsRange(theSet)) {
1677 isInverted = __CFCSetIsInverted(theSet);
1678 length = __CFCSetRangeLength(theSet);
1679 return (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1680 } else {
1681 return (isAnnexInverted ? true : false);
1682 }
1683 } else {
1684 theSet = annexPlane;
1685 theChar &= 0xFFFF;
1686 }
1687 }
1688
1689 isInverted = __CFCSetIsInverted(theSet);
1690
1691 switch (__CFCSetClassType(theSet)) {
1692 case __kCFCharSetClassBuiltin:
1693 result = (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1694 break;
1695
1696 case __kCFCharSetClassRange:
1697 length = __CFCSetRangeLength(theSet);
1698 result = (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1699 break;
1700
1701 case __kCFCharSetClassString:
1702 result = ((length = __CFCSetStringLength(theSet)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet), length, theChar) ? !isInverted : isInverted) : isInverted);
1703 break;
1704
1705 case __kCFCharSetClassBitmap:
1706 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet), theChar) ? true : false) : isInverted);
1707 break;
1708
1709 case __kCFCharSetClassCompactBitmap:
1710 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet), theChar) ? true : false) : isInverted);
1711 break;
1712
1713 default:
1714 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1715 return false; // To make compiler happy
1716 }
1717
1718 return (result ? !isAnnexInverted : isAnnexInverted);
1719 }
1720
1721 Boolean CFCharacterSetIsSurrogatePairMember(CFCharacterSetRef theSet, UniChar surrogateHigh, UniChar surrogateLow) {
1722 return CFCharacterSetIsLongCharacterMember(theSet, CFCharacterSetGetLongCharacterForSurrogatePair(surrogateHigh, surrogateLow));
1723 }
1724
1725
1726 static inline CFCharacterSetRef __CFCharacterSetGetExpandedSetForNSCharacterSet(const void *characterSet) {
1727 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, CFCharacterSetRef , (NSCharacterSet *)characterSet, _expandedCFCharacterSet);
1728 return NULL;
1729 }
1730
1731 Boolean CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
1732 CFMutableCharacterSetRef copy;
1733 CFCharacterSetRef expandedSet = NULL;
1734 CFCharacterSetRef expandedOtherSet = NULL;
1735 Boolean result;
1736
1737 if ((!CF_IS_OBJC(__kCFCharacterSetTypeID, theSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theSet))) && (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedOtherSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet)))) { // Really CF, we can do some trick here
1738 if (expandedSet) theSet = expandedSet;
1739 if (expandedOtherSet) theOtherSet = expandedOtherSet;
1740
1741 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1742 __CFGenericValidateType(theOtherSet, __kCFCharacterSetTypeID);
1743
1744 if (__CFCSetIsEmpty(theSet)) {
1745 if (__CFCSetIsInverted(theSet)) {
1746 return TRUE; // Inverted empty set covers all range
1747 } else if (!__CFCSetIsEmpty(theOtherSet) || __CFCSetIsInverted(theOtherSet)) {
1748 return FALSE;
1749 }
1750 } else if (__CFCSetIsEmpty(theOtherSet) && !__CFCSetIsInverted(theOtherSet)) {
1751 return TRUE;
1752 } else {
1753 if (__CFCSetIsBuiltin(theSet) || __CFCSetIsBuiltin(theOtherSet)) {
1754 if (__CFCSetClassType(theSet) == __CFCSetClassType(theOtherSet) && __CFCSetBuiltinType(theSet) == __CFCSetBuiltinType(theOtherSet) && !__CFCSetIsInverted(theSet) && !__CFCSetIsInverted(theOtherSet)) return TRUE;
1755 } else if (__CFCSetIsRange(theSet) || __CFCSetIsRange(theOtherSet)) {
1756 if (__CFCSetClassType(theSet) == __CFCSetClassType(theOtherSet)) {
1757 if (__CFCSetIsInverted(theSet)) {
1758 if (__CFCSetIsInverted(theOtherSet)) {
1759 return (__CFCSetRangeFirstChar(theOtherSet) > __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) > (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) ? FALSE : TRUE);
1760 } else {
1761 return ((__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) <= __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) <= __CFCSetRangeFirstChar(theOtherSet) ? TRUE : FALSE);
1762 }
1763 } else {
1764 if (__CFCSetIsInverted(theOtherSet)) {
1765 return ((__CFCSetRangeFirstChar(theSet) == 0 && __CFCSetRangeLength(theSet) == 0x110000) || (__CFCSetRangeFirstChar(theOtherSet) == 0 && (UInt32)__CFCSetRangeLength(theOtherSet) <= __CFCSetRangeFirstChar(theSet)) || ((__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) <= __CFCSetRangeFirstChar(theOtherSet) && (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) == 0x110000) ? TRUE : FALSE);
1766 } else {
1767 return (__CFCSetRangeFirstChar(theOtherSet) < __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) < (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) ? FALSE : TRUE);
1768 }
1769 }
1770 }
1771 } else {
1772 UInt32 theSetAnnexMask = __CFCSetAnnexValidEntriesBitmap(theSet);
1773 UInt32 theOtherSetAnnexMask = __CFCSetAnnexValidEntriesBitmap(theOtherSet);
1774 Boolean isTheSetAnnexInverted = __CFCSetAnnexIsInverted(theSet);
1775 Boolean isTheOtherSetAnnexInverted = __CFCSetAnnexIsInverted(theOtherSet);
1776 uint8_t theSetBuffer[__kCFBitmapSize];
1777 uint8_t theOtherSetBuffer[__kCFBitmapSize];
1778
1779 // We mask plane 1 to plane 16
1780 if (isTheSetAnnexInverted) theSetAnnexMask = (~theSetAnnexMask) & (0xFFFF << 1);
1781 if (isTheOtherSetAnnexInverted) theOtherSetAnnexMask = (~theOtherSetAnnexMask) & (0xFFFF << 1);
1782
1783 __CFCSetGetBitmap(theSet, theSetBuffer);
1784 __CFCSetGetBitmap(theOtherSet, theOtherSetBuffer);
1785
1786 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32 *)theSetBuffer, (const UInt32 *)theOtherSetBuffer, FALSE, FALSE)) return FALSE;
1787
1788 if (theOtherSetAnnexMask) {
1789 CFCharacterSetRef theSetAnnex;
1790 CFCharacterSetRef theOtherSetAnnex;
1791 uint32_t idx;
1792
1793 if ((theSetAnnexMask & theOtherSetAnnexMask) != theOtherSetAnnexMask) return FALSE;
1794
1795 for (idx = 1;idx <= 16;idx++) {
1796 theSetAnnex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx);
1797 if (NULL == theSetAnnex) continue; // This case is already handled by the mask above
1798
1799 theOtherSetAnnex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx);
1800
1801 if (NULL == theOtherSetAnnex) {
1802 if (isTheOtherSetAnnexInverted) {
1803 __CFCSetGetBitmap(theSetAnnex, theSetBuffer);
1804 if (!__CFCSetIsEqualBitmap((const UInt32 *)theSetBuffer, (isTheSetAnnexInverted ? NULL : (const UInt32 *)-1))) return FALSE;
1805 }
1806 } else {
1807 __CFCSetGetBitmap(theSetAnnex, theSetBuffer);
1808 __CFCSetGetBitmap(theOtherSetAnnex, theOtherSetBuffer);
1809 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32 *)theSetBuffer, (const UInt32 *)theOtherSetBuffer, isTheSetAnnexInverted, isTheOtherSetAnnexInverted)) return FALSE;
1810 }
1811 }
1812 }
1813
1814 return TRUE;
1815 }
1816 }
1817 }
1818
1819 copy = CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault, theSet);
1820 CFCharacterSetIntersect(copy, theOtherSet);
1821 result = __CFCharacterSetEqual(copy, theOtherSet);
1822 CFRelease(copy);
1823
1824 return result;
1825 }
1826
1827 Boolean CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet, CFIndex thePlane) {
1828 Boolean isInverted = __CFCSetIsInverted(theSet);
1829
1830 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, Boolean, (NSCharacterSet *)theSet, hasMemberInPlane:(uint8_t)thePlane);
1831
1832 if (__CFCSetIsEmpty(theSet)) {
1833 return (isInverted ? TRUE : FALSE);
1834 } else if (__CFCSetIsBuiltin(theSet)) {
1835 CFCharacterSetPredefinedSet type = __CFCSetBuiltinType(theSet);
1836
1837 if (type == kCFCharacterSetControl) {
1838 if (isInverted || (thePlane == 14)) {
1839 return TRUE; // There is no plane that covers all values || Plane 14 has language tags
1840 } else {
1841 return (CFUniCharGetBitmapPtrForPlane(type, thePlane) ? TRUE : FALSE);
1842 }
1843 } else if ((type < kCFCharacterSetDecimalDigit) || (type == kCFCharacterSetNewline)) {
1844 return (thePlane && !isInverted ? FALSE : TRUE);
1845 } else if (__CFCSetBuiltinType(theSet) == kCFCharacterSetIllegal) {
1846 return (isInverted ? (thePlane < 3 || thePlane > 13 ? TRUE : FALSE) : TRUE); // This is according to Unicode 3.1
1847 } else {
1848 if (isInverted) {
1849 return TRUE; // There is no plane that covers all values
1850 } else {
1851 return (CFUniCharGetBitmapPtrForPlane(type, thePlane) ? TRUE : FALSE);
1852 }
1853 }
1854 } else if (__CFCSetIsRange(theSet)) {
1855 UTF32Char firstChar = __CFCSetRangeFirstChar(theSet);
1856 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(theSet) - 1);
1857 CFIndex firstPlane = firstChar >> 16;
1858 CFIndex lastPlane = lastChar >> 16;
1859
1860 if (isInverted) {
1861 if (thePlane < firstPlane || thePlane > lastPlane) {
1862 return TRUE;
1863 } else if (thePlane > firstPlane && thePlane < lastPlane) {
1864 return FALSE;
1865 } else {
1866 firstChar &= 0xFFFF;
1867 lastChar &= 0xFFFF;
1868 if (thePlane == firstPlane) {
1869 return (firstChar || (firstPlane == lastPlane && lastChar != 0xFFFF) ? TRUE : FALSE);
1870 } else {
1871 return (lastChar != 0xFFFF || (firstPlane == lastPlane && firstChar) ? TRUE : FALSE);
1872 }
1873 }
1874 } else {
1875 return (thePlane < firstPlane || thePlane > lastPlane ? FALSE : TRUE);
1876 }
1877 } else {
1878 if (thePlane == 0) {
1879 switch (__CFCSetClassType(theSet)) {
1880 case __kCFCharSetClassString: if (!__CFCSetStringLength(theSet)) return isInverted; break;
1881 case __kCFCharSetClassCompactBitmap: return (__CFCSetCompactBitmapBits(theSet) ? TRUE : FALSE); break;
1882 case __kCFCharSetClassBitmap: return (__CFCSetBitmapBits(theSet) ? TRUE : FALSE); break;
1883 }
1884 return TRUE;
1885 } else {
1886 CFCharacterSetRef annex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, thePlane);
1887 if (annex) {
1888 if (__CFCSetIsRange(annex)) {
1889 return (__CFCSetAnnexIsInverted(theSet) && (__CFCSetRangeFirstChar(annex) == 0) && (__CFCSetRangeLength(annex) == 0x10000) ? FALSE : TRUE);
1890 } else if (__CFCSetIsBitmap(annex)) {
1891 return (__CFCSetAnnexIsInverted(theSet) && __CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(annex), (const UInt32 *)-1) ? FALSE : TRUE);
1892 } else {
1893 uint8_t bitsBuf[__kCFBitmapSize];
1894 __CFCSetGetBitmap(annex, bitsBuf);
1895 return (__CFCSetAnnexIsInverted(theSet) && __CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1) ? FALSE : TRUE);
1896 }
1897 } else {
1898 return __CFCSetAnnexIsInverted(theSet);
1899 }
1900 }
1901 }
1902
1903 return FALSE;
1904 }
1905
1906
1907 CFDataRef CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1908 CFMutableDataRef data;
1909 int numNonBMPPlanes = 0;
1910 int planeIndices[MAX_ANNEX_PLANE];
1911 int idx;
1912 int length;
1913 bool isAnnexInverted;
1914
1915 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, CFDataRef , (NSCharacterSet *)theSet, _retainedBitmapRepresentation);
1916
1917 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1918
1919 isAnnexInverted = (__CFCSetAnnexIsInverted(theSet) != 0);
1920
1921 if (__CFCSetHasNonBMPPlane(theSet)) {
1922 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
1923 if (isAnnexInverted || __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
1924 planeIndices[numNonBMPPlanes++] = idx;
1925 }
1926 }
1927 } else if (__CFCSetIsBuiltin(theSet)) {
1928 numNonBMPPlanes = (__CFCSetIsInverted(theSet) ? MAX_ANNEX_PLANE : CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(theSet)) - 1);
1929 } else if (__CFCSetIsRange(theSet)) {
1930 UInt32 firstChar = __CFCSetRangeFirstChar(theSet);
1931 UInt32 lastChar = __CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet) - 1;
1932 int firstPlane = (firstChar >> 16);
1933 int lastPlane = (lastChar >> 16);
1934 bool isInverted = (__CFCSetIsInverted(theSet) != 0);
1935
1936 if (lastPlane > 0) {
1937 if (firstPlane == 0) {
1938 firstPlane = 1;
1939 firstChar = 0x10000;
1940 }
1941 numNonBMPPlanes = (lastPlane - firstPlane) + 1;
1942 if (isInverted) {
1943 numNonBMPPlanes = MAX_ANNEX_PLANE - numNonBMPPlanes;
1944 if (firstPlane == lastPlane) {
1945 if (((firstChar & 0xFFFF) > 0) || ((lastChar & 0xFFFF) < 0xFFFF)) ++numNonBMPPlanes;
1946 } else {
1947 if ((firstChar & 0xFFFF) > 0) ++numNonBMPPlanes;
1948 if ((lastChar & 0xFFFF) < 0xFFFF) ++numNonBMPPlanes;
1949 }
1950 }
1951 } else if (isInverted) {
1952 numNonBMPPlanes = MAX_ANNEX_PLANE;
1953 }
1954 } else if (isAnnexInverted) {
1955 numNonBMPPlanes = MAX_ANNEX_PLANE;
1956 }
1957
1958 length = __kCFBitmapSize + ((__kCFBitmapSize + 1) * numNonBMPPlanes);
1959 data = CFDataCreateMutable(alloc, length);
1960 CFDataSetLength(data, length);
1961 __CFCSetGetBitmap(theSet, CFDataGetMutableBytePtr(data));
1962
1963 if (numNonBMPPlanes > 0) {
1964 uint8_t *bytes = CFDataGetMutableBytePtr(data) + __kCFBitmapSize;
1965
1966 if (__CFCSetHasNonBMPPlane(theSet)) {
1967 CFCharacterSetRef subset;
1968
1969 for (idx = 0;idx < numNonBMPPlanes;idx++) {
1970 *(bytes++) = planeIndices[idx];
1971 if ((subset = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, planeIndices[idx])) == NULL) {
1972 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, (isAnnexInverted ? 0xFF : 0));
1973 } else {
1974 __CFCSetGetBitmap(subset, bytes);
1975 if (isAnnexInverted) {
1976 uint32_t count = __kCFBitmapSize / sizeof(uint32_t);
1977 uint32_t *bits = (uint32_t *)bytes;
1978
1979 while (count-- > 0) {
1980 *bits = ~(*bits);
1981 ++bits;
1982 }
1983 }
1984 }
1985 bytes += __kCFBitmapSize;
1986 }
1987 } else if (__CFCSetIsBuiltin(theSet)) {
1988 UInt8 result;
1989 CFIndex delta;
1990 Boolean isInverted = __CFCSetIsInverted(theSet);
1991
1992 for (idx = 0;idx < numNonBMPPlanes;idx++) {
1993 if ((result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theSet), idx + 1, bytes + 1, (isInverted != 0))) == kCFUniCharBitmapEmpty) continue;
1994 *(bytes++) = idx + 1;
1995 if (result == kCFUniCharBitmapAll) {
1996 CFIndex bitmapLength = __kCFBitmapSize;
1997 while (bitmapLength-- > 0) *(bytes++) = (uint8_t)0xFF;
1998 } else {
1999 bytes += __kCFBitmapSize;
2000 }
2001 }
2002 delta = bytes - (const uint8_t *)CFDataGetBytePtr(data);
2003 if (delta < length) CFDataSetLength(data, delta);
2004 } else if (__CFCSetIsRange(theSet)) {
2005 UInt32 firstChar = __CFCSetRangeFirstChar(theSet);
2006 UInt32 lastChar = __CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet) - 1;
2007 int firstPlane = (firstChar >> 16);
2008 int lastPlane = (lastChar >> 16);
2009
2010 if (firstPlane == 0) {
2011 firstPlane = 1;
2012 firstChar = 0x10000;
2013 }
2014 if (__CFCSetIsInverted(theSet)) {
2015 // Mask out the plane byte
2016 firstChar &= 0xFFFF;
2017 lastChar &= 0xFFFF;
2018
2019 for (idx = 1;idx < firstPlane;idx++) { // Fill up until the first plane
2020 *(bytes++) = idx;
2021 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2022 bytes += __kCFBitmapSize;
2023 }
2024 if (firstPlane == lastPlane) {
2025 if ((firstChar > 0) || (lastChar < 0xFFFF)) {
2026 *(bytes++) = idx;
2027 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2028 __CFCSetBitmapRemoveCharactersInRange(bytes, firstChar, lastChar);
2029 bytes += __kCFBitmapSize;
2030 }
2031 } else if (firstPlane < lastPlane) {
2032 if (firstChar > 0) {
2033 *(bytes++) = idx;
2034 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0);
2035 __CFCSetBitmapAddCharactersInRange(bytes, 0, firstChar - 1);
2036 bytes += __kCFBitmapSize;
2037 }
2038 if (lastChar < 0xFFFF) {
2039 *(bytes++) = idx;
2040 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0);
2041 __CFCSetBitmapAddCharactersInRange(bytes, lastChar, 0xFFFF);
2042 bytes += __kCFBitmapSize;
2043 }
2044 }
2045 for (idx = lastPlane + 1;idx <= MAX_ANNEX_PLANE;idx++) {
2046 *(bytes++) = idx;
2047 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2048 bytes += __kCFBitmapSize;
2049 }
2050 } else {
2051 for (idx = firstPlane;idx <= lastPlane;idx++) {
2052 *(bytes++) = idx;
2053 __CFCSetBitmapAddCharactersInRange(bytes, (idx == firstPlane ? firstChar : 0), (idx == lastPlane ? lastChar : 0xFFFF));
2054 bytes += __kCFBitmapSize;
2055 }
2056 }
2057 } else if (isAnnexInverted) {
2058 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2059 *(bytes++) = idx;
2060 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2061 bytes += __kCFBitmapSize;
2062 }
2063 }
2064 }
2065
2066 return data;
2067 }
2068
2069 /*** MutableCharacterSet functions ***/
2070 void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange) {
2071 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, addCharactersInRange:NSMakeRange(theRange.location, theRange.length));
2072
2073 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2074 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
2075
2076 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) {
2077 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet));
2078 if (sharedSet == theSet) { // We're trying to dealloc the builtin set
2079 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__);
2080 return; // We don't mutate builtin set
2081 }
2082 }
2083
2084 if (!theRange.length || (__CFCSetIsInverted(theSet) && __CFCSetIsEmpty(theSet))) return; // Inverted && empty set contains all char
2085
2086 if (!__CFCSetIsInverted(theSet)) {
2087 if (__CFCSetIsEmpty(theSet)) {
2088 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2089 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2090 __CFCSetPutRangeLength(theSet, theRange.length);
2091 __CFCSetPutHasHashValue(theSet, false);
2092 return;
2093 } else if (__CFCSetIsRange(theSet)) {
2094 CFIndex firstChar = __CFCSetRangeFirstChar(theSet);
2095 CFIndex length = __CFCSetRangeLength(theSet);
2096
2097 if (firstChar == theRange.location) {
2098 __CFCSetPutRangeLength(theSet, __CFMax(length, theRange.length));
2099 __CFCSetPutHasHashValue(theSet, false);
2100 return;
2101 } else if (firstChar < theRange.location && theRange.location <= firstChar + length) {
2102 if (firstChar + length < theRange.location + theRange.length) __CFCSetPutRangeLength(theSet, theRange.length + (theRange.location - firstChar));
2103 __CFCSetPutHasHashValue(theSet, false);
2104 return;
2105 } else if (theRange.location < firstChar && firstChar <= theRange.location + theRange.length) {
2106 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2107 __CFCSetPutRangeLength(theSet, length + (firstChar - theRange.location));
2108 __CFCSetPutHasHashValue(theSet, false);
2109 return;
2110 }
2111 } else if (__CFCSetIsString(theSet) && __CFCSetStringLength(theSet) + theRange.length < __kCFStringCharSetMax) {
2112 UniChar *buffer;
2113 if (!__CFCSetStringBuffer(theSet))
2114 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2115 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet);
2116 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theSet) + theRange.length);
2117 while (theRange.length--) *buffer++ = (UniChar)theRange.location++;
2118 qsort(__CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet), sizeof(UniChar), chcompar);
2119 __CFCSetPutHasHashValue(theSet, false);
2120 return;
2121 }
2122 }
2123
2124 // OK, I have to be a bitmap
2125 __CFCSetMakeBitmap(theSet);
2126 __CFCSetAddNonBMPPlanesInRange(theSet, theRange);
2127 if (theRange.location < 0x10000) { // theRange is in BMP
2128 if (theRange.location + theRange.length >= NUMCHARACTERS) theRange.length = NUMCHARACTERS - theRange.location;
2129 __CFCSetBitmapAddCharactersInRange(__CFCSetBitmapBits(theSet), (UniChar)theRange.location, (UniChar)(theRange.location + theRange.length - 1));
2130 }
2131 __CFCSetPutHasHashValue(theSet, false);
2132
2133 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2134 }
2135
2136 void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange) {
2137 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, removeCharactersInRange:NSMakeRange(theRange.location, theRange.length));
2138
2139 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2140 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
2141
2142 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) {
2143 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet));
2144 if (sharedSet == theSet) { // We're trying to dealloc the builtin set
2145 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__);
2146 return; // We don't mutate builtin set
2147 }
2148 }
2149
2150 if (!theRange.length || (!__CFCSetIsInverted(theSet) && __CFCSetIsEmpty(theSet))) return; // empty set
2151
2152 if (__CFCSetIsInverted(theSet)) {
2153 if (__CFCSetIsEmpty(theSet)) {
2154 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2155 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2156 __CFCSetPutRangeLength(theSet, theRange.length);
2157 __CFCSetPutHasHashValue(theSet, false);
2158 return;
2159 } else if (__CFCSetIsRange(theSet)) {
2160 CFIndex firstChar = __CFCSetRangeFirstChar(theSet);
2161 CFIndex length = __CFCSetRangeLength(theSet);
2162
2163 if (firstChar == theRange.location) {
2164 __CFCSetPutRangeLength(theSet, __CFMin(length, theRange.length));
2165 __CFCSetPutHasHashValue(theSet, false);
2166 return;
2167 } else if (firstChar < theRange.location && theRange.location <= firstChar + length) {
2168 if (firstChar + length < theRange.location + theRange.length) __CFCSetPutRangeLength(theSet, theRange.length + (theRange.location - firstChar));
2169 __CFCSetPutHasHashValue(theSet, false);
2170 return;
2171 } else if (theRange.location < firstChar && firstChar <= theRange.location + theRange.length) {
2172 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2173 __CFCSetPutRangeLength(theSet, length + (firstChar - theRange.location));
2174 __CFCSetPutHasHashValue(theSet, false);
2175 return;
2176 }
2177 } else if (__CFCSetIsString(theSet) && __CFCSetStringLength(theSet) + theRange.length < __kCFStringCharSetMax) {
2178 UniChar *buffer;
2179 if (!__CFCSetStringBuffer(theSet))
2180 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2181 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet);
2182 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theSet) + theRange.length);
2183 while (theRange.length--) *buffer++ = (UniChar)theRange.location++;
2184 qsort(__CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet), sizeof(UniChar), chcompar);
2185 __CFCSetPutHasHashValue(theSet, false);
2186 return;
2187 }
2188 }
2189
2190 // OK, I have to be a bitmap
2191 __CFCSetMakeBitmap(theSet);
2192 __CFCSetRemoveNonBMPPlanesInRange(theSet, theRange);
2193 if (theRange.location < 0x10000) { // theRange is in BMP
2194 if (theRange.location + theRange.length > NUMCHARACTERS) theRange.length = NUMCHARACTERS - theRange.location;
2195 if (theRange.location == 0 && theRange.length == NUMCHARACTERS) { // Remove all
2196 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2197 __CFCSetPutBitmapBits(theSet, NULL);
2198 } else {
2199 __CFCSetBitmapRemoveCharactersInRange(__CFCSetBitmapBits(theSet), (UniChar)theRange.location, (UniChar)(theRange.location + theRange.length - 1));
2200 }
2201 }
2202
2203 __CFCSetPutHasHashValue(theSet, false);
2204 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2205 }
2206
2207 void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString) {
2208 UniChar *buffer;
2209 CFIndex length;
2210 BOOL hasSurrogate = NO;
2211
2212 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, addCharactersInString:(NSString *)theString);
2213
2214 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2215
2216 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) {
2217 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet));
2218 if (sharedSet == theSet) { // We're trying to dealloc the builtin set
2219 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__);
2220 return; // We don't mutate builtin set
2221 }
2222 }
2223
2224 if ((__CFCSetIsEmpty(theSet) && __CFCSetIsInverted(theSet)) || !(length = CFStringGetLength(theString))) return;
2225
2226 if (!__CFCSetIsInverted(theSet)) {
2227 CFIndex newLength = length + (__CFCSetIsEmpty(theSet) ? 0 : (__CFCSetIsString(theSet) ? __CFCSetStringLength(theSet) : __kCFStringCharSetMax));
2228
2229 if (newLength < __kCFStringCharSetMax) {
2230 buffer = __CFCSetStringBuffer(theSet);
2231
2232 if (NULL == buffer) {
2233 buffer = (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0);
2234 } else {
2235 buffer += __CFCSetStringLength(theSet);
2236 }
2237
2238 CFStringGetCharacters(theString, CFRangeMake(0, length), (UniChar*)buffer);
2239
2240 if (length > 1) {
2241 UTF16Char *characters = buffer;
2242 const UTF16Char *charactersLimit = characters + length;
2243
2244 while (characters < charactersLimit) {
2245 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) {
2246 memmove(characters, characters + 1, (charactersLimit - (characters + 1)) * sizeof(*characters));
2247 --charactersLimit;
2248 hasSurrogate = YES;
2249 } else {
2250 ++characters;
2251 }
2252 }
2253
2254 newLength -= (length - (charactersLimit - buffer));
2255 }
2256
2257 if (0 == newLength) {
2258 if (NULL == __CFCSetStringBuffer(theSet)) CFAllocatorDeallocate(CFGetAllocator(theSet), buffer);
2259 } else {
2260 if (NULL == __CFCSetStringBuffer(theSet)) {
2261 __CFCSetPutClassType(theSet, __kCFCharSetClassString);
2262 __CFCSetPutStringBuffer(theSet, buffer);
2263 }
2264 __CFCSetPutStringLength(theSet, newLength);
2265 qsort(__CFCSetStringBuffer(theSet), newLength, sizeof(UniChar), chcompar);
2266 }
2267 __CFCSetPutHasHashValue(theSet, false);
2268
2269 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetAddCharactersInRange);
2270
2271 return;
2272 }
2273 }
2274
2275 // OK, I have to be a bitmap
2276 __CFCSetMakeBitmap(theSet);
2277 CFStringInlineBuffer inlineBuffer;
2278 CFIndex idx;
2279
2280 CFStringInitInlineBuffer(theString, &inlineBuffer, CFRangeMake(0, length));
2281
2282 for (idx = 0;idx < length;idx++) {
2283 UTF16Char character = __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer, idx);
2284
2285 if (CFStringIsSurrogateHighCharacter(character) || CFStringIsSurrogateLowCharacter(character)) {
2286 hasSurrogate = YES;
2287 } else {
2288 __CFCSetBitmapAddCharacter(__CFCSetBitmapBits(theSet), character);
2289 }
2290 }
2291
2292 __CFCSetPutHasHashValue(theSet, false);
2293
2294 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2295
2296 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetAddCharactersInRange);
2297 }
2298
2299 void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString) {
2300 UniChar *buffer;
2301 CFIndex length;
2302 BOOL hasSurrogate = NO;
2303
2304 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, removeCharactersInString:(NSString *)theString);
2305
2306 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2307
2308 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) {
2309 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet));
2310 if (sharedSet == theSet) { // We're trying to dealloc the builtin set
2311 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__);
2312 return; // We don't mutate builtin set
2313 }
2314 }
2315
2316 if ((__CFCSetIsEmpty(theSet) && !__CFCSetIsInverted(theSet)) || !(length = CFStringGetLength(theString))) return;
2317
2318 if (__CFCSetIsInverted(theSet)) {
2319 CFIndex newLength = length + (__CFCSetIsEmpty(theSet) ? 0 : (__CFCSetIsString(theSet) ? __CFCSetStringLength(theSet) : __kCFStringCharSetMax));
2320
2321 if (newLength < __kCFStringCharSetMax) {
2322 buffer = __CFCSetStringBuffer(theSet);
2323
2324 if (NULL == buffer) {
2325 buffer = (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0);
2326 } else {
2327 buffer += __CFCSetStringLength(theSet);
2328 }
2329
2330 CFStringGetCharacters(theString, CFRangeMake(0, length), (UniChar*)buffer);
2331
2332 if (length > 1) {
2333 UTF16Char *characters = buffer;
2334 const UTF16Char *charactersLimit = characters + length;
2335
2336 while (characters < charactersLimit) {
2337 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) {
2338 memmove(characters, characters + 1, charactersLimit - (characters + 1));
2339 --charactersLimit;
2340 hasSurrogate = YES;
2341 }
2342 ++characters;
2343 }
2344
2345 newLength -= (length - (charactersLimit - buffer));
2346 }
2347
2348 if (NULL == __CFCSetStringBuffer(theSet)) {
2349 __CFCSetPutClassType(theSet, __kCFCharSetClassString);
2350 __CFCSetPutStringBuffer(theSet, buffer);
2351 }
2352 __CFCSetPutStringLength(theSet, newLength);
2353 qsort(__CFCSetStringBuffer(theSet), newLength, sizeof(UniChar), chcompar);
2354 __CFCSetPutHasHashValue(theSet, false);
2355
2356 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetRemoveCharactersInRange);
2357
2358 return;
2359 }
2360 }
2361
2362 // OK, I have to be a bitmap
2363 __CFCSetMakeBitmap(theSet);
2364 CFStringInlineBuffer inlineBuffer;
2365 CFIndex idx;
2366
2367 CFStringInitInlineBuffer(theString, &inlineBuffer, CFRangeMake(0, length));
2368
2369 for (idx = 0;idx < length;idx++) {
2370 UTF16Char character = __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer, idx);
2371
2372 if (CFStringIsSurrogateHighCharacter(character) || CFStringIsSurrogateLowCharacter(character)) {
2373 hasSurrogate = YES;
2374 } else {
2375 __CFCSetBitmapRemoveCharacter(__CFCSetBitmapBits(theSet), character);
2376 }
2377 }
2378
2379 __CFCSetPutHasHashValue(theSet, false);
2380 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2381
2382 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetRemoveCharactersInRange);
2383 }
2384
2385 void CFCharacterSetUnion(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
2386 CFCharacterSetRef expandedSet = NULL;
2387
2388 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, formUnionWithCharacterSet:(NSCharacterSet *)theOtherSet);
2389
2390 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2391
2392 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) {
2393 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet));
2394 if (sharedSet == theSet) { // We're trying to dealloc the builtin set
2395 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__);
2396 return; // We don't mutate builtin set
2397 }
2398 }
2399
2400 if (__CFCSetIsEmpty(theSet) && __CFCSetIsInverted(theSet)) return; // Inverted empty set contains all char
2401
2402 if (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet))) { // Really CF, we can do some trick here
2403 if (expandedSet) theOtherSet = expandedSet;
2404
2405 if (__CFCSetIsEmpty(theOtherSet)) {
2406 if (__CFCSetIsInverted(theOtherSet)) {
2407 if (__CFCSetIsString(theSet) && __CFCSetStringBuffer(theSet)) {
2408 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetStringBuffer(theSet));
2409 } else if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) {
2410 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2411 } else if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) {
2412 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetCompactBitmapBits(theSet));
2413 }
2414 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2415 __CFCSetPutRangeLength(theSet, 0);
2416 __CFCSetPutIsInverted(theSet, true);
2417 __CFCSetPutHasHashValue(theSet, false);
2418 __CFCSetDeallocateAnnexPlane(theSet);
2419 }
2420 } else if (__CFCSetIsBuiltin(theOtherSet) && __CFCSetIsEmpty(theSet)) { // theSet can be builtin set
2421 __CFCSetPutClassType(theSet, __kCFCharSetClassBuiltin);
2422 __CFCSetPutBuiltinType(theSet, __CFCSetBuiltinType(theOtherSet));
2423 if (__CFCSetIsInverted(theOtherSet)) __CFCSetPutIsInverted(theSet, true);
2424 if (__CFCSetAnnexIsInverted(theOtherSet)) __CFCSetAnnexSetIsInverted(theSet, true);
2425 __CFCSetPutHasHashValue(theSet, false);
2426 } else {
2427 if (__CFCSetIsRange(theOtherSet)) {
2428 if (__CFCSetIsInverted(theOtherSet)) {
2429 UTF32Char firstChar = __CFCSetRangeFirstChar(theOtherSet);
2430 CFIndex length = __CFCSetRangeLength(theOtherSet);
2431
2432 if (firstChar > 0) CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(0, firstChar));
2433 firstChar += length;
2434 length = 0x110000 - firstChar;
2435 CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(firstChar, length));
2436 } else {
2437 CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet), __CFCSetRangeLength(theOtherSet)));
2438 }
2439 } else if (__CFCSetIsString(theOtherSet)) {
2440 CFStringRef string = CFStringCreateWithCharactersNoCopy(CFGetAllocator(theSet), __CFCSetStringBuffer(theOtherSet), __CFCSetStringLength(theOtherSet), kCFAllocatorNull);
2441 CFCharacterSetAddCharactersInString(theSet, string);
2442 CFRelease(string);
2443 } else {
2444 __CFCSetMakeBitmap(theSet);
2445 if (__CFCSetIsBitmap(theOtherSet)) {
2446 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2447 UInt32 *bitmap2 = (UInt32*)__CFCSetBitmapBits(theOtherSet);
2448 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2449 while (length--) *bitmap1++ |= *bitmap2++;
2450 } else {
2451 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2452 UInt32 *bitmap2;
2453 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2454 uint8_t bitmapBuffer[__kCFBitmapSize];
2455 __CFCSetGetBitmap(theOtherSet, bitmapBuffer);
2456 bitmap2 = (UInt32*)bitmapBuffer;
2457 while (length--) *bitmap1++ |= *bitmap2++;
2458 }
2459 __CFCSetPutHasHashValue(theSet, false);
2460 }
2461 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2462 CFMutableCharacterSetRef otherSetPlane;
2463 int idx;
2464
2465 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2466 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2467 CFCharacterSetUnion((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx), otherSetPlane);
2468 }
2469 }
2470 } else if (__CFCSetAnnexIsInverted(theOtherSet)) {
2471 if (__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2472 __CFCSetAnnexSetIsInverted(theSet, true);
2473 } else if (__CFCSetIsBuiltin(theOtherSet)) {
2474 CFMutableCharacterSetRef annexPlane;
2475 uint8_t bitmapBuffer[__kCFBitmapSize];
2476 uint8_t result;
2477 int planeIndex;
2478 Boolean isOtherAnnexPlaneInverted = __CFCSetAnnexIsInverted(theOtherSet);
2479 UInt32 *bitmap1;
2480 UInt32 *bitmap2;
2481 CFIndex length;
2482
2483 for (planeIndex = 1;planeIndex <= MAX_ANNEX_PLANE;planeIndex++) {
2484 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet), planeIndex, bitmapBuffer, (isOtherAnnexPlaneInverted != 0));
2485 if (result != kCFUniCharBitmapEmpty) {
2486 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, planeIndex);
2487 if (result == kCFUniCharBitmapAll) {
2488 CFCharacterSetAddCharactersInRange(annexPlane, CFRangeMake(0x0000, 0x10000));
2489 } else {
2490 __CFCSetMakeBitmap(annexPlane);
2491 bitmap1 = (UInt32 *)__CFCSetBitmapBits(annexPlane);
2492 length = __kCFBitmapSize / sizeof(UInt32);
2493 bitmap2 = (UInt32*)bitmapBuffer;
2494 while (length--) *bitmap1++ |= *bitmap2++;
2495 }
2496 }
2497 }
2498 }
2499 }
2500 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2501 } else { // It's NSCharacterSet
2502 CFDataRef bitmapRep = CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault, theOtherSet);
2503 const UInt32 *bitmap2 = (bitmapRep && CFDataGetLength(bitmapRep) ? (const UInt32 *)CFDataGetBytePtr(bitmapRep) : NULL);
2504 if (bitmap2) {
2505 UInt32 *bitmap1;
2506 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2507 __CFCSetMakeBitmap(theSet);
2508 bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2509 while (length--) *bitmap1++ |= *bitmap2++;
2510 __CFCSetPutHasHashValue(theSet, false);
2511 }
2512 CFRelease(bitmapRep);
2513 }
2514 }
2515
2516 void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
2517 CFCharacterSetRef expandedSet = NULL;
2518
2519 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, formIntersectionWithCharacterSet:(NSCharacterSet *)theOtherSet);
2520
2521 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2522
2523 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) {
2524 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet));
2525 if (sharedSet == theSet) { // We're trying to dealloc the builtin set
2526 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__);
2527 return; // We don't mutate builtin set
2528 }
2529 }
2530
2531 if (__CFCSetIsEmpty(theSet) && !__CFCSetIsInverted(theSet)) return; // empty set
2532
2533 if (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet))) { // Really CF, we can do some trick here
2534 if (expandedSet) theOtherSet = expandedSet;
2535
2536 if (__CFCSetIsEmpty(theOtherSet)) {
2537 if (!__CFCSetIsInverted(theOtherSet)) {
2538 if (__CFCSetIsString(theSet) && __CFCSetStringBuffer(theSet)) {
2539 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetStringBuffer(theSet));
2540 } else if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) {
2541 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2542 } else if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) {
2543 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetCompactBitmapBits(theSet));
2544 }
2545 __CFCSetPutClassType(theSet, __kCFCharSetClassBitmap);
2546 __CFCSetPutBitmapBits(theSet, NULL);
2547 __CFCSetPutIsInverted(theSet, false);
2548 theSet->_hashValue = 0;
2549 __CFCSetPutHasHashValue(theSet, true);
2550 __CFCSetDeallocateAnnexPlane(theSet);
2551 }
2552 } else if (__CFCSetIsEmpty(theSet)) { // non inverted empty set contains all character
2553 __CFCSetPutClassType(theSet, __CFCSetClassType(theOtherSet));
2554 __CFCSetPutHasHashValue(theSet, __CFCSetHasHashValue(theOtherSet));
2555 __CFCSetPutIsInverted(theSet, __CFCSetIsInverted(theOtherSet));
2556 theSet->_hashValue = theOtherSet->_hashValue;
2557 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2558 CFMutableCharacterSetRef otherSetPlane;
2559 int idx;
2560 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2561 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2562 otherSetPlane = (CFMutableCharacterSetRef)CFCharacterSetCreateMutableCopy(CFGetAllocator(theSet), otherSetPlane);
2563 __CFCSetPutCharacterSetToAnnexPlane(theSet, otherSetPlane, idx);
2564 CFRelease(otherSetPlane);
2565 }
2566 }
2567 __CFCSetAnnexSetIsInverted(theSet, __CFCSetAnnexIsInverted(theOtherSet));
2568 }
2569
2570 switch (__CFCSetClassType(theOtherSet)) {
2571 case __kCFCharSetClassBuiltin:
2572 __CFCSetPutBuiltinType(theSet, __CFCSetBuiltinType(theOtherSet));
2573 break;
2574
2575 case __kCFCharSetClassRange:
2576 __CFCSetPutRangeFirstChar(theSet, __CFCSetRangeFirstChar(theOtherSet));
2577 __CFCSetPutRangeLength(theSet, __CFCSetRangeLength(theOtherSet));
2578 break;
2579
2580 case __kCFCharSetClassString:
2581 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theOtherSet));
2582 if (!__CFCSetStringBuffer(theSet))
2583 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2584 memmove(__CFCSetStringBuffer(theSet), __CFCSetStringBuffer(theOtherSet), __CFCSetStringLength(theSet) * sizeof(UniChar));
2585 break;
2586
2587 case __kCFCharSetClassBitmap:
2588 __CFCSetPutBitmapBits(theSet, (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet), sizeof(uint8_t) * __kCFBitmapSize, 0));
2589 memmove(__CFCSetBitmapBits(theSet), __CFCSetBitmapBits(theOtherSet), __kCFBitmapSize);
2590 break;
2591
2592 case __kCFCharSetClassCompactBitmap: {
2593 const uint8_t *cBitmap = __CFCSetCompactBitmapBits(theOtherSet);
2594 uint8_t *newBitmap;
2595 uint32_t size = __CFCSetGetCompactBitmapSize(cBitmap);
2596 newBitmap = (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet), sizeof(uint8_t) * size, 0);
2597 __CFCSetPutBitmapBits(theSet, newBitmap);
2598 memmove(newBitmap, cBitmap, size);
2599 }
2600 break;
2601
2602 default:
2603 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
2604 }
2605 } else {
2606 __CFCSetMakeBitmap(theSet);
2607 if (__CFCSetIsBitmap(theOtherSet)) {
2608 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2609 UInt32 *bitmap2 = (UInt32*)__CFCSetBitmapBits(theOtherSet);
2610 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2611 while (length--) *bitmap1++ &= *bitmap2++;
2612 } else {
2613 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2614 UInt32 *bitmap2;
2615 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2616 uint8_t bitmapBuffer[__kCFBitmapSize];
2617 __CFCSetGetBitmap(theOtherSet, bitmapBuffer);
2618 bitmap2 = (UInt32*)bitmapBuffer;
2619 while (length--) *bitmap1++ &= *bitmap2++;
2620 }
2621 __CFCSetPutHasHashValue(theSet, false);
2622 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2623 CFMutableCharacterSetRef annexPlane;
2624 CFMutableCharacterSetRef otherSetPlane;
2625 int idx;
2626 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2627 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2628 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx);
2629 if (__CFCSetAnnexIsInverted(theSet)) CFCharacterSetInvert(annexPlane);
2630 CFCharacterSetIntersect(annexPlane, otherSetPlane);
2631 if (__CFCSetAnnexIsInverted(theSet)) CFCharacterSetInvert(annexPlane);
2632 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2633 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
2634 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2635 }
2636 }
2637 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2638 } else if (__CFCSetIsBuiltin(theOtherSet) && !__CFCSetAnnexIsInverted(theOtherSet)) {
2639 CFMutableCharacterSetRef annexPlane;
2640 uint8_t bitmapBuffer[__kCFBitmapSize];
2641 uint8_t result;
2642 int planeIndex;
2643 UInt32 *bitmap1;
2644 UInt32 *bitmap2;
2645 CFIndex length;
2646
2647 for (planeIndex = 1;planeIndex <= MAX_ANNEX_PLANE;planeIndex++) {
2648 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, planeIndex);
2649 if (annexPlane) {
2650 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet), planeIndex, bitmapBuffer, false);
2651 if (result == kCFUniCharBitmapEmpty) {
2652 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, planeIndex);
2653 } else if (result == kCFUniCharBitmapFilled) {
2654 Boolean isEmpty = true;
2655
2656 __CFCSetMakeBitmap(annexPlane);
2657 bitmap1 = (UInt32 *)__CFCSetBitmapBits(annexPlane);
2658 length = __kCFBitmapSize / sizeof(UInt32);
2659 bitmap2 = (UInt32*)bitmapBuffer;
2660
2661 while (length--) {
2662 if ((*bitmap1++ &= *bitmap2++)) isEmpty = false;
2663 }
2664 if (isEmpty) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, planeIndex);
2665 }
2666 }
2667 }
2668 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2669 } else if (__CFCSetIsRange(theOtherSet)) {
2670 CFMutableCharacterSetRef tempOtherSet = CFCharacterSetCreateMutable(CFGetAllocator(theSet));
2671 CFMutableCharacterSetRef annexPlane;
2672 CFMutableCharacterSetRef otherSetPlane;
2673 int idx;
2674
2675 __CFCSetAddNonBMPPlanesInRange(tempOtherSet, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet), __CFCSetRangeLength(theOtherSet)));
2676
2677 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2678 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(tempOtherSet, idx))) {
2679 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx);
2680 if (__CFCSetAnnexIsInverted(theSet)) CFCharacterSetInvert(annexPlane);
2681 CFCharacterSetIntersect(annexPlane, otherSetPlane);
2682 if (__CFCSetAnnexIsInverted(theSet)) CFCharacterSetInvert(annexPlane);
2683 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2684 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
2685 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2686 }
2687 }
2688 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2689 CFRelease(tempOtherSet);
2690 } else if ((__CFCSetHasNonBMPPlane(theSet) || __CFCSetAnnexIsInverted(theSet)) && !__CFCSetAnnexIsInverted(theOtherSet)) {
2691 __CFCSetDeallocateAnnexPlane(theSet);
2692 }
2693 }
2694 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2695 } else { // It's NSCharacterSet
2696 CFDataRef bitmapRep = CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault, theOtherSet);
2697 const UInt32 *bitmap2 = (bitmapRep && CFDataGetLength(bitmapRep) ? (const UInt32 *)CFDataGetBytePtr(bitmapRep) : NULL);
2698 if (bitmap2) {
2699 UInt32 *bitmap1;
2700 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2701 __CFCSetMakeBitmap(theSet);
2702 bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2703 while (length--) *bitmap1++ &= *bitmap2++;
2704 __CFCSetPutHasHashValue(theSet, false);
2705 }
2706 CFRelease(bitmapRep);
2707 }
2708 }
2709
2710 void CFCharacterSetInvert(CFMutableCharacterSetRef theSet) {
2711
2712 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID, void, (NSMutableCharacterSet *)theSet, invert);
2713
2714 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2715
2716 if (__CFCSetIsBuiltin((CFCharacterSetRef)theSet) && !__CFCSetIsMutable((CFCharacterSetRef)theSet) && !__CFCSetIsInverted((CFCharacterSetRef)theSet)) {
2717 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)theSet));
2718 if (sharedSet == theSet) { // We're trying to dealloc the builtin set
2719 CFAssert1(0, __kCFLogAssertion, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__);
2720 return; // We don't mutate builtin set
2721 }
2722 }
2723
2724 __CFCSetPutHasHashValue(theSet, false);
2725
2726 if (__CFCSetClassType(theSet) == __kCFCharSetClassBitmap) {
2727 CFIndex idx;
2728 CFIndex count = __kCFBitmapSize / sizeof(UInt32);
2729 UInt32 *bitmap = (UInt32*) __CFCSetBitmapBits(theSet);
2730
2731 if (NULL == bitmap) {
2732 bitmap = (UInt32 *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFBitmapSize, 0);
2733 __CFCSetPutBitmapBits(theSet, (uint8_t *)bitmap);
2734 for (idx = 0;idx < count;idx++) bitmap[idx] = ((UInt32)0xFFFFFFFF);
2735 } else {
2736 for (idx = 0;idx < count;idx++) bitmap[idx] = ~(bitmap[idx]);
2737 }
2738 __CFCSetAllocateAnnexForPlane(theSet, 0); // We need to alloc annex to invert
2739 } else if (__CFCSetClassType(theSet) == __kCFCharSetClassCompactBitmap) {
2740 uint8_t *bitmap = __CFCSetCompactBitmapBits(theSet);
2741 int idx;
2742 int length = 0;
2743 uint8_t value;
2744
2745 for (idx = 0;idx < __kCFCompactBitmapNumPages;idx++) {
2746 value = bitmap[idx];
2747
2748 if (value == 0) {
2749 bitmap[idx] = UINT8_MAX;
2750 } else if (value == UINT8_MAX) {
2751 bitmap[idx] = 0;
2752 } else {
2753 length += __kCFCompactBitmapPageSize;
2754 }
2755 }
2756 bitmap += __kCFCompactBitmapNumPages;
2757 for (idx = 0;idx < length;idx++) bitmap[idx] = ~(bitmap[idx]);
2758 __CFCSetAllocateAnnexForPlane(theSet, 0); // We need to alloc annex to invert
2759 } else {
2760 __CFCSetPutIsInverted(theSet, !__CFCSetIsInverted(theSet));
2761 }
2762 __CFCSetAnnexSetIsInverted(theSet, !__CFCSetAnnexIsInverted(theSet));
2763 }
2764
2765 void CFCharacterSetCompact(CFMutableCharacterSetRef theSet) {
2766 if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) __CFCSetMakeCompact(theSet);
2767 if (__CFCSetHasNonBMPPlane(theSet)) {
2768 CFMutableCharacterSetRef annex;
2769 int idx;
2770
2771 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2772 if ((annex = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) && __CFCSetIsBitmap(annex) && __CFCSetBitmapBits(annex)) {
2773 __CFCSetMakeCompact(annex);
2774 }
2775 }
2776 }
2777 }
2778
2779 void CFCharacterSetFast(CFMutableCharacterSetRef theSet) {
2780 if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) __CFCSetMakeBitmap(theSet);
2781 if (__CFCSetHasNonBMPPlane(theSet)) {
2782 CFMutableCharacterSetRef annex;
2783 int idx;
2784
2785 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2786 if ((annex = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) && __CFCSetIsCompactBitmap(annex) && __CFCSetCompactBitmapBits(annex)) {
2787 __CFCSetMakeBitmap(annex);
2788 }
2789 }
2790 }
2791 }
2792
2793 /* Keyed-coding support
2794 */
2795 CFCharacterSetKeyedCodingType _CFCharacterSetGetKeyedCodingType(CFCharacterSetRef cset) {
2796 if (CF_IS_OBJC(__kCFCharacterSetTypeID, cset)) return kCFCharacterSetKeyedCodingTypeBitmap;
2797
2798 switch (__CFCSetClassType(cset)) {
2799 case __kCFCharSetClassBuiltin: return ((__CFCSetBuiltinType(cset) < kCFCharacterSetSymbol) ? kCFCharacterSetKeyedCodingTypeBuiltin : kCFCharacterSetKeyedCodingTypeBuiltinAndBitmap);
2800 case __kCFCharSetClassRange: return kCFCharacterSetKeyedCodingTypeRange;
2801
2802 case __kCFCharSetClassString: // We have to check if we have non-BMP here
2803 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) return kCFCharacterSetKeyedCodingTypeString; // BMP only. we can archive the string
2804 /* fallthrough */
2805
2806 default:
2807 return kCFCharacterSetKeyedCodingTypeBitmap;
2808 }
2809 }
2810
2811 CFCharacterSetPredefinedSet _CFCharacterSetGetKeyedCodingBuiltinType(CFCharacterSetRef cset) { return __CFCSetBuiltinType(cset); }
2812 CFRange _CFCharacterSetGetKeyedCodingRange(CFCharacterSetRef cset) { return CFRangeMake(__CFCSetRangeFirstChar(cset), __CFCSetRangeLength(cset)); }
2813 CFStringRef _CFCharacterSetCreateKeyedCodingString(CFCharacterSetRef cset) { return CFStringCreateWithCharacters(kCFAllocatorSystemDefault, __CFCSetStringBuffer(cset), __CFCSetStringLength(cset)); }
2814
2815 bool _CFCharacterSetIsInverted(CFCharacterSetRef cset) { return (__CFCSetIsInverted(cset) != 0); }
2816 void _CFCharacterSetSetIsInverted(CFCharacterSetRef cset, bool flag) { __CFCSetPutIsInverted((CFMutableCharacterSetRef)cset, flag); }
2817
2818 /* Inline buffer support
2819 */
2820 void CFCharacterSetInitInlineBuffer(CFCharacterSetRef cset, CFCharacterSetInlineBuffer *buffer) {
2821 memset(buffer, 0, sizeof(CFCharacterSetInlineBuffer));
2822 buffer->cset = cset;
2823 buffer->rangeLimit = 0x10000;
2824
2825 if (CF_IS_OBJC(__kCFCharacterSetTypeID, cset)) {
2826 CFCharacterSetRef expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(cset);
2827
2828 if (NULL == expandedSet) {
2829 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2830 buffer->rangeLimit = 0x110000;
2831
2832 return;
2833 } else {
2834 cset = expandedSet;
2835 }
2836 }
2837
2838 switch (__CFCSetClassType(cset)) {
2839 case __kCFCharSetClassBuiltin:
2840 buffer->bitmap = CFUniCharGetBitmapPtrForPlane(__CFCSetBuiltinType(cset), 0);
2841 buffer->rangeLimit = 0x110000;
2842 if (NULL == buffer->bitmap) {
2843 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2844 } else {
2845 if (__CFCSetIsInverted(cset)) buffer->flags = kCFCharacterSetIsInverted;
2846 }
2847 break;
2848
2849 case __kCFCharSetClassRange:
2850 buffer->rangeStart = __CFCSetRangeFirstChar(cset);
2851 buffer->rangeLimit = __CFCSetRangeFirstChar(cset) + __CFCSetRangeLength(cset);
2852 if (__CFCSetIsInverted(cset)) buffer->flags = kCFCharacterSetIsInverted;
2853 return;
2854
2855 case __kCFCharSetClassString:
2856 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2857 if (__CFCSetStringLength(cset) > 0) {
2858 buffer->rangeStart = *__CFCSetStringBuffer(cset);
2859 buffer->rangeLimit = *(__CFCSetStringBuffer(cset) + __CFCSetStringLength(cset) - 1) + 1;
2860
2861 if (__CFCSetIsInverted(cset)) {
2862 if (0 == buffer->rangeStart) {
2863 buffer->rangeStart = buffer->rangeLimit;
2864 buffer->rangeLimit = 0x10000;
2865 } else if (0x10000 == buffer->rangeLimit) {
2866 buffer->rangeLimit = buffer->rangeStart;
2867 buffer->rangeStart = 0;
2868 } else {
2869 buffer->rangeStart = 0;
2870 buffer->rangeLimit = 0x10000;
2871 }
2872 }
2873 }
2874 break;
2875
2876 case __kCFCharSetClassBitmap:
2877 case __kCFCharSetClassCompactBitmap:
2878 buffer->bitmap = __CFCSetCompactBitmapBits(cset);
2879 if (NULL == buffer->bitmap) {
2880 buffer->flags = kCFCharacterSetIsCompactBitmap;
2881 if (__CFCSetIsInverted(cset)) buffer->flags |= kCFCharacterSetIsInverted;
2882 } else {
2883 if (__kCFCharSetClassCompactBitmap == __CFCSetClassType(cset)) buffer->flags = kCFCharacterSetIsCompactBitmap;
2884 }
2885 break;
2886
2887 default:
2888 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
2889 return;
2890 }
2891
2892 if (__CFCSetAnnexIsInverted(cset)) {
2893 buffer->rangeLimit = 0x110000;
2894 } else if (__CFCSetHasNonBMPPlane(cset)) {
2895 CFIndex index;
2896
2897 for (index = MAX_ANNEX_PLANE;index > 0;index--) {
2898 if (NULL != __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, index)) {
2899 buffer->rangeLimit = (index + 1) << 16;
2900 break;
2901 }
2902 }
2903 }
2904 }