]> git.saurik.com Git - apple/cf.git/blob - CFCharacterSet.c
cc8598d9757a2fcf03cd8168918db4da8e1a558e
[apple/cf.git] / CFCharacterSet.c
1 /*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFCharacterSet.c
25 Copyright (c) 1999-2009, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include <CoreFoundation/CFCharacterSet.h>
30 #include <CoreFoundation/CFByteOrder.h>
31 #include "CFCharacterSetPriv.h"
32 #include <CoreFoundation/CFData.h>
33 #include <CoreFoundation/CFString.h>
34 #include "CFInternal.h"
35 #include <CoreFoundation/CFUniChar.h>
36 #include "CFUniCharPriv.h"
37 #include <stdlib.h>
38 #include <string.h>
39
40
#define BITSPERBYTE 8 /* (CHAR_BIT * sizeof(unsigned char)) */
/* log2(BITSPERBYTE): `ch >> LOG_BPB` is the index of the byte holding character ch in a bitmap */
#define LOG_BPB 3
/* log2(32): `ch >> LOG_BPLW` is the index of the 32-bit word holding character ch in a bitmap */
#define LOG_BPLW 5
/* Number of characters covered by one full bitmap */
#define NUMCHARACTERS 65536

/* Highest plane number kept in the annex; annex planes are numbered 1..MAX_ANNEX_PLANE */
#define MAX_ANNEX_PLANE (16)

/* Number of bytes in a full bitmap (one bit per character, NUMCHARACTERS bits).
 */
#define __kCFBitmapSize (NUMCHARACTERS / BITSPERBYTE)

/* How many elements max can be in an __kCFCharSetClassString CFCharacterSet
 */
#define __kCFStringCharSetMax 64

/* The last builtin set ID number
 */
#define __kCFLastBuiltinSetID kCFCharacterSetNewline

/* How many elements in the "singles" array before we use binary search.
 */
#define __kCFSetBreakeven 10

/* This tells us, within 1k or so, whether a thing is POTENTIALLY in the set (in the bitmap blob of the private structure) before we bother to do specific checking.
   n is the character, i an array of 32-bit words with one bit per 1024-character block.
 */
#define __CFCSetBitsInRange(n, i) (i[n>>15] & (1L << ((n>>10) % 32)))

/* Compact bitmap params
   A compact bitmap starts with one header byte per page, followed by the bodies of the
   pages that are neither completely empty nor completely full.
 */
#define __kCFCompactBitmapNumPages (256)

#define __kCFCompactBitmapMaxPages (128) // the max pages allocated

/* Bytes per page of a full bitmap */
#define __kCFCompactBitmapPageSize (__kCFBitmapSize / __kCFCompactBitmapNumPages)
75
/* Storage for the characters outside of the BMP (planes 1..MAX_ANNEX_PLANE).
 */
typedef struct {
    CFCharacterSetRef *_nonBMPPlanes; // per-plane character sets; plane p lives at index p - 1
    unsigned int _validEntriesBitmap; // bit p is set when _nonBMPPlanes[p - 1] holds a (retained) set
    unsigned char _numOfAllocEntries; // number of entries allocated in _nonBMPPlanes
    unsigned char _isAnnexInverted; // non-zero when the annex planes are to be interpreted inverted
    uint16_t _padding; // not referenced by the code in this part of the file
} CFCharSetAnnexStruct;
83
/* The character set object. The class type stored in the _base._cfinfo flag bits
   (see the __kCFCharSetClass* values) selects which member of _variants is in use.
 */
struct __CFCharacterSet {
    CFRuntimeBase _base;
    CFHashCode _hashValue; // presumably only meaningful while the HasHashValue flag is set -- TODO confirm
    union {
        struct { // __kCFCharSetClassBuiltin
            CFIndex _type; // predefined set identifier
        } _builtin;
        struct { // __kCFCharSetClassRange
            UInt32 _firstChar; // first character of the range
            CFIndex _length; // number of characters in the range
        } _range;
        struct { // __kCFCharSetClassString
            UniChar *_buffer; // the member characters
            CFIndex _length; // number of characters in _buffer
        } _string;
        struct { // __kCFCharSetClassBitmap
            uint8_t *_bits; // __kCFBitmapSize bytes, or NULL for an empty set
        } _bitmap;
        struct { // __kCFCharSetClassCompactBitmap
            uint8_t *_cBits; // page header followed by page bodies, or NULL for an empty set
        } _compactBitmap;
    } _variants;
    CFCharSetAnnexStruct *_annex; // non-BMP planes; NULL when there are none
};
108
109 /* _base._info values interesting for CFCharacterSet
110 */
enum {
    /* Storage class of the set (selects the active member of _variants) */
    __kCFCharSetClassTypeMask = 0x0070,
    __kCFCharSetClassBuiltin = 0x0000,
    __kCFCharSetClassRange = 0x0010,
    __kCFCharSetClassString = 0x0020,
    __kCFCharSetClassBitmap = 0x0030,
    __kCFCharSetClassSet = 0x0040, // NOTE: shares its value with __kCFCharSetClassCompactBitmap
    __kCFCharSetClassCompactBitmap = 0x0040,

    /* Set when the stored contents are to be interpreted inverted */
    __kCFCharSetIsInvertedMask = 0x0008,
    __kCFCharSetIsInverted = 0x0008,

    /* Set when a hash value is recorded for the set */
    __kCFCharSetHasHashValueMask = 0x00004,
    __kCFCharSetHasHashValue = 0x0004,

    /* Generic CFBase values */
    __kCFCharSetIsMutableMask = 0x0001,
    __kCFCharSetIsMutable = 0x0001,
};
130
131 /* Inline accessor macros for _base._info
132 */
133 CF_INLINE Boolean __CFCSetIsMutable(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetIsMutableMask) == __kCFCharSetIsMutable;}
134 CF_INLINE Boolean __CFCSetIsBuiltin(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassBuiltin;}
135 CF_INLINE Boolean __CFCSetIsRange(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassRange;}
136 CF_INLINE Boolean __CFCSetIsString(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassString;}
137 CF_INLINE Boolean __CFCSetIsBitmap(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassBitmap;}
138 CF_INLINE Boolean __CFCSetIsCompactBitmap(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassCompactBitmap;}
139 CF_INLINE Boolean __CFCSetIsInverted(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetIsInvertedMask) == __kCFCharSetIsInverted;}
140 CF_INLINE Boolean __CFCSetHasHashValue(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetHasHashValueMask) == __kCFCharSetHasHashValue;}
141 CF_INLINE UInt32 __CFCSetClassType(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask);}
142
143 CF_INLINE void __CFCSetPutIsMutable(CFMutableCharacterSetRef cset, Boolean isMutable) {(isMutable ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetIsMutable) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~ __kCFCharSetIsMutable));}
144 CF_INLINE void __CFCSetPutIsInverted(CFMutableCharacterSetRef cset, Boolean isInverted) {(isInverted ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetIsInverted) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetIsInverted));}
145 CF_INLINE void __CFCSetPutHasHashValue(CFMutableCharacterSetRef cset, Boolean hasHash) {(hasHash ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetHasHashValue) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetHasHashValue));}
146 CF_INLINE void __CFCSetPutClassType(CFMutableCharacterSetRef cset, UInt32 classType) {cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetClassTypeMask; cset->_base._cfinfo[CF_INFO_BITS] |= classType;}
147
148
149 /* Inline contents accessor macros
150 */
151 CF_INLINE CFCharacterSetPredefinedSet __CFCSetBuiltinType(CFCharacterSetRef cset) {return cset->_variants._builtin._type;}
152 CF_INLINE UInt32 __CFCSetRangeFirstChar(CFCharacterSetRef cset) {return cset->_variants._range._firstChar;}
153 CF_INLINE CFIndex __CFCSetRangeLength(CFCharacterSetRef cset) {return cset->_variants._range._length;}
154 CF_INLINE UniChar *__CFCSetStringBuffer(CFCharacterSetRef cset) {return (UniChar*)(cset->_variants._string._buffer);}
155 CF_INLINE CFIndex __CFCSetStringLength(CFCharacterSetRef cset) {return cset->_variants._string._length;}
156 CF_INLINE uint8_t *__CFCSetBitmapBits(CFCharacterSetRef cset) {return cset->_variants._bitmap._bits;}
157 CF_INLINE uint8_t *__CFCSetCompactBitmapBits(CFCharacterSetRef cset) {return cset->_variants._compactBitmap._cBits;}
158
159 CF_INLINE void __CFCSetPutBuiltinType(CFMutableCharacterSetRef cset, CFCharacterSetPredefinedSet type) {cset->_variants._builtin._type = type;}
160 CF_INLINE void __CFCSetPutRangeFirstChar(CFMutableCharacterSetRef cset, UInt32 first) {cset->_variants._range._firstChar = first;}
161 CF_INLINE void __CFCSetPutRangeLength(CFMutableCharacterSetRef cset, CFIndex length) {cset->_variants._range._length = length;}
162 CF_INLINE void __CFCSetPutStringBuffer(CFMutableCharacterSetRef cset, UniChar *theBuffer) {cset->_variants._string._buffer = theBuffer;}
163 CF_INLINE void __CFCSetPutStringLength(CFMutableCharacterSetRef cset, CFIndex length) {cset->_variants._string._length = length;}
164 CF_INLINE void __CFCSetPutBitmapBits(CFMutableCharacterSetRef cset, uint8_t *bits) {cset->_variants._bitmap._bits = bits;}
165 CF_INLINE void __CFCSetPutCompactBitmapBits(CFMutableCharacterSetRef cset, uint8_t *bits) {cset->_variants._compactBitmap._cBits = bits;}
166
167 /* Validation funcs
168 */
#if defined(CF_ENABLE_ASSERTIONS)
/* Asserts that type is a valid predefined set identifier (1..__kCFLastBuiltinSetID).
   func is the name of the calling function, used in the log message.
 */
CF_INLINE void __CFCSetValidateBuiltinType(CFCharacterSetPredefinedSet type, const char *func) {
    // The value is cast to long so that the argument width matches the %ld specifier
    CFAssert2(type > 0 && type <= __kCFLastBuiltinSetID, __kCFLogAssertion, "%s: Unknown builtin type %ld", func, (long)type);
}

/* Asserts that theRange starts at a non-negative location and ends at or below 0x1FFFFF.
   func is the name of the calling function, used in the log message.
 */
CF_INLINE void __CFCSetValidateRange(CFRange theRange, const char *func) {
    // CFRange members are CFIndex; cast to long so that the argument width matches %ld
    CFAssert3(theRange.location >= 0 && theRange.location + theRange.length <= 0x1FFFFF, __kCFLogAssertion, "%s: Range out of Unicode range (location -> %ld length -> %ld)", func, (long)theRange.location, (long)theRange.length);
}

/* Asserts that cset is a CFCharacterSet and that it is mutable.
   func is the name of the calling function, used in the log message.
 */
CF_INLINE void __CFCSetValidateTypeAndMutability(CFCharacterSetRef cset, const char *func) {
    __CFGenericValidateType(cset, __kCFCharacterSetTypeID);
    CFAssert1(__CFCSetIsMutable(cset), __kCFLogAssertion, "%s: Immutable character set passed to mutable function", func);
}
#else
/* Assertions disabled: the validation calls compile to nothing */
#define __CFCSetValidateBuiltinType(t,f)
#define __CFCSetValidateRange(r,f)
#define __CFCSetValidateTypeAndMutability(r,f)
#endif
185
186 /* Inline utility funcs
187 */
188 static Boolean __CFCSetIsEqualBitmap(const UInt32 *bits1, const UInt32 *bits2) {
189 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
190
191 if (bits1 == bits2) {
192 return true;
193 } else if (bits1 && bits2) {
194 if (bits1 == (const UInt32 *)-1) {
195 while (length--) if ((UInt32)-1 != *bits2++) return false;
196 } else if (bits2 == (const UInt32 *)-1) {
197 while (length--) if ((UInt32)-1 != *bits1++) return false;
198 } else {
199 while (length--) if (*bits1++ != *bits2++) return false;
200 }
201 return true;
202 } else if (!bits1 && !bits2) { // empty set
203 return true;
204 } else {
205 if (bits2) bits1 = bits2;
206 if (bits1 == (const UInt32 *)-1) return false;
207 while (length--) if (*bits1++) return false;
208 return true;
209 }
210 }
211
212 CF_INLINE Boolean __CFCSetIsEqualBitmapInverted(const UInt32 *bits1, const UInt32 *bits2) {
213 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
214
215 while (length--) if (*bits1++ != ~(*(bits2++))) return false;
216 return true;
217 }
218
219 static Boolean __CFCSetIsBitmapEqualToRange(const UInt32 *bits, UniChar firstChar, UniChar lastChar, Boolean isInverted) {
220 CFIndex firstCharIndex = firstChar >> LOG_BPB;
221 CFIndex lastCharIndex = lastChar >> LOG_BPB;
222 CFIndex length;
223 UInt32 value;
224
225 if (firstCharIndex == lastCharIndex) {
226 value = ((((UInt32)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((UInt32)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))))) << (((sizeof(UInt32) - 1) - (firstCharIndex % sizeof(UInt32))) * BITSPERBYTE);
227 value = CFSwapInt32HostToBig(value);
228 firstCharIndex = lastCharIndex = firstChar >> LOG_BPLW;
229 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
230 } else {
231 UInt32 firstCharMask;
232 UInt32 lastCharMask;
233
234 length = firstCharIndex % sizeof(UInt32);
235 firstCharMask = (((((UInt32)0xFF) << (firstChar & (BITSPERBYTE - 1))) & 0xFF) << (((sizeof(UInt32) - 1) - length) * BITSPERBYTE)) | (((UInt32)0xFFFFFFFF) >> ((length + 1) * BITSPERBYTE));
236
237 length = lastCharIndex % sizeof(UInt32);
238 lastCharMask = ((((UInt32)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))) << (((sizeof(UInt32) - 1) - length) * BITSPERBYTE)) | (((UInt32)0xFFFFFFFF) << ((sizeof(UInt32) - length) * BITSPERBYTE));
239
240 firstCharIndex = firstChar >> LOG_BPLW;
241 lastCharIndex = lastChar >> LOG_BPLW;
242
243 if (firstCharIndex == lastCharIndex) {
244 firstCharMask &= lastCharMask;
245 value = CFSwapInt32HostToBig(firstCharMask & lastCharMask);
246 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
247 } else {
248 value = CFSwapInt32HostToBig(firstCharMask);
249 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
250
251 value = CFSwapInt32HostToBig(lastCharMask);
252 if (*(bits + lastCharIndex) != (isInverted ? ~value : value)) return FALSE;
253 }
254 }
255
256 length = firstCharIndex;
257 value = (isInverted ? ((UInt32)0xFFFFFFFF) : 0);
258 while (length--) {
259 if (*(bits++) != value) return FALSE;
260 }
261
262 ++bits; // Skip firstCharIndex
263 length = (lastCharIndex - (firstCharIndex + 1));
264 value = (isInverted ? 0 : ((UInt32)0xFFFFFFFF));
265 while (length-- > 0) {
266 if (*(bits++) != value) return FALSE;
267 }
268 if (firstCharIndex != lastCharIndex) ++bits;
269
270 length = (0xFFFF >> LOG_BPLW) - lastCharIndex;
271 value = (isInverted ? ((UInt32)0xFFFFFFFF) : 0);
272 while (length--) {
273 if (*(bits++) != value) return FALSE;
274 }
275
276 return TRUE;
277 }
278
279 CF_INLINE Boolean __CFCSetIsBitmapSupersetOfBitmap(const UInt32 *bits1, const UInt32 *bits2, Boolean isInverted1, Boolean isInverted2) {
280 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
281 UInt32 val1, val2;
282
283 while (length--) {
284 val2 = (isInverted2 ? ~(*(bits2++)) : *(bits2++));
285 val1 = (isInverted1 ? ~(*(bits1++)) : *(bits1++)) & val2;
286 if (val1 != val2) return false;
287 }
288
289 return true;
290 }
291
292 CF_INLINE Boolean __CFCSetHasNonBMPPlane(CFCharacterSetRef cset) { return ((cset)->_annex && (cset)->_annex->_validEntriesBitmap ? true : false); }
293 CF_INLINE Boolean __CFCSetAnnexIsInverted (CFCharacterSetRef cset) { return ((cset)->_annex && (cset)->_annex->_isAnnexInverted ? true : false); }
294 CF_INLINE UInt32 __CFCSetAnnexValidEntriesBitmap(CFCharacterSetRef cset) { return ((cset)->_annex ? (cset)->_annex->_validEntriesBitmap : 0); }
295
296 CF_INLINE Boolean __CFCSetIsEmpty(CFCharacterSetRef cset) {
297 if (__CFCSetHasNonBMPPlane(cset) || __CFCSetAnnexIsInverted(cset)) return false;
298
299 switch (__CFCSetClassType(cset)) {
300 case __kCFCharSetClassRange: if (!__CFCSetRangeLength(cset)) return true; break;
301 case __kCFCharSetClassString: if (!__CFCSetStringLength(cset)) return true; break;
302 case __kCFCharSetClassBitmap: if (!__CFCSetBitmapBits(cset)) return true; break;
303 case __kCFCharSetClassCompactBitmap: if (!__CFCSetCompactBitmapBits(cset)) return true; break;
304 }
305 return false;
306 }
307
308 CF_INLINE void __CFCSetBitmapAddCharacter(uint8_t *bitmap, UniChar theChar) {
309 bitmap[(theChar) >> LOG_BPB] |= (((unsigned)1) << (theChar & (BITSPERBYTE - 1)));
310 }
311
312 CF_INLINE void __CFCSetBitmapRemoveCharacter(uint8_t *bitmap, UniChar theChar) {
313 bitmap[(theChar) >> LOG_BPB] &= ~(((unsigned)1) << (theChar & (BITSPERBYTE - 1)));
314 }
315
316 CF_INLINE Boolean __CFCSetIsMemberBitmap(const uint8_t *bitmap, UniChar theChar) {
317 return ((bitmap[(theChar) >> LOG_BPB] & (((unsigned)1) << (theChar & (BITSPERBYTE - 1)))) ? true : false);
318 }
319
320 #define NUM_32BIT_SLOTS (NUMCHARACTERS / 32)
321
322 CF_INLINE void __CFCSetBitmapFastFillWithValue(UInt32 *bitmap, uint8_t value) {
323 UInt32 mask = (value << 24) | (value << 16) | (value << 8) | value;
324 UInt32 numSlots = NUMCHARACTERS / 32;
325
326 while (numSlots--) *(bitmap++) = mask;
327 }
328
329 CF_INLINE void __CFCSetBitmapAddCharactersInRange(uint8_t *bitmap, UniChar firstChar, UniChar lastChar) {
330 if (firstChar == lastChar) {
331 bitmap[firstChar >> LOG_BPB] |= (((unsigned)1) << (firstChar & (BITSPERBYTE - 1)));
332 } else {
333 UInt32 idx = firstChar >> LOG_BPB;
334 UInt32 max = lastChar >> LOG_BPB;
335
336 if (idx == max) {
337 bitmap[idx] |= (((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
338 } else {
339 bitmap[idx] |= (((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1)));
340 bitmap[max] |= (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
341
342 ++idx;
343 while (idx < max) bitmap[idx++] = 0xFF;
344 }
345 }
346 }
347
348 CF_INLINE void __CFCSetBitmapRemoveCharactersInRange(uint8_t *bitmap, UniChar firstChar, UniChar lastChar) {
349 UInt32 idx = firstChar >> LOG_BPB;
350 UInt32 max = lastChar >> LOG_BPB;
351
352 if (idx == max) {
353 bitmap[idx] &= ~((((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))));
354 } else {
355 bitmap[idx] &= ~(((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1)));
356 bitmap[max] &= ~(((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
357
358 ++idx;
359 while (idx < max) bitmap[idx++] = 0;
360 }
361 }
362
/* Helpers for the annex's valid-entries bitmap: bit number `plane` stands for that plane.
   Set and Clear modify `bitmap` in place (it must be an lvalue); Get yields a non-zero
   value when the bit is set.
 */
#define __CFCSetAnnexBitmapSetPlane(bitmap,plane) ((bitmap) |= (1 << (plane)))
#define __CFCSetAnnexBitmapClearPlane(bitmap,plane) ((bitmap) &= (~(1 << (plane))))
#define __CFCSetAnnexBitmapGetPlane(bitmap,plane) ((bitmap) & (1 << (plane)))
366
367 CF_INLINE void __CFCSetAllocateAnnexForPlane(CFCharacterSetRef cset, int plane) {
368 if (cset->_annex == NULL) {
369 ((CFMutableCharacterSetRef)cset)->_annex = (CFCharSetAnnexStruct *)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharSetAnnexStruct), 0);
370 cset->_annex->_numOfAllocEntries = plane;
371 cset->_annex->_isAnnexInverted = false;
372 cset->_annex->_validEntriesBitmap = 0;
373 cset->_annex->_nonBMPPlanes = ((plane > 0) ? (CFCharacterSetRef*)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharacterSetRef) * plane, 0) : NULL);
374 } else if (cset->_annex->_numOfAllocEntries < plane) {
375 cset->_annex->_numOfAllocEntries = plane;
376 if (NULL == cset->_annex->_nonBMPPlanes) {
377 cset->_annex->_nonBMPPlanes = (CFCharacterSetRef*)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharacterSetRef) * plane, 0);
378 } else {
379 cset->_annex->_nonBMPPlanes = (CFCharacterSetRef*)CFAllocatorReallocate(CFGetAllocator(cset), (void *)cset->_annex->_nonBMPPlanes, sizeof(CFCharacterSetRef) * plane, 0);
380 }
381 }
382 }
383
384 CF_INLINE void __CFCSetAnnexSetIsInverted(CFCharacterSetRef cset, Boolean flag) {
385 if (flag) __CFCSetAllocateAnnexForPlane(cset, 0);
386 if (cset->_annex) ((CFMutableCharacterSetRef)cset)->_annex->_isAnnexInverted = flag;
387 }
388
389 CF_INLINE void __CFCSetPutCharacterSetToAnnexPlane(CFCharacterSetRef cset, CFCharacterSetRef annexCSet, int plane) {
390 __CFCSetAllocateAnnexForPlane(cset, plane);
391 if (__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane)) CFRelease(cset->_annex->_nonBMPPlanes[plane - 1]);
392 if (annexCSet) {
393 cset->_annex->_nonBMPPlanes[plane - 1] = (CFCharacterSetRef)CFRetain(annexCSet);
394 __CFCSetAnnexBitmapSetPlane(cset->_annex->_validEntriesBitmap, plane);
395 } else {
396 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, plane);
397 }
398 }
399
400 CF_INLINE CFCharacterSetRef __CFCSetGetAnnexPlaneCharacterSet(CFCharacterSetRef cset, int plane) {
401 __CFCSetAllocateAnnexForPlane(cset, plane);
402 if (!__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane)) {
403 cset->_annex->_nonBMPPlanes[plane - 1] = (CFCharacterSetRef)CFCharacterSetCreateMutable(CFGetAllocator(cset));
404 __CFCSetAnnexBitmapSetPlane(cset->_annex->_validEntriesBitmap, plane);
405 }
406 return cset->_annex->_nonBMPPlanes[plane - 1];
407 }
408
409 CF_INLINE CFCharacterSetRef __CFCSetGetAnnexPlaneCharacterSetNoAlloc(CFCharacterSetRef cset, int plane) {
410 return (cset->_annex && __CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane) ? cset->_annex->_nonBMPPlanes[plane - 1] : NULL);
411 }
412
413 CF_INLINE void __CFCSetDeallocateAnnexPlane(CFCharacterSetRef cset) {
414 if (cset->_annex) {
415 int idx;
416
417 for (idx = 0;idx < MAX_ANNEX_PLANE;idx++) {
418 if (__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, idx + 1)) {
419 CFRelease(cset->_annex->_nonBMPPlanes[idx]);
420 }
421 }
422 CFAllocatorDeallocate(CFGetAllocator(cset), cset->_annex->_nonBMPPlanes);
423 CFAllocatorDeallocate(CFGetAllocator(cset), cset->_annex);
424 ((CFMutableCharacterSetRef)cset)->_annex = NULL;
425 }
426 }
427
428 CF_INLINE uint8_t __CFCSetGetHeaderValue(const uint8_t *bitmap, int *numPages) {
429 uint8_t value = *bitmap;
430
431 if ((value == 0) || (value == UINT8_MAX)) {
432 int numBytes = __kCFCompactBitmapPageSize - 1;
433
434 while (numBytes > 0) {
435 if (*(++bitmap) != value) break;
436 --numBytes;
437 }
438 if (numBytes == 0) return value;
439 }
440 return (uint8_t)(++(*numPages));
441 }
442
443 CF_INLINE bool __CFCSetIsMemberInCompactBitmap(const uint8_t *compactBitmap, UTF16Char character) {
444 uint8_t value = compactBitmap[(character >> 8)]; // Assuming __kCFCompactBitmapNumPages == 256
445
446 if (value == 0) {
447 return false;
448 } else if (value == UINT8_MAX) {
449 return true;
450 } else {
451 compactBitmap += (__kCFCompactBitmapNumPages + (__kCFCompactBitmapPageSize * (value - 1)));
452 character &= 0xFF; // Assuming __kCFCompactBitmapNumPages == 256
453 return ((compactBitmap[(character / BITSPERBYTE)] & (1 << (character % BITSPERBYTE))) ? true : false);
454 }
455 }
456
457 CF_INLINE uint32_t __CFCSetGetCompactBitmapSize(const uint8_t *compactBitmap) {
458 uint32_t length = __kCFCompactBitmapNumPages;
459 uint32_t size = __kCFCompactBitmapNumPages;
460 uint8_t value;
461
462 while (length-- > 0) {
463 value = *(compactBitmap++);
464 if ((value != 0) && (value != UINT8_MAX)) size += __kCFCompactBitmapPageSize;
465 }
466 return size;
467 }
468
469 /* Take a private "set" structure and make a bitmap from it. Return the bitmap. THE CALLER MUST RELEASE THE RETURNED MEMORY as necessary.
470 */
471
472 CF_INLINE void __CFCSetBitmapProcessManyCharacters(unsigned char *map, unsigned n, unsigned m, Boolean isInverted) {
473 if (isInverted) {
474 __CFCSetBitmapRemoveCharactersInRange(map, n, m);
475 } else {
476 __CFCSetBitmapAddCharactersInRange(map, n, m);
477 }
478 }
479
480 CF_INLINE void __CFExpandCompactBitmap(const uint8_t *src, uint8_t *dst) {
481 const uint8_t *srcBody = src + __kCFCompactBitmapNumPages;
482 int i;
483 uint8_t value;
484
485 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
486 value = *(src++);
487 if ((value == 0) || (value == UINT8_MAX)) {
488 memset(dst, value, __kCFCompactBitmapPageSize);
489 } else {
490 memmove(dst, srcBody, __kCFCompactBitmapPageSize);
491 srcBody += __kCFCompactBitmapPageSize;
492 }
493 dst += __kCFCompactBitmapPageSize;
494 }
495 }
496
497
498 static void __CFCheckForExpandedSet(CFCharacterSetRef cset) {
499 static int8_t __CFNumberOfPlanesForLogging = -1;
500 static bool warnedOnce = false;
501
502 if (0 > __CFNumberOfPlanesForLogging) {
503 const char *envVar = __CFgetenv("CFCharacterSetCheckForExpandedSet");
504 long value = (envVar ? strtol_l(envVar, NULL, 0, NULL) : 0);
505 __CFNumberOfPlanesForLogging = (int8_t)(((value > 0) && (value <= 16)) ? value : 0);
506 }
507
508 if (__CFNumberOfPlanesForLogging) {
509 uint32_t entries = __CFCSetAnnexValidEntriesBitmap(cset);
510 int count = 0;
511
512 while (entries) {
513 if ((entries & 1) && (++count >= __CFNumberOfPlanesForLogging)) {
514 if (!warnedOnce) {
515 CFLog(kCFLogLevelWarning, CFSTR("An expanded CFMutableCharacter has been detected. Recommend to compact with CFCharacterSetCreateCopy"));
516 warnedOnce = true;
517 }
518 break;
519 }
520 entries >>= 1;
521 }
522 }
523 }
524
525 static void __CFCSetGetBitmap(CFCharacterSetRef cset, uint8_t *bits) {
526 uint8_t *bitmap;
527 CFIndex length = __kCFBitmapSize;
528
529 if (__CFCSetIsBitmap(cset) && (bitmap = __CFCSetBitmapBits(cset))) {
530 memmove(bits, bitmap, __kCFBitmapSize);
531 } else {
532 Boolean isInverted = __CFCSetIsInverted(cset);
533 uint8_t value = (isInverted ? (uint8_t)-1 : 0);
534
535 bitmap = bits;
536 while (length--) *bitmap++ = value; // Initialize the buffer
537
538 if (!__CFCSetIsEmpty(cset)) {
539 switch (__CFCSetClassType(cset)) {
540 case __kCFCharSetClassBuiltin: {
541 UInt8 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset), 0, bits, (isInverted != 0));
542 if (result == kCFUniCharBitmapEmpty && isInverted) {
543 length = __kCFBitmapSize;
544 bitmap = bits;
545 while (length--) *bitmap++ = 0;
546 } else if (result == kCFUniCharBitmapAll && !isInverted) {
547 length = __kCFBitmapSize;
548 bitmap = bits;
549 while (length--) *bitmap++ = (UInt8)0xFF;
550 }
551 }
552 break;
553
554 case __kCFCharSetClassRange: {
555 UInt32 theChar = __CFCSetRangeFirstChar(cset);
556 if (theChar < NUMCHARACTERS) { // the range starts in BMP
557 length = __CFCSetRangeLength(cset);
558 if (theChar + length >= NUMCHARACTERS) length = NUMCHARACTERS - theChar;
559 if (isInverted) {
560 __CFCSetBitmapRemoveCharactersInRange(bits, theChar, (UniChar)(theChar + length) - 1);
561 } else {
562 __CFCSetBitmapAddCharactersInRange(bits, theChar, (UniChar)(theChar + length) - 1);
563 }
564 }
565 }
566 break;
567
568 case __kCFCharSetClassString: {
569 const UniChar *buffer = __CFCSetStringBuffer(cset);
570 length = __CFCSetStringLength(cset);
571 while (length--) (isInverted ? __CFCSetBitmapRemoveCharacter(bits, *buffer++) : __CFCSetBitmapAddCharacter(bits, *buffer++));
572 }
573 break;
574
575 case __kCFCharSetClassCompactBitmap:
576 __CFExpandCompactBitmap(__CFCSetCompactBitmapBits(cset), bits);
577 break;
578 }
579 }
580 }
581 }
582
583 static Boolean __CFCharacterSetEqual(CFTypeRef cf1, CFTypeRef cf2);
584
/* Compares the non-BMP (annex) planes of two character sets.
   When both annexes have the same inverted state, the valid-plane bitmaps must be
   identical and every valid plane must compare equal with __CFCharacterSetEqual.
   When the inverted states differ, each plane of one set must be the exact
   complement of the same plane of the other set; a plane without entry counts as
   having no bits set.
 */
static Boolean __CFCSetIsEqualAnnex(CFCharacterSetRef cf1, CFCharacterSetRef cf2) {
    CFCharacterSetRef subSet1;
    CFCharacterSetRef subSet2;
    Boolean isAnnexInvertStateIdentical = (__CFCSetAnnexIsInverted(cf1) == __CFCSetAnnexIsInverted(cf2) ? true: false);
    int idx;

    if (isAnnexInvertStateIdentical) {
        // Same interpretation on both sides: the same planes must exist and match
        if (__CFCSetAnnexValidEntriesBitmap(cf1) != __CFCSetAnnexValidEntriesBitmap(cf2)) return false;
        for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
            subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1, idx);
            subSet2 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2, idx);

            if (subSet1 && !__CFCharacterSetEqual(subSet1, subSet2)) return false;
        }
    } else {
        // Scratch bitmaps for planes that are not stored as plain bitmaps (two buffers of __kCFBitmapSize bytes on the stack)
        uint8_t bitsBuf[__kCFBitmapSize];
        uint8_t bitsBuf2[__kCFBitmapSize];

        for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
            subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1, idx);
            subSet2 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2, idx);

            if (subSet1 == NULL && subSet2 == NULL) {
                // Missing on both sides: cannot be complements of each other
                return false;
            } else if (subSet1 == NULL) {
                // Missing on one side: the other side has to be completely filled
                if (__CFCSetIsBitmap(subSet2)) {
                    if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(subSet2), (const UInt32 *)-1)) {
                        return false;
                    }
                } else {
                    __CFCSetGetBitmap(subSet2, bitsBuf);
                    if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1)) {
                        return false;
                    }
                }
            } else if (subSet2 == NULL) {
                // Mirror image of the previous case
                if (__CFCSetIsBitmap(subSet1)) {
                    if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)-1)) {
                        return false;
                    }
                } else {
                    __CFCSetGetBitmap(subSet1, bitsBuf);
                    if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1)) {
                        return false;
                    }
                }
            } else {
                // Present on both sides: the bitmaps must be complements of each other
                Boolean isBitmap1 = __CFCSetIsBitmap(subSet1);
                Boolean isBitmap2 = __CFCSetIsBitmap(subSet2);

                if (isBitmap1 && isBitmap2) {
                    // NOTE(review): the stored bits are dereferenced without a NULL check here -- confirm that bitmap class annex planes always carry a buffer
                    if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)__CFCSetBitmapBits(subSet2))) {
                        return false;
                    }
                } else if (!isBitmap1 && !isBitmap2) {
                    __CFCSetGetBitmap(subSet1, bitsBuf);
                    __CFCSetGetBitmap(subSet2, bitsBuf2);
                    if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)bitsBuf, (const UInt32 *)bitsBuf2)) {
                        return false;
                    }
                } else {
                    // Exactly one is a bitmap: arrange for subSet1 to be that one
                    if (isBitmap2) {
                        CFCharacterSetRef tmp = subSet2;
                        subSet2 = subSet1;
                        subSet1 = tmp;
                    }
                    __CFCSetGetBitmap(subSet2, bitsBuf);
                    if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)bitsBuf)) {
                        return false;
                    }
                }
            }
        }
    }
    return true;
}
661
662 /* Compact bitmap
663 */
664 static uint8_t *__CFCreateCompactBitmap(CFAllocatorRef allocator, const uint8_t *bitmap) {
665 const uint8_t *src;
666 uint8_t *dst;
667 int i;
668 int numPages = 0;
669 uint8_t header[__kCFCompactBitmapNumPages];
670
671 src = bitmap;
672 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
673 header[i] = __CFCSetGetHeaderValue(src, &numPages);
674
675 // Allocating more pages is probably not interesting enough to be compact
676 if (numPages > __kCFCompactBitmapMaxPages) return NULL;
677 src += __kCFCompactBitmapPageSize;
678 }
679
680 dst = (uint8_t *)CFAllocatorAllocate(allocator, __kCFCompactBitmapNumPages + (__kCFCompactBitmapPageSize * numPages), 0);
681
682 if (numPages > 0) {
683 uint8_t *dstBody = dst + __kCFCompactBitmapNumPages;
684
685 src = bitmap;
686 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
687 dst[i] = header[i];
688
689 if ((dst[i] != 0) && (dst[i] != UINT8_MAX)) {
690 memmove(dstBody, src, __kCFCompactBitmapPageSize);
691 dstBody += __kCFCompactBitmapPageSize;
692 }
693 src += __kCFCompactBitmapPageSize;
694 }
695 } else {
696 memmove(dst, header, __kCFCompactBitmapNumPages);
697 }
698
699 return dst;
700 }
701
702 static void __CFCSetMakeCompact(CFMutableCharacterSetRef cset) {
703 if (__CFCSetIsBitmap(cset) && __CFCSetBitmapBits(cset)) {
704 uint8_t *bitmap = __CFCSetBitmapBits(cset);
705 uint8_t *cBitmap = __CFCreateCompactBitmap(CFGetAllocator(cset), bitmap);
706
707 if (cBitmap) {
708 CFAllocatorDeallocate(CFGetAllocator(cset), bitmap);
709 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
710 __CFCSetPutCompactBitmapBits(cset, cBitmap);
711 }
712 }
713 }
714
715 static void __CFCSetAddNonBMPPlanesInRange(CFMutableCharacterSetRef cset, CFRange range) {
716 int firstChar = (range.location & 0xFFFF);
717 int maxChar = range.location + range.length;
718 int idx = range.location >> 16; // first plane
719 int maxPlane = (maxChar - 1) >> 16; // last plane
720 CFRange planeRange;
721 CFMutableCharacterSetRef annexPlane;
722
723 maxChar &= 0xFFFF;
724
725 for (idx = (idx ? idx : 1);idx <= maxPlane;idx++) {
726 planeRange.location = __CFMax(firstChar, 0);
727 planeRange.length = (idx == maxPlane && maxChar ? maxChar : 0x10000) - planeRange.location;
728 if (__CFCSetAnnexIsInverted(cset)) {
729 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, idx))) {
730 CFCharacterSetRemoveCharactersInRange(annexPlane, planeRange);
731 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) {
732 CFRelease(annexPlane);
733 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, idx);
734 }
735 }
736 } else {
737 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx), planeRange);
738 }
739 }
740 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) __CFCSetDeallocateAnnexPlane(cset);
741 }
742
743 static void __CFCSetRemoveNonBMPPlanesInRange(CFMutableCharacterSetRef cset, CFRange range) {
744 int firstChar = (range.location & 0xFFFF);
745 int maxChar = range.location + range.length;
746 int idx = range.location >> 16; // first plane
747 int maxPlane = (maxChar - 1) >> 16; // last plane
748 CFRange planeRange;
749 CFMutableCharacterSetRef annexPlane;
750
751 maxChar &= 0xFFFF;
752
753 for (idx = (idx ? idx : 1);idx <= maxPlane;idx++) {
754 planeRange.location = __CFMax(firstChar, 0);
755 planeRange.length = (idx == maxPlane && maxChar ? maxChar : 0x10000) - planeRange.location;
756 if (__CFCSetAnnexIsInverted(cset)) {
757 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx), planeRange);
758 } else {
759 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, idx))) {
760 CFCharacterSetRemoveCharactersInRange(annexPlane, planeRange);
761 if(__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) {
762 CFRelease(annexPlane);
763 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, idx);
764 }
765 }
766 }
767 }
768 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) __CFCSetDeallocateAnnexPlane(cset);
769 }
770
771 static void __CFCSetMakeBitmap(CFMutableCharacterSetRef cset) {
772 if (!__CFCSetIsBitmap(cset) || !__CFCSetBitmapBits(cset)) {
773 CFAllocatorRef allocator = CFGetAllocator(cset);
774 uint8_t *bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
775 __CFCSetGetBitmap(cset, bitmap);
776
777 if (__CFCSetIsBuiltin(cset)) {
778 CFIndex numPlanes = CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(cset));
779
780 if (numPlanes > 1) {
781 CFMutableCharacterSetRef annexSet;
782 uint8_t *annexBitmap = NULL;
783 int idx;
784 UInt8 result;
785
786 __CFCSetAllocateAnnexForPlane(cset, numPlanes - 1);
787 for (idx = 1;idx < numPlanes;idx++) {
788 if (NULL == annexBitmap) {
789 annexBitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
790 }
791 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset), idx, annexBitmap, false);
792 if (result == kCFUniCharBitmapEmpty) continue;
793 if (result == kCFUniCharBitmapAll) {
794 CFIndex bitmapLength = __kCFBitmapSize;
795 uint8_t *bytes = annexBitmap;
796 while (bitmapLength-- > 0) *(bytes++) = (uint8_t)0xFF;
797 }
798 annexSet = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx);
799 __CFCSetPutClassType(annexSet, __kCFCharSetClassBitmap);
800 __CFCSetPutBitmapBits(annexSet, annexBitmap);
801 __CFCSetPutIsInverted(annexSet, false);
802 __CFCSetPutHasHashValue(annexSet, false);
803 annexBitmap = NULL;
804 }
805 if (annexBitmap) CFAllocatorDeallocate(allocator, annexBitmap);
806 }
807 } else if (__CFCSetIsCompactBitmap(cset) && __CFCSetCompactBitmapBits(cset)) {
808 CFAllocatorDeallocate(allocator, __CFCSetCompactBitmapBits(cset));
809 __CFCSetPutCompactBitmapBits(cset, NULL);
810 } else if (__CFCSetIsString(cset) && __CFCSetStringBuffer(cset)) {
811 CFAllocatorDeallocate(allocator, __CFCSetStringBuffer(cset));
812 __CFCSetPutStringBuffer(cset, NULL);
813 } else if (__CFCSetIsRange(cset)) { // We may have to allocate annex here
814 Boolean needsToInvert = (!__CFCSetHasNonBMPPlane(cset) && __CFCSetIsInverted(cset) ? true : false);
815 __CFCSetAddNonBMPPlanesInRange(cset, CFRangeMake(__CFCSetRangeFirstChar(cset), __CFCSetRangeLength(cset)));
816 if (needsToInvert) __CFCSetAnnexSetIsInverted(cset, true);
817 }
818 __CFCSetPutClassType(cset, __kCFCharSetClassBitmap);
819 __CFCSetPutBitmapBits(cset, bitmap);
820 __CFCSetPutIsInverted(cset, false);
821 }
822 }
823
824 CF_INLINE CFMutableCharacterSetRef __CFCSetGenericCreate(CFAllocatorRef allocator, UInt32 flags) {
825 CFMutableCharacterSetRef cset;
826 CFIndex size = sizeof(struct __CFCharacterSet) - sizeof(CFRuntimeBase);
827
828 cset = (CFMutableCharacterSetRef)_CFRuntimeCreateInstance(allocator, CFCharacterSetGetTypeID(), size, NULL);
829 if (NULL == cset) return NULL;
830
831 cset->_base._cfinfo[CF_INFO_BITS] |= flags;
832 cset->_hashValue = 0;
833 cset->_annex = NULL;
834
835 return cset;
836 }
837
838 static void __CFApplySurrogatesInString(CFMutableCharacterSetRef cset, CFStringRef string, void (*applyer)(CFMutableCharacterSetRef, CFRange)) {
839 CFStringInlineBuffer buffer;
840 CFIndex index, length = CFStringGetLength(string);
841 CFRange range = CFRangeMake(0, 0);
842 UTF32Char character;
843
844 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
845
846 for (index = 0;index < length;index++) {
847 character = __CFStringGetCharacterFromInlineBufferQuick(&buffer, index);
848
849 if (CFStringIsSurrogateHighCharacter(character) && ((index + 1) < length)) {
850 UTF16Char other = __CFStringGetCharacterFromInlineBufferQuick(&buffer, index + 1);
851
852 if (CFStringIsSurrogateLowCharacter(other)) {
853 character = CFStringGetLongCharacterForSurrogatePair(character, other);
854
855 if ((range.length + range.location) == character) {
856 ++range.length;
857 } else {
858 if (range.length > 0) applyer(cset, range);
859 range.location = character;
860 range.length = 1;
861 }
862 }
863
864 ++index; // skip the low surrogate
865 }
866 }
867
868 if (range.length > 0) applyer(cset, range);
869 }
870
871
872 /* Bsearch theChar for __kCFCharSetClassString
873 */
874 CF_INLINE Boolean __CFCSetBsearchUniChar(const UniChar *theTable, CFIndex length, UniChar theChar) {
875 const UniChar *p, *q, *divider;
876
877 if ((theChar < theTable[0]) || (theChar > theTable[length - 1])) return false;
878
879 p = theTable;
880 q = p + (length - 1);
881 while (p <= q) {
882 divider = p + ((q - p) >> 1); /* divide by 2 */
883 if (theChar < *divider) q = divider - 1;
884 else if (theChar > *divider) p = divider + 1;
885 else return true;
886 }
887 return false;
888 }
889
890 /* Cache of the instantiated builtin (predefined) sets. The array is NULL until CFCharacterSetGetPredefined() allocates it. Builtin set IDs start at 1, so the array index is ID - 1.
891 */
892 static CFCharacterSetRef *__CFBuiltinSets = NULL;
893
894 /* Global spin lock for character sets; CFCharacterSetGetPredefined() holds it around every read and write of __CFBuiltinSets.
895 */
896 static CFSpinLock_t __CFCharacterSetLock = CFSpinLockInit;
897
898 /* CFBase API functions
899 */
900 static Boolean __CFCharacterSetEqual(CFTypeRef cf1, CFTypeRef cf2) {
901 Boolean isInvertStateIdentical = (__CFCSetIsInverted((CFCharacterSetRef)cf1) == __CFCSetIsInverted((CFCharacterSetRef)cf2) ? true: false);
902 Boolean isAnnexInvertStateIdentical = (__CFCSetAnnexIsInverted((CFCharacterSetRef)cf1) == __CFCSetAnnexIsInverted((CFCharacterSetRef)cf2) ? true: false);
903 CFIndex idx;
904 CFCharacterSetRef subSet1;
905 uint8_t bitsBuf[__kCFBitmapSize];
906 uint8_t *bits;
907 Boolean isBitmap1;
908 Boolean isBitmap2;
909
910 if (__CFCSetHasHashValue((CFCharacterSetRef)cf1) && __CFCSetHasHashValue((CFCharacterSetRef)cf2) && ((CFCharacterSetRef)cf1)->_hashValue != ((CFCharacterSetRef)cf2)->_hashValue) return false;
911 if (__CFCSetIsEmpty((CFCharacterSetRef)cf1) && __CFCSetIsEmpty((CFCharacterSetRef)cf2) && !isInvertStateIdentical) return false;
912
913 if (__CFCSetClassType((CFCharacterSetRef)cf1) == __CFCSetClassType((CFCharacterSetRef)cf2)) { // Types are identical, we can do it fast
914 switch (__CFCSetClassType((CFCharacterSetRef)cf1)) {
915 case __kCFCharSetClassBuiltin:
916 return (__CFCSetBuiltinType((CFCharacterSetRef)cf1) == __CFCSetBuiltinType((CFCharacterSetRef)cf2) && isInvertStateIdentical ? true : false);
917
918 case __kCFCharSetClassRange:
919 return (__CFCSetRangeFirstChar((CFCharacterSetRef)cf1) == __CFCSetRangeFirstChar((CFCharacterSetRef)cf2) && __CFCSetRangeLength((CFCharacterSetRef)cf1) && __CFCSetRangeLength((CFCharacterSetRef)cf2) && isInvertStateIdentical ? true : false);
920
921 case __kCFCharSetClassString:
922 if (__CFCSetStringLength((CFCharacterSetRef)cf1) == __CFCSetStringLength((CFCharacterSetRef)cf2) && isInvertStateIdentical) {
923 const UniChar *buf1 = __CFCSetStringBuffer((CFCharacterSetRef)cf1);
924 const UniChar *buf2 = __CFCSetStringBuffer((CFCharacterSetRef)cf2);
925 CFIndex length = __CFCSetStringLength((CFCharacterSetRef)cf1);
926
927 while (length--) if (*buf1++ != *buf2++) return false;
928 } else {
929 return false;
930 }
931 break;
932
933 case __kCFCharSetClassBitmap:
934 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf2))) return false;
935 break;
936 }
937 return __CFCSetIsEqualAnnex((CFCharacterSetRef)cf1, (CFCharacterSetRef)cf2);
938 }
939
940 // Check for easy empty cases
941 if (__CFCSetIsEmpty((CFCharacterSetRef)cf1) || __CFCSetIsEmpty((CFCharacterSetRef)cf2)) {
942 CFCharacterSetRef emptySet = (__CFCSetIsEmpty((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
943 CFCharacterSetRef nonEmptySet = (emptySet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
944
945 if (__CFCSetIsBuiltin(nonEmptySet)) {
946 return false;
947 } else if (__CFCSetIsRange(nonEmptySet)) {
948 if (isInvertStateIdentical) {
949 return (__CFCSetRangeLength(nonEmptySet) ? false : true);
950 } else {
951 return (__CFCSetRangeLength(nonEmptySet) == 0x110000 ? true : false);
952 }
953 } else {
954 if (__CFCSetAnnexIsInverted(nonEmptySet)) {
955 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet) != 0x1FFFE) return false;
956 } else {
957 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet)) return false;
958 }
959
960 if (__CFCSetIsBitmap(nonEmptySet)) {
961 bits = __CFCSetBitmapBits(nonEmptySet);
962 } else {
963 bits = bitsBuf;
964 __CFCSetGetBitmap(nonEmptySet, bitsBuf);
965 }
966
967 if (__CFCSetIsEqualBitmap(NULL, (const UInt32 *)bits)) {
968 if (!__CFCSetAnnexIsInverted(nonEmptySet)) return true;
969 } else {
970 return false;
971 }
972
973 // Annex set has to be CFRangeMake(0x10000, 0xfffff)
974 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
975 if (__CFCSetIsBitmap(nonEmptySet)) {
976 if (!__CFCSetIsEqualBitmap((__CFCSetAnnexIsInverted(nonEmptySet) ? NULL : (const UInt32 *)-1), (const UInt32 *)bitsBuf)) return false;
977 } else {
978 __CFCSetGetBitmap(__CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonEmptySet, idx), bitsBuf);
979 if (!__CFCSetIsEqualBitmap((const UInt32 *)-1, (const UInt32 *)bitsBuf)) return false;
980 }
981 }
982 return true;
983 }
984 }
985
986 if (__CFCSetIsBuiltin((CFCharacterSetRef)cf1) || __CFCSetIsBuiltin((CFCharacterSetRef)cf2)) {
987 CFCharacterSetRef builtinSet = (__CFCSetIsBuiltin((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
988 CFCharacterSetRef nonBuiltinSet = (builtinSet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
989
990
991 if (__CFCSetIsRange(nonBuiltinSet)) {
992 UTF32Char firstChar = __CFCSetRangeFirstChar(nonBuiltinSet);
993 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(nonBuiltinSet) - 1);
994 uint8_t firstPlane = (firstChar >> 16) & 0xFF;
995 uint8_t lastPlane = (lastChar >> 16) & 0xFF;
996 uint8_t result;
997
998 for (idx = 0;idx < MAX_ANNEX_PLANE;idx++) {
999 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), idx, bitsBuf, (isInvertStateIdentical != 0));
1000
1001 if (idx < firstPlane || idx > lastPlane) {
1002 if (result == kCFUniCharBitmapAll) {
1003 return false;
1004 } else if (result == kCFUniCharBitmapFilled) {
1005 if (!__CFCSetIsEqualBitmap(NULL, (const UInt32 *)bitsBuf)) return false;
1006 }
1007 } else if (idx > firstPlane && idx < lastPlane) {
1008 if (result == kCFUniCharBitmapEmpty) {
1009 return false;
1010 } else if (result == kCFUniCharBitmapFilled) {
1011 if (!__CFCSetIsEqualBitmap((const UInt32 *)-1, (const UInt32 *)bitsBuf)) return false;
1012 }
1013 } else {
1014 if (result == kCFUniCharBitmapEmpty) {
1015 return false;
1016 } else if (result == kCFUniCharBitmapAll) {
1017 if (idx == firstPlane) {
1018 if (((firstChar & 0xFFFF) != 0) || (firstPlane == lastPlane && ((lastChar & 0xFFFF) != 0xFFFF))) return false;
1019 } else {
1020 if (((lastChar & 0xFFFF) != 0xFFFF) || (firstPlane == lastPlane && ((firstChar & 0xFFFF) != 0))) return false;
1021 }
1022 } else {
1023 if (idx == firstPlane) {
1024 if (!__CFCSetIsBitmapEqualToRange((const UInt32 *)bitsBuf, firstChar & 0xFFFF, (firstPlane == lastPlane ? lastChar & 0xFFFF : 0xFFFF), false)) return false;
1025 } else {
1026 if (!__CFCSetIsBitmapEqualToRange((const UInt32 *)bitsBuf, (firstPlane == lastPlane ? firstChar & 0xFFFF : 0), lastChar & 0xFFFF, false)) return false;
1027 }
1028 }
1029 }
1030 }
1031 return true;
1032 } else {
1033 uint8_t bitsBuf2[__kCFBitmapSize];
1034 uint8_t result;
1035
1036 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), 0, bitsBuf, (__CFCSetIsInverted(builtinSet) != 0));
1037 if (result == kCFUniCharBitmapFilled) {
1038 if (__CFCSetIsBitmap(nonBuiltinSet)) {
1039 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)__CFCSetBitmapBits(nonBuiltinSet))) return false;
1040 } else {
1041
1042 __CFCSetGetBitmap(nonBuiltinSet, bitsBuf2);
1043 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)bitsBuf2)) {
1044 return false;
1045 }
1046 }
1047 } else {
1048 if (__CFCSetIsBitmap(nonBuiltinSet)) {
1049 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1 : NULL), (const UInt32 *)__CFCSetBitmapBits(nonBuiltinSet))) return false;
1050 } else {
1051 __CFCSetGetBitmap(nonBuiltinSet, bitsBuf);
1052 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32 *)bitsBuf)) return false;
1053 }
1054 }
1055
1056 isInvertStateIdentical = (__CFCSetIsInverted(builtinSet) == __CFCSetAnnexIsInverted(nonBuiltinSet) ? true : false);
1057
1058 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
1059 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), idx, bitsBuf, !isInvertStateIdentical);
1060 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonBuiltinSet, idx);
1061
1062 if (result == kCFUniCharBitmapFilled) {
1063 if (NULL == subSet1) {
1064 return false;
1065 } else if (__CFCSetIsBitmap(subSet1)) {
1066 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)__CFCSetBitmapBits(subSet1))) {
1067 return false;
1068 }
1069 } else {
1070
1071 __CFCSetGetBitmap(subSet1, bitsBuf2);
1072 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)bitsBuf2)) {
1073 return false;
1074 }
1075 }
1076 } else {
1077 if (NULL == subSet1) {
1078 if (result == kCFUniCharBitmapAll) {
1079 return false;
1080 }
1081 } else if (__CFCSetIsBitmap(subSet1)) {
1082 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32*)__CFCSetBitmapBits(subSet1))) {
1083 return false;
1084 }
1085 } else {
1086 __CFCSetGetBitmap(subSet1, bitsBuf);
1087 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32*)bitsBuf)) {
1088 return false;
1089 }
1090 }
1091 }
1092 }
1093 return true;
1094 }
1095 }
1096
1097 if (__CFCSetIsRange((CFCharacterSetRef)cf1) || __CFCSetIsRange((CFCharacterSetRef)cf2)) {
1098 CFCharacterSetRef rangeSet = (__CFCSetIsRange((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
1099 CFCharacterSetRef nonRangeSet = (rangeSet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
1100 UTF32Char firstChar = __CFCSetRangeFirstChar(rangeSet);
1101 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(rangeSet) - 1);
1102 uint8_t firstPlane = (firstChar >> 16) & 0xFF;
1103 uint8_t lastPlane = (lastChar >> 16) & 0xFF;
1104 Boolean isRangeSetInverted = __CFCSetIsInverted(rangeSet);
1105
1106 if (__CFCSetIsBitmap(nonRangeSet)) {
1107 bits = __CFCSetBitmapBits(nonRangeSet);
1108 } else {
1109 bits = bitsBuf;
1110 __CFCSetGetBitmap(nonRangeSet, bitsBuf);
1111 }
1112 if (firstPlane == 0) {
1113 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, firstChar, (lastPlane == 0 ? lastChar : 0xFFFF), isRangeSetInverted)) return false;
1114 firstPlane = 1;
1115 firstChar = 0;
1116 } else {
1117 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isRangeSetInverted ? (const UInt32 *)-1 : NULL))) return false;
1118 firstChar &= 0xFFFF;
1119 }
1120
1121 lastChar &= 0xFFFF;
1122
1123 isAnnexInvertStateIdentical = (isRangeSetInverted == __CFCSetAnnexIsInverted(nonRangeSet) ? true : false);
1124
1125 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
1126 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonRangeSet, idx);
1127 if (NULL == subSet1) {
1128 if (idx < firstPlane || idx > lastPlane) {
1129 if (!isAnnexInvertStateIdentical) return false;
1130 } else if (idx > firstPlane && idx < lastPlane) {
1131 if (isAnnexInvertStateIdentical) return false;
1132 } else if (idx == firstPlane) {
1133 if (isAnnexInvertStateIdentical || firstChar || (idx == lastPlane && lastChar != 0xFFFF)) return false;
1134 } else if (idx == lastPlane) {
1135 if (isAnnexInvertStateIdentical || (idx == firstPlane && firstChar) || (lastChar != 0xFFFF)) return false;
1136 }
1137 } else {
1138 if (__CFCSetIsBitmap(subSet1)) {
1139 bits = __CFCSetBitmapBits(subSet1);
1140 } else {
1141 __CFCSetGetBitmap(subSet1, bitsBuf);
1142 bits = bitsBuf;
1143 }
1144
1145 if (idx < firstPlane || idx > lastPlane) {
1146 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isAnnexInvertStateIdentical ? NULL : (const UInt32 *)-1))) return false;
1147 } else if (idx > firstPlane && idx < lastPlane) {
1148 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isAnnexInvertStateIdentical ? (const UInt32 *)-1 : NULL))) return false;
1149 } else if (idx == firstPlane) {
1150 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, firstChar, (idx == lastPlane ? lastChar : 0xFFFF), !isAnnexInvertStateIdentical)) return false;
1151 } else if (idx == lastPlane) {
1152 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, (idx == firstPlane ? firstChar : 0), lastChar, !isAnnexInvertStateIdentical)) return false;
1153 }
1154 }
1155 }
1156 return true;
1157 }
1158
1159 isBitmap1 = __CFCSetIsBitmap((CFCharacterSetRef)cf1);
1160 isBitmap2 = __CFCSetIsBitmap((CFCharacterSetRef)cf2);
1161
1162 if (isBitmap1 && isBitmap2) {
1163 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf2))) return false;
1164 } else if (!isBitmap1 && !isBitmap2) {
1165 uint8_t bitsBuf2[__kCFBitmapSize];
1166
1167 __CFCSetGetBitmap((CFCharacterSetRef)cf1, bitsBuf);
1168 __CFCSetGetBitmap((CFCharacterSetRef)cf2, bitsBuf2);
1169
1170 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)bitsBuf2)) {
1171 return false;
1172 }
1173 } else {
1174 if (isBitmap2) {
1175 CFCharacterSetRef tmp = (CFCharacterSetRef)cf2;
1176 cf2 = cf1;
1177 cf1 = tmp;
1178 }
1179
1180 __CFCSetGetBitmap((CFCharacterSetRef)cf2, bitsBuf);
1181
1182 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)bitsBuf)) return false;
1183 }
1184 return __CFCSetIsEqualAnnex((CFCharacterSetRef)cf1, (CFCharacterSetRef)cf2);
1185 }
1186
1187 static CFHashCode __CFCharacterSetHash(CFTypeRef cf) {
1188 if (!__CFCSetHasHashValue((CFCharacterSetRef)cf)) {
1189 if (__CFCSetIsEmpty((CFCharacterSetRef)cf)) {
1190 ((CFMutableCharacterSetRef)cf)->_hashValue = (__CFCSetIsInverted((CFCharacterSetRef)cf) ? ((UInt32)0xFFFFFFFF) : 0);
1191 } else if (__CFCSetIsBitmap( (CFCharacterSetRef) cf )) {
1192 ((CFMutableCharacterSetRef)cf)->_hashValue = CFHashBytes(__CFCSetBitmapBits((CFCharacterSetRef)cf), __kCFBitmapSize);
1193 } else {
1194 uint8_t bitsBuf[__kCFBitmapSize];
1195 __CFCSetGetBitmap((CFCharacterSetRef)cf, bitsBuf);
1196 ((CFMutableCharacterSetRef)cf)->_hashValue = CFHashBytes(bitsBuf, __kCFBitmapSize);
1197 }
1198 __CFCSetPutHasHashValue((CFMutableCharacterSetRef)cf, true);
1199 }
1200 return ((CFCharacterSetRef)cf)->_hashValue;
1201 }
1202
1203 static CFStringRef __CFCharacterSetCopyDescription(CFTypeRef cf) {
1204 CFMutableStringRef string;
1205 CFIndex idx;
1206 CFIndex length;
1207
1208 if (__CFCSetIsEmpty((CFCharacterSetRef)cf)) {
1209 return (CFStringRef)(__CFCSetIsInverted((CFCharacterSetRef)cf) ? CFRetain(CFSTR("<CFCharacterSet All>")) : CFRetain(CFSTR("<CFCharacterSet Empty>")));
1210 }
1211
1212 switch (__CFCSetClassType((CFCharacterSetRef)cf)) {
1213 case __kCFCharSetClassBuiltin:
1214 switch (__CFCSetBuiltinType((CFCharacterSetRef)cf)) {
1215 case kCFCharacterSetControl: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Control Set>"));
1216 case kCFCharacterSetWhitespace : return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Whitespace Set>"));
1217 case kCFCharacterSetWhitespaceAndNewline: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined WhitespaceAndNewline Set>"));
1218 case kCFCharacterSetDecimalDigit: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined DecimalDigit Set>"));
1219 case kCFCharacterSetLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Letter Set>"));
1220 case kCFCharacterSetLowercaseLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined LowercaseLetter Set>"));
1221 case kCFCharacterSetUppercaseLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined UppercaseLetter Set>"));
1222 case kCFCharacterSetNonBase: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined NonBase Set>"));
1223 case kCFCharacterSetDecomposable: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Decomposable Set>"));
1224 case kCFCharacterSetAlphaNumeric: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined AlphaNumeric Set>"));
1225 case kCFCharacterSetPunctuation: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Punctuation Set>"));
1226 case kCFCharacterSetIllegal: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Illegal Set>"));
1227 case kCFCharacterSetCapitalizedLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined CapitalizedLetter Set>"));
1228 case kCFCharacterSetSymbol: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Symbol Set>"));
1229 case kCFCharacterSetNewline: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Newline Set>"));
1230 }
1231 break;
1232
1233 case __kCFCharSetClassRange:
1234 return CFStringCreateWithFormat(CFGetAllocator((CFCharacterSetRef)cf), NULL, CFSTR("<CFCharacterSet Range(%d, %d)>"), __CFCSetRangeFirstChar((CFCharacterSetRef)cf), __CFCSetRangeLength((CFCharacterSetRef)cf));
1235
1236 case __kCFCharSetClassString: {
1237 CFStringRef format = CFSTR("<CFCharacterSet Items(");
1238
1239 length = __CFCSetStringLength((CFCharacterSetRef)cf);
1240 string = CFStringCreateMutable(CFGetAllocator(cf), CFStringGetLength(format) + 7 * length + 2); // length of format + "U+XXXX "(7) * length + ")>"(2)
1241 CFStringAppend(string, format);
1242 for (idx = 0;idx < length;idx++) {
1243 CFStringAppendFormat(string, NULL, CFSTR("%sU+%04X"), (idx > 0 ? " " : ""), (UInt32)((__CFCSetStringBuffer((CFCharacterSetRef)cf))[idx]));
1244 }
1245 CFStringAppend(string, CFSTR(")>"));
1246 return string;
1247 }
1248
1249 case __kCFCharSetClassBitmap:
1250 case __kCFCharSetClassCompactBitmap:
1251 return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Bitmap>")); // ??? Should generate description for 8k bitmap ?
1252 }
1253 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1254 return NULL;
1255 }
1256
1257 static void __CFCharacterSetDeallocate(CFTypeRef cf) {
1258 CFAllocatorRef allocator = CFGetAllocator(cf);
1259
1260 if (__CFCSetIsBuiltin((CFCharacterSetRef)cf) && !__CFCSetIsMutable((CFCharacterSetRef)cf) && !__CFCSetIsInverted((CFCharacterSetRef)cf)) {
1261 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)cf));
1262 if (sharedSet == cf) { // We're trying to dealloc the builtin set
1263 CFAssert1(0, __kCFLogAssertion, "%s: Trying to deallocate predefined set. The process is likely to crash.", __PRETTY_FUNCTION__);
1264 return; // We never deallocate builtin set
1265 }
1266 }
1267
1268 if (__CFCSetIsString((CFCharacterSetRef)cf) && __CFCSetStringBuffer((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetStringBuffer((CFCharacterSetRef)cf));
1269 else if (__CFCSetIsBitmap((CFCharacterSetRef)cf) && __CFCSetBitmapBits((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetBitmapBits((CFCharacterSetRef)cf));
1270 else if (__CFCSetIsCompactBitmap((CFCharacterSetRef)cf) && __CFCSetCompactBitmapBits((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetCompactBitmapBits((CFCharacterSetRef)cf));
1271 __CFCSetDeallocateAnnexPlane((CFCharacterSetRef)cf);
1272 }
1273
/* Runtime type ID of CFCharacterSet; holds _kCFRuntimeNotATypeID until __CFCharacterSetInitialize() stores the value returned by _CFRuntimeRegisterClass(). */
1274 static CFTypeID __kCFCharacterSetTypeID = _kCFRuntimeNotATypeID;
1275
/* Class table registered by __CFCharacterSetInitialize(). It wires the deallocate, equal, hash and
 * description callbacks defined above into the runtime.
 * NOTE(review): the slot order presumably follows CFRuntimeClass in CFRuntime.h -- confirm there.
 */
1276 static const CFRuntimeClass __CFCharacterSetClass = {
1277 0,
1278 "CFCharacterSet",
1279 NULL, // init
1280 NULL, // copy
1281 __CFCharacterSetDeallocate,
1282 __CFCharacterSetEqual,
1283 __CFCharacterSetHash,
1284 NULL, // slot left unset
1285 __CFCharacterSetCopyDescription
1286 };
1287
1288 static bool __CFCheckForExapendedSet = false;
1289
1290 __private_extern__ void __CFCharacterSetInitialize(void) {
1291 const char *checkForExpandedSet = __CFgetenv("__CF_DEBUG_EXPANDED_SET");
1292
1293 __kCFCharacterSetTypeID = _CFRuntimeRegisterClass(&__CFCharacterSetClass);
1294
1295 if (checkForExpandedSet && (*checkForExpandedSet == 'Y')) __CFCheckForExapendedSet = true;
1296 }
1297
1298 /* Public functions
1299 */
1300
/* Returns the type ID stored in __kCFCharacterSetTypeID, which __CFCharacterSetInitialize() assigns when it registers the class. */
1301 CFTypeID CFCharacterSetGetTypeID(void) {
1302 return __kCFCharacterSetTypeID;
1303 }
1304
1305 /*** CharacterSet creation ***/
1306 /* Functions to create basic immutable character sets.
1307 */
1308 CFCharacterSetRef CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier) {
1309 CFCharacterSetRef cset;
1310
1311 __CFCSetValidateBuiltinType(theSetIdentifier, __PRETTY_FUNCTION__);
1312
1313 __CFSpinLock(&__CFCharacterSetLock);
1314 cset = ((NULL != __CFBuiltinSets) ? __CFBuiltinSets[theSetIdentifier - 1] : NULL);
1315 __CFSpinUnlock(&__CFCharacterSetLock);
1316
1317 if (NULL != cset) return cset;
1318
1319 if (!(cset = __CFCSetGenericCreate(kCFAllocatorSystemDefault, __kCFCharSetClassBuiltin))) return NULL;
1320 __CFCSetPutBuiltinType((CFMutableCharacterSetRef)cset, theSetIdentifier);
1321
1322 __CFSpinLock(&__CFCharacterSetLock);
1323 if (!__CFBuiltinSets) {
1324 __CFBuiltinSets = (CFCharacterSetRef *)CFAllocatorAllocate((CFAllocatorRef)CFRetain(__CFGetDefaultAllocator()), sizeof(CFCharacterSetRef) * __kCFLastBuiltinSetID, 0);
1325 memset(__CFBuiltinSets, 0, sizeof(CFCharacterSetRef) * __kCFLastBuiltinSetID);
1326 }
1327
1328 __CFBuiltinSets[theSetIdentifier - 1] = cset;
1329 __CFSpinUnlock(&__CFCharacterSetLock);
1330
1331 return cset;
1332 }
1333
1334 CFCharacterSetRef CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef allocator, CFRange theRange) {
1335 CFMutableCharacterSetRef cset;
1336
1337 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
1338
1339 if (theRange.length) {
1340 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassRange))) return NULL;
1341 __CFCSetPutRangeFirstChar(cset, theRange.location);
1342 __CFCSetPutRangeLength(cset, theRange.length);
1343 } else {
1344 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap))) return NULL;
1345 __CFCSetPutBitmapBits(cset, NULL);
1346 __CFCSetPutHasHashValue(cset, true); // _hashValue is 0
1347 }
1348
1349 return cset;
1350 }
1351
1352 static int chcompar(const void *a, const void *b) {
1353 return -(int)(*(UniChar *)b - *(UniChar *)a);
1354 }
1355
1356 CFCharacterSetRef CFCharacterSetCreateWithCharactersInString(CFAllocatorRef allocator, CFStringRef theString) {
1357 CFIndex length;
1358
1359 length = CFStringGetLength(theString);
1360 if (length < __kCFStringCharSetMax) {
1361 CFMutableCharacterSetRef cset;
1362
1363 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassString))) return NULL;
1364 __CFCSetPutStringBuffer(cset, (UniChar *)CFAllocatorAllocate(CFGetAllocator(cset), __kCFStringCharSetMax * sizeof(UniChar), 0));
1365 __CFCSetPutStringLength(cset, length);
1366 CFStringGetCharacters(theString, CFRangeMake(0, length), __CFCSetStringBuffer(cset));
1367 qsort(__CFCSetStringBuffer(cset), length, sizeof(UniChar), chcompar);
1368
1369 if (0 == length) {
1370 __CFCSetPutHasHashValue(cset, true); // _hashValue is 0
1371 } else if (length > 1) { // Check for surrogate
1372 const UTF16Char *characters = __CFCSetStringBuffer(cset);
1373 const UTF16Char *charactersLimit = characters + length;
1374
1375 if ((*characters < 0xDC00UL) && (*(charactersLimit - 1) > 0xDBFFUL)) { // might have surrogate chars
1376 while (characters < charactersLimit) {
1377 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) {
1378 CFRelease(cset);
1379 cset = NULL;
1380 break;
1381 }
1382 ++characters;
1383 }
1384 }
1385 }
1386 if (NULL != cset) return cset;
1387 }
1388
1389 CFMutableCharacterSetRef mcset = CFCharacterSetCreateMutable(allocator);
1390 CFCharacterSetAddCharactersInString(mcset, theString);
1391 __CFCSetMakeCompact(mcset);
1392 __CFCSetPutIsMutable(mcset, false);
1393 return mcset;
1394 }
1395
1396 CFCharacterSetRef CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef allocator, CFDataRef theData) {
1397 CFMutableCharacterSetRef cset;
1398 CFIndex length;
1399
1400 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap))) return NULL;
1401
1402 if (theData && (length = CFDataGetLength(theData)) > 0) {
1403 uint8_t *bitmap;
1404 uint8_t *cBitmap;
1405
1406 if (length < __kCFBitmapSize) {
1407 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1408 memmove(bitmap, CFDataGetBytePtr(theData), length);
1409 memset(bitmap + length, 0, __kCFBitmapSize - length);
1410
1411 cBitmap = __CFCreateCompactBitmap(allocator, bitmap);
1412
1413 if (cBitmap == NULL) {
1414 __CFCSetPutBitmapBits(cset, bitmap);
1415 } else {
1416 CFAllocatorDeallocate(allocator, bitmap);
1417 __CFCSetPutCompactBitmapBits(cset, cBitmap);
1418 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1419 }
1420 } else {
1421 cBitmap = __CFCreateCompactBitmap(allocator, CFDataGetBytePtr(theData));
1422
1423 if (cBitmap == NULL) {
1424 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1425 memmove(bitmap, CFDataGetBytePtr(theData), __kCFBitmapSize);
1426
1427 __CFCSetPutBitmapBits(cset, bitmap);
1428 } else {
1429 __CFCSetPutCompactBitmapBits(cset, cBitmap);
1430 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1431 }
1432
1433 if (length > __kCFBitmapSize) {
1434 CFMutableCharacterSetRef annexSet;
1435 const uint8_t *bytes = CFDataGetBytePtr(theData) + __kCFBitmapSize;
1436
1437 length -= __kCFBitmapSize;
1438
1439 while (length > 1) {
1440 annexSet = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, *(bytes++));
1441 --length; // Decrement the plane no byte
1442
1443 if (length < __kCFBitmapSize) {
1444 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1445 memmove(bitmap, bytes, length);
1446 memset(bitmap + length, 0, __kCFBitmapSize - length);
1447
1448 cBitmap = __CFCreateCompactBitmap(allocator, bitmap);
1449
1450 if (cBitmap == NULL) {
1451 __CFCSetPutBitmapBits(annexSet, bitmap);
1452 } else {
1453 CFAllocatorDeallocate(allocator, bitmap);
1454 __CFCSetPutCompactBitmapBits(annexSet, cBitmap);
1455 __CFCSetPutClassType(annexSet, __kCFCharSetClassCompactBitmap);
1456 }
1457 } else {
1458 cBitmap = __CFCreateCompactBitmap(allocator, bytes);
1459
1460 if (cBitmap == NULL) {
1461 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1462 memmove(bitmap, bytes, __kCFBitmapSize);
1463
1464 __CFCSetPutBitmapBits(annexSet, bitmap);
1465 } else {
1466 __CFCSetPutCompactBitmapBits(annexSet, cBitmap);
1467 __CFCSetPutClassType(annexSet, __kCFCharSetClassCompactBitmap);
1468 }
1469 }
1470 length -= __kCFBitmapSize;
1471 bytes += __kCFBitmapSize;
1472 }
1473 }
1474 }
1475 } else {
1476 __CFCSetPutBitmapBits(cset, NULL);
1477 __CFCSetPutHasHashValue(cset, true); // Hash value is 0
1478 }
1479
1480 return cset;
1481 }
1482
1483 CFCharacterSetRef CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1484 CFMutableCharacterSetRef cset;
1485
1486 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFCharacterSetRef , theSet, "invertedSet");
1487
1488 cset = CFCharacterSetCreateMutableCopy(alloc, theSet);
1489 CFCharacterSetInvert(cset);
1490 __CFCSetPutIsMutable(cset, false);
1491
1492 return cset;
1493 }
1494
1495 /* Functions to create mutable character sets.
1496 */
1497 CFMutableCharacterSetRef CFCharacterSetCreateMutable(CFAllocatorRef allocator) {
1498 CFMutableCharacterSetRef cset;
1499
1500 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap| __kCFCharSetIsMutable))) return NULL;
1501 __CFCSetPutBitmapBits(cset, NULL);
1502 __CFCSetPutHasHashValue(cset, true); // Hash value is 0
1503
1504 return cset;
1505 }
1506
1507 static CFMutableCharacterSetRef __CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet, bool isMutable) {
1508 CFMutableCharacterSetRef cset;
1509
1510 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFMutableCharacterSetRef , theSet, "mutableCopy");
1511
1512 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1513
1514 if (!isMutable && !__CFCSetIsMutable(theSet)) {
1515 return (CFMutableCharacterSetRef)CFRetain(theSet);
1516 }
1517
1518 cset = CFCharacterSetCreateMutable(alloc);
1519
1520 __CFCSetPutClassType(cset, __CFCSetClassType(theSet));
1521 __CFCSetPutHasHashValue(cset, __CFCSetHasHashValue(theSet));
1522 __CFCSetPutIsInverted(cset, __CFCSetIsInverted(theSet));
1523 cset->_hashValue = theSet->_hashValue;
1524
1525 switch (__CFCSetClassType(theSet)) {
1526 case __kCFCharSetClassBuiltin:
1527 __CFCSetPutBuiltinType(cset, __CFCSetBuiltinType(theSet));
1528 break;
1529
1530 case __kCFCharSetClassRange:
1531 __CFCSetPutRangeFirstChar(cset, __CFCSetRangeFirstChar(theSet));
1532 __CFCSetPutRangeLength(cset, __CFCSetRangeLength(theSet));
1533 break;
1534
1535 case __kCFCharSetClassString:
1536 __CFCSetPutStringBuffer(cset, (UniChar *)CFAllocatorAllocate(alloc, __kCFStringCharSetMax * sizeof(UniChar), 0));
1537
1538 __CFCSetPutStringLength(cset, __CFCSetStringLength(theSet));
1539 memmove(__CFCSetStringBuffer(cset), __CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet) * sizeof(UniChar));
1540 break;
1541
1542 case __kCFCharSetClassBitmap:
1543 if (__CFCSetBitmapBits(theSet)) {
1544 uint8_t * bitmap = (isMutable ? NULL : __CFCreateCompactBitmap(alloc, __CFCSetBitmapBits(theSet)));
1545
1546 if (bitmap == NULL) {
1547 bitmap = (uint8_t *)CFAllocatorAllocate(alloc, sizeof(uint8_t) * __kCFBitmapSize, 0);
1548 memmove(bitmap, __CFCSetBitmapBits(theSet), __kCFBitmapSize);
1549 __CFCSetPutBitmapBits(cset, bitmap);
1550 } else {
1551 __CFCSetPutCompactBitmapBits(cset, bitmap);
1552 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1553 }
1554 } else {
1555 __CFCSetPutBitmapBits(cset, NULL);
1556 }
1557 break;
1558
1559 case __kCFCharSetClassCompactBitmap: {
1560 const uint8_t *compactBitmap = __CFCSetCompactBitmapBits(theSet);
1561
1562 if (compactBitmap) {
1563 uint32_t size = __CFCSetGetCompactBitmapSize(compactBitmap);
1564 uint8_t *newBitmap = (uint8_t *)CFAllocatorAllocate(alloc, size, 0);
1565
1566 memmove(newBitmap, compactBitmap, size);
1567 __CFCSetPutCompactBitmapBits(cset, newBitmap);
1568 }
1569 }
1570 break;
1571
1572 default:
1573 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1574 }
1575 if (__CFCSetHasNonBMPPlane(theSet)) {
1576 CFMutableCharacterSetRef annexPlane;
1577 int idx;
1578
1579 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
1580 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx))) {
1581 annexPlane = __CFCharacterSetCreateCopy(alloc, annexPlane, isMutable);
1582 __CFCSetPutCharacterSetToAnnexPlane(cset, annexPlane, idx);
1583 CFRelease(annexPlane);
1584 }
1585 }
1586 __CFCSetAnnexSetIsInverted(cset, __CFCSetAnnexIsInverted(theSet));
1587 } else if (__CFCSetAnnexIsInverted(theSet)) {
1588 __CFCSetAnnexSetIsInverted(cset, true);
1589 }
1590
1591 return cset;
1592 }
1593
1594 CFCharacterSetRef CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1595 return __CFCharacterSetCreateCopy(alloc, theSet, false);
1596 }
1597
1598 CFMutableCharacterSetRef CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1599 return __CFCharacterSetCreateCopy(alloc, theSet, true);
1600 }
1601
1602 /*** Basic accessors ***/
1603 Boolean CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet, UniChar theChar) {
1604 CFIndex length;
1605 Boolean isInverted;
1606 Boolean result = false;
1607
1608 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, Boolean, theSet, "longCharacterIsMember:", theChar);
1609
1610 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1611
1612 isInverted = __CFCSetIsInverted(theSet);
1613
1614 switch (__CFCSetClassType(theSet)) {
1615 case __kCFCharSetClassBuiltin:
1616 result = (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1617 break;
1618
1619 case __kCFCharSetClassRange:
1620 length = __CFCSetRangeLength(theSet);
1621 result = (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1622 break;
1623
1624 case __kCFCharSetClassString:
1625 result = ((length = __CFCSetStringLength(theSet)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet), length, theChar) ? !isInverted : isInverted) : isInverted);
1626 break;
1627
1628 case __kCFCharSetClassBitmap:
1629 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet), theChar) ? true : false) : isInverted);
1630 break;
1631
1632 case __kCFCharSetClassCompactBitmap:
1633 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet), theChar) ? true : false) : isInverted);
1634 break;
1635
1636 default:
1637 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1638 break;
1639 }
1640
1641 return result;
1642 }
1643
1644 Boolean CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet, UTF32Char theChar) {
1645 CFIndex length;
1646 UInt32 plane = (theChar >> 16);
1647 Boolean isAnnexInverted = false;
1648 Boolean isInverted;
1649 Boolean result = false;
1650
1651 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, Boolean, theSet, "longCharacterIsMember:", theChar);
1652
1653 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1654
1655 if (plane) {
1656 CFCharacterSetRef annexPlane;
1657
1658 if (__CFCSetIsBuiltin(theSet)) {
1659 isInverted = __CFCSetIsInverted(theSet);
1660 return (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1661 }
1662
1663 isAnnexInverted = __CFCSetAnnexIsInverted(theSet);
1664
1665 if ((annexPlane = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, plane)) == NULL) {
1666 if (!__CFCSetHasNonBMPPlane(theSet) && __CFCSetIsRange(theSet)) {
1667 isInverted = __CFCSetIsInverted(theSet);
1668 length = __CFCSetRangeLength(theSet);
1669 return (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1670 } else {
1671 return (isAnnexInverted ? true : false);
1672 }
1673 } else {
1674 theSet = annexPlane;
1675 theChar &= 0xFFFF;
1676 }
1677 }
1678
1679 isInverted = __CFCSetIsInverted(theSet);
1680
1681 switch (__CFCSetClassType(theSet)) {
1682 case __kCFCharSetClassBuiltin:
1683 result = (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1684 break;
1685
1686 case __kCFCharSetClassRange:
1687 length = __CFCSetRangeLength(theSet);
1688 result = (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1689 break;
1690
1691 case __kCFCharSetClassString:
1692 result = ((length = __CFCSetStringLength(theSet)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet), length, theChar) ? !isInverted : isInverted) : isInverted);
1693 break;
1694
1695 case __kCFCharSetClassBitmap:
1696 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet), theChar) ? true : false) : isInverted);
1697 break;
1698
1699 case __kCFCharSetClassCompactBitmap:
1700 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet), theChar) ? true : false) : isInverted);
1701 break;
1702
1703 default:
1704 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1705 return false; // To make compiler happy
1706 }
1707
1708 return (result ? !isAnnexInverted : isAnnexInverted);
1709 }
1710
1711 Boolean CFCharacterSetIsSurrogatePairMember(CFCharacterSetRef theSet, UniChar surrogateHigh, UniChar surrogateLow) {
1712 return CFCharacterSetIsLongCharacterMember(theSet, CFCharacterSetGetLongCharacterForSurrogatePair(surrogateHigh, surrogateLow));
1713 }
1714
1715
1716 static inline CFCharacterSetRef __CFCharacterSetGetExpandedSetForNSCharacterSet(const void *characterSet) {
1717 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFCharacterSetRef , characterSet, "_expandedCFCharacterSet");
1718 return NULL;
1719 }
1720
1721 Boolean CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
1722 CFMutableCharacterSetRef copy;
1723 CFCharacterSetRef expandedSet = NULL;
1724 CFCharacterSetRef expandedOtherSet = NULL;
1725 Boolean result;
1726
1727 if ((!CF_IS_OBJC(__kCFCharacterSetTypeID, theSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theSet))) && (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedOtherSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet)))) { // Really CF, we can do some trick here
1728 if (expandedSet) theSet = expandedSet;
1729 if (expandedOtherSet) theOtherSet = expandedOtherSet;
1730
1731 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1732 __CFGenericValidateType(theOtherSet, __kCFCharacterSetTypeID);
1733
1734 if (__CFCSetIsEmpty(theSet)) {
1735 if (__CFCSetIsInverted(theSet)) {
1736 return TRUE; // Inverted empty set covers all range
1737 } else if (!__CFCSetIsEmpty(theOtherSet) || __CFCSetIsInverted(theOtherSet)) {
1738 return FALSE;
1739 }
1740 } else if (__CFCSetIsEmpty(theOtherSet) && !__CFCSetIsInverted(theOtherSet)) {
1741 return TRUE;
1742 } else {
1743 if (__CFCSetIsBuiltin(theSet) || __CFCSetIsBuiltin(theOtherSet)) {
1744 if (__CFCSetClassType(theSet) == __CFCSetClassType(theOtherSet) && __CFCSetBuiltinType(theSet) == __CFCSetBuiltinType(theOtherSet) && !__CFCSetIsInverted(theSet) && !__CFCSetIsInverted(theOtherSet)) return TRUE;
1745 } else if (__CFCSetIsRange(theSet) || __CFCSetIsRange(theOtherSet)) {
1746 if (__CFCSetClassType(theSet) == __CFCSetClassType(theOtherSet)) {
1747 if (__CFCSetIsInverted(theSet)) {
1748 if (__CFCSetIsInverted(theOtherSet)) {
1749 return (__CFCSetRangeFirstChar(theOtherSet) > __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) > (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) ? FALSE : TRUE);
1750 } else {
1751 return ((__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) <= __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) <= __CFCSetRangeFirstChar(theOtherSet) ? TRUE : FALSE);
1752 }
1753 } else {
1754 if (__CFCSetIsInverted(theOtherSet)) {
1755 return ((__CFCSetRangeFirstChar(theSet) == 0 && __CFCSetRangeLength(theSet) == 0x110000) || (__CFCSetRangeFirstChar(theOtherSet) == 0 && (UInt32)__CFCSetRangeLength(theOtherSet) <= __CFCSetRangeFirstChar(theSet)) || ((__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) <= __CFCSetRangeFirstChar(theOtherSet) && (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) == 0x110000) ? TRUE : FALSE);
1756 } else {
1757 return (__CFCSetRangeFirstChar(theOtherSet) < __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) < (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) ? FALSE : TRUE);
1758 }
1759 }
1760 }
1761 } else {
1762 UInt32 theSetAnnexMask = __CFCSetAnnexValidEntriesBitmap(theSet);
1763 UInt32 theOtherSetAnnexMask = __CFCSetAnnexValidEntriesBitmap(theOtherSet);
1764 Boolean isTheSetAnnexInverted = __CFCSetAnnexIsInverted(theSet);
1765 Boolean isTheOtherSetAnnexInverted = __CFCSetAnnexIsInverted(theOtherSet);
1766 uint8_t theSetBuffer[__kCFBitmapSize];
1767 uint8_t theOtherSetBuffer[__kCFBitmapSize];
1768
1769 // We mask plane 1 to plane 16
1770 if (isTheSetAnnexInverted) theSetAnnexMask = (~theSetAnnexMask) & (0xFFFF << 1);
1771 if (isTheOtherSetAnnexInverted) theOtherSetAnnexMask = (~theOtherSetAnnexMask) & (0xFFFF << 1);
1772
1773 __CFCSetGetBitmap(theSet, theSetBuffer);
1774 __CFCSetGetBitmap(theOtherSet, theOtherSetBuffer);
1775
1776 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32 *)theSetBuffer, (const UInt32 *)theOtherSetBuffer, FALSE, FALSE)) return FALSE;
1777
1778 if (theOtherSetAnnexMask) {
1779 CFCharacterSetRef theSetAnnex;
1780 CFCharacterSetRef theOtherSetAnnex;
1781 uint32_t idx;
1782
1783 if ((theSetAnnexMask & theOtherSetAnnexMask) != theOtherSetAnnexMask) return FALSE;
1784
1785 for (idx = 1;idx <= 16;idx++) {
1786 theSetAnnex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx);
1787 if (NULL == theSetAnnex) continue; // This case is already handled by the mask above
1788
1789 theOtherSetAnnex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx);
1790
1791 if (NULL == theOtherSetAnnex) {
1792 if (isTheOtherSetAnnexInverted) {
1793 __CFCSetGetBitmap(theSetAnnex, theSetBuffer);
1794 if (!__CFCSetIsEqualBitmap((const UInt32 *)theSetBuffer, (isTheSetAnnexInverted ? NULL : (const UInt32 *)-1))) return FALSE;
1795 }
1796 } else {
1797 __CFCSetGetBitmap(theSetAnnex, theSetBuffer);
1798 __CFCSetGetBitmap(theOtherSetAnnex, theOtherSetBuffer);
1799 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32 *)theSetBuffer, (const UInt32 *)theOtherSetBuffer, isTheSetAnnexInverted, isTheOtherSetAnnexInverted)) return FALSE;
1800 }
1801 }
1802 }
1803
1804 return TRUE;
1805 }
1806 }
1807 }
1808
1809 copy = CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault, theSet);
1810 CFCharacterSetIntersect(copy, theOtherSet);
1811 result = __CFCharacterSetEqual(copy, theOtherSet);
1812 CFRelease(copy);
1813
1814 return result;
1815 }
1816
1817 Boolean CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet, CFIndex thePlane) {
1818 Boolean isInverted = __CFCSetIsInverted(theSet);
1819
1820 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, Boolean, theSet, "hasMemberInPlane:", thePlane);
1821
1822 if (__CFCSetIsEmpty(theSet)) {
1823 return (isInverted ? TRUE : FALSE);
1824 } else if (__CFCSetIsBuiltin(theSet)) {
1825 CFCharacterSetPredefinedSet type = __CFCSetBuiltinType(theSet);
1826
1827 if (type == kCFCharacterSetControl) {
1828 if (isInverted || (thePlane == 14)) {
1829 return TRUE; // There is no plane that covers all values || Plane 14 has language tags
1830 } else {
1831 return (CFUniCharGetBitmapPtrForPlane(type, thePlane) ? TRUE : FALSE);
1832 }
1833 } else if ((type < kCFCharacterSetDecimalDigit) || (type == kCFCharacterSetNewline)) {
1834 return (thePlane && !isInverted ? FALSE : TRUE);
1835 } else if (__CFCSetBuiltinType(theSet) == kCFCharacterSetIllegal) {
1836 return (isInverted ? (thePlane < 3 || thePlane > 13 ? TRUE : FALSE) : TRUE); // This is according to Unicode 3.1
1837 } else {
1838 if (isInverted) {
1839 return TRUE; // There is no plane that covers all values
1840 } else {
1841 return (CFUniCharGetBitmapPtrForPlane(type, thePlane) ? TRUE : FALSE);
1842 }
1843 }
1844 } else if (__CFCSetIsRange(theSet)) {
1845 UTF32Char firstChar = __CFCSetRangeFirstChar(theSet);
1846 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(theSet) - 1);
1847 CFIndex firstPlane = firstChar >> 16;
1848 CFIndex lastPlane = lastChar >> 16;
1849
1850 if (isInverted) {
1851 if (thePlane < firstPlane || thePlane > lastPlane) {
1852 return TRUE;
1853 } else if (thePlane > firstPlane && thePlane < lastPlane) {
1854 return FALSE;
1855 } else {
1856 firstChar &= 0xFFFF;
1857 lastChar &= 0xFFFF;
1858 if (thePlane == firstPlane) {
1859 return (firstChar || (firstPlane == lastPlane && lastChar != 0xFFFF) ? TRUE : FALSE);
1860 } else {
1861 return (lastChar != 0xFFFF || (firstPlane == lastPlane && firstChar) ? TRUE : FALSE);
1862 }
1863 }
1864 } else {
1865 return (thePlane < firstPlane || thePlane > lastPlane ? FALSE : TRUE);
1866 }
1867 } else {
1868 if (thePlane == 0) {
1869 switch (__CFCSetClassType(theSet)) {
1870 case __kCFCharSetClassString: if (!__CFCSetStringLength(theSet)) return isInverted; break;
1871 case __kCFCharSetClassCompactBitmap: return (__CFCSetCompactBitmapBits(theSet) ? TRUE : FALSE); break;
1872 case __kCFCharSetClassBitmap: return (__CFCSetBitmapBits(theSet) ? TRUE : FALSE); break;
1873 }
1874 return TRUE;
1875 } else {
1876 CFCharacterSetRef annex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, thePlane);
1877 if (annex) {
1878 if (__CFCSetIsRange(annex)) {
1879 return (__CFCSetAnnexIsInverted(theSet) && (__CFCSetRangeFirstChar(annex) == 0) && (__CFCSetRangeLength(annex) == 0x10000) ? FALSE : TRUE);
1880 } else if (__CFCSetIsBitmap(annex)) {
1881 return (__CFCSetAnnexIsInverted(theSet) && __CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(annex), (const UInt32 *)-1) ? FALSE : TRUE);
1882 } else {
1883 uint8_t bitsBuf[__kCFBitmapSize];
1884 __CFCSetGetBitmap(annex, bitsBuf);
1885 return (__CFCSetAnnexIsInverted(theSet) && __CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1) ? FALSE : TRUE);
1886 }
1887 } else {
1888 return __CFCSetAnnexIsInverted(theSet);
1889 }
1890 }
1891 }
1892
1893 return FALSE;
1894 }
1895
1896
1897 CFDataRef CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1898 CFMutableDataRef data;
1899 int numNonBMPPlanes = 0;
1900 int planeIndices[MAX_ANNEX_PLANE];
1901 int idx;
1902 int length;
1903 bool isAnnexInverted;
1904
1905 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFDataRef , theSet, "_retainedBitmapRepresentation");
1906
1907 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1908
1909 isAnnexInverted = (__CFCSetAnnexIsInverted(theSet) != 0);
1910
1911 if (__CFCSetHasNonBMPPlane(theSet)) {
1912 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
1913 if (isAnnexInverted || __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
1914 planeIndices[numNonBMPPlanes++] = idx;
1915 }
1916 }
1917 } else if (__CFCSetIsBuiltin(theSet)) {
1918 numNonBMPPlanes = (__CFCSetIsInverted(theSet) ? MAX_ANNEX_PLANE : CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(theSet)) - 1);
1919 } else if (__CFCSetIsRange(theSet)) {
1920 UInt32 firstChar = __CFCSetRangeFirstChar(theSet);
1921 UInt32 lastChar = __CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet) - 1;
1922 int firstPlane = (firstChar >> 16);
1923 int lastPlane = (lastChar >> 16);
1924 bool isInverted = (__CFCSetIsInverted(theSet) != 0);
1925
1926 if (lastPlane > 0) {
1927 if (firstPlane == 0) {
1928 firstPlane = 1;
1929 firstChar = 0x10000;
1930 }
1931 numNonBMPPlanes = (lastPlane - firstPlane) + 1;
1932 if (isInverted) {
1933 numNonBMPPlanes = MAX_ANNEX_PLANE - numNonBMPPlanes;
1934 if (firstPlane == lastPlane) {
1935 if (((firstChar & 0xFFFF) > 0) || ((lastChar & 0xFFFF) < 0xFFFF)) ++numNonBMPPlanes;
1936 } else {
1937 if ((firstChar & 0xFFFF) > 0) ++numNonBMPPlanes;
1938 if ((lastChar & 0xFFFF) < 0xFFFF) ++numNonBMPPlanes;
1939 }
1940 }
1941 } else if (isInverted) {
1942 numNonBMPPlanes = MAX_ANNEX_PLANE;
1943 }
1944 } else if (isAnnexInverted) {
1945 numNonBMPPlanes = MAX_ANNEX_PLANE;
1946 }
1947
1948 length = __kCFBitmapSize + ((__kCFBitmapSize + 1) * numNonBMPPlanes);
1949 data = CFDataCreateMutable(alloc, length);
1950 CFDataSetLength(data, length);
1951 __CFCSetGetBitmap(theSet, CFDataGetMutableBytePtr(data));
1952
1953 if (numNonBMPPlanes > 0) {
1954 uint8_t *bytes = CFDataGetMutableBytePtr(data) + __kCFBitmapSize;
1955
1956 if (__CFCSetHasNonBMPPlane(theSet)) {
1957 CFCharacterSetRef subset;
1958
1959 for (idx = 0;idx < numNonBMPPlanes;idx++) {
1960 *(bytes++) = planeIndices[idx];
1961 if ((subset = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, planeIndices[idx])) == NULL) {
1962 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, (isAnnexInverted ? 0xFF : 0));
1963 } else {
1964 __CFCSetGetBitmap(subset, bytes);
1965 if (isAnnexInverted) {
1966 uint32_t count = __kCFBitmapSize / sizeof(uint32_t);
1967 uint32_t *bits = (uint32_t *)bytes;
1968
1969 while (count-- > 0) {
1970 *bits = ~(*bits);
1971 ++bits;
1972 }
1973 }
1974 }
1975 bytes += __kCFBitmapSize;
1976 }
1977 } else if (__CFCSetIsBuiltin(theSet)) {
1978 UInt8 result;
1979 CFIndex delta;
1980 Boolean isInverted = __CFCSetIsInverted(theSet);
1981
1982 for (idx = 0;idx < numNonBMPPlanes;idx++) {
1983 if ((result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theSet), idx + 1, bytes + 1, (isInverted != 0))) == kCFUniCharBitmapEmpty) continue;
1984 *(bytes++) = idx + 1;
1985 if (result == kCFUniCharBitmapAll) {
1986 CFIndex bitmapLength = __kCFBitmapSize;
1987 while (bitmapLength-- > 0) *(bytes++) = (uint8_t)0xFF;
1988 } else {
1989 bytes += __kCFBitmapSize;
1990 }
1991 }
1992 delta = bytes - (const uint8_t *)CFDataGetBytePtr(data);
1993 if (delta < length) CFDataSetLength(data, delta);
1994 } else if (__CFCSetIsRange(theSet)) {
1995 UInt32 firstChar = __CFCSetRangeFirstChar(theSet);
1996 UInt32 lastChar = __CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet) - 1;
1997 int firstPlane = (firstChar >> 16);
1998 int lastPlane = (lastChar >> 16);
1999
2000 if (firstPlane == 0) {
2001 firstPlane = 1;
2002 firstChar = 0x10000;
2003 }
2004 if (__CFCSetIsInverted(theSet)) {
2005 // Mask out the plane byte
2006 firstChar &= 0xFFFF;
2007 lastChar &= 0xFFFF;
2008
2009 for (idx = 1;idx < firstPlane;idx++) { // Fill up until the first plane
2010 *(bytes++) = idx;
2011 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2012 bytes += __kCFBitmapSize;
2013 }
2014 if (firstPlane == lastPlane) {
2015 if ((firstChar > 0) || (lastChar < 0xFFFF)) {
2016 *(bytes++) = idx;
2017 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2018 __CFCSetBitmapRemoveCharactersInRange(bytes, firstChar, lastChar);
2019 bytes += __kCFBitmapSize;
2020 }
2021 } else if (firstPlane < lastPlane) {
2022 if (firstChar > 0) {
2023 *(bytes++) = idx;
2024 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0);
2025 __CFCSetBitmapAddCharactersInRange(bytes, 0, firstChar - 1);
2026 bytes += __kCFBitmapSize;
2027 }
2028 if (lastChar < 0xFFFF) {
2029 *(bytes++) = idx;
2030 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0);
2031 __CFCSetBitmapAddCharactersInRange(bytes, lastChar, 0xFFFF);
2032 bytes += __kCFBitmapSize;
2033 }
2034 }
2035 for (idx = lastPlane + 1;idx <= MAX_ANNEX_PLANE;idx++) {
2036 *(bytes++) = idx;
2037 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2038 bytes += __kCFBitmapSize;
2039 }
2040 } else {
2041 for (idx = firstPlane;idx <= lastPlane;idx++) {
2042 *(bytes++) = idx;
2043 __CFCSetBitmapAddCharactersInRange(bytes, (idx == firstPlane ? firstChar : 0), (idx == lastPlane ? lastChar : 0xFFFF));
2044 bytes += __kCFBitmapSize;
2045 }
2046 }
2047 } else if (isAnnexInverted) {
2048 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2049 *(bytes++) = idx;
2050 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2051 bytes += __kCFBitmapSize;
2052 }
2053 }
2054 }
2055
2056 return data;
2057 }
2058
2059 /*** MutableCharacterSet functions ***/
2060 void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange) {
2061 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "addCharactersInRange:", theRange);
2062
2063 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2064 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
2065
2066 if (!theRange.length || (__CFCSetIsInverted(theSet) && __CFCSetIsEmpty(theSet))) return; // Inverted && empty set contains all char
2067
2068 if (!__CFCSetIsInverted(theSet)) {
2069 if (__CFCSetIsEmpty(theSet)) {
2070 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2071 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2072 __CFCSetPutRangeLength(theSet, theRange.length);
2073 __CFCSetPutHasHashValue(theSet, false);
2074 return;
2075 } else if (__CFCSetIsRange(theSet)) {
2076 CFIndex firstChar = __CFCSetRangeFirstChar(theSet);
2077 CFIndex length = __CFCSetRangeLength(theSet);
2078
2079 if (firstChar == theRange.location) {
2080 __CFCSetPutRangeLength(theSet, __CFMin(length, theRange.length));
2081 __CFCSetPutHasHashValue(theSet, false);
2082 return;
2083 } else if (firstChar < theRange.location && theRange.location <= firstChar + length) {
2084 if (firstChar + length < theRange.location + theRange.length) __CFCSetPutRangeLength(theSet, theRange.length + (theRange.location - firstChar));
2085 __CFCSetPutHasHashValue(theSet, false);
2086 return;
2087 } else if (theRange.location < firstChar && firstChar <= theRange.location + theRange.length) {
2088 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2089 __CFCSetPutRangeLength(theSet, length + (firstChar - theRange.location));
2090 __CFCSetPutHasHashValue(theSet, false);
2091 return;
2092 }
2093 } else if (__CFCSetIsString(theSet) && __CFCSetStringLength(theSet) + theRange.length < __kCFStringCharSetMax) {
2094 UniChar *buffer;
2095 if (!__CFCSetStringBuffer(theSet))
2096 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2097 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet);
2098 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theSet) + theRange.length);
2099 while (theRange.length--) *buffer++ = (UniChar)theRange.location++;
2100 qsort(__CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet), sizeof(UniChar), chcompar);
2101 __CFCSetPutHasHashValue(theSet, false);
2102 return;
2103 }
2104 }
2105
2106 // OK, I have to be a bitmap
2107 __CFCSetMakeBitmap(theSet);
2108 __CFCSetAddNonBMPPlanesInRange(theSet, theRange);
2109 if (theRange.location < 0x10000) { // theRange is in BMP
2110 if (theRange.location + theRange.length >= NUMCHARACTERS) theRange.length = NUMCHARACTERS - theRange.location;
2111 __CFCSetBitmapAddCharactersInRange(__CFCSetBitmapBits(theSet), (UniChar)theRange.location, (UniChar)(theRange.location + theRange.length - 1));
2112 }
2113 __CFCSetPutHasHashValue(theSet, false);
2114
2115 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2116 }
2117
2118 void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange) {
2119 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "removeCharactersInRange:", theRange);
2120
2121 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2122 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
2123
2124 if (!theRange.length || (!__CFCSetIsInverted(theSet) && __CFCSetIsEmpty(theSet))) return; // empty set
2125
2126 if (__CFCSetIsInverted(theSet)) {
2127 if (__CFCSetIsEmpty(theSet)) {
2128 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2129 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2130 __CFCSetPutRangeLength(theSet, theRange.length);
2131 __CFCSetPutHasHashValue(theSet, false);
2132 return;
2133 } else if (__CFCSetIsRange(theSet)) {
2134 CFIndex firstChar = __CFCSetRangeFirstChar(theSet);
2135 CFIndex length = __CFCSetRangeLength(theSet);
2136
2137 if (firstChar == theRange.location) {
2138 __CFCSetPutRangeLength(theSet, __CFMin(length, theRange.length));
2139 __CFCSetPutHasHashValue(theSet, false);
2140 return;
2141 } else if (firstChar < theRange.location && theRange.location <= firstChar + length) {
2142 if (firstChar + length < theRange.location + theRange.length) __CFCSetPutRangeLength(theSet, theRange.length + (theRange.location - firstChar));
2143 __CFCSetPutHasHashValue(theSet, false);
2144 return;
2145 } else if (theRange.location < firstChar && firstChar <= theRange.location + theRange.length) {
2146 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2147 __CFCSetPutRangeLength(theSet, length + (firstChar - theRange.location));
2148 __CFCSetPutHasHashValue(theSet, false);
2149 return;
2150 }
2151 } else if (__CFCSetIsString(theSet) && __CFCSetStringLength(theSet) + theRange.length < __kCFStringCharSetMax) {
2152 UniChar *buffer;
2153 if (!__CFCSetStringBuffer(theSet))
2154 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2155 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet);
2156 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theSet) + theRange.length);
2157 while (theRange.length--) *buffer++ = (UniChar)theRange.location++;
2158 qsort(__CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet), sizeof(UniChar), chcompar);
2159 __CFCSetPutHasHashValue(theSet, false);
2160 return;
2161 }
2162 }
2163
2164 // OK, I have to be a bitmap
2165 __CFCSetMakeBitmap(theSet);
2166 __CFCSetRemoveNonBMPPlanesInRange(theSet, theRange);
2167 if (theRange.location < 0x10000) { // theRange is in BMP
2168 if (theRange.location + theRange.length > NUMCHARACTERS) theRange.length = NUMCHARACTERS - theRange.location;
2169 if (theRange.location == 0 && theRange.length == NUMCHARACTERS) { // Remove all
2170 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2171 __CFCSetPutBitmapBits(theSet, NULL);
2172 } else {
2173 __CFCSetBitmapRemoveCharactersInRange(__CFCSetBitmapBits(theSet), (UniChar)theRange.location, (UniChar)(theRange.location + theRange.length - 1));
2174 }
2175 }
2176
2177 __CFCSetPutHasHashValue(theSet, false);
2178 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2179 }
2180
2181 void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString) {
2182 UniChar *buffer;
2183 CFIndex length;
2184 BOOL hasSurrogate = NO;
2185
2186 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "addCharactersInString:", theString);
2187
2188 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2189
2190 if ((__CFCSetIsEmpty(theSet) && __CFCSetIsInverted(theSet)) || !(length = CFStringGetLength(theString))) return;
2191
2192 if (!__CFCSetIsInverted(theSet)) {
2193 CFIndex newLength = length + (__CFCSetIsEmpty(theSet) ? 0 : (__CFCSetIsString(theSet) ? __CFCSetStringLength(theSet) : __kCFStringCharSetMax));
2194
2195 if (newLength < __kCFStringCharSetMax) {
2196 buffer = __CFCSetStringBuffer(theSet);
2197
2198 if (NULL == buffer) {
2199 buffer = (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0);
2200 } else {
2201 buffer += __CFCSetStringLength(theSet);
2202 }
2203
2204 CFStringGetCharacters(theString, CFRangeMake(0, length), (UniChar*)buffer);
2205
2206 if (length > 1) {
2207 UTF16Char *characters = buffer;
2208 const UTF16Char *charactersLimit = characters + length;
2209
2210 while (characters < charactersLimit) {
2211 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) {
2212 memmove(characters, characters + 1, (charactersLimit - (characters + 1)) * sizeof(*characters));
2213 --charactersLimit;
2214 hasSurrogate = YES;
2215 } else {
2216 ++characters;
2217 }
2218 }
2219
2220 newLength -= (length - (charactersLimit - buffer));
2221 }
2222
2223 if (0 == newLength) {
2224 if (NULL == __CFCSetStringBuffer(theSet)) CFAllocatorDeallocate(CFGetAllocator(theSet), buffer);
2225 } else {
2226 if (NULL == __CFCSetStringBuffer(theSet)) {
2227 __CFCSetPutClassType(theSet, __kCFCharSetClassString);
2228 __CFCSetPutStringBuffer(theSet, buffer);
2229 }
2230 __CFCSetPutStringLength(theSet, newLength);
2231 qsort(__CFCSetStringBuffer(theSet), newLength, sizeof(UniChar), chcompar);
2232 }
2233 __CFCSetPutHasHashValue(theSet, false);
2234
2235 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetAddCharactersInRange);
2236
2237 return;
2238 }
2239 }
2240
2241 // OK, I have to be a bitmap
2242 __CFCSetMakeBitmap(theSet);
2243 CFStringInlineBuffer inlineBuffer;
2244 CFIndex idx;
2245
2246 CFStringInitInlineBuffer(theString, &inlineBuffer, CFRangeMake(0, length));
2247
2248 for (idx = 0;idx < length;idx++) {
2249 UTF16Char character = __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer, idx);
2250
2251 if (CFStringIsSurrogateHighCharacter(character) || CFStringIsSurrogateLowCharacter(character)) {
2252 hasSurrogate = YES;
2253 } else {
2254 __CFCSetBitmapAddCharacter(__CFCSetBitmapBits(theSet), character);
2255 }
2256 }
2257
2258 __CFCSetPutHasHashValue(theSet, false);
2259
2260 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2261
2262 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetAddCharactersInRange);
2263 }
2264
2265 void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString) {
2266 UniChar *buffer;
2267 CFIndex length;
2268 BOOL hasSurrogate = NO;
2269
2270 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "removeCharactersInString:", theString);
2271
2272 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2273
2274 if ((__CFCSetIsEmpty(theSet) && !__CFCSetIsInverted(theSet)) || !(length = CFStringGetLength(theString))) return;
2275
2276 if (__CFCSetIsInverted(theSet)) {
2277 CFIndex newLength = length + (__CFCSetIsEmpty(theSet) ? 0 : (__CFCSetIsString(theSet) ? __CFCSetStringLength(theSet) : __kCFStringCharSetMax));
2278
2279 if (newLength < __kCFStringCharSetMax) {
2280 buffer = __CFCSetStringBuffer(theSet);
2281
2282 if (NULL == buffer) {
2283 buffer = (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0);
2284 } else {
2285 buffer += __CFCSetStringLength(theSet);
2286 }
2287
2288 CFStringGetCharacters(theString, CFRangeMake(0, length), (UniChar*)buffer);
2289
2290 if (length > 1) {
2291 UTF16Char *characters = buffer;
2292 const UTF16Char *charactersLimit = characters + length;
2293
2294 while (characters < charactersLimit) {
2295 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) {
2296 memmove(characters, characters + 1, charactersLimit - (characters + 1));
2297 --charactersLimit;
2298 hasSurrogate = YES;
2299 }
2300 ++characters;
2301 }
2302
2303 newLength -= (length - (charactersLimit - buffer));
2304 }
2305
2306 if (NULL == __CFCSetStringBuffer(theSet)) {
2307 __CFCSetPutClassType(theSet, __kCFCharSetClassString);
2308 __CFCSetPutStringBuffer(theSet, buffer);
2309 }
2310 __CFCSetPutStringLength(theSet, newLength);
2311 qsort(__CFCSetStringBuffer(theSet), newLength, sizeof(UniChar), chcompar);
2312 __CFCSetPutHasHashValue(theSet, false);
2313
2314 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetRemoveCharactersInRange);
2315
2316 return;
2317 }
2318 }
2319
2320 // OK, I have to be a bitmap
2321 __CFCSetMakeBitmap(theSet);
2322 CFStringInlineBuffer inlineBuffer;
2323 CFIndex idx;
2324
2325 CFStringInitInlineBuffer(theString, &inlineBuffer, CFRangeMake(0, length));
2326
2327 for (idx = 0;idx < length;idx++) {
2328 UTF16Char character = __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer, idx);
2329
2330 if (CFStringIsSurrogateHighCharacter(character) || CFStringIsSurrogateLowCharacter(character)) {
2331 hasSurrogate = YES;
2332 } else {
2333 __CFCSetBitmapRemoveCharacter(__CFCSetBitmapBits(theSet), character);
2334 }
2335 }
2336
2337 __CFCSetPutHasHashValue(theSet, false);
2338 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2339
2340 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetRemoveCharactersInRange);
2341 }
2342
2343 void CFCharacterSetUnion(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
2344 CFCharacterSetRef expandedSet = NULL;
2345
2346 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "formUnionWithCharacterSet:", theOtherSet);
2347
2348 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2349
2350 if (__CFCSetIsEmpty(theSet) && __CFCSetIsInverted(theSet)) return; // Inverted empty set contains all char
2351
2352 if (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet))) { // Really CF, we can do some trick here
2353 if (expandedSet) theOtherSet = expandedSet;
2354
2355 if (__CFCSetIsEmpty(theOtherSet)) {
2356 if (__CFCSetIsInverted(theOtherSet)) {
2357 if (__CFCSetIsString(theSet) && __CFCSetStringBuffer(theSet)) {
2358 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetStringBuffer(theSet));
2359 } else if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) {
2360 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2361 } else if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) {
2362 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetCompactBitmapBits(theSet));
2363 }
2364 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2365 __CFCSetPutRangeLength(theSet, 0);
2366 __CFCSetPutIsInverted(theSet, true);
2367 __CFCSetPutHasHashValue(theSet, false);
2368 __CFCSetDeallocateAnnexPlane(theSet);
2369 }
2370 } else if (__CFCSetIsBuiltin(theOtherSet) && __CFCSetIsEmpty(theSet)) { // theSet can be builtin set
2371 __CFCSetPutClassType(theSet, __kCFCharSetClassBuiltin);
2372 __CFCSetPutBuiltinType(theSet, __CFCSetBuiltinType(theOtherSet));
2373 if (__CFCSetIsInverted(theOtherSet)) __CFCSetPutIsInverted(theSet, true);
2374 if (__CFCSetAnnexIsInverted(theOtherSet)) __CFCSetAnnexSetIsInverted(theSet, true);
2375 __CFCSetPutHasHashValue(theSet, false);
2376 } else {
2377 if (__CFCSetIsRange(theOtherSet)) {
2378 if (__CFCSetIsInverted(theOtherSet)) {
2379 UTF32Char firstChar = __CFCSetRangeFirstChar(theOtherSet);
2380 CFIndex length = __CFCSetRangeLength(theOtherSet);
2381
2382 if (firstChar > 0) CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(0, firstChar));
2383 firstChar += length;
2384 length = 0x110000 - firstChar;
2385 CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(firstChar, length));
2386 } else {
2387 CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet), __CFCSetRangeLength(theOtherSet)));
2388 }
2389 } else if (__CFCSetIsString(theOtherSet)) {
2390 CFStringRef string = CFStringCreateWithCharactersNoCopy(CFGetAllocator(theSet), __CFCSetStringBuffer(theOtherSet), __CFCSetStringLength(theOtherSet), kCFAllocatorNull);
2391 CFCharacterSetAddCharactersInString(theSet, string);
2392 CFRelease(string);
2393 } else {
2394 __CFCSetMakeBitmap(theSet);
2395 if (__CFCSetIsBitmap(theOtherSet)) {
2396 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2397 UInt32 *bitmap2 = (UInt32*)__CFCSetBitmapBits(theOtherSet);
2398 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2399 while (length--) *bitmap1++ |= *bitmap2++;
2400 } else {
2401 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2402 UInt32 *bitmap2;
2403 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2404 uint8_t bitmapBuffer[__kCFBitmapSize];
2405 __CFCSetGetBitmap(theOtherSet, bitmapBuffer);
2406 bitmap2 = (UInt32*)bitmapBuffer;
2407 while (length--) *bitmap1++ |= *bitmap2++;
2408 }
2409 __CFCSetPutHasHashValue(theSet, false);
2410 }
2411 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2412 CFMutableCharacterSetRef otherSetPlane;
2413 int idx;
2414
2415 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2416 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2417 CFCharacterSetUnion((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx), otherSetPlane);
2418 }
2419 }
2420 } else if (__CFCSetAnnexIsInverted(theOtherSet)) {
2421 if (__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2422 __CFCSetAnnexSetIsInverted(theSet, true);
2423 } else if (__CFCSetIsBuiltin(theOtherSet)) {
2424 CFMutableCharacterSetRef annexPlane;
2425 uint8_t bitmapBuffer[__kCFBitmapSize];
2426 uint8_t result;
2427 int planeIndex;
2428 Boolean isOtherAnnexPlaneInverted = __CFCSetAnnexIsInverted(theOtherSet);
2429 UInt32 *bitmap1;
2430 UInt32 *bitmap2;
2431 CFIndex length;
2432
2433 for (planeIndex = 1;planeIndex <= MAX_ANNEX_PLANE;planeIndex++) {
2434 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet), planeIndex, bitmapBuffer, (isOtherAnnexPlaneInverted != 0));
2435 if (result != kCFUniCharBitmapEmpty) {
2436 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, planeIndex);
2437 if (result == kCFUniCharBitmapAll) {
2438 CFCharacterSetAddCharactersInRange(annexPlane, CFRangeMake(0x0000, 0x10000));
2439 } else {
2440 __CFCSetMakeBitmap(annexPlane);
2441 bitmap1 = (UInt32 *)__CFCSetBitmapBits(annexPlane);
2442 length = __kCFBitmapSize / sizeof(UInt32);
2443 bitmap2 = (UInt32*)bitmapBuffer;
2444 while (length--) *bitmap1++ |= *bitmap2++;
2445 }
2446 }
2447 }
2448 }
2449 }
2450 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2451 } else { // It's NSCharacterSet
2452 CFDataRef bitmapRep = CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault, theOtherSet);
2453 const UInt32 *bitmap2 = (bitmapRep && CFDataGetLength(bitmapRep) ? (const UInt32 *)CFDataGetBytePtr(bitmapRep) : NULL);
2454 if (bitmap2) {
2455 UInt32 *bitmap1;
2456 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2457 __CFCSetMakeBitmap(theSet);
2458 bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2459 while (length--) *bitmap1++ |= *bitmap2++;
2460 __CFCSetPutHasHashValue(theSet, false);
2461 }
2462 CFRelease(bitmapRep);
2463 }
2464 }
2465
2466 void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
2467 CFCharacterSetRef expandedSet = NULL;
2468
2469 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "formIntersectionWithCharacterSet:", theOtherSet);
2470
2471 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2472
2473 if (__CFCSetIsEmpty(theSet) && !__CFCSetIsInverted(theSet)) return; // empty set
2474
2475 if (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet))) { // Really CF, we can do some trick here
2476 if (expandedSet) theOtherSet = expandedSet;
2477
2478 if (__CFCSetIsEmpty(theOtherSet)) {
2479 if (!__CFCSetIsInverted(theOtherSet)) {
2480 if (__CFCSetIsString(theSet) && __CFCSetStringBuffer(theSet)) {
2481 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetStringBuffer(theSet));
2482 } else if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) {
2483 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2484 } else if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) {
2485 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetCompactBitmapBits(theSet));
2486 }
2487 __CFCSetPutClassType(theSet, __kCFCharSetClassBitmap);
2488 __CFCSetPutBitmapBits(theSet, NULL);
2489 __CFCSetPutIsInverted(theSet, false);
2490 theSet->_hashValue = 0;
2491 __CFCSetPutHasHashValue(theSet, true);
2492 __CFCSetDeallocateAnnexPlane(theSet);
2493 }
2494 } else if (__CFCSetIsEmpty(theSet)) { // non inverted empty set contains all character
2495 __CFCSetPutClassType(theSet, __CFCSetClassType(theOtherSet));
2496 __CFCSetPutHasHashValue(theSet, __CFCSetHasHashValue(theOtherSet));
2497 __CFCSetPutIsInverted(theSet, __CFCSetIsInverted(theOtherSet));
2498 theSet->_hashValue = theOtherSet->_hashValue;
2499 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2500 CFMutableCharacterSetRef otherSetPlane;
2501 int idx;
2502 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2503 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2504 otherSetPlane = (CFMutableCharacterSetRef)CFCharacterSetCreateMutableCopy(CFGetAllocator(theSet), otherSetPlane);
2505 __CFCSetPutCharacterSetToAnnexPlane(theSet, otherSetPlane, idx);
2506 CFRelease(otherSetPlane);
2507 }
2508 }
2509 __CFCSetAnnexSetIsInverted(theSet, __CFCSetAnnexIsInverted(theOtherSet));
2510 }
2511
2512 switch (__CFCSetClassType(theOtherSet)) {
2513 case __kCFCharSetClassBuiltin:
2514 __CFCSetPutBuiltinType(theSet, __CFCSetBuiltinType(theOtherSet));
2515 break;
2516
2517 case __kCFCharSetClassRange:
2518 __CFCSetPutRangeFirstChar(theSet, __CFCSetRangeFirstChar(theOtherSet));
2519 __CFCSetPutRangeLength(theSet, __CFCSetRangeLength(theOtherSet));
2520 break;
2521
2522 case __kCFCharSetClassString:
2523 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theOtherSet));
2524 if (!__CFCSetStringBuffer(theSet))
2525 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2526 memmove(__CFCSetStringBuffer(theSet), __CFCSetStringBuffer(theOtherSet), __CFCSetStringLength(theSet) * sizeof(UniChar));
2527 break;
2528
2529 case __kCFCharSetClassBitmap:
2530 __CFCSetPutBitmapBits(theSet, (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet), sizeof(uint8_t) * __kCFBitmapSize, 0));
2531 memmove(__CFCSetBitmapBits(theSet), __CFCSetBitmapBits(theOtherSet), __kCFBitmapSize);
2532 break;
2533
2534 case __kCFCharSetClassCompactBitmap: {
2535 const uint8_t *cBitmap = __CFCSetCompactBitmapBits(theOtherSet);
2536 uint8_t *newBitmap;
2537 uint32_t size = __CFCSetGetCompactBitmapSize(cBitmap);
2538 newBitmap = (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet), sizeof(uint8_t) * size, 0);
2539 __CFCSetPutBitmapBits(theSet, newBitmap);
2540 memmove(newBitmap, cBitmap, size);
2541 }
2542 break;
2543
2544 default:
2545 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
2546 }
2547 } else {
2548 __CFCSetMakeBitmap(theSet);
2549 if (__CFCSetIsBitmap(theOtherSet)) {
2550 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2551 UInt32 *bitmap2 = (UInt32*)__CFCSetBitmapBits(theOtherSet);
2552 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2553 while (length--) *bitmap1++ &= *bitmap2++;
2554 } else {
2555 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2556 UInt32 *bitmap2;
2557 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2558 uint8_t bitmapBuffer[__kCFBitmapSize];
2559 __CFCSetGetBitmap(theOtherSet, bitmapBuffer);
2560 bitmap2 = (UInt32*)bitmapBuffer;
2561 while (length--) *bitmap1++ &= *bitmap2++;
2562 }
2563 __CFCSetPutHasHashValue(theSet, false);
2564 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2565 CFMutableCharacterSetRef annexPlane;
2566 CFMutableCharacterSetRef otherSetPlane;
2567 int idx;
2568 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2569 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2570 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx);
2571 CFCharacterSetIntersect(annexPlane, otherSetPlane);
2572 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2573 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
2574 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2575 }
2576 }
2577 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2578 } else if (__CFCSetIsBuiltin(theOtherSet) && !__CFCSetAnnexIsInverted(theOtherSet)) {
2579 CFMutableCharacterSetRef annexPlane;
2580 uint8_t bitmapBuffer[__kCFBitmapSize];
2581 uint8_t result;
2582 int planeIndex;
2583 UInt32 *bitmap1;
2584 UInt32 *bitmap2;
2585 CFIndex length;
2586
2587 for (planeIndex = 1;planeIndex <= MAX_ANNEX_PLANE;planeIndex++) {
2588 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, planeIndex);
2589 if (annexPlane) {
2590 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet), planeIndex, bitmapBuffer, false);
2591 if (result == kCFUniCharBitmapEmpty) {
2592 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, planeIndex);
2593 } else if (result == kCFUniCharBitmapFilled) {
2594 Boolean isEmpty = true;
2595
2596 __CFCSetMakeBitmap(annexPlane);
2597 bitmap1 = (UInt32 *)__CFCSetBitmapBits(annexPlane);
2598 length = __kCFBitmapSize / sizeof(UInt32);
2599 bitmap2 = (UInt32*)bitmapBuffer;
2600
2601 while (length--) {
2602 if ((*bitmap1++ &= *bitmap2++)) isEmpty = false;
2603 }
2604 if (isEmpty) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, planeIndex);
2605 }
2606 }
2607 }
2608 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2609 } else if (__CFCSetIsRange(theOtherSet)) {
2610 CFMutableCharacterSetRef tempOtherSet = CFCharacterSetCreateMutable(CFGetAllocator(theSet));
2611 CFMutableCharacterSetRef annexPlane;
2612 CFMutableCharacterSetRef otherSetPlane;
2613 int idx;
2614
2615 __CFCSetAddNonBMPPlanesInRange(tempOtherSet, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet), __CFCSetRangeLength(theOtherSet)));
2616
2617 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2618 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(tempOtherSet, idx))) {
2619 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx);
2620 CFCharacterSetIntersect(annexPlane, otherSetPlane);
2621 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2622 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
2623 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2624 }
2625 }
2626 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2627 CFRelease(tempOtherSet);
2628 } else if ((__CFCSetHasNonBMPPlane(theSet) || __CFCSetAnnexIsInverted(theSet)) && !__CFCSetAnnexIsInverted(theOtherSet)) {
2629 __CFCSetDeallocateAnnexPlane(theSet);
2630 }
2631 }
2632 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2633 } else { // It's NSCharacterSet
2634 CFDataRef bitmapRep = CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault, theOtherSet);
2635 const UInt32 *bitmap2 = (bitmapRep && CFDataGetLength(bitmapRep) ? (const UInt32 *)CFDataGetBytePtr(bitmapRep) : NULL);
2636 if (bitmap2) {
2637 UInt32 *bitmap1;
2638 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2639 __CFCSetMakeBitmap(theSet);
2640 bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2641 while (length--) *bitmap1++ &= *bitmap2++;
2642 __CFCSetPutHasHashValue(theSet, false);
2643 }
2644 CFRelease(bitmapRep);
2645 }
2646 }
2647
2648 void CFCharacterSetInvert(CFMutableCharacterSetRef theSet) {
2649
2650 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, void, theSet, "invert");
2651
2652 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2653
2654 __CFCSetPutHasHashValue(theSet, false);
2655
2656 if (__CFCSetClassType(theSet) == __kCFCharSetClassBitmap) {
2657 CFIndex idx;
2658 CFIndex count = __kCFBitmapSize / sizeof(UInt32);
2659 UInt32 *bitmap = (UInt32*) __CFCSetBitmapBits(theSet);
2660
2661 if (NULL == bitmap) {
2662 bitmap = (UInt32 *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFBitmapSize, 0);
2663 __CFCSetPutBitmapBits(theSet, (uint8_t *)bitmap);
2664 for (idx = 0;idx < count;idx++) bitmap[idx] = ((UInt32)0xFFFFFFFF);
2665 } else {
2666 for (idx = 0;idx < count;idx++) bitmap[idx] = ~(bitmap[idx]);
2667 }
2668 __CFCSetAllocateAnnexForPlane(theSet, 0); // We need to alloc annex to invert
2669 } else if (__CFCSetClassType(theSet) == __kCFCharSetClassCompactBitmap) {
2670 uint8_t *bitmap = __CFCSetCompactBitmapBits(theSet);
2671 int idx;
2672 int length = 0;
2673 uint8_t value;
2674
2675 for (idx = 0;idx < __kCFCompactBitmapNumPages;idx++) {
2676 value = bitmap[idx];
2677
2678 if (value == 0) {
2679 bitmap[idx] = UINT8_MAX;
2680 } else if (value == UINT8_MAX) {
2681 bitmap[idx] = 0;
2682 } else {
2683 length += __kCFCompactBitmapPageSize;
2684 }
2685 }
2686 bitmap += __kCFCompactBitmapNumPages;
2687 for (idx = 0;idx < length;idx++) bitmap[idx] = ~(bitmap[idx]);
2688 __CFCSetAllocateAnnexForPlane(theSet, 0); // We need to alloc annex to invert
2689 } else {
2690 __CFCSetPutIsInverted(theSet, !__CFCSetIsInverted(theSet));
2691 }
2692 __CFCSetAnnexSetIsInverted(theSet, !__CFCSetAnnexIsInverted(theSet));
2693 }
2694
2695 void CFCharacterSetCompact(CFMutableCharacterSetRef theSet) {
2696 if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) __CFCSetMakeCompact(theSet);
2697 if (__CFCSetHasNonBMPPlane(theSet)) {
2698 CFMutableCharacterSetRef annex;
2699 int idx;
2700
2701 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2702 if ((annex = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) && __CFCSetIsBitmap(annex) && __CFCSetBitmapBits(annex)) {
2703 __CFCSetMakeCompact(annex);
2704 }
2705 }
2706 }
2707 }
2708
2709 void CFCharacterSetFast(CFMutableCharacterSetRef theSet) {
2710 if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) __CFCSetMakeBitmap(theSet);
2711 if (__CFCSetHasNonBMPPlane(theSet)) {
2712 CFMutableCharacterSetRef annex;
2713 int idx;
2714
2715 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2716 if ((annex = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) && __CFCSetIsCompactBitmap(annex) && __CFCSetCompactBitmapBits(annex)) {
2717 __CFCSetMakeBitmap(annex);
2718 }
2719 }
2720 }
2721 }
2722
2723 /* Keyed-coding support
2724 */
2725 CFCharacterSetKeyedCodingType _CFCharacterSetGetKeyedCodingType(CFCharacterSetRef cset) {
2726 if (CF_IS_OBJC(__kCFCharacterSetTypeID, cset)) return kCFCharacterSetKeyedCodingTypeBitmap;
2727
2728 switch (__CFCSetClassType(cset)) {
2729 case __kCFCharSetClassBuiltin: return ((__CFCSetBuiltinType(cset) < kCFCharacterSetSymbol) ? kCFCharacterSetKeyedCodingTypeBuiltin : kCFCharacterSetKeyedCodingTypeBuiltinAndBitmap);
2730 case __kCFCharSetClassRange: return kCFCharacterSetKeyedCodingTypeRange;
2731
2732 case __kCFCharSetClassString: // We have to check if we have non-BMP here
2733 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) return kCFCharacterSetKeyedCodingTypeString; // BMP only. we can archive the string
2734 /* fallthrough */
2735
2736 default:
2737 return kCFCharacterSetKeyedCodingTypeBitmap;
2738 }
2739 }
2740
2741 CFCharacterSetPredefinedSet _CFCharacterSetGetKeyedCodingBuiltinType(CFCharacterSetRef cset) { return __CFCSetBuiltinType(cset); }
2742 CFRange _CFCharacterSetGetKeyedCodingRange(CFCharacterSetRef cset) { return CFRangeMake(__CFCSetRangeFirstChar(cset), __CFCSetRangeLength(cset)); }
2743 CFStringRef _CFCharacterSetCreateKeyedCodingString(CFCharacterSetRef cset) { return CFStringCreateWithCharacters(kCFAllocatorSystemDefault, __CFCSetStringBuffer(cset), __CFCSetStringLength(cset)); }
2744
2745 bool _CFCharacterSetIsInverted(CFCharacterSetRef cset) { return (__CFCSetIsInverted(cset) != 0); }
2746 void _CFCharacterSetSetIsInverted(CFCharacterSetRef cset, bool flag) { __CFCSetPutIsInverted((CFMutableCharacterSetRef)cset, flag); }
2747
2748 /* Inline buffer support
2749 */
2750 void CFCharacterSetInitInlineBuffer(CFCharacterSetRef cset, CFCharacterSetInlineBuffer *buffer) {
2751 memset(buffer, 0, sizeof(CFCharacterSetInlineBuffer));
2752 buffer->cset = cset;
2753 buffer->rangeLimit = 0x10000;
2754
2755 if (CF_IS_OBJC(__kCFCharacterSetTypeID, cset)) {
2756 CFCharacterSetRef expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(cset);
2757
2758 if (NULL == expandedSet) {
2759 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2760 buffer->rangeLimit = 0x110000;
2761
2762 return;
2763 } else {
2764 cset = expandedSet;
2765 }
2766 }
2767
2768 switch (__CFCSetClassType(cset)) {
2769 case __kCFCharSetClassBuiltin:
2770 buffer->bitmap = CFUniCharGetBitmapPtrForPlane(__CFCSetBuiltinType(cset), 0);
2771 buffer->rangeLimit = 0x110000;
2772 if (NULL == buffer->bitmap) {
2773 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2774 } else {
2775 if (__CFCSetIsInverted(cset)) buffer->flags = kCFCharacterSetIsInverted;
2776 }
2777 break;
2778
2779 case __kCFCharSetClassRange:
2780 buffer->rangeStart = __CFCSetRangeFirstChar(cset);
2781 buffer->rangeLimit = __CFCSetRangeFirstChar(cset) + __CFCSetRangeLength(cset);
2782 if (__CFCSetIsInverted(cset)) buffer->flags = kCFCharacterSetIsInverted;
2783 return;
2784
2785 case __kCFCharSetClassString:
2786 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2787 if (__CFCSetStringLength(cset) > 0) {
2788 buffer->rangeStart = *__CFCSetStringBuffer(cset);
2789 buffer->rangeLimit = *(__CFCSetStringBuffer(cset) + __CFCSetStringLength(cset) - 1) + 1;
2790
2791 if (__CFCSetIsInverted(cset)) {
2792 if (0 == buffer->rangeStart) {
2793 buffer->rangeStart = buffer->rangeLimit;
2794 buffer->rangeLimit = 0x10000;
2795 } else if (0x10000 == buffer->rangeLimit) {
2796 buffer->rangeLimit = buffer->rangeStart;
2797 buffer->rangeStart = 0;
2798 } else {
2799 buffer->rangeStart = 0;
2800 buffer->rangeLimit = 0x10000;
2801 }
2802 }
2803 }
2804 break;
2805
2806 case __kCFCharSetClassBitmap:
2807 case __kCFCharSetClassCompactBitmap:
2808 buffer->bitmap = __CFCSetCompactBitmapBits(cset);
2809 if (NULL == buffer->bitmap) {
2810 buffer->flags = kCFCharacterSetIsCompactBitmap;
2811 if (__CFCSetIsInverted(cset)) buffer->flags |= kCFCharacterSetIsInverted;
2812 } else {
2813 if (__kCFCharSetClassCompactBitmap == __CFCSetClassType(cset)) buffer->flags = kCFCharacterSetIsCompactBitmap;
2814 }
2815 break;
2816
2817 default:
2818 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
2819 return;
2820 }
2821
2822 if (__CFCSetAnnexIsInverted(cset)) {
2823 buffer->rangeLimit = 0x110000;
2824 } else if (__CFCSetHasNonBMPPlane(cset)) {
2825 CFIndex index;
2826
2827 for (index = MAX_ANNEX_PLANE;index > 0;index--) {
2828 if (NULL != __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, index)) {
2829 buffer->rangeLimit = (index + 1) << 16;
2830 break;
2831 }
2832 }
2833 }
2834 }