]> git.saurik.com Git - apple/cf.git/blob - CFCharacterSet.c
0daf28af64af35dfa3c4cef1a626df281def6a01
[apple/cf.git] / CFCharacterSet.c
1 /*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFCharacterSet.c
24 Copyright (c) 1999-2009, Apple Inc. All rights reserved.
25 Responsibility: Aki Inoue
26 */
27
28 #include <CoreFoundation/CFCharacterSet.h>
29 #include <CoreFoundation/CFByteOrder.h>
30 #include "CFCharacterSetPriv.h"
31 #include <CoreFoundation/CFData.h>
32 #include <CoreFoundation/CFString.h>
33 #include "CFInternal.h"
34 #include <CoreFoundation/CFUniChar.h>
35 #include "CFUniCharPriv.h"
36 #include <stdlib.h>
37 #include <string.h>
38
39
#define BITSPERBYTE     8       /* (CHAR_BIT * sizeof(unsigned char)) */
#define LOG_BPB         3       /* log2(BITSPERBYTE): character >> LOG_BPB is a byte index */
#define LOG_BPLW        5       /* log2(32): character >> LOG_BPLW is a 32-bit word index */
#define NUMCHARACTERS   65536   /* characters covered by one plane bitmap */

#define MAX_ANNEX_PLANE (16)

/* Number of things in the array keeping the bits.
*/
#define __kCFBitmapSize (NUMCHARACTERS / BITSPERBYTE)

/* How many elements max can be in an __kCFCharSetClassString CFCharacterSet
*/
#define __kCFStringCharSetMax 64

/* The last builtin set ID number
*/
#define __kCFLastBuiltinSetID kCFCharacterSetNewline

/* How many elements in the "singles" array before we use binary search.
*/
#define __kCFSetBreakeven 10

/* This tells us, within 1k or so, whether a thing is POTENTIALLY in the set (in the bitmap blob of the private structure) before we bother to do specific checking.
   Both arguments are fully parenthesized so that expressions can be passed
   safely, and the probe bit is built from an unsigned constant so that
   selecting bit 31 never shifts into the sign bit of a 32-bit long.
*/
#define __CFCSetBitsInRange(n, i) ((i)[(n) >> 15] & (1UL << (((n) >> 10) % 32)))

/* Compact bitmap params
*/
#define __kCFCompactBitmapNumPages (256)

#define __kCFCompactBitmapMaxPages (128) // the max pages allocated

#define __kCFCompactBitmapPageSize (__kCFBitmapSize / __kCFCompactBitmapNumPages)
74
75 typedef struct {
76 CFCharacterSetRef *_nonBMPPlanes;
77 unsigned int _validEntriesBitmap;
78 unsigned char _numOfAllocEntries;
79 unsigned char _isAnnexInverted;
80 uint16_t _padding;
81 } CFCharSetAnnexStruct;
82
83 struct __CFCharacterSet {
84 CFRuntimeBase _base;
85 CFHashCode _hashValue;
86 union {
87 struct {
88 CFIndex _type;
89 } _builtin;
90 struct {
91 UInt32 _firstChar;
92 CFIndex _length;
93 } _range;
94 struct {
95 UniChar *_buffer;
96 CFIndex _length;
97 } _string;
98 struct {
99 uint8_t *_bits;
100 } _bitmap;
101 struct {
102 uint8_t *_cBits;
103 } _compactBitmap;
104 } _variants;
105 CFCharSetAnnexStruct *_annex;
106 };
107
108 /* _base._info values interesting for CFCharacterSet
109 */
enum {
    /* Storage class of the set (selects the live _variants member) */
    __kCFCharSetClassTypeMask       = 0x0070,
    __kCFCharSetClassBuiltin        = 0x0000,
    __kCFCharSetClassRange          = 0x0010,
    __kCFCharSetClassString         = 0x0020,
    __kCFCharSetClassBitmap         = 0x0030,
    __kCFCharSetClassSet            = 0x0040,
    __kCFCharSetClassCompactBitmap  = 0x0040,   /* same value as __kCFCharSetClassSet */

    /* Inversion flag */
    __kCFCharSetIsInvertedMask      = 0x0008,
    __kCFCharSetIsInverted          = 0x0008,

    /* Cached hash value flag */
    __kCFCharSetHasHashValueMask    = 0x0004,
    __kCFCharSetHasHashValue        = 0x0004,

    /* Generic CFBase values */
    __kCFCharSetIsMutableMask       = 0x0001,
    __kCFCharSetIsMutable           = 0x0001,
};
129
130 /* Inline accessor macros for _base._info
131 */
132 CF_INLINE Boolean __CFCSetIsMutable(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetIsMutableMask) == __kCFCharSetIsMutable;}
133 CF_INLINE Boolean __CFCSetIsBuiltin(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassBuiltin;}
134 CF_INLINE Boolean __CFCSetIsRange(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassRange;}
135 CF_INLINE Boolean __CFCSetIsString(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassString;}
136 CF_INLINE Boolean __CFCSetIsBitmap(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassBitmap;}
137 CF_INLINE Boolean __CFCSetIsCompactBitmap(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassCompactBitmap;}
138 CF_INLINE Boolean __CFCSetIsInverted(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetIsInvertedMask) == __kCFCharSetIsInverted;}
139 CF_INLINE Boolean __CFCSetHasHashValue(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetHasHashValueMask) == __kCFCharSetHasHashValue;}
140 CF_INLINE UInt32 __CFCSetClassType(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask);}
141
142 CF_INLINE void __CFCSetPutIsMutable(CFMutableCharacterSetRef cset, Boolean isMutable) {(isMutable ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetIsMutable) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~ __kCFCharSetIsMutable));}
143 CF_INLINE void __CFCSetPutIsInverted(CFMutableCharacterSetRef cset, Boolean isInverted) {(isInverted ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetIsInverted) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetIsInverted));}
144 CF_INLINE void __CFCSetPutHasHashValue(CFMutableCharacterSetRef cset, Boolean hasHash) {(hasHash ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetHasHashValue) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetHasHashValue));}
145 CF_INLINE void __CFCSetPutClassType(CFMutableCharacterSetRef cset, UInt32 classType) {cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetClassTypeMask; cset->_base._cfinfo[CF_INFO_BITS] |= classType;}
146
147
148 /* Inline contents accessor macros
149 */
150 CF_INLINE CFCharacterSetPredefinedSet __CFCSetBuiltinType(CFCharacterSetRef cset) {return cset->_variants._builtin._type;}
151 CF_INLINE UInt32 __CFCSetRangeFirstChar(CFCharacterSetRef cset) {return cset->_variants._range._firstChar;}
152 CF_INLINE CFIndex __CFCSetRangeLength(CFCharacterSetRef cset) {return cset->_variants._range._length;}
153 CF_INLINE UniChar *__CFCSetStringBuffer(CFCharacterSetRef cset) {return (UniChar*)(cset->_variants._string._buffer);}
154 CF_INLINE CFIndex __CFCSetStringLength(CFCharacterSetRef cset) {return cset->_variants._string._length;}
155 CF_INLINE uint8_t *__CFCSetBitmapBits(CFCharacterSetRef cset) {return cset->_variants._bitmap._bits;}
156 CF_INLINE uint8_t *__CFCSetCompactBitmapBits(CFCharacterSetRef cset) {return cset->_variants._compactBitmap._cBits;}
157
158 CF_INLINE void __CFCSetPutBuiltinType(CFMutableCharacterSetRef cset, CFCharacterSetPredefinedSet type) {cset->_variants._builtin._type = type;}
159 CF_INLINE void __CFCSetPutRangeFirstChar(CFMutableCharacterSetRef cset, UInt32 first) {cset->_variants._range._firstChar = first;}
160 CF_INLINE void __CFCSetPutRangeLength(CFMutableCharacterSetRef cset, CFIndex length) {cset->_variants._range._length = length;}
161 CF_INLINE void __CFCSetPutStringBuffer(CFMutableCharacterSetRef cset, UniChar *theBuffer) {cset->_variants._string._buffer = theBuffer;}
162 CF_INLINE void __CFCSetPutStringLength(CFMutableCharacterSetRef cset, CFIndex length) {cset->_variants._string._length = length;}
163 CF_INLINE void __CFCSetPutBitmapBits(CFMutableCharacterSetRef cset, uint8_t *bits) {cset->_variants._bitmap._bits = bits;}
164 CF_INLINE void __CFCSetPutCompactBitmapBits(CFMutableCharacterSetRef cset, uint8_t *bits) {cset->_variants._compactBitmap._cBits = bits;}
165
166 /* Validation funcs
167 */
#if defined(CF_ENABLE_ASSERTIONS)
/* Asserts that `type` is a known predefined-set identifier (1 .. __kCFLastBuiltinSetID).
   The value is CFIndex-sized, so it is printed with %ld through an explicit
   (long) cast rather than %d.
*/
CF_INLINE void __CFCSetValidateBuiltinType(CFCharacterSetPredefinedSet type, const char *func) {
    CFAssert2(type > 0 && type <= __kCFLastBuiltinSetID, __kCFLogAssertion, "%s: Unknowen builtin type %ld", func, (long)type);
}

/* Asserts that the range starts at a non-negative location and ends at or
   below 0x1FFFFF.  Location and length are CFIndex values and are printed
   with %ld through explicit (long) casts.
*/
CF_INLINE void __CFCSetValidateRange(CFRange theRange, const char *func) {
    CFAssert3(theRange.location >= 0 && theRange.location + theRange.length <= 0x1FFFFF, __kCFLogAssertion, "%s: Range out of Unicode range (location -> %ld length -> %ld)", func, (long)theRange.location, (long)theRange.length);
}

/* Asserts that `cset` is a CFCharacterSet and that it is mutable. */
CF_INLINE void __CFCSetValidateTypeAndMutability(CFCharacterSetRef cset, const char *func) {
    __CFGenericValidateType(cset, __kCFCharacterSetTypeID);
    CFAssert1(__CFCSetIsMutable(cset), __kCFLogAssertion, "%s: Immutable character set passed to mutable function", func);
}
#else
/* Assertions disabled: the validators compile away to nothing. */
#define __CFCSetValidateBuiltinType(t,f)
#define __CFCSetValidateRange(r,f)
#define __CFCSetValidateTypeAndMutability(r,f)
#endif
184
185 /* Inline utility funcs
186 */
187 static Boolean __CFCSetIsEqualBitmap(const UInt32 *bits1, const UInt32 *bits2) {
188 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
189
190 if (bits1 == bits2) {
191 return true;
192 } else if (bits1 && bits2) {
193 if (bits1 == (const UInt32 *)-1) {
194 while (length--) if ((UInt32)-1 != *bits2++) return false;
195 } else if (bits2 == (const UInt32 *)-1) {
196 while (length--) if ((UInt32)-1 != *bits1++) return false;
197 } else {
198 while (length--) if (*bits1++ != *bits2++) return false;
199 }
200 return true;
201 } else if (!bits1 && !bits2) { // empty set
202 return true;
203 } else {
204 if (bits2) bits1 = bits2;
205 if (bits1 == (const UInt32 *)-1) return false;
206 while (length--) if (*bits1++) return false;
207 return true;
208 }
209 }
210
211 CF_INLINE Boolean __CFCSetIsEqualBitmapInverted(const UInt32 *bits1, const UInt32 *bits2) {
212 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
213
214 while (length--) if (*bits1++ != ~(*(bits2++))) return false;
215 return true;
216 }
217
218 static Boolean __CFCSetIsBitmapEqualToRange(const UInt32 *bits, UniChar firstChar, UniChar lastChar, Boolean isInverted) {
219 CFIndex firstCharIndex = firstChar >> LOG_BPB;
220 CFIndex lastCharIndex = lastChar >> LOG_BPB;
221 CFIndex length;
222 UInt32 value;
223
224 if (firstCharIndex == lastCharIndex) {
225 value = ((((UInt32)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((UInt32)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))))) << (((sizeof(UInt32) - 1) - (firstCharIndex % sizeof(UInt32))) * BITSPERBYTE);
226 value = CFSwapInt32HostToBig(value);
227 firstCharIndex = lastCharIndex = firstChar >> LOG_BPLW;
228 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
229 } else {
230 UInt32 firstCharMask;
231 UInt32 lastCharMask;
232
233 length = firstCharIndex % sizeof(UInt32);
234 firstCharMask = (((((UInt32)0xFF) << (firstChar & (BITSPERBYTE - 1))) & 0xFF) << (((sizeof(UInt32) - 1) - length) * BITSPERBYTE)) | (((UInt32)0xFFFFFFFF) >> ((length + 1) * BITSPERBYTE));
235
236 length = lastCharIndex % sizeof(UInt32);
237 lastCharMask = ((((UInt32)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))) << (((sizeof(UInt32) - 1) - length) * BITSPERBYTE)) | (((UInt32)0xFFFFFFFF) << ((sizeof(UInt32) - length) * BITSPERBYTE));
238
239 firstCharIndex = firstChar >> LOG_BPLW;
240 lastCharIndex = lastChar >> LOG_BPLW;
241
242 if (firstCharIndex == lastCharIndex) {
243 firstCharMask &= lastCharMask;
244 value = CFSwapInt32HostToBig(firstCharMask & lastCharMask);
245 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
246 } else {
247 value = CFSwapInt32HostToBig(firstCharMask);
248 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
249
250 value = CFSwapInt32HostToBig(lastCharMask);
251 if (*(bits + lastCharIndex) != (isInverted ? ~value : value)) return FALSE;
252 }
253 }
254
255 length = firstCharIndex;
256 value = (isInverted ? ((UInt32)0xFFFFFFFF) : 0);
257 while (length--) {
258 if (*(bits++) != value) return FALSE;
259 }
260
261 ++bits; // Skip firstCharIndex
262 length = (lastCharIndex - (firstCharIndex + 1));
263 value = (isInverted ? 0 : ((UInt32)0xFFFFFFFF));
264 while (length-- > 0) {
265 if (*(bits++) != value) return FALSE;
266 }
267 if (firstCharIndex != lastCharIndex) ++bits;
268
269 length = (0xFFFF >> LOG_BPLW) - lastCharIndex;
270 value = (isInverted ? ((UInt32)0xFFFFFFFF) : 0);
271 while (length--) {
272 if (*(bits++) != value) return FALSE;
273 }
274
275 return TRUE;
276 }
277
278 CF_INLINE Boolean __CFCSetIsBitmapSupersetOfBitmap(const UInt32 *bits1, const UInt32 *bits2, Boolean isInverted1, Boolean isInverted2) {
279 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
280 UInt32 val1, val2;
281
282 while (length--) {
283 val2 = (isInverted2 ? ~(*(bits2++)) : *(bits2++));
284 val1 = (isInverted1 ? ~(*(bits1++)) : *(bits1++)) & val2;
285 if (val1 != val2) return false;
286 }
287
288 return true;
289 }
290
291 CF_INLINE Boolean __CFCSetHasNonBMPPlane(CFCharacterSetRef cset) { return ((cset)->_annex && (cset)->_annex->_validEntriesBitmap ? true : false); }
292 CF_INLINE Boolean __CFCSetAnnexIsInverted (CFCharacterSetRef cset) { return ((cset)->_annex && (cset)->_annex->_isAnnexInverted ? true : false); }
293 CF_INLINE UInt32 __CFCSetAnnexValidEntriesBitmap(CFCharacterSetRef cset) { return ((cset)->_annex ? (cset)->_annex->_validEntriesBitmap : 0); }
294
295 CF_INLINE Boolean __CFCSetIsEmpty(CFCharacterSetRef cset) {
296 if (__CFCSetHasNonBMPPlane(cset) || __CFCSetAnnexIsInverted(cset)) return false;
297
298 switch (__CFCSetClassType(cset)) {
299 case __kCFCharSetClassRange: if (!__CFCSetRangeLength(cset)) return true; break;
300 case __kCFCharSetClassString: if (!__CFCSetStringLength(cset)) return true; break;
301 case __kCFCharSetClassBitmap: if (!__CFCSetBitmapBits(cset)) return true; break;
302 case __kCFCharSetClassCompactBitmap: if (!__CFCSetCompactBitmapBits(cset)) return true; break;
303 }
304 return false;
305 }
306
307 CF_INLINE void __CFCSetBitmapAddCharacter(uint8_t *bitmap, UniChar theChar) {
308 bitmap[(theChar) >> LOG_BPB] |= (((unsigned)1) << (theChar & (BITSPERBYTE - 1)));
309 }
310
311 CF_INLINE void __CFCSetBitmapRemoveCharacter(uint8_t *bitmap, UniChar theChar) {
312 bitmap[(theChar) >> LOG_BPB] &= ~(((unsigned)1) << (theChar & (BITSPERBYTE - 1)));
313 }
314
315 CF_INLINE Boolean __CFCSetIsMemberBitmap(const uint8_t *bitmap, UniChar theChar) {
316 return ((bitmap[(theChar) >> LOG_BPB] & (((unsigned)1) << (theChar & (BITSPERBYTE - 1)))) ? true : false);
317 }
318
319 #define NUM_32BIT_SLOTS (NUMCHARACTERS / 32)
320
321 CF_INLINE void __CFCSetBitmapFastFillWithValue(UInt32 *bitmap, uint8_t value) {
322 UInt32 mask = (value << 24) | (value << 16) | (value << 8) | value;
323 UInt32 numSlots = NUMCHARACTERS / 32;
324
325 while (numSlots--) *(bitmap++) = mask;
326 }
327
328 CF_INLINE void __CFCSetBitmapAddCharactersInRange(uint8_t *bitmap, UniChar firstChar, UniChar lastChar) {
329 if (firstChar == lastChar) {
330 bitmap[firstChar >> LOG_BPB] |= (((unsigned)1) << (firstChar & (BITSPERBYTE - 1)));
331 } else {
332 UInt32 idx = firstChar >> LOG_BPB;
333 UInt32 max = lastChar >> LOG_BPB;
334
335 if (idx == max) {
336 bitmap[idx] |= (((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
337 } else {
338 bitmap[idx] |= (((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1)));
339 bitmap[max] |= (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
340
341 ++idx;
342 while (idx < max) bitmap[idx++] = 0xFF;
343 }
344 }
345 }
346
347 CF_INLINE void __CFCSetBitmapRemoveCharactersInRange(uint8_t *bitmap, UniChar firstChar, UniChar lastChar) {
348 UInt32 idx = firstChar >> LOG_BPB;
349 UInt32 max = lastChar >> LOG_BPB;
350
351 if (idx == max) {
352 bitmap[idx] &= ~((((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))));
353 } else {
354 bitmap[idx] &= ~(((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1)));
355 bitmap[max] &= ~(((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
356
357 ++idx;
358 while (idx < max) bitmap[idx++] = 0;
359 }
360 }
361
/* Helpers for the annex "valid entries" bitmap: bit number `plane` records
   whether the slot for that plane is populated.  `bitmap` must be an lvalue
   for the Set/Clear forms.
*/
#define __CFCSetAnnexBitmapSetPlane(bitmap,plane) \
    ((bitmap) |= (1 << (plane)))
#define __CFCSetAnnexBitmapClearPlane(bitmap,plane) \
    ((bitmap) &= (~(1 << (plane))))
#define __CFCSetAnnexBitmapGetPlane(bitmap,plane) \
    ((bitmap) & (1 << (plane)))
365
366 CF_INLINE void __CFCSetAllocateAnnexForPlane(CFCharacterSetRef cset, int plane) {
367 if (cset->_annex == NULL) {
368 ((CFMutableCharacterSetRef)cset)->_annex = (CFCharSetAnnexStruct *)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharSetAnnexStruct), 0);
369 cset->_annex->_numOfAllocEntries = plane;
370 cset->_annex->_isAnnexInverted = false;
371 cset->_annex->_validEntriesBitmap = 0;
372 cset->_annex->_nonBMPPlanes = ((plane > 0) ? (CFCharacterSetRef*)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharacterSetRef) * plane, 0) : NULL);
373 } else if (cset->_annex->_numOfAllocEntries < plane) {
374 cset->_annex->_numOfAllocEntries = plane;
375 if (NULL == cset->_annex->_nonBMPPlanes) {
376 cset->_annex->_nonBMPPlanes = (CFCharacterSetRef*)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharacterSetRef) * plane, 0);
377 } else {
378 cset->_annex->_nonBMPPlanes = (CFCharacterSetRef*)CFAllocatorReallocate(CFGetAllocator(cset), (void *)cset->_annex->_nonBMPPlanes, sizeof(CFCharacterSetRef) * plane, 0);
379 }
380 }
381 }
382
383 CF_INLINE void __CFCSetAnnexSetIsInverted(CFCharacterSetRef cset, Boolean flag) {
384 if (flag) __CFCSetAllocateAnnexForPlane(cset, 0);
385 if (cset->_annex) ((CFMutableCharacterSetRef)cset)->_annex->_isAnnexInverted = flag;
386 }
387
388 CF_INLINE void __CFCSetPutCharacterSetToAnnexPlane(CFCharacterSetRef cset, CFCharacterSetRef annexCSet, int plane) {
389 __CFCSetAllocateAnnexForPlane(cset, plane);
390 if (__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane)) CFRelease(cset->_annex->_nonBMPPlanes[plane - 1]);
391 if (annexCSet) {
392 cset->_annex->_nonBMPPlanes[plane - 1] = (CFCharacterSetRef)CFRetain(annexCSet);
393 __CFCSetAnnexBitmapSetPlane(cset->_annex->_validEntriesBitmap, plane);
394 } else {
395 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, plane);
396 }
397 }
398
399 CF_INLINE CFCharacterSetRef __CFCSetGetAnnexPlaneCharacterSet(CFCharacterSetRef cset, int plane) {
400 __CFCSetAllocateAnnexForPlane(cset, plane);
401 if (!__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane)) {
402 cset->_annex->_nonBMPPlanes[plane - 1] = (CFCharacterSetRef)CFCharacterSetCreateMutable(CFGetAllocator(cset));
403 __CFCSetAnnexBitmapSetPlane(cset->_annex->_validEntriesBitmap, plane);
404 }
405 return cset->_annex->_nonBMPPlanes[plane - 1];
406 }
407
408 CF_INLINE CFCharacterSetRef __CFCSetGetAnnexPlaneCharacterSetNoAlloc(CFCharacterSetRef cset, int plane) {
409 return (cset->_annex && __CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane) ? cset->_annex->_nonBMPPlanes[plane - 1] : NULL);
410 }
411
412 CF_INLINE void __CFCSetDeallocateAnnexPlane(CFCharacterSetRef cset) {
413 if (cset->_annex) {
414 int idx;
415
416 for (idx = 0;idx < MAX_ANNEX_PLANE;idx++) {
417 if (__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, idx + 1)) {
418 CFRelease(cset->_annex->_nonBMPPlanes[idx]);
419 }
420 }
421 CFAllocatorDeallocate(CFGetAllocator(cset), cset->_annex->_nonBMPPlanes);
422 CFAllocatorDeallocate(CFGetAllocator(cset), cset->_annex);
423 ((CFMutableCharacterSetRef)cset)->_annex = NULL;
424 }
425 }
426
427 CF_INLINE uint8_t __CFCSetGetHeaderValue(const uint8_t *bitmap, int *numPages) {
428 uint8_t value = *bitmap;
429
430 if ((value == 0) || (value == UINT8_MAX)) {
431 int numBytes = __kCFCompactBitmapPageSize - 1;
432
433 while (numBytes > 0) {
434 if (*(++bitmap) != value) break;
435 --numBytes;
436 }
437 if (numBytes == 0) return value;
438 }
439 return (uint8_t)(++(*numPages));
440 }
441
442 CF_INLINE bool __CFCSetIsMemberInCompactBitmap(const uint8_t *compactBitmap, UTF16Char character) {
443 uint8_t value = compactBitmap[(character >> 8)]; // Assuming __kCFCompactBitmapNumPages == 256
444
445 if (value == 0) {
446 return false;
447 } else if (value == UINT8_MAX) {
448 return true;
449 } else {
450 compactBitmap += (__kCFCompactBitmapNumPages + (__kCFCompactBitmapPageSize * (value - 1)));
451 character &= 0xFF; // Assuming __kCFCompactBitmapNumPages == 256
452 return ((compactBitmap[(character / BITSPERBYTE)] & (1 << (character % BITSPERBYTE))) ? true : false);
453 }
454 }
455
456 CF_INLINE uint32_t __CFCSetGetCompactBitmapSize(const uint8_t *compactBitmap) {
457 uint32_t length = __kCFCompactBitmapNumPages;
458 uint32_t size = __kCFCompactBitmapNumPages;
459 uint8_t value;
460
461 while (length-- > 0) {
462 value = *(compactBitmap++);
463 if ((value != 0) && (value != UINT8_MAX)) size += __kCFCompactBitmapPageSize;
464 }
465 return size;
466 }
467
468 /* Take a private "set" structure and make a bitmap from it. Return the bitmap. THE CALLER MUST RELEASE THE RETURNED MEMORY as necessary.
469 */
470
471 CF_INLINE void __CFCSetBitmapProcessManyCharacters(unsigned char *map, unsigned n, unsigned m, Boolean isInverted) {
472 if (isInverted) {
473 __CFCSetBitmapRemoveCharactersInRange(map, n, m);
474 } else {
475 __CFCSetBitmapAddCharactersInRange(map, n, m);
476 }
477 }
478
479 CF_INLINE void __CFExpandCompactBitmap(const uint8_t *src, uint8_t *dst) {
480 const uint8_t *srcBody = src + __kCFCompactBitmapNumPages;
481 int i;
482 uint8_t value;
483
484 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
485 value = *(src++);
486 if ((value == 0) || (value == UINT8_MAX)) {
487 memset(dst, value, __kCFCompactBitmapPageSize);
488 } else {
489 memmove(dst, srcBody, __kCFCompactBitmapPageSize);
490 srcBody += __kCFCompactBitmapPageSize;
491 }
492 dst += __kCFCompactBitmapPageSize;
493 }
494 }
495
496
497 static void __CFCheckForExpandedSet(CFCharacterSetRef cset) {
498 static int8_t __CFNumberOfPlanesForLogging = -1;
499 static bool warnedOnce = false;
500
501 if (0 > __CFNumberOfPlanesForLogging) {
502 const char *envVar = __CFgetenv("CFCharacterSetCheckForExpandedSet");
503 long value = (envVar ? strtol_l(envVar, NULL, 0, NULL) : 0);
504 __CFNumberOfPlanesForLogging = (int8_t)(((value > 0) && (value <= 16)) ? value : 0);
505 }
506
507 if (__CFNumberOfPlanesForLogging) {
508 uint32_t entries = __CFCSetAnnexValidEntriesBitmap(cset);
509 int count = 0;
510
511 while (entries) {
512 if ((entries & 1) && (++count >= __CFNumberOfPlanesForLogging)) {
513 if (!warnedOnce) {
514 CFLog(kCFLogLevelWarning, CFSTR("An expanded CFMutableCharacter has been detected. Recommend to compact with CFCharacterSetCreateCopy"));
515 warnedOnce = true;
516 }
517 break;
518 }
519 entries >>= 1;
520 }
521 }
522 }
523
524 static void __CFCSetGetBitmap(CFCharacterSetRef cset, uint8_t *bits) {
525 uint8_t *bitmap;
526 CFIndex length = __kCFBitmapSize;
527
528 if (__CFCSetIsBitmap(cset) && (bitmap = __CFCSetBitmapBits(cset))) {
529 memmove(bits, bitmap, __kCFBitmapSize);
530 } else {
531 Boolean isInverted = __CFCSetIsInverted(cset);
532 uint8_t value = (isInverted ? (uint8_t)-1 : 0);
533
534 bitmap = bits;
535 while (length--) *bitmap++ = value; // Initialize the buffer
536
537 if (!__CFCSetIsEmpty(cset)) {
538 switch (__CFCSetClassType(cset)) {
539 case __kCFCharSetClassBuiltin: {
540 UInt8 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset), 0, bits, (isInverted != 0));
541 if (result == kCFUniCharBitmapEmpty && isInverted) {
542 length = __kCFBitmapSize;
543 bitmap = bits;
544 while (length--) *bitmap++ = 0;
545 } else if (result == kCFUniCharBitmapAll && !isInverted) {
546 length = __kCFBitmapSize;
547 bitmap = bits;
548 while (length--) *bitmap++ = (UInt8)0xFF;
549 }
550 }
551 break;
552
553 case __kCFCharSetClassRange: {
554 UInt32 theChar = __CFCSetRangeFirstChar(cset);
555 if (theChar < NUMCHARACTERS) { // the range starts in BMP
556 length = __CFCSetRangeLength(cset);
557 if (theChar + length >= NUMCHARACTERS) length = NUMCHARACTERS - theChar;
558 if (isInverted) {
559 __CFCSetBitmapRemoveCharactersInRange(bits, theChar, (UniChar)(theChar + length) - 1);
560 } else {
561 __CFCSetBitmapAddCharactersInRange(bits, theChar, (UniChar)(theChar + length) - 1);
562 }
563 }
564 }
565 break;
566
567 case __kCFCharSetClassString: {
568 const UniChar *buffer = __CFCSetStringBuffer(cset);
569 length = __CFCSetStringLength(cset);
570 while (length--) (isInverted ? __CFCSetBitmapRemoveCharacter(bits, *buffer++) : __CFCSetBitmapAddCharacter(bits, *buffer++));
571 }
572 break;
573
574 case __kCFCharSetClassCompactBitmap:
575 __CFExpandCompactBitmap(__CFCSetCompactBitmapBits(cset), bits);
576 break;
577 }
578 }
579 }
580 }
581
582 static Boolean __CFCharacterSetEqual(CFTypeRef cf1, CFTypeRef cf2);
583
584 static Boolean __CFCSetIsEqualAnnex(CFCharacterSetRef cf1, CFCharacterSetRef cf2) {
585 CFCharacterSetRef subSet1;
586 CFCharacterSetRef subSet2;
587 Boolean isAnnexInvertStateIdentical = (__CFCSetAnnexIsInverted(cf1) == __CFCSetAnnexIsInverted(cf2) ? true: false);
588 int idx;
589
590 if (isAnnexInvertStateIdentical) {
591 if (__CFCSetAnnexValidEntriesBitmap(cf1) != __CFCSetAnnexValidEntriesBitmap(cf2)) return false;
592 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
593 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1, idx);
594 subSet2 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2, idx);
595
596 if (subSet1 && !__CFCharacterSetEqual(subSet1, subSet2)) return false;
597 }
598 } else {
599 uint8_t bitsBuf[__kCFBitmapSize];
600 uint8_t bitsBuf2[__kCFBitmapSize];
601
602 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
603 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1, idx);
604 subSet2 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2, idx);
605
606 if (subSet1 == NULL && subSet2 == NULL) {
607 return false;
608 } else if (subSet1 == NULL) {
609 if (__CFCSetIsBitmap(subSet2)) {
610 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(subSet2), (const UInt32 *)-1)) {
611 return false;
612 }
613 } else {
614 __CFCSetGetBitmap(subSet2, bitsBuf);
615 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1)) {
616 return false;
617 }
618 }
619 } else if (subSet2 == NULL) {
620 if (__CFCSetIsBitmap(subSet1)) {
621 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)-1)) {
622 return false;
623 }
624 } else {
625 __CFCSetGetBitmap(subSet1, bitsBuf);
626 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1)) {
627 return false;
628 }
629 }
630 } else {
631 Boolean isBitmap1 = __CFCSetIsBitmap(subSet1);
632 Boolean isBitmap2 = __CFCSetIsBitmap(subSet2);
633
634 if (isBitmap1 && isBitmap2) {
635 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)__CFCSetBitmapBits(subSet2))) {
636 return false;
637 }
638 } else if (!isBitmap1 && !isBitmap2) {
639 __CFCSetGetBitmap(subSet1, bitsBuf);
640 __CFCSetGetBitmap(subSet2, bitsBuf2);
641 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)bitsBuf, (const UInt32 *)bitsBuf2)) {
642 return false;
643 }
644 } else {
645 if (isBitmap2) {
646 CFCharacterSetRef tmp = subSet2;
647 subSet2 = subSet1;
648 subSet1 = tmp;
649 }
650 __CFCSetGetBitmap(subSet2, bitsBuf);
651 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)bitsBuf)) {
652 return false;
653 }
654 }
655 }
656 }
657 }
658 return true;
659 }
660
661 /* Compact bitmap
662 */
663 static uint8_t *__CFCreateCompactBitmap(CFAllocatorRef allocator, const uint8_t *bitmap) {
664 const uint8_t *src;
665 uint8_t *dst;
666 int i;
667 int numPages = 0;
668 uint8_t header[__kCFCompactBitmapNumPages];
669
670 src = bitmap;
671 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
672 header[i] = __CFCSetGetHeaderValue(src, &numPages);
673
674 // Allocating more pages is probably not interesting enough to be compact
675 if (numPages > __kCFCompactBitmapMaxPages) return NULL;
676 src += __kCFCompactBitmapPageSize;
677 }
678
679 dst = (uint8_t *)CFAllocatorAllocate(allocator, __kCFCompactBitmapNumPages + (__kCFCompactBitmapPageSize * numPages), 0);
680
681 if (numPages > 0) {
682 uint8_t *dstBody = dst + __kCFCompactBitmapNumPages;
683
684 src = bitmap;
685 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
686 dst[i] = header[i];
687
688 if ((dst[i] != 0) && (dst[i] != UINT8_MAX)) {
689 memmove(dstBody, src, __kCFCompactBitmapPageSize);
690 dstBody += __kCFCompactBitmapPageSize;
691 }
692 src += __kCFCompactBitmapPageSize;
693 }
694 } else {
695 memmove(dst, header, __kCFCompactBitmapNumPages);
696 }
697
698 return dst;
699 }
700
701 static void __CFCSetMakeCompact(CFMutableCharacterSetRef cset) {
702 if (__CFCSetIsBitmap(cset) && __CFCSetBitmapBits(cset)) {
703 uint8_t *bitmap = __CFCSetBitmapBits(cset);
704 uint8_t *cBitmap = __CFCreateCompactBitmap(CFGetAllocator(cset), bitmap);
705
706 if (cBitmap) {
707 CFAllocatorDeallocate(CFGetAllocator(cset), bitmap);
708 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
709 __CFCSetPutCompactBitmapBits(cset, cBitmap);
710 }
711 }
712 }
713
714 static void __CFCSetAddNonBMPPlanesInRange(CFMutableCharacterSetRef cset, CFRange range) {
715 int firstChar = (range.location & 0xFFFF);
716 int maxChar = range.location + range.length;
717 int idx = range.location >> 16; // first plane
718 int maxPlane = (maxChar - 1) >> 16; // last plane
719 CFRange planeRange;
720 CFMutableCharacterSetRef annexPlane;
721
722 maxChar &= 0xFFFF;
723
724 for (idx = (idx ? idx : 1);idx <= maxPlane;idx++) {
725 planeRange.location = __CFMax(firstChar, 0);
726 planeRange.length = (idx == maxPlane && maxChar ? maxChar : 0x10000) - planeRange.location;
727 if (__CFCSetAnnexIsInverted(cset)) {
728 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, idx))) {
729 CFCharacterSetRemoveCharactersInRange(annexPlane, planeRange);
730 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) {
731 CFRelease(annexPlane);
732 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, idx);
733 }
734 }
735 } else {
736 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx), planeRange);
737 }
738 }
739 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) __CFCSetDeallocateAnnexPlane(cset);
740 }
741
742 static void __CFCSetRemoveNonBMPPlanesInRange(CFMutableCharacterSetRef cset, CFRange range) {
743 int firstChar = (range.location & 0xFFFF);
744 int maxChar = range.location + range.length;
745 int idx = range.location >> 16; // first plane
746 int maxPlane = (maxChar - 1) >> 16; // last plane
747 CFRange planeRange;
748 CFMutableCharacterSetRef annexPlane;
749
750 maxChar &= 0xFFFF;
751
752 for (idx = (idx ? idx : 1);idx <= maxPlane;idx++) {
753 planeRange.location = __CFMax(firstChar, 0);
754 planeRange.length = (idx == maxPlane && maxChar ? maxChar : 0x10000) - planeRange.location;
755 if (__CFCSetAnnexIsInverted(cset)) {
756 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx), planeRange);
757 } else {
758 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, idx))) {
759 CFCharacterSetRemoveCharactersInRange(annexPlane, planeRange);
760 if(__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) {
761 CFRelease(annexPlane);
762 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, idx);
763 }
764 }
765 }
766 }
767 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) __CFCSetDeallocateAnnexPlane(cset);
768 }
769
770 static void __CFCSetMakeBitmap(CFMutableCharacterSetRef cset) {
771 if (!__CFCSetIsBitmap(cset) || !__CFCSetBitmapBits(cset)) {
772 CFAllocatorRef allocator = CFGetAllocator(cset);
773 uint8_t *bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
774 __CFCSetGetBitmap(cset, bitmap);
775
776 if (__CFCSetIsBuiltin(cset)) {
777 CFIndex numPlanes = CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(cset));
778
779 if (numPlanes > 1) {
780 CFMutableCharacterSetRef annexSet;
781 uint8_t *annexBitmap = NULL;
782 int idx;
783 UInt8 result;
784
785 __CFCSetAllocateAnnexForPlane(cset, numPlanes - 1);
786 for (idx = 1;idx < numPlanes;idx++) {
787 if (NULL == annexBitmap) {
788 annexBitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
789 }
790 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset), idx, annexBitmap, false);
791 if (result == kCFUniCharBitmapEmpty) continue;
792 if (result == kCFUniCharBitmapAll) {
793 CFIndex bitmapLength = __kCFBitmapSize;
794 uint8_t *bytes = annexBitmap;
795 while (bitmapLength-- > 0) *(bytes++) = (uint8_t)0xFF;
796 }
797 annexSet = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx);
798 __CFCSetPutClassType(annexSet, __kCFCharSetClassBitmap);
799 __CFCSetPutBitmapBits(annexSet, annexBitmap);
800 __CFCSetPutIsInverted(annexSet, false);
801 __CFCSetPutHasHashValue(annexSet, false);
802 annexBitmap = NULL;
803 }
804 if (annexBitmap) CFAllocatorDeallocate(allocator, annexBitmap);
805 }
806 } else if (__CFCSetIsCompactBitmap(cset) && __CFCSetCompactBitmapBits(cset)) {
807 CFAllocatorDeallocate(allocator, __CFCSetCompactBitmapBits(cset));
808 __CFCSetPutCompactBitmapBits(cset, NULL);
809 } else if (__CFCSetIsString(cset) && __CFCSetStringBuffer(cset)) {
810 CFAllocatorDeallocate(allocator, __CFCSetStringBuffer(cset));
811 __CFCSetPutStringBuffer(cset, NULL);
812 } else if (__CFCSetIsRange(cset)) { // We may have to allocate annex here
813 Boolean needsToInvert = (!__CFCSetHasNonBMPPlane(cset) && __CFCSetIsInverted(cset) ? true : false);
814 __CFCSetAddNonBMPPlanesInRange(cset, CFRangeMake(__CFCSetRangeFirstChar(cset), __CFCSetRangeLength(cset)));
815 if (needsToInvert) __CFCSetAnnexSetIsInverted(cset, true);
816 }
817 __CFCSetPutClassType(cset, __kCFCharSetClassBitmap);
818 __CFCSetPutBitmapBits(cset, bitmap);
819 __CFCSetPutIsInverted(cset, false);
820 }
821 }
822
823 CF_INLINE CFMutableCharacterSetRef __CFCSetGenericCreate(CFAllocatorRef allocator, UInt32 flags) {
824 CFMutableCharacterSetRef cset;
825 CFIndex size = sizeof(struct __CFCharacterSet) - sizeof(CFRuntimeBase);
826
827 cset = (CFMutableCharacterSetRef)_CFRuntimeCreateInstance(allocator, CFCharacterSetGetTypeID(), size, NULL);
828 if (NULL == cset) return NULL;
829
830 cset->_base._cfinfo[CF_INFO_BITS] |= flags;
831 cset->_hashValue = 0;
832 cset->_annex = NULL;
833
834 return cset;
835 }
836
837 static void __CFApplySurrogatesInString(CFMutableCharacterSetRef cset, CFStringRef string, void (*applyer)(CFMutableCharacterSetRef, CFRange)) {
838 CFStringInlineBuffer buffer;
839 CFIndex index, length = CFStringGetLength(string);
840 CFRange range = CFRangeMake(0, 0);
841 UTF32Char character;
842
843 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
844
845 for (index = 0;index < length;index++) {
846 character = __CFStringGetCharacterFromInlineBufferQuick(&buffer, index);
847
848 if (CFStringIsSurrogateHighCharacter(character) && ((index + 1) < length)) {
849 UTF16Char other = __CFStringGetCharacterFromInlineBufferQuick(&buffer, index + 1);
850
851 if (CFStringIsSurrogateLowCharacter(other)) {
852 character = CFStringGetLongCharacterForSurrogatePair(character, other);
853
854 if ((range.length + range.location) == character) {
855 ++range.length;
856 } else {
857 if (range.length > 0) applyer(cset, range);
858 range.location = character;
859 range.length = 1;
860 }
861 }
862
863 ++index; // skip the low surrogate
864 }
865 }
866
867 if (range.length > 0) applyer(cset, range);
868 }
869
870
871 /* Bsearch theChar for __kCFCharSetClassString
872 */
873 CF_INLINE Boolean __CFCSetBsearchUniChar(const UniChar *theTable, CFIndex length, UniChar theChar) {
874 const UniChar *p, *q, *divider;
875
876 if ((theChar < theTable[0]) || (theChar > theTable[length - 1])) return false;
877
878 p = theTable;
879 q = p + (length - 1);
880 while (p <= q) {
881 divider = p + ((q - p) >> 1); /* divide by 2 */
882 if (theChar < *divider) q = divider - 1;
883 else if (theChar > *divider) p = divider + 1;
884 else return true;
885 }
886 return false;
887 }
888
889 /* Array of instantiated builtin sets. Builtin set IDs start at 1, so the set with a given ID lives at index ID - 1. The array is allocated lazily by CFCharacterSetGetPredefined() and is read and written while holding __CFCharacterSetLock.
890 */
891 static CFCharacterSetRef *__CFBuiltinSets = NULL;
892
893 /* Global lock for character sets. In the code visible here it guards accesses to __CFBuiltinSets.
894 */
895 static CFSpinLock_t __CFCharacterSetLock = CFSpinLockInit;
896
897 /* CFBase API functions
898 */
899 static Boolean __CFCharacterSetEqual(CFTypeRef cf1, CFTypeRef cf2) {
900 Boolean isInvertStateIdentical = (__CFCSetIsInverted((CFCharacterSetRef)cf1) == __CFCSetIsInverted((CFCharacterSetRef)cf2) ? true: false);
901 Boolean isAnnexInvertStateIdentical = (__CFCSetAnnexIsInverted((CFCharacterSetRef)cf1) == __CFCSetAnnexIsInverted((CFCharacterSetRef)cf2) ? true: false);
902 CFIndex idx;
903 CFCharacterSetRef subSet1;
904 uint8_t bitsBuf[__kCFBitmapSize];
905 uint8_t *bits;
906 Boolean isBitmap1;
907 Boolean isBitmap2;
908
909 if (__CFCSetHasHashValue((CFCharacterSetRef)cf1) && __CFCSetHasHashValue((CFCharacterSetRef)cf2) && ((CFCharacterSetRef)cf1)->_hashValue != ((CFCharacterSetRef)cf2)->_hashValue) return false;
910 if (__CFCSetIsEmpty((CFCharacterSetRef)cf1) && __CFCSetIsEmpty((CFCharacterSetRef)cf2) && !isInvertStateIdentical) return false;
911
912 if (__CFCSetClassType((CFCharacterSetRef)cf1) == __CFCSetClassType((CFCharacterSetRef)cf2)) { // Types are identical, we can do it fast
913 switch (__CFCSetClassType((CFCharacterSetRef)cf1)) {
914 case __kCFCharSetClassBuiltin:
915 return (__CFCSetBuiltinType((CFCharacterSetRef)cf1) == __CFCSetBuiltinType((CFCharacterSetRef)cf2) && isInvertStateIdentical ? true : false);
916
917 case __kCFCharSetClassRange:
918 return (__CFCSetRangeFirstChar((CFCharacterSetRef)cf1) == __CFCSetRangeFirstChar((CFCharacterSetRef)cf2) && __CFCSetRangeLength((CFCharacterSetRef)cf1) && __CFCSetRangeLength((CFCharacterSetRef)cf2) && isInvertStateIdentical ? true : false);
919
920 case __kCFCharSetClassString:
921 if (__CFCSetStringLength((CFCharacterSetRef)cf1) == __CFCSetStringLength((CFCharacterSetRef)cf2) && isInvertStateIdentical) {
922 const UniChar *buf1 = __CFCSetStringBuffer((CFCharacterSetRef)cf1);
923 const UniChar *buf2 = __CFCSetStringBuffer((CFCharacterSetRef)cf2);
924 CFIndex length = __CFCSetStringLength((CFCharacterSetRef)cf1);
925
926 while (length--) if (*buf1++ != *buf2++) return false;
927 } else {
928 return false;
929 }
930 break;
931
932 case __kCFCharSetClassBitmap:
933 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf2))) return false;
934 break;
935 }
936 return __CFCSetIsEqualAnnex((CFCharacterSetRef)cf1, (CFCharacterSetRef)cf2);
937 }
938
939 // Check for easy empty cases
940 if (__CFCSetIsEmpty((CFCharacterSetRef)cf1) || __CFCSetIsEmpty((CFCharacterSetRef)cf2)) {
941 CFCharacterSetRef emptySet = (__CFCSetIsEmpty((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
942 CFCharacterSetRef nonEmptySet = (emptySet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
943
944 if (__CFCSetIsBuiltin(nonEmptySet)) {
945 return false;
946 } else if (__CFCSetIsRange(nonEmptySet)) {
947 if (isInvertStateIdentical) {
948 return (__CFCSetRangeLength(nonEmptySet) ? false : true);
949 } else {
950 return (__CFCSetRangeLength(nonEmptySet) == 0x110000 ? true : false);
951 }
952 } else {
953 if (__CFCSetAnnexIsInverted(nonEmptySet)) {
954 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet) != 0x1FFFE) return false;
955 } else {
956 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet)) return false;
957 }
958
959 if (__CFCSetIsBitmap(nonEmptySet)) {
960 bits = __CFCSetBitmapBits(nonEmptySet);
961 } else {
962 bits = bitsBuf;
963 __CFCSetGetBitmap(nonEmptySet, bitsBuf);
964 }
965
966 if (__CFCSetIsEqualBitmap(NULL, (const UInt32 *)bits)) {
967 if (!__CFCSetAnnexIsInverted(nonEmptySet)) return true;
968 } else {
969 return false;
970 }
971
972 // Annex set has to be CFRangeMake(0x10000, 0xfffff)
973 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
974 if (__CFCSetIsBitmap(nonEmptySet)) {
975 if (!__CFCSetIsEqualBitmap((__CFCSetAnnexIsInverted(nonEmptySet) ? NULL : (const UInt32 *)-1), (const UInt32 *)bitsBuf)) return false;
976 } else {
977 __CFCSetGetBitmap(__CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonEmptySet, idx), bitsBuf);
978 if (!__CFCSetIsEqualBitmap((const UInt32 *)-1, (const UInt32 *)bitsBuf)) return false;
979 }
980 }
981 return true;
982 }
983 }
984
985 if (__CFCSetIsBuiltin((CFCharacterSetRef)cf1) || __CFCSetIsBuiltin((CFCharacterSetRef)cf2)) {
986 CFCharacterSetRef builtinSet = (__CFCSetIsBuiltin((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
987 CFCharacterSetRef nonBuiltinSet = (builtinSet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
988
989
990 if (__CFCSetIsRange(nonBuiltinSet)) {
991 UTF32Char firstChar = __CFCSetRangeFirstChar(nonBuiltinSet);
992 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(nonBuiltinSet) - 1);
993 uint8_t firstPlane = (firstChar >> 16) & 0xFF;
994 uint8_t lastPlane = (lastChar >> 16) & 0xFF;
995 uint8_t result;
996
997 for (idx = 0;idx < MAX_ANNEX_PLANE;idx++) {
998 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), idx, bitsBuf, (isInvertStateIdentical != 0));
999
1000 if (idx < firstPlane || idx > lastPlane) {
1001 if (result == kCFUniCharBitmapAll) {
1002 return false;
1003 } else if (result == kCFUniCharBitmapFilled) {
1004 if (!__CFCSetIsEqualBitmap(NULL, (const UInt32 *)bitsBuf)) return false;
1005 }
1006 } else if (idx > firstPlane && idx < lastPlane) {
1007 if (result == kCFUniCharBitmapEmpty) {
1008 return false;
1009 } else if (result == kCFUniCharBitmapFilled) {
1010 if (!__CFCSetIsEqualBitmap((const UInt32 *)-1, (const UInt32 *)bitsBuf)) return false;
1011 }
1012 } else {
1013 if (result == kCFUniCharBitmapEmpty) {
1014 return false;
1015 } else if (result == kCFUniCharBitmapAll) {
1016 if (idx == firstPlane) {
1017 if (((firstChar & 0xFFFF) != 0) || (firstPlane == lastPlane && ((lastChar & 0xFFFF) != 0xFFFF))) return false;
1018 } else {
1019 if (((lastChar & 0xFFFF) != 0xFFFF) || (firstPlane == lastPlane && ((firstChar & 0xFFFF) != 0))) return false;
1020 }
1021 } else {
1022 if (idx == firstPlane) {
1023 if (!__CFCSetIsBitmapEqualToRange((const UInt32 *)bitsBuf, firstChar & 0xFFFF, (firstPlane == lastPlane ? lastChar & 0xFFFF : 0xFFFF), false)) return false;
1024 } else {
1025 if (!__CFCSetIsBitmapEqualToRange((const UInt32 *)bitsBuf, (firstPlane == lastPlane ? firstChar & 0xFFFF : 0), lastChar & 0xFFFF, false)) return false;
1026 }
1027 }
1028 }
1029 }
1030 return true;
1031 } else {
1032 uint8_t bitsBuf2[__kCFBitmapSize];
1033 uint8_t result;
1034
1035 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), 0, bitsBuf, (__CFCSetIsInverted(builtinSet) != 0));
1036 if (result == kCFUniCharBitmapFilled) {
1037 if (__CFCSetIsBitmap(nonBuiltinSet)) {
1038 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)__CFCSetBitmapBits(nonBuiltinSet))) return false;
1039 } else {
1040
1041 __CFCSetGetBitmap(nonBuiltinSet, bitsBuf2);
1042 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)bitsBuf2)) {
1043 return false;
1044 }
1045 }
1046 } else {
1047 if (__CFCSetIsBitmap(nonBuiltinSet)) {
1048 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1 : NULL), (const UInt32 *)__CFCSetBitmapBits(nonBuiltinSet))) return false;
1049 } else {
1050 __CFCSetGetBitmap(nonBuiltinSet, bitsBuf);
1051 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32 *)bitsBuf)) return false;
1052 }
1053 }
1054
1055 isInvertStateIdentical = (__CFCSetIsInverted(builtinSet) == __CFCSetAnnexIsInverted(nonBuiltinSet) ? true : false);
1056
1057 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
1058 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), idx, bitsBuf, !isInvertStateIdentical);
1059 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonBuiltinSet, idx);
1060
1061 if (result == kCFUniCharBitmapFilled) {
1062 if (NULL == subSet1) {
1063 return false;
1064 } else if (__CFCSetIsBitmap(subSet1)) {
1065 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)__CFCSetBitmapBits(subSet1))) {
1066 return false;
1067 }
1068 } else {
1069
1070 __CFCSetGetBitmap(subSet1, bitsBuf2);
1071 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)bitsBuf2)) {
1072 return false;
1073 }
1074 }
1075 } else {
1076 if (NULL == subSet1) {
1077 if (result == kCFUniCharBitmapAll) {
1078 return false;
1079 }
1080 } else if (__CFCSetIsBitmap(subSet1)) {
1081 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32*)__CFCSetBitmapBits(subSet1))) {
1082 return false;
1083 }
1084 } else {
1085 __CFCSetGetBitmap(subSet1, bitsBuf);
1086 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32*)bitsBuf)) {
1087 return false;
1088 }
1089 }
1090 }
1091 }
1092 return true;
1093 }
1094 }
1095
1096 if (__CFCSetIsRange((CFCharacterSetRef)cf1) || __CFCSetIsRange((CFCharacterSetRef)cf2)) {
1097 CFCharacterSetRef rangeSet = (__CFCSetIsRange((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
1098 CFCharacterSetRef nonRangeSet = (rangeSet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
1099 UTF32Char firstChar = __CFCSetRangeFirstChar(rangeSet);
1100 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(rangeSet) - 1);
1101 uint8_t firstPlane = (firstChar >> 16) & 0xFF;
1102 uint8_t lastPlane = (lastChar >> 16) & 0xFF;
1103 Boolean isRangeSetInverted = __CFCSetIsInverted(rangeSet);
1104
1105 if (__CFCSetIsBitmap(nonRangeSet)) {
1106 bits = __CFCSetBitmapBits(nonRangeSet);
1107 } else {
1108 bits = bitsBuf;
1109 __CFCSetGetBitmap(nonRangeSet, bitsBuf);
1110 }
1111 if (firstPlane == 0) {
1112 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, firstChar, (lastPlane == 0 ? lastChar : 0xFFFF), isRangeSetInverted)) return false;
1113 firstPlane = 1;
1114 firstChar = 0;
1115 } else {
1116 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isRangeSetInverted ? (const UInt32 *)-1 : NULL))) return false;
1117 firstChar &= 0xFFFF;
1118 }
1119
1120 lastChar &= 0xFFFF;
1121
1122 isAnnexInvertStateIdentical = (isRangeSetInverted == __CFCSetAnnexIsInverted(nonRangeSet) ? true : false);
1123
1124 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
1125 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonRangeSet, idx);
1126 if (NULL == subSet1) {
1127 if (idx < firstPlane || idx > lastPlane) {
1128 if (!isAnnexInvertStateIdentical) return false;
1129 } else if (idx > firstPlane && idx < lastPlane) {
1130 if (isAnnexInvertStateIdentical) return false;
1131 } else if (idx == firstPlane) {
1132 if (isAnnexInvertStateIdentical || firstChar || (idx == lastPlane && lastChar != 0xFFFF)) return false;
1133 } else if (idx == lastPlane) {
1134 if (isAnnexInvertStateIdentical || (idx == firstPlane && firstChar) || (lastChar != 0xFFFF)) return false;
1135 }
1136 } else {
1137 if (__CFCSetIsBitmap(subSet1)) {
1138 bits = __CFCSetBitmapBits(subSet1);
1139 } else {
1140 __CFCSetGetBitmap(subSet1, bitsBuf);
1141 bits = bitsBuf;
1142 }
1143
1144 if (idx < firstPlane || idx > lastPlane) {
1145 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isAnnexInvertStateIdentical ? NULL : (const UInt32 *)-1))) return false;
1146 } else if (idx > firstPlane && idx < lastPlane) {
1147 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isAnnexInvertStateIdentical ? (const UInt32 *)-1 : NULL))) return false;
1148 } else if (idx == firstPlane) {
1149 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, firstChar, (idx == lastPlane ? lastChar : 0xFFFF), !isAnnexInvertStateIdentical)) return false;
1150 } else if (idx == lastPlane) {
1151 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, (idx == firstPlane ? firstChar : 0), lastChar, !isAnnexInvertStateIdentical)) return false;
1152 }
1153 }
1154 }
1155 return true;
1156 }
1157
1158 isBitmap1 = __CFCSetIsBitmap((CFCharacterSetRef)cf1);
1159 isBitmap2 = __CFCSetIsBitmap((CFCharacterSetRef)cf2);
1160
1161 if (isBitmap1 && isBitmap2) {
1162 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf2))) return false;
1163 } else if (!isBitmap1 && !isBitmap2) {
1164 uint8_t bitsBuf2[__kCFBitmapSize];
1165
1166 __CFCSetGetBitmap((CFCharacterSetRef)cf1, bitsBuf);
1167 __CFCSetGetBitmap((CFCharacterSetRef)cf2, bitsBuf2);
1168
1169 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)bitsBuf2)) {
1170 return false;
1171 }
1172 } else {
1173 if (isBitmap2) {
1174 CFCharacterSetRef tmp = (CFCharacterSetRef)cf2;
1175 cf2 = cf1;
1176 cf1 = tmp;
1177 }
1178
1179 __CFCSetGetBitmap((CFCharacterSetRef)cf2, bitsBuf);
1180
1181 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)bitsBuf)) return false;
1182 }
1183 return __CFCSetIsEqualAnnex((CFCharacterSetRef)cf1, (CFCharacterSetRef)cf2);
1184 }
1185
1186 static CFHashCode __CFCharacterSetHash(CFTypeRef cf) {
1187 if (!__CFCSetHasHashValue((CFCharacterSetRef)cf)) {
1188 if (__CFCSetIsEmpty((CFCharacterSetRef)cf)) {
1189 ((CFMutableCharacterSetRef)cf)->_hashValue = (__CFCSetIsInverted((CFCharacterSetRef)cf) ? ((UInt32)0xFFFFFFFF) : 0);
1190 } else if (__CFCSetIsBitmap( (CFCharacterSetRef) cf )) {
1191 ((CFMutableCharacterSetRef)cf)->_hashValue = CFHashBytes(__CFCSetBitmapBits((CFCharacterSetRef)cf), __kCFBitmapSize);
1192 } else {
1193 uint8_t bitsBuf[__kCFBitmapSize];
1194 __CFCSetGetBitmap((CFCharacterSetRef)cf, bitsBuf);
1195 ((CFMutableCharacterSetRef)cf)->_hashValue = CFHashBytes(bitsBuf, __kCFBitmapSize);
1196 }
1197 __CFCSetPutHasHashValue((CFMutableCharacterSetRef)cf, true);
1198 }
1199 return ((CFCharacterSetRef)cf)->_hashValue;
1200 }
1201
1202 static CFStringRef __CFCharacterSetCopyDescription(CFTypeRef cf) {
1203 CFMutableStringRef string;
1204 CFIndex idx;
1205 CFIndex length;
1206
1207 if (__CFCSetIsEmpty((CFCharacterSetRef)cf)) {
1208 return (CFStringRef)(__CFCSetIsInverted((CFCharacterSetRef)cf) ? CFRetain(CFSTR("<CFCharacterSet All>")) : CFRetain(CFSTR("<CFCharacterSet Empty>")));
1209 }
1210
1211 switch (__CFCSetClassType((CFCharacterSetRef)cf)) {
1212 case __kCFCharSetClassBuiltin:
1213 switch (__CFCSetBuiltinType((CFCharacterSetRef)cf)) {
1214 case kCFCharacterSetControl: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Control Set>"));
1215 case kCFCharacterSetWhitespace : return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Whitespace Set>"));
1216 case kCFCharacterSetWhitespaceAndNewline: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined WhitespaceAndNewline Set>"));
1217 case kCFCharacterSetDecimalDigit: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined DecimalDigit Set>"));
1218 case kCFCharacterSetLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Letter Set>"));
1219 case kCFCharacterSetLowercaseLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined LowercaseLetter Set>"));
1220 case kCFCharacterSetUppercaseLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined UppercaseLetter Set>"));
1221 case kCFCharacterSetNonBase: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined NonBase Set>"));
1222 case kCFCharacterSetDecomposable: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Decomposable Set>"));
1223 case kCFCharacterSetAlphaNumeric: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined AlphaNumeric Set>"));
1224 case kCFCharacterSetPunctuation: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Punctuation Set>"));
1225 case kCFCharacterSetIllegal: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Illegal Set>"));
1226 case kCFCharacterSetCapitalizedLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined CapitalizedLetter Set>"));
1227 case kCFCharacterSetSymbol: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Symbol Set>"));
1228 case kCFCharacterSetNewline: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Newline Set>"));
1229 }
1230 break;
1231
1232 case __kCFCharSetClassRange:
1233 return CFStringCreateWithFormat(CFGetAllocator((CFCharacterSetRef)cf), NULL, CFSTR("<CFCharacterSet Range(%d, %d)>"), __CFCSetRangeFirstChar((CFCharacterSetRef)cf), __CFCSetRangeLength((CFCharacterSetRef)cf));
1234
1235 case __kCFCharSetClassString: {
1236 CFStringRef format = CFSTR("<CFCharacterSet Items(");
1237
1238 length = __CFCSetStringLength((CFCharacterSetRef)cf);
1239 string = CFStringCreateMutable(CFGetAllocator(cf), CFStringGetLength(format) + 7 * length + 2); // length of format + "U+XXXX "(7) * length + ")>"(2)
1240 CFStringAppend(string, format);
1241 for (idx = 0;idx < length;idx++) {
1242 CFStringAppendFormat(string, NULL, CFSTR("%sU+%04X"), (idx > 0 ? " " : ""), (UInt32)((__CFCSetStringBuffer((CFCharacterSetRef)cf))[idx]));
1243 }
1244 CFStringAppend(string, CFSTR(")>"));
1245 return string;
1246 }
1247
1248 case __kCFCharSetClassBitmap:
1249 case __kCFCharSetClassCompactBitmap:
1250 return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Bitmap>")); // ??? Should generate description for 8k bitmap ?
1251 }
1252 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1253 return NULL;
1254 }
1255
1256 static void __CFCharacterSetDeallocate(CFTypeRef cf) {
1257 CFAllocatorRef allocator = CFGetAllocator(cf);
1258
1259 if (__CFCSetIsBuiltin((CFCharacterSetRef)cf) && !__CFCSetIsMutable((CFCharacterSetRef)cf) && !__CFCSetIsInverted((CFCharacterSetRef)cf)) {
1260 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)cf));
1261 if (sharedSet == cf) { // We're trying to dealloc the builtin set
1262 CFAssert1(0, __kCFLogAssertion, "%s: Trying to deallocate predefined set. The process is likely to crash.", __PRETTY_FUNCTION__);
1263 return; // We never deallocate builtin set
1264 }
1265 }
1266
1267 if (__CFCSetIsString((CFCharacterSetRef)cf) && __CFCSetStringBuffer((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetStringBuffer((CFCharacterSetRef)cf));
1268 else if (__CFCSetIsBitmap((CFCharacterSetRef)cf) && __CFCSetBitmapBits((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetBitmapBits((CFCharacterSetRef)cf));
1269 else if (__CFCSetIsCompactBitmap((CFCharacterSetRef)cf) && __CFCSetCompactBitmapBits((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetCompactBitmapBits((CFCharacterSetRef)cf));
1270 __CFCSetDeallocateAnnexPlane((CFCharacterSetRef)cf);
1271 }
1272
// Runtime type ID of CFCharacterSet. Assigned by __CFCharacterSetInitialize() and returned by CFCharacterSetGetTypeID().
1273 static CFTypeID __kCFCharacterSetTypeID = _kCFRuntimeNotATypeID;
1274
// Class record handed to _CFRuntimeRegisterClass(). The per-field notes follow the field order of CFRuntimeClass in CFRuntime.h.
1275 static const CFRuntimeClass __CFCharacterSetClass = {
1276 0, // version
1277 "CFCharacterSet", // className
1278 NULL, // init
1279 NULL, // copy
1280 __CFCharacterSetDeallocate, // finalize
1281 __CFCharacterSetEqual, // equal
1282 __CFCharacterSetHash, // hash
1283 NULL, // copyFormattingDesc
1284 __CFCharacterSetCopyDescription // copyDebugDesc
1285 };
1286
// Debug switch: set to true by __CFCharacterSetInitialize() when the environment variable __CF_DEBUG_EXPANDED_SET starts with 'Y'. Its consumers are outside this part of the file.
1287 static bool __CFCheckForExapendedSet = false;
1288
1289 __private_extern__ void __CFCharacterSetInitialize(void) {
1290 const char *checkForExpandedSet = __CFgetenv("__CF_DEBUG_EXPANDED_SET");
1291
1292 __kCFCharacterSetTypeID = _CFRuntimeRegisterClass(&__CFCharacterSetClass);
1293
1294 if (checkForExpandedSet && (*checkForExpandedSet == 'Y')) __CFCheckForExapendedSet = true;
1295 }
1296
1297 /* Public functions
1298 */
1299
1300 CFTypeID CFCharacterSetGetTypeID(void) {
1301 return __kCFCharacterSetTypeID;
1302 }
1303
1304 /*** CharacterSet creation ***/
1305 /* Functions to create basic immutable characterset.
1306 */
1307 CFCharacterSetRef CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier) {
1308 CFCharacterSetRef cset;
1309
1310 __CFCSetValidateBuiltinType(theSetIdentifier, __PRETTY_FUNCTION__);
1311
1312 __CFSpinLock(&__CFCharacterSetLock);
1313 cset = ((NULL != __CFBuiltinSets) ? __CFBuiltinSets[theSetIdentifier - 1] : NULL);
1314 __CFSpinUnlock(&__CFCharacterSetLock);
1315
1316 if (NULL != cset) return cset;
1317
1318 if (!(cset = __CFCSetGenericCreate(kCFAllocatorSystemDefault, __kCFCharSetClassBuiltin))) return NULL;
1319 __CFCSetPutBuiltinType((CFMutableCharacterSetRef)cset, theSetIdentifier);
1320
1321 __CFSpinLock(&__CFCharacterSetLock);
1322 if (!__CFBuiltinSets) {
1323 __CFBuiltinSets = (CFCharacterSetRef *)CFAllocatorAllocate((CFAllocatorRef)CFRetain(__CFGetDefaultAllocator()), sizeof(CFCharacterSetRef) * __kCFLastBuiltinSetID, 0);
1324 memset(__CFBuiltinSets, 0, sizeof(CFCharacterSetRef) * __kCFLastBuiltinSetID);
1325 }
1326
1327 __CFBuiltinSets[theSetIdentifier - 1] = cset;
1328 __CFSpinUnlock(&__CFCharacterSetLock);
1329
1330 return cset;
1331 }
1332
1333 CFCharacterSetRef CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef allocator, CFRange theRange) {
1334 CFMutableCharacterSetRef cset;
1335
1336 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
1337
1338 if (theRange.length) {
1339 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassRange))) return NULL;
1340 __CFCSetPutRangeFirstChar(cset, theRange.location);
1341 __CFCSetPutRangeLength(cset, theRange.length);
1342 } else {
1343 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap))) return NULL;
1344 __CFCSetPutBitmapBits(cset, NULL);
1345 __CFCSetPutHasHashValue(cset, true); // _hashValue is 0
1346 }
1347
1348 return cset;
1349 }
1350
1351 static int chcompar(const void *a, const void *b) {
1352 return -(int)(*(UniChar *)b - *(UniChar *)a);
1353 }
1354
1355 CFCharacterSetRef CFCharacterSetCreateWithCharactersInString(CFAllocatorRef allocator, CFStringRef theString) {
1356 CFIndex length;
1357
1358 length = CFStringGetLength(theString);
1359 if (length < __kCFStringCharSetMax) {
1360 CFMutableCharacterSetRef cset;
1361
1362 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassString))) return NULL;
1363 __CFCSetPutStringBuffer(cset, (UniChar *)CFAllocatorAllocate(CFGetAllocator(cset), __kCFStringCharSetMax * sizeof(UniChar), 0));
1364 __CFCSetPutStringLength(cset, length);
1365 CFStringGetCharacters(theString, CFRangeMake(0, length), __CFCSetStringBuffer(cset));
1366 qsort(__CFCSetStringBuffer(cset), length, sizeof(UniChar), chcompar);
1367
1368 if (0 == length) {
1369 __CFCSetPutHasHashValue(cset, true); // _hashValue is 0
1370 } else if (length > 1) { // Check for surrogate
1371 const UTF16Char *characters = __CFCSetStringBuffer(cset);
1372 const UTF16Char *charactersLimit = characters + length;
1373
1374 if ((*characters < 0xDC00UL) && (*(charactersLimit - 1) > 0xDBFFUL)) { // might have surrogate chars
1375 while (characters < charactersLimit) {
1376 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) {
1377 CFRelease(cset);
1378 cset = NULL;
1379 break;
1380 }
1381 ++characters;
1382 }
1383 }
1384 }
1385 if (NULL != cset) return cset;
1386 }
1387
1388 CFMutableCharacterSetRef mcset = CFCharacterSetCreateMutable(allocator);
1389 CFCharacterSetAddCharactersInString(mcset, theString);
1390 __CFCSetMakeCompact(mcset);
1391 __CFCSetPutIsMutable(mcset, false);
1392 return mcset;
1393 }
1394
1395 CFCharacterSetRef CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef allocator, CFDataRef theData) {
1396 CFMutableCharacterSetRef cset;
1397 CFIndex length;
1398
1399 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap))) return NULL;
1400
1401 if (theData && (length = CFDataGetLength(theData)) > 0) {
1402 uint8_t *bitmap;
1403 uint8_t *cBitmap;
1404
1405 if (length < __kCFBitmapSize) {
1406 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1407 memmove(bitmap, CFDataGetBytePtr(theData), length);
1408 memset(bitmap + length, 0, __kCFBitmapSize - length);
1409
1410 cBitmap = __CFCreateCompactBitmap(allocator, bitmap);
1411
1412 if (cBitmap == NULL) {
1413 __CFCSetPutBitmapBits(cset, bitmap);
1414 } else {
1415 CFAllocatorDeallocate(allocator, bitmap);
1416 __CFCSetPutCompactBitmapBits(cset, cBitmap);
1417 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1418 }
1419 } else {
1420 cBitmap = __CFCreateCompactBitmap(allocator, CFDataGetBytePtr(theData));
1421
1422 if (cBitmap == NULL) {
1423 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1424 memmove(bitmap, CFDataGetBytePtr(theData), __kCFBitmapSize);
1425
1426 __CFCSetPutBitmapBits(cset, bitmap);
1427 } else {
1428 __CFCSetPutCompactBitmapBits(cset, cBitmap);
1429 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1430 }
1431
1432 if (length > __kCFBitmapSize) {
1433 CFMutableCharacterSetRef annexSet;
1434 const uint8_t *bytes = CFDataGetBytePtr(theData) + __kCFBitmapSize;
1435
1436 length -= __kCFBitmapSize;
1437
1438 while (length > 1) {
1439 annexSet = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, *(bytes++));
1440 --length; // Decrement the plane no byte
1441
1442 if (length < __kCFBitmapSize) {
1443 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1444 memmove(bitmap, bytes, length);
1445 memset(bitmap + length, 0, __kCFBitmapSize - length);
1446
1447 cBitmap = __CFCreateCompactBitmap(allocator, bitmap);
1448
1449 if (cBitmap == NULL) {
1450 __CFCSetPutBitmapBits(annexSet, bitmap);
1451 } else {
1452 CFAllocatorDeallocate(allocator, bitmap);
1453 __CFCSetPutCompactBitmapBits(annexSet, cBitmap);
1454 __CFCSetPutClassType(annexSet, __kCFCharSetClassCompactBitmap);
1455 }
1456 } else {
1457 cBitmap = __CFCreateCompactBitmap(allocator, bytes);
1458
1459 if (cBitmap == NULL) {
1460 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1461 memmove(bitmap, bytes, __kCFBitmapSize);
1462
1463 __CFCSetPutBitmapBits(annexSet, bitmap);
1464 } else {
1465 __CFCSetPutCompactBitmapBits(annexSet, cBitmap);
1466 __CFCSetPutClassType(annexSet, __kCFCharSetClassCompactBitmap);
1467 }
1468 }
1469 length -= __kCFBitmapSize;
1470 bytes += __kCFBitmapSize;
1471 }
1472 }
1473 }
1474 } else {
1475 __CFCSetPutBitmapBits(cset, NULL);
1476 __CFCSetPutHasHashValue(cset, true); // Hash value is 0
1477 }
1478
1479 return cset;
1480 }
1481
1482 CFCharacterSetRef CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1483 CFMutableCharacterSetRef cset;
1484
1485 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFCharacterSetRef , theSet, "invertedSet");
1486
1487 cset = CFCharacterSetCreateMutableCopy(alloc, theSet);
1488 CFCharacterSetInvert(cset);
1489 __CFCSetPutIsMutable(cset, false);
1490
1491 return cset;
1492 }
1493
1494 /* Functions to create mutable characterset.
1495 */
1496 CFMutableCharacterSetRef CFCharacterSetCreateMutable(CFAllocatorRef allocator) {
1497 CFMutableCharacterSetRef cset;
1498
1499 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap| __kCFCharSetIsMutable))) return NULL;
1500 __CFCSetPutBitmapBits(cset, NULL);
1501 __CFCSetPutHasHashValue(cset, true); // Hash value is 0
1502
1503 return cset;
1504 }
1505
1506 static CFMutableCharacterSetRef __CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet, bool isMutable) {
1507 CFMutableCharacterSetRef cset;
1508
1509 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFMutableCharacterSetRef , theSet, "mutableCopy");
1510
1511 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1512
1513 if (!isMutable && !__CFCSetIsMutable(theSet)) {
1514 return (CFMutableCharacterSetRef)CFRetain(theSet);
1515 }
1516
1517 cset = CFCharacterSetCreateMutable(alloc);
1518
1519 __CFCSetPutClassType(cset, __CFCSetClassType(theSet));
1520 __CFCSetPutHasHashValue(cset, __CFCSetHasHashValue(theSet));
1521 __CFCSetPutIsInverted(cset, __CFCSetIsInverted(theSet));
1522 cset->_hashValue = theSet->_hashValue;
1523
1524 switch (__CFCSetClassType(theSet)) {
1525 case __kCFCharSetClassBuiltin:
1526 __CFCSetPutBuiltinType(cset, __CFCSetBuiltinType(theSet));
1527 break;
1528
1529 case __kCFCharSetClassRange:
1530 __CFCSetPutRangeFirstChar(cset, __CFCSetRangeFirstChar(theSet));
1531 __CFCSetPutRangeLength(cset, __CFCSetRangeLength(theSet));
1532 break;
1533
1534 case __kCFCharSetClassString:
1535 __CFCSetPutStringBuffer(cset, (UniChar *)CFAllocatorAllocate(alloc, __kCFStringCharSetMax * sizeof(UniChar), 0));
1536
1537 __CFCSetPutStringLength(cset, __CFCSetStringLength(theSet));
1538 memmove(__CFCSetStringBuffer(cset), __CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet) * sizeof(UniChar));
1539 break;
1540
1541 case __kCFCharSetClassBitmap:
1542 if (__CFCSetBitmapBits(theSet)) {
1543 uint8_t * bitmap = (isMutable ? NULL : __CFCreateCompactBitmap(alloc, __CFCSetBitmapBits(theSet)));
1544
1545 if (bitmap == NULL) {
1546 bitmap = (uint8_t *)CFAllocatorAllocate(alloc, sizeof(uint8_t) * __kCFBitmapSize, 0);
1547 memmove(bitmap, __CFCSetBitmapBits(theSet), __kCFBitmapSize);
1548 __CFCSetPutBitmapBits(cset, bitmap);
1549 } else {
1550 __CFCSetPutCompactBitmapBits(cset, bitmap);
1551 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1552 }
1553 } else {
1554 __CFCSetPutBitmapBits(cset, NULL);
1555 }
1556 break;
1557
1558 case __kCFCharSetClassCompactBitmap: {
1559 const uint8_t *compactBitmap = __CFCSetCompactBitmapBits(theSet);
1560
1561 if (compactBitmap) {
1562 uint32_t size = __CFCSetGetCompactBitmapSize(compactBitmap);
1563 uint8_t *newBitmap = (uint8_t *)CFAllocatorAllocate(alloc, size, 0);
1564
1565 memmove(newBitmap, compactBitmap, size);
1566 __CFCSetPutCompactBitmapBits(cset, newBitmap);
1567 }
1568 }
1569 break;
1570
1571 default:
1572 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1573 }
1574 if (__CFCSetHasNonBMPPlane(theSet)) {
1575 CFMutableCharacterSetRef annexPlane;
1576 int idx;
1577
1578 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
1579 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx))) {
1580 annexPlane = __CFCharacterSetCreateCopy(alloc, annexPlane, isMutable);
1581 __CFCSetPutCharacterSetToAnnexPlane(cset, annexPlane, idx);
1582 CFRelease(annexPlane);
1583 }
1584 }
1585 __CFCSetAnnexSetIsInverted(cset, __CFCSetAnnexIsInverted(theSet));
1586 } else if (__CFCSetAnnexIsInverted(theSet)) {
1587 __CFCSetAnnexSetIsInverted(cset, true);
1588 }
1589
1590 return cset;
1591 }
1592
1593 CFCharacterSetRef CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1594 return __CFCharacterSetCreateCopy(alloc, theSet, false);
1595 }
1596
1597 CFMutableCharacterSetRef CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1598 return __CFCharacterSetCreateCopy(alloc, theSet, true);
1599 }
1600
1601 /*** Basic accessors ***/
1602 Boolean CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet, UniChar theChar) {
1603 CFIndex length;
1604 Boolean isInverted;
1605 Boolean result = false;
1606
1607 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, Boolean, theSet, "longCharacterIsMember:", theChar);
1608
1609 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1610
1611 isInverted = __CFCSetIsInverted(theSet);
1612
1613 switch (__CFCSetClassType(theSet)) {
1614 case __kCFCharSetClassBuiltin:
1615 result = (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1616 break;
1617
1618 case __kCFCharSetClassRange:
1619 length = __CFCSetRangeLength(theSet);
1620 result = (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1621 break;
1622
1623 case __kCFCharSetClassString:
1624 result = ((length = __CFCSetStringLength(theSet)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet), length, theChar) ? !isInverted : isInverted) : isInverted);
1625 break;
1626
1627 case __kCFCharSetClassBitmap:
1628 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet), theChar) ? true : false) : isInverted);
1629 break;
1630
1631 case __kCFCharSetClassCompactBitmap:
1632 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet), theChar) ? true : false) : isInverted);
1633 break;
1634
1635 default:
1636 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1637 break;
1638 }
1639
1640 return result;
1641 }
1642
1643 Boolean CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet, UTF32Char theChar) {
1644 CFIndex length;
1645 UInt32 plane = (theChar >> 16);
1646 Boolean isAnnexInverted = false;
1647 Boolean isInverted;
1648 Boolean result = false;
1649
1650 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, Boolean, theSet, "longCharacterIsMember:", theChar);
1651
1652 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1653
1654 if (plane) {
1655 CFCharacterSetRef annexPlane;
1656
1657 if (__CFCSetIsBuiltin(theSet)) {
1658 isInverted = __CFCSetIsInverted(theSet);
1659 return (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1660 }
1661
1662 isAnnexInverted = __CFCSetAnnexIsInverted(theSet);
1663
1664 if ((annexPlane = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, plane)) == NULL) {
1665 if (!__CFCSetHasNonBMPPlane(theSet) && __CFCSetIsRange(theSet)) {
1666 isInverted = __CFCSetIsInverted(theSet);
1667 length = __CFCSetRangeLength(theSet);
1668 return (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1669 } else {
1670 return (isAnnexInverted ? true : false);
1671 }
1672 } else {
1673 theSet = annexPlane;
1674 theChar &= 0xFFFF;
1675 }
1676 }
1677
1678 isInverted = __CFCSetIsInverted(theSet);
1679
1680 switch (__CFCSetClassType(theSet)) {
1681 case __kCFCharSetClassBuiltin:
1682 result = (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1683 break;
1684
1685 case __kCFCharSetClassRange:
1686 length = __CFCSetRangeLength(theSet);
1687 result = (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1688 break;
1689
1690 case __kCFCharSetClassString:
1691 result = ((length = __CFCSetStringLength(theSet)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet), length, theChar) ? !isInverted : isInverted) : isInverted);
1692 break;
1693
1694 case __kCFCharSetClassBitmap:
1695 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet), theChar) ? true : false) : isInverted);
1696 break;
1697
1698 case __kCFCharSetClassCompactBitmap:
1699 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet), theChar) ? true : false) : isInverted);
1700 break;
1701
1702 default:
1703 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1704 return false; // To make compiler happy
1705 }
1706
1707 return (result ? !isAnnexInverted : isAnnexInverted);
1708 }
1709
1710 Boolean CFCharacterSetIsSurrogatePairMember(CFCharacterSetRef theSet, UniChar surrogateHigh, UniChar surrogateLow) {
1711 return CFCharacterSetIsLongCharacterMember(theSet, CFCharacterSetGetLongCharacterForSurrogatePair(surrogateHigh, surrogateLow));
1712 }
1713
1714
1715 static inline CFCharacterSetRef __CFCharacterSetGetExpandedSetForNSCharacterSet(const void *characterSet) {
1716 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFCharacterSetRef , characterSet, "_expandedCFCharacterSet");
1717 return NULL;
1718 }
1719
1720 Boolean CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
1721 CFMutableCharacterSetRef copy;
1722 CFCharacterSetRef expandedSet = NULL;
1723 CFCharacterSetRef expandedOtherSet = NULL;
1724 Boolean result;
1725
1726 if ((!CF_IS_OBJC(__kCFCharacterSetTypeID, theSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theSet))) && (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedOtherSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet)))) { // Really CF, we can do some trick here
1727 if (expandedSet) theSet = expandedSet;
1728 if (expandedOtherSet) theOtherSet = expandedOtherSet;
1729
1730 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1731 __CFGenericValidateType(theOtherSet, __kCFCharacterSetTypeID);
1732
1733 if (__CFCSetIsEmpty(theSet)) {
1734 if (__CFCSetIsInverted(theSet)) {
1735 return TRUE; // Inverted empty set covers all range
1736 } else if (!__CFCSetIsEmpty(theOtherSet) || __CFCSetIsInverted(theOtherSet)) {
1737 return FALSE;
1738 }
1739 } else if (__CFCSetIsEmpty(theOtherSet) && !__CFCSetIsInverted(theOtherSet)) {
1740 return TRUE;
1741 } else {
1742 if (__CFCSetIsBuiltin(theSet) || __CFCSetIsBuiltin(theOtherSet)) {
1743 if (__CFCSetClassType(theSet) == __CFCSetClassType(theOtherSet) && __CFCSetBuiltinType(theSet) == __CFCSetBuiltinType(theOtherSet) && !__CFCSetIsInverted(theSet) && !__CFCSetIsInverted(theOtherSet)) return TRUE;
1744 } else if (__CFCSetIsRange(theSet) || __CFCSetIsRange(theOtherSet)) {
1745 if (__CFCSetClassType(theSet) == __CFCSetClassType(theOtherSet)) {
1746 if (__CFCSetIsInverted(theSet)) {
1747 if (__CFCSetIsInverted(theOtherSet)) {
1748 return (__CFCSetRangeFirstChar(theOtherSet) > __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) > (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) ? FALSE : TRUE);
1749 } else {
1750 return ((__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) <= __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) <= __CFCSetRangeFirstChar(theOtherSet) ? TRUE : FALSE);
1751 }
1752 } else {
1753 if (__CFCSetIsInverted(theOtherSet)) {
1754 return ((__CFCSetRangeFirstChar(theSet) == 0 && __CFCSetRangeLength(theSet) == 0x110000) || (__CFCSetRangeFirstChar(theOtherSet) == 0 && (UInt32)__CFCSetRangeLength(theOtherSet) <= __CFCSetRangeFirstChar(theSet)) || ((__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) <= __CFCSetRangeFirstChar(theOtherSet) && (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) == 0x110000) ? TRUE : FALSE);
1755 } else {
1756 return (__CFCSetRangeFirstChar(theOtherSet) < __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) < (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) ? FALSE : TRUE);
1757 }
1758 }
1759 }
1760 } else {
1761 UInt32 theSetAnnexMask = __CFCSetAnnexValidEntriesBitmap(theSet);
1762 UInt32 theOtherSetAnnexMask = __CFCSetAnnexValidEntriesBitmap(theOtherSet);
1763 Boolean isTheSetAnnexInverted = __CFCSetAnnexIsInverted(theSet);
1764 Boolean isTheOtherSetAnnexInverted = __CFCSetAnnexIsInverted(theOtherSet);
1765 uint8_t theSetBuffer[__kCFBitmapSize];
1766 uint8_t theOtherSetBuffer[__kCFBitmapSize];
1767
1768 // We mask plane 1 to plane 16
1769 if (isTheSetAnnexInverted) theSetAnnexMask = (~theSetAnnexMask) & (0xFFFF << 1);
1770 if (isTheOtherSetAnnexInverted) theOtherSetAnnexMask = (~theOtherSetAnnexMask) & (0xFFFF << 1);
1771
1772 __CFCSetGetBitmap(theSet, theSetBuffer);
1773 __CFCSetGetBitmap(theOtherSet, theOtherSetBuffer);
1774
1775 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32 *)theSetBuffer, (const UInt32 *)theOtherSetBuffer, FALSE, FALSE)) return FALSE;
1776
1777 if (theOtherSetAnnexMask) {
1778 CFCharacterSetRef theSetAnnex;
1779 CFCharacterSetRef theOtherSetAnnex;
1780 uint32_t idx;
1781
1782 if ((theSetAnnexMask & theOtherSetAnnexMask) != theOtherSetAnnexMask) return FALSE;
1783
1784 for (idx = 1;idx <= 16;idx++) {
1785 theSetAnnex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx);
1786 if (NULL == theSetAnnex) continue; // This case is already handled by the mask above
1787
1788 theOtherSetAnnex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx);
1789
1790 if (NULL == theOtherSetAnnex) {
1791 if (isTheOtherSetAnnexInverted) {
1792 __CFCSetGetBitmap(theSetAnnex, theSetBuffer);
1793 if (!__CFCSetIsEqualBitmap((const UInt32 *)theSetBuffer, (isTheSetAnnexInverted ? NULL : (const UInt32 *)-1))) return FALSE;
1794 }
1795 } else {
1796 __CFCSetGetBitmap(theSetAnnex, theSetBuffer);
1797 __CFCSetGetBitmap(theOtherSetAnnex, theOtherSetBuffer);
1798 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32 *)theSetBuffer, (const UInt32 *)theOtherSetBuffer, isTheSetAnnexInverted, isTheOtherSetAnnexInverted)) return FALSE;
1799 }
1800 }
1801 }
1802
1803 return TRUE;
1804 }
1805 }
1806 }
1807
1808 copy = CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault, theSet);
1809 CFCharacterSetIntersect(copy, theOtherSet);
1810 result = __CFCharacterSetEqual(copy, theOtherSet);
1811 CFRelease(copy);
1812
1813 return result;
1814 }
1815
1816 Boolean CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet, CFIndex thePlane) {
1817 Boolean isInverted = __CFCSetIsInverted(theSet);
1818
1819 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, Boolean, theSet, "hasMemberInPlane:", thePlane);
1820
1821 if (__CFCSetIsEmpty(theSet)) {
1822 return (isInverted ? TRUE : FALSE);
1823 } else if (__CFCSetIsBuiltin(theSet)) {
1824 CFCharacterSetPredefinedSet type = __CFCSetBuiltinType(theSet);
1825
1826 if (type == kCFCharacterSetControl) {
1827 if (isInverted || (thePlane == 14)) {
1828 return TRUE; // There is no plane that covers all values || Plane 14 has language tags
1829 } else {
1830 return (CFUniCharGetBitmapPtrForPlane(type, thePlane) ? TRUE : FALSE);
1831 }
1832 } else if ((type < kCFCharacterSetDecimalDigit) || (type == kCFCharacterSetNewline)) {
1833 return (thePlane && !isInverted ? FALSE : TRUE);
1834 } else if (__CFCSetBuiltinType(theSet) == kCFCharacterSetIllegal) {
1835 return (isInverted ? (thePlane < 3 || thePlane > 13 ? TRUE : FALSE) : TRUE); // This is according to Unicode 3.1
1836 } else {
1837 if (isInverted) {
1838 return TRUE; // There is no plane that covers all values
1839 } else {
1840 return (CFUniCharGetBitmapPtrForPlane(type, thePlane) ? TRUE : FALSE);
1841 }
1842 }
1843 } else if (__CFCSetIsRange(theSet)) {
1844 UTF32Char firstChar = __CFCSetRangeFirstChar(theSet);
1845 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(theSet) - 1);
1846 CFIndex firstPlane = firstChar >> 16;
1847 CFIndex lastPlane = lastChar >> 16;
1848
1849 if (isInverted) {
1850 if (thePlane < firstPlane || thePlane > lastPlane) {
1851 return TRUE;
1852 } else if (thePlane > firstPlane && thePlane < lastPlane) {
1853 return FALSE;
1854 } else {
1855 firstChar &= 0xFFFF;
1856 lastChar &= 0xFFFF;
1857 if (thePlane == firstPlane) {
1858 return (firstChar || (firstPlane == lastPlane && lastChar != 0xFFFF) ? TRUE : FALSE);
1859 } else {
1860 return (lastChar != 0xFFFF || (firstPlane == lastPlane && firstChar) ? TRUE : FALSE);
1861 }
1862 }
1863 } else {
1864 return (thePlane < firstPlane || thePlane > lastPlane ? FALSE : TRUE);
1865 }
1866 } else {
1867 if (thePlane == 0) {
1868 switch (__CFCSetClassType(theSet)) {
1869 case __kCFCharSetClassString: if (!__CFCSetStringLength(theSet)) return isInverted; break;
1870 case __kCFCharSetClassCompactBitmap: return (__CFCSetCompactBitmapBits(theSet) ? TRUE : FALSE); break;
1871 case __kCFCharSetClassBitmap: return (__CFCSetBitmapBits(theSet) ? TRUE : FALSE); break;
1872 }
1873 return TRUE;
1874 } else {
1875 CFCharacterSetRef annex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, thePlane);
1876 if (annex) {
1877 if (__CFCSetIsRange(annex)) {
1878 return (__CFCSetAnnexIsInverted(theSet) && (__CFCSetRangeFirstChar(annex) == 0) && (__CFCSetRangeLength(annex) == 0x10000) ? FALSE : TRUE);
1879 } else if (__CFCSetIsBitmap(annex)) {
1880 return (__CFCSetAnnexIsInverted(theSet) && __CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(annex), (const UInt32 *)-1) ? FALSE : TRUE);
1881 } else {
1882 uint8_t bitsBuf[__kCFBitmapSize];
1883 __CFCSetGetBitmap(annex, bitsBuf);
1884 return (__CFCSetAnnexIsInverted(theSet) && __CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1) ? FALSE : TRUE);
1885 }
1886 } else {
1887 return __CFCSetAnnexIsInverted(theSet);
1888 }
1889 }
1890 }
1891
1892 return FALSE;
1893 }
1894
1895
1896 CFDataRef CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1897 CFMutableDataRef data;
1898 int numNonBMPPlanes = 0;
1899 int planeIndices[MAX_ANNEX_PLANE];
1900 int idx;
1901 int length;
1902 bool isAnnexInverted;
1903
1904 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFDataRef , theSet, "_retainedBitmapRepresentation");
1905
1906 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1907
1908 isAnnexInverted = (__CFCSetAnnexIsInverted(theSet) != 0);
1909
1910 if (__CFCSetHasNonBMPPlane(theSet)) {
1911 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
1912 if (isAnnexInverted || __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
1913 planeIndices[numNonBMPPlanes++] = idx;
1914 }
1915 }
1916 } else if (__CFCSetIsBuiltin(theSet)) {
1917 numNonBMPPlanes = (__CFCSetIsInverted(theSet) ? MAX_ANNEX_PLANE : CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(theSet)) - 1);
1918 } else if (__CFCSetIsRange(theSet)) {
1919 UInt32 firstChar = __CFCSetRangeFirstChar(theSet);
1920 UInt32 lastChar = __CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet) - 1;
1921 int firstPlane = (firstChar >> 16);
1922 int lastPlane = (lastChar >> 16);
1923 bool isInverted = (__CFCSetIsInverted(theSet) != 0);
1924
1925 if (lastPlane > 0) {
1926 if (firstPlane == 0) {
1927 firstPlane = 1;
1928 firstChar = 0x10000;
1929 }
1930 numNonBMPPlanes = (lastPlane - firstPlane) + 1;
1931 if (isInverted) {
1932 numNonBMPPlanes = MAX_ANNEX_PLANE - numNonBMPPlanes;
1933 if (firstPlane == lastPlane) {
1934 if (((firstChar & 0xFFFF) > 0) || ((lastChar & 0xFFFF) < 0xFFFF)) ++numNonBMPPlanes;
1935 } else {
1936 if ((firstChar & 0xFFFF) > 0) ++numNonBMPPlanes;
1937 if ((lastChar & 0xFFFF) < 0xFFFF) ++numNonBMPPlanes;
1938 }
1939 }
1940 } else if (isInverted) {
1941 numNonBMPPlanes = MAX_ANNEX_PLANE;
1942 }
1943 } else if (isAnnexInverted) {
1944 numNonBMPPlanes = MAX_ANNEX_PLANE;
1945 }
1946
1947 length = __kCFBitmapSize + ((__kCFBitmapSize + 1) * numNonBMPPlanes);
1948 data = CFDataCreateMutable(alloc, length);
1949 CFDataSetLength(data, length);
1950 __CFCSetGetBitmap(theSet, CFDataGetMutableBytePtr(data));
1951
1952 if (numNonBMPPlanes > 0) {
1953 uint8_t *bytes = CFDataGetMutableBytePtr(data) + __kCFBitmapSize;
1954
1955 if (__CFCSetHasNonBMPPlane(theSet)) {
1956 CFCharacterSetRef subset;
1957
1958 for (idx = 0;idx < numNonBMPPlanes;idx++) {
1959 *(bytes++) = planeIndices[idx];
1960 if ((subset = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, planeIndices[idx])) == NULL) {
1961 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, (isAnnexInverted ? 0xFF : 0));
1962 } else {
1963 __CFCSetGetBitmap(subset, bytes);
1964 if (isAnnexInverted) {
1965 uint32_t count = __kCFBitmapSize / sizeof(uint32_t);
1966 uint32_t *bits = (uint32_t *)bytes;
1967
1968 while (count-- > 0) {
1969 *bits = ~(*bits);
1970 ++bits;
1971 }
1972 }
1973 }
1974 bytes += __kCFBitmapSize;
1975 }
1976 } else if (__CFCSetIsBuiltin(theSet)) {
1977 UInt8 result;
1978 CFIndex delta;
1979 Boolean isInverted = __CFCSetIsInverted(theSet);
1980
1981 for (idx = 0;idx < numNonBMPPlanes;idx++) {
1982 if ((result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theSet), idx + 1, bytes + 1, (isInverted != 0))) == kCFUniCharBitmapEmpty) continue;
1983 *(bytes++) = idx + 1;
1984 if (result == kCFUniCharBitmapAll) {
1985 CFIndex bitmapLength = __kCFBitmapSize;
1986 while (bitmapLength-- > 0) *(bytes++) = (uint8_t)0xFF;
1987 } else {
1988 bytes += __kCFBitmapSize;
1989 }
1990 }
1991 delta = bytes - (const uint8_t *)CFDataGetBytePtr(data);
1992 if (delta < length) CFDataSetLength(data, delta);
1993 } else if (__CFCSetIsRange(theSet)) {
1994 UInt32 firstChar = __CFCSetRangeFirstChar(theSet);
1995 UInt32 lastChar = __CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet) - 1;
1996 int firstPlane = (firstChar >> 16);
1997 int lastPlane = (lastChar >> 16);
1998
1999 if (firstPlane == 0) {
2000 firstPlane = 1;
2001 firstChar = 0x10000;
2002 }
2003 if (__CFCSetIsInverted(theSet)) {
2004 // Mask out the plane byte
2005 firstChar &= 0xFFFF;
2006 lastChar &= 0xFFFF;
2007
2008 for (idx = 1;idx < firstPlane;idx++) { // Fill up until the first plane
2009 *(bytes++) = idx;
2010 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2011 bytes += __kCFBitmapSize;
2012 }
2013 if (firstPlane == lastPlane) {
2014 if ((firstChar > 0) || (lastChar < 0xFFFF)) {
2015 *(bytes++) = idx;
2016 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2017 __CFCSetBitmapRemoveCharactersInRange(bytes, firstChar, lastChar);
2018 bytes += __kCFBitmapSize;
2019 }
2020 } else if (firstPlane < lastPlane) {
2021 if (firstChar > 0) {
2022 *(bytes++) = idx;
2023 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0);
2024 __CFCSetBitmapAddCharactersInRange(bytes, 0, firstChar - 1);
2025 bytes += __kCFBitmapSize;
2026 }
2027 if (lastChar < 0xFFFF) {
2028 *(bytes++) = idx;
2029 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0);
2030 __CFCSetBitmapAddCharactersInRange(bytes, lastChar, 0xFFFF);
2031 bytes += __kCFBitmapSize;
2032 }
2033 }
2034 for (idx = lastPlane + 1;idx <= MAX_ANNEX_PLANE;idx++) {
2035 *(bytes++) = idx;
2036 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2037 bytes += __kCFBitmapSize;
2038 }
2039 } else {
2040 for (idx = firstPlane;idx <= lastPlane;idx++) {
2041 *(bytes++) = idx;
2042 __CFCSetBitmapAddCharactersInRange(bytes, (idx == firstPlane ? firstChar : 0), (idx == lastPlane ? lastChar : 0xFFFF));
2043 bytes += __kCFBitmapSize;
2044 }
2045 }
2046 } else if (isAnnexInverted) {
2047 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2048 *(bytes++) = idx;
2049 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2050 bytes += __kCFBitmapSize;
2051 }
2052 }
2053 }
2054
2055 return data;
2056 }
2057
2058 /*** MutableCharacterSet functions ***/
2059 void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange) {
2060 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "addCharactersInRange:", theRange);
2061
2062 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2063 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
2064
2065 if (!theRange.length || (__CFCSetIsInverted(theSet) && __CFCSetIsEmpty(theSet))) return; // Inverted && empty set contains all char
2066
2067 if (!__CFCSetIsInverted(theSet)) {
2068 if (__CFCSetIsEmpty(theSet)) {
2069 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2070 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2071 __CFCSetPutRangeLength(theSet, theRange.length);
2072 __CFCSetPutHasHashValue(theSet, false);
2073 return;
2074 } else if (__CFCSetIsRange(theSet)) {
2075 CFIndex firstChar = __CFCSetRangeFirstChar(theSet);
2076 CFIndex length = __CFCSetRangeLength(theSet);
2077
2078 if (firstChar == theRange.location) {
2079 __CFCSetPutRangeLength(theSet, __CFMin(length, theRange.length));
2080 __CFCSetPutHasHashValue(theSet, false);
2081 return;
2082 } else if (firstChar < theRange.location && theRange.location <= firstChar + length) {
2083 if (firstChar + length < theRange.location + theRange.length) __CFCSetPutRangeLength(theSet, theRange.length + (theRange.location - firstChar));
2084 __CFCSetPutHasHashValue(theSet, false);
2085 return;
2086 } else if (theRange.location < firstChar && firstChar <= theRange.location + theRange.length) {
2087 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2088 __CFCSetPutRangeLength(theSet, length + (firstChar - theRange.location));
2089 __CFCSetPutHasHashValue(theSet, false);
2090 return;
2091 }
2092 } else if (__CFCSetIsString(theSet) && __CFCSetStringLength(theSet) + theRange.length < __kCFStringCharSetMax) {
2093 UniChar *buffer;
2094 if (!__CFCSetStringBuffer(theSet))
2095 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2096 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet);
2097 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theSet) + theRange.length);
2098 while (theRange.length--) *buffer++ = (UniChar)theRange.location++;
2099 qsort(__CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet), sizeof(UniChar), chcompar);
2100 __CFCSetPutHasHashValue(theSet, false);
2101 return;
2102 }
2103 }
2104
2105 // OK, I have to be a bitmap
2106 __CFCSetMakeBitmap(theSet);
2107 __CFCSetAddNonBMPPlanesInRange(theSet, theRange);
2108 if (theRange.location < 0x10000) { // theRange is in BMP
2109 if (theRange.location + theRange.length >= NUMCHARACTERS) theRange.length = NUMCHARACTERS - theRange.location;
2110 __CFCSetBitmapAddCharactersInRange(__CFCSetBitmapBits(theSet), (UniChar)theRange.location, (UniChar)(theRange.location + theRange.length - 1));
2111 }
2112 __CFCSetPutHasHashValue(theSet, false);
2113
2114 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2115 }
2116
2117 void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange) {
2118 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "removeCharactersInRange:", theRange);
2119
2120 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2121 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
2122
2123 if (!theRange.length || (!__CFCSetIsInverted(theSet) && __CFCSetIsEmpty(theSet))) return; // empty set
2124
2125 if (__CFCSetIsInverted(theSet)) {
2126 if (__CFCSetIsEmpty(theSet)) {
2127 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2128 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2129 __CFCSetPutRangeLength(theSet, theRange.length);
2130 __CFCSetPutHasHashValue(theSet, false);
2131 return;
2132 } else if (__CFCSetIsRange(theSet)) {
2133 CFIndex firstChar = __CFCSetRangeFirstChar(theSet);
2134 CFIndex length = __CFCSetRangeLength(theSet);
2135
2136 if (firstChar == theRange.location) {
2137 __CFCSetPutRangeLength(theSet, __CFMin(length, theRange.length));
2138 __CFCSetPutHasHashValue(theSet, false);
2139 return;
2140 } else if (firstChar < theRange.location && theRange.location <= firstChar + length) {
2141 if (firstChar + length < theRange.location + theRange.length) __CFCSetPutRangeLength(theSet, theRange.length + (theRange.location - firstChar));
2142 __CFCSetPutHasHashValue(theSet, false);
2143 return;
2144 } else if (theRange.location < firstChar && firstChar <= theRange.location + theRange.length) {
2145 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2146 __CFCSetPutRangeLength(theSet, length + (firstChar - theRange.location));
2147 __CFCSetPutHasHashValue(theSet, false);
2148 return;
2149 }
2150 } else if (__CFCSetIsString(theSet) && __CFCSetStringLength(theSet) + theRange.length < __kCFStringCharSetMax) {
2151 UniChar *buffer;
2152 if (!__CFCSetStringBuffer(theSet))
2153 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2154 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet);
2155 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theSet) + theRange.length);
2156 while (theRange.length--) *buffer++ = (UniChar)theRange.location++;
2157 qsort(__CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet), sizeof(UniChar), chcompar);
2158 __CFCSetPutHasHashValue(theSet, false);
2159 return;
2160 }
2161 }
2162
2163 // OK, I have to be a bitmap
2164 __CFCSetMakeBitmap(theSet);
2165 __CFCSetRemoveNonBMPPlanesInRange(theSet, theRange);
2166 if (theRange.location < 0x10000) { // theRange is in BMP
2167 if (theRange.location + theRange.length > NUMCHARACTERS) theRange.length = NUMCHARACTERS - theRange.location;
2168 if (theRange.location == 0 && theRange.length == NUMCHARACTERS) { // Remove all
2169 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2170 __CFCSetPutBitmapBits(theSet, NULL);
2171 } else {
2172 __CFCSetBitmapRemoveCharactersInRange(__CFCSetBitmapBits(theSet), (UniChar)theRange.location, (UniChar)(theRange.location + theRange.length - 1));
2173 }
2174 }
2175
2176 __CFCSetPutHasHashValue(theSet, false);
2177 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2178 }
2179
2180 void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString) {
2181 UniChar *buffer;
2182 CFIndex length;
2183 BOOL hasSurrogate = NO;
2184
2185 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "addCharactersInString:", theString);
2186
2187 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2188
2189 if ((__CFCSetIsEmpty(theSet) && __CFCSetIsInverted(theSet)) || !(length = CFStringGetLength(theString))) return;
2190
2191 if (!__CFCSetIsInverted(theSet)) {
2192 CFIndex newLength = length + (__CFCSetIsEmpty(theSet) ? 0 : (__CFCSetIsString(theSet) ? __CFCSetStringLength(theSet) : __kCFStringCharSetMax));
2193
2194 if (newLength < __kCFStringCharSetMax) {
2195 buffer = __CFCSetStringBuffer(theSet);
2196
2197 if (NULL == buffer) {
2198 buffer = (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0);
2199 } else {
2200 buffer += __CFCSetStringLength(theSet);
2201 }
2202
2203 CFStringGetCharacters(theString, CFRangeMake(0, length), (UniChar*)buffer);
2204
2205 if (length > 1) {
2206 UTF16Char *characters = buffer;
2207 const UTF16Char *charactersLimit = characters + length;
2208
2209 while (characters < charactersLimit) {
2210 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) {
2211 memmove(characters, characters + 1, (charactersLimit - (characters + 1)) * sizeof(*characters));
2212 --charactersLimit;
2213 hasSurrogate = YES;
2214 } else {
2215 ++characters;
2216 }
2217 }
2218
2219 newLength -= (length - (charactersLimit - buffer));
2220 }
2221
2222 if (0 == newLength) {
2223 if (NULL == __CFCSetStringBuffer(theSet)) CFAllocatorDeallocate(CFGetAllocator(theSet), buffer);
2224 } else {
2225 if (NULL == __CFCSetStringBuffer(theSet)) {
2226 __CFCSetPutClassType(theSet, __kCFCharSetClassString);
2227 __CFCSetPutStringBuffer(theSet, buffer);
2228 }
2229 __CFCSetPutStringLength(theSet, newLength);
2230 qsort(__CFCSetStringBuffer(theSet), newLength, sizeof(UniChar), chcompar);
2231 }
2232 __CFCSetPutHasHashValue(theSet, false);
2233
2234 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetAddCharactersInRange);
2235
2236 return;
2237 }
2238 }
2239
2240 // OK, I have to be a bitmap
2241 __CFCSetMakeBitmap(theSet);
2242 CFStringInlineBuffer inlineBuffer;
2243 CFIndex idx;
2244
2245 CFStringInitInlineBuffer(theString, &inlineBuffer, CFRangeMake(0, length));
2246
2247 for (idx = 0;idx < length;idx++) {
2248 UTF16Char character = __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer, idx);
2249
2250 if (CFStringIsSurrogateHighCharacter(character) || CFStringIsSurrogateLowCharacter(character)) {
2251 hasSurrogate = YES;
2252 } else {
2253 __CFCSetBitmapAddCharacter(__CFCSetBitmapBits(theSet), character);
2254 }
2255 }
2256
2257 __CFCSetPutHasHashValue(theSet, false);
2258
2259 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2260
2261 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetAddCharactersInRange);
2262 }
2263
2264 void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString) {
2265 UniChar *buffer;
2266 CFIndex length;
2267 BOOL hasSurrogate = NO;
2268
2269 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "removeCharactersInString:", theString);
2270
2271 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2272
2273 if ((__CFCSetIsEmpty(theSet) && !__CFCSetIsInverted(theSet)) || !(length = CFStringGetLength(theString))) return;
2274
2275 if (__CFCSetIsInverted(theSet)) {
2276 CFIndex newLength = length + (__CFCSetIsEmpty(theSet) ? 0 : (__CFCSetIsString(theSet) ? __CFCSetStringLength(theSet) : __kCFStringCharSetMax));
2277
2278 if (newLength < __kCFStringCharSetMax) {
2279 buffer = __CFCSetStringBuffer(theSet);
2280
2281 if (NULL == buffer) {
2282 buffer = (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0);
2283 } else {
2284 buffer += __CFCSetStringLength(theSet);
2285 }
2286
2287 CFStringGetCharacters(theString, CFRangeMake(0, length), (UniChar*)buffer);
2288
2289 if (length > 1) {
2290 UTF16Char *characters = buffer;
2291 const UTF16Char *charactersLimit = characters + length;
2292
2293 while (characters < charactersLimit) {
2294 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) {
2295 memmove(characters, characters + 1, charactersLimit - (characters + 1));
2296 --charactersLimit;
2297 hasSurrogate = YES;
2298 }
2299 ++characters;
2300 }
2301
2302 newLength -= (length - (charactersLimit - buffer));
2303 }
2304
2305 if (NULL == __CFCSetStringBuffer(theSet)) {
2306 __CFCSetPutClassType(theSet, __kCFCharSetClassString);
2307 __CFCSetPutStringBuffer(theSet, buffer);
2308 }
2309 __CFCSetPutStringLength(theSet, newLength);
2310 qsort(__CFCSetStringBuffer(theSet), newLength, sizeof(UniChar), chcompar);
2311 __CFCSetPutHasHashValue(theSet, false);
2312
2313 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetRemoveCharactersInRange);
2314
2315 return;
2316 }
2317 }
2318
2319 // OK, I have to be a bitmap
2320 __CFCSetMakeBitmap(theSet);
2321 CFStringInlineBuffer inlineBuffer;
2322 CFIndex idx;
2323
2324 CFStringInitInlineBuffer(theString, &inlineBuffer, CFRangeMake(0, length));
2325
2326 for (idx = 0;idx < length;idx++) {
2327 UTF16Char character = __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer, idx);
2328
2329 if (CFStringIsSurrogateHighCharacter(character) || CFStringIsSurrogateLowCharacter(character)) {
2330 hasSurrogate = YES;
2331 } else {
2332 __CFCSetBitmapRemoveCharacter(__CFCSetBitmapBits(theSet), character);
2333 }
2334 }
2335
2336 __CFCSetPutHasHashValue(theSet, false);
2337 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2338
2339 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetRemoveCharactersInRange);
2340 }
2341
2342 void CFCharacterSetUnion(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
2343 CFCharacterSetRef expandedSet = NULL;
2344
2345 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "formUnionWithCharacterSet:", theOtherSet);
2346
2347 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2348
2349 if (__CFCSetIsEmpty(theSet) && __CFCSetIsInverted(theSet)) return; // Inverted empty set contains all char
2350
2351 if (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet))) { // Really CF, we can do some trick here
2352 if (expandedSet) theOtherSet = expandedSet;
2353
2354 if (__CFCSetIsEmpty(theOtherSet)) {
2355 if (__CFCSetIsInverted(theOtherSet)) {
2356 if (__CFCSetIsString(theSet) && __CFCSetStringBuffer(theSet)) {
2357 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetStringBuffer(theSet));
2358 } else if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) {
2359 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2360 } else if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) {
2361 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetCompactBitmapBits(theSet));
2362 }
2363 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2364 __CFCSetPutRangeLength(theSet, 0);
2365 __CFCSetPutIsInverted(theSet, true);
2366 __CFCSetPutHasHashValue(theSet, false);
2367 __CFCSetDeallocateAnnexPlane(theSet);
2368 }
2369 } else if (__CFCSetIsBuiltin(theOtherSet) && __CFCSetIsEmpty(theSet)) { // theSet can be builtin set
2370 __CFCSetPutClassType(theSet, __kCFCharSetClassBuiltin);
2371 __CFCSetPutBuiltinType(theSet, __CFCSetBuiltinType(theOtherSet));
2372 if (__CFCSetIsInverted(theOtherSet)) __CFCSetPutIsInverted(theSet, true);
2373 if (__CFCSetAnnexIsInverted(theOtherSet)) __CFCSetAnnexSetIsInverted(theSet, true);
2374 __CFCSetPutHasHashValue(theSet, false);
2375 } else {
2376 if (__CFCSetIsRange(theOtherSet)) {
2377 if (__CFCSetIsInverted(theOtherSet)) {
2378 UTF32Char firstChar = __CFCSetRangeFirstChar(theOtherSet);
2379 CFIndex length = __CFCSetRangeLength(theOtherSet);
2380
2381 if (firstChar > 0) CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(0, firstChar));
2382 firstChar += length;
2383 length = 0x110000 - firstChar;
2384 CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(firstChar, length));
2385 } else {
2386 CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet), __CFCSetRangeLength(theOtherSet)));
2387 }
2388 } else if (__CFCSetIsString(theOtherSet)) {
2389 CFStringRef string = CFStringCreateWithCharactersNoCopy(CFGetAllocator(theSet), __CFCSetStringBuffer(theOtherSet), __CFCSetStringLength(theOtherSet), kCFAllocatorNull);
2390 CFCharacterSetAddCharactersInString(theSet, string);
2391 CFRelease(string);
2392 } else {
2393 __CFCSetMakeBitmap(theSet);
2394 if (__CFCSetIsBitmap(theOtherSet)) {
2395 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2396 UInt32 *bitmap2 = (UInt32*)__CFCSetBitmapBits(theOtherSet);
2397 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2398 while (length--) *bitmap1++ |= *bitmap2++;
2399 } else {
2400 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2401 UInt32 *bitmap2;
2402 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2403 uint8_t bitmapBuffer[__kCFBitmapSize];
2404 __CFCSetGetBitmap(theOtherSet, bitmapBuffer);
2405 bitmap2 = (UInt32*)bitmapBuffer;
2406 while (length--) *bitmap1++ |= *bitmap2++;
2407 }
2408 __CFCSetPutHasHashValue(theSet, false);
2409 }
2410 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2411 CFMutableCharacterSetRef otherSetPlane;
2412 int idx;
2413
2414 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2415 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2416 CFCharacterSetUnion((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx), otherSetPlane);
2417 }
2418 }
2419 } else if (__CFCSetAnnexIsInverted(theOtherSet)) {
2420 if (__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2421 __CFCSetAnnexSetIsInverted(theSet, true);
2422 } else if (__CFCSetIsBuiltin(theOtherSet)) {
2423 CFMutableCharacterSetRef annexPlane;
2424 uint8_t bitmapBuffer[__kCFBitmapSize];
2425 uint8_t result;
2426 int planeIndex;
2427 Boolean isOtherAnnexPlaneInverted = __CFCSetAnnexIsInverted(theOtherSet);
2428 UInt32 *bitmap1;
2429 UInt32 *bitmap2;
2430 CFIndex length;
2431
2432 for (planeIndex = 1;planeIndex <= MAX_ANNEX_PLANE;planeIndex++) {
2433 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet), planeIndex, bitmapBuffer, (isOtherAnnexPlaneInverted != 0));
2434 if (result != kCFUniCharBitmapEmpty) {
2435 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, planeIndex);
2436 if (result == kCFUniCharBitmapAll) {
2437 CFCharacterSetAddCharactersInRange(annexPlane, CFRangeMake(0x0000, 0x10000));
2438 } else {
2439 __CFCSetMakeBitmap(annexPlane);
2440 bitmap1 = (UInt32 *)__CFCSetBitmapBits(annexPlane);
2441 length = __kCFBitmapSize / sizeof(UInt32);
2442 bitmap2 = (UInt32*)bitmapBuffer;
2443 while (length--) *bitmap1++ |= *bitmap2++;
2444 }
2445 }
2446 }
2447 }
2448 }
2449 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2450 } else { // It's NSCharacterSet
2451 CFDataRef bitmapRep = CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault, theOtherSet);
2452 const UInt32 *bitmap2 = (bitmapRep && CFDataGetLength(bitmapRep) ? (const UInt32 *)CFDataGetBytePtr(bitmapRep) : NULL);
2453 if (bitmap2) {
2454 UInt32 *bitmap1;
2455 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2456 __CFCSetMakeBitmap(theSet);
2457 bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2458 while (length--) *bitmap1++ |= *bitmap2++;
2459 __CFCSetPutHasHashValue(theSet, false);
2460 }
2461 CFRelease(bitmapRep);
2462 }
2463 }
2464
2465 void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
2466 CFCharacterSetRef expandedSet = NULL;
2467
2468 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "formIntersectionWithCharacterSet:", theOtherSet);
2469
2470 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2471
2472 if (__CFCSetIsEmpty(theSet) && !__CFCSetIsInverted(theSet)) return; // empty set
2473
2474 if (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet))) { // Really CF, we can do some trick here
2475 if (expandedSet) theOtherSet = expandedSet;
2476
2477 if (__CFCSetIsEmpty(theOtherSet)) {
2478 if (!__CFCSetIsInverted(theOtherSet)) {
2479 if (__CFCSetIsString(theSet) && __CFCSetStringBuffer(theSet)) {
2480 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetStringBuffer(theSet));
2481 } else if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) {
2482 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2483 } else if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) {
2484 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetCompactBitmapBits(theSet));
2485 }
2486 __CFCSetPutClassType(theSet, __kCFCharSetClassBitmap);
2487 __CFCSetPutBitmapBits(theSet, NULL);
2488 __CFCSetPutIsInverted(theSet, false);
2489 theSet->_hashValue = 0;
2490 __CFCSetPutHasHashValue(theSet, true);
2491 __CFCSetDeallocateAnnexPlane(theSet);
2492 }
2493 } else if (__CFCSetIsEmpty(theSet)) { // non inverted empty set contains all character
2494 __CFCSetPutClassType(theSet, __CFCSetClassType(theOtherSet));
2495 __CFCSetPutHasHashValue(theSet, __CFCSetHasHashValue(theOtherSet));
2496 __CFCSetPutIsInverted(theSet, __CFCSetIsInverted(theOtherSet));
2497 theSet->_hashValue = theOtherSet->_hashValue;
2498 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2499 CFMutableCharacterSetRef otherSetPlane;
2500 int idx;
2501 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2502 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2503 otherSetPlane = (CFMutableCharacterSetRef)CFCharacterSetCreateMutableCopy(CFGetAllocator(theSet), otherSetPlane);
2504 __CFCSetPutCharacterSetToAnnexPlane(theSet, otherSetPlane, idx);
2505 CFRelease(otherSetPlane);
2506 }
2507 }
2508 __CFCSetAnnexSetIsInverted(theSet, __CFCSetAnnexIsInverted(theOtherSet));
2509 }
2510
2511 switch (__CFCSetClassType(theOtherSet)) {
2512 case __kCFCharSetClassBuiltin:
2513 __CFCSetPutBuiltinType(theSet, __CFCSetBuiltinType(theOtherSet));
2514 break;
2515
2516 case __kCFCharSetClassRange:
2517 __CFCSetPutRangeFirstChar(theSet, __CFCSetRangeFirstChar(theOtherSet));
2518 __CFCSetPutRangeLength(theSet, __CFCSetRangeLength(theOtherSet));
2519 break;
2520
2521 case __kCFCharSetClassString:
2522 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theOtherSet));
2523 if (!__CFCSetStringBuffer(theSet))
2524 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2525 memmove(__CFCSetStringBuffer(theSet), __CFCSetStringBuffer(theOtherSet), __CFCSetStringLength(theSet) * sizeof(UniChar));
2526 break;
2527
2528 case __kCFCharSetClassBitmap:
2529 __CFCSetPutBitmapBits(theSet, (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet), sizeof(uint8_t) * __kCFBitmapSize, 0));
2530 memmove(__CFCSetBitmapBits(theSet), __CFCSetBitmapBits(theOtherSet), __kCFBitmapSize);
2531 break;
2532
2533 case __kCFCharSetClassCompactBitmap: {
2534 const uint8_t *cBitmap = __CFCSetCompactBitmapBits(theOtherSet);
2535 uint8_t *newBitmap;
2536 uint32_t size = __CFCSetGetCompactBitmapSize(cBitmap);
2537 newBitmap = (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet), sizeof(uint8_t) * size, 0);
2538 __CFCSetPutBitmapBits(theSet, newBitmap);
2539 memmove(newBitmap, cBitmap, size);
2540 }
2541 break;
2542
2543 default:
2544 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
2545 }
2546 } else {
2547 __CFCSetMakeBitmap(theSet);
2548 if (__CFCSetIsBitmap(theOtherSet)) {
2549 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2550 UInt32 *bitmap2 = (UInt32*)__CFCSetBitmapBits(theOtherSet);
2551 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2552 while (length--) *bitmap1++ &= *bitmap2++;
2553 } else {
2554 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2555 UInt32 *bitmap2;
2556 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2557 uint8_t bitmapBuffer[__kCFBitmapSize];
2558 __CFCSetGetBitmap(theOtherSet, bitmapBuffer);
2559 bitmap2 = (UInt32*)bitmapBuffer;
2560 while (length--) *bitmap1++ &= *bitmap2++;
2561 }
2562 __CFCSetPutHasHashValue(theSet, false);
2563 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2564 CFMutableCharacterSetRef annexPlane;
2565 CFMutableCharacterSetRef otherSetPlane;
2566 int idx;
2567 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2568 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2569 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx);
2570 CFCharacterSetIntersect(annexPlane, otherSetPlane);
2571 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2572 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
2573 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2574 }
2575 }
2576 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2577 } else if (__CFCSetIsBuiltin(theOtherSet) && !__CFCSetAnnexIsInverted(theOtherSet)) {
2578 CFMutableCharacterSetRef annexPlane;
2579 uint8_t bitmapBuffer[__kCFBitmapSize];
2580 uint8_t result;
2581 int planeIndex;
2582 UInt32 *bitmap1;
2583 UInt32 *bitmap2;
2584 CFIndex length;
2585
2586 for (planeIndex = 1;planeIndex <= MAX_ANNEX_PLANE;planeIndex++) {
2587 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, planeIndex);
2588 if (annexPlane) {
2589 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet), planeIndex, bitmapBuffer, false);
2590 if (result == kCFUniCharBitmapEmpty) {
2591 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, planeIndex);
2592 } else if (result == kCFUniCharBitmapFilled) {
2593 Boolean isEmpty = true;
2594
2595 __CFCSetMakeBitmap(annexPlane);
2596 bitmap1 = (UInt32 *)__CFCSetBitmapBits(annexPlane);
2597 length = __kCFBitmapSize / sizeof(UInt32);
2598 bitmap2 = (UInt32*)bitmapBuffer;
2599
2600 while (length--) {
2601 if ((*bitmap1++ &= *bitmap2++)) isEmpty = false;
2602 }
2603 if (isEmpty) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, planeIndex);
2604 }
2605 }
2606 }
2607 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2608 } else if (__CFCSetIsRange(theOtherSet)) {
2609 CFMutableCharacterSetRef tempOtherSet = CFCharacterSetCreateMutable(CFGetAllocator(theSet));
2610 CFMutableCharacterSetRef annexPlane;
2611 CFMutableCharacterSetRef otherSetPlane;
2612 int idx;
2613
2614 __CFCSetAddNonBMPPlanesInRange(tempOtherSet, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet), __CFCSetRangeLength(theOtherSet)));
2615
2616 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2617 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(tempOtherSet, idx))) {
2618 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx);
2619 CFCharacterSetIntersect(annexPlane, otherSetPlane);
2620 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2621 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
2622 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2623 }
2624 }
2625 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2626 CFRelease(tempOtherSet);
2627 } else if ((__CFCSetHasNonBMPPlane(theSet) || __CFCSetAnnexIsInverted(theSet)) && !__CFCSetAnnexIsInverted(theOtherSet)) {
2628 __CFCSetDeallocateAnnexPlane(theSet);
2629 }
2630 }
2631 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2632 } else { // It's NSCharacterSet
2633 CFDataRef bitmapRep = CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault, theOtherSet);
2634 const UInt32 *bitmap2 = (bitmapRep && CFDataGetLength(bitmapRep) ? (const UInt32 *)CFDataGetBytePtr(bitmapRep) : NULL);
2635 if (bitmap2) {
2636 UInt32 *bitmap1;
2637 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2638 __CFCSetMakeBitmap(theSet);
2639 bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2640 while (length--) *bitmap1++ &= *bitmap2++;
2641 __CFCSetPutHasHashValue(theSet, false);
2642 }
2643 CFRelease(bitmapRep);
2644 }
2645 }
2646
2647 void CFCharacterSetInvert(CFMutableCharacterSetRef theSet) {
2648
2649 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, void, theSet, "invert");
2650
2651 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2652
2653 __CFCSetPutHasHashValue(theSet, false);
2654
2655 if (__CFCSetClassType(theSet) == __kCFCharSetClassBitmap) {
2656 CFIndex idx;
2657 CFIndex count = __kCFBitmapSize / sizeof(UInt32);
2658 UInt32 *bitmap = (UInt32*) __CFCSetBitmapBits(theSet);
2659
2660 if (NULL == bitmap) {
2661 bitmap = (UInt32 *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFBitmapSize, 0);
2662 __CFCSetPutBitmapBits(theSet, (uint8_t *)bitmap);
2663 for (idx = 0;idx < count;idx++) bitmap[idx] = ((UInt32)0xFFFFFFFF);
2664 } else {
2665 for (idx = 0;idx < count;idx++) bitmap[idx] = ~(bitmap[idx]);
2666 }
2667 __CFCSetAllocateAnnexForPlane(theSet, 0); // We need to alloc annex to invert
2668 } else if (__CFCSetClassType(theSet) == __kCFCharSetClassCompactBitmap) {
2669 uint8_t *bitmap = __CFCSetCompactBitmapBits(theSet);
2670 int idx;
2671 int length = 0;
2672 uint8_t value;
2673
2674 for (idx = 0;idx < __kCFCompactBitmapNumPages;idx++) {
2675 value = bitmap[idx];
2676
2677 if (value == 0) {
2678 bitmap[idx] = UINT8_MAX;
2679 } else if (value == UINT8_MAX) {
2680 bitmap[idx] = 0;
2681 } else {
2682 length += __kCFCompactBitmapPageSize;
2683 }
2684 }
2685 bitmap += __kCFCompactBitmapNumPages;
2686 for (idx = 0;idx < length;idx++) bitmap[idx] = ~(bitmap[idx]);
2687 __CFCSetAllocateAnnexForPlane(theSet, 0); // We need to alloc annex to invert
2688 } else {
2689 __CFCSetPutIsInverted(theSet, !__CFCSetIsInverted(theSet));
2690 }
2691 __CFCSetAnnexSetIsInverted(theSet, !__CFCSetAnnexIsInverted(theSet));
2692 }
2693
2694 void CFCharacterSetCompact(CFMutableCharacterSetRef theSet) {
2695 if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) __CFCSetMakeCompact(theSet);
2696 if (__CFCSetHasNonBMPPlane(theSet)) {
2697 CFMutableCharacterSetRef annex;
2698 int idx;
2699
2700 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2701 if ((annex = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) && __CFCSetIsBitmap(annex) && __CFCSetBitmapBits(annex)) {
2702 __CFCSetMakeCompact(annex);
2703 }
2704 }
2705 }
2706 }
2707
2708 void CFCharacterSetFast(CFMutableCharacterSetRef theSet) {
2709 if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) __CFCSetMakeBitmap(theSet);
2710 if (__CFCSetHasNonBMPPlane(theSet)) {
2711 CFMutableCharacterSetRef annex;
2712 int idx;
2713
2714 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2715 if ((annex = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) && __CFCSetIsCompactBitmap(annex) && __CFCSetCompactBitmapBits(annex)) {
2716 __CFCSetMakeBitmap(annex);
2717 }
2718 }
2719 }
2720 }
2721
2722 /* Keyed-coding support
2723 */
2724 CFCharacterSetKeyedCodingType _CFCharacterSetGetKeyedCodingType(CFCharacterSetRef cset) {
2725 if (CF_IS_OBJC(__kCFCharacterSetTypeID, cset)) return kCFCharacterSetKeyedCodingTypeBitmap;
2726
2727 switch (__CFCSetClassType(cset)) {
2728 case __kCFCharSetClassBuiltin: return ((__CFCSetBuiltinType(cset) < kCFCharacterSetSymbol) ? kCFCharacterSetKeyedCodingTypeBuiltin : kCFCharacterSetKeyedCodingTypeBuiltinAndBitmap);
2729 case __kCFCharSetClassRange: return kCFCharacterSetKeyedCodingTypeRange;
2730
2731 case __kCFCharSetClassString: // We have to check if we have non-BMP here
2732 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) return kCFCharacterSetKeyedCodingTypeString; // BMP only. we can archive the string
2733 /* fallthrough */
2734
2735 default:
2736 return kCFCharacterSetKeyedCodingTypeBitmap;
2737 }
2738 }
2739
2740 CFCharacterSetPredefinedSet _CFCharacterSetGetKeyedCodingBuiltinType(CFCharacterSetRef cset) { return __CFCSetBuiltinType(cset); }
2741 CFRange _CFCharacterSetGetKeyedCodingRange(CFCharacterSetRef cset) { return CFRangeMake(__CFCSetRangeFirstChar(cset), __CFCSetRangeLength(cset)); }
2742 CFStringRef _CFCharacterSetCreateKeyedCodingString(CFCharacterSetRef cset) { return CFStringCreateWithCharacters(kCFAllocatorSystemDefault, __CFCSetStringBuffer(cset), __CFCSetStringLength(cset)); }
2743
2744 bool _CFCharacterSetIsInverted(CFCharacterSetRef cset) { return (__CFCSetIsInverted(cset) != 0); }
2745 void _CFCharacterSetSetIsInverted(CFCharacterSetRef cset, bool flag) { __CFCSetPutIsInverted((CFMutableCharacterSetRef)cset, flag); }
2746
2747 /* Inline buffer support
2748 */
2749 void CFCharacterSetInitInlineBuffer(CFCharacterSetRef cset, CFCharacterSetInlineBuffer *buffer) {
2750 memset(buffer, 0, sizeof(CFCharacterSetInlineBuffer));
2751 buffer->cset = cset;
2752 buffer->rangeLimit = 0x10000;
2753
2754 if (CF_IS_OBJC(__kCFCharacterSetTypeID, cset)) {
2755 CFCharacterSetRef expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(cset);
2756
2757 if (NULL == expandedSet) {
2758 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2759 buffer->rangeLimit = 0x110000;
2760
2761 return;
2762 } else {
2763 cset = expandedSet;
2764 }
2765 }
2766
2767 switch (__CFCSetClassType(cset)) {
2768 case __kCFCharSetClassBuiltin:
2769 buffer->bitmap = CFUniCharGetBitmapPtrForPlane(__CFCSetBuiltinType(cset), 0);
2770 buffer->rangeLimit = 0x110000;
2771 if (NULL == buffer->bitmap) {
2772 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2773 } else {
2774 if (__CFCSetIsInverted(cset)) buffer->flags = kCFCharacterSetIsInverted;
2775 }
2776 break;
2777
2778 case __kCFCharSetClassRange:
2779 buffer->rangeStart = __CFCSetRangeFirstChar(cset);
2780 buffer->rangeLimit = __CFCSetRangeFirstChar(cset) + __CFCSetRangeLength(cset);
2781 if (__CFCSetIsInverted(cset)) buffer->flags = kCFCharacterSetIsInverted;
2782 return;
2783
2784 case __kCFCharSetClassString:
2785 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2786 if (__CFCSetStringLength(cset) > 0) {
2787 buffer->rangeStart = *__CFCSetStringBuffer(cset);
2788 buffer->rangeLimit = *(__CFCSetStringBuffer(cset) + __CFCSetStringLength(cset) - 1) + 1;
2789
2790 if (__CFCSetIsInverted(cset)) {
2791 if (0 == buffer->rangeStart) {
2792 buffer->rangeStart = buffer->rangeLimit;
2793 buffer->rangeLimit = 0x10000;
2794 } else if (0x10000 == buffer->rangeLimit) {
2795 buffer->rangeLimit = buffer->rangeStart;
2796 buffer->rangeStart = 0;
2797 } else {
2798 buffer->rangeStart = 0;
2799 buffer->rangeLimit = 0x10000;
2800 }
2801 }
2802 }
2803 break;
2804
2805 case __kCFCharSetClassBitmap:
2806 case __kCFCharSetClassCompactBitmap:
2807 buffer->bitmap = __CFCSetCompactBitmapBits(cset);
2808 if (NULL == buffer->bitmap) {
2809 buffer->flags = kCFCharacterSetIsCompactBitmap;
2810 if (__CFCSetIsInverted(cset)) buffer->flags |= kCFCharacterSetIsInverted;
2811 } else {
2812 if (__kCFCharSetClassCompactBitmap == __CFCSetClassType(cset)) buffer->flags = kCFCharacterSetIsCompactBitmap;
2813 }
2814 break;
2815
2816 default:
2817 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
2818 return;
2819 }
2820
2821 if (__CFCSetAnnexIsInverted(cset)) {
2822 buffer->rangeLimit = 0x110000;
2823 } else if (__CFCSetHasNonBMPPlane(cset)) {
2824 CFIndex index;
2825
2826 for (index = MAX_ANNEX_PLANE;index > 0;index--) {
2827 if (NULL != __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, index)) {
2828 buffer->rangeLimit = (index + 1) << 16;
2829 break;
2830 }
2831 }
2832 }
2833 }