]> git.saurik.com Git - apple/cf.git/blob - CFCharacterSet.c
CF-635.tar.gz
[apple/cf.git] / CFCharacterSet.c
1 /*
2 * Copyright (c) 2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFCharacterSet.c
25 Copyright (c) 1999-2011, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include <CoreFoundation/CFCharacterSet.h>
30 #include <CoreFoundation/CFByteOrder.h>
31 #include "CFCharacterSetPriv.h"
32 #include <CoreFoundation/CFData.h>
33 #include <CoreFoundation/CFString.h>
34 #include "CFInternal.h"
35 #include <CoreFoundation/CFUniChar.h>
36 #include "CFUniCharPriv.h"
37 #include <stdlib.h>
38 #include <string.h>
39
40
/* Geometry of a single-plane bitmap: one bit per UTF-16 code unit.
*/
#define BITSPERBYTE 8 /* (CHAR_BIT * sizeof(unsigned char)) */
#define LOG_BPB 3 /* log2(BITSPERBYTE): shift turning a character into a byte index */
#define LOG_BPLW 5 /* log2(32): shift turning a character into a 32-bit word index */
#define NUMCHARACTERS 65536 /* characters covered by one bitmap */

/* Highest plane number kept in the annex (annex planes are numbered from 1).
*/
#define MAX_ANNEX_PLANE (16)

/* Number of things in the array keeping the bits.
*/
#define __kCFBitmapSize (NUMCHARACTERS / BITSPERBYTE)

/* How many elements max can be in an __kCFCharSetClassString CFCharacterSet
*/
#define __kCFStringCharSetMax 64

/* The last builtin set ID number
*/
#define __kCFLastBuiltinSetID kCFCharacterSetNewline

/* How many elements in the "singles" array before we use binary search.
*/
#define __kCFSetBreakeven 10

/* This tells us, within 1k or so, whether a thing is POTENTIALLY in the set (in the bitmap blob of the private structure) before we bother to do specific checking.
   Both arguments are parenthesized so that expressions can be passed safely, and the
   probe bit is built from an unsigned constant so that selecting bit 31 stays defined
   even where long is 32 bits wide.
*/
#define __CFCSetBitsInRange(n, i) ((i)[(n) >> 15] & (1UL << (((n) >> 10) % 32)))

/* Compact bitmap params
*/
#define __kCFCompactBitmapNumPages (256) /* header entries: one per 256-character page */

#define __kCFCompactBitmapMaxPages (128) // the max pages allocated

#define __kCFCompactBitmapPageSize (__kCFBitmapSize / __kCFCompactBitmapNumPages) /* bytes per page */
75
76 typedef struct {
77 CFCharacterSetRef *_nonBMPPlanes;
78 unsigned int _validEntriesBitmap;
79 unsigned char _numOfAllocEntries;
80 unsigned char _isAnnexInverted;
81 uint16_t _padding;
82 } CFCharSetAnnexStruct;
83
84 struct __CFCharacterSet {
85 CFRuntimeBase _base;
86 CFHashCode _hashValue;
87 union {
88 struct {
89 CFIndex _type;
90 } _builtin;
91 struct {
92 UInt32 _firstChar;
93 CFIndex _length;
94 } _range;
95 struct {
96 UniChar *_buffer;
97 CFIndex _length;
98 } _string;
99 struct {
100 uint8_t *_bits;
101 } _bitmap;
102 struct {
103 uint8_t *_cBits;
104 } _compactBitmap;
105 } _variants;
106 CFCharSetAnnexStruct *_annex;
107 };
108
109 /* _base._info values interesting for CFCharacterSet
110 */
enum {
    /* Storage class of the set (selects the active _variants member) */
    __kCFCharSetClassTypeMask = 0x0070,
    __kCFCharSetClassBuiltin = 0x0000,
    __kCFCharSetClassRange = 0x0010,
    __kCFCharSetClassString = 0x0020,
    __kCFCharSetClassBitmap = 0x0030,
    __kCFCharSetClassSet = 0x0040,              /* same value as the compact bitmap class below */
    __kCFCharSetClassCompactBitmap = 0x0040,

    /* Set contents are to be read as the complement of what is stored */
    __kCFCharSetIsInvertedMask = 0x0008,
    __kCFCharSetIsInverted = 0x0008,

    /* Hash-value flag */
    __kCFCharSetHasHashValueMask = 0x0004,
    __kCFCharSetHasHashValue = 0x0004,

    /* Generic CFBase values */
    __kCFCharSetIsMutableMask = 0x0001,
    __kCFCharSetIsMutable = 0x0001,
};
130
/* Inline accessor functions for _base._info
*/
133 CF_INLINE Boolean __CFCSetIsMutable(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetIsMutableMask) == __kCFCharSetIsMutable;}
134 CF_INLINE Boolean __CFCSetIsBuiltin(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassBuiltin;}
135 CF_INLINE Boolean __CFCSetIsRange(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassRange;}
136 CF_INLINE Boolean __CFCSetIsString(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassString;}
137 CF_INLINE Boolean __CFCSetIsBitmap(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassBitmap;}
138 CF_INLINE Boolean __CFCSetIsCompactBitmap(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassCompactBitmap;}
139 CF_INLINE Boolean __CFCSetIsInverted(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetIsInvertedMask) == __kCFCharSetIsInverted;}
140 CF_INLINE Boolean __CFCSetHasHashValue(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetHasHashValueMask) == __kCFCharSetHasHashValue;}
141 CF_INLINE UInt32 __CFCSetClassType(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask);}
142
143 CF_INLINE void __CFCSetPutIsMutable(CFMutableCharacterSetRef cset, Boolean isMutable) {(isMutable ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetIsMutable) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~ __kCFCharSetIsMutable));}
144 CF_INLINE void __CFCSetPutIsInverted(CFMutableCharacterSetRef cset, Boolean isInverted) {(isInverted ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetIsInverted) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetIsInverted));}
145 CF_INLINE void __CFCSetPutHasHashValue(CFMutableCharacterSetRef cset, Boolean hasHash) {(hasHash ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetHasHashValue) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetHasHashValue));}
146 CF_INLINE void __CFCSetPutClassType(CFMutableCharacterSetRef cset, UInt32 classType) {cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetClassTypeMask; cset->_base._cfinfo[CF_INFO_BITS] |= classType;}
147
148 __private_extern__ Boolean __CFCharacterSetIsMutable(CFCharacterSetRef cset) {return __CFCSetIsMutable(cset);}
149
/* Inline contents accessor functions
*/
152 CF_INLINE CFCharacterSetPredefinedSet __CFCSetBuiltinType(CFCharacterSetRef cset) {return cset->_variants._builtin._type;}
153 CF_INLINE UInt32 __CFCSetRangeFirstChar(CFCharacterSetRef cset) {return cset->_variants._range._firstChar;}
154 CF_INLINE CFIndex __CFCSetRangeLength(CFCharacterSetRef cset) {return cset->_variants._range._length;}
155 CF_INLINE UniChar *__CFCSetStringBuffer(CFCharacterSetRef cset) {return (UniChar*)(cset->_variants._string._buffer);}
156 CF_INLINE CFIndex __CFCSetStringLength(CFCharacterSetRef cset) {return cset->_variants._string._length;}
157 CF_INLINE uint8_t *__CFCSetBitmapBits(CFCharacterSetRef cset) {return cset->_variants._bitmap._bits;}
158 CF_INLINE uint8_t *__CFCSetCompactBitmapBits(CFCharacterSetRef cset) {return cset->_variants._compactBitmap._cBits;}
159
160 CF_INLINE void __CFCSetPutBuiltinType(CFMutableCharacterSetRef cset, CFCharacterSetPredefinedSet type) {cset->_variants._builtin._type = type;}
161 CF_INLINE void __CFCSetPutRangeFirstChar(CFMutableCharacterSetRef cset, UInt32 first) {cset->_variants._range._firstChar = first;}
162 CF_INLINE void __CFCSetPutRangeLength(CFMutableCharacterSetRef cset, CFIndex length) {cset->_variants._range._length = length;}
163 CF_INLINE void __CFCSetPutStringBuffer(CFMutableCharacterSetRef cset, UniChar *theBuffer) {cset->_variants._string._buffer = theBuffer;}
164 CF_INLINE void __CFCSetPutStringLength(CFMutableCharacterSetRef cset, CFIndex length) {cset->_variants._string._length = length;}
165 CF_INLINE void __CFCSetPutBitmapBits(CFMutableCharacterSetRef cset, uint8_t *bits) {cset->_variants._bitmap._bits = bits;}
166 CF_INLINE void __CFCSetPutCompactBitmapBits(CFMutableCharacterSetRef cset, uint8_t *bits) {cset->_variants._compactBitmap._cBits = bits;}
167
168 /* Validation funcs
169 */
#if defined(CF_ENABLE_ASSERTIONS)
/* Asserts that type is a predefined-set identifier in 1 ... __kCFLastBuiltinSetID.
   The value is logged through %ld with an explicit cast so the format matches
   the argument width regardless of how wide the identifier type is. */
CF_INLINE void __CFCSetValidateBuiltinType(CFCharacterSetPredefinedSet type, const char *func) {
    CFAssert2(type > 0 && type <= __kCFLastBuiltinSetID, __kCFLogAssertion, "%s: Unknown builtin type %ld", func, (long)type);
}

/* Asserts that theRange starts at a non-negative location and ends at or below 0x1FFFFF.
   location and length are logged through %ld with explicit casts (see above). */
CF_INLINE void __CFCSetValidateRange(CFRange theRange, const char *func) {
    CFAssert3(theRange.location >= 0 && theRange.location + theRange.length <= 0x1FFFFF, __kCFLogAssertion, "%s: Range out of Unicode range (location -> %ld length -> %ld)", func, (long)theRange.location, (long)theRange.length);
}

/* Asserts that cset is a CFCharacterSet instance and that it is mutable. */
CF_INLINE void __CFCSetValidateTypeAndMutability(CFCharacterSetRef cset, const char *func) {
    __CFGenericValidateType(cset, __kCFCharacterSetTypeID);
    CFAssert1(__CFCSetIsMutable(cset), __kCFLogAssertion, "%s: Immutable character set passed to mutable function", func);
}
#else
/* Assertions disabled: the validators compile to nothing. */
#define __CFCSetValidateBuiltinType(t,f)
#define __CFCSetValidateRange(r,f)
#define __CFCSetValidateTypeAndMutability(r,f)
#endif
186
187 /* Inline utility funcs
188 */
189 static Boolean __CFCSetIsEqualBitmap(const UInt32 *bits1, const UInt32 *bits2) {
190 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
191
192 if (bits1 == bits2) {
193 return true;
194 } else if (bits1 && bits2) {
195 if (bits1 == (const UInt32 *)-1) {
196 while (length--) if ((UInt32)-1 != *bits2++) return false;
197 } else if (bits2 == (const UInt32 *)-1) {
198 while (length--) if ((UInt32)-1 != *bits1++) return false;
199 } else {
200 while (length--) if (*bits1++ != *bits2++) return false;
201 }
202 return true;
203 } else if (!bits1 && !bits2) { // empty set
204 return true;
205 } else {
206 if (bits2) bits1 = bits2;
207 if (bits1 == (const UInt32 *)-1) return false;
208 while (length--) if (*bits1++) return false;
209 return true;
210 }
211 }
212
213 CF_INLINE Boolean __CFCSetIsEqualBitmapInverted(const UInt32 *bits1, const UInt32 *bits2) {
214 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
215
216 while (length--) if (*bits1++ != ~(*(bits2++))) return false;
217 return true;
218 }
219
220 static Boolean __CFCSetIsBitmapEqualToRange(const UInt32 *bits, UniChar firstChar, UniChar lastChar, Boolean isInverted) {
221 CFIndex firstCharIndex = firstChar >> LOG_BPB;
222 CFIndex lastCharIndex = lastChar >> LOG_BPB;
223 CFIndex length;
224 UInt32 value;
225
226 if (firstCharIndex == lastCharIndex) {
227 value = ((((UInt32)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((UInt32)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))))) << (((sizeof(UInt32) - 1) - (firstCharIndex % sizeof(UInt32))) * BITSPERBYTE);
228 value = CFSwapInt32HostToBig(value);
229 firstCharIndex = lastCharIndex = firstChar >> LOG_BPLW;
230 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
231 } else {
232 UInt32 firstCharMask;
233 UInt32 lastCharMask;
234
235 length = firstCharIndex % sizeof(UInt32);
236 firstCharMask = (((((UInt32)0xFF) << (firstChar & (BITSPERBYTE - 1))) & 0xFF) << (((sizeof(UInt32) - 1) - length) * BITSPERBYTE)) | (((UInt32)0xFFFFFFFF) >> ((length + 1) * BITSPERBYTE));
237
238 length = lastCharIndex % sizeof(UInt32);
239 lastCharMask = ((((UInt32)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))) << (((sizeof(UInt32) - 1) - length) * BITSPERBYTE)) | (((UInt32)0xFFFFFFFF) << ((sizeof(UInt32) - length) * BITSPERBYTE));
240
241 firstCharIndex = firstChar >> LOG_BPLW;
242 lastCharIndex = lastChar >> LOG_BPLW;
243
244 if (firstCharIndex == lastCharIndex) {
245 firstCharMask &= lastCharMask;
246 value = CFSwapInt32HostToBig(firstCharMask & lastCharMask);
247 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
248 } else {
249 value = CFSwapInt32HostToBig(firstCharMask);
250 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
251
252 value = CFSwapInt32HostToBig(lastCharMask);
253 if (*(bits + lastCharIndex) != (isInverted ? ~value : value)) return FALSE;
254 }
255 }
256
257 length = firstCharIndex;
258 value = (isInverted ? ((UInt32)0xFFFFFFFF) : 0);
259 while (length--) {
260 if (*(bits++) != value) return FALSE;
261 }
262
263 ++bits; // Skip firstCharIndex
264 length = (lastCharIndex - (firstCharIndex + 1));
265 value = (isInverted ? 0 : ((UInt32)0xFFFFFFFF));
266 while (length-- > 0) {
267 if (*(bits++) != value) return FALSE;
268 }
269 if (firstCharIndex != lastCharIndex) ++bits;
270
271 length = (0xFFFF >> LOG_BPLW) - lastCharIndex;
272 value = (isInverted ? ((UInt32)0xFFFFFFFF) : 0);
273 while (length--) {
274 if (*(bits++) != value) return FALSE;
275 }
276
277 return TRUE;
278 }
279
280 CF_INLINE Boolean __CFCSetIsBitmapSupersetOfBitmap(const UInt32 *bits1, const UInt32 *bits2, Boolean isInverted1, Boolean isInverted2) {
281 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
282 UInt32 val1, val2;
283
284 while (length--) {
285 val2 = (isInverted2 ? ~(*(bits2++)) : *(bits2++));
286 val1 = (isInverted1 ? ~(*(bits1++)) : *(bits1++)) & val2;
287 if (val1 != val2) return false;
288 }
289
290 return true;
291 }
292
293 CF_INLINE Boolean __CFCSetHasNonBMPPlane(CFCharacterSetRef cset) { return ((cset)->_annex && (cset)->_annex->_validEntriesBitmap ? true : false); }
294 CF_INLINE Boolean __CFCSetAnnexIsInverted (CFCharacterSetRef cset) { return ((cset)->_annex && (cset)->_annex->_isAnnexInverted ? true : false); }
295 CF_INLINE UInt32 __CFCSetAnnexValidEntriesBitmap(CFCharacterSetRef cset) { return ((cset)->_annex ? (cset)->_annex->_validEntriesBitmap : 0); }
296
297 CF_INLINE Boolean __CFCSetIsEmpty(CFCharacterSetRef cset) {
298 if (__CFCSetHasNonBMPPlane(cset) || __CFCSetAnnexIsInverted(cset)) return false;
299
300 switch (__CFCSetClassType(cset)) {
301 case __kCFCharSetClassRange: if (!__CFCSetRangeLength(cset)) return true; break;
302 case __kCFCharSetClassString: if (!__CFCSetStringLength(cset)) return true; break;
303 case __kCFCharSetClassBitmap: if (!__CFCSetBitmapBits(cset)) return true; break;
304 case __kCFCharSetClassCompactBitmap: if (!__CFCSetCompactBitmapBits(cset)) return true; break;
305 }
306 return false;
307 }
308
309 CF_INLINE void __CFCSetBitmapAddCharacter(uint8_t *bitmap, UniChar theChar) {
310 bitmap[(theChar) >> LOG_BPB] |= (((unsigned)1) << (theChar & (BITSPERBYTE - 1)));
311 }
312
313 CF_INLINE void __CFCSetBitmapRemoveCharacter(uint8_t *bitmap, UniChar theChar) {
314 bitmap[(theChar) >> LOG_BPB] &= ~(((unsigned)1) << (theChar & (BITSPERBYTE - 1)));
315 }
316
317 CF_INLINE Boolean __CFCSetIsMemberBitmap(const uint8_t *bitmap, UniChar theChar) {
318 return ((bitmap[(theChar) >> LOG_BPB] & (((unsigned)1) << (theChar & (BITSPERBYTE - 1)))) ? true : false);
319 }
320
321 #define NUM_32BIT_SLOTS (NUMCHARACTERS / 32)
322
323 CF_INLINE void __CFCSetBitmapFastFillWithValue(UInt32 *bitmap, uint8_t value) {
324 UInt32 mask = (value << 24) | (value << 16) | (value << 8) | value;
325 UInt32 numSlots = NUMCHARACTERS / 32;
326
327 while (numSlots--) *(bitmap++) = mask;
328 }
329
330 CF_INLINE void __CFCSetBitmapAddCharactersInRange(uint8_t *bitmap, UniChar firstChar, UniChar lastChar) {
331 if (firstChar == lastChar) {
332 bitmap[firstChar >> LOG_BPB] |= (((unsigned)1) << (firstChar & (BITSPERBYTE - 1)));
333 } else {
334 UInt32 idx = firstChar >> LOG_BPB;
335 UInt32 max = lastChar >> LOG_BPB;
336
337 if (idx == max) {
338 bitmap[idx] |= (((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
339 } else {
340 bitmap[idx] |= (((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1)));
341 bitmap[max] |= (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
342
343 ++idx;
344 while (idx < max) bitmap[idx++] = 0xFF;
345 }
346 }
347 }
348
349 CF_INLINE void __CFCSetBitmapRemoveCharactersInRange(uint8_t *bitmap, UniChar firstChar, UniChar lastChar) {
350 UInt32 idx = firstChar >> LOG_BPB;
351 UInt32 max = lastChar >> LOG_BPB;
352
353 if (idx == max) {
354 bitmap[idx] &= ~((((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))));
355 } else {
356 bitmap[idx] &= ~(((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1)));
357 bitmap[max] &= ~(((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
358
359 ++idx;
360 while (idx < max) bitmap[idx++] = 0;
361 }
362 }
363
/* Valid-plane bookkeeping: bit `plane` of `bitmap` records whether that plane
   has an entry.  The Set and Clear forms modify `bitmap` in place, so it must be an lvalue. */
#define __CFCSetAnnexBitmapSetPlane(bitmap,plane)   ((bitmap) |= (1 << (plane)))
#define __CFCSetAnnexBitmapClearPlane(bitmap,plane) ((bitmap) &= (~(1 << (plane))))
#define __CFCSetAnnexBitmapGetPlane(bitmap,plane)   ((bitmap) & (1 << (plane)))
367
368 CF_INLINE void __CFCSetAllocateAnnexForPlane(CFCharacterSetRef cset, int plane) {
369 if (cset->_annex == NULL) {
370 ((CFMutableCharacterSetRef)cset)->_annex = (CFCharSetAnnexStruct *)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharSetAnnexStruct), 0);
371 cset->_annex->_numOfAllocEntries = plane;
372 cset->_annex->_isAnnexInverted = false;
373 cset->_annex->_validEntriesBitmap = 0;
374 cset->_annex->_nonBMPPlanes = ((plane > 0) ? (CFCharacterSetRef*)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharacterSetRef) * plane, 0) : NULL);
375 } else if (cset->_annex->_numOfAllocEntries < plane) {
376 cset->_annex->_numOfAllocEntries = plane;
377 if (NULL == cset->_annex->_nonBMPPlanes) {
378 cset->_annex->_nonBMPPlanes = (CFCharacterSetRef*)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharacterSetRef) * plane, 0);
379 } else {
380 cset->_annex->_nonBMPPlanes = (CFCharacterSetRef*)CFAllocatorReallocate(CFGetAllocator(cset), (void *)cset->_annex->_nonBMPPlanes, sizeof(CFCharacterSetRef) * plane, 0);
381 }
382 }
383 }
384
385 CF_INLINE void __CFCSetAnnexSetIsInverted(CFCharacterSetRef cset, Boolean flag) {
386 if (flag) __CFCSetAllocateAnnexForPlane(cset, 0);
387 if (cset->_annex) ((CFMutableCharacterSetRef)cset)->_annex->_isAnnexInverted = flag;
388 }
389
390 CF_INLINE void __CFCSetPutCharacterSetToAnnexPlane(CFCharacterSetRef cset, CFCharacterSetRef annexCSet, int plane) {
391 __CFCSetAllocateAnnexForPlane(cset, plane);
392 if (__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane)) CFRelease(cset->_annex->_nonBMPPlanes[plane - 1]);
393 if (annexCSet) {
394 cset->_annex->_nonBMPPlanes[plane - 1] = (CFCharacterSetRef)CFRetain(annexCSet);
395 __CFCSetAnnexBitmapSetPlane(cset->_annex->_validEntriesBitmap, plane);
396 } else {
397 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, plane);
398 }
399 }
400
401 CF_INLINE CFCharacterSetRef __CFCSetGetAnnexPlaneCharacterSet(CFCharacterSetRef cset, int plane) {
402 __CFCSetAllocateAnnexForPlane(cset, plane);
403 if (!__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane)) {
404 cset->_annex->_nonBMPPlanes[plane - 1] = (CFCharacterSetRef)CFCharacterSetCreateMutable(CFGetAllocator(cset));
405 __CFCSetAnnexBitmapSetPlane(cset->_annex->_validEntriesBitmap, plane);
406 }
407 return cset->_annex->_nonBMPPlanes[plane - 1];
408 }
409
410 CF_INLINE CFCharacterSetRef __CFCSetGetAnnexPlaneCharacterSetNoAlloc(CFCharacterSetRef cset, int plane) {
411 return (cset->_annex && __CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane) ? cset->_annex->_nonBMPPlanes[plane - 1] : NULL);
412 }
413
414 CF_INLINE void __CFCSetDeallocateAnnexPlane(CFCharacterSetRef cset) {
415 if (cset->_annex) {
416 int idx;
417
418 for (idx = 0;idx < MAX_ANNEX_PLANE;idx++) {
419 if (__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, idx + 1)) {
420 CFRelease(cset->_annex->_nonBMPPlanes[idx]);
421 }
422 }
423 CFAllocatorDeallocate(CFGetAllocator(cset), cset->_annex->_nonBMPPlanes);
424 CFAllocatorDeallocate(CFGetAllocator(cset), cset->_annex);
425 ((CFMutableCharacterSetRef)cset)->_annex = NULL;
426 }
427 }
428
429 CF_INLINE uint8_t __CFCSetGetHeaderValue(const uint8_t *bitmap, int *numPages) {
430 uint8_t value = *bitmap;
431
432 if ((value == 0) || (value == UINT8_MAX)) {
433 int numBytes = __kCFCompactBitmapPageSize - 1;
434
435 while (numBytes > 0) {
436 if (*(++bitmap) != value) break;
437 --numBytes;
438 }
439 if (numBytes == 0) return value;
440 }
441 return (uint8_t)(++(*numPages));
442 }
443
444 CF_INLINE bool __CFCSetIsMemberInCompactBitmap(const uint8_t *compactBitmap, UTF16Char character) {
445 uint8_t value = compactBitmap[(character >> 8)]; // Assuming __kCFCompactBitmapNumPages == 256
446
447 if (value == 0) {
448 return false;
449 } else if (value == UINT8_MAX) {
450 return true;
451 } else {
452 compactBitmap += (__kCFCompactBitmapNumPages + (__kCFCompactBitmapPageSize * (value - 1)));
453 character &= 0xFF; // Assuming __kCFCompactBitmapNumPages == 256
454 return ((compactBitmap[(character / BITSPERBYTE)] & (1 << (character % BITSPERBYTE))) ? true : false);
455 }
456 }
457
458 CF_INLINE uint32_t __CFCSetGetCompactBitmapSize(const uint8_t *compactBitmap) {
459 uint32_t length = __kCFCompactBitmapNumPages;
460 uint32_t size = __kCFCompactBitmapNumPages;
461 uint8_t value;
462
463 while (length-- > 0) {
464 value = *(compactBitmap++);
465 if ((value != 0) && (value != UINT8_MAX)) size += __kCFCompactBitmapPageSize;
466 }
467 return size;
468 }
469
470 /* Take a private "set" structure and make a bitmap from it. Return the bitmap. THE CALLER MUST RELEASE THE RETURNED MEMORY as necessary.
471 */
472
473 CF_INLINE void __CFCSetBitmapProcessManyCharacters(unsigned char *map, unsigned n, unsigned m, Boolean isInverted) {
474 if (isInverted) {
475 __CFCSetBitmapRemoveCharactersInRange(map, n, m);
476 } else {
477 __CFCSetBitmapAddCharactersInRange(map, n, m);
478 }
479 }
480
481 CF_INLINE void __CFExpandCompactBitmap(const uint8_t *src, uint8_t *dst) {
482 const uint8_t *srcBody = src + __kCFCompactBitmapNumPages;
483 int i;
484 uint8_t value;
485
486 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
487 value = *(src++);
488 if ((value == 0) || (value == UINT8_MAX)) {
489 memset(dst, value, __kCFCompactBitmapPageSize);
490 } else {
491 memmove(dst, srcBody, __kCFCompactBitmapPageSize);
492 srcBody += __kCFCompactBitmapPageSize;
493 }
494 dst += __kCFCompactBitmapPageSize;
495 }
496 }
497
498
499 static void __CFCheckForExpandedSet(CFCharacterSetRef cset) {
500 static int8_t __CFNumberOfPlanesForLogging = -1;
501 static bool warnedOnce = false;
502
503 if (0 > __CFNumberOfPlanesForLogging) {
504 const char *envVar = __CFgetenv("CFCharacterSetCheckForExpandedSet");
505 long value = (envVar ? strtol_l(envVar, NULL, 0, NULL) : 0);
506 __CFNumberOfPlanesForLogging = (int8_t)(((value > 0) && (value <= 16)) ? value : 0);
507 }
508
509 if (__CFNumberOfPlanesForLogging) {
510 uint32_t entries = __CFCSetAnnexValidEntriesBitmap(cset);
511 int count = 0;
512
513 while (entries) {
514 if ((entries & 1) && (++count >= __CFNumberOfPlanesForLogging)) {
515 if (!warnedOnce) {
516 CFLog(kCFLogLevelWarning, CFSTR("An expanded CFMutableCharacter has been detected. Recommend to compact with CFCharacterSetCreateCopy"));
517 warnedOnce = true;
518 }
519 break;
520 }
521 entries >>= 1;
522 }
523 }
524 }
525
526 static void __CFCSetGetBitmap(CFCharacterSetRef cset, uint8_t *bits) {
527 uint8_t *bitmap;
528 CFIndex length = __kCFBitmapSize;
529
530 if (__CFCSetIsBitmap(cset) && (bitmap = __CFCSetBitmapBits(cset))) {
531 memmove(bits, bitmap, __kCFBitmapSize);
532 } else {
533 Boolean isInverted = __CFCSetIsInverted(cset);
534 uint8_t value = (isInverted ? (uint8_t)-1 : 0);
535
536 bitmap = bits;
537 while (length--) *bitmap++ = value; // Initialize the buffer
538
539 if (!__CFCSetIsEmpty(cset)) {
540 switch (__CFCSetClassType(cset)) {
541 case __kCFCharSetClassBuiltin: {
542 UInt8 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset), 0, bits, (isInverted != 0));
543 if (result == kCFUniCharBitmapEmpty && isInverted) {
544 length = __kCFBitmapSize;
545 bitmap = bits;
546 while (length--) *bitmap++ = 0;
547 } else if (result == kCFUniCharBitmapAll && !isInverted) {
548 length = __kCFBitmapSize;
549 bitmap = bits;
550 while (length--) *bitmap++ = (UInt8)0xFF;
551 }
552 }
553 break;
554
555 case __kCFCharSetClassRange: {
556 UInt32 theChar = __CFCSetRangeFirstChar(cset);
557 if (theChar < NUMCHARACTERS) { // the range starts in BMP
558 length = __CFCSetRangeLength(cset);
559 if (theChar + length >= NUMCHARACTERS) length = NUMCHARACTERS - theChar;
560 if (isInverted) {
561 __CFCSetBitmapRemoveCharactersInRange(bits, theChar, (UniChar)(theChar + length) - 1);
562 } else {
563 __CFCSetBitmapAddCharactersInRange(bits, theChar, (UniChar)(theChar + length) - 1);
564 }
565 }
566 }
567 break;
568
569 case __kCFCharSetClassString: {
570 const UniChar *buffer = __CFCSetStringBuffer(cset);
571 length = __CFCSetStringLength(cset);
572 while (length--) (isInverted ? __CFCSetBitmapRemoveCharacter(bits, *buffer++) : __CFCSetBitmapAddCharacter(bits, *buffer++));
573 }
574 break;
575
576 case __kCFCharSetClassCompactBitmap:
577 __CFExpandCompactBitmap(__CFCSetCompactBitmapBits(cset), bits);
578 break;
579 }
580 }
581 }
582 }
583
584 static Boolean __CFCharacterSetEqual(CFTypeRef cf1, CFTypeRef cf2);
585
586 static Boolean __CFCSetIsEqualAnnex(CFCharacterSetRef cf1, CFCharacterSetRef cf2) {
587 CFCharacterSetRef subSet1;
588 CFCharacterSetRef subSet2;
589 Boolean isAnnexInvertStateIdentical = (__CFCSetAnnexIsInverted(cf1) == __CFCSetAnnexIsInverted(cf2) ? true: false);
590 int idx;
591
592 if (isAnnexInvertStateIdentical) {
593 if (__CFCSetAnnexValidEntriesBitmap(cf1) != __CFCSetAnnexValidEntriesBitmap(cf2)) return false;
594 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
595 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1, idx);
596 subSet2 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2, idx);
597
598 if (subSet1 && !__CFCharacterSetEqual(subSet1, subSet2)) return false;
599 }
600 } else {
601 uint8_t bitsBuf[__kCFBitmapSize];
602 uint8_t bitsBuf2[__kCFBitmapSize];
603
604 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
605 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1, idx);
606 subSet2 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2, idx);
607
608 if (subSet1 == NULL && subSet2 == NULL) {
609 return false;
610 } else if (subSet1 == NULL) {
611 if (__CFCSetIsBitmap(subSet2)) {
612 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(subSet2), (const UInt32 *)-1)) {
613 return false;
614 }
615 } else {
616 __CFCSetGetBitmap(subSet2, bitsBuf);
617 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1)) {
618 return false;
619 }
620 }
621 } else if (subSet2 == NULL) {
622 if (__CFCSetIsBitmap(subSet1)) {
623 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)-1)) {
624 return false;
625 }
626 } else {
627 __CFCSetGetBitmap(subSet1, bitsBuf);
628 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1)) {
629 return false;
630 }
631 }
632 } else {
633 Boolean isBitmap1 = __CFCSetIsBitmap(subSet1);
634 Boolean isBitmap2 = __CFCSetIsBitmap(subSet2);
635
636 if (isBitmap1 && isBitmap2) {
637 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)__CFCSetBitmapBits(subSet2))) {
638 return false;
639 }
640 } else if (!isBitmap1 && !isBitmap2) {
641 __CFCSetGetBitmap(subSet1, bitsBuf);
642 __CFCSetGetBitmap(subSet2, bitsBuf2);
643 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)bitsBuf, (const UInt32 *)bitsBuf2)) {
644 return false;
645 }
646 } else {
647 if (isBitmap2) {
648 CFCharacterSetRef tmp = subSet2;
649 subSet2 = subSet1;
650 subSet1 = tmp;
651 }
652 __CFCSetGetBitmap(subSet2, bitsBuf);
653 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)bitsBuf)) {
654 return false;
655 }
656 }
657 }
658 }
659 }
660 return true;
661 }
662
663 /* Compact bitmap
664 */
665 static uint8_t *__CFCreateCompactBitmap(CFAllocatorRef allocator, const uint8_t *bitmap) {
666 const uint8_t *src;
667 uint8_t *dst;
668 int i;
669 int numPages = 0;
670 uint8_t header[__kCFCompactBitmapNumPages];
671
672 src = bitmap;
673 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
674 header[i] = __CFCSetGetHeaderValue(src, &numPages);
675
676 // Allocating more pages is probably not interesting enough to be compact
677 if (numPages > __kCFCompactBitmapMaxPages) return NULL;
678 src += __kCFCompactBitmapPageSize;
679 }
680
681 dst = (uint8_t *)CFAllocatorAllocate(allocator, __kCFCompactBitmapNumPages + (__kCFCompactBitmapPageSize * numPages), 0);
682
683 if (numPages > 0) {
684 uint8_t *dstBody = dst + __kCFCompactBitmapNumPages;
685
686 src = bitmap;
687 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
688 dst[i] = header[i];
689
690 if ((dst[i] != 0) && (dst[i] != UINT8_MAX)) {
691 memmove(dstBody, src, __kCFCompactBitmapPageSize);
692 dstBody += __kCFCompactBitmapPageSize;
693 }
694 src += __kCFCompactBitmapPageSize;
695 }
696 } else {
697 memmove(dst, header, __kCFCompactBitmapNumPages);
698 }
699
700 return dst;
701 }
702
703 static void __CFCSetMakeCompact(CFMutableCharacterSetRef cset) {
704 if (__CFCSetIsBitmap(cset) && __CFCSetBitmapBits(cset)) {
705 uint8_t *bitmap = __CFCSetBitmapBits(cset);
706 uint8_t *cBitmap = __CFCreateCompactBitmap(CFGetAllocator(cset), bitmap);
707
708 if (cBitmap) {
709 CFAllocatorDeallocate(CFGetAllocator(cset), bitmap);
710 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
711 __CFCSetPutCompactBitmapBits(cset, cBitmap);
712 }
713 }
714 }
715
716 static void __CFCSetAddNonBMPPlanesInRange(CFMutableCharacterSetRef cset, CFRange range) {
717 int firstChar = (range.location & 0xFFFF);
718 int maxChar = range.location + range.length;
719 int idx = range.location >> 16; // first plane
720 int maxPlane = (maxChar - 1) >> 16; // last plane
721 CFRange planeRange;
722 CFMutableCharacterSetRef annexPlane;
723
724 maxChar &= 0xFFFF;
725
726 for (idx = (idx ? idx : 1);idx <= maxPlane;idx++) {
727 planeRange.location = __CFMax(firstChar, 0);
728 planeRange.length = (idx == maxPlane && maxChar ? maxChar : 0x10000) - planeRange.location;
729 if (__CFCSetAnnexIsInverted(cset)) {
730 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, idx))) {
731 CFCharacterSetRemoveCharactersInRange(annexPlane, planeRange);
732 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) {
733 CFRelease(annexPlane);
734 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, idx);
735 }
736 }
737 } else {
738 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx), planeRange);
739 }
740 }
741 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) __CFCSetDeallocateAnnexPlane(cset);
742 }
743
744 static void __CFCSetRemoveNonBMPPlanesInRange(CFMutableCharacterSetRef cset, CFRange range) {
745 int firstChar = (range.location & 0xFFFF);
746 int maxChar = range.location + range.length;
747 int idx = range.location >> 16; // first plane
748 int maxPlane = (maxChar - 1) >> 16; // last plane
749 CFRange planeRange;
750 CFMutableCharacterSetRef annexPlane;
751
752 maxChar &= 0xFFFF;
753
754 for (idx = (idx ? idx : 1);idx <= maxPlane;idx++) {
755 planeRange.location = __CFMax(firstChar, 0);
756 planeRange.length = (idx == maxPlane && maxChar ? maxChar : 0x10000) - planeRange.location;
757 if (__CFCSetAnnexIsInverted(cset)) {
758 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx), planeRange);
759 } else {
760 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, idx))) {
761 CFCharacterSetRemoveCharactersInRange(annexPlane, planeRange);
762 if(__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) {
763 CFRelease(annexPlane);
764 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, idx);
765 }
766 }
767 }
768 }
769 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) __CFCSetDeallocateAnnexPlane(cset);
770 }
771
772 static void __CFCSetMakeBitmap(CFMutableCharacterSetRef cset) {
773 if (!__CFCSetIsBitmap(cset) || !__CFCSetBitmapBits(cset)) {
774 CFAllocatorRef allocator = CFGetAllocator(cset);
775 uint8_t *bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
776 __CFCSetGetBitmap(cset, bitmap);
777
778 if (__CFCSetIsBuiltin(cset)) {
779 CFIndex numPlanes = CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(cset));
780
781 if (numPlanes > 1) {
782 CFMutableCharacterSetRef annexSet;
783 uint8_t *annexBitmap = NULL;
784 int idx;
785 UInt8 result;
786
787 __CFCSetAllocateAnnexForPlane(cset, numPlanes - 1);
788 for (idx = 1;idx < numPlanes;idx++) {
789 if (NULL == annexBitmap) {
790 annexBitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
791 }
792 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset), idx, annexBitmap, false);
793 if (result == kCFUniCharBitmapEmpty) continue;
794 if (result == kCFUniCharBitmapAll) {
795 CFIndex bitmapLength = __kCFBitmapSize;
796 uint8_t *bytes = annexBitmap;
797 while (bitmapLength-- > 0) *(bytes++) = (uint8_t)0xFF;
798 }
799 annexSet = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx);
800 __CFCSetPutClassType(annexSet, __kCFCharSetClassBitmap);
801 __CFCSetPutBitmapBits(annexSet, annexBitmap);
802 __CFCSetPutIsInverted(annexSet, false);
803 __CFCSetPutHasHashValue(annexSet, false);
804 annexBitmap = NULL;
805 }
806 if (annexBitmap) CFAllocatorDeallocate(allocator, annexBitmap);
807 }
808 } else if (__CFCSetIsCompactBitmap(cset) && __CFCSetCompactBitmapBits(cset)) {
809 CFAllocatorDeallocate(allocator, __CFCSetCompactBitmapBits(cset));
810 __CFCSetPutCompactBitmapBits(cset, NULL);
811 } else if (__CFCSetIsString(cset) && __CFCSetStringBuffer(cset)) {
812 CFAllocatorDeallocate(allocator, __CFCSetStringBuffer(cset));
813 __CFCSetPutStringBuffer(cset, NULL);
814 } else if (__CFCSetIsRange(cset)) { // We may have to allocate annex here
815 Boolean needsToInvert = (!__CFCSetHasNonBMPPlane(cset) && __CFCSetIsInverted(cset) ? true : false);
816 __CFCSetAddNonBMPPlanesInRange(cset, CFRangeMake(__CFCSetRangeFirstChar(cset), __CFCSetRangeLength(cset)));
817 if (needsToInvert) __CFCSetAnnexSetIsInverted(cset, true);
818 }
819 __CFCSetPutClassType(cset, __kCFCharSetClassBitmap);
820 __CFCSetPutBitmapBits(cset, bitmap);
821 __CFCSetPutIsInverted(cset, false);
822 }
823 }
824
825 CF_INLINE CFMutableCharacterSetRef __CFCSetGenericCreate(CFAllocatorRef allocator, UInt32 flags) {
826 CFMutableCharacterSetRef cset;
827 CFIndex size = sizeof(struct __CFCharacterSet) - sizeof(CFRuntimeBase);
828
829 cset = (CFMutableCharacterSetRef)_CFRuntimeCreateInstance(allocator, CFCharacterSetGetTypeID(), size, NULL);
830 if (NULL == cset) return NULL;
831
832 cset->_base._cfinfo[CF_INFO_BITS] |= flags;
833 cset->_hashValue = 0;
834 cset->_annex = NULL;
835
836 return cset;
837 }
838
839 static void __CFApplySurrogatesInString(CFMutableCharacterSetRef cset, CFStringRef string, void (*applyer)(CFMutableCharacterSetRef, CFRange)) {
840 CFStringInlineBuffer buffer;
841 CFIndex index, length = CFStringGetLength(string);
842 CFRange range = CFRangeMake(0, 0);
843 UTF32Char character;
844
845 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
846
847 for (index = 0;index < length;index++) {
848 character = __CFStringGetCharacterFromInlineBufferQuick(&buffer, index);
849
850 if (CFStringIsSurrogateHighCharacter(character) && ((index + 1) < length)) {
851 UTF16Char other = __CFStringGetCharacterFromInlineBufferQuick(&buffer, index + 1);
852
853 if (CFStringIsSurrogateLowCharacter(other)) {
854 character = CFStringGetLongCharacterForSurrogatePair(character, other);
855
856 if ((range.length + range.location) == character) {
857 ++range.length;
858 } else {
859 if (range.length > 0) applyer(cset, range);
860 range.location = character;
861 range.length = 1;
862 }
863 }
864
865 ++index; // skip the low surrogate
866 }
867 }
868
869 if (range.length > 0) applyer(cset, range);
870 }
871
872
873 /* Bsearch theChar for __kCFCharSetClassString
874 */
875 CF_INLINE Boolean __CFCSetBsearchUniChar(const UniChar *theTable, CFIndex length, UniChar theChar) {
876 const UniChar *p, *q, *divider;
877
878 if ((theChar < theTable[0]) || (theChar > theTable[length - 1])) return false;
879
880 p = theTable;
881 q = p + (length - 1);
882 while (p <= q) {
883 divider = p + ((q - p) >> 1); /* divide by 2 */
884 if (theChar < *divider) q = divider - 1;
885 else if (theChar > *divider) p = divider + 1;
886 else return true;
887 }
888 return false;
889 }
890
/* Array of the instantiated builtin (predefined) sets, filled in by CFCharacterSetGetPredefined().
   Builtin set IDs start at 1, so the set with identifier ID is stored at index ID - 1.
*/
static CFCharacterSetRef *__CFBuiltinSets = NULL;

/* Global lock for character sets; CFCharacterSetGetPredefined() holds it while it reads or
   updates __CFBuiltinSets.
*/
static CFSpinLock_t __CFCharacterSetLock = CFSpinLockInit;
898
899 /* CFBase API functions
900 */
901 static Boolean __CFCharacterSetEqual(CFTypeRef cf1, CFTypeRef cf2) {
902 Boolean isInvertStateIdentical = (__CFCSetIsInverted((CFCharacterSetRef)cf1) == __CFCSetIsInverted((CFCharacterSetRef)cf2) ? true: false);
903 Boolean isAnnexInvertStateIdentical = (__CFCSetAnnexIsInverted((CFCharacterSetRef)cf1) == __CFCSetAnnexIsInverted((CFCharacterSetRef)cf2) ? true: false);
904 CFIndex idx;
905 CFCharacterSetRef subSet1;
906 uint8_t bitsBuf[__kCFBitmapSize];
907 uint8_t *bits;
908 Boolean isBitmap1;
909 Boolean isBitmap2;
910
911 if (__CFCSetHasHashValue((CFCharacterSetRef)cf1) && __CFCSetHasHashValue((CFCharacterSetRef)cf2) && ((CFCharacterSetRef)cf1)->_hashValue != ((CFCharacterSetRef)cf2)->_hashValue) return false;
912 if (__CFCSetIsEmpty((CFCharacterSetRef)cf1) && __CFCSetIsEmpty((CFCharacterSetRef)cf2) && !isInvertStateIdentical) return false;
913
914 if (__CFCSetClassType((CFCharacterSetRef)cf1) == __CFCSetClassType((CFCharacterSetRef)cf2)) { // Types are identical, we can do it fast
915 switch (__CFCSetClassType((CFCharacterSetRef)cf1)) {
916 case __kCFCharSetClassBuiltin:
917 return (__CFCSetBuiltinType((CFCharacterSetRef)cf1) == __CFCSetBuiltinType((CFCharacterSetRef)cf2) && isInvertStateIdentical ? true : false);
918
919 case __kCFCharSetClassRange:
920 return (__CFCSetRangeFirstChar((CFCharacterSetRef)cf1) == __CFCSetRangeFirstChar((CFCharacterSetRef)cf2) && __CFCSetRangeLength((CFCharacterSetRef)cf1) && __CFCSetRangeLength((CFCharacterSetRef)cf2) && isInvertStateIdentical ? true : false);
921
922 case __kCFCharSetClassString:
923 if (__CFCSetStringLength((CFCharacterSetRef)cf1) == __CFCSetStringLength((CFCharacterSetRef)cf2) && isInvertStateIdentical) {
924 const UniChar *buf1 = __CFCSetStringBuffer((CFCharacterSetRef)cf1);
925 const UniChar *buf2 = __CFCSetStringBuffer((CFCharacterSetRef)cf2);
926 CFIndex length = __CFCSetStringLength((CFCharacterSetRef)cf1);
927
928 while (length--) if (*buf1++ != *buf2++) return false;
929 } else {
930 return false;
931 }
932 break;
933
934 case __kCFCharSetClassBitmap:
935 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf2))) return false;
936 break;
937 }
938 return __CFCSetIsEqualAnnex((CFCharacterSetRef)cf1, (CFCharacterSetRef)cf2);
939 }
940
941 // Check for easy empty cases
942 if (__CFCSetIsEmpty((CFCharacterSetRef)cf1) || __CFCSetIsEmpty((CFCharacterSetRef)cf2)) {
943 CFCharacterSetRef emptySet = (__CFCSetIsEmpty((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
944 CFCharacterSetRef nonEmptySet = (emptySet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
945
946 if (__CFCSetIsBuiltin(nonEmptySet)) {
947 return false;
948 } else if (__CFCSetIsRange(nonEmptySet)) {
949 if (isInvertStateIdentical) {
950 return (__CFCSetRangeLength(nonEmptySet) ? false : true);
951 } else {
952 return (__CFCSetRangeLength(nonEmptySet) == 0x110000 ? true : false);
953 }
954 } else {
955 if (__CFCSetAnnexIsInverted(nonEmptySet)) {
956 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet) != 0x1FFFE) return false;
957 } else {
958 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet)) return false;
959 }
960
961 if (__CFCSetIsBitmap(nonEmptySet)) {
962 bits = __CFCSetBitmapBits(nonEmptySet);
963 } else {
964 bits = bitsBuf;
965 __CFCSetGetBitmap(nonEmptySet, bitsBuf);
966 }
967
968 if (__CFCSetIsEqualBitmap(NULL, (const UInt32 *)bits)) {
969 if (!__CFCSetAnnexIsInverted(nonEmptySet)) return true;
970 } else {
971 return false;
972 }
973
974 // Annex set has to be CFRangeMake(0x10000, 0xfffff)
975 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
976 if (__CFCSetIsBitmap(nonEmptySet)) {
977 if (!__CFCSetIsEqualBitmap((__CFCSetAnnexIsInverted(nonEmptySet) ? NULL : (const UInt32 *)-1), (const UInt32 *)bitsBuf)) return false;
978 } else {
979 __CFCSetGetBitmap(__CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonEmptySet, idx), bitsBuf);
980 if (!__CFCSetIsEqualBitmap((const UInt32 *)-1, (const UInt32 *)bitsBuf)) return false;
981 }
982 }
983 return true;
984 }
985 }
986
987 if (__CFCSetIsBuiltin((CFCharacterSetRef)cf1) || __CFCSetIsBuiltin((CFCharacterSetRef)cf2)) {
988 CFCharacterSetRef builtinSet = (__CFCSetIsBuiltin((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
989 CFCharacterSetRef nonBuiltinSet = (builtinSet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
990
991
992 if (__CFCSetIsRange(nonBuiltinSet)) {
993 UTF32Char firstChar = __CFCSetRangeFirstChar(nonBuiltinSet);
994 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(nonBuiltinSet) - 1);
995 uint8_t firstPlane = (firstChar >> 16) & 0xFF;
996 uint8_t lastPlane = (lastChar >> 16) & 0xFF;
997 uint8_t result;
998
999 for (idx = 0;idx < MAX_ANNEX_PLANE;idx++) {
1000 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), idx, bitsBuf, (isInvertStateIdentical != 0));
1001
1002 if (idx < firstPlane || idx > lastPlane) {
1003 if (result == kCFUniCharBitmapAll) {
1004 return false;
1005 } else if (result == kCFUniCharBitmapFilled) {
1006 if (!__CFCSetIsEqualBitmap(NULL, (const UInt32 *)bitsBuf)) return false;
1007 }
1008 } else if (idx > firstPlane && idx < lastPlane) {
1009 if (result == kCFUniCharBitmapEmpty) {
1010 return false;
1011 } else if (result == kCFUniCharBitmapFilled) {
1012 if (!__CFCSetIsEqualBitmap((const UInt32 *)-1, (const UInt32 *)bitsBuf)) return false;
1013 }
1014 } else {
1015 if (result == kCFUniCharBitmapEmpty) {
1016 return false;
1017 } else if (result == kCFUniCharBitmapAll) {
1018 if (idx == firstPlane) {
1019 if (((firstChar & 0xFFFF) != 0) || (firstPlane == lastPlane && ((lastChar & 0xFFFF) != 0xFFFF))) return false;
1020 } else {
1021 if (((lastChar & 0xFFFF) != 0xFFFF) || (firstPlane == lastPlane && ((firstChar & 0xFFFF) != 0))) return false;
1022 }
1023 } else {
1024 if (idx == firstPlane) {
1025 if (!__CFCSetIsBitmapEqualToRange((const UInt32 *)bitsBuf, firstChar & 0xFFFF, (firstPlane == lastPlane ? lastChar & 0xFFFF : 0xFFFF), false)) return false;
1026 } else {
1027 if (!__CFCSetIsBitmapEqualToRange((const UInt32 *)bitsBuf, (firstPlane == lastPlane ? firstChar & 0xFFFF : 0), lastChar & 0xFFFF, false)) return false;
1028 }
1029 }
1030 }
1031 }
1032 return true;
1033 } else {
1034 uint8_t bitsBuf2[__kCFBitmapSize];
1035 uint8_t result;
1036
1037 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), 0, bitsBuf, (__CFCSetIsInverted(builtinSet) != 0));
1038 if (result == kCFUniCharBitmapFilled) {
1039 if (__CFCSetIsBitmap(nonBuiltinSet)) {
1040 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)__CFCSetBitmapBits(nonBuiltinSet))) return false;
1041 } else {
1042
1043 __CFCSetGetBitmap(nonBuiltinSet, bitsBuf2);
1044 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)bitsBuf2)) {
1045 return false;
1046 }
1047 }
1048 } else {
1049 if (__CFCSetIsBitmap(nonBuiltinSet)) {
1050 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1 : NULL), (const UInt32 *)__CFCSetBitmapBits(nonBuiltinSet))) return false;
1051 } else {
1052 __CFCSetGetBitmap(nonBuiltinSet, bitsBuf);
1053 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32 *)bitsBuf)) return false;
1054 }
1055 }
1056
1057 isInvertStateIdentical = (__CFCSetIsInverted(builtinSet) == __CFCSetAnnexIsInverted(nonBuiltinSet) ? true : false);
1058
1059 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
1060 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), idx, bitsBuf, !isInvertStateIdentical);
1061 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonBuiltinSet, idx);
1062
1063 if (result == kCFUniCharBitmapFilled) {
1064 if (NULL == subSet1) {
1065 return false;
1066 } else if (__CFCSetIsBitmap(subSet1)) {
1067 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)__CFCSetBitmapBits(subSet1))) {
1068 return false;
1069 }
1070 } else {
1071
1072 __CFCSetGetBitmap(subSet1, bitsBuf2);
1073 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)bitsBuf2)) {
1074 return false;
1075 }
1076 }
1077 } else {
1078 if (NULL == subSet1) {
1079 if (result == kCFUniCharBitmapAll) {
1080 return false;
1081 }
1082 } else if (__CFCSetIsBitmap(subSet1)) {
1083 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32*)__CFCSetBitmapBits(subSet1))) {
1084 return false;
1085 }
1086 } else {
1087 __CFCSetGetBitmap(subSet1, bitsBuf);
1088 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32*)bitsBuf)) {
1089 return false;
1090 }
1091 }
1092 }
1093 }
1094 return true;
1095 }
1096 }
1097
1098 if (__CFCSetIsRange((CFCharacterSetRef)cf1) || __CFCSetIsRange((CFCharacterSetRef)cf2)) {
1099 CFCharacterSetRef rangeSet = (__CFCSetIsRange((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
1100 CFCharacterSetRef nonRangeSet = (rangeSet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
1101 UTF32Char firstChar = __CFCSetRangeFirstChar(rangeSet);
1102 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(rangeSet) - 1);
1103 uint8_t firstPlane = (firstChar >> 16) & 0xFF;
1104 uint8_t lastPlane = (lastChar >> 16) & 0xFF;
1105 Boolean isRangeSetInverted = __CFCSetIsInverted(rangeSet);
1106
1107 if (__CFCSetIsBitmap(nonRangeSet)) {
1108 bits = __CFCSetBitmapBits(nonRangeSet);
1109 } else {
1110 bits = bitsBuf;
1111 __CFCSetGetBitmap(nonRangeSet, bitsBuf);
1112 }
1113 if (firstPlane == 0) {
1114 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, firstChar, (lastPlane == 0 ? lastChar : 0xFFFF), isRangeSetInverted)) return false;
1115 firstPlane = 1;
1116 firstChar = 0;
1117 } else {
1118 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isRangeSetInverted ? (const UInt32 *)-1 : NULL))) return false;
1119 firstChar &= 0xFFFF;
1120 }
1121
1122 lastChar &= 0xFFFF;
1123
1124 isAnnexInvertStateIdentical = (isRangeSetInverted == __CFCSetAnnexIsInverted(nonRangeSet) ? true : false);
1125
1126 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
1127 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonRangeSet, idx);
1128 if (NULL == subSet1) {
1129 if (idx < firstPlane || idx > lastPlane) {
1130 if (!isAnnexInvertStateIdentical) return false;
1131 } else if (idx > firstPlane && idx < lastPlane) {
1132 if (isAnnexInvertStateIdentical) return false;
1133 } else if (idx == firstPlane) {
1134 if (isAnnexInvertStateIdentical || firstChar || (idx == lastPlane && lastChar != 0xFFFF)) return false;
1135 } else if (idx == lastPlane) {
1136 if (isAnnexInvertStateIdentical || (idx == firstPlane && firstChar) || (lastChar != 0xFFFF)) return false;
1137 }
1138 } else {
1139 if (__CFCSetIsBitmap(subSet1)) {
1140 bits = __CFCSetBitmapBits(subSet1);
1141 } else {
1142 __CFCSetGetBitmap(subSet1, bitsBuf);
1143 bits = bitsBuf;
1144 }
1145
1146 if (idx < firstPlane || idx > lastPlane) {
1147 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isAnnexInvertStateIdentical ? NULL : (const UInt32 *)-1))) return false;
1148 } else if (idx > firstPlane && idx < lastPlane) {
1149 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isAnnexInvertStateIdentical ? (const UInt32 *)-1 : NULL))) return false;
1150 } else if (idx == firstPlane) {
1151 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, firstChar, (idx == lastPlane ? lastChar : 0xFFFF), !isAnnexInvertStateIdentical)) return false;
1152 } else if (idx == lastPlane) {
1153 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, (idx == firstPlane ? firstChar : 0), lastChar, !isAnnexInvertStateIdentical)) return false;
1154 }
1155 }
1156 }
1157 return true;
1158 }
1159
1160 isBitmap1 = __CFCSetIsBitmap((CFCharacterSetRef)cf1);
1161 isBitmap2 = __CFCSetIsBitmap((CFCharacterSetRef)cf2);
1162
1163 if (isBitmap1 && isBitmap2) {
1164 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf2))) return false;
1165 } else if (!isBitmap1 && !isBitmap2) {
1166 uint8_t bitsBuf2[__kCFBitmapSize];
1167
1168 __CFCSetGetBitmap((CFCharacterSetRef)cf1, bitsBuf);
1169 __CFCSetGetBitmap((CFCharacterSetRef)cf2, bitsBuf2);
1170
1171 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)bitsBuf2)) {
1172 return false;
1173 }
1174 } else {
1175 if (isBitmap2) {
1176 CFCharacterSetRef tmp = (CFCharacterSetRef)cf2;
1177 cf2 = cf1;
1178 cf1 = tmp;
1179 }
1180
1181 __CFCSetGetBitmap((CFCharacterSetRef)cf2, bitsBuf);
1182
1183 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)bitsBuf)) return false;
1184 }
1185 return __CFCSetIsEqualAnnex((CFCharacterSetRef)cf1, (CFCharacterSetRef)cf2);
1186 }
1187
1188 static CFHashCode __CFCharacterSetHash(CFTypeRef cf) {
1189 if (!__CFCSetHasHashValue((CFCharacterSetRef)cf)) {
1190 if (__CFCSetIsEmpty((CFCharacterSetRef)cf)) {
1191 ((CFMutableCharacterSetRef)cf)->_hashValue = (__CFCSetIsInverted((CFCharacterSetRef)cf) ? ((UInt32)0xFFFFFFFF) : 0);
1192 } else if (__CFCSetIsBitmap( (CFCharacterSetRef) cf )) {
1193 ((CFMutableCharacterSetRef)cf)->_hashValue = CFHashBytes(__CFCSetBitmapBits((CFCharacterSetRef)cf), __kCFBitmapSize);
1194 } else {
1195 uint8_t bitsBuf[__kCFBitmapSize];
1196 __CFCSetGetBitmap((CFCharacterSetRef)cf, bitsBuf);
1197 ((CFMutableCharacterSetRef)cf)->_hashValue = CFHashBytes(bitsBuf, __kCFBitmapSize);
1198 }
1199 __CFCSetPutHasHashValue((CFMutableCharacterSetRef)cf, true);
1200 }
1201 return ((CFCharacterSetRef)cf)->_hashValue;
1202 }
1203
1204 static CFStringRef __CFCharacterSetCopyDescription(CFTypeRef cf) {
1205 CFMutableStringRef string;
1206 CFIndex idx;
1207 CFIndex length;
1208
1209 if (__CFCSetIsEmpty((CFCharacterSetRef)cf)) {
1210 return (CFStringRef)(__CFCSetIsInverted((CFCharacterSetRef)cf) ? CFRetain(CFSTR("<CFCharacterSet All>")) : CFRetain(CFSTR("<CFCharacterSet Empty>")));
1211 }
1212
1213 switch (__CFCSetClassType((CFCharacterSetRef)cf)) {
1214 case __kCFCharSetClassBuiltin:
1215 switch (__CFCSetBuiltinType((CFCharacterSetRef)cf)) {
1216 case kCFCharacterSetControl: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Control Set>"));
1217 case kCFCharacterSetWhitespace : return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Whitespace Set>"));
1218 case kCFCharacterSetWhitespaceAndNewline: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined WhitespaceAndNewline Set>"));
1219 case kCFCharacterSetDecimalDigit: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined DecimalDigit Set>"));
1220 case kCFCharacterSetLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Letter Set>"));
1221 case kCFCharacterSetLowercaseLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined LowercaseLetter Set>"));
1222 case kCFCharacterSetUppercaseLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined UppercaseLetter Set>"));
1223 case kCFCharacterSetNonBase: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined NonBase Set>"));
1224 case kCFCharacterSetDecomposable: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Decomposable Set>"));
1225 case kCFCharacterSetAlphaNumeric: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined AlphaNumeric Set>"));
1226 case kCFCharacterSetPunctuation: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Punctuation Set>"));
1227 case kCFCharacterSetIllegal: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Illegal Set>"));
1228 case kCFCharacterSetCapitalizedLetter: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined CapitalizedLetter Set>"));
1229 case kCFCharacterSetSymbol: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Symbol Set>"));
1230 case kCFCharacterSetNewline: return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Predefined Newline Set>"));
1231 }
1232 break;
1233
1234 case __kCFCharSetClassRange:
1235 return CFStringCreateWithFormat(CFGetAllocator((CFCharacterSetRef)cf), NULL, CFSTR("<CFCharacterSet Range(%d, %d)>"), __CFCSetRangeFirstChar((CFCharacterSetRef)cf), __CFCSetRangeLength((CFCharacterSetRef)cf));
1236
1237 case __kCFCharSetClassString: {
1238 CFStringRef format = CFSTR("<CFCharacterSet Items(");
1239
1240 length = __CFCSetStringLength((CFCharacterSetRef)cf);
1241 string = CFStringCreateMutable(CFGetAllocator(cf), CFStringGetLength(format) + 7 * length + 2); // length of format + "U+XXXX "(7) * length + ")>"(2)
1242 CFStringAppend(string, format);
1243 for (idx = 0;idx < length;idx++) {
1244 CFStringAppendFormat(string, NULL, CFSTR("%sU+%04X"), (idx > 0 ? " " : ""), (UInt32)((__CFCSetStringBuffer((CFCharacterSetRef)cf))[idx]));
1245 }
1246 CFStringAppend(string, CFSTR(")>"));
1247 return string;
1248 }
1249
1250 case __kCFCharSetClassBitmap:
1251 case __kCFCharSetClassCompactBitmap:
1252 return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Bitmap>")); // ??? Should generate description for 8k bitmap ?
1253 }
1254 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1255 return NULL;
1256 }
1257
1258 static void __CFCharacterSetDeallocate(CFTypeRef cf) {
1259 CFAllocatorRef allocator = CFGetAllocator(cf);
1260
1261 if (__CFCSetIsBuiltin((CFCharacterSetRef)cf) && !__CFCSetIsMutable((CFCharacterSetRef)cf) && !__CFCSetIsInverted((CFCharacterSetRef)cf)) {
1262 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)cf));
1263 if (sharedSet == cf) { // We're trying to dealloc the builtin set
1264 CFAssert1(0, __kCFLogAssertion, "%s: Trying to deallocate predefined set. The process is likely to crash.", __PRETTY_FUNCTION__);
1265 return; // We never deallocate builtin set
1266 }
1267 }
1268
1269 if (__CFCSetIsString((CFCharacterSetRef)cf) && __CFCSetStringBuffer((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetStringBuffer((CFCharacterSetRef)cf));
1270 else if (__CFCSetIsBitmap((CFCharacterSetRef)cf) && __CFCSetBitmapBits((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetBitmapBits((CFCharacterSetRef)cf));
1271 else if (__CFCSetIsCompactBitmap((CFCharacterSetRef)cf) && __CFCSetCompactBitmapBits((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetCompactBitmapBits((CFCharacterSetRef)cf));
1272 __CFCSetDeallocateAnnexPlane((CFCharacterSetRef)cf);
1273 }
1274
/* Type ID of CFCharacterSet. Holds _kCFRuntimeNotATypeID until __CFCharacterSetInitialize()
   registers the class below and stores the ID it is given.
*/
static CFTypeID __kCFCharacterSetTypeID = _kCFRuntimeNotATypeID;

/* Runtime class record for CFCharacterSet; passed to _CFRuntimeRegisterClass() by
   __CFCharacterSetInitialize().
*/
static const CFRuntimeClass __CFCharacterSetClass = {
    0, // NOTE(review): presumably the class version -- confirm against the CFRuntimeClass declaration
    "CFCharacterSet", // class name
    NULL, // init
    NULL, // copy
    __CFCharacterSetDeallocate,
    __CFCharacterSetEqual,
    __CFCharacterSetHash,
    NULL, // NOTE(review): presumably the formatting-description callback (unused) -- confirm against CFRuntimeClass
    __CFCharacterSetCopyDescription
};
1288
1289 static bool __CFCheckForExapendedSet = false;
1290
1291 __private_extern__ void __CFCharacterSetInitialize(void) {
1292 const char *checkForExpandedSet = __CFgetenv("__CF_DEBUG_EXPANDED_SET");
1293
1294 __kCFCharacterSetTypeID = _CFRuntimeRegisterClass(&__CFCharacterSetClass);
1295
1296 if (checkForExpandedSet && (*checkForExpandedSet == 'Y')) __CFCheckForExapendedSet = true;
1297 }
1298
1299 /* Public functions
1300 */
1301
1302 CFTypeID CFCharacterSetGetTypeID(void) {
1303 return __kCFCharacterSetTypeID;
1304 }
1305
1306 /*** CharacterSet creation ***/
1307 /* Functions to create basic immutable characterset.
1308 */
1309 CFCharacterSetRef CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier) {
1310 CFCharacterSetRef cset;
1311
1312 __CFCSetValidateBuiltinType(theSetIdentifier, __PRETTY_FUNCTION__);
1313
1314 __CFSpinLock(&__CFCharacterSetLock);
1315 cset = ((NULL != __CFBuiltinSets) ? __CFBuiltinSets[theSetIdentifier - 1] : NULL);
1316 __CFSpinUnlock(&__CFCharacterSetLock);
1317
1318 if (NULL != cset) return cset;
1319
1320 if (!(cset = __CFCSetGenericCreate(kCFAllocatorSystemDefault, __kCFCharSetClassBuiltin))) return NULL;
1321 __CFCSetPutBuiltinType((CFMutableCharacterSetRef)cset, theSetIdentifier);
1322
1323 __CFSpinLock(&__CFCharacterSetLock);
1324 if (!__CFBuiltinSets) {
1325 __CFBuiltinSets = (CFCharacterSetRef *)CFAllocatorAllocate((CFAllocatorRef)CFRetain(__CFGetDefaultAllocator()), sizeof(CFCharacterSetRef) * __kCFLastBuiltinSetID, 0);
1326 memset(__CFBuiltinSets, 0, sizeof(CFCharacterSetRef) * __kCFLastBuiltinSetID);
1327 }
1328
1329 __CFBuiltinSets[theSetIdentifier - 1] = cset;
1330 __CFSpinUnlock(&__CFCharacterSetLock);
1331
1332 return cset;
1333 }
1334
1335 CFCharacterSetRef CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef allocator, CFRange theRange) {
1336 CFMutableCharacterSetRef cset;
1337
1338 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
1339
1340 if (theRange.length) {
1341 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassRange))) return NULL;
1342 __CFCSetPutRangeFirstChar(cset, theRange.location);
1343 __CFCSetPutRangeLength(cset, theRange.length);
1344 } else {
1345 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap))) return NULL;
1346 __CFCSetPutBitmapBits(cset, NULL);
1347 __CFCSetPutHasHashValue(cset, true); // _hashValue is 0
1348 }
1349
1350 return cset;
1351 }
1352
1353 static int chcompar(const void *a, const void *b) {
1354 return -(int)(*(UniChar *)b - *(UniChar *)a);
1355 }
1356
1357 CFCharacterSetRef CFCharacterSetCreateWithCharactersInString(CFAllocatorRef allocator, CFStringRef theString) {
1358 CFIndex length;
1359
1360 length = CFStringGetLength(theString);
1361 if (length < __kCFStringCharSetMax) {
1362 CFMutableCharacterSetRef cset;
1363
1364 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassString))) return NULL;
1365 __CFCSetPutStringBuffer(cset, (UniChar *)CFAllocatorAllocate(CFGetAllocator(cset), __kCFStringCharSetMax * sizeof(UniChar), 0));
1366 __CFCSetPutStringLength(cset, length);
1367 CFStringGetCharacters(theString, CFRangeMake(0, length), __CFCSetStringBuffer(cset));
1368 qsort(__CFCSetStringBuffer(cset), length, sizeof(UniChar), chcompar);
1369
1370 if (0 == length) {
1371 __CFCSetPutHasHashValue(cset, true); // _hashValue is 0
1372 } else if (length > 1) { // Check for surrogate
1373 const UTF16Char *characters = __CFCSetStringBuffer(cset);
1374 const UTF16Char *charactersLimit = characters + length;
1375
1376 if ((*characters < 0xDC00UL) && (*(charactersLimit - 1) > 0xDBFFUL)) { // might have surrogate chars
1377 while (characters < charactersLimit) {
1378 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) {
1379 CFRelease(cset);
1380 cset = NULL;
1381 break;
1382 }
1383 ++characters;
1384 }
1385 }
1386 }
1387 if (NULL != cset) return cset;
1388 }
1389
1390 CFMutableCharacterSetRef mcset = CFCharacterSetCreateMutable(allocator);
1391 CFCharacterSetAddCharactersInString(mcset, theString);
1392 __CFCSetMakeCompact(mcset);
1393 __CFCSetPutIsMutable(mcset, false);
1394 return mcset;
1395 }
1396
1397 CFCharacterSetRef CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef allocator, CFDataRef theData) {
1398 CFMutableCharacterSetRef cset;
1399 CFIndex length;
1400
1401 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap))) return NULL;
1402
1403 if (theData && (length = CFDataGetLength(theData)) > 0) {
1404 uint8_t *bitmap;
1405 uint8_t *cBitmap;
1406
1407 if (length < __kCFBitmapSize) {
1408 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1409 memmove(bitmap, CFDataGetBytePtr(theData), length);
1410 memset(bitmap + length, 0, __kCFBitmapSize - length);
1411
1412 cBitmap = __CFCreateCompactBitmap(allocator, bitmap);
1413
1414 if (cBitmap == NULL) {
1415 __CFCSetPutBitmapBits(cset, bitmap);
1416 } else {
1417 CFAllocatorDeallocate(allocator, bitmap);
1418 __CFCSetPutCompactBitmapBits(cset, cBitmap);
1419 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1420 }
1421 } else {
1422 cBitmap = __CFCreateCompactBitmap(allocator, CFDataGetBytePtr(theData));
1423
1424 if (cBitmap == NULL) {
1425 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1426 memmove(bitmap, CFDataGetBytePtr(theData), __kCFBitmapSize);
1427
1428 __CFCSetPutBitmapBits(cset, bitmap);
1429 } else {
1430 __CFCSetPutCompactBitmapBits(cset, cBitmap);
1431 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1432 }
1433
1434 if (length > __kCFBitmapSize) {
1435 CFMutableCharacterSetRef annexSet;
1436 const uint8_t *bytes = CFDataGetBytePtr(theData) + __kCFBitmapSize;
1437
1438 length -= __kCFBitmapSize;
1439
1440 while (length > 1) {
1441 annexSet = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, *(bytes++));
1442 --length; // Decrement the plane no byte
1443
1444 if (length < __kCFBitmapSize) {
1445 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1446 memmove(bitmap, bytes, length);
1447 memset(bitmap + length, 0, __kCFBitmapSize - length);
1448
1449 cBitmap = __CFCreateCompactBitmap(allocator, bitmap);
1450
1451 if (cBitmap == NULL) {
1452 __CFCSetPutBitmapBits(annexSet, bitmap);
1453 } else {
1454 CFAllocatorDeallocate(allocator, bitmap);
1455 __CFCSetPutCompactBitmapBits(annexSet, cBitmap);
1456 __CFCSetPutClassType(annexSet, __kCFCharSetClassCompactBitmap);
1457 }
1458 } else {
1459 cBitmap = __CFCreateCompactBitmap(allocator, bytes);
1460
1461 if (cBitmap == NULL) {
1462 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1463 memmove(bitmap, bytes, __kCFBitmapSize);
1464
1465 __CFCSetPutBitmapBits(annexSet, bitmap);
1466 } else {
1467 __CFCSetPutCompactBitmapBits(annexSet, cBitmap);
1468 __CFCSetPutClassType(annexSet, __kCFCharSetClassCompactBitmap);
1469 }
1470 }
1471 length -= __kCFBitmapSize;
1472 bytes += __kCFBitmapSize;
1473 }
1474 }
1475 }
1476 } else {
1477 __CFCSetPutBitmapBits(cset, NULL);
1478 __CFCSetPutHasHashValue(cset, true); // Hash value is 0
1479 }
1480
1481 return cset;
1482 }
1483
1484 CFCharacterSetRef CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1485 CFMutableCharacterSetRef cset;
1486
1487 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFCharacterSetRef , theSet, "invertedSet");
1488
1489 cset = CFCharacterSetCreateMutableCopy(alloc, theSet);
1490 CFCharacterSetInvert(cset);
1491 __CFCSetPutIsMutable(cset, false);
1492
1493 return cset;
1494 }
1495
1496 /* Functions to create mutable characterset.
1497 */
1498 CFMutableCharacterSetRef CFCharacterSetCreateMutable(CFAllocatorRef allocator) {
1499 CFMutableCharacterSetRef cset;
1500
1501 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap| __kCFCharSetIsMutable))) return NULL;
1502 __CFCSetPutBitmapBits(cset, NULL);
1503 __CFCSetPutHasHashValue(cset, true); // Hash value is 0
1504
1505 return cset;
1506 }
1507
1508 static CFMutableCharacterSetRef __CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet, bool isMutable) {
1509 CFMutableCharacterSetRef cset;
1510
1511 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFMutableCharacterSetRef , theSet, "mutableCopy");
1512
1513 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1514
1515 if (!isMutable && !__CFCSetIsMutable(theSet)) {
1516 return (CFMutableCharacterSetRef)CFRetain(theSet);
1517 }
1518
1519 cset = CFCharacterSetCreateMutable(alloc);
1520
1521 __CFCSetPutClassType(cset, __CFCSetClassType(theSet));
1522 __CFCSetPutHasHashValue(cset, __CFCSetHasHashValue(theSet));
1523 __CFCSetPutIsInverted(cset, __CFCSetIsInverted(theSet));
1524 cset->_hashValue = theSet->_hashValue;
1525
1526 switch (__CFCSetClassType(theSet)) {
1527 case __kCFCharSetClassBuiltin:
1528 __CFCSetPutBuiltinType(cset, __CFCSetBuiltinType(theSet));
1529 break;
1530
1531 case __kCFCharSetClassRange:
1532 __CFCSetPutRangeFirstChar(cset, __CFCSetRangeFirstChar(theSet));
1533 __CFCSetPutRangeLength(cset, __CFCSetRangeLength(theSet));
1534 break;
1535
1536 case __kCFCharSetClassString:
1537 __CFCSetPutStringBuffer(cset, (UniChar *)CFAllocatorAllocate(alloc, __kCFStringCharSetMax * sizeof(UniChar), 0));
1538
1539 __CFCSetPutStringLength(cset, __CFCSetStringLength(theSet));
1540 memmove(__CFCSetStringBuffer(cset), __CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet) * sizeof(UniChar));
1541 break;
1542
1543 case __kCFCharSetClassBitmap:
1544 if (__CFCSetBitmapBits(theSet)) {
1545 uint8_t * bitmap = (isMutable ? NULL : __CFCreateCompactBitmap(alloc, __CFCSetBitmapBits(theSet)));
1546
1547 if (bitmap == NULL) {
1548 bitmap = (uint8_t *)CFAllocatorAllocate(alloc, sizeof(uint8_t) * __kCFBitmapSize, 0);
1549 memmove(bitmap, __CFCSetBitmapBits(theSet), __kCFBitmapSize);
1550 __CFCSetPutBitmapBits(cset, bitmap);
1551 } else {
1552 __CFCSetPutCompactBitmapBits(cset, bitmap);
1553 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1554 }
1555 } else {
1556 __CFCSetPutBitmapBits(cset, NULL);
1557 }
1558 break;
1559
1560 case __kCFCharSetClassCompactBitmap: {
1561 const uint8_t *compactBitmap = __CFCSetCompactBitmapBits(theSet);
1562
1563 if (compactBitmap) {
1564 uint32_t size = __CFCSetGetCompactBitmapSize(compactBitmap);
1565 uint8_t *newBitmap = (uint8_t *)CFAllocatorAllocate(alloc, size, 0);
1566
1567 memmove(newBitmap, compactBitmap, size);
1568 __CFCSetPutCompactBitmapBits(cset, newBitmap);
1569 }
1570 }
1571 break;
1572
1573 default:
1574 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1575 }
1576 if (__CFCSetHasNonBMPPlane(theSet)) {
1577 CFMutableCharacterSetRef annexPlane;
1578 int idx;
1579
1580 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
1581 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx))) {
1582 annexPlane = __CFCharacterSetCreateCopy(alloc, annexPlane, isMutable);
1583 __CFCSetPutCharacterSetToAnnexPlane(cset, annexPlane, idx);
1584 CFRelease(annexPlane);
1585 }
1586 }
1587 __CFCSetAnnexSetIsInverted(cset, __CFCSetAnnexIsInverted(theSet));
1588 } else if (__CFCSetAnnexIsInverted(theSet)) {
1589 __CFCSetAnnexSetIsInverted(cset, true);
1590 }
1591
1592 return cset;
1593 }
1594
1595 CFCharacterSetRef CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1596 return __CFCharacterSetCreateCopy(alloc, theSet, false);
1597 }
1598
1599 CFMutableCharacterSetRef CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1600 return __CFCharacterSetCreateCopy(alloc, theSet, true);
1601 }
1602
1603 /*** Basic accessors ***/
1604 Boolean CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet, UniChar theChar) {
1605 CFIndex length;
1606 Boolean isInverted;
1607 Boolean result = false;
1608
1609 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, Boolean, theSet, "longCharacterIsMember:", theChar);
1610
1611 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1612
1613 isInverted = __CFCSetIsInverted(theSet);
1614
1615 switch (__CFCSetClassType(theSet)) {
1616 case __kCFCharSetClassBuiltin:
1617 result = (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1618 break;
1619
1620 case __kCFCharSetClassRange:
1621 length = __CFCSetRangeLength(theSet);
1622 result = (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1623 break;
1624
1625 case __kCFCharSetClassString:
1626 result = ((length = __CFCSetStringLength(theSet)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet), length, theChar) ? !isInverted : isInverted) : isInverted);
1627 break;
1628
1629 case __kCFCharSetClassBitmap:
1630 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet), theChar) ? true : false) : isInverted);
1631 break;
1632
1633 case __kCFCharSetClassCompactBitmap:
1634 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet), theChar) ? true : false) : isInverted);
1635 break;
1636
1637 default:
1638 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1639 break;
1640 }
1641
1642 return result;
1643 }
1644
1645 Boolean CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet, UTF32Char theChar) {
1646 CFIndex length;
1647 UInt32 plane = (theChar >> 16);
1648 Boolean isAnnexInverted = false;
1649 Boolean isInverted;
1650 Boolean result = false;
1651
1652 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, Boolean, theSet, "longCharacterIsMember:", theChar);
1653
1654 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1655
1656 if (plane) {
1657 CFCharacterSetRef annexPlane;
1658
1659 if (__CFCSetIsBuiltin(theSet)) {
1660 isInverted = __CFCSetIsInverted(theSet);
1661 return (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1662 }
1663
1664 isAnnexInverted = __CFCSetAnnexIsInverted(theSet);
1665
1666 if ((annexPlane = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, plane)) == NULL) {
1667 if (!__CFCSetHasNonBMPPlane(theSet) && __CFCSetIsRange(theSet)) {
1668 isInverted = __CFCSetIsInverted(theSet);
1669 length = __CFCSetRangeLength(theSet);
1670 return (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1671 } else {
1672 return (isAnnexInverted ? true : false);
1673 }
1674 } else {
1675 theSet = annexPlane;
1676 theChar &= 0xFFFF;
1677 }
1678 }
1679
1680 isInverted = __CFCSetIsInverted(theSet);
1681
1682 switch (__CFCSetClassType(theSet)) {
1683 case __kCFCharSetClassBuiltin:
1684 result = (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1685 break;
1686
1687 case __kCFCharSetClassRange:
1688 length = __CFCSetRangeLength(theSet);
1689 result = (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1690 break;
1691
1692 case __kCFCharSetClassString:
1693 result = ((length = __CFCSetStringLength(theSet)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet), length, theChar) ? !isInverted : isInverted) : isInverted);
1694 break;
1695
1696 case __kCFCharSetClassBitmap:
1697 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet), theChar) ? true : false) : isInverted);
1698 break;
1699
1700 case __kCFCharSetClassCompactBitmap:
1701 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet), theChar) ? true : false) : isInverted);
1702 break;
1703
1704 default:
1705 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1706 return false; // To make compiler happy
1707 }
1708
1709 return (result ? !isAnnexInverted : isAnnexInverted);
1710 }
1711
1712 Boolean CFCharacterSetIsSurrogatePairMember(CFCharacterSetRef theSet, UniChar surrogateHigh, UniChar surrogateLow) {
1713 return CFCharacterSetIsLongCharacterMember(theSet, CFCharacterSetGetLongCharacterForSurrogatePair(surrogateHigh, surrogateLow));
1714 }
1715
1716
1717 static inline CFCharacterSetRef __CFCharacterSetGetExpandedSetForNSCharacterSet(const void *characterSet) {
1718 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFCharacterSetRef , characterSet, "_expandedCFCharacterSet");
1719 return NULL;
1720 }
1721
1722 Boolean CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
1723 CFMutableCharacterSetRef copy;
1724 CFCharacterSetRef expandedSet = NULL;
1725 CFCharacterSetRef expandedOtherSet = NULL;
1726 Boolean result;
1727
1728 if ((!CF_IS_OBJC(__kCFCharacterSetTypeID, theSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theSet))) && (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedOtherSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet)))) { // Really CF, we can do some trick here
1729 if (expandedSet) theSet = expandedSet;
1730 if (expandedOtherSet) theOtherSet = expandedOtherSet;
1731
1732 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1733 __CFGenericValidateType(theOtherSet, __kCFCharacterSetTypeID);
1734
1735 if (__CFCSetIsEmpty(theSet)) {
1736 if (__CFCSetIsInverted(theSet)) {
1737 return TRUE; // Inverted empty set covers all range
1738 } else if (!__CFCSetIsEmpty(theOtherSet) || __CFCSetIsInverted(theOtherSet)) {
1739 return FALSE;
1740 }
1741 } else if (__CFCSetIsEmpty(theOtherSet) && !__CFCSetIsInverted(theOtherSet)) {
1742 return TRUE;
1743 } else {
1744 if (__CFCSetIsBuiltin(theSet) || __CFCSetIsBuiltin(theOtherSet)) {
1745 if (__CFCSetClassType(theSet) == __CFCSetClassType(theOtherSet) && __CFCSetBuiltinType(theSet) == __CFCSetBuiltinType(theOtherSet) && !__CFCSetIsInverted(theSet) && !__CFCSetIsInverted(theOtherSet)) return TRUE;
1746 } else if (__CFCSetIsRange(theSet) || __CFCSetIsRange(theOtherSet)) {
1747 if (__CFCSetClassType(theSet) == __CFCSetClassType(theOtherSet)) {
1748 if (__CFCSetIsInverted(theSet)) {
1749 if (__CFCSetIsInverted(theOtherSet)) {
1750 return (__CFCSetRangeFirstChar(theOtherSet) > __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) > (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) ? FALSE : TRUE);
1751 } else {
1752 return ((__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) <= __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) <= __CFCSetRangeFirstChar(theOtherSet) ? TRUE : FALSE);
1753 }
1754 } else {
1755 if (__CFCSetIsInverted(theOtherSet)) {
1756 return ((__CFCSetRangeFirstChar(theSet) == 0 && __CFCSetRangeLength(theSet) == 0x110000) || (__CFCSetRangeFirstChar(theOtherSet) == 0 && (UInt32)__CFCSetRangeLength(theOtherSet) <= __CFCSetRangeFirstChar(theSet)) || ((__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) <= __CFCSetRangeFirstChar(theOtherSet) && (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) == 0x110000) ? TRUE : FALSE);
1757 } else {
1758 return (__CFCSetRangeFirstChar(theOtherSet) < __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) < (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) ? FALSE : TRUE);
1759 }
1760 }
1761 }
1762 } else {
1763 UInt32 theSetAnnexMask = __CFCSetAnnexValidEntriesBitmap(theSet);
1764 UInt32 theOtherSetAnnexMask = __CFCSetAnnexValidEntriesBitmap(theOtherSet);
1765 Boolean isTheSetAnnexInverted = __CFCSetAnnexIsInverted(theSet);
1766 Boolean isTheOtherSetAnnexInverted = __CFCSetAnnexIsInverted(theOtherSet);
1767 uint8_t theSetBuffer[__kCFBitmapSize];
1768 uint8_t theOtherSetBuffer[__kCFBitmapSize];
1769
1770 // We mask plane 1 to plane 16
1771 if (isTheSetAnnexInverted) theSetAnnexMask = (~theSetAnnexMask) & (0xFFFF << 1);
1772 if (isTheOtherSetAnnexInverted) theOtherSetAnnexMask = (~theOtherSetAnnexMask) & (0xFFFF << 1);
1773
1774 __CFCSetGetBitmap(theSet, theSetBuffer);
1775 __CFCSetGetBitmap(theOtherSet, theOtherSetBuffer);
1776
1777 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32 *)theSetBuffer, (const UInt32 *)theOtherSetBuffer, FALSE, FALSE)) return FALSE;
1778
1779 if (theOtherSetAnnexMask) {
1780 CFCharacterSetRef theSetAnnex;
1781 CFCharacterSetRef theOtherSetAnnex;
1782 uint32_t idx;
1783
1784 if ((theSetAnnexMask & theOtherSetAnnexMask) != theOtherSetAnnexMask) return FALSE;
1785
1786 for (idx = 1;idx <= 16;idx++) {
1787 theSetAnnex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx);
1788 if (NULL == theSetAnnex) continue; // This case is already handled by the mask above
1789
1790 theOtherSetAnnex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx);
1791
1792 if (NULL == theOtherSetAnnex) {
1793 if (isTheOtherSetAnnexInverted) {
1794 __CFCSetGetBitmap(theSetAnnex, theSetBuffer);
1795 if (!__CFCSetIsEqualBitmap((const UInt32 *)theSetBuffer, (isTheSetAnnexInverted ? NULL : (const UInt32 *)-1))) return FALSE;
1796 }
1797 } else {
1798 __CFCSetGetBitmap(theSetAnnex, theSetBuffer);
1799 __CFCSetGetBitmap(theOtherSetAnnex, theOtherSetBuffer);
1800 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32 *)theSetBuffer, (const UInt32 *)theOtherSetBuffer, isTheSetAnnexInverted, isTheOtherSetAnnexInverted)) return FALSE;
1801 }
1802 }
1803 }
1804
1805 return TRUE;
1806 }
1807 }
1808 }
1809
1810 copy = CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault, theSet);
1811 CFCharacterSetIntersect(copy, theOtherSet);
1812 result = __CFCharacterSetEqual(copy, theOtherSet);
1813 CFRelease(copy);
1814
1815 return result;
1816 }
1817
1818 Boolean CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet, CFIndex thePlane) {
1819 Boolean isInverted = __CFCSetIsInverted(theSet);
1820
1821 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, Boolean, theSet, "hasMemberInPlane:", thePlane);
1822
1823 if (__CFCSetIsEmpty(theSet)) {
1824 return (isInverted ? TRUE : FALSE);
1825 } else if (__CFCSetIsBuiltin(theSet)) {
1826 CFCharacterSetPredefinedSet type = __CFCSetBuiltinType(theSet);
1827
1828 if (type == kCFCharacterSetControl) {
1829 if (isInverted || (thePlane == 14)) {
1830 return TRUE; // There is no plane that covers all values || Plane 14 has language tags
1831 } else {
1832 return (CFUniCharGetBitmapPtrForPlane(type, thePlane) ? TRUE : FALSE);
1833 }
1834 } else if ((type < kCFCharacterSetDecimalDigit) || (type == kCFCharacterSetNewline)) {
1835 return (thePlane && !isInverted ? FALSE : TRUE);
1836 } else if (__CFCSetBuiltinType(theSet) == kCFCharacterSetIllegal) {
1837 return (isInverted ? (thePlane < 3 || thePlane > 13 ? TRUE : FALSE) : TRUE); // This is according to Unicode 3.1
1838 } else {
1839 if (isInverted) {
1840 return TRUE; // There is no plane that covers all values
1841 } else {
1842 return (CFUniCharGetBitmapPtrForPlane(type, thePlane) ? TRUE : FALSE);
1843 }
1844 }
1845 } else if (__CFCSetIsRange(theSet)) {
1846 UTF32Char firstChar = __CFCSetRangeFirstChar(theSet);
1847 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(theSet) - 1);
1848 CFIndex firstPlane = firstChar >> 16;
1849 CFIndex lastPlane = lastChar >> 16;
1850
1851 if (isInverted) {
1852 if (thePlane < firstPlane || thePlane > lastPlane) {
1853 return TRUE;
1854 } else if (thePlane > firstPlane && thePlane < lastPlane) {
1855 return FALSE;
1856 } else {
1857 firstChar &= 0xFFFF;
1858 lastChar &= 0xFFFF;
1859 if (thePlane == firstPlane) {
1860 return (firstChar || (firstPlane == lastPlane && lastChar != 0xFFFF) ? TRUE : FALSE);
1861 } else {
1862 return (lastChar != 0xFFFF || (firstPlane == lastPlane && firstChar) ? TRUE : FALSE);
1863 }
1864 }
1865 } else {
1866 return (thePlane < firstPlane || thePlane > lastPlane ? FALSE : TRUE);
1867 }
1868 } else {
1869 if (thePlane == 0) {
1870 switch (__CFCSetClassType(theSet)) {
1871 case __kCFCharSetClassString: if (!__CFCSetStringLength(theSet)) return isInverted; break;
1872 case __kCFCharSetClassCompactBitmap: return (__CFCSetCompactBitmapBits(theSet) ? TRUE : FALSE); break;
1873 case __kCFCharSetClassBitmap: return (__CFCSetBitmapBits(theSet) ? TRUE : FALSE); break;
1874 }
1875 return TRUE;
1876 } else {
1877 CFCharacterSetRef annex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, thePlane);
1878 if (annex) {
1879 if (__CFCSetIsRange(annex)) {
1880 return (__CFCSetAnnexIsInverted(theSet) && (__CFCSetRangeFirstChar(annex) == 0) && (__CFCSetRangeLength(annex) == 0x10000) ? FALSE : TRUE);
1881 } else if (__CFCSetIsBitmap(annex)) {
1882 return (__CFCSetAnnexIsInverted(theSet) && __CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(annex), (const UInt32 *)-1) ? FALSE : TRUE);
1883 } else {
1884 uint8_t bitsBuf[__kCFBitmapSize];
1885 __CFCSetGetBitmap(annex, bitsBuf);
1886 return (__CFCSetAnnexIsInverted(theSet) && __CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1) ? FALSE : TRUE);
1887 }
1888 } else {
1889 return __CFCSetAnnexIsInverted(theSet);
1890 }
1891 }
1892 }
1893
1894 return FALSE;
1895 }
1896
1897
1898 CFDataRef CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1899 CFMutableDataRef data;
1900 int numNonBMPPlanes = 0;
1901 int planeIndices[MAX_ANNEX_PLANE];
1902 int idx;
1903 int length;
1904 bool isAnnexInverted;
1905
1906 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFDataRef , theSet, "_retainedBitmapRepresentation");
1907
1908 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1909
1910 isAnnexInverted = (__CFCSetAnnexIsInverted(theSet) != 0);
1911
1912 if (__CFCSetHasNonBMPPlane(theSet)) {
1913 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
1914 if (isAnnexInverted || __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
1915 planeIndices[numNonBMPPlanes++] = idx;
1916 }
1917 }
1918 } else if (__CFCSetIsBuiltin(theSet)) {
1919 numNonBMPPlanes = (__CFCSetIsInverted(theSet) ? MAX_ANNEX_PLANE : CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(theSet)) - 1);
1920 } else if (__CFCSetIsRange(theSet)) {
1921 UInt32 firstChar = __CFCSetRangeFirstChar(theSet);
1922 UInt32 lastChar = __CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet) - 1;
1923 int firstPlane = (firstChar >> 16);
1924 int lastPlane = (lastChar >> 16);
1925 bool isInverted = (__CFCSetIsInverted(theSet) != 0);
1926
1927 if (lastPlane > 0) {
1928 if (firstPlane == 0) {
1929 firstPlane = 1;
1930 firstChar = 0x10000;
1931 }
1932 numNonBMPPlanes = (lastPlane - firstPlane) + 1;
1933 if (isInverted) {
1934 numNonBMPPlanes = MAX_ANNEX_PLANE - numNonBMPPlanes;
1935 if (firstPlane == lastPlane) {
1936 if (((firstChar & 0xFFFF) > 0) || ((lastChar & 0xFFFF) < 0xFFFF)) ++numNonBMPPlanes;
1937 } else {
1938 if ((firstChar & 0xFFFF) > 0) ++numNonBMPPlanes;
1939 if ((lastChar & 0xFFFF) < 0xFFFF) ++numNonBMPPlanes;
1940 }
1941 }
1942 } else if (isInverted) {
1943 numNonBMPPlanes = MAX_ANNEX_PLANE;
1944 }
1945 } else if (isAnnexInverted) {
1946 numNonBMPPlanes = MAX_ANNEX_PLANE;
1947 }
1948
1949 length = __kCFBitmapSize + ((__kCFBitmapSize + 1) * numNonBMPPlanes);
1950 data = CFDataCreateMutable(alloc, length);
1951 CFDataSetLength(data, length);
1952 __CFCSetGetBitmap(theSet, CFDataGetMutableBytePtr(data));
1953
1954 if (numNonBMPPlanes > 0) {
1955 uint8_t *bytes = CFDataGetMutableBytePtr(data) + __kCFBitmapSize;
1956
1957 if (__CFCSetHasNonBMPPlane(theSet)) {
1958 CFCharacterSetRef subset;
1959
1960 for (idx = 0;idx < numNonBMPPlanes;idx++) {
1961 *(bytes++) = planeIndices[idx];
1962 if ((subset = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, planeIndices[idx])) == NULL) {
1963 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, (isAnnexInverted ? 0xFF : 0));
1964 } else {
1965 __CFCSetGetBitmap(subset, bytes);
1966 if (isAnnexInverted) {
1967 uint32_t count = __kCFBitmapSize / sizeof(uint32_t);
1968 uint32_t *bits = (uint32_t *)bytes;
1969
1970 while (count-- > 0) {
1971 *bits = ~(*bits);
1972 ++bits;
1973 }
1974 }
1975 }
1976 bytes += __kCFBitmapSize;
1977 }
1978 } else if (__CFCSetIsBuiltin(theSet)) {
1979 UInt8 result;
1980 CFIndex delta;
1981 Boolean isInverted = __CFCSetIsInverted(theSet);
1982
1983 for (idx = 0;idx < numNonBMPPlanes;idx++) {
1984 if ((result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theSet), idx + 1, bytes + 1, (isInverted != 0))) == kCFUniCharBitmapEmpty) continue;
1985 *(bytes++) = idx + 1;
1986 if (result == kCFUniCharBitmapAll) {
1987 CFIndex bitmapLength = __kCFBitmapSize;
1988 while (bitmapLength-- > 0) *(bytes++) = (uint8_t)0xFF;
1989 } else {
1990 bytes += __kCFBitmapSize;
1991 }
1992 }
1993 delta = bytes - (const uint8_t *)CFDataGetBytePtr(data);
1994 if (delta < length) CFDataSetLength(data, delta);
1995 } else if (__CFCSetIsRange(theSet)) {
1996 UInt32 firstChar = __CFCSetRangeFirstChar(theSet);
1997 UInt32 lastChar = __CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet) - 1;
1998 int firstPlane = (firstChar >> 16);
1999 int lastPlane = (lastChar >> 16);
2000
2001 if (firstPlane == 0) {
2002 firstPlane = 1;
2003 firstChar = 0x10000;
2004 }
2005 if (__CFCSetIsInverted(theSet)) {
2006 // Mask out the plane byte
2007 firstChar &= 0xFFFF;
2008 lastChar &= 0xFFFF;
2009
2010 for (idx = 1;idx < firstPlane;idx++) { // Fill up until the first plane
2011 *(bytes++) = idx;
2012 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2013 bytes += __kCFBitmapSize;
2014 }
2015 if (firstPlane == lastPlane) {
2016 if ((firstChar > 0) || (lastChar < 0xFFFF)) {
2017 *(bytes++) = idx;
2018 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2019 __CFCSetBitmapRemoveCharactersInRange(bytes, firstChar, lastChar);
2020 bytes += __kCFBitmapSize;
2021 }
2022 } else if (firstPlane < lastPlane) {
2023 if (firstChar > 0) {
2024 *(bytes++) = idx;
2025 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0);
2026 __CFCSetBitmapAddCharactersInRange(bytes, 0, firstChar - 1);
2027 bytes += __kCFBitmapSize;
2028 }
2029 if (lastChar < 0xFFFF) {
2030 *(bytes++) = idx;
2031 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0);
2032 __CFCSetBitmapAddCharactersInRange(bytes, lastChar, 0xFFFF);
2033 bytes += __kCFBitmapSize;
2034 }
2035 }
2036 for (idx = lastPlane + 1;idx <= MAX_ANNEX_PLANE;idx++) {
2037 *(bytes++) = idx;
2038 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2039 bytes += __kCFBitmapSize;
2040 }
2041 } else {
2042 for (idx = firstPlane;idx <= lastPlane;idx++) {
2043 *(bytes++) = idx;
2044 __CFCSetBitmapAddCharactersInRange(bytes, (idx == firstPlane ? firstChar : 0), (idx == lastPlane ? lastChar : 0xFFFF));
2045 bytes += __kCFBitmapSize;
2046 }
2047 }
2048 } else if (isAnnexInverted) {
2049 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2050 *(bytes++) = idx;
2051 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2052 bytes += __kCFBitmapSize;
2053 }
2054 }
2055 }
2056
2057 return data;
2058 }
2059
2060 /*** MutableCharacterSet functions ***/
2061 void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange) {
2062 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "addCharactersInRange:", theRange);
2063
2064 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2065 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
2066
2067 if (!theRange.length || (__CFCSetIsInverted(theSet) && __CFCSetIsEmpty(theSet))) return; // Inverted && empty set contains all char
2068
2069 if (!__CFCSetIsInverted(theSet)) {
2070 if (__CFCSetIsEmpty(theSet)) {
2071 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2072 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2073 __CFCSetPutRangeLength(theSet, theRange.length);
2074 __CFCSetPutHasHashValue(theSet, false);
2075 return;
2076 } else if (__CFCSetIsRange(theSet)) {
2077 CFIndex firstChar = __CFCSetRangeFirstChar(theSet);
2078 CFIndex length = __CFCSetRangeLength(theSet);
2079
2080 if (firstChar == theRange.location) {
2081 __CFCSetPutRangeLength(theSet, __CFMin(length, theRange.length));
2082 __CFCSetPutHasHashValue(theSet, false);
2083 return;
2084 } else if (firstChar < theRange.location && theRange.location <= firstChar + length) {
2085 if (firstChar + length < theRange.location + theRange.length) __CFCSetPutRangeLength(theSet, theRange.length + (theRange.location - firstChar));
2086 __CFCSetPutHasHashValue(theSet, false);
2087 return;
2088 } else if (theRange.location < firstChar && firstChar <= theRange.location + theRange.length) {
2089 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2090 __CFCSetPutRangeLength(theSet, length + (firstChar - theRange.location));
2091 __CFCSetPutHasHashValue(theSet, false);
2092 return;
2093 }
2094 } else if (__CFCSetIsString(theSet) && __CFCSetStringLength(theSet) + theRange.length < __kCFStringCharSetMax) {
2095 UniChar *buffer;
2096 if (!__CFCSetStringBuffer(theSet))
2097 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2098 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet);
2099 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theSet) + theRange.length);
2100 while (theRange.length--) *buffer++ = (UniChar)theRange.location++;
2101 qsort(__CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet), sizeof(UniChar), chcompar);
2102 __CFCSetPutHasHashValue(theSet, false);
2103 return;
2104 }
2105 }
2106
2107 // OK, I have to be a bitmap
2108 __CFCSetMakeBitmap(theSet);
2109 __CFCSetAddNonBMPPlanesInRange(theSet, theRange);
2110 if (theRange.location < 0x10000) { // theRange is in BMP
2111 if (theRange.location + theRange.length >= NUMCHARACTERS) theRange.length = NUMCHARACTERS - theRange.location;
2112 __CFCSetBitmapAddCharactersInRange(__CFCSetBitmapBits(theSet), (UniChar)theRange.location, (UniChar)(theRange.location + theRange.length - 1));
2113 }
2114 __CFCSetPutHasHashValue(theSet, false);
2115
2116 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2117 }
2118
2119 void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange) {
2120 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "removeCharactersInRange:", theRange);
2121
2122 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2123 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
2124
2125 if (!theRange.length || (!__CFCSetIsInverted(theSet) && __CFCSetIsEmpty(theSet))) return; // empty set
2126
2127 if (__CFCSetIsInverted(theSet)) {
2128 if (__CFCSetIsEmpty(theSet)) {
2129 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2130 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2131 __CFCSetPutRangeLength(theSet, theRange.length);
2132 __CFCSetPutHasHashValue(theSet, false);
2133 return;
2134 } else if (__CFCSetIsRange(theSet)) {
2135 CFIndex firstChar = __CFCSetRangeFirstChar(theSet);
2136 CFIndex length = __CFCSetRangeLength(theSet);
2137
2138 if (firstChar == theRange.location) {
2139 __CFCSetPutRangeLength(theSet, __CFMin(length, theRange.length));
2140 __CFCSetPutHasHashValue(theSet, false);
2141 return;
2142 } else if (firstChar < theRange.location && theRange.location <= firstChar + length) {
2143 if (firstChar + length < theRange.location + theRange.length) __CFCSetPutRangeLength(theSet, theRange.length + (theRange.location - firstChar));
2144 __CFCSetPutHasHashValue(theSet, false);
2145 return;
2146 } else if (theRange.location < firstChar && firstChar <= theRange.location + theRange.length) {
2147 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2148 __CFCSetPutRangeLength(theSet, length + (firstChar - theRange.location));
2149 __CFCSetPutHasHashValue(theSet, false);
2150 return;
2151 }
2152 } else if (__CFCSetIsString(theSet) && __CFCSetStringLength(theSet) + theRange.length < __kCFStringCharSetMax) {
2153 UniChar *buffer;
2154 if (!__CFCSetStringBuffer(theSet))
2155 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2156 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet);
2157 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theSet) + theRange.length);
2158 while (theRange.length--) *buffer++ = (UniChar)theRange.location++;
2159 qsort(__CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet), sizeof(UniChar), chcompar);
2160 __CFCSetPutHasHashValue(theSet, false);
2161 return;
2162 }
2163 }
2164
2165 // OK, I have to be a bitmap
2166 __CFCSetMakeBitmap(theSet);
2167 __CFCSetRemoveNonBMPPlanesInRange(theSet, theRange);
2168 if (theRange.location < 0x10000) { // theRange is in BMP
2169 if (theRange.location + theRange.length > NUMCHARACTERS) theRange.length = NUMCHARACTERS - theRange.location;
2170 if (theRange.location == 0 && theRange.length == NUMCHARACTERS) { // Remove all
2171 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2172 __CFCSetPutBitmapBits(theSet, NULL);
2173 } else {
2174 __CFCSetBitmapRemoveCharactersInRange(__CFCSetBitmapBits(theSet), (UniChar)theRange.location, (UniChar)(theRange.location + theRange.length - 1));
2175 }
2176 }
2177
2178 __CFCSetPutHasHashValue(theSet, false);
2179 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2180 }
2181
2182 void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString) {
2183 UniChar *buffer;
2184 CFIndex length;
2185 BOOL hasSurrogate = NO;
2186
2187 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "addCharactersInString:", theString);
2188
2189 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2190
2191 if ((__CFCSetIsEmpty(theSet) && __CFCSetIsInverted(theSet)) || !(length = CFStringGetLength(theString))) return;
2192
2193 if (!__CFCSetIsInverted(theSet)) {
2194 CFIndex newLength = length + (__CFCSetIsEmpty(theSet) ? 0 : (__CFCSetIsString(theSet) ? __CFCSetStringLength(theSet) : __kCFStringCharSetMax));
2195
2196 if (newLength < __kCFStringCharSetMax) {
2197 buffer = __CFCSetStringBuffer(theSet);
2198
2199 if (NULL == buffer) {
2200 buffer = (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0);
2201 } else {
2202 buffer += __CFCSetStringLength(theSet);
2203 }
2204
2205 CFStringGetCharacters(theString, CFRangeMake(0, length), (UniChar*)buffer);
2206
2207 if (length > 1) {
2208 UTF16Char *characters = buffer;
2209 const UTF16Char *charactersLimit = characters + length;
2210
2211 while (characters < charactersLimit) {
2212 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) {
2213 memmove(characters, characters + 1, (charactersLimit - (characters + 1)) * sizeof(*characters));
2214 --charactersLimit;
2215 hasSurrogate = YES;
2216 } else {
2217 ++characters;
2218 }
2219 }
2220
2221 newLength -= (length - (charactersLimit - buffer));
2222 }
2223
2224 if (0 == newLength) {
2225 if (NULL == __CFCSetStringBuffer(theSet)) CFAllocatorDeallocate(CFGetAllocator(theSet), buffer);
2226 } else {
2227 if (NULL == __CFCSetStringBuffer(theSet)) {
2228 __CFCSetPutClassType(theSet, __kCFCharSetClassString);
2229 __CFCSetPutStringBuffer(theSet, buffer);
2230 }
2231 __CFCSetPutStringLength(theSet, newLength);
2232 qsort(__CFCSetStringBuffer(theSet), newLength, sizeof(UniChar), chcompar);
2233 }
2234 __CFCSetPutHasHashValue(theSet, false);
2235
2236 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetAddCharactersInRange);
2237
2238 return;
2239 }
2240 }
2241
2242 // OK, I have to be a bitmap
2243 __CFCSetMakeBitmap(theSet);
2244 CFStringInlineBuffer inlineBuffer;
2245 CFIndex idx;
2246
2247 CFStringInitInlineBuffer(theString, &inlineBuffer, CFRangeMake(0, length));
2248
2249 for (idx = 0;idx < length;idx++) {
2250 UTF16Char character = __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer, idx);
2251
2252 if (CFStringIsSurrogateHighCharacter(character) || CFStringIsSurrogateLowCharacter(character)) {
2253 hasSurrogate = YES;
2254 } else {
2255 __CFCSetBitmapAddCharacter(__CFCSetBitmapBits(theSet), character);
2256 }
2257 }
2258
2259 __CFCSetPutHasHashValue(theSet, false);
2260
2261 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2262
2263 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetAddCharactersInRange);
2264 }
2265
2266 void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString) {
2267 UniChar *buffer;
2268 CFIndex length;
2269 BOOL hasSurrogate = NO;
2270
2271 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "removeCharactersInString:", theString);
2272
2273 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2274
2275 if ((__CFCSetIsEmpty(theSet) && !__CFCSetIsInverted(theSet)) || !(length = CFStringGetLength(theString))) return;
2276
2277 if (__CFCSetIsInverted(theSet)) {
2278 CFIndex newLength = length + (__CFCSetIsEmpty(theSet) ? 0 : (__CFCSetIsString(theSet) ? __CFCSetStringLength(theSet) : __kCFStringCharSetMax));
2279
2280 if (newLength < __kCFStringCharSetMax) {
2281 buffer = __CFCSetStringBuffer(theSet);
2282
2283 if (NULL == buffer) {
2284 buffer = (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0);
2285 } else {
2286 buffer += __CFCSetStringLength(theSet);
2287 }
2288
2289 CFStringGetCharacters(theString, CFRangeMake(0, length), (UniChar*)buffer);
2290
2291 if (length > 1) {
2292 UTF16Char *characters = buffer;
2293 const UTF16Char *charactersLimit = characters + length;
2294
2295 while (characters < charactersLimit) {
2296 if (CFStringIsSurrogateHighCharacter(*characters) || CFStringIsSurrogateLowCharacter(*characters)) {
2297 memmove(characters, characters + 1, charactersLimit - (characters + 1));
2298 --charactersLimit;
2299 hasSurrogate = YES;
2300 }
2301 ++characters;
2302 }
2303
2304 newLength -= (length - (charactersLimit - buffer));
2305 }
2306
2307 if (NULL == __CFCSetStringBuffer(theSet)) {
2308 __CFCSetPutClassType(theSet, __kCFCharSetClassString);
2309 __CFCSetPutStringBuffer(theSet, buffer);
2310 }
2311 __CFCSetPutStringLength(theSet, newLength);
2312 qsort(__CFCSetStringBuffer(theSet), newLength, sizeof(UniChar), chcompar);
2313 __CFCSetPutHasHashValue(theSet, false);
2314
2315 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetRemoveCharactersInRange);
2316
2317 return;
2318 }
2319 }
2320
2321 // OK, I have to be a bitmap
2322 __CFCSetMakeBitmap(theSet);
2323 CFStringInlineBuffer inlineBuffer;
2324 CFIndex idx;
2325
2326 CFStringInitInlineBuffer(theString, &inlineBuffer, CFRangeMake(0, length));
2327
2328 for (idx = 0;idx < length;idx++) {
2329 UTF16Char character = __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer, idx);
2330
2331 if (CFStringIsSurrogateHighCharacter(character) || CFStringIsSurrogateLowCharacter(character)) {
2332 hasSurrogate = YES;
2333 } else {
2334 __CFCSetBitmapRemoveCharacter(__CFCSetBitmapBits(theSet), character);
2335 }
2336 }
2337
2338 __CFCSetPutHasHashValue(theSet, false);
2339 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2340
2341 if (hasSurrogate) __CFApplySurrogatesInString(theSet, theString, &CFCharacterSetRemoveCharactersInRange);
2342 }
2343
2344 void CFCharacterSetUnion(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
2345 CFCharacterSetRef expandedSet = NULL;
2346
2347 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "formUnionWithCharacterSet:", theOtherSet);
2348
2349 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2350
2351 if (__CFCSetIsEmpty(theSet) && __CFCSetIsInverted(theSet)) return; // Inverted empty set contains all char
2352
2353 if (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet))) { // Really CF, we can do some trick here
2354 if (expandedSet) theOtherSet = expandedSet;
2355
2356 if (__CFCSetIsEmpty(theOtherSet)) {
2357 if (__CFCSetIsInverted(theOtherSet)) {
2358 if (__CFCSetIsString(theSet) && __CFCSetStringBuffer(theSet)) {
2359 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetStringBuffer(theSet));
2360 } else if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) {
2361 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2362 } else if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) {
2363 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetCompactBitmapBits(theSet));
2364 }
2365 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2366 __CFCSetPutRangeLength(theSet, 0);
2367 __CFCSetPutIsInverted(theSet, true);
2368 __CFCSetPutHasHashValue(theSet, false);
2369 __CFCSetDeallocateAnnexPlane(theSet);
2370 }
2371 } else if (__CFCSetIsBuiltin(theOtherSet) && __CFCSetIsEmpty(theSet)) { // theSet can be builtin set
2372 __CFCSetPutClassType(theSet, __kCFCharSetClassBuiltin);
2373 __CFCSetPutBuiltinType(theSet, __CFCSetBuiltinType(theOtherSet));
2374 if (__CFCSetIsInverted(theOtherSet)) __CFCSetPutIsInverted(theSet, true);
2375 if (__CFCSetAnnexIsInverted(theOtherSet)) __CFCSetAnnexSetIsInverted(theSet, true);
2376 __CFCSetPutHasHashValue(theSet, false);
2377 } else {
2378 if (__CFCSetIsRange(theOtherSet)) {
2379 if (__CFCSetIsInverted(theOtherSet)) {
2380 UTF32Char firstChar = __CFCSetRangeFirstChar(theOtherSet);
2381 CFIndex length = __CFCSetRangeLength(theOtherSet);
2382
2383 if (firstChar > 0) CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(0, firstChar));
2384 firstChar += length;
2385 length = 0x110000 - firstChar;
2386 CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(firstChar, length));
2387 } else {
2388 CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet), __CFCSetRangeLength(theOtherSet)));
2389 }
2390 } else if (__CFCSetIsString(theOtherSet)) {
2391 CFStringRef string = CFStringCreateWithCharactersNoCopy(CFGetAllocator(theSet), __CFCSetStringBuffer(theOtherSet), __CFCSetStringLength(theOtherSet), kCFAllocatorNull);
2392 CFCharacterSetAddCharactersInString(theSet, string);
2393 CFRelease(string);
2394 } else {
2395 __CFCSetMakeBitmap(theSet);
2396 if (__CFCSetIsBitmap(theOtherSet)) {
2397 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2398 UInt32 *bitmap2 = (UInt32*)__CFCSetBitmapBits(theOtherSet);
2399 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2400 while (length--) *bitmap1++ |= *bitmap2++;
2401 } else {
2402 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2403 UInt32 *bitmap2;
2404 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2405 uint8_t bitmapBuffer[__kCFBitmapSize];
2406 __CFCSetGetBitmap(theOtherSet, bitmapBuffer);
2407 bitmap2 = (UInt32*)bitmapBuffer;
2408 while (length--) *bitmap1++ |= *bitmap2++;
2409 }
2410 __CFCSetPutHasHashValue(theSet, false);
2411 }
2412 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2413 CFMutableCharacterSetRef otherSetPlane;
2414 int idx;
2415
2416 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2417 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2418 CFCharacterSetUnion((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx), otherSetPlane);
2419 }
2420 }
2421 } else if (__CFCSetAnnexIsInverted(theOtherSet)) {
2422 if (__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2423 __CFCSetAnnexSetIsInverted(theSet, true);
2424 } else if (__CFCSetIsBuiltin(theOtherSet)) {
2425 CFMutableCharacterSetRef annexPlane;
2426 uint8_t bitmapBuffer[__kCFBitmapSize];
2427 uint8_t result;
2428 int planeIndex;
2429 Boolean isOtherAnnexPlaneInverted = __CFCSetAnnexIsInverted(theOtherSet);
2430 UInt32 *bitmap1;
2431 UInt32 *bitmap2;
2432 CFIndex length;
2433
2434 for (planeIndex = 1;planeIndex <= MAX_ANNEX_PLANE;planeIndex++) {
2435 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet), planeIndex, bitmapBuffer, (isOtherAnnexPlaneInverted != 0));
2436 if (result != kCFUniCharBitmapEmpty) {
2437 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, planeIndex);
2438 if (result == kCFUniCharBitmapAll) {
2439 CFCharacterSetAddCharactersInRange(annexPlane, CFRangeMake(0x0000, 0x10000));
2440 } else {
2441 __CFCSetMakeBitmap(annexPlane);
2442 bitmap1 = (UInt32 *)__CFCSetBitmapBits(annexPlane);
2443 length = __kCFBitmapSize / sizeof(UInt32);
2444 bitmap2 = (UInt32*)bitmapBuffer;
2445 while (length--) *bitmap1++ |= *bitmap2++;
2446 }
2447 }
2448 }
2449 }
2450 }
2451 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2452 } else { // It's NSCharacterSet
2453 CFDataRef bitmapRep = CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault, theOtherSet);
2454 const UInt32 *bitmap2 = (bitmapRep && CFDataGetLength(bitmapRep) ? (const UInt32 *)CFDataGetBytePtr(bitmapRep) : NULL);
2455 if (bitmap2) {
2456 UInt32 *bitmap1;
2457 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2458 __CFCSetMakeBitmap(theSet);
2459 bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2460 while (length--) *bitmap1++ |= *bitmap2++;
2461 __CFCSetPutHasHashValue(theSet, false);
2462 }
2463 CFRelease(bitmapRep);
2464 }
2465 }
2466
2467 void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
2468 CFCharacterSetRef expandedSet = NULL;
2469
2470 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "formIntersectionWithCharacterSet:", theOtherSet);
2471
2472 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2473
2474 if (__CFCSetIsEmpty(theSet) && !__CFCSetIsInverted(theSet)) return; // empty set
2475
2476 if (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet))) { // Really CF, we can do some trick here
2477 if (expandedSet) theOtherSet = expandedSet;
2478
2479 if (__CFCSetIsEmpty(theOtherSet)) {
2480 if (!__CFCSetIsInverted(theOtherSet)) {
2481 if (__CFCSetIsString(theSet) && __CFCSetStringBuffer(theSet)) {
2482 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetStringBuffer(theSet));
2483 } else if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) {
2484 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2485 } else if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) {
2486 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetCompactBitmapBits(theSet));
2487 }
2488 __CFCSetPutClassType(theSet, __kCFCharSetClassBitmap);
2489 __CFCSetPutBitmapBits(theSet, NULL);
2490 __CFCSetPutIsInverted(theSet, false);
2491 theSet->_hashValue = 0;
2492 __CFCSetPutHasHashValue(theSet, true);
2493 __CFCSetDeallocateAnnexPlane(theSet);
2494 }
2495 } else if (__CFCSetIsEmpty(theSet)) { // non inverted empty set contains all character
2496 __CFCSetPutClassType(theSet, __CFCSetClassType(theOtherSet));
2497 __CFCSetPutHasHashValue(theSet, __CFCSetHasHashValue(theOtherSet));
2498 __CFCSetPutIsInverted(theSet, __CFCSetIsInverted(theOtherSet));
2499 theSet->_hashValue = theOtherSet->_hashValue;
2500 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2501 CFMutableCharacterSetRef otherSetPlane;
2502 int idx;
2503 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2504 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2505 otherSetPlane = (CFMutableCharacterSetRef)CFCharacterSetCreateMutableCopy(CFGetAllocator(theSet), otherSetPlane);
2506 __CFCSetPutCharacterSetToAnnexPlane(theSet, otherSetPlane, idx);
2507 CFRelease(otherSetPlane);
2508 }
2509 }
2510 __CFCSetAnnexSetIsInverted(theSet, __CFCSetAnnexIsInverted(theOtherSet));
2511 }
2512
2513 switch (__CFCSetClassType(theOtherSet)) {
2514 case __kCFCharSetClassBuiltin:
2515 __CFCSetPutBuiltinType(theSet, __CFCSetBuiltinType(theOtherSet));
2516 break;
2517
2518 case __kCFCharSetClassRange:
2519 __CFCSetPutRangeFirstChar(theSet, __CFCSetRangeFirstChar(theOtherSet));
2520 __CFCSetPutRangeLength(theSet, __CFCSetRangeLength(theOtherSet));
2521 break;
2522
2523 case __kCFCharSetClassString:
2524 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theOtherSet));
2525 if (!__CFCSetStringBuffer(theSet))
2526 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2527 memmove(__CFCSetStringBuffer(theSet), __CFCSetStringBuffer(theOtherSet), __CFCSetStringLength(theSet) * sizeof(UniChar));
2528 break;
2529
2530 case __kCFCharSetClassBitmap:
2531 __CFCSetPutBitmapBits(theSet, (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet), sizeof(uint8_t) * __kCFBitmapSize, 0));
2532 memmove(__CFCSetBitmapBits(theSet), __CFCSetBitmapBits(theOtherSet), __kCFBitmapSize);
2533 break;
2534
2535 case __kCFCharSetClassCompactBitmap: {
2536 const uint8_t *cBitmap = __CFCSetCompactBitmapBits(theOtherSet);
2537 uint8_t *newBitmap;
2538 uint32_t size = __CFCSetGetCompactBitmapSize(cBitmap);
2539 newBitmap = (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet), sizeof(uint8_t) * size, 0);
2540 __CFCSetPutBitmapBits(theSet, newBitmap);
2541 memmove(newBitmap, cBitmap, size);
2542 }
2543 break;
2544
2545 default:
2546 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
2547 }
2548 } else {
2549 __CFCSetMakeBitmap(theSet);
2550 if (__CFCSetIsBitmap(theOtherSet)) {
2551 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2552 UInt32 *bitmap2 = (UInt32*)__CFCSetBitmapBits(theOtherSet);
2553 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2554 while (length--) *bitmap1++ &= *bitmap2++;
2555 } else {
2556 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2557 UInt32 *bitmap2;
2558 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2559 uint8_t bitmapBuffer[__kCFBitmapSize];
2560 __CFCSetGetBitmap(theOtherSet, bitmapBuffer);
2561 bitmap2 = (UInt32*)bitmapBuffer;
2562 while (length--) *bitmap1++ &= *bitmap2++;
2563 }
2564 __CFCSetPutHasHashValue(theSet, false);
2565 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2566 CFMutableCharacterSetRef annexPlane;
2567 CFMutableCharacterSetRef otherSetPlane;
2568 int idx;
2569 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2570 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2571 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx);
2572 CFCharacterSetIntersect(annexPlane, otherSetPlane);
2573 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2574 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
2575 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2576 }
2577 }
2578 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2579 } else if (__CFCSetIsBuiltin(theOtherSet) && !__CFCSetAnnexIsInverted(theOtherSet)) {
2580 CFMutableCharacterSetRef annexPlane;
2581 uint8_t bitmapBuffer[__kCFBitmapSize];
2582 uint8_t result;
2583 int planeIndex;
2584 UInt32 *bitmap1;
2585 UInt32 *bitmap2;
2586 CFIndex length;
2587
2588 for (planeIndex = 1;planeIndex <= MAX_ANNEX_PLANE;planeIndex++) {
2589 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, planeIndex);
2590 if (annexPlane) {
2591 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet), planeIndex, bitmapBuffer, false);
2592 if (result == kCFUniCharBitmapEmpty) {
2593 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, planeIndex);
2594 } else if (result == kCFUniCharBitmapFilled) {
2595 Boolean isEmpty = true;
2596
2597 __CFCSetMakeBitmap(annexPlane);
2598 bitmap1 = (UInt32 *)__CFCSetBitmapBits(annexPlane);
2599 length = __kCFBitmapSize / sizeof(UInt32);
2600 bitmap2 = (UInt32*)bitmapBuffer;
2601
2602 while (length--) {
2603 if ((*bitmap1++ &= *bitmap2++)) isEmpty = false;
2604 }
2605 if (isEmpty) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, planeIndex);
2606 }
2607 }
2608 }
2609 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2610 } else if (__CFCSetIsRange(theOtherSet)) {
2611 CFMutableCharacterSetRef tempOtherSet = CFCharacterSetCreateMutable(CFGetAllocator(theSet));
2612 CFMutableCharacterSetRef annexPlane;
2613 CFMutableCharacterSetRef otherSetPlane;
2614 int idx;
2615
2616 __CFCSetAddNonBMPPlanesInRange(tempOtherSet, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet), __CFCSetRangeLength(theOtherSet)));
2617
2618 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2619 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(tempOtherSet, idx))) {
2620 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx);
2621 CFCharacterSetIntersect(annexPlane, otherSetPlane);
2622 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2623 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
2624 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2625 }
2626 }
2627 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2628 CFRelease(tempOtherSet);
2629 } else if ((__CFCSetHasNonBMPPlane(theSet) || __CFCSetAnnexIsInverted(theSet)) && !__CFCSetAnnexIsInverted(theOtherSet)) {
2630 __CFCSetDeallocateAnnexPlane(theSet);
2631 }
2632 }
2633 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2634 } else { // It's NSCharacterSet
2635 CFDataRef bitmapRep = CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault, theOtherSet);
2636 const UInt32 *bitmap2 = (bitmapRep && CFDataGetLength(bitmapRep) ? (const UInt32 *)CFDataGetBytePtr(bitmapRep) : NULL);
2637 if (bitmap2) {
2638 UInt32 *bitmap1;
2639 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2640 __CFCSetMakeBitmap(theSet);
2641 bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2642 while (length--) *bitmap1++ &= *bitmap2++;
2643 __CFCSetPutHasHashValue(theSet, false);
2644 }
2645 CFRelease(bitmapRep);
2646 }
2647 }
2648
2649 void CFCharacterSetInvert(CFMutableCharacterSetRef theSet) {
2650
2651 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, void, theSet, "invert");
2652
2653 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2654
2655 __CFCSetPutHasHashValue(theSet, false);
2656
2657 if (__CFCSetClassType(theSet) == __kCFCharSetClassBitmap) {
2658 CFIndex idx;
2659 CFIndex count = __kCFBitmapSize / sizeof(UInt32);
2660 UInt32 *bitmap = (UInt32*) __CFCSetBitmapBits(theSet);
2661
2662 if (NULL == bitmap) {
2663 bitmap = (UInt32 *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFBitmapSize, 0);
2664 __CFCSetPutBitmapBits(theSet, (uint8_t *)bitmap);
2665 for (idx = 0;idx < count;idx++) bitmap[idx] = ((UInt32)0xFFFFFFFF);
2666 } else {
2667 for (idx = 0;idx < count;idx++) bitmap[idx] = ~(bitmap[idx]);
2668 }
2669 __CFCSetAllocateAnnexForPlane(theSet, 0); // We need to alloc annex to invert
2670 } else if (__CFCSetClassType(theSet) == __kCFCharSetClassCompactBitmap) {
2671 uint8_t *bitmap = __CFCSetCompactBitmapBits(theSet);
2672 int idx;
2673 int length = 0;
2674 uint8_t value;
2675
2676 for (idx = 0;idx < __kCFCompactBitmapNumPages;idx++) {
2677 value = bitmap[idx];
2678
2679 if (value == 0) {
2680 bitmap[idx] = UINT8_MAX;
2681 } else if (value == UINT8_MAX) {
2682 bitmap[idx] = 0;
2683 } else {
2684 length += __kCFCompactBitmapPageSize;
2685 }
2686 }
2687 bitmap += __kCFCompactBitmapNumPages;
2688 for (idx = 0;idx < length;idx++) bitmap[idx] = ~(bitmap[idx]);
2689 __CFCSetAllocateAnnexForPlane(theSet, 0); // We need to alloc annex to invert
2690 } else {
2691 __CFCSetPutIsInverted(theSet, !__CFCSetIsInverted(theSet));
2692 }
2693 __CFCSetAnnexSetIsInverted(theSet, !__CFCSetAnnexIsInverted(theSet));
2694 }
2695
2696 void CFCharacterSetCompact(CFMutableCharacterSetRef theSet) {
2697 if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) __CFCSetMakeCompact(theSet);
2698 if (__CFCSetHasNonBMPPlane(theSet)) {
2699 CFMutableCharacterSetRef annex;
2700 int idx;
2701
2702 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2703 if ((annex = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) && __CFCSetIsBitmap(annex) && __CFCSetBitmapBits(annex)) {
2704 __CFCSetMakeCompact(annex);
2705 }
2706 }
2707 }
2708 }
2709
2710 void CFCharacterSetFast(CFMutableCharacterSetRef theSet) {
2711 if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) __CFCSetMakeBitmap(theSet);
2712 if (__CFCSetHasNonBMPPlane(theSet)) {
2713 CFMutableCharacterSetRef annex;
2714 int idx;
2715
2716 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2717 if ((annex = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) && __CFCSetIsCompactBitmap(annex) && __CFCSetCompactBitmapBits(annex)) {
2718 __CFCSetMakeBitmap(annex);
2719 }
2720 }
2721 }
2722 }
2723
2724 /* Keyed-coding support
2725 */
2726 CFCharacterSetKeyedCodingType _CFCharacterSetGetKeyedCodingType(CFCharacterSetRef cset) {
2727 if (CF_IS_OBJC(__kCFCharacterSetTypeID, cset)) return kCFCharacterSetKeyedCodingTypeBitmap;
2728
2729 switch (__CFCSetClassType(cset)) {
2730 case __kCFCharSetClassBuiltin: return ((__CFCSetBuiltinType(cset) < kCFCharacterSetSymbol) ? kCFCharacterSetKeyedCodingTypeBuiltin : kCFCharacterSetKeyedCodingTypeBuiltinAndBitmap);
2731 case __kCFCharSetClassRange: return kCFCharacterSetKeyedCodingTypeRange;
2732
2733 case __kCFCharSetClassString: // We have to check if we have non-BMP here
2734 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) return kCFCharacterSetKeyedCodingTypeString; // BMP only. we can archive the string
2735 /* fallthrough */
2736
2737 default:
2738 return kCFCharacterSetKeyedCodingTypeBitmap;
2739 }
2740 }
2741
2742 CFCharacterSetPredefinedSet _CFCharacterSetGetKeyedCodingBuiltinType(CFCharacterSetRef cset) { return __CFCSetBuiltinType(cset); }
2743 CFRange _CFCharacterSetGetKeyedCodingRange(CFCharacterSetRef cset) { return CFRangeMake(__CFCSetRangeFirstChar(cset), __CFCSetRangeLength(cset)); }
2744 CFStringRef _CFCharacterSetCreateKeyedCodingString(CFCharacterSetRef cset) { return CFStringCreateWithCharacters(kCFAllocatorSystemDefault, __CFCSetStringBuffer(cset), __CFCSetStringLength(cset)); }
2745
2746 bool _CFCharacterSetIsInverted(CFCharacterSetRef cset) { return (__CFCSetIsInverted(cset) != 0); }
2747 void _CFCharacterSetSetIsInverted(CFCharacterSetRef cset, bool flag) { __CFCSetPutIsInverted((CFMutableCharacterSetRef)cset, flag); }
2748
2749 /* Inline buffer support
2750 */
2751 void CFCharacterSetInitInlineBuffer(CFCharacterSetRef cset, CFCharacterSetInlineBuffer *buffer) {
2752 memset(buffer, 0, sizeof(CFCharacterSetInlineBuffer));
2753 buffer->cset = cset;
2754 buffer->rangeLimit = 0x10000;
2755
2756 if (CF_IS_OBJC(__kCFCharacterSetTypeID, cset)) {
2757 CFCharacterSetRef expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(cset);
2758
2759 if (NULL == expandedSet) {
2760 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2761 buffer->rangeLimit = 0x110000;
2762
2763 return;
2764 } else {
2765 cset = expandedSet;
2766 }
2767 }
2768
2769 switch (__CFCSetClassType(cset)) {
2770 case __kCFCharSetClassBuiltin:
2771 buffer->bitmap = CFUniCharGetBitmapPtrForPlane(__CFCSetBuiltinType(cset), 0);
2772 buffer->rangeLimit = 0x110000;
2773 if (NULL == buffer->bitmap) {
2774 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2775 } else {
2776 if (__CFCSetIsInverted(cset)) buffer->flags = kCFCharacterSetIsInverted;
2777 }
2778 break;
2779
2780 case __kCFCharSetClassRange:
2781 buffer->rangeStart = __CFCSetRangeFirstChar(cset);
2782 buffer->rangeLimit = __CFCSetRangeFirstChar(cset) + __CFCSetRangeLength(cset);
2783 if (__CFCSetIsInverted(cset)) buffer->flags = kCFCharacterSetIsInverted;
2784 return;
2785
2786 case __kCFCharSetClassString:
2787 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2788 if (__CFCSetStringLength(cset) > 0) {
2789 buffer->rangeStart = *__CFCSetStringBuffer(cset);
2790 buffer->rangeLimit = *(__CFCSetStringBuffer(cset) + __CFCSetStringLength(cset) - 1) + 1;
2791
2792 if (__CFCSetIsInverted(cset)) {
2793 if (0 == buffer->rangeStart) {
2794 buffer->rangeStart = buffer->rangeLimit;
2795 buffer->rangeLimit = 0x10000;
2796 } else if (0x10000 == buffer->rangeLimit) {
2797 buffer->rangeLimit = buffer->rangeStart;
2798 buffer->rangeStart = 0;
2799 } else {
2800 buffer->rangeStart = 0;
2801 buffer->rangeLimit = 0x10000;
2802 }
2803 }
2804 }
2805 break;
2806
2807 case __kCFCharSetClassBitmap:
2808 case __kCFCharSetClassCompactBitmap:
2809 buffer->bitmap = __CFCSetCompactBitmapBits(cset);
2810 if (NULL == buffer->bitmap) {
2811 buffer->flags = kCFCharacterSetIsCompactBitmap;
2812 if (__CFCSetIsInverted(cset)) buffer->flags |= kCFCharacterSetIsInverted;
2813 } else {
2814 if (__kCFCharSetClassCompactBitmap == __CFCSetClassType(cset)) buffer->flags = kCFCharacterSetIsCompactBitmap;
2815 }
2816 break;
2817
2818 default:
2819 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
2820 return;
2821 }
2822
2823 if (__CFCSetAnnexIsInverted(cset)) {
2824 buffer->rangeLimit = 0x110000;
2825 } else if (__CFCSetHasNonBMPPlane(cset)) {
2826 CFIndex index;
2827
2828 for (index = MAX_ANNEX_PLANE;index > 0;index--) {
2829 if (NULL != __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, index)) {
2830 buffer->rangeLimit = (index + 1) << 16;
2831 break;
2832 }
2833 }
2834 }
2835 }