]> git.saurik.com Git - apple/cf.git/blob - CFCharacterSet.c
CF-476.18.tar.gz
[apple/cf.git] / CFCharacterSet.c
1 /*
2 * Copyright (c) 2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFCharacterSet.c
24 Copyright 1999-2002, Apple, Inc. All rights reserved.
25 Responsibility: Aki Inoue
26 */
27
28 #include <CoreFoundation/CFCharacterSet.h>
29 #include <CoreFoundation/CFByteOrder.h>
30 #include "CFCharacterSetPriv.h"
31 #include <CoreFoundation/CFData.h>
32 #include <CoreFoundation/CFString.h>
33 #include "CFInternal.h"
34 #include "CFUniChar.h"
35 #include "CFUniCharPriv.h"
36 #include <stdlib.h>
37 #include <string.h>
38
39
/* Basic sizing constants for the one-plane (65536 character) bitmaps used throughout this file. */
40 #define BITSPERBYTE 8 /* (CHAR_BIT * sizeof(unsigned char)) */
41 #define LOG_BPB 3 /* log2(BITSPERBYTE): character >> LOG_BPB is the byte index inside a bitmap */
42 #define LOG_BPLW 5 /* log2(32): character >> LOG_BPLW is the 32-bit word index inside a bitmap */
43 #define NUMCHARACTERS 65536 /* number of characters covered by one bitmap */
44
45 #define MAX_ANNEX_PLANE (16) /* highest plane number iterated over when walking the annex */
46
47 /* Number of things in the array keeping the bits.
48 */
49 #define __kCFBitmapSize (NUMCHARACTERS / BITSPERBYTE) /* bytes in one full bitmap */
50
51 /* How many elements max can be in an __kCFCharSetClassString CFCharacterSet
52 */
53 #define __kCFStringCharSetMax 64
54
55 /* The last builtin set ID number
56 */
57 #define __kCFLastBuiltinSetID kCFCharacterSetNewline /* upper bound checked by __CFCSetValidateBuiltinType */
58
59 /* How many elements in the "singles" array before we use binary search.
60 */
61 #define __kCFSetBreakeven 10
62
/* This tells us, within 1k or so, whether a thing is POTENTIALLY in the set (in the bitmap blob of the private structure) before we bother to do specific checking.
   n is the character value and i is an array of flag words: element (n >> 15) is selected and bit ((n >> 10) % 32) of it is tested.
   Both arguments are fully parenthesized so that expressions such as (a | b) can be passed safely. Note that n is evaluated twice.
*/
#define __CFCSetBitsInRange(n, i) ((i)[(n) >> 15] & (1L << (((n) >> 10) % 32)))
66
67 /* Compact bitmap params
68 */
/* A compact bitmap starts with one header byte per page, followed by the bodies of the pages that are stored explicitly. */
69 #define __kCFCompactBitmapNumPages (256) /* number of pages (and header bytes) per bitmap */
70
71 #define __kCFCompactBitmapMaxPages (128) // the max pages allocated
72
73 #define __kCFCompactBitmapPageSize (__kCFBitmapSize / __kCFCompactBitmapNumPages) /* bytes per page body */
74
/* Side structure holding the per-plane character sets for planes above the BMP. */
75 typedef struct {
76 CFCharacterSetRef *_nonBMPPlanes; /* array of per-plane sets; the set for plane p lives at index p - 1 */
77 unsigned int _validEntriesBitmap; /* bit p is set when _nonBMPPlanes[p - 1] holds a set */
78 unsigned char _numOfAllocEntries; /* number of slots allocated in _nonBMPPlanes */
79 unsigned char _isAnnexInverted; /* non-zero when the annex as a whole is flagged as inverted */
80 uint16_t _padding;
81 } CFCharSetAnnexStruct;
82
/* Instance layout of a character set. Which member of _variants is meaningful is
   selected by the class-type bits kept in _base (see the __kCFCharSetClass* values). */
83 struct __CFCharacterSet {
84 CFRuntimeBase _base;
85 CFHashCode _hashValue;
86 union {
87 struct {
88 CFIndex _type; /* predefined set identifier */
89 } _builtin;
90 struct {
91 UInt32 _firstChar; /* first character of the range */
92 CFIndex _length; /* number of characters in the range */
93 } _range;
94 struct {
95 UniChar *_buffer; /* array of member characters */
96 CFIndex _length; /* number of entries in _buffer */
97 } _string;
98 struct {
99 uint8_t *_bits; /* full bitmap of __kCFBitmapSize bytes; NULL is treated as empty by __CFCSetIsEmpty */
100 } _bitmap;
101 struct {
102 uint8_t *_cBits; /* compact bitmap: page header followed by stored page bodies */
103 } _compactBitmap;
104 } _variants;
105 CFCharSetAnnexStruct *_annex; /* non-BMP plane data; NULL when there is none */
106 };
107
108 /* _base._info values interesting for CFCharacterSet
109 */
110 enum {
/* Class type: selects which _variants member is in use */
111 __kCFCharSetClassTypeMask = 0x0070,
112 __kCFCharSetClassBuiltin = 0x0000,
113 __kCFCharSetClassRange = 0x0010,
114 __kCFCharSetClassString = 0x0020,
115 __kCFCharSetClassBitmap = 0x0030,
116 __kCFCharSetClassSet = 0x0040,
117 __kCFCharSetClassCompactBitmap = 0x0040, /* NOTE(review): same value as __kCFCharSetClassSet above */
118
/* Set when the stored contents are to be interpreted as inverted */
119 __kCFCharSetIsInvertedMask = 0x0008,
120 __kCFCharSetIsInverted = 0x0008,
121
/* Set when _hashValue has been filled in */
122 __kCFCharSetHasHashValueMask = 0x00004,
123 __kCFCharSetHasHashValue = 0x0004,
124
125 /* Generic CFBase values */
126 __kCFCharSetIsMutableMask = 0x0001,
127 __kCFCharSetIsMutable = 0x0001,
128 };
129
130 /* Inline accessor macros for _base._info
131 */
132 CF_INLINE Boolean __CFCSetIsMutable(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetIsMutableMask) == __kCFCharSetIsMutable;}
133 CF_INLINE Boolean __CFCSetIsBuiltin(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassBuiltin;}
134 CF_INLINE Boolean __CFCSetIsRange(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassRange;}
135 CF_INLINE Boolean __CFCSetIsString(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassString;}
136 CF_INLINE Boolean __CFCSetIsBitmap(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassBitmap;}
137 CF_INLINE Boolean __CFCSetIsCompactBitmap(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask) == __kCFCharSetClassCompactBitmap;}
138 CF_INLINE Boolean __CFCSetIsInverted(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetIsInvertedMask) == __kCFCharSetIsInverted;}
139 CF_INLINE Boolean __CFCSetHasHashValue(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetHasHashValueMask) == __kCFCharSetHasHashValue;}
140 CF_INLINE UInt32 __CFCSetClassType(CFCharacterSetRef cset) {return (cset->_base._cfinfo[CF_INFO_BITS] & __kCFCharSetClassTypeMask);}
141
142 CF_INLINE void __CFCSetPutIsMutable(CFMutableCharacterSetRef cset, Boolean isMutable) {(isMutable ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetIsMutable) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~ __kCFCharSetIsMutable));}
143 CF_INLINE void __CFCSetPutIsInverted(CFMutableCharacterSetRef cset, Boolean isInverted) {(isInverted ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetIsInverted) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetIsInverted));}
144 CF_INLINE void __CFCSetPutHasHashValue(CFMutableCharacterSetRef cset, Boolean hasHash) {(hasHash ? (cset->_base._cfinfo[CF_INFO_BITS] |= __kCFCharSetHasHashValue) : (cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetHasHashValue));}
145 CF_INLINE void __CFCSetPutClassType(CFMutableCharacterSetRef cset, UInt32 classType) {cset->_base._cfinfo[CF_INFO_BITS] &= ~__kCFCharSetClassTypeMask; cset->_base._cfinfo[CF_INFO_BITS] |= classType;}
146
147
148 /* Inline contents accessor macros
149 */
150 CF_INLINE CFCharacterSetPredefinedSet __CFCSetBuiltinType(CFCharacterSetRef cset) {return cset->_variants._builtin._type;}
151 CF_INLINE UInt32 __CFCSetRangeFirstChar(CFCharacterSetRef cset) {return cset->_variants._range._firstChar;}
152 CF_INLINE CFIndex __CFCSetRangeLength(CFCharacterSetRef cset) {return cset->_variants._range._length;}
153 CF_INLINE UniChar *__CFCSetStringBuffer(CFCharacterSetRef cset) {return (UniChar*)(cset->_variants._string._buffer);}
154 CF_INLINE CFIndex __CFCSetStringLength(CFCharacterSetRef cset) {return cset->_variants._string._length;}
155 CF_INLINE uint8_t *__CFCSetBitmapBits(CFCharacterSetRef cset) {return cset->_variants._bitmap._bits;}
156 CF_INLINE uint8_t *__CFCSetCompactBitmapBits(CFCharacterSetRef cset) {return cset->_variants._compactBitmap._cBits;}
157
158 CF_INLINE void __CFCSetPutBuiltinType(CFMutableCharacterSetRef cset, CFCharacterSetPredefinedSet type) {cset->_variants._builtin._type = type;}
159 CF_INLINE void __CFCSetPutRangeFirstChar(CFMutableCharacterSetRef cset, UInt32 first) {cset->_variants._range._firstChar = first;}
160 CF_INLINE void __CFCSetPutRangeLength(CFMutableCharacterSetRef cset, CFIndex length) {cset->_variants._range._length = length;}
161 CF_INLINE void __CFCSetPutStringBuffer(CFMutableCharacterSetRef cset, UniChar *theBuffer) {cset->_variants._string._buffer = theBuffer;}
162 CF_INLINE void __CFCSetPutStringLength(CFMutableCharacterSetRef cset, CFIndex length) {cset->_variants._string._length = length;}
163 CF_INLINE void __CFCSetPutBitmapBits(CFMutableCharacterSetRef cset, uint8_t *bits) {cset->_variants._bitmap._bits = bits;}
164 CF_INLINE void __CFCSetPutCompactBitmapBits(CFMutableCharacterSetRef cset, uint8_t *bits) {cset->_variants._compactBitmap._cBits = bits;}
165
166 /* Validation funcs
167 */
#if defined(CF_ENABLE_ASSERTIONS)
/* Asserts that type is a valid predefined set identifier (1 .. __kCFLastBuiltinSetID).
   func is the caller's name, used as the prefix of the assertion message. */
CF_INLINE void __CFCSetValidateBuiltinType(CFCharacterSetPredefinedSet type, const char *func) {
    /* The value is cast to long so that the format specifier matches on every data model. */
    CFAssert2(type > 0 && type <= __kCFLastBuiltinSetID, __kCFLogAssertion, "%s: Unknown builtin type %ld", func, (long)type);
}

/* Asserts that theRange starts at a non-negative location and ends no later than 0x1FFFFF. */
CF_INLINE void __CFCSetValidateRange(CFRange theRange, const char *func) {
    /* location and length are CFIndex values; cast to long to match %ld. */
    CFAssert3(theRange.location >= 0 && theRange.location + theRange.length <= 0x1FFFFF, __kCFLogAssertion, "%s: Range out of Unicode range (location -> %ld length -> %ld)", func, (long)theRange.location, (long)theRange.length);
}

/* Asserts that cset is a character set object and that its mutable flag is set. */
CF_INLINE void __CFCSetValidateTypeAndMutability(CFCharacterSetRef cset, const char *func) {
    __CFGenericValidateType(cset, __kCFCharacterSetTypeID);
    CFAssert1(__CFCSetIsMutable(cset), __kCFLogAssertion, "%s: Immutable character set passed to mutable function", func);
}
#else
/* Assertions disabled: the validation calls compile to nothing. */
#define __CFCSetValidateBuiltinType(t,f)
#define __CFCSetValidateRange(r,f)
#define __CFCSetValidateTypeAndMutability(r,f)
#endif
184
185 /* Inline utility funcs
186 */
187 static Boolean __CFCSetIsEqualBitmap(const UInt32 *bits1, const UInt32 *bits2) {
188 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
189
190 if (bits1 == bits2) {
191 return true;
192 } else if (bits1 && bits2) {
193 if (bits1 == (const UInt32 *)-1) {
194 while (length--) if ((UInt32)-1 != *bits2++) return false;
195 } else if (bits2 == (const UInt32 *)-1) {
196 while (length--) if ((UInt32)-1 != *bits1++) return false;
197 } else {
198 while (length--) if (*bits1++ != *bits2++) return false;
199 }
200 return true;
201 } else if (!bits1 && !bits2) { // empty set
202 return true;
203 } else {
204 if (bits2) bits1 = bits2;
205 if (bits1 == (const UInt32 *)-1) return false;
206 while (length--) if (*bits1++) return false;
207 return true;
208 }
209 }
210
211 CF_INLINE Boolean __CFCSetIsEqualBitmapInverted(const UInt32 *bits1, const UInt32 *bits2) {
212 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
213
214 while (length--) if (*bits1++ != ~(*(bits2++))) return false;
215 return true;
216 }
217
218 static Boolean __CFCSetIsBitmapEqualToRange(const UInt32 *bits, UniChar firstChar, UniChar lastChar, Boolean isInverted) {
219 CFIndex firstCharIndex = firstChar >> LOG_BPB;
220 CFIndex lastCharIndex = lastChar >> LOG_BPB;
221 CFIndex length;
222 UInt32 value;
223
224 if (firstCharIndex == lastCharIndex) {
225 value = ((((UInt32)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((UInt32)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))))) << (((sizeof(UInt32) - 1) - (firstCharIndex % sizeof(UInt32))) * BITSPERBYTE);
226 value = CFSwapInt32HostToBig(value);
227 firstCharIndex = lastCharIndex = firstChar >> LOG_BPLW;
228 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
229 } else {
230 UInt32 firstCharMask;
231 UInt32 lastCharMask;
232
233 length = firstCharIndex % sizeof(UInt32);
234 firstCharMask = (((((UInt32)0xFF) << (firstChar & (BITSPERBYTE - 1))) & 0xFF) << (((sizeof(UInt32) - 1) - length) * BITSPERBYTE)) | (((UInt32)0xFFFFFFFF) >> ((length + 1) * BITSPERBYTE));
235
236 length = lastCharIndex % sizeof(UInt32);
237 lastCharMask = ((((UInt32)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))) << (((sizeof(UInt32) - 1) - length) * BITSPERBYTE)) | (((UInt32)0xFFFFFFFF) << ((sizeof(UInt32) - length) * BITSPERBYTE));
238
239 firstCharIndex = firstChar >> LOG_BPLW;
240 lastCharIndex = lastChar >> LOG_BPLW;
241
242 if (firstCharIndex == lastCharIndex) {
243 firstCharMask &= lastCharMask;
244 value = CFSwapInt32HostToBig(firstCharMask & lastCharMask);
245 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
246 } else {
247 value = CFSwapInt32HostToBig(firstCharMask);
248 if (*(bits + firstCharIndex) != (isInverted ? ~value : value)) return FALSE;
249
250 value = CFSwapInt32HostToBig(lastCharMask);
251 if (*(bits + lastCharIndex) != (isInverted ? ~value : value)) return FALSE;
252 }
253 }
254
255 length = firstCharIndex;
256 value = (isInverted ? ((UInt32)0xFFFFFFFF) : 0);
257 while (length--) {
258 if (*(bits++) != value) return FALSE;
259 }
260
261 ++bits; // Skip firstCharIndex
262 length = (lastCharIndex - (firstCharIndex + 1));
263 value = (isInverted ? 0 : ((UInt32)0xFFFFFFFF));
264 while (length-- > 0) {
265 if (*(bits++) != value) return FALSE;
266 }
267 if (firstCharIndex != lastCharIndex) ++bits;
268
269 length = (0xFFFF >> LOG_BPLW) - lastCharIndex;
270 value = (isInverted ? ((UInt32)0xFFFFFFFF) : 0);
271 while (length--) {
272 if (*(bits++) != value) return FALSE;
273 }
274
275 return TRUE;
276 }
277
278 CF_INLINE Boolean __CFCSetIsBitmapSupersetOfBitmap(const UInt32 *bits1, const UInt32 *bits2, Boolean isInverted1, Boolean isInverted2) {
279 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
280 UInt32 val1, val2;
281
282 while (length--) {
283 val2 = (isInverted2 ? ~(*(bits2++)) : *(bits2++));
284 val1 = (isInverted1 ? ~(*(bits1++)) : *(bits1++)) & val2;
285 if (val1 != val2) return false;
286 }
287
288 return true;
289 }
290
291 CF_INLINE Boolean __CFCSetHasNonBMPPlane(CFCharacterSetRef cset) { return ((cset)->_annex && (cset)->_annex->_validEntriesBitmap ? true : false); }
292 CF_INLINE Boolean __CFCSetAnnexIsInverted (CFCharacterSetRef cset) { return ((cset)->_annex && (cset)->_annex->_isAnnexInverted ? true : false); }
293 CF_INLINE UInt32 __CFCSetAnnexValidEntriesBitmap(CFCharacterSetRef cset) { return ((cset)->_annex ? (cset)->_annex->_validEntriesBitmap : 0); }
294
295 CF_INLINE Boolean __CFCSetIsEmpty(CFCharacterSetRef cset) {
296 if (__CFCSetHasNonBMPPlane(cset) || __CFCSetAnnexIsInverted(cset)) return false;
297
298 switch (__CFCSetClassType(cset)) {
299 case __kCFCharSetClassRange: if (!__CFCSetRangeLength(cset)) return true; break;
300 case __kCFCharSetClassString: if (!__CFCSetStringLength(cset)) return true; break;
301 case __kCFCharSetClassBitmap: if (!__CFCSetBitmapBits(cset)) return true; break;
302 case __kCFCharSetClassCompactBitmap: if (!__CFCSetCompactBitmapBits(cset)) return true; break;
303 }
304 return false;
305 }
306
307 CF_INLINE void __CFCSetBitmapAddCharacter(uint8_t *bitmap, UniChar theChar) {
308 bitmap[(theChar) >> LOG_BPB] |= (((unsigned)1) << (theChar & (BITSPERBYTE - 1)));
309 }
310
311 CF_INLINE void __CFCSetBitmapRemoveCharacter(uint8_t *bitmap, UniChar theChar) {
312 bitmap[(theChar) >> LOG_BPB] &= ~(((unsigned)1) << (theChar & (BITSPERBYTE - 1)));
313 }
314
315 CF_INLINE Boolean __CFCSetIsMemberBitmap(const uint8_t *bitmap, UniChar theChar) {
316 return ((bitmap[(theChar) >> LOG_BPB] & (((unsigned)1) << (theChar & (BITSPERBYTE - 1)))) ? true : false);
317 }
318
319 #define NUM_32BIT_SLOTS (NUMCHARACTERS / 32)
320
321 CF_INLINE void __CFCSetBitmapFastFillWithValue(UInt32 *bitmap, uint8_t value) {
322 UInt32 mask = (value << 24) | (value << 16) | (value << 8) | value;
323 UInt32 numSlots = NUMCHARACTERS / 32;
324
325 while (numSlots--) *(bitmap++) = mask;
326 }
327
328 CF_INLINE void __CFCSetBitmapAddCharactersInRange(uint8_t *bitmap, UniChar firstChar, UniChar lastChar) {
329 if (firstChar == lastChar) {
330 bitmap[firstChar >> LOG_BPB] |= (((unsigned)1) << (firstChar & (BITSPERBYTE - 1)));
331 } else {
332 UInt32 idx = firstChar >> LOG_BPB;
333 UInt32 max = lastChar >> LOG_BPB;
334
335 if (idx == max) {
336 bitmap[idx] |= (((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
337 } else {
338 bitmap[idx] |= (((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1)));
339 bitmap[max] |= (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
340
341 ++idx;
342 while (idx < max) bitmap[idx++] = 0xFF;
343 }
344 }
345 }
346
347 CF_INLINE void __CFCSetBitmapRemoveCharactersInRange(uint8_t *bitmap, UniChar firstChar, UniChar lastChar) {
348 UInt32 idx = firstChar >> LOG_BPB;
349 UInt32 max = lastChar >> LOG_BPB;
350
351 if (idx == max) {
352 bitmap[idx] &= ~((((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1)))));
353 } else {
354 bitmap[idx] &= ~(((unsigned)0xFF) << (firstChar & (BITSPERBYTE - 1)));
355 bitmap[max] &= ~(((unsigned)0xFF) >> ((BITSPERBYTE - 1) - (lastChar & (BITSPERBYTE - 1))));
356
357 ++idx;
358 while (idx < max) bitmap[idx++] = 0;
359 }
360 }
361
/* Helpers for the annex's valid-entries mask: bit number `plane` marks that plane as present.
   The Set and Clear forms assign to `bitmap`, so it must be an lvalue. */
362 #define __CFCSetAnnexBitmapSetPlane(bitmap,plane) ((bitmap) |= (1 << (plane)))
363 #define __CFCSetAnnexBitmapClearPlane(bitmap,plane) ((bitmap) &= (~(1 << (plane))))
/* Get yields a non-zero value (the masked bit, not necessarily 1) when the plane is marked */
364 #define __CFCSetAnnexBitmapGetPlane(bitmap,plane) ((bitmap) & (1 << (plane)))
365
366 CF_INLINE void __CFCSetAllocateAnnexForPlane(CFCharacterSetRef cset, int plane) {
367 if (cset->_annex == NULL) {
368 ((CFMutableCharacterSetRef)cset)->_annex = (CFCharSetAnnexStruct *)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharSetAnnexStruct), 0);
369 cset->_annex->_numOfAllocEntries = plane;
370 cset->_annex->_isAnnexInverted = false;
371 cset->_annex->_validEntriesBitmap = 0;
372 cset->_annex->_nonBMPPlanes = ((plane > 0) ? (CFCharacterSetRef*)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharacterSetRef) * plane, 0) : NULL);
373 } else if (cset->_annex->_numOfAllocEntries < plane) {
374 cset->_annex->_numOfAllocEntries = plane;
375 if (NULL == cset->_annex->_nonBMPPlanes) {
376 cset->_annex->_nonBMPPlanes = (CFCharacterSetRef*)CFAllocatorAllocate(CFGetAllocator(cset), sizeof(CFCharacterSetRef) * plane, 0);
377 } else {
378 cset->_annex->_nonBMPPlanes = (CFCharacterSetRef*)CFAllocatorReallocate(CFGetAllocator(cset), (void *)cset->_annex->_nonBMPPlanes, sizeof(CFCharacterSetRef) * plane, 0);
379 }
380 }
381 }
382
383 CF_INLINE void __CFCSetAnnexSetIsInverted(CFCharacterSetRef cset, Boolean flag) {
384 if (flag) __CFCSetAllocateAnnexForPlane(cset, 0);
385 if (cset->_annex) ((CFMutableCharacterSetRef)cset)->_annex->_isAnnexInverted = flag;
386 }
387
388 CF_INLINE void __CFCSetPutCharacterSetToAnnexPlane(CFCharacterSetRef cset, CFCharacterSetRef annexCSet, int plane) {
389 __CFCSetAllocateAnnexForPlane(cset, plane);
390 if (__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane)) CFRelease(cset->_annex->_nonBMPPlanes[plane - 1]);
391 if (annexCSet) {
392 cset->_annex->_nonBMPPlanes[plane - 1] = (CFCharacterSetRef)CFRetain(annexCSet);
393 __CFCSetAnnexBitmapSetPlane(cset->_annex->_validEntriesBitmap, plane);
394 } else {
395 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, plane);
396 }
397 }
398
399 CF_INLINE CFCharacterSetRef __CFCSetGetAnnexPlaneCharacterSet(CFCharacterSetRef cset, int plane) {
400 __CFCSetAllocateAnnexForPlane(cset, plane);
401 if (!__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane)) {
402 cset->_annex->_nonBMPPlanes[plane - 1] = (CFCharacterSetRef)CFCharacterSetCreateMutable(CFGetAllocator(cset));
403 __CFCSetAnnexBitmapSetPlane(cset->_annex->_validEntriesBitmap, plane);
404 }
405 return cset->_annex->_nonBMPPlanes[plane - 1];
406 }
407
408 CF_INLINE CFCharacterSetRef __CFCSetGetAnnexPlaneCharacterSetNoAlloc(CFCharacterSetRef cset, int plane) {
409 return (cset->_annex && __CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, plane) ? cset->_annex->_nonBMPPlanes[plane - 1] : NULL);
410 }
411
412 CF_INLINE void __CFCSetDeallocateAnnexPlane(CFCharacterSetRef cset) {
413 if (cset->_annex) {
414 int idx;
415
416 for (idx = 0;idx < MAX_ANNEX_PLANE;idx++) {
417 if (__CFCSetAnnexBitmapGetPlane(cset->_annex->_validEntriesBitmap, idx + 1)) {
418 CFRelease(cset->_annex->_nonBMPPlanes[idx]);
419 }
420 }
421 CFAllocatorDeallocate(CFGetAllocator(cset), cset->_annex->_nonBMPPlanes);
422 CFAllocatorDeallocate(CFGetAllocator(cset), cset->_annex);
423 ((CFMutableCharacterSetRef)cset)->_annex = NULL;
424 }
425 }
426
427 CF_INLINE uint8_t __CFCSetGetHeaderValue(const uint8_t *bitmap, int *numPages) {
428 uint8_t value = *bitmap;
429
430 if ((value == 0) || (value == UINT8_MAX)) {
431 int numBytes = __kCFCompactBitmapPageSize - 1;
432
433 while (numBytes > 0) {
434 if (*(++bitmap) != value) break;
435 --numBytes;
436 }
437 if (numBytes == 0) return value;
438 }
439 return (uint8_t)(++(*numPages));
440 }
441
442 CF_INLINE bool __CFCSetIsMemberInCompactBitmap(const uint8_t *compactBitmap, UTF16Char character) {
443 uint8_t value = compactBitmap[(character >> 8)]; // Assuming __kCFCompactBitmapNumPages == 256
444
445 if (value == 0) {
446 return false;
447 } else if (value == UINT8_MAX) {
448 return true;
449 } else {
450 compactBitmap += (__kCFCompactBitmapNumPages + (__kCFCompactBitmapPageSize * (value - 1)));
451 character &= 0xFF; // Assuming __kCFCompactBitmapNumPages == 256
452 return ((compactBitmap[(character / BITSPERBYTE)] & (1 << (character % BITSPERBYTE))) ? true : false);
453 }
454 }
455
456 CF_INLINE uint32_t __CFCSetGetCompactBitmapSize(const uint8_t *compactBitmap) {
457 uint32_t length = __kCFCompactBitmapNumPages;
458 uint32_t size = __kCFCompactBitmapNumPages;
459 uint8_t value;
460
461 while (length-- > 0) {
462 value = *(compactBitmap++);
463 if ((value != 0) && (value != UINT8_MAX)) size += __kCFCompactBitmapPageSize;
464 }
465 return size;
466 }
467
468 /* Take a private "set" structure and make a bitmap from it. Return the bitmap. THE CALLER MUST RELEASE THE RETURNED MEMORY as necessary.
469 */
470
471 CF_INLINE void __CFCSetBitmapProcessManyCharacters(unsigned char *map, unsigned n, unsigned m, Boolean isInverted) {
472 if (isInverted) {
473 __CFCSetBitmapRemoveCharactersInRange(map, n, m);
474 } else {
475 __CFCSetBitmapAddCharactersInRange(map, n, m);
476 }
477 }
478
479 CF_INLINE void __CFExpandCompactBitmap(const uint8_t *src, uint8_t *dst) {
480 const uint8_t *srcBody = src + __kCFCompactBitmapNumPages;
481 int i;
482 uint8_t value;
483
484 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
485 value = *(src++);
486 if ((value == 0) || (value == UINT8_MAX)) {
487 memset(dst, value, __kCFCompactBitmapPageSize);
488 } else {
489 memmove(dst, srcBody, __kCFCompactBitmapPageSize);
490 srcBody += __kCFCompactBitmapPageSize;
491 }
492 dst += __kCFCompactBitmapPageSize;
493 }
494 }
495
496
497 static void __CFCheckForExpandedSet(CFCharacterSetRef cset) {
498 static int8_t __CFNumberOfPlanesForLogging = -1;
499 static bool warnedOnce = false;
500
501 if (0 > __CFNumberOfPlanesForLogging) {
502 const char *envVar = getenv("CFCharacterSetCheckForExpandedSet");
503 long value = (envVar ? strtol_l(envVar, NULL, 0, NULL) : 0);
504 __CFNumberOfPlanesForLogging = (int8_t)(((value > 0) && (value <= 16)) ? value : 0);
505 }
506
507 if (__CFNumberOfPlanesForLogging) {
508 uint32_t entries = __CFCSetAnnexValidEntriesBitmap(cset);
509 int count = 0;
510
511 while (entries) {
512 if ((entries & 1) && (++count >= __CFNumberOfPlanesForLogging)) {
513 if (!warnedOnce) {
514 CFLog(kCFLogLevelWarning, CFSTR("An expanded CFMutableCharacter has been detected. Recommend to compact with CFCharacterSetCreateCopy"));
515 warnedOnce = true;
516 }
517 break;
518 }
519 entries >>= 1;
520 }
521 }
522 }
523
524 static void __CFCSetGetBitmap(CFCharacterSetRef cset, uint8_t *bits) {
525 uint8_t *bitmap;
526 CFIndex length = __kCFBitmapSize;
527
528 if (__CFCSetIsBitmap(cset) && (bitmap = __CFCSetBitmapBits(cset))) {
529 memmove(bits, bitmap, __kCFBitmapSize);
530 } else {
531 Boolean isInverted = __CFCSetIsInverted(cset);
532 uint8_t value = (isInverted ? (uint8_t)-1 : 0);
533
534 bitmap = bits;
535 while (length--) *bitmap++ = value; // Initialize the buffer
536
537 if (!__CFCSetIsEmpty(cset)) {
538 switch (__CFCSetClassType(cset)) {
539 case __kCFCharSetClassBuiltin: {
540 UInt8 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset), 0, bits, (isInverted != 0));
541 if (result == kCFUniCharBitmapEmpty && isInverted) {
542 length = __kCFBitmapSize;
543 bitmap = bits;
544 while (length--) *bitmap++ = 0;
545 } else if (result == kCFUniCharBitmapAll && !isInverted) {
546 length = __kCFBitmapSize;
547 bitmap = bits;
548 while (length--) *bitmap++ = (UInt8)0xFF;
549 }
550 }
551 break;
552
553 case __kCFCharSetClassRange: {
554 UInt32 theChar = __CFCSetRangeFirstChar(cset);
555 if (theChar < NUMCHARACTERS) { // the range starts in BMP
556 length = __CFCSetRangeLength(cset);
557 if (theChar + length >= NUMCHARACTERS) length = NUMCHARACTERS - theChar;
558 if (isInverted) {
559 __CFCSetBitmapRemoveCharactersInRange(bits, theChar, (UniChar)(theChar + length) - 1);
560 } else {
561 __CFCSetBitmapAddCharactersInRange(bits, theChar, (UniChar)(theChar + length) - 1);
562 }
563 }
564 }
565 break;
566
567 case __kCFCharSetClassString: {
568 const UniChar *buffer = __CFCSetStringBuffer(cset);
569 length = __CFCSetStringLength(cset);
570 while (length--) (isInverted ? __CFCSetBitmapRemoveCharacter(bits, *buffer++) : __CFCSetBitmapAddCharacter(bits, *buffer++));
571 }
572 break;
573
574 case __kCFCharSetClassCompactBitmap:
575 __CFExpandCompactBitmap(__CFCSetCompactBitmapBits(cset), bits);
576 break;
577 }
578 }
579 }
580 }
581
582 static Boolean __CFCharacterSetEqual(CFTypeRef cf1, CFTypeRef cf2);
583
584 static Boolean __CFCSetIsEqualAnnex(CFCharacterSetRef cf1, CFCharacterSetRef cf2) {
585 CFCharacterSetRef subSet1;
586 CFCharacterSetRef subSet2;
587 Boolean isAnnexInvertStateIdentical = (__CFCSetAnnexIsInverted(cf1) == __CFCSetAnnexIsInverted(cf2) ? true: false);
588 int idx;
589
590 if (isAnnexInvertStateIdentical) {
591 if (__CFCSetAnnexValidEntriesBitmap(cf1) != __CFCSetAnnexValidEntriesBitmap(cf2)) return false;
592 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
593 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1, idx);
594 subSet2 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2, idx);
595
596 if (subSet1 && !__CFCharacterSetEqual(subSet1, subSet2)) return false;
597 }
598 } else {
599 uint8_t bitsBuf[__kCFBitmapSize];
600 uint8_t bitsBuf2[__kCFBitmapSize];
601
602 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
603 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1, idx);
604 subSet2 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2, idx);
605
606 if (subSet1 == NULL && subSet2 == NULL) {
607 return false;
608 } else if (subSet1 == NULL) {
609 if (__CFCSetIsBitmap(subSet2)) {
610 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(subSet2), (const UInt32 *)-1)) {
611 return false;
612 }
613 } else {
614 __CFCSetGetBitmap(subSet2, bitsBuf);
615 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1)) {
616 return false;
617 }
618 }
619 } else if (subSet2 == NULL) {
620 if (__CFCSetIsBitmap(subSet1)) {
621 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)-1)) {
622 return false;
623 }
624 } else {
625 __CFCSetGetBitmap(subSet1, bitsBuf);
626 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1)) {
627 return false;
628 }
629 }
630 } else {
631 Boolean isBitmap1 = __CFCSetIsBitmap(subSet1);
632 Boolean isBitmap2 = __CFCSetIsBitmap(subSet2);
633
634 if (isBitmap1 && isBitmap2) {
635 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)__CFCSetBitmapBits(subSet2))) {
636 return false;
637 }
638 } else if (!isBitmap1 && !isBitmap2) {
639 __CFCSetGetBitmap(subSet1, bitsBuf);
640 __CFCSetGetBitmap(subSet2, bitsBuf2);
641 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)bitsBuf, (const UInt32 *)bitsBuf2)) {
642 return false;
643 }
644 } else {
645 if (isBitmap2) {
646 CFCharacterSetRef tmp = subSet2;
647 subSet2 = subSet1;
648 subSet1 = tmp;
649 }
650 __CFCSetGetBitmap(subSet2, bitsBuf);
651 if (!__CFCSetIsEqualBitmapInverted((const UInt32 *)__CFCSetBitmapBits(subSet1), (const UInt32 *)bitsBuf)) {
652 return false;
653 }
654 }
655 }
656 }
657 }
658 return true;
659 }
660
661 /* Compact bitmap
662 */
663 static uint8_t *__CFCreateCompactBitmap(CFAllocatorRef allocator, const uint8_t *bitmap) {
664 const uint8_t *src;
665 uint8_t *dst;
666 int i;
667 int numPages = 0;
668 uint8_t header[__kCFCompactBitmapNumPages];
669
670 src = bitmap;
671 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
672 header[i] = __CFCSetGetHeaderValue(src, &numPages);
673
674 // Allocating more pages is probably not interesting enough to be compact
675 if (numPages > __kCFCompactBitmapMaxPages) return NULL;
676 src += __kCFCompactBitmapPageSize;
677 }
678
679 dst = (uint8_t *)CFAllocatorAllocate(allocator, __kCFCompactBitmapNumPages + (__kCFCompactBitmapPageSize * numPages), 0);
680
681 if (numPages > 0) {
682 uint8_t *dstBody = dst + __kCFCompactBitmapNumPages;
683
684 src = bitmap;
685 for (i = 0;i < __kCFCompactBitmapNumPages;i++) {
686 dst[i] = header[i];
687
688 if ((dst[i] != 0) && (dst[i] != UINT8_MAX)) {
689 memmove(dstBody, src, __kCFCompactBitmapPageSize);
690 dstBody += __kCFCompactBitmapPageSize;
691 }
692 src += __kCFCompactBitmapPageSize;
693 }
694 } else {
695 memmove(dst, header, __kCFCompactBitmapNumPages);
696 }
697
698 return dst;
699 }
700
701 static void __CFCSetMakeCompact(CFMutableCharacterSetRef cset) {
702 if (__CFCSetIsBitmap(cset) && __CFCSetBitmapBits(cset)) {
703 uint8_t *bitmap = __CFCSetBitmapBits(cset);
704 uint8_t *cBitmap = __CFCreateCompactBitmap(CFGetAllocator(cset), bitmap);
705
706 if (cBitmap) {
707 CFAllocatorDeallocate(CFGetAllocator(cset), bitmap);
708 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
709 __CFCSetPutCompactBitmapBits(cset, cBitmap);
710 }
711 }
712 }
713
714 static void __CFCSetAddNonBMPPlanesInRange(CFMutableCharacterSetRef cset, CFRange range) {
715 int firstChar = (range.location & 0xFFFF);
716 int maxChar = range.location + range.length;
717 int idx = range.location >> 16; // first plane
718 int maxPlane = (maxChar - 1) >> 16; // last plane
719 CFRange planeRange;
720 CFMutableCharacterSetRef annexPlane;
721
722 maxChar &= 0xFFFF;
723
724 for (idx = (idx ? idx : 1);idx <= maxPlane;idx++) {
725 planeRange.location = __CFMax(firstChar, 0);
726 planeRange.length = (idx == maxPlane && maxChar ? maxChar : 0x10000) - planeRange.location;
727 if (__CFCSetAnnexIsInverted(cset)) {
728 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, idx))) {
729 CFCharacterSetRemoveCharactersInRange(annexPlane, planeRange);
730 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) {
731 CFRelease(annexPlane);
732 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, idx);
733 }
734 }
735 } else {
736 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx), planeRange);
737 }
738 }
739 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) __CFCSetDeallocateAnnexPlane(cset);
740 }
741
742 static void __CFCSetRemoveNonBMPPlanesInRange(CFMutableCharacterSetRef cset, CFRange range) {
743 int firstChar = (range.location & 0xFFFF);
744 int maxChar = range.location + range.length;
745 int idx = range.location >> 16; // first plane
746 int maxPlane = (maxChar - 1) >> 16; // last plane
747 CFRange planeRange;
748 CFMutableCharacterSetRef annexPlane;
749
750 maxChar &= 0xFFFF;
751
752 for (idx = (idx ? idx : 1);idx <= maxPlane;idx++) {
753 planeRange.location = __CFMax(firstChar, 0);
754 planeRange.length = (idx == maxPlane && maxChar ? maxChar : 0x10000) - planeRange.location;
755 if (__CFCSetAnnexIsInverted(cset)) {
756 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx), planeRange);
757 } else {
758 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, idx))) {
759 CFCharacterSetRemoveCharactersInRange(annexPlane, planeRange);
760 if(__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) {
761 CFRelease(annexPlane);
762 __CFCSetAnnexBitmapClearPlane(cset->_annex->_validEntriesBitmap, idx);
763 }
764 }
765 }
766 }
767 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) __CFCSetDeallocateAnnexPlane(cset);
768 }
769
770 static void __CFCSetMakeBitmap(CFMutableCharacterSetRef cset) {
771 if (!__CFCSetIsBitmap(cset) || !__CFCSetBitmapBits(cset)) {
772 CFAllocatorRef allocator = CFGetAllocator(cset);
773 uint8_t *bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
774 __CFCSetGetBitmap(cset, bitmap);
775
776 if (__CFCSetIsBuiltin(cset)) {
777 CFIndex numPlanes = CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(cset));
778
779 if (numPlanes > 1) {
780 CFMutableCharacterSetRef annexSet;
781 uint8_t *annexBitmap = NULL;
782 int idx;
783 UInt8 result;
784
785 __CFCSetAllocateAnnexForPlane(cset, numPlanes - 1);
786 for (idx = 1;idx < numPlanes;idx++) {
787 if (NULL == annexBitmap) {
788 annexBitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
789 }
790 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset), idx, annexBitmap, false);
791 if (result == kCFUniCharBitmapEmpty) continue;
792 if (result == kCFUniCharBitmapAll) {
793 CFIndex bitmapLength = __kCFBitmapSize;
794 uint8_t *bytes = annexBitmap;
795 while (bitmapLength-- > 0) *(bytes++) = (uint8_t)0xFF;
796 }
797 annexSet = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, idx);
798 __CFCSetPutClassType(annexSet, __kCFCharSetClassBitmap);
799 __CFCSetPutBitmapBits(annexSet, annexBitmap);
800 __CFCSetPutIsInverted(annexSet, false);
801 __CFCSetPutHasHashValue(annexSet, false);
802 annexBitmap = NULL;
803 }
804 if (annexBitmap) CFAllocatorDeallocate(allocator, annexBitmap);
805 }
806 } else if (__CFCSetIsCompactBitmap(cset) && __CFCSetCompactBitmapBits(cset)) {
807 CFAllocatorDeallocate(allocator, __CFCSetCompactBitmapBits(cset));
808 __CFCSetPutCompactBitmapBits(cset, NULL);
809 } else if (__CFCSetIsString(cset) && __CFCSetStringBuffer(cset)) {
810 CFAllocatorDeallocate(allocator, __CFCSetStringBuffer(cset));
811 __CFCSetPutStringBuffer(cset, NULL);
812 } else if (__CFCSetIsRange(cset)) { // We may have to allocate annex here
813 Boolean needsToInvert = (!__CFCSetHasNonBMPPlane(cset) && __CFCSetIsInverted(cset) ? true : false);
814 __CFCSetAddNonBMPPlanesInRange(cset, CFRangeMake(__CFCSetRangeFirstChar(cset), __CFCSetRangeLength(cset)));
815 if (needsToInvert) __CFCSetAnnexSetIsInverted(cset, true);
816 }
817 __CFCSetPutClassType(cset, __kCFCharSetClassBitmap);
818 __CFCSetPutBitmapBits(cset, bitmap);
819 __CFCSetPutIsInverted(cset, false);
820 }
821 }
822
823 CF_INLINE CFMutableCharacterSetRef __CFCSetGenericCreate(CFAllocatorRef allocator, UInt32 flags) {
824 CFMutableCharacterSetRef cset;
825 CFIndex size = sizeof(struct __CFCharacterSet) - sizeof(CFRuntimeBase);
826
827 cset = (CFMutableCharacterSetRef)_CFRuntimeCreateInstance(allocator, CFCharacterSetGetTypeID(), size, NULL);
828 if (NULL == cset) return NULL;
829
830 cset->_base._cfinfo[CF_INFO_BITS] |= flags;
831 cset->_hashValue = 0;
832 cset->_annex = NULL;
833
834 return cset;
835 }
836
837 /* Bsearch theChar for __kCFCharSetClassString
838 */
839 CF_INLINE Boolean __CFCSetBsearchUniChar(const UniChar *theTable, CFIndex length, UniChar theChar) {
840 const UniChar *p, *q, *divider;
841
842 if ((theChar < theTable[0]) || (theChar > theTable[length - 1])) return false;
843
844 p = theTable;
845 q = p + (length - 1);
846 while (p <= q) {
847 divider = p + ((q - p) >> 1); /* divide by 2 */
848 if (theChar < *divider) q = divider - 1;
849 else if (theChar > *divider) p = divider + 1;
850 else return true;
851 }
852 return false;
853 }
854
855 /* Names of the predefined character sets, returned by __CFCharacterSetCopyDescription().
856 An entry has to be added here whenever a new builtin type is introduced.
857 */
858 CONST_STRING_DECL(__kCFCSetNameControl, "<CFCharacterSet Predefined Control Set>")
859 CONST_STRING_DECL(__kCFCSetNameWhitespace, "<CFCharacterSet Predefined Whitespace Set>")
860 CONST_STRING_DECL(__kCFCSetNameWhitespaceAndNewline, "<CFCharacterSet Predefined WhitespaceAndNewline Set>")
861 CONST_STRING_DECL(__kCFCSetNameDecimalDigit, "<CFCharacterSet Predefined DecimalDigit Set>")
862 CONST_STRING_DECL(__kCFCSetNameLetter, "<CFCharacterSet Predefined Letter Set>")
863 CONST_STRING_DECL(__kCFCSetNameLowercaseLetter, "<CFCharacterSet Predefined LowercaseLetter Set>")
864 CONST_STRING_DECL(__kCFCSetNameUppercaseLetter, "<CFCharacterSet Predefined UppercaseLetter Set>")
865 CONST_STRING_DECL(__kCFCSetNameNonBase, "<CFCharacterSet Predefined NonBase Set>")
866 CONST_STRING_DECL(__kCFCSetNameDecomposable, "<CFCharacterSet Predefined Decomposable Set>")
867 CONST_STRING_DECL(__kCFCSetNameAlphaNumeric, "<CFCharacterSet Predefined AlphaNumeric Set>")
868 CONST_STRING_DECL(__kCFCSetNamePunctuation, "<CFCharacterSet Predefined Punctuation Set>")
869 CONST_STRING_DECL(__kCFCSetNameIllegal, "<CFCharacterSet Predefined Illegal Set>")
870 CONST_STRING_DECL(__kCFCSetNameCapitalizedLetter, "<CFCharacterSet Predefined CapitalizedLetter Set>")
871 CONST_STRING_DECL(__kCFCSetNameSymbol, "<CFCharacterSet Predefined Symbol Set>")
872 CONST_STRING_DECL(__kCFCSetNameNewline, "<CFCharacterSet Predefined Newline Set>")
873
/* Opening part of the description of a string-class set; the description code appends
   the "U+XXXX" items and the closing ")>" to it.
*/
874 CONST_STRING_DECL(__kCFCSetNameStringTypeFormat, "<CFCharacterSet Items(")
875
876 /* Array of the builtin sets instantiated so far. Builtin set IDs start at 1, so the array index is ID - 1.
877 The array itself is allocated lazily by CFCharacterSetGetPredefined() and stays NULL until then.
*/
878 static CFCharacterSetRef *__CFBuiltinSets = NULL;
879
880 /* Global lock for character sets; CFCharacterSetGetPredefined() holds it while touching __CFBuiltinSets.
881 */
882 static CFSpinLock_t __CFCharacterSetLock = CFSpinLockInit;
883
884 /* CFBase API functions
885 */
886 static Boolean __CFCharacterSetEqual(CFTypeRef cf1, CFTypeRef cf2) {
887 Boolean isInvertStateIdentical = (__CFCSetIsInverted((CFCharacterSetRef)cf1) == __CFCSetIsInverted((CFCharacterSetRef)cf2) ? true: false);
888 Boolean isAnnexInvertStateIdentical = (__CFCSetAnnexIsInverted((CFCharacterSetRef)cf1) == __CFCSetAnnexIsInverted((CFCharacterSetRef)cf2) ? true: false);
889 CFIndex idx;
890 CFCharacterSetRef subSet1;
891 uint8_t bitsBuf[__kCFBitmapSize];
892 uint8_t *bits;
893 Boolean isBitmap1;
894 Boolean isBitmap2;
895
896 if (__CFCSetHasHashValue((CFCharacterSetRef)cf1) && __CFCSetHasHashValue((CFCharacterSetRef)cf2) && ((CFCharacterSetRef)cf1)->_hashValue != ((CFCharacterSetRef)cf2)->_hashValue) return false;
897 if (__CFCSetIsEmpty((CFCharacterSetRef)cf1) && __CFCSetIsEmpty((CFCharacterSetRef)cf2) && !isInvertStateIdentical) return false;
898
899 if (__CFCSetClassType((CFCharacterSetRef)cf1) == __CFCSetClassType((CFCharacterSetRef)cf2)) { // Types are identical, we can do it fast
900 switch (__CFCSetClassType((CFCharacterSetRef)cf1)) {
901 case __kCFCharSetClassBuiltin:
902 return (__CFCSetBuiltinType((CFCharacterSetRef)cf1) == __CFCSetBuiltinType((CFCharacterSetRef)cf2) && isInvertStateIdentical ? true : false);
903
904 case __kCFCharSetClassRange:
905 return (__CFCSetRangeFirstChar((CFCharacterSetRef)cf1) == __CFCSetRangeFirstChar((CFCharacterSetRef)cf2) && __CFCSetRangeLength((CFCharacterSetRef)cf1) && __CFCSetRangeLength((CFCharacterSetRef)cf2) && isInvertStateIdentical ? true : false);
906
907 case __kCFCharSetClassString:
908 if (__CFCSetStringLength((CFCharacterSetRef)cf1) == __CFCSetStringLength((CFCharacterSetRef)cf2) && isInvertStateIdentical) {
909 const UniChar *buf1 = __CFCSetStringBuffer((CFCharacterSetRef)cf1);
910 const UniChar *buf2 = __CFCSetStringBuffer((CFCharacterSetRef)cf2);
911 CFIndex length = __CFCSetStringLength((CFCharacterSetRef)cf1);
912
913 while (length--) if (*buf1++ != *buf2++) return false;
914 } else {
915 return false;
916 }
917 break;
918
919 case __kCFCharSetClassBitmap:
920 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf2))) return false;
921 break;
922 }
923 return __CFCSetIsEqualAnnex((CFCharacterSetRef)cf1, (CFCharacterSetRef)cf2);
924 }
925
926 // Check for easy empty cases
927 if (__CFCSetIsEmpty((CFCharacterSetRef)cf1) || __CFCSetIsEmpty((CFCharacterSetRef)cf2)) {
928 CFCharacterSetRef emptySet = (__CFCSetIsEmpty((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
929 CFCharacterSetRef nonEmptySet = (emptySet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
930
931 if (__CFCSetIsBuiltin(nonEmptySet)) {
932 return false;
933 } else if (__CFCSetIsRange(nonEmptySet)) {
934 if (isInvertStateIdentical) {
935 return (__CFCSetRangeLength(nonEmptySet) ? false : true);
936 } else {
937 return (__CFCSetRangeLength(nonEmptySet) == 0x110000 ? true : false);
938 }
939 } else {
940 if (__CFCSetAnnexIsInverted(nonEmptySet)) {
941 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet) != 0x1FFFE) return false;
942 } else {
943 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet)) return false;
944 }
945
946 if (__CFCSetIsBitmap(nonEmptySet)) {
947 bits = __CFCSetBitmapBits(nonEmptySet);
948 } else {
949 bits = bitsBuf;
950 __CFCSetGetBitmap(nonEmptySet, bitsBuf);
951 }
952
953 if (__CFCSetIsEqualBitmap(NULL, (const UInt32 *)bits)) {
954 if (!__CFCSetAnnexIsInverted(nonEmptySet)) return true;
955 } else {
956 return false;
957 }
958
959 // Annex set has to be CFRangeMake(0x10000, 0xfffff)
960 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
961 if (__CFCSetIsBitmap(nonEmptySet)) {
962 if (!__CFCSetIsEqualBitmap((__CFCSetAnnexIsInverted(nonEmptySet) ? NULL : (const UInt32 *)-1), (const UInt32 *)bitsBuf)) return false;
963 } else {
964 __CFCSetGetBitmap(__CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonEmptySet, idx), bitsBuf);
965 if (!__CFCSetIsEqualBitmap((const UInt32 *)-1, (const UInt32 *)bitsBuf)) return false;
966 }
967 }
968 return true;
969 }
970 }
971
972 if (__CFCSetIsBuiltin((CFCharacterSetRef)cf1) || __CFCSetIsBuiltin((CFCharacterSetRef)cf2)) {
973 CFCharacterSetRef builtinSet = (__CFCSetIsBuiltin((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
974 CFCharacterSetRef nonBuiltinSet = (builtinSet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
975
976
977 if (__CFCSetIsRange(nonBuiltinSet)) {
978 UTF32Char firstChar = __CFCSetRangeFirstChar(nonBuiltinSet);
979 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(nonBuiltinSet) - 1);
980 uint8_t firstPlane = (firstChar >> 16) & 0xFF;
981 uint8_t lastPlane = (lastChar >> 16) & 0xFF;
982 uint8_t result;
983
984 for (idx = 0;idx < MAX_ANNEX_PLANE;idx++) {
985 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), idx, bitsBuf, (isInvertStateIdentical != 0));
986
987 if (idx < firstPlane || idx > lastPlane) {
988 if (result == kCFUniCharBitmapAll) {
989 return false;
990 } else if (result == kCFUniCharBitmapFilled) {
991 if (!__CFCSetIsEqualBitmap(NULL, (const UInt32 *)bitsBuf)) return false;
992 }
993 } else if (idx > firstPlane && idx < lastPlane) {
994 if (result == kCFUniCharBitmapEmpty) {
995 return false;
996 } else if (result == kCFUniCharBitmapFilled) {
997 if (!__CFCSetIsEqualBitmap((const UInt32 *)-1, (const UInt32 *)bitsBuf)) return false;
998 }
999 } else {
1000 if (result == kCFUniCharBitmapEmpty) {
1001 return false;
1002 } else if (result == kCFUniCharBitmapAll) {
1003 if (idx == firstPlane) {
1004 if (((firstChar & 0xFFFF) != 0) || (firstPlane == lastPlane && ((lastChar & 0xFFFF) != 0xFFFF))) return false;
1005 } else {
1006 if (((lastChar & 0xFFFF) != 0xFFFF) || (firstPlane == lastPlane && ((firstChar & 0xFFFF) != 0))) return false;
1007 }
1008 } else {
1009 if (idx == firstPlane) {
1010 if (!__CFCSetIsBitmapEqualToRange((const UInt32 *)bitsBuf, firstChar & 0xFFFF, (firstPlane == lastPlane ? lastChar & 0xFFFF : 0xFFFF), false)) return false;
1011 } else {
1012 if (!__CFCSetIsBitmapEqualToRange((const UInt32 *)bitsBuf, (firstPlane == lastPlane ? firstChar & 0xFFFF : 0), lastChar & 0xFFFF, false)) return false;
1013 }
1014 }
1015 }
1016 }
1017 return true;
1018 } else {
1019 uint8_t bitsBuf2[__kCFBitmapSize];
1020 uint8_t result;
1021
1022 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), 0, bitsBuf, (__CFCSetIsInverted(builtinSet) != 0));
1023 if (result == kCFUniCharBitmapFilled) {
1024 if (__CFCSetIsBitmap(nonBuiltinSet)) {
1025 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)__CFCSetBitmapBits(nonBuiltinSet))) return false;
1026 } else {
1027
1028 __CFCSetGetBitmap(nonBuiltinSet, bitsBuf2);
1029 if (!__CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)bitsBuf2)) {
1030 return false;
1031 }
1032 }
1033 } else {
1034 if (__CFCSetIsBitmap(nonBuiltinSet)) {
1035 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1 : NULL), (const UInt32 *)__CFCSetBitmapBits(nonBuiltinSet))) return false;
1036 } else {
1037 __CFCSetGetBitmap(nonBuiltinSet, bitsBuf);
1038 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32 *)bitsBuf)) return false;
1039 }
1040 }
1041
1042 isInvertStateIdentical = (__CFCSetIsInverted(builtinSet) == __CFCSetAnnexIsInverted(nonBuiltinSet) ? true : false);
1043
1044 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
1045 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet), idx, bitsBuf, !isInvertStateIdentical);
1046 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonBuiltinSet, idx);
1047
1048 if (result == kCFUniCharBitmapFilled) {
1049 if (NULL == subSet1) {
1050 return false;
1051 } else if (__CFCSetIsBitmap(subSet1)) {
1052 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)__CFCSetBitmapBits(subSet1))) {
1053 return false;
1054 }
1055 } else {
1056
1057 __CFCSetGetBitmap(subSet1, bitsBuf2);
1058 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)bitsBuf2)) {
1059 return false;
1060 }
1061 }
1062 } else {
1063 if (NULL == subSet1) {
1064 if (result == kCFUniCharBitmapAll) {
1065 return false;
1066 }
1067 } else if (__CFCSetIsBitmap(subSet1)) {
1068 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32*)__CFCSetBitmapBits(subSet1))) {
1069 return false;
1070 }
1071 } else {
1072 __CFCSetGetBitmap(subSet1, bitsBuf);
1073 if (!__CFCSetIsEqualBitmap((result == kCFUniCharBitmapAll ? (const UInt32*)-1: NULL), (const UInt32*)bitsBuf)) {
1074 return false;
1075 }
1076 }
1077 }
1078 }
1079 return true;
1080 }
1081 }
1082
1083 if (__CFCSetIsRange((CFCharacterSetRef)cf1) || __CFCSetIsRange((CFCharacterSetRef)cf2)) {
1084 CFCharacterSetRef rangeSet = (__CFCSetIsRange((CFCharacterSetRef)cf1) ? (CFCharacterSetRef)cf1 : (CFCharacterSetRef)cf2);
1085 CFCharacterSetRef nonRangeSet = (rangeSet == cf1 ? (CFCharacterSetRef)cf2 : (CFCharacterSetRef)cf1);
1086 UTF32Char firstChar = __CFCSetRangeFirstChar(rangeSet);
1087 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(rangeSet) - 1);
1088 uint8_t firstPlane = (firstChar >> 16) & 0xFF;
1089 uint8_t lastPlane = (lastChar >> 16) & 0xFF;
1090 Boolean isRangeSetInverted = __CFCSetIsInverted(rangeSet);
1091
1092 if (__CFCSetIsBitmap(nonRangeSet)) {
1093 bits = __CFCSetBitmapBits(nonRangeSet);
1094 } else {
1095 bits = bitsBuf;
1096 __CFCSetGetBitmap(nonRangeSet, bitsBuf);
1097 }
1098 if (firstPlane == 0) {
1099 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, firstChar, (lastPlane == 0 ? lastChar : 0xFFFF), isRangeSetInverted)) return false;
1100 firstPlane = 1;
1101 firstChar = 0;
1102 } else {
1103 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isRangeSetInverted ? (const UInt32 *)-1 : NULL))) return false;
1104 firstChar &= 0xFFFF;
1105 }
1106
1107 lastChar &= 0xFFFF;
1108
1109 isAnnexInvertStateIdentical = (isRangeSetInverted == __CFCSetAnnexIsInverted(nonRangeSet) ? true : false);
1110
1111 for (idx = 1;idx < MAX_ANNEX_PLANE;idx++) {
1112 subSet1 = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonRangeSet, idx);
1113 if (NULL == subSet1) {
1114 if (idx < firstPlane || idx > lastPlane) {
1115 if (!isAnnexInvertStateIdentical) return false;
1116 } else if (idx > firstPlane && idx < lastPlane) {
1117 if (isAnnexInvertStateIdentical) return false;
1118 } else if (idx == firstPlane) {
1119 if (isAnnexInvertStateIdentical || firstChar || (idx == lastPlane && lastChar != 0xFFFF)) return false;
1120 } else if (idx == lastPlane) {
1121 if (isAnnexInvertStateIdentical || (idx == firstPlane && firstChar) || (lastChar != 0xFFFF)) return false;
1122 }
1123 } else {
1124 if (__CFCSetIsBitmap(subSet1)) {
1125 bits = __CFCSetBitmapBits(subSet1);
1126 } else {
1127 __CFCSetGetBitmap(subSet1, bitsBuf);
1128 bits = bitsBuf;
1129 }
1130
1131 if (idx < firstPlane || idx > lastPlane) {
1132 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isAnnexInvertStateIdentical ? NULL : (const UInt32 *)-1))) return false;
1133 } else if (idx > firstPlane && idx < lastPlane) {
1134 if (!__CFCSetIsEqualBitmap((const UInt32*)bits, (isAnnexInvertStateIdentical ? (const UInt32 *)-1 : NULL))) return false;
1135 } else if (idx == firstPlane) {
1136 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, firstChar, (idx == lastPlane ? lastChar : 0xFFFF), !isAnnexInvertStateIdentical)) return false;
1137 } else if (idx == lastPlane) {
1138 if (!__CFCSetIsBitmapEqualToRange((const UInt32*)bits, (idx == firstPlane ? firstChar : 0), lastChar, !isAnnexInvertStateIdentical)) return false;
1139 }
1140 }
1141 }
1142 return true;
1143 }
1144
1145 isBitmap1 = __CFCSetIsBitmap((CFCharacterSetRef)cf1);
1146 isBitmap2 = __CFCSetIsBitmap((CFCharacterSetRef)cf2);
1147
1148 if (isBitmap1 && isBitmap2) {
1149 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf2))) return false;
1150 } else if (!isBitmap1 && !isBitmap2) {
1151 uint8_t bitsBuf2[__kCFBitmapSize];
1152
1153 __CFCSetGetBitmap((CFCharacterSetRef)cf1, bitsBuf);
1154 __CFCSetGetBitmap((CFCharacterSetRef)cf2, bitsBuf2);
1155
1156 if (!__CFCSetIsEqualBitmap((const UInt32*)bitsBuf, (const UInt32*)bitsBuf2)) {
1157 return false;
1158 }
1159 } else {
1160 if (isBitmap2) {
1161 CFCharacterSetRef tmp = (CFCharacterSetRef)cf2;
1162 cf2 = cf1;
1163 cf1 = tmp;
1164 }
1165
1166 __CFCSetGetBitmap((CFCharacterSetRef)cf2, bitsBuf);
1167
1168 if (!__CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits((CFCharacterSetRef)cf1), (const UInt32 *)bitsBuf)) return false;
1169 }
1170 return __CFCSetIsEqualAnnex((CFCharacterSetRef)cf1, (CFCharacterSetRef)cf2);
1171 }
1172
1173 static CFHashCode __CFCharacterSetHash(CFTypeRef cf) {
1174 if (!__CFCSetHasHashValue((CFCharacterSetRef)cf)) {
1175 if (__CFCSetIsEmpty((CFCharacterSetRef)cf)) {
1176 ((CFMutableCharacterSetRef)cf)->_hashValue = (__CFCSetIsInverted((CFCharacterSetRef)cf) ? ((UInt32)0xFFFFFFFF) : 0);
1177 } else if (__CFCSetIsBitmap( (CFCharacterSetRef) cf )) {
1178 ((CFMutableCharacterSetRef)cf)->_hashValue = CFHashBytes(__CFCSetBitmapBits((CFCharacterSetRef)cf), __kCFBitmapSize);
1179 } else {
1180 uint8_t bitsBuf[__kCFBitmapSize];
1181 __CFCSetGetBitmap((CFCharacterSetRef)cf, bitsBuf);
1182 ((CFMutableCharacterSetRef)cf)->_hashValue = CFHashBytes(bitsBuf, __kCFBitmapSize);
1183 }
1184 __CFCSetPutHasHashValue((CFMutableCharacterSetRef)cf, true);
1185 }
1186 return ((CFCharacterSetRef)cf)->_hashValue;
1187 }
1188
1189 static CFStringRef __CFCharacterSetCopyDescription(CFTypeRef cf) {
1190 CFMutableStringRef string;
1191 CFIndex idx;
1192 CFIndex length;
1193
1194 if (__CFCSetIsEmpty((CFCharacterSetRef)cf)) {
1195 return (CFStringRef)(__CFCSetIsInverted((CFCharacterSetRef)cf) ? CFRetain(CFSTR("<CFCharacterSet All>")) : CFRetain(CFSTR("<CFCharacterSet Empty>")));
1196 }
1197
1198 switch (__CFCSetClassType((CFCharacterSetRef)cf)) {
1199 case __kCFCharSetClassBuiltin:
1200 switch (__CFCSetBuiltinType((CFCharacterSetRef)cf)) {
1201 case kCFCharacterSetControl: return (CFStringRef)CFRetain(__kCFCSetNameControl);
1202 case kCFCharacterSetWhitespace : return (CFStringRef)CFRetain(__kCFCSetNameWhitespace);
1203 case kCFCharacterSetWhitespaceAndNewline: return (CFStringRef)CFRetain(__kCFCSetNameWhitespaceAndNewline);
1204 case kCFCharacterSetDecimalDigit: return (CFStringRef)CFRetain(__kCFCSetNameDecimalDigit);
1205 case kCFCharacterSetLetter: return (CFStringRef)CFRetain(__kCFCSetNameLetter);
1206 case kCFCharacterSetLowercaseLetter: return (CFStringRef)CFRetain(__kCFCSetNameLowercaseLetter);
1207 case kCFCharacterSetUppercaseLetter: return (CFStringRef)CFRetain(__kCFCSetNameUppercaseLetter);
1208 case kCFCharacterSetNonBase: return (CFStringRef)CFRetain(__kCFCSetNameNonBase);
1209 case kCFCharacterSetDecomposable: return (CFStringRef)CFRetain(__kCFCSetNameDecomposable);
1210 case kCFCharacterSetAlphaNumeric: return (CFStringRef)CFRetain(__kCFCSetNameAlphaNumeric);
1211 case kCFCharacterSetPunctuation: return (CFStringRef)CFRetain(__kCFCSetNamePunctuation);
1212 case kCFCharacterSetIllegal: return (CFStringRef)CFRetain(__kCFCSetNameIllegal);
1213 case kCFCharacterSetCapitalizedLetter: return (CFStringRef)CFRetain(__kCFCSetNameCapitalizedLetter);
1214 case kCFCharacterSetSymbol: return (CFStringRef)CFRetain(__kCFCSetNameSymbol);
1215 case kCFCharacterSetNewline: return (CFStringRef)CFRetain(__kCFCSetNameNewline);
1216 }
1217 break;
1218
1219 case __kCFCharSetClassRange:
1220 return CFStringCreateWithFormat(CFGetAllocator((CFCharacterSetRef)cf), NULL, CFSTR("<CFCharacterSet Range(%d, %d)>"), __CFCSetRangeFirstChar((CFCharacterSetRef)cf), __CFCSetRangeLength((CFCharacterSetRef)cf));
1221
1222 case __kCFCharSetClassString:
1223 length = __CFCSetStringLength((CFCharacterSetRef)cf);
1224 string = CFStringCreateMutable(CFGetAllocator(cf), CFStringGetLength(__kCFCSetNameStringTypeFormat) + 7 * length + 2); // length of__kCFCSetNameStringTypeFormat + "U+XXXX "(7) * length + ")>"(2)
1225 CFStringAppend(string, __kCFCSetNameStringTypeFormat);
1226 for (idx = 0;idx < length;idx++) {
1227 CFStringAppendFormat(string, NULL, CFSTR("%sU+%04X"), (idx > 0 ? " " : ""), (UInt32)((__CFCSetStringBuffer((CFCharacterSetRef)cf))[idx]));
1228 }
1229 CFStringAppend(string, CFSTR(")>"));
1230 return string;
1231
1232 case __kCFCharSetClassBitmap:
1233 case __kCFCharSetClassCompactBitmap:
1234 return (CFStringRef)CFRetain(CFSTR("<CFCharacterSet Bitmap>")); // ??? Should generate description for 8k bitmap ?
1235 }
1236 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1237 return NULL;
1238 }
1239
1240 static void __CFCharacterSetDeallocate(CFTypeRef cf) {
1241 CFAllocatorRef allocator = CFGetAllocator(cf);
1242
1243 if (__CFCSetIsBuiltin((CFCharacterSetRef)cf) && !__CFCSetIsMutable((CFCharacterSetRef)cf) && !__CFCSetIsInverted((CFCharacterSetRef)cf)) {
1244 CFCharacterSetRef sharedSet = CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef)cf));
1245 if (sharedSet == cf) { // We're trying to dealloc the builtin set
1246 CFAssert1(0, __kCFLogAssertion, "%s: Trying to deallocate predefined set. The process is likely to crash.", __PRETTY_FUNCTION__);
1247 return; // We never deallocate builtin set
1248 }
1249 }
1250
1251 if (__CFCSetIsString((CFCharacterSetRef)cf) && __CFCSetStringBuffer((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetStringBuffer((CFCharacterSetRef)cf));
1252 else if (__CFCSetIsBitmap((CFCharacterSetRef)cf) && __CFCSetBitmapBits((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetBitmapBits((CFCharacterSetRef)cf));
1253 else if (__CFCSetIsCompactBitmap((CFCharacterSetRef)cf) && __CFCSetCompactBitmapBits((CFCharacterSetRef)cf)) CFAllocatorDeallocate(allocator, __CFCSetCompactBitmapBits((CFCharacterSetRef)cf));
1254 __CFCSetDeallocateAnnexPlane((CFCharacterSetRef)cf);
1255 }
1256
/* Runtime type ID of CFCharacterSet. Holds _kCFRuntimeNotATypeID until
   __CFCharacterSetInitialize() stores the value returned by _CFRuntimeRegisterClass().
*/
1257 static CFTypeID __kCFCharacterSetTypeID = _kCFRuntimeNotATypeID;
1258
/* Class record registered with the CF runtime by __CFCharacterSetInitialize().
   It wires up the deallocate, equal, hash and description callbacks defined above;
   the slots set to NULL are not provided by this class.
*/
1259 static const CFRuntimeClass __CFCharacterSetClass = {
1260 0, // presumably the class version -- confirm against CFRuntime.h
1261 "CFCharacterSet",
1262 NULL, // init
1263 NULL, // copy
1264 __CFCharacterSetDeallocate,
1265 __CFCharacterSetEqual,
1266 __CFCharacterSetHash,
1267 NULL, // unnamed in the original; presumably the formatting description slot -- confirm against CFRuntime.h
1268 __CFCharacterSetCopyDescription
1269 };
1270
/* Debug switch, turned on by __CFCharacterSetInitialize() when the environment variable
   __CF_DEBUG_EXPANDED_SET starts with 'Y'. ("Exapended" is a misspelling of "Expanded";
   the identifier is left untouched here.)
*/
1271 static bool __CFCheckForExapendedSet = false;
1272
1273 __private_extern__ void __CFCharacterSetInitialize(void) {
1274 const char *checkForExpandedSet = getenv("__CF_DEBUG_EXPANDED_SET");
1275
1276 __kCFCharacterSetTypeID = _CFRuntimeRegisterClass(&__CFCharacterSetClass);
1277
1278 if (checkForExpandedSet && (*checkForExpandedSet == 'Y')) __CFCheckForExapendedSet = true;
1279 }
1280
1281 /* Public functions
1282 */
1283
1284 CFTypeID CFCharacterSetGetTypeID(void) {
1285 return __kCFCharacterSetTypeID;
1286 }
1287
1288 /*** CharacterSet creation ***/
1289 /* Functions to create basic immutable characterset.
1290 */
1291 CFCharacterSetRef CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier) {
1292 CFCharacterSetRef cset;
1293
1294 __CFCSetValidateBuiltinType(theSetIdentifier, __PRETTY_FUNCTION__);
1295
1296 __CFSpinLock(&__CFCharacterSetLock);
1297 cset = ((NULL != __CFBuiltinSets) ? __CFBuiltinSets[theSetIdentifier - 1] : NULL);
1298 __CFSpinUnlock(&__CFCharacterSetLock);
1299
1300 if (NULL != cset) return cset;
1301
1302 if (!(cset = __CFCSetGenericCreate(kCFAllocatorSystemDefault, __kCFCharSetClassBuiltin))) return NULL;
1303 __CFCSetPutBuiltinType((CFMutableCharacterSetRef)cset, theSetIdentifier);
1304
1305 __CFSpinLock(&__CFCharacterSetLock);
1306 if (!__CFBuiltinSets) {
1307 __CFBuiltinSets = (CFCharacterSetRef *)CFAllocatorAllocate((CFAllocatorRef)CFRetain(__CFGetDefaultAllocator()), sizeof(CFCharacterSetRef) * __kCFLastBuiltinSetID, 0);
1308 memset(__CFBuiltinSets, 0, sizeof(CFCharacterSetRef) * __kCFLastBuiltinSetID);
1309 }
1310
1311 __CFBuiltinSets[theSetIdentifier - 1] = cset;
1312 __CFSpinUnlock(&__CFCharacterSetLock);
1313
1314 return cset;
1315 }
1316
1317 CFCharacterSetRef CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef allocator, CFRange theRange) {
1318 CFMutableCharacterSetRef cset;
1319
1320 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
1321
1322 if (theRange.length) {
1323 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassRange))) return NULL;
1324 __CFCSetPutRangeFirstChar(cset, theRange.location);
1325 __CFCSetPutRangeLength(cset, theRange.length);
1326 } else {
1327 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap))) return NULL;
1328 __CFCSetPutBitmapBits(cset, NULL);
1329 __CFCSetPutHasHashValue(cset, true); // _hashValue is 0
1330 }
1331
1332 return cset;
1333 }
1334
1335 static int chcompar(const void *a, const void *b) {
1336 return -(int)(*(UniChar *)b - *(UniChar *)a);
1337 }
1338
1339 CFCharacterSetRef CFCharacterSetCreateWithCharactersInString(CFAllocatorRef allocator, CFStringRef theString) {
1340 CFIndex length;
1341
1342 length = CFStringGetLength(theString);
1343 if (length < __kCFStringCharSetMax) {
1344 CFMutableCharacterSetRef cset;
1345
1346 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassString))) return NULL;
1347 __CFCSetPutStringBuffer(cset, (UniChar *)CFAllocatorAllocate(CFGetAllocator(cset), __kCFStringCharSetMax * sizeof(UniChar), 0));
1348 __CFCSetPutStringLength(cset, length);
1349 CFStringGetCharacters(theString, CFRangeMake(0, length), __CFCSetStringBuffer(cset));
1350 qsort(__CFCSetStringBuffer(cset), length, sizeof(UniChar), chcompar);
1351 if (!length) __CFCSetPutHasHashValue(cset, true); // _hashValue is 0
1352 return cset;
1353 } else {
1354 CFMutableCharacterSetRef mcset = CFCharacterSetCreateMutable(allocator);
1355 CFCharacterSetAddCharactersInString(mcset, theString);
1356 __CFCSetMakeCompact(mcset);
1357 __CFCSetPutIsMutable(mcset, false);
1358 return mcset;
1359 }
1360 }
1361
1362 CFCharacterSetRef CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef allocator, CFDataRef theData) {
1363 CFMutableCharacterSetRef cset;
1364 CFIndex length;
1365
1366 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap))) return NULL;
1367
1368 if (theData && (length = CFDataGetLength(theData)) > 0) {
1369 uint8_t *bitmap;
1370 uint8_t *cBitmap;
1371
1372 if (length < __kCFBitmapSize) {
1373 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1374 memmove(bitmap, CFDataGetBytePtr(theData), length);
1375 memset(bitmap + length, 0, __kCFBitmapSize - length);
1376
1377 cBitmap = __CFCreateCompactBitmap(allocator, bitmap);
1378
1379 if (cBitmap == NULL) {
1380 __CFCSetPutBitmapBits(cset, bitmap);
1381 } else {
1382 CFAllocatorDeallocate(allocator, bitmap);
1383 __CFCSetPutCompactBitmapBits(cset, cBitmap);
1384 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1385 }
1386 } else {
1387 cBitmap = __CFCreateCompactBitmap(allocator, CFDataGetBytePtr(theData));
1388
1389 if (cBitmap == NULL) {
1390 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1391 memmove(bitmap, CFDataGetBytePtr(theData), __kCFBitmapSize);
1392
1393 __CFCSetPutBitmapBits(cset, bitmap);
1394 } else {
1395 __CFCSetPutCompactBitmapBits(cset, cBitmap);
1396 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1397 }
1398
1399 if (length > __kCFBitmapSize) {
1400 CFMutableCharacterSetRef annexSet;
1401 const uint8_t *bytes = CFDataGetBytePtr(theData) + __kCFBitmapSize;
1402
1403 length -= __kCFBitmapSize;
1404
1405 while (length > 1) {
1406 annexSet = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(cset, *(bytes++));
1407 --length; // Decrement the plane no byte
1408
1409 if (length < __kCFBitmapSize) {
1410 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1411 memmove(bitmap, bytes, length);
1412 memset(bitmap + length, 0, __kCFBitmapSize - length);
1413
1414 cBitmap = __CFCreateCompactBitmap(allocator, bitmap);
1415
1416 if (cBitmap == NULL) {
1417 __CFCSetPutBitmapBits(annexSet, bitmap);
1418 } else {
1419 CFAllocatorDeallocate(allocator, bitmap);
1420 __CFCSetPutCompactBitmapBits(annexSet, cBitmap);
1421 __CFCSetPutClassType(annexSet, __kCFCharSetClassCompactBitmap);
1422 }
1423 } else {
1424 cBitmap = __CFCreateCompactBitmap(allocator, bytes);
1425
1426 if (cBitmap == NULL) {
1427 bitmap = (uint8_t *)CFAllocatorAllocate(allocator, __kCFBitmapSize, 0);
1428 memmove(bitmap, bytes, __kCFBitmapSize);
1429
1430 __CFCSetPutBitmapBits(annexSet, bitmap);
1431 } else {
1432 __CFCSetPutCompactBitmapBits(annexSet, cBitmap);
1433 __CFCSetPutClassType(annexSet, __kCFCharSetClassCompactBitmap);
1434 }
1435 }
1436 length -= __kCFBitmapSize;
1437 bytes += __kCFBitmapSize;
1438 }
1439 }
1440 }
1441 } else {
1442 __CFCSetPutBitmapBits(cset, NULL);
1443 __CFCSetPutHasHashValue(cset, true); // Hash value is 0
1444 }
1445
1446 return cset;
1447 }
1448
1449 CFCharacterSetRef CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1450 CFMutableCharacterSetRef cset;
1451
1452 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFCharacterSetRef , theSet, "invertedSet");
1453
1454 cset = CFCharacterSetCreateMutableCopy(alloc, theSet);
1455 CFCharacterSetInvert(cset);
1456 __CFCSetPutIsMutable(cset, false);
1457
1458 return cset;
1459 }
1460
1461 /* Functions to create mutable characterset.
1462 */
1463 CFMutableCharacterSetRef CFCharacterSetCreateMutable(CFAllocatorRef allocator) {
1464 CFMutableCharacterSetRef cset;
1465
1466 if (!(cset = __CFCSetGenericCreate(allocator, __kCFCharSetClassBitmap| __kCFCharSetIsMutable))) return NULL;
1467 __CFCSetPutBitmapBits(cset, NULL);
1468 __CFCSetPutHasHashValue(cset, true); // Hash value is 0
1469
1470 return cset;
1471 }
1472
1473 CFMutableCharacterSetRef __CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet, bool isMutable) {
1474 CFMutableCharacterSetRef cset;
1475
1476 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFMutableCharacterSetRef , theSet, "mutableCopy");
1477
1478 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1479
1480 if (!isMutable && !__CFCSetIsMutable(theSet)) {
1481 return (CFMutableCharacterSetRef)CFRetain(theSet);
1482 }
1483
1484 cset = CFCharacterSetCreateMutable(alloc);
1485
1486 __CFCSetPutClassType(cset, __CFCSetClassType(theSet));
1487 __CFCSetPutHasHashValue(cset, __CFCSetHasHashValue(theSet));
1488 __CFCSetPutIsInverted(cset, __CFCSetIsInverted(theSet));
1489 cset->_hashValue = theSet->_hashValue;
1490
1491 switch (__CFCSetClassType(theSet)) {
1492 case __kCFCharSetClassBuiltin:
1493 __CFCSetPutBuiltinType(cset, __CFCSetBuiltinType(theSet));
1494 break;
1495
1496 case __kCFCharSetClassRange:
1497 __CFCSetPutRangeFirstChar(cset, __CFCSetRangeFirstChar(theSet));
1498 __CFCSetPutRangeLength(cset, __CFCSetRangeLength(theSet));
1499 break;
1500
1501 case __kCFCharSetClassString:
1502 __CFCSetPutStringBuffer(cset, (UniChar *)CFAllocatorAllocate(alloc, __kCFStringCharSetMax * sizeof(UniChar), 0));
1503
1504 __CFCSetPutStringLength(cset, __CFCSetStringLength(theSet));
1505 memmove(__CFCSetStringBuffer(cset), __CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet) * sizeof(UniChar));
1506 break;
1507
1508 case __kCFCharSetClassBitmap:
1509 if (__CFCSetBitmapBits(theSet)) {
1510 uint8_t * bitmap = (isMutable ? NULL : __CFCreateCompactBitmap(alloc, __CFCSetBitmapBits(theSet)));
1511
1512 if (bitmap == NULL) {
1513 bitmap = (uint8_t *)CFAllocatorAllocate(alloc, sizeof(uint8_t) * __kCFBitmapSize, 0);
1514 memmove(bitmap, __CFCSetBitmapBits(theSet), __kCFBitmapSize);
1515 __CFCSetPutBitmapBits(cset, bitmap);
1516 } else {
1517 __CFCSetPutCompactBitmapBits(cset, bitmap);
1518 __CFCSetPutClassType(cset, __kCFCharSetClassCompactBitmap);
1519 }
1520 } else {
1521 __CFCSetPutBitmapBits(cset, NULL);
1522 }
1523 break;
1524
1525 case __kCFCharSetClassCompactBitmap: {
1526 const uint8_t *compactBitmap = __CFCSetCompactBitmapBits(theSet);
1527
1528 if (compactBitmap) {
1529 uint32_t size = __CFCSetGetCompactBitmapSize(compactBitmap);
1530 uint8_t *newBitmap = (uint8_t *)CFAllocatorAllocate(alloc, size, 0);
1531
1532 memmove(newBitmap, compactBitmap, size);
1533 __CFCSetPutCompactBitmapBits(cset, newBitmap);
1534 }
1535 }
1536 break;
1537
1538 default:
1539 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1540 }
1541 if (__CFCSetHasNonBMPPlane(theSet)) {
1542 CFMutableCharacterSetRef annexPlane;
1543 int idx;
1544
1545 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
1546 if ((annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx))) {
1547 annexPlane = __CFCharacterSetCreateCopy(alloc, annexPlane, isMutable);
1548 __CFCSetPutCharacterSetToAnnexPlane(cset, annexPlane, idx);
1549 CFRelease(annexPlane);
1550 }
1551 }
1552 __CFCSetAnnexSetIsInverted(cset, __CFCSetAnnexIsInverted(theSet));
1553 } else if (__CFCSetAnnexIsInverted(theSet)) {
1554 __CFCSetAllocateAnnexForPlane(cset, 0); // We need to alloc annex to invert
1555 __CFCSetAnnexSetIsInverted(cset, true);
1556 }
1557
1558 return cset;
1559 }
1560
1561 CFCharacterSetRef CFCharacterSetCreateCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1562 return __CFCharacterSetCreateCopy(alloc, theSet, false);
1563 }
1564
1565 CFMutableCharacterSetRef CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1566 return __CFCharacterSetCreateCopy(alloc, theSet, true);
1567 }
1568
1569 /*** Basic accessors ***/
1570 Boolean CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet, UniChar theChar) {
1571 CFIndex length;
1572 Boolean isInverted;
1573 Boolean result = false;
1574
1575 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, Boolean, theSet, "longCharacterIsMember:", theChar);
1576
1577 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1578
1579 isInverted = __CFCSetIsInverted(theSet);
1580
1581 switch (__CFCSetClassType(theSet)) {
1582 case __kCFCharSetClassBuiltin:
1583 result = (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1584 break;
1585
1586 case __kCFCharSetClassRange:
1587 length = __CFCSetRangeLength(theSet);
1588 result = (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1589 break;
1590
1591 case __kCFCharSetClassString:
1592 result = ((length = __CFCSetStringLength(theSet)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet), length, theChar) ? !isInverted : isInverted) : isInverted);
1593 break;
1594
1595 case __kCFCharSetClassBitmap:
1596 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet), theChar) ? true : false) : isInverted);
1597 break;
1598
1599 case __kCFCharSetClassCompactBitmap:
1600 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet), theChar) ? true : false) : isInverted);
1601 break;
1602
1603 default:
1604 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1605 break;
1606 }
1607
1608 return result;
1609 }
1610
1611 Boolean CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet, UTF32Char theChar) {
1612 CFIndex length;
1613 UInt32 plane = (theChar >> 16);
1614 Boolean isAnnexInverted = false;
1615 Boolean isInverted;
1616 Boolean result = false;
1617
1618 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, Boolean, theSet, "longCharacterIsMember:", theChar);
1619
1620 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1621
1622 if (plane) {
1623 CFCharacterSetRef annexPlane;
1624
1625 if (__CFCSetIsBuiltin(theSet)) {
1626 isInverted = __CFCSetIsInverted(theSet);
1627 return (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1628 }
1629
1630 isAnnexInverted = __CFCSetAnnexIsInverted(theSet);
1631
1632 if ((annexPlane = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, plane)) == NULL) {
1633 if (!__CFCSetHasNonBMPPlane(theSet) && __CFCSetIsRange(theSet)) {
1634 isInverted = __CFCSetIsInverted(theSet);
1635 length = __CFCSetRangeLength(theSet);
1636 return (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1637 } else {
1638 return (isAnnexInverted ? true : false);
1639 }
1640 } else {
1641 theSet = annexPlane;
1642 theChar &= 0xFFFF;
1643 }
1644 }
1645
1646 isInverted = __CFCSetIsInverted(theSet);
1647
1648 switch (__CFCSetClassType(theSet)) {
1649 case __kCFCharSetClassBuiltin:
1650 result = (CFUniCharIsMemberOf(theChar, __CFCSetBuiltinType(theSet)) ? !isInverted : isInverted);
1651 break;
1652
1653 case __kCFCharSetClassRange:
1654 length = __CFCSetRangeLength(theSet);
1655 result = (length && __CFCSetRangeFirstChar(theSet) <= theChar && theChar < __CFCSetRangeFirstChar(theSet) + length ? !isInverted : isInverted);
1656 break;
1657
1658 case __kCFCharSetClassString:
1659 result = ((length = __CFCSetStringLength(theSet)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet), length, theChar) ? !isInverted : isInverted) : isInverted);
1660 break;
1661
1662 case __kCFCharSetClassBitmap:
1663 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet), theChar) ? true : false) : isInverted);
1664 break;
1665
1666 case __kCFCharSetClassCompactBitmap:
1667 result = (__CFCSetCompactBitmapBits(theSet) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet), theChar) ? true : false) : isInverted);
1668 break;
1669
1670 default:
1671 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
1672 return false; // To make compiler happy
1673 }
1674
1675 return (result ? !isAnnexInverted : isAnnexInverted);
1676 }
1677
1678 Boolean CFCharacterSetIsSurrogatePairMember(CFCharacterSetRef theSet, UniChar surrogateHigh, UniChar surrogateLow) {
1679 return CFCharacterSetIsLongCharacterMember(theSet, CFCharacterSetGetLongCharacterForSurrogatePair(surrogateHigh, surrogateLow));
1680 }
1681
1682
1683 static inline CFCharacterSetRef __CFCharacterSetGetExpandedSetForNSCharacterSet(const void *characterSet) {
1684 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFCharacterSetRef , characterSet, "_expandedCFCharacterSet");
1685 return NULL;
1686 }
1687
1688 Boolean CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
1689 CFMutableCharacterSetRef copy;
1690 CFCharacterSetRef expandedSet = NULL;
1691 CFCharacterSetRef expandedOtherSet = NULL;
1692 Boolean result;
1693
1694 if ((!CF_IS_OBJC(__kCFCharacterSetTypeID, theSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theSet))) && (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedOtherSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet)))) { // Really CF, we can do some trick here
1695 if (expandedSet) theSet = expandedSet;
1696 if (expandedOtherSet) theOtherSet = expandedOtherSet;
1697
1698 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1699 __CFGenericValidateType(theOtherSet, __kCFCharacterSetTypeID);
1700
1701 if (__CFCSetIsEmpty(theSet)) {
1702 if (__CFCSetIsInverted(theSet)) {
1703 return TRUE; // Inverted empty set covers all range
1704 } else if (!__CFCSetIsEmpty(theOtherSet) || __CFCSetIsInverted(theOtherSet)) {
1705 return FALSE;
1706 }
1707 } else if (__CFCSetIsEmpty(theOtherSet) && !__CFCSetIsInverted(theOtherSet)) {
1708 return TRUE;
1709 } else {
1710 if (__CFCSetIsBuiltin(theSet) || __CFCSetIsBuiltin(theOtherSet)) {
1711 if (__CFCSetClassType(theSet) == __CFCSetClassType(theOtherSet) && __CFCSetBuiltinType(theSet) == __CFCSetBuiltinType(theOtherSet) && !__CFCSetIsInverted(theSet) && !__CFCSetIsInverted(theOtherSet)) return TRUE;
1712 } else if (__CFCSetIsRange(theSet) || __CFCSetIsRange(theOtherSet)) {
1713 if (__CFCSetClassType(theSet) == __CFCSetClassType(theOtherSet)) {
1714 if (__CFCSetIsInverted(theSet)) {
1715 if (__CFCSetIsInverted(theOtherSet)) {
1716 return (__CFCSetRangeFirstChar(theOtherSet) > __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) > (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) ? FALSE : TRUE);
1717 } else {
1718 return ((__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) <= __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) <= __CFCSetRangeFirstChar(theOtherSet) ? TRUE : FALSE);
1719 }
1720 } else {
1721 if (__CFCSetIsInverted(theOtherSet)) {
1722 return ((__CFCSetRangeFirstChar(theSet) == 0 && __CFCSetRangeLength(theSet) == 0x110000) || (__CFCSetRangeFirstChar(theOtherSet) == 0 && (UInt32)__CFCSetRangeLength(theOtherSet) <= __CFCSetRangeFirstChar(theSet)) || ((__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) <= __CFCSetRangeFirstChar(theOtherSet) && (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) == 0x110000) ? TRUE : FALSE);
1723 } else {
1724 return (__CFCSetRangeFirstChar(theOtherSet) < __CFCSetRangeFirstChar(theSet) || (__CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet)) < (__CFCSetRangeFirstChar(theOtherSet) + __CFCSetRangeLength(theOtherSet)) ? FALSE : TRUE);
1725 }
1726 }
1727 }
1728 } else {
1729 UInt32 theSetAnnexMask = __CFCSetAnnexValidEntriesBitmap(theSet);
1730 UInt32 theOtherSetAnnexMask = __CFCSetAnnexValidEntriesBitmap(theOtherSet);
1731 Boolean isTheSetAnnexInverted = __CFCSetAnnexIsInverted(theSet);
1732 Boolean isTheOtherSetAnnexInverted = __CFCSetAnnexIsInverted(theOtherSet);
1733 uint8_t theSetBuffer[__kCFBitmapSize];
1734 uint8_t theOtherSetBuffer[__kCFBitmapSize];
1735
1736 // We mask plane 1 to plane 16
1737 if (isTheSetAnnexInverted) theSetAnnexMask = (~theSetAnnexMask) & (0xFFFF < 1);
1738 if (isTheOtherSetAnnexInverted) theOtherSetAnnexMask = (~theOtherSetAnnexMask) & (0xFFFF < 1);
1739
1740 __CFCSetGetBitmap(theSet, theSetBuffer);
1741 __CFCSetGetBitmap(theOtherSet, theOtherSetBuffer);
1742
1743 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32 *)theSetBuffer, (const UInt32 *)theOtherSetBuffer, FALSE, FALSE)) return FALSE;
1744
1745 if (theOtherSetAnnexMask) {
1746 CFCharacterSetRef theSetAnnex;
1747 CFCharacterSetRef theOtherSetAnnex;
1748 uint32_t idx;
1749
1750 if ((theSetAnnexMask & theOtherSetAnnexMask) != theOtherSetAnnexMask) return FALSE;
1751
1752 for (idx = 1;idx <= 16;idx++) {
1753 theSetAnnex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx);
1754 if (NULL == theSetAnnex) continue; // This case is already handled by the mask above
1755
1756 theOtherSetAnnex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx);
1757
1758 if (NULL == theOtherSetAnnex) {
1759 if (isTheOtherSetAnnexInverted) {
1760 __CFCSetGetBitmap(theSetAnnex, theSetBuffer);
1761 if (!__CFCSetIsEqualBitmap((const UInt32 *)theSetBuffer, (isTheSetAnnexInverted ? NULL : (const UInt32 *)-1))) return FALSE;
1762 }
1763 } else {
1764 __CFCSetGetBitmap(theSetAnnex, theSetBuffer);
1765 __CFCSetGetBitmap(theOtherSetAnnex, theOtherSetBuffer);
1766 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32 *)theSetBuffer, (const UInt32 *)theOtherSetBuffer, isTheSetAnnexInverted, isTheOtherSetAnnexInverted)) return FALSE;
1767 }
1768 }
1769 }
1770
1771 return TRUE;
1772 }
1773 }
1774 }
1775
1776 copy = CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault, theSet);
1777 CFCharacterSetIntersect(copy, theOtherSet);
1778 result = __CFCharacterSetEqual(copy, theOtherSet);
1779 CFRelease(copy);
1780
1781 return result;
1782 }
1783
1784 Boolean CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet, CFIndex thePlane) {
1785 Boolean isInverted = __CFCSetIsInverted(theSet);
1786
1787 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, Boolean, theSet, "hasMemberInPlane:", thePlane);
1788
1789 if (__CFCSetIsEmpty(theSet)) {
1790 return (isInverted ? TRUE : FALSE);
1791 } else if (__CFCSetIsBuiltin(theSet)) {
1792 CFCharacterSetPredefinedSet type = __CFCSetBuiltinType(theSet);
1793
1794 if (type == kCFCharacterSetControl) {
1795 if (isInverted || (thePlane == 14)) {
1796 return TRUE; // There is no plane that covers all values || Plane 14 has language tags
1797 } else {
1798 return (CFUniCharGetBitmapPtrForPlane(type, thePlane) ? TRUE : FALSE);
1799 }
1800 } else if ((type < kCFCharacterSetDecimalDigit) || (type == kCFCharacterSetNewline)) {
1801 return (thePlane && !isInverted ? FALSE : TRUE);
1802 } else if (__CFCSetBuiltinType(theSet) == kCFCharacterSetIllegal) {
1803 return (isInverted ? (thePlane < 3 || thePlane > 13 ? TRUE : FALSE) : TRUE); // This is according to Unicode 3.1
1804 } else {
1805 if (isInverted) {
1806 return TRUE; // There is no plane that covers all values
1807 } else {
1808 return (CFUniCharGetBitmapPtrForPlane(type, thePlane) ? TRUE : FALSE);
1809 }
1810 }
1811 } else if (__CFCSetIsRange(theSet)) {
1812 UTF32Char firstChar = __CFCSetRangeFirstChar(theSet);
1813 UTF32Char lastChar = (firstChar + __CFCSetRangeLength(theSet) - 1);
1814 CFIndex firstPlane = firstChar >> 16;
1815 CFIndex lastPlane = lastChar >> 16;
1816
1817 if (isInverted) {
1818 if (thePlane < firstPlane || thePlane > lastPlane) {
1819 return TRUE;
1820 } else if (thePlane > firstPlane && thePlane < lastPlane) {
1821 return FALSE;
1822 } else {
1823 firstChar &= 0xFFFF;
1824 lastChar &= 0xFFFF;
1825 if (thePlane == firstPlane) {
1826 return (firstChar || (firstPlane == lastPlane && lastChar != 0xFFFF) ? TRUE : FALSE);
1827 } else {
1828 return (lastChar != 0xFFFF || (firstPlane == lastPlane && firstChar) ? TRUE : FALSE);
1829 }
1830 }
1831 } else {
1832 return (thePlane < firstPlane || thePlane > lastPlane ? FALSE : TRUE);
1833 }
1834 } else {
1835 if (thePlane == 0) {
1836 switch (__CFCSetClassType(theSet)) {
1837 case __kCFCharSetClassString: if (!__CFCSetStringLength(theSet)) return isInverted; break;
1838 case __kCFCharSetClassCompactBitmap: return (__CFCSetCompactBitmapBits(theSet) ? TRUE : FALSE); break;
1839 case __kCFCharSetClassBitmap: return (__CFCSetBitmapBits(theSet) ? TRUE : FALSE); break;
1840 }
1841 return TRUE;
1842 } else {
1843 CFCharacterSetRef annex = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, thePlane);
1844 if (annex) {
1845 if (__CFCSetIsRange(annex)) {
1846 return (__CFCSetAnnexIsInverted(theSet) && (__CFCSetRangeFirstChar(annex) == 0) && (__CFCSetRangeLength(annex) == 0x10000) ? FALSE : TRUE);
1847 } else if (__CFCSetIsBitmap(annex)) {
1848 return (__CFCSetAnnexIsInverted(theSet) && __CFCSetIsEqualBitmap((const UInt32 *)__CFCSetBitmapBits(annex), (const UInt32 *)-1) ? FALSE : TRUE);
1849 } else {
1850 uint8_t bitsBuf[__kCFBitmapSize];
1851 __CFCSetGetBitmap(annex, bitsBuf);
1852 return (__CFCSetAnnexIsInverted(theSet) && __CFCSetIsEqualBitmap((const UInt32 *)bitsBuf, (const UInt32 *)-1) ? FALSE : TRUE);
1853 }
1854 } else {
1855 return __CFCSetAnnexIsInverted(theSet);
1856 }
1857 }
1858 }
1859
1860 return FALSE;
1861 }
1862
1863
1864 CFDataRef CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc, CFCharacterSetRef theSet) {
1865 CFMutableDataRef data;
1866 int numNonBMPPlanes = 0;
1867 int planeIndices[MAX_ANNEX_PLANE];
1868 int idx;
1869 int length;
1870 bool isAnnexInverted;
1871
1872 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, CFDataRef , theSet, "_retainedBitmapRepresentation");
1873
1874 __CFGenericValidateType(theSet, __kCFCharacterSetTypeID);
1875
1876 isAnnexInverted = (__CFCSetAnnexIsInverted(theSet) != 0);
1877
1878 if (__CFCSetHasNonBMPPlane(theSet)) {
1879 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
1880 if (isAnnexInverted || __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
1881 planeIndices[numNonBMPPlanes++] = idx;
1882 }
1883 }
1884 } else if (__CFCSetIsBuiltin(theSet)) {
1885 numNonBMPPlanes = (__CFCSetIsInverted(theSet) ? MAX_ANNEX_PLANE : CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(theSet)) - 1);
1886 } else if (__CFCSetIsRange(theSet)) {
1887 UInt32 firstChar = __CFCSetRangeFirstChar(theSet);
1888 UInt32 lastChar = __CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet) - 1;
1889 int firstPlane = (firstChar >> 16);
1890 int lastPlane = (lastChar >> 16);
1891 bool isInverted = (__CFCSetIsInverted(theSet) != 0);
1892
1893 if (lastPlane > 0) {
1894 if (firstPlane == 0) {
1895 firstPlane = 1;
1896 firstChar = 0x10000;
1897 }
1898 numNonBMPPlanes = (lastPlane - firstPlane) + 1;
1899 if (isInverted) {
1900 numNonBMPPlanes = MAX_ANNEX_PLANE - numNonBMPPlanes;
1901 if (firstPlane == lastPlane) {
1902 if (((firstChar & 0xFFFF) > 0) || ((lastChar & 0xFFFF) < 0xFFFF)) ++numNonBMPPlanes;
1903 } else {
1904 if ((firstChar & 0xFFFF) > 0) ++numNonBMPPlanes;
1905 if ((lastChar & 0xFFFF) < 0xFFFF) ++numNonBMPPlanes;
1906 }
1907 }
1908 } else if (isInverted) {
1909 numNonBMPPlanes = MAX_ANNEX_PLANE;
1910 }
1911 } else if (isAnnexInverted) {
1912 numNonBMPPlanes = MAX_ANNEX_PLANE;
1913 }
1914
1915 length = __kCFBitmapSize + ((__kCFBitmapSize + 1) * numNonBMPPlanes);
1916 data = CFDataCreateMutable(alloc, length);
1917 CFDataSetLength(data, length);
1918 __CFCSetGetBitmap(theSet, CFDataGetMutableBytePtr(data));
1919
1920 if (numNonBMPPlanes > 0) {
1921 uint8_t *bytes = CFDataGetMutableBytePtr(data) + __kCFBitmapSize;
1922
1923 if (__CFCSetHasNonBMPPlane(theSet)) {
1924 CFCharacterSetRef subset;
1925
1926 for (idx = 0;idx < numNonBMPPlanes;idx++) {
1927 *(bytes++) = planeIndices[idx];
1928 if ((subset = __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, planeIndices[idx])) == NULL) {
1929 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, (isAnnexInverted ? 0xFF : 0));
1930 } else {
1931 __CFCSetGetBitmap(subset, bytes);
1932 if (isAnnexInverted) {
1933 uint32_t count = __kCFBitmapSize / sizeof(uint32_t);
1934 uint32_t *bits = (uint32_t *)bytes;
1935
1936 while (count-- > 0) {
1937 *bits = ~(*bits);
1938 ++bits;
1939 }
1940 }
1941 }
1942 bytes += __kCFBitmapSize;
1943 }
1944 } else if (__CFCSetIsBuiltin(theSet)) {
1945 UInt8 result;
1946 CFIndex delta;
1947 Boolean isInverted = __CFCSetIsInverted(theSet);
1948
1949 for (idx = 0;idx < numNonBMPPlanes;idx++) {
1950 if ((result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theSet), idx + 1, bytes + 1, (isInverted != 0))) == kCFUniCharBitmapEmpty) continue;
1951 *(bytes++) = idx + 1;
1952 if (result == kCFUniCharBitmapAll) {
1953 CFIndex bitmapLength = __kCFBitmapSize;
1954 while (bitmapLength-- > 0) *(bytes++) = (uint8_t)0xFF;
1955 } else {
1956 bytes += __kCFBitmapSize;
1957 }
1958 }
1959 delta = bytes - (const uint8_t *)CFDataGetBytePtr(data);
1960 if (delta < length) CFDataSetLength(data, delta);
1961 } else if (__CFCSetIsRange(theSet)) {
1962 UInt32 firstChar = __CFCSetRangeFirstChar(theSet);
1963 UInt32 lastChar = __CFCSetRangeFirstChar(theSet) + __CFCSetRangeLength(theSet) - 1;
1964 int firstPlane = (firstChar >> 16);
1965 int lastPlane = (lastChar >> 16);
1966
1967 if (firstPlane == 0) {
1968 firstPlane = 1;
1969 firstChar = 0x10000;
1970 }
1971 if (__CFCSetIsInverted(theSet)) {
1972 // Mask out the plane byte
1973 firstChar &= 0xFFFF;
1974 lastChar &= 0xFFFF;
1975
1976 for (idx = 1;idx < firstPlane;idx++) { // Fill up until the first plane
1977 *(bytes++) = idx;
1978 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
1979 bytes += __kCFBitmapSize;
1980 }
1981 if (firstPlane == lastPlane) {
1982 if ((firstChar > 0) || (lastChar < 0xFFFF)) {
1983 *(bytes++) = idx;
1984 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
1985 __CFCSetBitmapRemoveCharactersInRange(bytes, firstChar, lastChar);
1986 bytes += __kCFBitmapSize;
1987 }
1988 } else if (firstPlane < lastPlane) {
1989 if (firstChar > 0) {
1990 *(bytes++) = idx;
1991 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0);
1992 __CFCSetBitmapAddCharactersInRange(bytes, 0, firstChar - 1);
1993 bytes += __kCFBitmapSize;
1994 }
1995 if (lastChar < 0xFFFF) {
1996 *(bytes++) = idx;
1997 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0);
1998 __CFCSetBitmapAddCharactersInRange(bytes, lastChar, 0xFFFF);
1999 bytes += __kCFBitmapSize;
2000 }
2001 }
2002 for (idx = lastPlane + 1;idx <= MAX_ANNEX_PLANE;idx++) {
2003 *(bytes++) = idx;
2004 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2005 bytes += __kCFBitmapSize;
2006 }
2007 } else {
2008 for (idx = firstPlane;idx <= lastPlane;idx++) {
2009 *(bytes++) = idx;
2010 __CFCSetBitmapAddCharactersInRange(bytes, (idx == firstPlane ? firstChar : 0), (idx == lastPlane ? lastChar : 0xFFFF));
2011 bytes += __kCFBitmapSize;
2012 }
2013 }
2014 } else if (isAnnexInverted) {
2015 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2016 *(bytes++) = idx;
2017 __CFCSetBitmapFastFillWithValue((UInt32 *)bytes, 0xFF);
2018 bytes += __kCFBitmapSize;
2019 }
2020 }
2021 }
2022
2023 return data;
2024 }
2025
2026 /*** MutableCharacterSet functions ***/
2027 void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange) {
2028 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "addCharactersInRange:", theRange);
2029
2030 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2031 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
2032
2033 if (!theRange.length || (__CFCSetIsInverted(theSet) && __CFCSetIsEmpty(theSet))) return; // Inverted && empty set contains all char
2034
2035 if (!__CFCSetIsInverted(theSet)) {
2036 if (__CFCSetIsEmpty(theSet)) {
2037 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2038 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2039 __CFCSetPutRangeLength(theSet, theRange.length);
2040 __CFCSetPutHasHashValue(theSet, false);
2041 return;
2042 } else if (__CFCSetIsRange(theSet)) {
2043 CFIndex firstChar = __CFCSetRangeFirstChar(theSet);
2044 CFIndex length = __CFCSetRangeLength(theSet);
2045
2046 if (firstChar == theRange.location) {
2047 __CFCSetPutRangeLength(theSet, __CFMin(length, theRange.length));
2048 __CFCSetPutHasHashValue(theSet, false);
2049 return;
2050 } else if (firstChar < theRange.location && theRange.location <= firstChar + length) {
2051 if (firstChar + length < theRange.location + theRange.length) __CFCSetPutRangeLength(theSet, theRange.length + (theRange.location - firstChar));
2052 __CFCSetPutHasHashValue(theSet, false);
2053 return;
2054 } else if (theRange.location < firstChar && firstChar <= theRange.location + theRange.length) {
2055 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2056 __CFCSetPutRangeLength(theSet, length + (firstChar - theRange.location));
2057 __CFCSetPutHasHashValue(theSet, false);
2058 return;
2059 }
2060 } else if (__CFCSetIsString(theSet) && __CFCSetStringLength(theSet) + theRange.length < __kCFStringCharSetMax) {
2061 UniChar *buffer;
2062 if (!__CFCSetStringBuffer(theSet))
2063 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2064 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet);
2065 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theSet) + theRange.length);
2066 while (theRange.length--) *buffer++ = (UniChar)theRange.location++;
2067 qsort(__CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet), sizeof(UniChar), chcompar);
2068 __CFCSetPutHasHashValue(theSet, false);
2069 return;
2070 }
2071 }
2072
2073 // OK, I have to be a bitmap
2074 __CFCSetMakeBitmap(theSet);
2075 __CFCSetAddNonBMPPlanesInRange(theSet, theRange);
2076 if (theRange.location < 0x10000) { // theRange is in BMP
2077 if (theRange.location + theRange.length >= NUMCHARACTERS) theRange.length = NUMCHARACTERS - theRange.location;
2078 __CFCSetBitmapAddCharactersInRange(__CFCSetBitmapBits(theSet), (UniChar)theRange.location, (UniChar)(theRange.location + theRange.length - 1));
2079 }
2080 __CFCSetPutHasHashValue(theSet, false);
2081
2082 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2083 }
2084
2085 void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet, CFRange theRange) {
2086 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "removeCharactersInRange:", theRange);
2087
2088 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2089 __CFCSetValidateRange(theRange, __PRETTY_FUNCTION__);
2090
2091 if (!theRange.length || (!__CFCSetIsInverted(theSet) && __CFCSetIsEmpty(theSet))) return; // empty set
2092
2093 if (__CFCSetIsInverted(theSet)) {
2094 if (__CFCSetIsEmpty(theSet)) {
2095 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2096 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2097 __CFCSetPutRangeLength(theSet, theRange.length);
2098 __CFCSetPutHasHashValue(theSet, false);
2099 return;
2100 } else if (__CFCSetIsRange(theSet)) {
2101 CFIndex firstChar = __CFCSetRangeFirstChar(theSet);
2102 CFIndex length = __CFCSetRangeLength(theSet);
2103
2104 if (firstChar == theRange.location) {
2105 __CFCSetPutRangeLength(theSet, __CFMin(length, theRange.length));
2106 __CFCSetPutHasHashValue(theSet, false);
2107 return;
2108 } else if (firstChar < theRange.location && theRange.location <= firstChar + length) {
2109 if (firstChar + length < theRange.location + theRange.length) __CFCSetPutRangeLength(theSet, theRange.length + (theRange.location - firstChar));
2110 __CFCSetPutHasHashValue(theSet, false);
2111 return;
2112 } else if (theRange.location < firstChar && firstChar <= theRange.location + theRange.length) {
2113 __CFCSetPutRangeFirstChar(theSet, theRange.location);
2114 __CFCSetPutRangeLength(theSet, length + (firstChar - theRange.location));
2115 __CFCSetPutHasHashValue(theSet, false);
2116 return;
2117 }
2118 } else if (__CFCSetIsString(theSet) && __CFCSetStringLength(theSet) + theRange.length < __kCFStringCharSetMax) {
2119 UniChar *buffer;
2120 if (!__CFCSetStringBuffer(theSet))
2121 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2122 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet);
2123 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theSet) + theRange.length);
2124 while (theRange.length--) *buffer++ = (UniChar)theRange.location++;
2125 qsort(__CFCSetStringBuffer(theSet), __CFCSetStringLength(theSet), sizeof(UniChar), chcompar);
2126 __CFCSetPutHasHashValue(theSet, false);
2127 return;
2128 }
2129 }
2130
2131 // OK, I have to be a bitmap
2132 __CFCSetMakeBitmap(theSet);
2133 __CFCSetRemoveNonBMPPlanesInRange(theSet, theRange);
2134 if (theRange.location < 0x10000) { // theRange is in BMP
2135 if (theRange.location + theRange.length > NUMCHARACTERS) theRange.length = NUMCHARACTERS - theRange.location;
2136 if (theRange.location == 0 && theRange.length == NUMCHARACTERS) { // Remove all
2137 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2138 __CFCSetPutBitmapBits(theSet, NULL);
2139 } else {
2140 __CFCSetBitmapRemoveCharactersInRange(__CFCSetBitmapBits(theSet), (UniChar)theRange.location, (UniChar)(theRange.location + theRange.length - 1));
2141 }
2142 }
2143
2144 __CFCSetPutHasHashValue(theSet, false);
2145 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2146 }
2147
2148 void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString) {
2149 const UniChar *buffer;
2150 CFIndex length;
2151
2152 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "addCharactersInString:", theString);
2153
2154 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2155
2156 if ((__CFCSetIsEmpty(theSet) && __CFCSetIsInverted(theSet)) || !(length = CFStringGetLength(theString))) return;
2157
2158 if (!__CFCSetIsInverted(theSet)) {
2159 CFIndex newLength = length + (__CFCSetIsEmpty(theSet) ? 0 : (__CFCSetIsString(theSet) ? __CFCSetStringLength(theSet) : __kCFStringCharSetMax));
2160
2161 if (newLength < __kCFStringCharSetMax) {
2162 if (__CFCSetIsEmpty(theSet)) {
2163 __CFCSetPutClassType(theSet, __kCFCharSetClassString);
2164 __CFCSetPutStringLength(theSet, 0); // Make sure to reset this
2165 }
2166
2167 if (!__CFCSetStringBuffer(theSet))
2168 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2169 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet);
2170
2171 __CFCSetPutClassType(theSet, __kCFCharSetClassString);
2172 __CFCSetPutStringLength(theSet, newLength);
2173 CFStringGetCharacters(theString, CFRangeMake(0, length), (UniChar*)buffer);
2174 qsort(__CFCSetStringBuffer(theSet), newLength, sizeof(UniChar), chcompar);
2175 __CFCSetPutHasHashValue(theSet, false);
2176 return;
2177 }
2178 }
2179
2180 // OK, I have to be a bitmap
2181 __CFCSetMakeBitmap(theSet);
2182 if ((buffer = CFStringGetCharactersPtr(theString))) {
2183 while (length--) __CFCSetBitmapAddCharacter(__CFCSetBitmapBits(theSet), *buffer++);
2184 } else {
2185 CFStringInlineBuffer inlineBuffer;
2186 CFIndex idx;
2187
2188 CFStringInitInlineBuffer(theString, &inlineBuffer, CFRangeMake(0, length));
2189 for (idx = 0;idx < length;idx++) __CFCSetBitmapAddCharacter(__CFCSetBitmapBits(theSet), __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer, idx));
2190 }
2191 __CFCSetPutHasHashValue(theSet, false);
2192 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2193 }
2194
2195 void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet, CFStringRef theString) {
2196 const UniChar *buffer;
2197 CFIndex length;
2198
2199 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "removeCharactersInString:", theString);
2200
2201 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2202
2203 if ((__CFCSetIsEmpty(theSet) && !__CFCSetIsInverted(theSet)) || !(length = CFStringGetLength(theString))) return;
2204
2205 if (__CFCSetIsInverted(theSet)) {
2206 CFIndex newLength = length + (__CFCSetIsEmpty(theSet) ? 0 : (__CFCSetIsString(theSet) ? __CFCSetStringLength(theSet) : __kCFStringCharSetMax));
2207
2208 if (newLength < __kCFStringCharSetMax) {
2209 if (__CFCSetIsEmpty(theSet)) {
2210 __CFCSetPutClassType(theSet, __kCFCharSetClassString);
2211 __CFCSetPutStringLength(theSet, 0); // Make sure to reset this
2212 }
2213
2214 if (!__CFCSetStringBuffer(theSet))
2215 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2216 buffer = __CFCSetStringBuffer(theSet) + __CFCSetStringLength(theSet);
2217
2218 __CFCSetPutClassType(theSet, __kCFCharSetClassString);
2219 __CFCSetPutStringLength(theSet, newLength);
2220 CFStringGetCharacters(theString, CFRangeMake(0, length), (UniChar *)buffer);
2221 qsort(__CFCSetStringBuffer(theSet), newLength, sizeof(UniChar), chcompar);
2222 __CFCSetPutHasHashValue(theSet, false);
2223 return;
2224 }
2225 }
2226
2227 // OK, I have to be a bitmap
2228 __CFCSetMakeBitmap(theSet);
2229 if ((buffer = CFStringGetCharactersPtr(theString))) {
2230 while (length--) __CFCSetBitmapRemoveCharacter(__CFCSetBitmapBits(theSet), *buffer++);
2231 } else {
2232 CFStringInlineBuffer inlineBuffer;
2233 CFIndex idx;
2234
2235 CFStringInitInlineBuffer(theString, &inlineBuffer, CFRangeMake(0, length));
2236 for (idx = 0;idx < length;idx++) __CFCSetBitmapRemoveCharacter(__CFCSetBitmapBits(theSet), __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer, idx));
2237 }
2238 __CFCSetPutHasHashValue(theSet, false);
2239 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2240 }
2241
2242 void CFCharacterSetUnion(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
2243 CFCharacterSetRef expandedSet = NULL;
2244
2245 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "formUnionWithCharacterSet:", theOtherSet);
2246
2247 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2248
2249 if (__CFCSetIsEmpty(theSet) && __CFCSetIsInverted(theSet)) return; // Inverted empty set contains all char
2250
2251 if (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet))) { // Really CF, we can do some trick here
2252 if (expandedSet) theOtherSet = expandedSet;
2253
2254 if (__CFCSetIsEmpty(theOtherSet)) {
2255 if (__CFCSetIsInverted(theOtherSet)) {
2256 if (__CFCSetIsString(theSet) && __CFCSetStringBuffer(theSet)) {
2257 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetStringBuffer(theSet));
2258 } else if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) {
2259 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2260 } else if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) {
2261 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetCompactBitmapBits(theSet));
2262 }
2263 __CFCSetPutClassType(theSet, __kCFCharSetClassRange);
2264 __CFCSetPutRangeLength(theSet, 0);
2265 __CFCSetPutIsInverted(theSet, true);
2266 __CFCSetPutHasHashValue(theSet, false);
2267 __CFCSetDeallocateAnnexPlane(theSet);
2268 }
2269 } else if (__CFCSetIsBuiltin(theOtherSet) && __CFCSetIsEmpty(theSet)) { // theSet can be builtin set
2270 __CFCSetPutClassType(theSet, __kCFCharSetClassBuiltin);
2271 __CFCSetPutBuiltinType(theSet, __CFCSetBuiltinType(theOtherSet));
2272 __CFCSetPutHasHashValue(theSet, false);
2273 } else {
2274 if (__CFCSetIsRange(theOtherSet)) {
2275 if (__CFCSetIsInverted(theOtherSet)) {
2276 UTF32Char firstChar = __CFCSetRangeFirstChar(theOtherSet);
2277 CFIndex length = __CFCSetRangeLength(theOtherSet);
2278
2279 if (firstChar > 0) CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(0, firstChar));
2280 firstChar += length;
2281 length = 0x110000 - firstChar;
2282 CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(firstChar, length));
2283 } else {
2284 CFCharacterSetAddCharactersInRange(theSet, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet), __CFCSetRangeLength(theOtherSet)));
2285 }
2286 } else if (__CFCSetIsString(theOtherSet)) {
2287 CFStringRef string = CFStringCreateWithCharactersNoCopy(CFGetAllocator(theSet), __CFCSetStringBuffer(theOtherSet), __CFCSetStringLength(theOtherSet), kCFAllocatorNull);
2288 CFCharacterSetAddCharactersInString(theSet, string);
2289 CFRelease(string);
2290 } else {
2291 __CFCSetMakeBitmap(theSet);
2292 if (__CFCSetIsBitmap(theOtherSet)) {
2293 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2294 UInt32 *bitmap2 = (UInt32*)__CFCSetBitmapBits(theOtherSet);
2295 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2296 while (length--) *bitmap1++ |= *bitmap2++;
2297 } else {
2298 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2299 UInt32 *bitmap2;
2300 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2301 uint8_t bitmapBuffer[__kCFBitmapSize];
2302 __CFCSetGetBitmap(theOtherSet, bitmapBuffer);
2303 bitmap2 = (UInt32*)bitmapBuffer;
2304 while (length--) *bitmap1++ |= *bitmap2++;
2305 }
2306 __CFCSetPutHasHashValue(theSet, false);
2307 }
2308 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2309 CFMutableCharacterSetRef otherSetPlane;
2310 int idx;
2311
2312 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2313 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2314 CFCharacterSetUnion((CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx), otherSetPlane);
2315 }
2316 }
2317 } else if (__CFCSetIsBuiltin(theOtherSet)) {
2318 CFMutableCharacterSetRef annexPlane;
2319 uint8_t bitmapBuffer[__kCFBitmapSize];
2320 uint8_t result;
2321 int planeIndex;
2322 Boolean isOtherAnnexPlaneInverted = __CFCSetAnnexIsInverted(theOtherSet);
2323 UInt32 *bitmap1;
2324 UInt32 *bitmap2;
2325 CFIndex length;
2326
2327 for (planeIndex = 1;planeIndex <= MAX_ANNEX_PLANE;planeIndex++) {
2328 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet), planeIndex, bitmapBuffer, (isOtherAnnexPlaneInverted != 0));
2329 if (result != kCFUniCharBitmapEmpty) {
2330 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, planeIndex);
2331 if (result == kCFUniCharBitmapAll) {
2332 CFCharacterSetAddCharactersInRange(annexPlane, CFRangeMake(0x0000, 0x10000));
2333 } else {
2334 __CFCSetMakeBitmap(annexPlane);
2335 bitmap1 = (UInt32 *)__CFCSetBitmapBits(annexPlane);
2336 length = __kCFBitmapSize / sizeof(UInt32);
2337 bitmap2 = (UInt32*)bitmapBuffer;
2338 while (length--) *bitmap1++ |= *bitmap2++;
2339 }
2340 }
2341 }
2342 }
2343 }
2344 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2345 } else { // It's NSCharacterSet
2346 CFDataRef bitmapRep = CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault, theOtherSet);
2347 const UInt32 *bitmap2 = (bitmapRep && CFDataGetLength(bitmapRep) ? (const UInt32 *)CFDataGetBytePtr(bitmapRep) : NULL);
2348 if (bitmap2) {
2349 UInt32 *bitmap1;
2350 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2351 __CFCSetMakeBitmap(theSet);
2352 bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2353 while (length--) *bitmap1++ |= *bitmap2++;
2354 __CFCSetPutHasHashValue(theSet, false);
2355 }
2356 CFRelease(bitmapRep);
2357 }
2358 }
2359
2360 void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet, CFCharacterSetRef theOtherSet) {
2361 CFCharacterSetRef expandedSet = NULL;
2362
2363 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID, void, theSet, "formIntersectionWithCharacterSet:", theOtherSet);
2364
2365 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2366
2367 if (__CFCSetIsEmpty(theSet) && !__CFCSetIsInverted(theSet)) return; // empty set
2368
2369 if (!CF_IS_OBJC(__kCFCharacterSetTypeID, theOtherSet) || (expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet))) { // Really CF, we can do some trick here
2370 if (expandedSet) theOtherSet = expandedSet;
2371
2372 if (__CFCSetIsEmpty(theOtherSet)) {
2373 if (!__CFCSetIsInverted(theOtherSet)) {
2374 if (__CFCSetIsString(theSet) && __CFCSetStringBuffer(theSet)) {
2375 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetStringBuffer(theSet));
2376 } else if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) {
2377 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetBitmapBits(theSet));
2378 } else if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) {
2379 CFAllocatorDeallocate(CFGetAllocator(theSet), __CFCSetCompactBitmapBits(theSet));
2380 }
2381 __CFCSetPutClassType(theSet, __kCFCharSetClassBitmap);
2382 __CFCSetPutBitmapBits(theSet, NULL);
2383 __CFCSetPutIsInverted(theSet, false);
2384 theSet->_hashValue = 0;
2385 __CFCSetPutHasHashValue(theSet, true);
2386 __CFCSetDeallocateAnnexPlane(theSet);
2387 }
2388 } else if (__CFCSetIsEmpty(theSet)) { // non inverted empty set contains all character
2389 __CFCSetPutClassType(theSet, __CFCSetClassType(theOtherSet));
2390 __CFCSetPutHasHashValue(theSet, __CFCSetHasHashValue(theOtherSet));
2391 __CFCSetPutIsInverted(theSet, __CFCSetIsInverted(theOtherSet));
2392 theSet->_hashValue = theOtherSet->_hashValue;
2393 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2394 CFMutableCharacterSetRef otherSetPlane;
2395 int idx;
2396 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2397 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2398 otherSetPlane = (CFMutableCharacterSetRef)CFCharacterSetCreateMutableCopy(CFGetAllocator(theSet), otherSetPlane);
2399 __CFCSetPutCharacterSetToAnnexPlane(theSet, otherSetPlane, idx);
2400 CFRelease(otherSetPlane);
2401 }
2402 }
2403 __CFCSetAnnexSetIsInverted(theSet, __CFCSetAnnexIsInverted(theOtherSet));
2404 }
2405
2406 switch (__CFCSetClassType(theOtherSet)) {
2407 case __kCFCharSetClassBuiltin:
2408 __CFCSetPutBuiltinType(theSet, __CFCSetBuiltinType(theOtherSet));
2409 break;
2410
2411 case __kCFCharSetClassRange:
2412 __CFCSetPutRangeFirstChar(theSet, __CFCSetRangeFirstChar(theOtherSet));
2413 __CFCSetPutRangeLength(theSet, __CFCSetRangeLength(theOtherSet));
2414 break;
2415
2416 case __kCFCharSetClassString:
2417 __CFCSetPutStringLength(theSet, __CFCSetStringLength(theOtherSet));
2418 if (!__CFCSetStringBuffer(theSet))
2419 __CFCSetPutStringBuffer(theSet, (UniChar *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFStringCharSetMax * sizeof(UniChar), 0));
2420 memmove(__CFCSetStringBuffer(theSet), __CFCSetStringBuffer(theOtherSet), __CFCSetStringLength(theSet) * sizeof(UniChar));
2421 break;
2422
2423 case __kCFCharSetClassBitmap:
2424 __CFCSetPutBitmapBits(theSet, (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet), sizeof(uint8_t) * __kCFBitmapSize, 0));
2425 memmove(__CFCSetBitmapBits(theSet), __CFCSetBitmapBits(theOtherSet), __kCFBitmapSize);
2426 break;
2427
2428 case __kCFCharSetClassCompactBitmap: {
2429 const uint8_t *cBitmap = __CFCSetCompactBitmapBits(theOtherSet);
2430 uint8_t *newBitmap;
2431 uint32_t size = __CFCSetGetCompactBitmapSize(cBitmap);
2432 newBitmap = (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet), sizeof(uint8_t) * size, 0);
2433 __CFCSetPutBitmapBits(theSet, newBitmap);
2434 memmove(newBitmap, cBitmap, size);
2435 }
2436 break;
2437
2438 default:
2439 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
2440 }
2441 } else {
2442 __CFCSetMakeBitmap(theSet);
2443 if (__CFCSetIsBitmap(theOtherSet)) {
2444 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2445 UInt32 *bitmap2 = (UInt32*)__CFCSetBitmapBits(theOtherSet);
2446 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2447 while (length--) *bitmap1++ &= *bitmap2++;
2448 } else {
2449 UInt32 *bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2450 UInt32 *bitmap2;
2451 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2452 uint8_t bitmapBuffer[__kCFBitmapSize];
2453 __CFCSetGetBitmap(theOtherSet, bitmapBuffer);
2454 bitmap2 = (UInt32*)bitmapBuffer;
2455 while (length--) *bitmap1++ &= *bitmap2++;
2456 }
2457 __CFCSetPutHasHashValue(theSet, false);
2458 if (__CFCSetHasNonBMPPlane(theOtherSet)) {
2459 CFMutableCharacterSetRef annexPlane;
2460 CFMutableCharacterSetRef otherSetPlane;
2461 int idx;
2462 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2463 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet, idx))) {
2464 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx);
2465 CFCharacterSetIntersect(annexPlane, otherSetPlane);
2466 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2467 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
2468 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2469 }
2470 }
2471 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2472 } else if (__CFCSetIsBuiltin(theOtherSet)) {
2473 CFMutableCharacterSetRef annexPlane;
2474 uint8_t bitmapBuffer[__kCFBitmapSize];
2475 uint8_t result;
2476 int planeIndex;
2477 Boolean isOtherAnnexPlaneInverted = __CFCSetAnnexIsInverted(theOtherSet);
2478 UInt32 *bitmap1;
2479 UInt32 *bitmap2;
2480 CFIndex length;
2481
2482 for (planeIndex = 1;planeIndex <= MAX_ANNEX_PLANE;planeIndex++) {
2483 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, planeIndex);
2484 if (annexPlane) {
2485 result = CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet), planeIndex, bitmapBuffer, (isOtherAnnexPlaneInverted != 0));
2486 if (result == kCFUniCharBitmapEmpty) {
2487 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, planeIndex);
2488 } else if (result == kCFUniCharBitmapFilled) {
2489 Boolean isEmpty = true;
2490
2491 __CFCSetMakeBitmap(annexPlane);
2492 bitmap1 = (UInt32 *)__CFCSetBitmapBits(annexPlane);
2493 length = __kCFBitmapSize / sizeof(UInt32);
2494 bitmap2 = (UInt32*)bitmapBuffer;
2495
2496 while (length--) {
2497 if ((*bitmap1++ &= *bitmap2++)) isEmpty = false;
2498 }
2499 if (isEmpty) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, planeIndex);
2500 }
2501 }
2502 }
2503 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2504 } else if (__CFCSetIsRange(theOtherSet)) {
2505 CFMutableCharacterSetRef tempOtherSet = CFCharacterSetCreateMutable(CFGetAllocator(theSet));
2506 CFMutableCharacterSetRef annexPlane;
2507 CFMutableCharacterSetRef otherSetPlane;
2508 int idx;
2509
2510 __CFCSetAddNonBMPPlanesInRange(tempOtherSet, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet), __CFCSetRangeLength(theOtherSet)));
2511
2512 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2513 if ((otherSetPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(tempOtherSet, idx))) {
2514 annexPlane = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSet(theSet, idx);
2515 CFCharacterSetIntersect(annexPlane, otherSetPlane);
2516 if (__CFCSetIsEmpty(annexPlane) && !__CFCSetIsInverted(annexPlane)) __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2517 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) {
2518 __CFCSetPutCharacterSetToAnnexPlane(theSet, NULL, idx);
2519 }
2520 }
2521 if (!__CFCSetHasNonBMPPlane(theSet)) __CFCSetDeallocateAnnexPlane(theSet);
2522 CFRelease(tempOtherSet);
2523 } else if (__CFCSetHasNonBMPPlane(theSet)) {
2524 __CFCSetDeallocateAnnexPlane(theSet);
2525 }
2526 }
2527 if (__CFCheckForExapendedSet) __CFCheckForExpandedSet(theSet);
2528 } else { // It's NSCharacterSet
2529 CFDataRef bitmapRep = CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault, theOtherSet);
2530 const UInt32 *bitmap2 = (bitmapRep && CFDataGetLength(bitmapRep) ? (const UInt32 *)CFDataGetBytePtr(bitmapRep) : NULL);
2531 if (bitmap2) {
2532 UInt32 *bitmap1;
2533 CFIndex length = __kCFBitmapSize / sizeof(UInt32);
2534 __CFCSetMakeBitmap(theSet);
2535 bitmap1 = (UInt32*)__CFCSetBitmapBits(theSet);
2536 while (length--) *bitmap1++ &= *bitmap2++;
2537 __CFCSetPutHasHashValue(theSet, false);
2538 }
2539 CFRelease(bitmapRep);
2540 }
2541 }
2542
2543 void CFCharacterSetInvert(CFMutableCharacterSetRef theSet) {
2544
2545 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID, void, theSet, "invert");
2546
2547 __CFCSetValidateTypeAndMutability(theSet, __PRETTY_FUNCTION__);
2548
2549 __CFCSetPutHasHashValue(theSet, false);
2550
2551 if (__CFCSetClassType(theSet) == __kCFCharSetClassBitmap) {
2552 CFIndex idx;
2553 CFIndex count = __kCFBitmapSize / sizeof(UInt32);
2554 UInt32 *bitmap = (UInt32*) __CFCSetBitmapBits(theSet);
2555
2556 if (NULL == bitmap) {
2557 bitmap = (UInt32 *)CFAllocatorAllocate(CFGetAllocator(theSet), __kCFBitmapSize, 0);
2558 __CFCSetPutBitmapBits(theSet, (uint8_t *)bitmap);
2559 for (idx = 0;idx < count;idx++) bitmap[idx] = ((UInt32)0xFFFFFFFF);
2560 } else {
2561 for (idx = 0;idx < count;idx++) bitmap[idx] = ~(bitmap[idx]);
2562 }
2563 __CFCSetAllocateAnnexForPlane(theSet, 0); // We need to alloc annex to invert
2564 } else if (__CFCSetClassType(theSet) == __kCFCharSetClassCompactBitmap) {
2565 uint8_t *bitmap = __CFCSetCompactBitmapBits(theSet);
2566 int idx;
2567 int length = 0;
2568 uint8_t value;
2569
2570 for (idx = 0;idx < __kCFCompactBitmapNumPages;idx++) {
2571 value = bitmap[idx];
2572
2573 if (value == 0) {
2574 bitmap[idx] = UINT8_MAX;
2575 } else if (value == UINT8_MAX) {
2576 bitmap[idx] = 0;
2577 } else {
2578 length += __kCFCompactBitmapPageSize;
2579 }
2580 }
2581 bitmap += __kCFCompactBitmapNumPages;
2582 for (idx = 0;idx < length;idx++) bitmap[idx] = ~(bitmap[idx]);
2583 __CFCSetAllocateAnnexForPlane(theSet, 0); // We need to alloc annex to invert
2584 } else {
2585 __CFCSetPutIsInverted(theSet, !__CFCSetIsInverted(theSet));
2586 }
2587 __CFCSetAnnexSetIsInverted(theSet, !__CFCSetAnnexIsInverted(theSet));
2588 }
2589
2590 void CFCharacterSetCompact(CFMutableCharacterSetRef theSet) {
2591 if (__CFCSetIsBitmap(theSet) && __CFCSetBitmapBits(theSet)) __CFCSetMakeCompact(theSet);
2592 if (__CFCSetHasNonBMPPlane(theSet)) {
2593 CFMutableCharacterSetRef annex;
2594 int idx;
2595
2596 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2597 if ((annex = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) && __CFCSetIsBitmap(annex) && __CFCSetBitmapBits(annex)) {
2598 __CFCSetMakeCompact(annex);
2599 }
2600 }
2601 }
2602 }
2603
2604 void CFCharacterSetFast(CFMutableCharacterSetRef theSet) {
2605 if (__CFCSetIsCompactBitmap(theSet) && __CFCSetCompactBitmapBits(theSet)) __CFCSetMakeBitmap(theSet);
2606 if (__CFCSetHasNonBMPPlane(theSet)) {
2607 CFMutableCharacterSetRef annex;
2608 int idx;
2609
2610 for (idx = 1;idx <= MAX_ANNEX_PLANE;idx++) {
2611 if ((annex = (CFMutableCharacterSetRef)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet, idx)) && __CFCSetIsCompactBitmap(annex) && __CFCSetCompactBitmapBits(annex)) {
2612 __CFCSetMakeBitmap(annex);
2613 }
2614 }
2615 }
2616 }
2617
2618 /* Keyed-coding support
2619 */
2620 CFCharacterSetKeyedCodingType _CFCharacterSetGetKeyedCodingType(CFCharacterSetRef cset) {
2621 switch (__CFCSetClassType(cset)) {
2622 case __kCFCharSetClassBuiltin: return ((__CFCSetBuiltinType(cset) < kCFCharacterSetSymbol) ? kCFCharacterSetKeyedCodingTypeBuiltin : kCFCharacterSetKeyedCodingTypeBuiltinAndBitmap);
2623 case __kCFCharSetClassRange: return kCFCharacterSetKeyedCodingTypeRange;
2624
2625 case __kCFCharSetClassString: // We have to check if we have non-BMP here
2626 if (!__CFCSetHasNonBMPPlane(cset) && !__CFCSetAnnexIsInverted(cset)) return kCFCharacterSetKeyedCodingTypeString; // BMP only. we can archive the string
2627 /* fallthrough */
2628
2629 default:
2630 return kCFCharacterSetKeyedCodingTypeBitmap;
2631 }
2632 }
2633
2634 CFCharacterSetPredefinedSet _CFCharacterSetGetKeyedCodingBuiltinType(CFCharacterSetRef cset) { return __CFCSetBuiltinType(cset); }
2635 CFRange _CFCharacterSetGetKeyedCodingRange(CFCharacterSetRef cset) { return CFRangeMake(__CFCSetRangeFirstChar(cset), __CFCSetRangeLength(cset)); }
2636 CFStringRef _CFCharacterSetCreateKeyedCodingString(CFCharacterSetRef cset) { return CFStringCreateWithCharacters(kCFAllocatorSystemDefault, __CFCSetStringBuffer(cset), __CFCSetStringLength(cset)); }
2637
2638 bool _CFCharacterSetIsInverted(CFCharacterSetRef cset) { return (__CFCSetIsInverted(cset) != 0); }
2639 void _CFCharacterSetSetIsInverted(CFCharacterSetRef cset, bool flag) { __CFCSetPutIsInverted((CFMutableCharacterSetRef)cset, flag); }
2640
2641 /* Inline buffer support
2642 */
2643 void CFCharacterSetInitInlineBuffer(CFCharacterSetRef cset, CFCharacterSetInlineBuffer *buffer) {
2644 memset(buffer, 0, sizeof(CFCharacterSetInlineBuffer));
2645 buffer->cset = cset;
2646 buffer->rangeLimit = 0x10000;
2647
2648 if (CF_IS_OBJC(__kCFCharacterSetTypeID, cset)) {
2649 CFCharacterSetRef expandedSet = __CFCharacterSetGetExpandedSetForNSCharacterSet(cset);
2650
2651 if (NULL == expandedSet) {
2652 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2653 buffer->rangeLimit = 0x110000;
2654
2655 return;
2656 } else {
2657 cset = expandedSet;
2658 }
2659 }
2660
2661 switch (__CFCSetClassType(cset)) {
2662 case __kCFCharSetClassBuiltin:
2663 buffer->bitmap = CFUniCharGetBitmapPtrForPlane(__CFCSetBuiltinType(cset), 0);
2664 buffer->rangeLimit = 0x110000;
2665 if (NULL == buffer->bitmap) {
2666 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2667 } else {
2668 if (__CFCSetIsInverted(cset)) buffer->flags = kCFCharacterSetIsInverted;
2669 }
2670 break;
2671
2672 case __kCFCharSetClassRange:
2673 buffer->rangeStart = __CFCSetRangeFirstChar(cset);
2674 buffer->rangeLimit = __CFCSetRangeFirstChar(cset) + __CFCSetRangeLength(cset);
2675 if (__CFCSetIsInverted(cset)) buffer->flags = kCFCharacterSetIsInverted;
2676 return;
2677
2678 case __kCFCharSetClassString:
2679 buffer->flags = kCFCharacterSetNoBitmapAvailable;
2680 if (__CFCSetStringLength(cset) > 0) {
2681 buffer->rangeStart = *__CFCSetStringBuffer(cset);
2682 buffer->rangeLimit = *(__CFCSetStringBuffer(cset) + __CFCSetStringLength(cset) - 1) + 1;
2683
2684 if (__CFCSetIsInverted(cset)) {
2685 if (0 == buffer->rangeStart) {
2686 buffer->rangeStart = buffer->rangeLimit;
2687 buffer->rangeLimit = 0x10000;
2688 } else if (0x10000 == buffer->rangeLimit) {
2689 buffer->rangeLimit = buffer->rangeStart;
2690 buffer->rangeStart = 0;
2691 } else {
2692 buffer->rangeStart = 0;
2693 buffer->rangeLimit = 0x10000;
2694 }
2695 }
2696 }
2697 break;
2698
2699 case __kCFCharSetClassBitmap:
2700 case __kCFCharSetClassCompactBitmap:
2701 buffer->bitmap = __CFCSetCompactBitmapBits(cset);
2702 if (NULL == buffer->bitmap) {
2703 buffer->flags = kCFCharacterSetIsCompactBitmap;
2704 if (__CFCSetIsInverted(cset)) buffer->flags |= kCFCharacterSetIsInverted;
2705 } else {
2706 if (__kCFCharSetClassCompactBitmap == __CFCSetClassType(cset)) buffer->flags = kCFCharacterSetIsCompactBitmap;
2707 }
2708 break;
2709
2710 default:
2711 CFAssert1(0, __kCFLogAssertion, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__); // We should never come here
2712 return;
2713 }
2714
2715 if (__CFCSetAnnexIsInverted(cset)) {
2716 buffer->rangeLimit = 0x110000;
2717 } else if (__CFCSetHasNonBMPPlane(cset)) {
2718 CFIndex index;
2719
2720 for (index = MAX_ANNEX_PLANE;index > 0;index--) {
2721 if (NULL != __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset, index)) {
2722 buffer->rangeLimit = (index + 1) << 16;
2723 break;
2724 }
2725 }
2726 }
2727 }