2 * Copyright (c) 2015 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
25 Copyright (c) 1999-2014, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
29 #include <CoreFoundation/CFCharacterSet.h>
30 #include <CoreFoundation/CFByteOrder.h>
31 #include "CFCharacterSetPriv.h"
32 #include <CoreFoundation/CFData.h>
33 #include <CoreFoundation/CFString.h>
34 #include "CFInternal.h"
35 #include <CoreFoundation/CFUniChar.h>
36 #include "CFUniCharPriv.h"
41 #define BITSPERBYTE 8 /* (CHAR_BIT * sizeof(unsigned char)) */
44 #define NUMCHARACTERS 65536
46 #define MAX_ANNEX_PLANE (16)
/* Number of bytes in the array that keeps the bits (one bit per BMP character). */
50 #define __kCFBitmapSize (NUMCHARACTERS / BITSPERBYTE)
/* Maximum number of elements that a __kCFCharSetClassString CFCharacterSet can hold. */
54 #define __kCFStringCharSetMax 64
/* The ID number of the last builtin set. */
58 #define __kCFLastBuiltinSetID kCFCharacterSetNewline
/* How many elements may be in the "singles" array before we switch to binary search. */
62 #define __kCFSetBreakeven 10
/* This tells us, within 1k or so, whether a thing is POTENTIALLY in the set (in the bitmap blob of the private structure) before we bother to do specific checking.
   Both parameters are parenthesized so that expression arguments (e.g. `c + 1` or `p + k`) expand with the intended precedence.
*/
#define __CFCSetBitsInRange(n, i) ((i)[(n) >> 15] & (1L << (((n) >> 10) % 32)))
/* Compact bitmap parameters. */
70 #define __kCFCompactBitmapNumPages (256)
72 #define __kCFCompactBitmapMaxPages (128) // the max pages allocated
74 #define __kCFCompactBitmapPageSize (__kCFBitmapSize / __kCFCompactBitmapNumPages)
77 CFCharacterSetRef
*_nonBMPPlanes
;
78 unsigned int _validEntriesBitmap
;
79 unsigned char _numOfAllocEntries
;
80 unsigned char _isAnnexInverted
;
82 } CFCharSetAnnexStruct
;
84 struct __CFCharacterSet
{
86 CFHashCode _hashValue
;
106 CFCharSetAnnexStruct
*_annex
;
109 /* _base._info values interesting for CFCharacterSet
112 __kCFCharSetClassTypeMask
= 0x0070,
113 __kCFCharSetClassBuiltin
= 0x0000,
114 __kCFCharSetClassRange
= 0x0010,
115 __kCFCharSetClassString
= 0x0020,
116 __kCFCharSetClassBitmap
= 0x0030,
117 __kCFCharSetClassSet
= 0x0040,
118 __kCFCharSetClassCompactBitmap
= 0x0040,
120 __kCFCharSetIsInvertedMask
= 0x0008,
121 __kCFCharSetIsInverted
= 0x0008,
123 __kCFCharSetHasHashValueMask
= 0x00004,
124 __kCFCharSetHasHashValue
= 0x0004,
126 /* Generic CFBase values */
127 __kCFCharSetIsMutableMask
= 0x0001,
128 __kCFCharSetIsMutable
= 0x0001,
/* Inline accessor functions for the flags kept in _base._cfinfo[CF_INFO_BITS]. */
133 CF_INLINE Boolean
__CFCSetIsMutable(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetIsMutableMask
) == __kCFCharSetIsMutable
;}
134 CF_INLINE Boolean
__CFCSetIsBuiltin(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetClassTypeMask
) == __kCFCharSetClassBuiltin
;}
135 CF_INLINE Boolean
__CFCSetIsRange(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetClassTypeMask
) == __kCFCharSetClassRange
;}
136 CF_INLINE Boolean
__CFCSetIsString(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetClassTypeMask
) == __kCFCharSetClassString
;}
137 CF_INLINE Boolean
__CFCSetIsBitmap(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetClassTypeMask
) == __kCFCharSetClassBitmap
;}
138 CF_INLINE Boolean
__CFCSetIsCompactBitmap(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetClassTypeMask
) == __kCFCharSetClassCompactBitmap
;}
139 CF_INLINE Boolean
__CFCSetIsInverted(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetIsInvertedMask
) == __kCFCharSetIsInverted
;}
140 CF_INLINE Boolean
__CFCSetHasHashValue(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetHasHashValueMask
) == __kCFCharSetHasHashValue
;}
141 CF_INLINE UInt32
__CFCSetClassType(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetClassTypeMask
);}
143 CF_INLINE
void __CFCSetPutIsMutable(CFMutableCharacterSetRef cset
, Boolean isMutable
) {(isMutable
? (cset
->_base
._cfinfo
[CF_INFO_BITS
] |= __kCFCharSetIsMutable
) : (cset
->_base
._cfinfo
[CF_INFO_BITS
] &= ~ __kCFCharSetIsMutable
));}
144 CF_INLINE
void __CFCSetPutIsInverted(CFMutableCharacterSetRef cset
, Boolean isInverted
) {(isInverted
? (cset
->_base
._cfinfo
[CF_INFO_BITS
] |= __kCFCharSetIsInverted
) : (cset
->_base
._cfinfo
[CF_INFO_BITS
] &= ~__kCFCharSetIsInverted
));}
145 CF_INLINE
void __CFCSetPutHasHashValue(CFMutableCharacterSetRef cset
, Boolean hasHash
) {(hasHash
? (cset
->_base
._cfinfo
[CF_INFO_BITS
] |= __kCFCharSetHasHashValue
) : (cset
->_base
._cfinfo
[CF_INFO_BITS
] &= ~__kCFCharSetHasHashValue
));}
146 CF_INLINE
void __CFCSetPutClassType(CFMutableCharacterSetRef cset
, UInt32 classType
) {cset
->_base
._cfinfo
[CF_INFO_BITS
] &= ~__kCFCharSetClassTypeMask
; cset
->_base
._cfinfo
[CF_INFO_BITS
] |= classType
;}
148 CF_PRIVATE Boolean
__CFCharacterSetIsMutable(CFCharacterSetRef cset
) {return __CFCSetIsMutable(cset
);}
/* Inline accessor functions for the set contents stored in the _variants fields. */
152 CF_INLINE CFCharacterSetPredefinedSet
__CFCSetBuiltinType(CFCharacterSetRef cset
) {return cset
->_variants
._builtin
._type
;}
153 CF_INLINE UInt32
__CFCSetRangeFirstChar(CFCharacterSetRef cset
) {return cset
->_variants
._range
._firstChar
;}
154 CF_INLINE CFIndex
__CFCSetRangeLength(CFCharacterSetRef cset
) {return cset
->_variants
._range
._length
;}
155 CF_INLINE UniChar
*__CFCSetStringBuffer(CFCharacterSetRef cset
) {return (UniChar
*)(cset
->_variants
._string
._buffer
);}
156 CF_INLINE CFIndex
__CFCSetStringLength(CFCharacterSetRef cset
) {return cset
->_variants
._string
._length
;}
157 CF_INLINE
uint8_t *__CFCSetBitmapBits(CFCharacterSetRef cset
) {return cset
->_variants
._bitmap
._bits
;}
158 CF_INLINE
uint8_t *__CFCSetCompactBitmapBits(CFCharacterSetRef cset
) {return cset
->_variants
._compactBitmap
._cBits
;}
160 CF_INLINE
void __CFCSetPutBuiltinType(CFMutableCharacterSetRef cset
, CFCharacterSetPredefinedSet type
) {cset
->_variants
._builtin
._type
= type
;}
161 CF_INLINE
void __CFCSetPutRangeFirstChar(CFMutableCharacterSetRef cset
, UInt32 first
) {cset
->_variants
._range
._firstChar
= first
;}
162 CF_INLINE
void __CFCSetPutRangeLength(CFMutableCharacterSetRef cset
, CFIndex length
) {cset
->_variants
._range
._length
= length
;}
163 CF_INLINE
void __CFCSetPutStringBuffer(CFMutableCharacterSetRef cset
, UniChar
*theBuffer
) {cset
->_variants
._string
._buffer
= theBuffer
;}
164 CF_INLINE
void __CFCSetPutStringLength(CFMutableCharacterSetRef cset
, CFIndex length
) {cset
->_variants
._string
._length
= length
;}
165 CF_INLINE
void __CFCSetPutBitmapBits(CFMutableCharacterSetRef cset
, uint8_t *bits
) {cset
->_variants
._bitmap
._bits
= bits
;}
166 CF_INLINE
void __CFCSetPutCompactBitmapBits(CFMutableCharacterSetRef cset
, uint8_t *bits
) {cset
->_variants
._compactBitmap
._cBits
= bits
;}
/* Argument validation helpers.
   With CF_ENABLE_ASSERTIONS defined they are inline functions that assert on bad input;
   otherwise they are macros that expand to nothing.
   The integer arguments are cast to long and printed with %ld, because passing CFIndex-sized
   values to a %d conversion is a format/argument mismatch where long is wider than int. */
#if defined(CF_ENABLE_ASSERTIONS)
/* Asserts that `type` is in the range 1 ... __kCFLastBuiltinSetID. `func` is the caller name used in the message. */
CF_INLINE void __CFCSetValidateBuiltinType(CFCharacterSetPredefinedSet type, const char *func) {
    CFAssert2(type > 0 && type <= __kCFLastBuiltinSetID, __kCFLogAssertion, "%s: Unknown builtin type %ld", func, (long)type);
}

/* Asserts that `theRange` starts at a non-negative location and ends at or below 0x1FFFFF. */
CF_INLINE void __CFCSetValidateRange(CFRange theRange, const char *func) {
    CFAssert3(theRange.location >= 0 && theRange.location + theRange.length <= 0x1FFFFF, __kCFLogAssertion, "%s: Range out of Unicode range (location -> %ld length -> %ld)", func, (long)theRange.location, (long)theRange.length);
}

/* Checks that `cset` has the character set type ID and asserts that it is mutable. */
CF_INLINE void __CFCSetValidateTypeAndMutability(CFCharacterSetRef cset, const char *func) {
    __CFGenericValidateType(cset, __kCFCharacterSetTypeID);
    CFAssert1(__CFCSetIsMutable(cset), __kCFLogAssertion, "%s: Immutable character set passed to mutable function", func);
}
#else
#define __CFCSetValidateBuiltinType(t,f)
#define __CFCSetValidateRange(r,f)
#define __CFCSetValidateTypeAndMutability(r,f)
#endif /* CF_ENABLE_ASSERTIONS */
/* Inline utility functions. */
189 static Boolean
__CFCSetIsEqualBitmap(const UInt32
*bits1
, const UInt32
*bits2
) {
190 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
192 if (bits1
== bits2
) {
194 } else if (bits1
&& bits2
) {
195 if (bits1
== (const UInt32
*)-1) {
196 while (length
--) if ((UInt32
)-1 != *bits2
++) return false;
197 } else if (bits2
== (const UInt32
*)-1) {
198 while (length
--) if ((UInt32
)-1 != *bits1
++) return false;
200 while (length
--) if (*bits1
++ != *bits2
++) return false;
203 } else if (!bits1
&& !bits2
) { // empty set
206 if (bits2
) bits1
= bits2
;
207 if (bits1
== (const UInt32
*)-1) return false;
208 while (length
--) if (*bits1
++) return false;
213 CF_INLINE Boolean
__CFCSetIsEqualBitmapInverted(const UInt32
*bits1
, const UInt32
*bits2
) {
214 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
216 while (length
--) if (*bits1
++ != ~(*(bits2
++))) return false;
220 static Boolean
__CFCSetIsBitmapEqualToRange(const UInt32
*bits
, UniChar firstChar
, UniChar lastChar
, Boolean isInverted
) {
221 CFIndex firstCharIndex
= firstChar
>> LOG_BPB
;
222 CFIndex lastCharIndex
= lastChar
>> LOG_BPB
;
226 if (firstCharIndex
== lastCharIndex
) {
227 value
= ((((UInt32
)0xFF) << (firstChar
& (BITSPERBYTE
- 1))) & (((UInt32
)0xFF) >> ((BITSPERBYTE
- 1) - (lastChar
& (BITSPERBYTE
- 1))))) << (((sizeof(UInt32
) - 1) - (firstCharIndex
% sizeof(UInt32
))) * BITSPERBYTE
);
228 value
= CFSwapInt32HostToBig(value
);
229 firstCharIndex
= lastCharIndex
= firstChar
>> LOG_BPLW
;
230 if (*(bits
+ firstCharIndex
) != (isInverted
? ~value
: value
)) return FALSE
;
232 UInt32 firstCharMask
;
235 length
= firstCharIndex
% sizeof(UInt32
);
236 firstCharMask
= (((((UInt32
)0xFF) << (firstChar
& (BITSPERBYTE
- 1))) & 0xFF) << (((sizeof(UInt32
) - 1) - length
) * BITSPERBYTE
)) | (((UInt32
)0xFFFFFFFF) >> ((length
+ 1) * BITSPERBYTE
));
238 length
= lastCharIndex
% sizeof(UInt32
);
239 lastCharMask
= ((((UInt32
)0xFF) >> ((BITSPERBYTE
- 1) - (lastChar
& (BITSPERBYTE
- 1)))) << (((sizeof(UInt32
) - 1) - length
) * BITSPERBYTE
)) | (((UInt32
)0xFFFFFFFF) << ((sizeof(UInt32
) - length
) * BITSPERBYTE
));
241 firstCharIndex
= firstChar
>> LOG_BPLW
;
242 lastCharIndex
= lastChar
>> LOG_BPLW
;
244 if (firstCharIndex
== lastCharIndex
) {
245 firstCharMask
&= lastCharMask
;
246 value
= CFSwapInt32HostToBig(firstCharMask
& lastCharMask
);
247 if (*(bits
+ firstCharIndex
) != (isInverted
? ~value
: value
)) return FALSE
;
249 value
= CFSwapInt32HostToBig(firstCharMask
);
250 if (*(bits
+ firstCharIndex
) != (isInverted
? ~value
: value
)) return FALSE
;
252 value
= CFSwapInt32HostToBig(lastCharMask
);
253 if (*(bits
+ lastCharIndex
) != (isInverted
? ~value
: value
)) return FALSE
;
257 length
= firstCharIndex
;
258 value
= (isInverted
? ((UInt32
)0xFFFFFFFF) : 0);
260 if (*(bits
++) != value
) return FALSE
;
263 ++bits
; // Skip firstCharIndex
264 length
= (lastCharIndex
- (firstCharIndex
+ 1));
265 value
= (isInverted
? 0 : ((UInt32
)0xFFFFFFFF));
266 while (length
-- > 0) {
267 if (*(bits
++) != value
) return FALSE
;
269 if (firstCharIndex
!= lastCharIndex
) ++bits
;
271 length
= (0xFFFF >> LOG_BPLW
) - lastCharIndex
;
272 value
= (isInverted
? ((UInt32
)0xFFFFFFFF) : 0);
274 if (*(bits
++) != value
) return FALSE
;
280 CF_INLINE Boolean
__CFCSetIsBitmapSupersetOfBitmap(const UInt32
*bits1
, const UInt32
*bits2
, Boolean isInverted1
, Boolean isInverted2
) {
281 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
285 val2
= (isInverted2
? ~(*(bits2
++)) : *(bits2
++));
286 val1
= (isInverted1
? ~(*(bits1
++)) : *(bits1
++)) & val2
;
287 if (val1
!= val2
) return false;
293 CF_INLINE Boolean
__CFCSetHasNonBMPPlane(CFCharacterSetRef cset
) { return ((cset
)->_annex
&& (cset
)->_annex
->_validEntriesBitmap
? true : false); }
294 CF_INLINE Boolean
__CFCSetAnnexIsInverted (CFCharacterSetRef cset
) { return ((cset
)->_annex
&& (cset
)->_annex
->_isAnnexInverted
? true : false); }
295 CF_INLINE UInt32
__CFCSetAnnexValidEntriesBitmap(CFCharacterSetRef cset
) { return ((cset
)->_annex
? (cset
)->_annex
->_validEntriesBitmap
: 0); }
297 CF_INLINE Boolean
__CFCSetIsEmpty(CFCharacterSetRef cset
) {
298 if (__CFCSetHasNonBMPPlane(cset
) || __CFCSetAnnexIsInverted(cset
)) return false;
300 switch (__CFCSetClassType(cset
)) {
301 case __kCFCharSetClassRange
: if (!__CFCSetRangeLength(cset
)) return true; break;
302 case __kCFCharSetClassString
: if (!__CFCSetStringLength(cset
)) return true; break;
303 case __kCFCharSetClassBitmap
: if (!__CFCSetBitmapBits(cset
)) return true; break;
304 case __kCFCharSetClassCompactBitmap
: if (!__CFCSetCompactBitmapBits(cset
)) return true; break;
309 CF_INLINE
void __CFCSetBitmapAddCharacter(uint8_t *bitmap
, UniChar theChar
) {
310 bitmap
[(theChar
) >> LOG_BPB
] |= (((unsigned)1) << (theChar
& (BITSPERBYTE
- 1)));
313 CF_INLINE
void __CFCSetBitmapRemoveCharacter(uint8_t *bitmap
, UniChar theChar
) {
314 bitmap
[(theChar
) >> LOG_BPB
] &= ~(((unsigned)1) << (theChar
& (BITSPERBYTE
- 1)));
317 CF_INLINE Boolean
__CFCSetIsMemberBitmap(const uint8_t *bitmap
, UniChar theChar
) {
318 return ((bitmap
[(theChar
) >> LOG_BPB
] & (((unsigned)1) << (theChar
& (BITSPERBYTE
- 1)))) ? true : false);
321 #define NUM_32BIT_SLOTS (NUMCHARACTERS / 32)
323 CF_INLINE
void __CFCSetBitmapFastFillWithValue(UInt32
*bitmap
, uint8_t value
) {
324 UInt32 mask
= (value
<< 24) | (value
<< 16) | (value
<< 8) | value
;
325 UInt32 numSlots
= NUMCHARACTERS
/ 32;
327 while (numSlots
--) *(bitmap
++) = mask
;
330 CF_INLINE
void __CFCSetBitmapAddCharactersInRange(uint8_t *bitmap
, UniChar firstChar
, UniChar lastChar
) {
331 if (firstChar
== lastChar
) {
332 bitmap
[firstChar
>> LOG_BPB
] |= (((unsigned)1) << (firstChar
& (BITSPERBYTE
- 1)));
334 UInt32 idx
= firstChar
>> LOG_BPB
;
335 UInt32 max
= lastChar
>> LOG_BPB
;
338 bitmap
[idx
] |= (((unsigned)0xFF) << (firstChar
& (BITSPERBYTE
- 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE
- 1) - (lastChar
& (BITSPERBYTE
- 1))));
340 bitmap
[idx
] |= (((unsigned)0xFF) << (firstChar
& (BITSPERBYTE
- 1)));
341 bitmap
[max
] |= (((unsigned)0xFF) >> ((BITSPERBYTE
- 1) - (lastChar
& (BITSPERBYTE
- 1))));
344 while (idx
< max
) bitmap
[idx
++] = 0xFF;
349 CF_INLINE
void __CFCSetBitmapRemoveCharactersInRange(uint8_t *bitmap
, UniChar firstChar
, UniChar lastChar
) {
350 UInt32 idx
= firstChar
>> LOG_BPB
;
351 UInt32 max
= lastChar
>> LOG_BPB
;
354 bitmap
[idx
] &= ~((((unsigned)0xFF) << (firstChar
& (BITSPERBYTE
- 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE
- 1) - (lastChar
& (BITSPERBYTE
- 1)))));
356 bitmap
[idx
] &= ~(((unsigned)0xFF) << (firstChar
& (BITSPERBYTE
- 1)));
357 bitmap
[max
] &= ~(((unsigned)0xFF) >> ((BITSPERBYTE
- 1) - (lastChar
& (BITSPERBYTE
- 1))));
360 while (idx
< max
) bitmap
[idx
++] = 0;
/* Helpers for a per-plane flag bitmap: bit number `plane` of `bitmap` is the flag for that plane.
   SetPlane and ClearPlane modify `bitmap` in place (it must be an lvalue);
   GetPlane evaluates to a non-zero value when the bit is set. */
#define __CFCSetAnnexBitmapSetPlane(bitmap,plane) ((bitmap) |= (1 << (plane)))
#define __CFCSetAnnexBitmapClearPlane(bitmap,plane) ((bitmap) &= (~(1 << (plane))))
#define __CFCSetAnnexBitmapGetPlane(bitmap,plane) ((bitmap) & (1 << (plane)))
368 CF_INLINE
void __CFCSetAllocateAnnexForPlane(CFCharacterSetRef cset
, int plane
) {
369 if (cset
->_annex
== NULL
) {
370 ((CFMutableCharacterSetRef
)cset
)->_annex
= (CFCharSetAnnexStruct
*)CFAllocatorAllocate(CFGetAllocator(cset
), sizeof(CFCharSetAnnexStruct
), 0);
371 cset
->_annex
->_numOfAllocEntries
= plane
;
372 cset
->_annex
->_isAnnexInverted
= false;
373 cset
->_annex
->_validEntriesBitmap
= 0;
374 cset
->_annex
->_nonBMPPlanes
= ((plane
> 0) ? (CFCharacterSetRef
*)CFAllocatorAllocate(CFGetAllocator(cset
), sizeof(CFCharacterSetRef
) * plane
, 0) : NULL
);
375 } else if (cset
->_annex
->_numOfAllocEntries
< plane
) {
376 cset
->_annex
->_numOfAllocEntries
= plane
;
377 if (NULL
== cset
->_annex
->_nonBMPPlanes
) {
378 cset
->_annex
->_nonBMPPlanes
= (CFCharacterSetRef
*)CFAllocatorAllocate(CFGetAllocator(cset
), sizeof(CFCharacterSetRef
) * plane
, 0);
380 cset
->_annex
->_nonBMPPlanes
= (CFCharacterSetRef
*)CFAllocatorReallocate(CFGetAllocator(cset
), (void *)cset
->_annex
->_nonBMPPlanes
, sizeof(CFCharacterSetRef
) * plane
, 0);
385 CF_INLINE
void __CFCSetAnnexSetIsInverted(CFCharacterSetRef cset
, Boolean flag
) {
386 if (flag
) __CFCSetAllocateAnnexForPlane(cset
, 0);
387 if (cset
->_annex
) ((CFMutableCharacterSetRef
)cset
)->_annex
->_isAnnexInverted
= flag
;
390 CF_INLINE
void __CFCSetPutCharacterSetToAnnexPlane(CFCharacterSetRef cset
, CFCharacterSetRef annexCSet
, int plane
) {
391 __CFCSetAllocateAnnexForPlane(cset
, plane
);
392 if (__CFCSetAnnexBitmapGetPlane(cset
->_annex
->_validEntriesBitmap
, plane
)) CFRelease(cset
->_annex
->_nonBMPPlanes
[plane
- 1]);
394 cset
->_annex
->_nonBMPPlanes
[plane
- 1] = (CFCharacterSetRef
)CFRetain(annexCSet
);
395 __CFCSetAnnexBitmapSetPlane(cset
->_annex
->_validEntriesBitmap
, plane
);
397 __CFCSetAnnexBitmapClearPlane(cset
->_annex
->_validEntriesBitmap
, plane
);
401 CF_INLINE CFCharacterSetRef
__CFCSetGetAnnexPlaneCharacterSet(CFCharacterSetRef cset
, int plane
) {
402 __CFCSetAllocateAnnexForPlane(cset
, plane
);
403 if (!__CFCSetAnnexBitmapGetPlane(cset
->_annex
->_validEntriesBitmap
, plane
)) {
404 cset
->_annex
->_nonBMPPlanes
[plane
- 1] = (CFCharacterSetRef
)CFCharacterSetCreateMutable(CFGetAllocator(cset
));
405 __CFCSetAnnexBitmapSetPlane(cset
->_annex
->_validEntriesBitmap
, plane
);
407 return cset
->_annex
->_nonBMPPlanes
[plane
- 1];
410 CF_INLINE CFCharacterSetRef
__CFCSetGetAnnexPlaneCharacterSetNoAlloc(CFCharacterSetRef cset
, int plane
) {
411 return (cset
->_annex
&& __CFCSetAnnexBitmapGetPlane(cset
->_annex
->_validEntriesBitmap
, plane
) ? cset
->_annex
->_nonBMPPlanes
[plane
- 1] : NULL
);
414 CF_INLINE
void __CFCSetDeallocateAnnexPlane(CFCharacterSetRef cset
) {
418 for (idx
= 0;idx
< MAX_ANNEX_PLANE
;idx
++) {
419 if (__CFCSetAnnexBitmapGetPlane(cset
->_annex
->_validEntriesBitmap
, idx
+ 1)) {
420 CFRelease(cset
->_annex
->_nonBMPPlanes
[idx
]);
423 CFAllocatorDeallocate(CFGetAllocator(cset
), cset
->_annex
->_nonBMPPlanes
);
424 CFAllocatorDeallocate(CFGetAllocator(cset
), cset
->_annex
);
425 ((CFMutableCharacterSetRef
)cset
)->_annex
= NULL
;
429 CF_INLINE
uint8_t __CFCSetGetHeaderValue(const uint8_t *bitmap
, int *numPages
) {
430 uint8_t value
= *bitmap
;
432 if ((value
== 0) || (value
== UINT8_MAX
)) {
433 int numBytes
= __kCFCompactBitmapPageSize
- 1;
435 while (numBytes
> 0) {
436 if (*(++bitmap
) != value
) break;
439 if (numBytes
== 0) return value
;
441 return (uint8_t)(++(*numPages
));
444 CF_INLINE
bool __CFCSetIsMemberInCompactBitmap(const uint8_t *compactBitmap
, UTF16Char character
) {
445 uint8_t value
= compactBitmap
[(character
>> 8)]; // Assuming __kCFCompactBitmapNumPages == 256
449 } else if (value
== UINT8_MAX
) {
452 compactBitmap
+= (__kCFCompactBitmapNumPages
+ (__kCFCompactBitmapPageSize
* (value
- 1)));
453 character
&= 0xFF; // Assuming __kCFCompactBitmapNumPages == 256
454 return ((compactBitmap
[(character
/ BITSPERBYTE
)] & (1 << (character
% BITSPERBYTE
))) ? true : false);
458 CF_INLINE
uint32_t __CFCSetGetCompactBitmapSize(const uint8_t *compactBitmap
) {
459 uint32_t length
= __kCFCompactBitmapNumPages
;
460 uint32_t size
= __kCFCompactBitmapNumPages
;
463 while (length
-- > 0) {
464 value
= *(compactBitmap
++);
465 if ((value
!= 0) && (value
!= UINT8_MAX
)) size
+= __kCFCompactBitmapPageSize
;
470 CF_INLINE
void __CFExpandCompactBitmap(const uint8_t *src
, uint8_t *dst
) {
471 const uint8_t *srcBody
= src
+ __kCFCompactBitmapNumPages
;
475 for (i
= 0;i
< __kCFCompactBitmapNumPages
;i
++) {
477 if ((value
== 0) || (value
== UINT8_MAX
)) {
478 memset(dst
, value
, __kCFCompactBitmapPageSize
);
480 memmove(dst
, srcBody
, __kCFCompactBitmapPageSize
);
481 srcBody
+= __kCFCompactBitmapPageSize
;
483 dst
+= __kCFCompactBitmapPageSize
;
488 static void __CFCheckForExpandedSet(CFCharacterSetRef cset
) {
489 static int8_t __CFNumberOfPlanesForLogging
= -1;
490 static bool warnedOnce
= false;
492 if (0 > __CFNumberOfPlanesForLogging
) {
493 const char *envVar
= __CFgetenv("CFCharacterSetCheckForExpandedSet");
494 long value
= (envVar
? strtol_l(envVar
, NULL
, 0, NULL
) : 0);
495 __CFNumberOfPlanesForLogging
= (int8_t)(((value
> 0) && (value
<= 16)) ? value
: 0);
498 if (__CFNumberOfPlanesForLogging
) {
499 uint32_t entries
= __CFCSetAnnexValidEntriesBitmap(cset
);
503 if ((entries
& 1) && (++count
>= __CFNumberOfPlanesForLogging
)) {
505 CFLog(kCFLogLevelWarning
, CFSTR("An expanded CFMutableCharacter has been detected. Recommend to compact with CFCharacterSetCreateCopy"));
515 static void __CFCSetGetBitmap(CFCharacterSetRef cset
, uint8_t *bits
) {
517 CFIndex length
= __kCFBitmapSize
;
519 if (__CFCSetIsBitmap(cset
) && (bitmap
= __CFCSetBitmapBits(cset
))) {
520 memmove(bits
, bitmap
, __kCFBitmapSize
);
522 Boolean isInverted
= __CFCSetIsInverted(cset
);
523 uint8_t value
= (isInverted
? (uint8_t)-1 : 0);
526 while (length
--) *bitmap
++ = value
; // Initialize the buffer
528 if (!__CFCSetIsEmpty(cset
)) {
529 switch (__CFCSetClassType(cset
)) {
530 case __kCFCharSetClassBuiltin
: {
531 UInt8 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset
), 0, bits
, (isInverted
!= 0));
532 if (result
== kCFUniCharBitmapEmpty
&& isInverted
) {
533 length
= __kCFBitmapSize
;
535 while (length
--) *bitmap
++ = 0;
536 } else if (result
== kCFUniCharBitmapAll
&& !isInverted
) {
537 length
= __kCFBitmapSize
;
539 while (length
--) *bitmap
++ = (UInt8
)0xFF;
544 case __kCFCharSetClassRange
: {
545 UInt32 theChar
= __CFCSetRangeFirstChar(cset
);
546 if (theChar
< NUMCHARACTERS
) { // the range starts in BMP
547 length
= __CFCSetRangeLength(cset
);
548 if (theChar
+ length
>= NUMCHARACTERS
) length
= NUMCHARACTERS
- theChar
;
550 __CFCSetBitmapRemoveCharactersInRange(bits
, theChar
, (UniChar
)(theChar
+ length
) - 1);
552 __CFCSetBitmapAddCharactersInRange(bits
, theChar
, (UniChar
)(theChar
+ length
) - 1);
558 case __kCFCharSetClassString
: {
559 const UniChar
*buffer
= __CFCSetStringBuffer(cset
);
560 length
= __CFCSetStringLength(cset
);
561 while (length
--) (isInverted
? __CFCSetBitmapRemoveCharacter(bits
, *buffer
++) : __CFCSetBitmapAddCharacter(bits
, *buffer
++));
565 case __kCFCharSetClassCompactBitmap
:
566 __CFExpandCompactBitmap(__CFCSetCompactBitmapBits(cset
), bits
);
573 static Boolean
__CFCharacterSetEqual(CFTypeRef cf1
, CFTypeRef cf2
);
575 static Boolean
__CFCSetIsEqualAnnex(CFCharacterSetRef cf1
, CFCharacterSetRef cf2
) {
576 CFCharacterSetRef subSet1
;
577 CFCharacterSetRef subSet2
;
578 Boolean isAnnexInvertStateIdentical
= (__CFCSetAnnexIsInverted(cf1
) == __CFCSetAnnexIsInverted(cf2
) ? true: false);
581 if (isAnnexInvertStateIdentical
) {
582 if (__CFCSetAnnexValidEntriesBitmap(cf1
) != __CFCSetAnnexValidEntriesBitmap(cf2
)) return false;
583 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
584 subSet1
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1
, idx
);
585 subSet2
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2
, idx
);
587 if (subSet1
&& !__CFCharacterSetEqual(subSet1
, subSet2
)) return false;
590 uint8_t bitsBuf
[__kCFBitmapSize
];
591 uint8_t bitsBuf2
[__kCFBitmapSize
];
593 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
594 subSet1
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1
, idx
);
595 subSet2
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2
, idx
);
597 if (subSet1
== NULL
&& subSet2
== NULL
) {
599 } else if (subSet1
== NULL
) {
600 if (__CFCSetIsBitmap(subSet2
)) {
601 if (!__CFCSetIsEqualBitmap((const UInt32
*)__CFCSetBitmapBits(subSet2
), (const UInt32
*)-1)) {
605 __CFCSetGetBitmap(subSet2
, bitsBuf
);
606 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)-1)) {
610 } else if (subSet2
== NULL
) {
611 if (__CFCSetIsBitmap(subSet1
)) {
612 if (!__CFCSetIsEqualBitmap((const UInt32
*)__CFCSetBitmapBits(subSet1
), (const UInt32
*)-1)) {
616 __CFCSetGetBitmap(subSet1
, bitsBuf
);
617 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)-1)) {
622 Boolean isBitmap1
= __CFCSetIsBitmap(subSet1
);
623 Boolean isBitmap2
= __CFCSetIsBitmap(subSet2
);
625 if (isBitmap1
&& isBitmap2
) {
626 if (!__CFCSetIsEqualBitmapInverted((const UInt32
*)__CFCSetBitmapBits(subSet1
), (const UInt32
*)__CFCSetBitmapBits(subSet2
))) {
629 } else if (!isBitmap1
&& !isBitmap2
) {
630 __CFCSetGetBitmap(subSet1
, bitsBuf
);
631 __CFCSetGetBitmap(subSet2
, bitsBuf2
);
632 if (!__CFCSetIsEqualBitmapInverted((const UInt32
*)bitsBuf
, (const UInt32
*)bitsBuf2
)) {
637 CFCharacterSetRef tmp
= subSet2
;
641 __CFCSetGetBitmap(subSet2
, bitsBuf
);
642 if (!__CFCSetIsEqualBitmapInverted((const UInt32
*)__CFCSetBitmapBits(subSet1
), (const UInt32
*)bitsBuf
)) {
654 static uint8_t *__CFCreateCompactBitmap(CFAllocatorRef allocator
, const uint8_t *bitmap
) {
659 uint8_t header
[__kCFCompactBitmapNumPages
];
662 for (i
= 0;i
< __kCFCompactBitmapNumPages
;i
++) {
663 header
[i
] = __CFCSetGetHeaderValue(src
, &numPages
);
665 // Allocating more pages is probably not interesting enough to be compact
666 if (numPages
> __kCFCompactBitmapMaxPages
) return NULL
;
667 src
+= __kCFCompactBitmapPageSize
;
670 dst
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFCompactBitmapNumPages
+ (__kCFCompactBitmapPageSize
* numPages
), 0);
673 uint8_t *dstBody
= dst
+ __kCFCompactBitmapNumPages
;
676 for (i
= 0;i
< __kCFCompactBitmapNumPages
;i
++) {
679 if ((dst
[i
] != 0) && (dst
[i
] != UINT8_MAX
)) {
680 memmove(dstBody
, src
, __kCFCompactBitmapPageSize
);
681 dstBody
+= __kCFCompactBitmapPageSize
;
683 src
+= __kCFCompactBitmapPageSize
;
686 memmove(dst
, header
, __kCFCompactBitmapNumPages
);
692 static void __CFCSetMakeCompact(CFMutableCharacterSetRef cset
) {
693 if (__CFCSetIsBitmap(cset
) && __CFCSetBitmapBits(cset
)) {
694 uint8_t *bitmap
= __CFCSetBitmapBits(cset
);
695 uint8_t *cBitmap
= __CFCreateCompactBitmap(CFGetAllocator(cset
), bitmap
);
698 CFAllocatorDeallocate(CFGetAllocator(cset
), bitmap
);
699 __CFCSetPutClassType(cset
, __kCFCharSetClassCompactBitmap
);
700 __CFCSetPutCompactBitmapBits(cset
, cBitmap
);
705 static void __CFCSetAddNonBMPPlanesInRange(CFMutableCharacterSetRef cset
, CFRange range
) {
706 int firstChar
= (range
.location
& 0xFFFF);
707 int maxChar
= range
.location
+ range
.length
;
708 int idx
= range
.location
>> 16; // first plane
709 int maxPlane
= (maxChar
- 1) >> 16; // last plane
711 CFMutableCharacterSetRef annexPlane
;
715 for (idx
= (idx
? idx
: 1);idx
<= maxPlane
;idx
++) {
716 planeRange
.location
= __CFMax(firstChar
, 0);
717 planeRange
.length
= (idx
== maxPlane
&& maxChar
? maxChar
: 0x10000) - planeRange
.location
;
718 if (__CFCSetAnnexIsInverted(cset
)) {
719 if ((annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset
, idx
))) {
720 CFCharacterSetRemoveCharactersInRange(annexPlane
, planeRange
);
721 if (__CFCSetIsEmpty(annexPlane
) && !__CFCSetIsInverted(annexPlane
)) {
722 CFRelease(annexPlane
);
723 __CFCSetAnnexBitmapClearPlane(cset
->_annex
->_validEntriesBitmap
, idx
);
727 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(cset
, idx
), planeRange
);
730 if (!__CFCSetHasNonBMPPlane(cset
) && !__CFCSetAnnexIsInverted(cset
)) __CFCSetDeallocateAnnexPlane(cset
);
733 static void __CFCSetRemoveNonBMPPlanesInRange(CFMutableCharacterSetRef cset
, CFRange range
) {
734 int firstChar
= (range
.location
& 0xFFFF);
735 int maxChar
= range
.location
+ range
.length
;
736 int idx
= range
.location
>> 16; // first plane
737 int maxPlane
= (maxChar
- 1) >> 16; // last plane
739 CFMutableCharacterSetRef annexPlane
;
743 for (idx
= (idx
? idx
: 1);idx
<= maxPlane
;idx
++) {
744 planeRange
.location
= __CFMax(firstChar
, 0);
745 planeRange
.length
= (idx
== maxPlane
&& maxChar
? maxChar
: 0x10000) - planeRange
.location
;
746 if (__CFCSetAnnexIsInverted(cset
)) {
747 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(cset
, idx
), planeRange
);
749 if ((annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset
, idx
))) {
750 CFCharacterSetRemoveCharactersInRange(annexPlane
, planeRange
);
751 if(__CFCSetIsEmpty(annexPlane
) && !__CFCSetIsInverted(annexPlane
)) {
752 CFRelease(annexPlane
);
753 __CFCSetAnnexBitmapClearPlane(cset
->_annex
->_validEntriesBitmap
, idx
);
758 if (!__CFCSetHasNonBMPPlane(cset
) && !__CFCSetAnnexIsInverted(cset
)) __CFCSetDeallocateAnnexPlane(cset
);
761 static void __CFCSetMakeBitmap(CFMutableCharacterSetRef cset
) {
762 if (!__CFCSetIsBitmap(cset
) || !__CFCSetBitmapBits(cset
)) {
763 CFAllocatorRef allocator
= CFGetAllocator(cset
);
764 uint8_t *bitmap
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFBitmapSize
, 0);
765 __CFCSetGetBitmap(cset
, bitmap
);
767 if (__CFCSetIsBuiltin(cset
)) {
768 CFIndex numPlanes
= CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(cset
));
771 CFMutableCharacterSetRef annexSet
;
772 uint8_t *annexBitmap
= NULL
;
776 __CFCSetAllocateAnnexForPlane(cset
, numPlanes
- 1);
777 for (idx
= 1;idx
< numPlanes
;idx
++) {
778 if (NULL
== annexBitmap
) {
779 annexBitmap
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFBitmapSize
, 0);
781 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset
), idx
, annexBitmap
, false);
782 if (result
== kCFUniCharBitmapEmpty
) continue;
783 if (result
== kCFUniCharBitmapAll
) {
784 CFIndex bitmapLength
= __kCFBitmapSize
;
785 uint8_t *bytes
= annexBitmap
;
786 while (bitmapLength
-- > 0) *(bytes
++) = (uint8_t)0xFF;
788 annexSet
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(cset
, idx
);
789 __CFCSetPutClassType(annexSet
, __kCFCharSetClassBitmap
);
790 __CFCSetPutBitmapBits(annexSet
, annexBitmap
);
791 __CFCSetPutIsInverted(annexSet
, false);
792 __CFCSetPutHasHashValue(annexSet
, false);
795 if (annexBitmap
) CFAllocatorDeallocate(allocator
, annexBitmap
);
797 } else if (__CFCSetIsCompactBitmap(cset
) && __CFCSetCompactBitmapBits(cset
)) {
798 CFAllocatorDeallocate(allocator
, __CFCSetCompactBitmapBits(cset
));
799 __CFCSetPutCompactBitmapBits(cset
, NULL
);
800 } else if (__CFCSetIsString(cset
) && __CFCSetStringBuffer(cset
)) {
801 CFAllocatorDeallocate(allocator
, __CFCSetStringBuffer(cset
));
802 __CFCSetPutStringBuffer(cset
, NULL
);
803 } else if (__CFCSetIsRange(cset
)) { // We may have to allocate annex here
804 Boolean needsToInvert
= (!__CFCSetHasNonBMPPlane(cset
) && __CFCSetIsInverted(cset
) ? true : false);
805 __CFCSetAddNonBMPPlanesInRange(cset
, CFRangeMake(__CFCSetRangeFirstChar(cset
), __CFCSetRangeLength(cset
)));
806 if (needsToInvert
) __CFCSetAnnexSetIsInverted(cset
, true);
808 __CFCSetPutClassType(cset
, __kCFCharSetClassBitmap
);
809 __CFCSetPutBitmapBits(cset
, bitmap
);
810 __CFCSetPutIsInverted(cset
, false);
814 CF_INLINE CFMutableCharacterSetRef
__CFCSetGenericCreate(CFAllocatorRef allocator
, UInt32 flags
) {
815 CFMutableCharacterSetRef cset
;
816 CFIndex size
= sizeof(struct __CFCharacterSet
) - sizeof(CFRuntimeBase
);
818 cset
= (CFMutableCharacterSetRef
)_CFRuntimeCreateInstance(allocator
, CFCharacterSetGetTypeID(), size
, NULL
);
819 if (NULL
== cset
) return NULL
;
821 cset
->_base
._cfinfo
[CF_INFO_BITS
] |= flags
;
822 cset
->_hashValue
= 0;
828 static void __CFApplySurrogatesInString(CFMutableCharacterSetRef cset
, CFStringRef string
, void (*applyer
)(CFMutableCharacterSetRef
, CFRange
)) {
829 CFStringInlineBuffer buffer
;
830 CFIndex index
, length
= CFStringGetLength(string
);
831 CFRange range
= CFRangeMake(0, 0);
834 CFStringInitInlineBuffer(string
, &buffer
, CFRangeMake(0, length
));
836 for (index
= 0;index
< length
;index
++) {
837 character
= __CFStringGetCharacterFromInlineBufferQuick(&buffer
, index
);
839 if (CFStringIsSurrogateHighCharacter(character
) && ((index
+ 1) < length
)) {
840 UTF16Char other
= __CFStringGetCharacterFromInlineBufferQuick(&buffer
, index
+ 1);
842 if (CFStringIsSurrogateLowCharacter(other
)) {
843 character
= CFStringGetLongCharacterForSurrogatePair(character
, other
);
845 if ((range
.length
+ range
.location
) == character
) {
848 if (range
.length
> 0) applyer(cset
, range
);
849 range
.location
= character
;
854 ++index
; // skip the low surrogate
858 if (range
.length
> 0) applyer(cset
, range
);
862 /* Bsearch theChar for __kCFCharSetClassString
864 CF_INLINE Boolean
__CFCSetBsearchUniChar(const UniChar
*theTable
, CFIndex length
, UniChar theChar
) {
865 const UniChar
*p
, *q
, *divider
;
867 if ((theChar
< theTable
[0]) || (theChar
> theTable
[length
- 1])) return false;
870 q
= p
+ (length
- 1);
872 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
873 if (theChar
< *divider
) q
= divider
- 1;
874 else if (theChar
> *divider
) p
= divider
+ 1;
880 /* Array of instantiated builtin set. Note builtin set ID starts with 1 so the array index is ID - 1
882 static CFCharacterSetRef
*__CFBuiltinSets
= NULL
;
884 /* Global lock for character set
886 static OSSpinLock __CFCharacterSetLock
= OS_SPINLOCK_INIT
;
888 /* CFBase API functions
890 static Boolean
__CFCharacterSetEqual(CFTypeRef cf1
, CFTypeRef cf2
) {
891 Boolean isInvertStateIdentical
= (__CFCSetIsInverted((CFCharacterSetRef
)cf1
) == __CFCSetIsInverted((CFCharacterSetRef
)cf2
) ? true: false);
892 Boolean isAnnexInvertStateIdentical
= (__CFCSetAnnexIsInverted((CFCharacterSetRef
)cf1
) == __CFCSetAnnexIsInverted((CFCharacterSetRef
)cf2
) ? true: false);
894 CFCharacterSetRef subSet1
;
895 uint8_t bitsBuf
[__kCFBitmapSize
];
900 if (__CFCSetHasHashValue((CFCharacterSetRef
)cf1
) && __CFCSetHasHashValue((CFCharacterSetRef
)cf2
) && ((CFCharacterSetRef
)cf1
)->_hashValue
!= ((CFCharacterSetRef
)cf2
)->_hashValue
) return false;
901 if (__CFCSetIsEmpty((CFCharacterSetRef
)cf1
) && __CFCSetIsEmpty((CFCharacterSetRef
)cf2
) && !isInvertStateIdentical
) return false;
903 if ((__CFCSetClassType((CFCharacterSetRef
)cf1
) == __CFCSetClassType((CFCharacterSetRef
)cf2
)) && !__CFCSetIsCompactBitmap((CFCharacterSetRef
)cf1
)) { // Types are identical, we can do it fast
904 switch (__CFCSetClassType((CFCharacterSetRef
)cf1
)) {
905 case __kCFCharSetClassBuiltin
:
906 return (__CFCSetBuiltinType((CFCharacterSetRef
)cf1
) == __CFCSetBuiltinType((CFCharacterSetRef
)cf2
) && isInvertStateIdentical
? true : false);
908 case __kCFCharSetClassRange
:
909 return (__CFCSetRangeFirstChar((CFCharacterSetRef
)cf1
) == __CFCSetRangeFirstChar((CFCharacterSetRef
)cf2
) && __CFCSetRangeLength((CFCharacterSetRef
)cf1
) && __CFCSetRangeLength((CFCharacterSetRef
)cf2
) && isInvertStateIdentical
? true : false);
911 case __kCFCharSetClassString
:
912 if (isInvertStateIdentical
) {
913 const UniChar
*buf1
= __CFCSetStringBuffer((CFCharacterSetRef
)cf1
);
914 const UniChar
*buf1End
= buf1
+ __CFCSetStringLength((CFCharacterSetRef
)cf1
);
915 const UniChar
*buf2
= __CFCSetStringBuffer((CFCharacterSetRef
)cf2
);
916 const UniChar
*buf2End
= buf2
+ __CFCSetStringLength((CFCharacterSetRef
)cf2
);
918 while ((buf1
< buf1End
) && (buf2
< buf2End
)) {
919 UniChar char1
= *buf1
;
920 UniChar char2
= *buf2
;
922 if (char1
!= char2
) return false;
924 do { ++buf1
; } while ((buf1
< buf1End
) && (char1
== *buf1
));
925 do { ++buf2
; } while ((buf2
< buf2End
) && (char2
== *buf2
));
932 case __kCFCharSetClassBitmap
:
933 if (!__CFCSetIsEqualBitmap((const UInt32
*)__CFCSetBitmapBits((CFCharacterSetRef
)cf1
), (const UInt32
*)__CFCSetBitmapBits((CFCharacterSetRef
)cf2
))) return false;
936 return __CFCSetIsEqualAnnex((CFCharacterSetRef
)cf1
, (CFCharacterSetRef
)cf2
);
939 // Check for easy empty cases
940 if (__CFCSetIsEmpty((CFCharacterSetRef
)cf1
) || __CFCSetIsEmpty((CFCharacterSetRef
)cf2
)) {
941 CFCharacterSetRef emptySet
= (__CFCSetIsEmpty((CFCharacterSetRef
)cf1
) ? (CFCharacterSetRef
)cf1
: (CFCharacterSetRef
)cf2
);
942 CFCharacterSetRef nonEmptySet
= (emptySet
== cf1
? (CFCharacterSetRef
)cf2
: (CFCharacterSetRef
)cf1
);
944 if (__CFCSetIsBuiltin(nonEmptySet
)) {
946 } else if (__CFCSetIsRange(nonEmptySet
)) {
947 if (isInvertStateIdentical
) {
948 return (__CFCSetRangeLength(nonEmptySet
) ? false : true);
950 return (__CFCSetRangeLength(nonEmptySet
) == 0x110000 ? true : false);
953 if (__CFCSetAnnexIsInverted(nonEmptySet
)) {
954 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet
) != 0x1FFFE) return false;
956 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet
)) return false;
959 if (__CFCSetIsBitmap(nonEmptySet
)) {
960 bits
= __CFCSetBitmapBits(nonEmptySet
);
963 __CFCSetGetBitmap(nonEmptySet
, bitsBuf
);
966 if (__CFCSetIsEqualBitmap(NULL
, (const UInt32
*)bits
)) {
967 if (!__CFCSetAnnexIsInverted(nonEmptySet
)) return true;
972 // Annex set has to be CFRangeMake(0x10000, 0xfffff)
973 for (idx
= 1;idx
< MAX_ANNEX_PLANE
;idx
++) {
974 if (__CFCSetIsBitmap(nonEmptySet
)) {
975 if (!__CFCSetIsEqualBitmap((__CFCSetAnnexIsInverted(nonEmptySet
) ? NULL
: (const UInt32
*)-1), (const UInt32
*)bitsBuf
)) return false;
977 __CFCSetGetBitmap(__CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonEmptySet
, idx
), bitsBuf
);
978 if (!__CFCSetIsEqualBitmap((const UInt32
*)-1, (const UInt32
*)bitsBuf
)) return false;
985 if (__CFCSetIsBuiltin((CFCharacterSetRef
)cf1
) || __CFCSetIsBuiltin((CFCharacterSetRef
)cf2
)) {
986 CFCharacterSetRef builtinSet
= (__CFCSetIsBuiltin((CFCharacterSetRef
)cf1
) ? (CFCharacterSetRef
)cf1
: (CFCharacterSetRef
)cf2
);
987 CFCharacterSetRef nonBuiltinSet
= (builtinSet
== cf1
? (CFCharacterSetRef
)cf2
: (CFCharacterSetRef
)cf1
);
990 if (__CFCSetIsRange(nonBuiltinSet
)) {
991 UTF32Char firstChar
= __CFCSetRangeFirstChar(nonBuiltinSet
);
992 UTF32Char lastChar
= (firstChar
+ __CFCSetRangeLength(nonBuiltinSet
) - 1);
993 uint8_t firstPlane
= (firstChar
>> 16) & 0xFF;
994 uint8_t lastPlane
= (lastChar
>> 16) & 0xFF;
997 for (idx
= 0;idx
< MAX_ANNEX_PLANE
;idx
++) {
998 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet
), idx
, bitsBuf
, (isInvertStateIdentical
!= 0));
1000 if (idx
< firstPlane
|| idx
> lastPlane
) {
1001 if (result
== kCFUniCharBitmapAll
) {
1003 } else if (result
== kCFUniCharBitmapFilled
) {
1004 if (!__CFCSetIsEqualBitmap(NULL
, (const UInt32
*)bitsBuf
)) return false;
1006 } else if (idx
> firstPlane
&& idx
< lastPlane
) {
1007 if (result
== kCFUniCharBitmapEmpty
) {
1009 } else if (result
== kCFUniCharBitmapFilled
) {
1010 if (!__CFCSetIsEqualBitmap((const UInt32
*)-1, (const UInt32
*)bitsBuf
)) return false;
1013 if (result
== kCFUniCharBitmapEmpty
) {
1015 } else if (result
== kCFUniCharBitmapAll
) {
1016 if (idx
== firstPlane
) {
1017 if (((firstChar
& 0xFFFF) != 0) || (firstPlane
== lastPlane
&& ((lastChar
& 0xFFFF) != 0xFFFF))) return false;
1019 if (((lastChar
& 0xFFFF) != 0xFFFF) || (firstPlane
== lastPlane
&& ((firstChar
& 0xFFFF) != 0))) return false;
1022 if (idx
== firstPlane
) {
1023 if (!__CFCSetIsBitmapEqualToRange((const UInt32
*)bitsBuf
, firstChar
& 0xFFFF, (firstPlane
== lastPlane
? lastChar
& 0xFFFF : 0xFFFF), false)) return false;
1025 if (!__CFCSetIsBitmapEqualToRange((const UInt32
*)bitsBuf
, (firstPlane
== lastPlane
? firstChar
& 0xFFFF : 0), lastChar
& 0xFFFF, false)) return false;
1032 uint8_t bitsBuf2
[__kCFBitmapSize
];
1035 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet
), 0, bitsBuf
, (__CFCSetIsInverted(builtinSet
) != 0));
1036 if (result
== kCFUniCharBitmapFilled
) {
1037 if (__CFCSetIsBitmap(nonBuiltinSet
)) {
1038 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)__CFCSetBitmapBits(nonBuiltinSet
))) return false;
1041 __CFCSetGetBitmap(nonBuiltinSet
, bitsBuf2
);
1042 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)bitsBuf2
)) {
1047 if (__CFCSetIsBitmap(nonBuiltinSet
)) {
1048 if (!__CFCSetIsEqualBitmap((result
== kCFUniCharBitmapAll
? (const UInt32
*)-1 : NULL
), (const UInt32
*)__CFCSetBitmapBits(nonBuiltinSet
))) return false;
1050 __CFCSetGetBitmap(nonBuiltinSet
, bitsBuf
);
1051 if (!__CFCSetIsEqualBitmap((result
== kCFUniCharBitmapAll
? (const UInt32
*)-1: NULL
), (const UInt32
*)bitsBuf
)) return false;
1055 isInvertStateIdentical
= (__CFCSetIsInverted(builtinSet
) == __CFCSetAnnexIsInverted(nonBuiltinSet
) ? true : false);
1057 for (idx
= 1;idx
< MAX_ANNEX_PLANE
;idx
++) {
1058 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet
), idx
, bitsBuf
, !isInvertStateIdentical
);
1059 subSet1
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonBuiltinSet
, idx
);
1061 if (result
== kCFUniCharBitmapFilled
) {
1062 if (NULL
== subSet1
) {
1064 } else if (__CFCSetIsBitmap(subSet1
)) {
1065 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)__CFCSetBitmapBits(subSet1
))) {
1070 __CFCSetGetBitmap(subSet1
, bitsBuf2
);
1071 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)bitsBuf2
)) {
1076 if (NULL
== subSet1
) {
1077 if (result
== kCFUniCharBitmapAll
) {
1080 } else if (__CFCSetIsBitmap(subSet1
)) {
1081 if (!__CFCSetIsEqualBitmap((result
== kCFUniCharBitmapAll
? (const UInt32
*)-1: NULL
), (const UInt32
*)__CFCSetBitmapBits(subSet1
))) {
1085 __CFCSetGetBitmap(subSet1
, bitsBuf
);
1086 if (!__CFCSetIsEqualBitmap((result
== kCFUniCharBitmapAll
? (const UInt32
*)-1: NULL
), (const UInt32
*)bitsBuf
)) {
1096 if (__CFCSetIsRange((CFCharacterSetRef
)cf1
) || __CFCSetIsRange((CFCharacterSetRef
)cf2
)) {
1097 CFCharacterSetRef rangeSet
= (__CFCSetIsRange((CFCharacterSetRef
)cf1
) ? (CFCharacterSetRef
)cf1
: (CFCharacterSetRef
)cf2
);
1098 CFCharacterSetRef nonRangeSet
= (rangeSet
== cf1
? (CFCharacterSetRef
)cf2
: (CFCharacterSetRef
)cf1
);
1099 UTF32Char firstChar
= __CFCSetRangeFirstChar(rangeSet
);
1100 UTF32Char lastChar
= (firstChar
+ __CFCSetRangeLength(rangeSet
) - 1);
1101 uint8_t firstPlane
= (firstChar
>> 16) & 0xFF;
1102 uint8_t lastPlane
= (lastChar
>> 16) & 0xFF;
1103 Boolean isRangeSetInverted
= __CFCSetIsInverted(rangeSet
);
1105 if (__CFCSetIsBitmap(nonRangeSet
)) {
1106 bits
= __CFCSetBitmapBits(nonRangeSet
);
1109 __CFCSetGetBitmap(nonRangeSet
, bitsBuf
);
1111 if (firstPlane
== 0) {
1112 if (!__CFCSetIsBitmapEqualToRange((const UInt32
*)bits
, firstChar
, (lastPlane
== 0 ? lastChar
: 0xFFFF), isRangeSetInverted
)) return false;
1116 if (!__CFCSetIsEqualBitmap((const UInt32
*)bits
, (isRangeSetInverted
? (const UInt32
*)-1 : NULL
))) return false;
1117 firstChar
&= 0xFFFF;
1122 isAnnexInvertStateIdentical
= (isRangeSetInverted
== __CFCSetAnnexIsInverted(nonRangeSet
) ? true : false);
1124 for (idx
= 1;idx
< MAX_ANNEX_PLANE
;idx
++) {
1125 subSet1
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonRangeSet
, idx
);
1126 if (NULL
== subSet1
) {
1127 if (idx
< firstPlane
|| idx
> lastPlane
) {
1128 if (!isAnnexInvertStateIdentical
) return false;
1129 } else if (idx
> firstPlane
&& idx
< lastPlane
) {
1130 if (isAnnexInvertStateIdentical
) return false;
1131 } else if (idx
== firstPlane
) {
1132 if (isAnnexInvertStateIdentical
|| firstChar
|| (idx
== lastPlane
&& lastChar
!= 0xFFFF)) return false;
1133 } else if (idx
== lastPlane
) {
1134 if (isAnnexInvertStateIdentical
|| (idx
== firstPlane
&& firstChar
) || (lastChar
!= 0xFFFF)) return false;
1137 if (__CFCSetIsBitmap(subSet1
)) {
1138 bits
= __CFCSetBitmapBits(subSet1
);
1140 __CFCSetGetBitmap(subSet1
, bitsBuf
);
1144 if (idx
< firstPlane
|| idx
> lastPlane
) {
1145 if (!__CFCSetIsEqualBitmap((const UInt32
*)bits
, (isAnnexInvertStateIdentical
? NULL
: (const UInt32
*)-1))) return false;
1146 } else if (idx
> firstPlane
&& idx
< lastPlane
) {
1147 if (!__CFCSetIsEqualBitmap((const UInt32
*)bits
, (isAnnexInvertStateIdentical
? (const UInt32
*)-1 : NULL
))) return false;
1148 } else if (idx
== firstPlane
) {
1149 if (!__CFCSetIsBitmapEqualToRange((const UInt32
*)bits
, firstChar
, (idx
== lastPlane
? lastChar
: 0xFFFF), !isAnnexInvertStateIdentical
)) return false;
1150 } else if (idx
== lastPlane
) {
1151 if (!__CFCSetIsBitmapEqualToRange((const UInt32
*)bits
, (idx
== firstPlane
? firstChar
: 0), lastChar
, !isAnnexInvertStateIdentical
)) return false;
1158 isBitmap1
= __CFCSetIsBitmap((CFCharacterSetRef
)cf1
);
1159 isBitmap2
= __CFCSetIsBitmap((CFCharacterSetRef
)cf2
);
1161 if (isBitmap1
&& isBitmap2
) {
1162 if (!__CFCSetIsEqualBitmap((const UInt32
*)__CFCSetBitmapBits((CFCharacterSetRef
)cf1
), (const UInt32
*)__CFCSetBitmapBits((CFCharacterSetRef
)cf2
))) return false;
1163 } else if (!isBitmap1
&& !isBitmap2
) {
1164 uint8_t bitsBuf2
[__kCFBitmapSize
];
1166 __CFCSetGetBitmap((CFCharacterSetRef
)cf1
, bitsBuf
);
1167 __CFCSetGetBitmap((CFCharacterSetRef
)cf2
, bitsBuf2
);
1169 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)bitsBuf2
)) {
1174 CFCharacterSetRef tmp
= (CFCharacterSetRef
)cf2
;
1179 __CFCSetGetBitmap((CFCharacterSetRef
)cf2
, bitsBuf
);
1181 if (!__CFCSetIsEqualBitmap((const UInt32
*)__CFCSetBitmapBits((CFCharacterSetRef
)cf1
), (const UInt32
*)bitsBuf
)) return false;
1183 return __CFCSetIsEqualAnnex((CFCharacterSetRef
)cf1
, (CFCharacterSetRef
)cf2
);
1186 static CFHashCode
__CFCharacterSetHash(CFTypeRef cf
) {
1187 if (!__CFCSetHasHashValue((CFCharacterSetRef
)cf
)) {
1188 if (__CFCSetIsEmpty((CFCharacterSetRef
)cf
)) {
1189 ((CFMutableCharacterSetRef
)cf
)->_hashValue
= (__CFCSetIsInverted((CFCharacterSetRef
)cf
) ? ((UInt32
)0xFFFFFFFF) : 0);
1190 } else if (__CFCSetIsBitmap( (CFCharacterSetRef
) cf
)) {
1191 ((CFMutableCharacterSetRef
)cf
)->_hashValue
= CFHashBytes(__CFCSetBitmapBits((CFCharacterSetRef
)cf
), __kCFBitmapSize
);
1193 uint8_t bitsBuf
[__kCFBitmapSize
];
1194 __CFCSetGetBitmap((CFCharacterSetRef
)cf
, bitsBuf
);
1195 ((CFMutableCharacterSetRef
)cf
)->_hashValue
= CFHashBytes(bitsBuf
, __kCFBitmapSize
);
1197 __CFCSetPutHasHashValue((CFMutableCharacterSetRef
)cf
, true);
1199 return ((CFCharacterSetRef
)cf
)->_hashValue
;
1202 static CFStringRef
__CFCharacterSetCopyDescription(CFTypeRef cf
) {
1203 CFMutableStringRef string
;
1207 if (__CFCSetIsEmpty((CFCharacterSetRef
)cf
)) {
1208 return (CFStringRef
)(__CFCSetIsInverted((CFCharacterSetRef
)cf
) ? CFRetain(CFSTR("<CFCharacterSet All>")) : CFRetain(CFSTR("<CFCharacterSet Empty>")));
1211 switch (__CFCSetClassType((CFCharacterSetRef
)cf
)) {
1212 case __kCFCharSetClassBuiltin
:
1213 switch (__CFCSetBuiltinType((CFCharacterSetRef
)cf
)) {
1214 case kCFCharacterSetControl
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Control Set>"));
1215 case kCFCharacterSetWhitespace
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Whitespace Set>"));
1216 case kCFCharacterSetWhitespaceAndNewline
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined WhitespaceAndNewline Set>"));
1217 case kCFCharacterSetDecimalDigit
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined DecimalDigit Set>"));
1218 case kCFCharacterSetLetter
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Letter Set>"));
1219 case kCFCharacterSetLowercaseLetter
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined LowercaseLetter Set>"));
1220 case kCFCharacterSetUppercaseLetter
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined UppercaseLetter Set>"));
1221 case kCFCharacterSetNonBase
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined NonBase Set>"));
1222 case kCFCharacterSetDecomposable
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Decomposable Set>"));
1223 case kCFCharacterSetAlphaNumeric
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined AlphaNumeric Set>"));
1224 case kCFCharacterSetPunctuation
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Punctuation Set>"));
1225 case kCFCharacterSetIllegal
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Illegal Set>"));
1226 case kCFCharacterSetCapitalizedLetter
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined CapitalizedLetter Set>"));
1227 case kCFCharacterSetSymbol
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Symbol Set>"));
1228 case kCFCharacterSetNewline
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Newline Set>"));
1232 case __kCFCharSetClassRange
:
1233 return CFStringCreateWithFormat(CFGetAllocator((CFCharacterSetRef
)cf
), NULL
, CFSTR("<CFCharacterSet Range(%u, %ld)>"), (unsigned int)__CFCSetRangeFirstChar((CFCharacterSetRef
)cf
), (long)__CFCSetRangeLength((CFCharacterSetRef
)cf
));
1235 case __kCFCharSetClassString
: {
1236 CFStringRef format
= CFSTR("<CFCharacterSet Items(");
1238 length
= __CFCSetStringLength((CFCharacterSetRef
)cf
);
1239 string
= CFStringCreateMutable(CFGetAllocator(cf
), CFStringGetLength(format
) + 7 * length
+ 2); // length of format + "U+XXXX "(7) * length + ")>"(2)
1240 CFStringAppend(string
, format
);
1241 for (idx
= 0;idx
< length
;idx
++) {
1242 CFStringAppendFormat(string
, NULL
, CFSTR("%sU+%04X"), (idx
> 0 ? " " : ""), (unsigned int)((__CFCSetStringBuffer((CFCharacterSetRef
)cf
))[idx
]));
1244 CFStringAppend(string
, CFSTR(")>"));
1248 case __kCFCharSetClassBitmap
:
1249 case __kCFCharSetClassCompactBitmap
:
1250 return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Bitmap>")); // ??? Should generate description for 8k bitmap ?
1252 CFAssert1(0, __kCFLogAssertion
, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__
); // We should never come here
1256 static void __CFCharacterSetDeallocate(CFTypeRef cf
) {
1257 CFAllocatorRef allocator
= CFGetAllocator(cf
);
1259 if (__CFCSetIsBuiltin((CFCharacterSetRef
)cf
) && !__CFCSetIsMutable((CFCharacterSetRef
)cf
) && !__CFCSetIsInverted((CFCharacterSetRef
)cf
)) {
1260 CFCharacterSetRef sharedSet
= CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef
)cf
));
1261 if (sharedSet
== cf
) { // We're trying to dealloc the builtin set
1262 CFAssert1(0, __kCFLogAssertion
, "%s: Trying to deallocate predefined set. The process is likely to crash.", __PRETTY_FUNCTION__
);
1263 return; // We never deallocate builtin set
1267 if (__CFCSetIsString((CFCharacterSetRef
)cf
) && __CFCSetStringBuffer((CFCharacterSetRef
)cf
)) CFAllocatorDeallocate(allocator
, __CFCSetStringBuffer((CFCharacterSetRef
)cf
));
1268 else if (__CFCSetIsBitmap((CFCharacterSetRef
)cf
) && __CFCSetBitmapBits((CFCharacterSetRef
)cf
)) CFAllocatorDeallocate(allocator
, __CFCSetBitmapBits((CFCharacterSetRef
)cf
));
1269 else if (__CFCSetIsCompactBitmap((CFCharacterSetRef
)cf
) && __CFCSetCompactBitmapBits((CFCharacterSetRef
)cf
)) CFAllocatorDeallocate(allocator
, __CFCSetCompactBitmapBits((CFCharacterSetRef
)cf
));
1270 __CFCSetDeallocateAnnexPlane((CFCharacterSetRef
)cf
);
1273 static CFTypeID __kCFCharacterSetTypeID
= _kCFRuntimeNotATypeID
;
1275 static const CFRuntimeClass __CFCharacterSetClass
= {
1280 __CFCharacterSetDeallocate
,
1281 __CFCharacterSetEqual
,
1282 __CFCharacterSetHash
,
1284 __CFCharacterSetCopyDescription
1287 static bool __CFCheckForExapendedSet
= false;
1289 CF_PRIVATE
void __CFCharacterSetInitialize(void) {
1290 static dispatch_once_t initOnce
;
1291 dispatch_once(&initOnce
, ^{
1292 __kCFCharacterSetTypeID
= _CFRuntimeRegisterClass(&__CFCharacterSetClass
); // initOnce covered
1293 const char *checkForExpandedSet
= __CFgetenv("__CF_DEBUG_EXPANDED_SET");
1294 if (checkForExpandedSet
&& (*checkForExpandedSet
== 'Y')) __CFCheckForExapendedSet
= true;
1295 __CFBuiltinSets
= (CFCharacterSetRef
*)CFAllocatorAllocate((CFAllocatorRef
)CFRetain(__CFGetDefaultAllocator()), sizeof(CFCharacterSetRef
) * __kCFLastBuiltinSetID
, 0);
1296 memset(__CFBuiltinSets
, 0, sizeof(CFCharacterSetRef
) * __kCFLastBuiltinSetID
);
1303 CFTypeID
CFCharacterSetGetTypeID(void) {
1304 return __kCFCharacterSetTypeID
;
1307 /*** CharacterSet creation ***/
1308 /* Functions to create basic immutable characterset.
1310 CFCharacterSetRef
CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier
) {
1311 CFCharacterSetRef cset
;
1313 __CFCSetValidateBuiltinType(theSetIdentifier
, __PRETTY_FUNCTION__
);
1315 OSSpinLockLock(&__CFCharacterSetLock
);
1316 cset
= ((NULL
!= __CFBuiltinSets
) ? __CFBuiltinSets
[theSetIdentifier
- 1] : NULL
);
1317 OSSpinLockUnlock(&__CFCharacterSetLock
);
1319 if (NULL
!= cset
) return cset
;
1321 if (!(cset
= __CFCSetGenericCreate(kCFAllocatorSystemDefault
, __kCFCharSetClassBuiltin
))) return NULL
;
1322 __CFCSetPutBuiltinType((CFMutableCharacterSetRef
)cset
, theSetIdentifier
);
1324 OSSpinLockLock(&__CFCharacterSetLock
);
1325 __CFBuiltinSets
[theSetIdentifier
- 1] = cset
;
1326 OSSpinLockUnlock(&__CFCharacterSetLock
);
1331 CFCharacterSetRef
CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef allocator
, CFRange theRange
) {
1332 CFMutableCharacterSetRef cset
;
1334 __CFCSetValidateRange(theRange
, __PRETTY_FUNCTION__
);
1336 if (theRange
.length
) {
1337 if (!(cset
= __CFCSetGenericCreate(allocator
, __kCFCharSetClassRange
))) return NULL
;
1338 __CFCSetPutRangeFirstChar(cset
, theRange
.location
);
1339 __CFCSetPutRangeLength(cset
, theRange
.length
);
1341 if (!(cset
= __CFCSetGenericCreate(allocator
, __kCFCharSetClassBitmap
))) return NULL
;
1342 __CFCSetPutBitmapBits(cset
, NULL
);
1343 __CFCSetPutHasHashValue(cset
, true); // _hashValue is 0
1349 static int chcompar(const void *a
, const void *b
) {
1350 return -(int)(*(UniChar
*)b
- *(UniChar
*)a
);
1353 CFCharacterSetRef
CFCharacterSetCreateWithCharactersInString(CFAllocatorRef allocator
, CFStringRef theString
) {
1356 length
= CFStringGetLength(theString
);
1357 if (length
< __kCFStringCharSetMax
) {
1358 CFMutableCharacterSetRef cset
;
1360 if (!(cset
= __CFCSetGenericCreate(allocator
, __kCFCharSetClassString
))) return NULL
;
1361 __CFCSetPutStringBuffer(cset
, (UniChar
*)CFAllocatorAllocate(CFGetAllocator(cset
), __kCFStringCharSetMax
* sizeof(UniChar
), 0));
1362 __CFCSetPutStringLength(cset
, length
);
1363 CFStringGetCharacters(theString
, CFRangeMake(0, length
), __CFCSetStringBuffer(cset
));
1364 qsort(__CFCSetStringBuffer(cset
), length
, sizeof(UniChar
), chcompar
);
1367 __CFCSetPutHasHashValue(cset
, true); // _hashValue is 0
1368 } else if (length
> 1) { // Check for surrogate
1369 const UTF16Char
*characters
= __CFCSetStringBuffer(cset
);
1370 const UTF16Char
*charactersLimit
= characters
+ length
;
1372 if ((*characters
< 0xDC00UL
) && (*(charactersLimit
- 1) > 0xDBFFUL
)) { // might have surrogate chars
1373 while (characters
< charactersLimit
) {
1374 if (CFStringIsSurrogateHighCharacter(*characters
) || CFStringIsSurrogateLowCharacter(*characters
)) {
1383 if (NULL
!= cset
) return cset
;
1386 CFMutableCharacterSetRef mcset
= CFCharacterSetCreateMutable(allocator
);
1387 CFCharacterSetAddCharactersInString(mcset
, theString
);
1388 __CFCSetMakeCompact(mcset
);
1389 __CFCSetPutIsMutable(mcset
, false);
1393 CFCharacterSetRef
CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef allocator
, CFDataRef theData
) {
1394 CFMutableCharacterSetRef cset
;
1397 if (!(cset
= __CFCSetGenericCreate(allocator
, __kCFCharSetClassBitmap
))) return NULL
;
1399 if (theData
&& (length
= CFDataGetLength(theData
)) > 0) {
1403 if (length
< __kCFBitmapSize
) {
1404 bitmap
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFBitmapSize
, 0);
1405 memmove(bitmap
, CFDataGetBytePtr(theData
), length
);
1406 memset(bitmap
+ length
, 0, __kCFBitmapSize
- length
);
1408 cBitmap
= __CFCreateCompactBitmap(allocator
, bitmap
);
1410 if (cBitmap
== NULL
) {
1411 __CFCSetPutBitmapBits(cset
, bitmap
);
1413 CFAllocatorDeallocate(allocator
, bitmap
);
1414 __CFCSetPutCompactBitmapBits(cset
, cBitmap
);
1415 __CFCSetPutClassType(cset
, __kCFCharSetClassCompactBitmap
);
1418 cBitmap
= __CFCreateCompactBitmap(allocator
, CFDataGetBytePtr(theData
));
1420 if (cBitmap
== NULL
) {
1421 bitmap
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFBitmapSize
, 0);
1422 memmove(bitmap
, CFDataGetBytePtr(theData
), __kCFBitmapSize
);
1424 __CFCSetPutBitmapBits(cset
, bitmap
);
1426 __CFCSetPutCompactBitmapBits(cset
, cBitmap
);
1427 __CFCSetPutClassType(cset
, __kCFCharSetClassCompactBitmap
);
1430 if (length
> __kCFBitmapSize
) {
1431 CFMutableCharacterSetRef annexSet
;
1432 const uint8_t *bytes
= CFDataGetBytePtr(theData
) + __kCFBitmapSize
;
1434 length
-= __kCFBitmapSize
;
1436 while (length
> 1) {
1437 annexSet
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(cset
, *(bytes
++));
1438 --length
; // Decrement the plane no byte
1440 if (length
< __kCFBitmapSize
) {
1441 bitmap
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFBitmapSize
, 0);
1442 memmove(bitmap
, bytes
, length
);
1443 memset(bitmap
+ length
, 0, __kCFBitmapSize
- length
);
1445 cBitmap
= __CFCreateCompactBitmap(allocator
, bitmap
);
1447 if (cBitmap
== NULL
) {
1448 __CFCSetPutBitmapBits(annexSet
, bitmap
);
1450 CFAllocatorDeallocate(allocator
, bitmap
);
1451 __CFCSetPutCompactBitmapBits(annexSet
, cBitmap
);
1452 __CFCSetPutClassType(annexSet
, __kCFCharSetClassCompactBitmap
);
1455 cBitmap
= __CFCreateCompactBitmap(allocator
, bytes
);
1457 if (cBitmap
== NULL
) {
1458 bitmap
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFBitmapSize
, 0);
1459 memmove(bitmap
, bytes
, __kCFBitmapSize
);
1461 __CFCSetPutBitmapBits(annexSet
, bitmap
);
1463 __CFCSetPutCompactBitmapBits(annexSet
, cBitmap
);
1464 __CFCSetPutClassType(annexSet
, __kCFCharSetClassCompactBitmap
);
1467 length
-= __kCFBitmapSize
;
1468 bytes
+= __kCFBitmapSize
;
1473 __CFCSetPutBitmapBits(cset
, NULL
);
1474 __CFCSetPutHasHashValue(cset
, true); // Hash value is 0
1480 CFCharacterSetRef
CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc
, CFCharacterSetRef theSet
) {
1481 CFMutableCharacterSetRef cset
;
1483 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, CFCharacterSetRef
, (NSCharacterSet
*)theSet
, invertedSet
);
1485 cset
= CFCharacterSetCreateMutableCopy(alloc
, theSet
);
1486 CFCharacterSetInvert(cset
);
1487 __CFCSetPutIsMutable(cset
, false);
1492 /* Functions to create mutable characterset.
1494 CFMutableCharacterSetRef
CFCharacterSetCreateMutable(CFAllocatorRef allocator
) {
1495 CFMutableCharacterSetRef cset
;
1497 if (!(cset
= __CFCSetGenericCreate(allocator
, __kCFCharSetClassBitmap
| __kCFCharSetIsMutable
))) return NULL
;
1498 __CFCSetPutBitmapBits(cset
, NULL
);
1499 __CFCSetPutHasHashValue(cset
, true); // Hash value is 0
1504 static CFMutableCharacterSetRef
__CFCharacterSetCreateCopy(CFAllocatorRef alloc
, CFCharacterSetRef theSet
, bool isMutable
) {
1505 CFMutableCharacterSetRef cset
;
1507 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, CFMutableCharacterSetRef
, (NSCharacterSet
*)theSet
, mutableCopy
);
1509 __CFGenericValidateType(theSet
, __kCFCharacterSetTypeID
);
1511 if (!isMutable
&& !__CFCSetIsMutable(theSet
)) {
1512 return (CFMutableCharacterSetRef
)CFRetain(theSet
);
1515 cset
= CFCharacterSetCreateMutable(alloc
);
1517 __CFCSetPutClassType(cset
, __CFCSetClassType(theSet
));
1518 __CFCSetPutHasHashValue(cset
, __CFCSetHasHashValue(theSet
));
1519 __CFCSetPutIsInverted(cset
, __CFCSetIsInverted(theSet
));
1520 cset
->_hashValue
= theSet
->_hashValue
;
1522 switch (__CFCSetClassType(theSet
)) {
1523 case __kCFCharSetClassBuiltin
:
1524 __CFCSetPutBuiltinType(cset
, __CFCSetBuiltinType(theSet
));
1527 case __kCFCharSetClassRange
:
1528 __CFCSetPutRangeFirstChar(cset
, __CFCSetRangeFirstChar(theSet
));
1529 __CFCSetPutRangeLength(cset
, __CFCSetRangeLength(theSet
));
1532 case __kCFCharSetClassString
:
1533 __CFCSetPutStringBuffer(cset
, (UniChar
*)CFAllocatorAllocate(alloc
, __kCFStringCharSetMax
* sizeof(UniChar
), 0));
1535 __CFCSetPutStringLength(cset
, __CFCSetStringLength(theSet
));
1536 memmove(__CFCSetStringBuffer(cset
), __CFCSetStringBuffer(theSet
), __CFCSetStringLength(theSet
) * sizeof(UniChar
));
1539 case __kCFCharSetClassBitmap
:
1540 if (__CFCSetBitmapBits(theSet
)) {
1541 uint8_t * bitmap
= (isMutable
? NULL
: __CFCreateCompactBitmap(alloc
, __CFCSetBitmapBits(theSet
)));
1543 if (bitmap
== NULL
) {
1544 bitmap
= (uint8_t *)CFAllocatorAllocate(alloc
, sizeof(uint8_t) * __kCFBitmapSize
, 0);
1545 memmove(bitmap
, __CFCSetBitmapBits(theSet
), __kCFBitmapSize
);
1546 __CFCSetPutBitmapBits(cset
, bitmap
);
1548 __CFCSetPutCompactBitmapBits(cset
, bitmap
);
1549 __CFCSetPutClassType(cset
, __kCFCharSetClassCompactBitmap
);
1552 __CFCSetPutBitmapBits(cset
, NULL
);
1556 case __kCFCharSetClassCompactBitmap
: {
1557 const uint8_t *compactBitmap
= __CFCSetCompactBitmapBits(theSet
);
1559 if (compactBitmap
) {
1560 uint32_t size
= __CFCSetGetCompactBitmapSize(compactBitmap
);
1561 uint8_t *newBitmap
= (uint8_t *)CFAllocatorAllocate(alloc
, size
, 0);
1563 memmove(newBitmap
, compactBitmap
, size
);
1564 __CFCSetPutCompactBitmapBits(cset
, newBitmap
);
1570 CFAssert1(0, __kCFLogAssertion
, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__
); // We should never come here
1572 if (__CFCSetHasNonBMPPlane(theSet
)) {
1573 CFMutableCharacterSetRef annexPlane
;
1576 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
1577 if ((annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
))) {
1578 annexPlane
= __CFCharacterSetCreateCopy(alloc
, annexPlane
, isMutable
);
1579 __CFCSetPutCharacterSetToAnnexPlane(cset
, annexPlane
, idx
);
1580 CFRelease(annexPlane
);
1583 __CFCSetAnnexSetIsInverted(cset
, __CFCSetAnnexIsInverted(theSet
));
1584 } else if (__CFCSetAnnexIsInverted(theSet
)) {
1585 __CFCSetAnnexSetIsInverted(cset
, true);
1591 CFCharacterSetRef
CFCharacterSetCreateCopy(CFAllocatorRef alloc
, CFCharacterSetRef theSet
) {
1592 return __CFCharacterSetCreateCopy(alloc
, theSet
, false);
1595 CFMutableCharacterSetRef
CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc
, CFCharacterSetRef theSet
) {
1596 return __CFCharacterSetCreateCopy(alloc
, theSet
, true);
1599 /*** Basic accessors ***/
1600 Boolean
CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet
, UniChar theChar
) {
1603 Boolean result
= false;
1605 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, Boolean
, (NSCharacterSet
*)theSet
, longCharacterIsMember
:(UTF32Char
)theChar
);
1607 __CFGenericValidateType(theSet
, __kCFCharacterSetTypeID
);
1609 isInverted
= __CFCSetIsInverted(theSet
);
1611 switch (__CFCSetClassType(theSet
)) {
1612 case __kCFCharSetClassBuiltin
:
1613 result
= (CFUniCharIsMemberOf(theChar
, __CFCSetBuiltinType(theSet
)) ? !isInverted
: isInverted
);
1616 case __kCFCharSetClassRange
:
1617 length
= __CFCSetRangeLength(theSet
);
1618 result
= (length
&& __CFCSetRangeFirstChar(theSet
) <= theChar
&& theChar
< __CFCSetRangeFirstChar(theSet
) + length
? !isInverted
: isInverted
);
1621 case __kCFCharSetClassString
:
1622 result
= ((length
= __CFCSetStringLength(theSet
)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet
), length
, theChar
) ? !isInverted
: isInverted
) : isInverted
);
1625 case __kCFCharSetClassBitmap
:
1626 result
= (__CFCSetCompactBitmapBits(theSet
) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet
), theChar
) ? true : false) : isInverted
);
1629 case __kCFCharSetClassCompactBitmap
:
1630 result
= (__CFCSetCompactBitmapBits(theSet
) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet
), theChar
) ? true : false) : isInverted
);
1634 CFAssert1(0, __kCFLogAssertion
, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__
); // We should never come here
1641 Boolean
CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet
, UTF32Char theChar
) {
1643 UInt32 plane
= (theChar
>> 16);
1644 Boolean isAnnexInverted
= false;
1646 Boolean result
= false;
1648 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, Boolean
, (NSCharacterSet
*)theSet
, longCharacterIsMember
:(UTF32Char
)theChar
);
1650 __CFGenericValidateType(theSet
, __kCFCharacterSetTypeID
);
1653 CFCharacterSetRef annexPlane
;
1655 if (__CFCSetIsBuiltin(theSet
)) {
1656 isInverted
= __CFCSetIsInverted(theSet
);
1657 return (CFUniCharIsMemberOf(theChar
, __CFCSetBuiltinType(theSet
)) ? !isInverted
: isInverted
);
1660 isAnnexInverted
= __CFCSetAnnexIsInverted(theSet
);
1662 if ((annexPlane
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, plane
)) == NULL
) {
1663 if (!__CFCSetHasNonBMPPlane(theSet
) && __CFCSetIsRange(theSet
)) {
1664 isInverted
= __CFCSetIsInverted(theSet
);
1665 length
= __CFCSetRangeLength(theSet
);
1666 return (length
&& __CFCSetRangeFirstChar(theSet
) <= theChar
&& theChar
< __CFCSetRangeFirstChar(theSet
) + length
? !isInverted
: isInverted
);
1668 return (isAnnexInverted
? true : false);
1671 theSet
= annexPlane
;
1676 isInverted
= __CFCSetIsInverted(theSet
);
1678 switch (__CFCSetClassType(theSet
)) {
1679 case __kCFCharSetClassBuiltin
:
1680 result
= (CFUniCharIsMemberOf(theChar
, __CFCSetBuiltinType(theSet
)) ? !isInverted
: isInverted
);
1683 case __kCFCharSetClassRange
:
1684 length
= __CFCSetRangeLength(theSet
);
1685 result
= (length
&& __CFCSetRangeFirstChar(theSet
) <= theChar
&& theChar
< __CFCSetRangeFirstChar(theSet
) + length
? !isInverted
: isInverted
);
1688 case __kCFCharSetClassString
:
1689 result
= ((length
= __CFCSetStringLength(theSet
)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet
), length
, theChar
) ? !isInverted
: isInverted
) : isInverted
);
1692 case __kCFCharSetClassBitmap
:
1693 result
= (__CFCSetCompactBitmapBits(theSet
) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet
), theChar
) ? true : false) : isInverted
);
1696 case __kCFCharSetClassCompactBitmap
:
1697 result
= (__CFCSetCompactBitmapBits(theSet
) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet
), theChar
) ? true : false) : isInverted
);
1701 CFAssert1(0, __kCFLogAssertion
, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__
); // We should never come here
1702 return false; // To make compiler happy
1705 return (result
? !isAnnexInverted
: isAnnexInverted
);
1708 Boolean
CFCharacterSetIsSurrogatePairMember(CFCharacterSetRef theSet
, UniChar surrogateHigh
, UniChar surrogateLow
) {
1709 return CFCharacterSetIsLongCharacterMember(theSet
, CFCharacterSetGetLongCharacterForSurrogatePair(surrogateHigh
, surrogateLow
));
1713 static inline CFCharacterSetRef
__CFCharacterSetGetExpandedSetForNSCharacterSet(const void *characterSet
) {
1714 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, CFCharacterSetRef
, (NSCharacterSet
*)characterSet
, _expandedCFCharacterSet
);
1718 Boolean
CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet
, CFCharacterSetRef theOtherSet
) {
1719 CFMutableCharacterSetRef copy
;
1720 CFCharacterSetRef expandedSet
= NULL
;
1721 CFCharacterSetRef expandedOtherSet
= NULL
;
1724 if ((!CF_IS_OBJC(__kCFCharacterSetTypeID
, theSet
) || (expandedSet
= __CFCharacterSetGetExpandedSetForNSCharacterSet(theSet
))) && (!CF_IS_OBJC(__kCFCharacterSetTypeID
, theOtherSet
) || (expandedOtherSet
= __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet
)))) { // Really CF, we can do some trick here
1725 if (expandedSet
) theSet
= expandedSet
;
1726 if (expandedOtherSet
) theOtherSet
= expandedOtherSet
;
1728 __CFGenericValidateType(theSet
, __kCFCharacterSetTypeID
);
1729 __CFGenericValidateType(theOtherSet
, __kCFCharacterSetTypeID
);
1731 if (__CFCSetIsEmpty(theSet
)) {
1732 if (__CFCSetIsInverted(theSet
)) {
1733 return TRUE
; // Inverted empty set covers all range
1734 } else if (!__CFCSetIsEmpty(theOtherSet
) || __CFCSetIsInverted(theOtherSet
)) {
1737 } else if (__CFCSetIsEmpty(theOtherSet
) && !__CFCSetIsInverted(theOtherSet
)) {
1740 if (__CFCSetIsBuiltin(theSet
) || __CFCSetIsBuiltin(theOtherSet
)) {
1741 if (__CFCSetClassType(theSet
) == __CFCSetClassType(theOtherSet
) && __CFCSetBuiltinType(theSet
) == __CFCSetBuiltinType(theOtherSet
) && !__CFCSetIsInverted(theSet
) && !__CFCSetIsInverted(theOtherSet
)) return TRUE
;
1742 } else if (__CFCSetIsRange(theSet
) || __CFCSetIsRange(theOtherSet
)) {
1743 if (__CFCSetClassType(theSet
) == __CFCSetClassType(theOtherSet
)) {
1744 if (__CFCSetIsInverted(theSet
)) {
1745 if (__CFCSetIsInverted(theOtherSet
)) {
1746 return (__CFCSetRangeFirstChar(theOtherSet
) > __CFCSetRangeFirstChar(theSet
) || (__CFCSetRangeFirstChar(theSet
) + __CFCSetRangeLength(theSet
)) > (__CFCSetRangeFirstChar(theOtherSet
) + __CFCSetRangeLength(theOtherSet
)) ? FALSE
: TRUE
);
1748 return ((__CFCSetRangeFirstChar(theOtherSet
) + __CFCSetRangeLength(theOtherSet
)) <= __CFCSetRangeFirstChar(theSet
) || (__CFCSetRangeFirstChar(theSet
) + __CFCSetRangeLength(theSet
)) <= __CFCSetRangeFirstChar(theOtherSet
) ? TRUE
: FALSE
);
1751 if (__CFCSetIsInverted(theOtherSet
)) {
1752 return ((__CFCSetRangeFirstChar(theSet
) == 0 && __CFCSetRangeLength(theSet
) == 0x110000) || (__CFCSetRangeFirstChar(theOtherSet
) == 0 && (UInt32
)__CFCSetRangeLength(theOtherSet
) <= __CFCSetRangeFirstChar(theSet
)) || ((__CFCSetRangeFirstChar(theSet
) + __CFCSetRangeLength(theSet
)) <= __CFCSetRangeFirstChar(theOtherSet
) && (__CFCSetRangeFirstChar(theOtherSet
) + __CFCSetRangeLength(theOtherSet
)) == 0x110000) ? TRUE
: FALSE
);
1754 return (__CFCSetRangeFirstChar(theOtherSet
) < __CFCSetRangeFirstChar(theSet
) || (__CFCSetRangeFirstChar(theSet
) + __CFCSetRangeLength(theSet
)) < (__CFCSetRangeFirstChar(theOtherSet
) + __CFCSetRangeLength(theOtherSet
)) ? FALSE
: TRUE
);
1759 UInt32 theSetAnnexMask
= __CFCSetAnnexValidEntriesBitmap(theSet
);
1760 UInt32 theOtherSetAnnexMask
= __CFCSetAnnexValidEntriesBitmap(theOtherSet
);
1761 Boolean isTheSetAnnexInverted
= __CFCSetAnnexIsInverted(theSet
);
1762 Boolean isTheOtherSetAnnexInverted
= __CFCSetAnnexIsInverted(theOtherSet
);
1763 uint8_t theSetBuffer
[__kCFBitmapSize
];
1764 uint8_t theOtherSetBuffer
[__kCFBitmapSize
];
1766 // We mask plane 1 to plane 16
1767 if (isTheSetAnnexInverted
) theSetAnnexMask
= (~theSetAnnexMask
) & (0xFFFF << 1);
1768 if (isTheOtherSetAnnexInverted
) theOtherSetAnnexMask
= (~theOtherSetAnnexMask
) & (0xFFFF << 1);
1770 __CFCSetGetBitmap(theSet
, theSetBuffer
);
1771 __CFCSetGetBitmap(theOtherSet
, theOtherSetBuffer
);
1773 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32
*)theSetBuffer
, (const UInt32
*)theOtherSetBuffer
, FALSE
, FALSE
)) return FALSE
;
1775 if (theOtherSetAnnexMask
) {
1776 CFCharacterSetRef theSetAnnex
;
1777 CFCharacterSetRef theOtherSetAnnex
;
1780 if ((theSetAnnexMask
& theOtherSetAnnexMask
) != theOtherSetAnnexMask
) return FALSE
;
1782 for (idx
= 1;idx
<= 16;idx
++) {
1783 theSetAnnex
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
);
1784 if (NULL
== theSetAnnex
) continue; // This case is already handled by the mask above
1786 theOtherSetAnnex
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet
, idx
);
1788 if (NULL
== theOtherSetAnnex
) {
1789 if (isTheOtherSetAnnexInverted
) {
1790 __CFCSetGetBitmap(theSetAnnex
, theSetBuffer
);
1791 if (!__CFCSetIsEqualBitmap((const UInt32
*)theSetBuffer
, (isTheSetAnnexInverted
? NULL
: (const UInt32
*)-1))) return FALSE
;
1794 __CFCSetGetBitmap(theSetAnnex
, theSetBuffer
);
1795 __CFCSetGetBitmap(theOtherSetAnnex
, theOtherSetBuffer
);
1796 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32
*)theSetBuffer
, (const UInt32
*)theOtherSetBuffer
, isTheSetAnnexInverted
, isTheOtherSetAnnexInverted
)) return FALSE
;
1806 copy
= CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault
, theSet
);
1807 CFCharacterSetIntersect(copy
, theOtherSet
);
1808 result
= __CFCharacterSetEqual(copy
, theOtherSet
);
1814 Boolean
CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet
, CFIndex thePlane
) {
1815 Boolean isInverted
= __CFCSetIsInverted(theSet
);
1817 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, Boolean
, (NSCharacterSet
*)theSet
, hasMemberInPlane
:(uint8_t)thePlane
);
1819 if (__CFCSetIsEmpty(theSet
)) {
1820 return (isInverted
? TRUE
: FALSE
);
1821 } else if (__CFCSetIsBuiltin(theSet
)) {
1822 CFCharacterSetPredefinedSet type
= __CFCSetBuiltinType(theSet
);
1824 if (type
== kCFCharacterSetControl
) {
1825 if (isInverted
|| (thePlane
== 14)) {
1826 return TRUE
; // There is no plane that covers all values || Plane 14 has language tags
1828 return (CFUniCharGetBitmapPtrForPlane(type
, thePlane
) ? TRUE
: FALSE
);
1830 } else if ((type
< kCFCharacterSetDecimalDigit
) || (type
== kCFCharacterSetNewline
)) {
1831 return (thePlane
&& !isInverted
? FALSE
: TRUE
);
1832 } else if (__CFCSetBuiltinType(theSet
) == kCFCharacterSetIllegal
) {
1833 return (isInverted
? (thePlane
< 3 || thePlane
> 13 ? TRUE
: FALSE
) : TRUE
); // This is according to Unicode 3.1
1836 return TRUE
; // There is no plane that covers all values
1838 return (CFUniCharGetBitmapPtrForPlane(type
, thePlane
) ? TRUE
: FALSE
);
1841 } else if (__CFCSetIsRange(theSet
)) {
1842 UTF32Char firstChar
= __CFCSetRangeFirstChar(theSet
);
1843 UTF32Char lastChar
= (firstChar
+ __CFCSetRangeLength(theSet
) - 1);
1844 CFIndex firstPlane
= firstChar
>> 16;
1845 CFIndex lastPlane
= lastChar
>> 16;
1848 if (thePlane
< firstPlane
|| thePlane
> lastPlane
) {
1850 } else if (thePlane
> firstPlane
&& thePlane
< lastPlane
) {
1853 firstChar
&= 0xFFFF;
1855 if (thePlane
== firstPlane
) {
1856 return (firstChar
|| (firstPlane
== lastPlane
&& lastChar
!= 0xFFFF) ? TRUE
: FALSE
);
1858 return (lastChar
!= 0xFFFF || (firstPlane
== lastPlane
&& firstChar
) ? TRUE
: FALSE
);
1862 return (thePlane
< firstPlane
|| thePlane
> lastPlane
? FALSE
: TRUE
);
1865 if (thePlane
== 0) {
1866 switch (__CFCSetClassType(theSet
)) {
1867 case __kCFCharSetClassString
: if (!__CFCSetStringLength(theSet
)) return isInverted
; break;
1868 case __kCFCharSetClassCompactBitmap
: return (__CFCSetCompactBitmapBits(theSet
) ? TRUE
: FALSE
); break;
1869 case __kCFCharSetClassBitmap
: return (__CFCSetBitmapBits(theSet
) ? TRUE
: FALSE
); break;
1873 CFCharacterSetRef annex
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, thePlane
);
1875 if (__CFCSetIsRange(annex
)) {
1876 return (__CFCSetAnnexIsInverted(theSet
) && (__CFCSetRangeFirstChar(annex
) == 0) && (__CFCSetRangeLength(annex
) == 0x10000) ? FALSE
: TRUE
);
1877 } else if (__CFCSetIsBitmap(annex
)) {
1878 return (__CFCSetAnnexIsInverted(theSet
) && __CFCSetIsEqualBitmap((const UInt32
*)__CFCSetBitmapBits(annex
), (const UInt32
*)-1) ? FALSE
: TRUE
);
1880 uint8_t bitsBuf
[__kCFBitmapSize
];
1881 __CFCSetGetBitmap(annex
, bitsBuf
);
1882 return (__CFCSetAnnexIsInverted(theSet
) && __CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)-1) ? FALSE
: TRUE
);
1885 return __CFCSetAnnexIsInverted(theSet
);
/*
 * Builds a mutable CFData holding the bitmap form of theSet.
 *
 * Layout produced by the visible code: the first __kCFBitmapSize bytes are
 * filled by __CFCSetGetBitmap for theSet itself; for each emitted non-BMP
 * plane the code writes one byte carrying the plane number followed by that
 * plane's __kCFBitmapSize bytes. The buffer is sized up front as
 * __kCFBitmapSize + (__kCFBitmapSize + 1) * numNonBMPPlanes, and the builtin
 * branch shrinks it afterwards when fewer bytes were actually written.
 *
 * alloc   allocator passed to CFDataCreateMutable
 * theSet  set to serialise; bridged NSCharacterSet objects are handed to
 *         -_retainedBitmapRepresentation by the dispatch macro
 *
 * Presumably returns `data`; the return statement is outside the visible
 * lines, so confirm before relying on it.
 */
1894 CFDataRef
CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc
, CFCharacterSetRef theSet
) {
1895 CFMutableDataRef data
;
1896 int numNonBMPPlanes
= 0;
1897 int planeIndices
[MAX_ANNEX_PLANE
];
1900 bool isAnnexInverted
;
1902 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, CFDataRef
, (NSCharacterSet
*)theSet
, _retainedBitmapRepresentation
);
1904 __CFGenericValidateType(theSet
, __kCFCharacterSetTypeID
);
1906 isAnnexInverted
= (__CFCSetAnnexIsInverted(theSet
) != 0);
/* Pass 1: count how many non-BMP planes will be emitted, per storage class. */
1908 if (__CFCSetHasNonBMPPlane(theSet
)) {
1909 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
1910 if (isAnnexInverted
|| __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
)) {
1911 planeIndices
[numNonBMPPlanes
++] = idx
;
1914 } else if (__CFCSetIsBuiltin(theSet
)) {
1915 numNonBMPPlanes
= (__CFCSetIsInverted(theSet
) ? MAX_ANNEX_PLANE
: CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(theSet
)) - 1);
1916 } else if (__CFCSetIsRange(theSet
)) {
1917 UInt32 firstChar
= __CFCSetRangeFirstChar(theSet
);
1918 UInt32 lastChar
= __CFCSetRangeFirstChar(theSet
) + __CFCSetRangeLength(theSet
) - 1;
1919 int firstPlane
= (firstChar
>> 16);
1920 int lastPlane
= (lastChar
>> 16);
1921 bool isInverted
= (__CFCSetIsInverted(theSet
) != 0);
1923 if (lastPlane
> 0) {
1924 if (firstPlane
== 0) {
1926 firstChar
= 0x10000;
1928 numNonBMPPlanes
= (lastPlane
- firstPlane
) + 1;
1930 numNonBMPPlanes
= MAX_ANNEX_PLANE
- numNonBMPPlanes
;
1931 if (firstPlane
== lastPlane
) {
1932 if (((firstChar
& 0xFFFF) > 0) || ((lastChar
& 0xFFFF) < 0xFFFF)) ++numNonBMPPlanes
;
1934 if ((firstChar
& 0xFFFF) > 0) ++numNonBMPPlanes
;
1935 if ((lastChar
& 0xFFFF) < 0xFFFF) ++numNonBMPPlanes
;
1938 } else if (isInverted
) {
1939 numNonBMPPlanes
= MAX_ANNEX_PLANE
;
1941 } else if (isAnnexInverted
) {
1942 numNonBMPPlanes
= MAX_ANNEX_PLANE
;
/* Allocate for the BMP bitmap plus (plane byte + bitmap) per counted plane. */
1945 length
= __kCFBitmapSize
+ ((__kCFBitmapSize
+ 1) * numNonBMPPlanes
);
1946 data
= CFDataCreateMutable(alloc
, length
);
1947 CFDataSetLength(data
, length
);
1948 __CFCSetGetBitmap(theSet
, CFDataGetMutableBytePtr(data
));
/* Pass 2: write the per-plane records after the BMP bitmap. */
1950 if (numNonBMPPlanes
> 0) {
1951 uint8_t *bytes
= CFDataGetMutableBytePtr(data
) + __kCFBitmapSize
;
1953 if (__CFCSetHasNonBMPPlane(theSet
)) {
1954 CFCharacterSetRef subset
;
1956 for (idx
= 0;idx
< numNonBMPPlanes
;idx
++) {
1957 *(bytes
++) = planeIndices
[idx
];
1958 if ((subset
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, planeIndices
[idx
])) == NULL
) {
1959 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, (isAnnexInverted
? 0xFF : 0));
1961 __CFCSetGetBitmap(subset
, bytes
);
1962 if (isAnnexInverted
) {
1963 uint32_t count
= __kCFBitmapSize
/ sizeof(uint32_t);
1964 uint32_t *bits
= (uint32_t *)bytes
;
1966 while (count
-- > 0) {
1972 bytes
+= __kCFBitmapSize
;
1974 } else if (__CFCSetIsBuiltin(theSet
)) {
1977 Boolean isInverted
= __CFCSetIsInverted(theSet
);
1979 for (idx
= 0;idx
< numNonBMPPlanes
;idx
++) {
/* The plane bitmap is written at bytes + 1 first; planes reported as empty are skipped without emitting a plane byte. */
1980 if ((result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theSet
), idx
+ 1, bytes
+ 1, (isInverted
!= 0))) == kCFUniCharBitmapEmpty
) continue;
1981 *(bytes
++) = idx
+ 1;
1982 if (result
== kCFUniCharBitmapAll
) {
1983 CFIndex bitmapLength
= __kCFBitmapSize
;
1984 while (bitmapLength
-- > 0) *(bytes
++) = (uint8_t)0xFF;
1986 bytes
+= __kCFBitmapSize
;
/* Skipped planes leave the buffer over-allocated; trim it to the bytes actually written. */
1989 delta
= bytes
- (const uint8_t *)CFDataGetBytePtr(data
);
1990 if (delta
< length
) CFDataSetLength(data
, delta
);
1991 } else if (__CFCSetIsRange(theSet
)) {
1992 UInt32 firstChar
= __CFCSetRangeFirstChar(theSet
);
1993 UInt32 lastChar
= __CFCSetRangeFirstChar(theSet
) + __CFCSetRangeLength(theSet
) - 1;
1994 int firstPlane
= (firstChar
>> 16);
1995 int lastPlane
= (lastChar
>> 16);
1997 if (firstPlane
== 0) {
1999 firstChar
= 0x10000;
2001 if (__CFCSetIsInverted(theSet
)) {
2002 // Mask out the plane byte
2003 firstChar
&= 0xFFFF;
2006 for (idx
= 1;idx
< firstPlane
;idx
++) { // Fill up until the first plane
2008 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, 0xFF);
2009 bytes
+= __kCFBitmapSize
;
2011 if (firstPlane
== lastPlane
) {
2012 if ((firstChar
> 0) || (lastChar
< 0xFFFF)) {
2014 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, 0xFF);
2015 __CFCSetBitmapRemoveCharactersInRange(bytes
, firstChar
, lastChar
);
2016 bytes
+= __kCFBitmapSize
;
2018 } else if (firstPlane
< lastPlane
) {
2019 if (firstChar
> 0) {
2021 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, 0);
2022 __CFCSetBitmapAddCharactersInRange(bytes
, 0, firstChar
- 1);
2023 bytes
+= __kCFBitmapSize
;
/* NOTE(review): the first-plane case above uses firstChar - 1 as its upper bound, which suggests the helper's bounds are inclusive. If so, starting at lastChar below would re-include the last excluded character; confirm whether lastChar + 1 was intended. */
2025 if (lastChar
< 0xFFFF) {
2027 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, 0);
2028 __CFCSetBitmapAddCharactersInRange(bytes
, lastChar
, 0xFFFF);
2029 bytes
+= __kCFBitmapSize
;
2032 for (idx
= lastPlane
+ 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2034 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, 0xFF);
2035 bytes
+= __kCFBitmapSize
;
2038 for (idx
= firstPlane
;idx
<= lastPlane
;idx
++) {
2040 __CFCSetBitmapAddCharactersInRange(bytes
, (idx
== firstPlane
? firstChar
: 0), (idx
== lastPlane
? lastChar
: 0xFFFF));
2041 bytes
+= __kCFBitmapSize
;
2044 } else if (isAnnexInverted
) {
2045 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2047 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, 0xFF);
2048 bytes
+= __kCFBitmapSize
;
2056 /*** MutableCharacterSet functions ***/
2057 void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet
, CFRange theRange
) {
2058 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, void, (NSMutableCharacterSet
*)theSet
, addCharactersInRange
:NSMakeRange(theRange
.location
, theRange
.length
));
2060 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2061 __CFCSetValidateRange(theRange
, __PRETTY_FUNCTION__
);
2063 if (__CFCSetIsBuiltin((CFCharacterSetRef
)theSet
) && !__CFCSetIsMutable((CFCharacterSetRef
)theSet
) && !__CFCSetIsInverted((CFCharacterSetRef
)theSet
)) {
2064 CFCharacterSetRef sharedSet
= CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef
)theSet
));
2065 if (sharedSet
== theSet
) { // We're trying to dealloc the builtin set
2066 CFAssert1(0, __kCFLogAssertion
, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__
);
2067 return; // We don't mutate builtin set
2071 if (!theRange
.length
|| (__CFCSetIsInverted(theSet
) && __CFCSetIsEmpty(theSet
))) return; // Inverted && empty set contains all char
2073 if (!__CFCSetIsInverted(theSet
)) {
2074 if (__CFCSetIsEmpty(theSet
)) {
2075 __CFCSetPutClassType(theSet
, __kCFCharSetClassRange
);
2076 __CFCSetPutRangeFirstChar(theSet
, theRange
.location
);
2077 __CFCSetPutRangeLength(theSet
, theRange
.length
);
2078 __CFCSetPutHasHashValue(theSet
, false);
2080 } else if (__CFCSetIsRange(theSet
)) {
2081 CFIndex firstChar
= __CFCSetRangeFirstChar(theSet
);
2082 CFIndex length
= __CFCSetRangeLength(theSet
);
2084 if (firstChar
== theRange
.location
) {
2085 __CFCSetPutRangeLength(theSet
, __CFMax(length
, theRange
.length
));
2086 __CFCSetPutHasHashValue(theSet
, false);
2088 } else if (firstChar
< theRange
.location
&& theRange
.location
<= firstChar
+ length
) {
2089 if (firstChar
+ length
< theRange
.location
+ theRange
.length
) __CFCSetPutRangeLength(theSet
, theRange
.length
+ (theRange
.location
- firstChar
));
2090 __CFCSetPutHasHashValue(theSet
, false);
2092 } else if (theRange
.location
< firstChar
&& firstChar
<= theRange
.location
+ theRange
.length
) {
2093 __CFCSetPutRangeFirstChar(theSet
, theRange
.location
);
2094 __CFCSetPutRangeLength(theSet
, length
+ (firstChar
- theRange
.location
));
2095 __CFCSetPutHasHashValue(theSet
, false);
2098 } else if (__CFCSetIsString(theSet
) && __CFCSetStringLength(theSet
) + theRange
.length
< __kCFStringCharSetMax
) {
2100 if (!__CFCSetStringBuffer(theSet
))
2101 __CFCSetPutStringBuffer(theSet
, (UniChar
*)CFAllocatorAllocate(CFGetAllocator(theSet
), __kCFStringCharSetMax
* sizeof(UniChar
), 0));
2102 buffer
= __CFCSetStringBuffer(theSet
) + __CFCSetStringLength(theSet
);
2103 __CFCSetPutStringLength(theSet
, __CFCSetStringLength(theSet
) + theRange
.length
);
2104 while (theRange
.length
--) *buffer
++ = (UniChar
)theRange
.location
++;
2105 qsort(__CFCSetStringBuffer(theSet
), __CFCSetStringLength(theSet
), sizeof(UniChar
), chcompar
);
2106 __CFCSetPutHasHashValue(theSet
, false);
2111 // OK, I have to be a bitmap
2112 __CFCSetMakeBitmap(theSet
);
2113 __CFCSetAddNonBMPPlanesInRange(theSet
, theRange
);
2114 if (theRange
.location
< 0x10000) { // theRange is in BMP
2115 if (theRange
.location
+ theRange
.length
>= NUMCHARACTERS
) theRange
.length
= NUMCHARACTERS
- theRange
.location
;
2116 __CFCSetBitmapAddCharactersInRange(__CFCSetBitmapBits(theSet
), (UniChar
)theRange
.location
, (UniChar
)(theRange
.location
+ theRange
.length
- 1));
2118 __CFCSetPutHasHashValue(theSet
, false);
2120 if (__CFCheckForExapendedSet
) __CFCheckForExpandedSet(theSet
);
2123 void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet
, CFRange theRange
) {
2124 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, void, (NSMutableCharacterSet
*)theSet
, removeCharactersInRange
:NSMakeRange(theRange
.location
, theRange
.length
));
2126 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2127 __CFCSetValidateRange(theRange
, __PRETTY_FUNCTION__
);
2129 if (__CFCSetIsBuiltin((CFCharacterSetRef
)theSet
) && !__CFCSetIsMutable((CFCharacterSetRef
)theSet
) && !__CFCSetIsInverted((CFCharacterSetRef
)theSet
)) {
2130 CFCharacterSetRef sharedSet
= CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef
)theSet
));
2131 if (sharedSet
== theSet
) { // We're trying to dealloc the builtin set
2132 CFAssert1(0, __kCFLogAssertion
, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__
);
2133 return; // We don't mutate builtin set
2137 if (!theRange
.length
|| (!__CFCSetIsInverted(theSet
) && __CFCSetIsEmpty(theSet
))) return; // empty set
2139 if (__CFCSetIsInverted(theSet
)) {
2140 if (__CFCSetIsEmpty(theSet
)) {
2141 __CFCSetPutClassType(theSet
, __kCFCharSetClassRange
);
2142 __CFCSetPutRangeFirstChar(theSet
, theRange
.location
);
2143 __CFCSetPutRangeLength(theSet
, theRange
.length
);
2144 __CFCSetPutHasHashValue(theSet
, false);
2146 } else if (__CFCSetIsRange(theSet
)) {
2147 CFIndex firstChar
= __CFCSetRangeFirstChar(theSet
);
2148 CFIndex length
= __CFCSetRangeLength(theSet
);
2150 if (firstChar
== theRange
.location
) {
2151 __CFCSetPutRangeLength(theSet
, __CFMin(length
, theRange
.length
));
2152 __CFCSetPutHasHashValue(theSet
, false);
2154 } else if (firstChar
< theRange
.location
&& theRange
.location
<= firstChar
+ length
) {
2155 if (firstChar
+ length
< theRange
.location
+ theRange
.length
) __CFCSetPutRangeLength(theSet
, theRange
.length
+ (theRange
.location
- firstChar
));
2156 __CFCSetPutHasHashValue(theSet
, false);
2158 } else if (theRange
.location
< firstChar
&& firstChar
<= theRange
.location
+ theRange
.length
) {
2159 __CFCSetPutRangeFirstChar(theSet
, theRange
.location
);
2160 __CFCSetPutRangeLength(theSet
, length
+ (firstChar
- theRange
.location
));
2161 __CFCSetPutHasHashValue(theSet
, false);
2164 } else if (__CFCSetIsString(theSet
) && __CFCSetStringLength(theSet
) + theRange
.length
< __kCFStringCharSetMax
) {
2166 if (!__CFCSetStringBuffer(theSet
))
2167 __CFCSetPutStringBuffer(theSet
, (UniChar
*)CFAllocatorAllocate(CFGetAllocator(theSet
), __kCFStringCharSetMax
* sizeof(UniChar
), 0));
2168 buffer
= __CFCSetStringBuffer(theSet
) + __CFCSetStringLength(theSet
);
2169 __CFCSetPutStringLength(theSet
, __CFCSetStringLength(theSet
) + theRange
.length
);
2170 while (theRange
.length
--) *buffer
++ = (UniChar
)theRange
.location
++;
2171 qsort(__CFCSetStringBuffer(theSet
), __CFCSetStringLength(theSet
), sizeof(UniChar
), chcompar
);
2172 __CFCSetPutHasHashValue(theSet
, false);
2177 // OK, I have to be a bitmap
2178 __CFCSetMakeBitmap(theSet
);
2179 __CFCSetRemoveNonBMPPlanesInRange(theSet
, theRange
);
2180 if (theRange
.location
< 0x10000) { // theRange is in BMP
2181 if (theRange
.location
+ theRange
.length
> NUMCHARACTERS
) theRange
.length
= NUMCHARACTERS
- theRange
.location
;
2182 if (theRange
.location
== 0 && theRange
.length
== NUMCHARACTERS
) { // Remove all
2183 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetBitmapBits(theSet
));
2184 __CFCSetPutBitmapBits(theSet
, NULL
);
2186 __CFCSetBitmapRemoveCharactersInRange(__CFCSetBitmapBits(theSet
), (UniChar
)theRange
.location
, (UniChar
)(theRange
.location
+ theRange
.length
- 1));
2190 __CFCSetPutHasHashValue(theSet
, false);
2191 if (__CFCheckForExapendedSet
) __CFCheckForExpandedSet(theSet
);
/*
 * Adds the characters of theString to theSet.
 *
 * Bridged NSMutableCharacterSet objects are handed to -addCharactersInString:.
 * The shared predefined sets are never modified, and nothing happens when the
 * set is empty-and-inverted or the string has zero length.
 *
 * For a non-inverted set whose combined size stays below
 * __kCFStringCharSetMax, the string's UTF-16 units are copied into the set's
 * string buffer (allocated on first use), surrogate code units are squeezed
 * out of the copied region with memmove, and the buffer is re-sorted with
 * qsort. Otherwise the set is converted to a bitmap and each non-surrogate
 * unit is added bit by bit. On both paths, when hasSurrogate is set (the
 * assignments are outside the visible lines, presumably made when a surrogate
 * unit is seen), __CFApplySurrogatesInString is called with
 * CFCharacterSetAddCharactersInRange.
 */
2194 void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet
, CFStringRef theString
) {
2197 BOOL hasSurrogate
= NO
;
2199 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, void, (NSMutableCharacterSet
*)theSet
, addCharactersInString
:(NSString
*)theString
);
2201 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2203 if (__CFCSetIsBuiltin((CFCharacterSetRef
)theSet
) && !__CFCSetIsMutable((CFCharacterSetRef
)theSet
) && !__CFCSetIsInverted((CFCharacterSetRef
)theSet
)) {
2204 CFCharacterSetRef sharedSet
= CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef
)theSet
));
2205 if (sharedSet
== theSet
) { // We're trying to dealloc the builtin set
2206 CFAssert1(0, __kCFLogAssertion
, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__
);
2207 return; // We don't mutate builtin set
2211 if ((__CFCSetIsEmpty(theSet
) && __CFCSetIsInverted(theSet
)) || !(length
= CFStringGetLength(theString
))) return;
2213 if (!__CFCSetIsInverted(theSet
)) {
/* A non-empty set that is not string-backed contributes __kCFStringCharSetMax, which forces the "newLength < max" test below to fail and sends it to the bitmap path. */
2214 CFIndex newLength
= length
+ (__CFCSetIsEmpty(theSet
) ? 0 : (__CFCSetIsString(theSet
) ? __CFCSetStringLength(theSet
) : __kCFStringCharSetMax
));
2216 if (newLength
< __kCFStringCharSetMax
) {
2217 buffer
= __CFCSetStringBuffer(theSet
);
2219 if (NULL
== buffer
) {
2220 buffer
= (UniChar
*)CFAllocatorAllocate(CFGetAllocator(theSet
), __kCFStringCharSetMax
* sizeof(UniChar
), 0);
2222 buffer
+= __CFCSetStringLength(theSet
);
2225 CFStringGetCharacters(theString
, CFRangeMake(0, length
), (UniChar
*)buffer
);
2228 UTF16Char
*characters
= buffer
;
2229 const UTF16Char
*charactersLimit
= characters
+ length
;
2231 while (characters
< charactersLimit
) {
2232 if (CFStringIsSurrogateHighCharacter(*characters
) || CFStringIsSurrogateLowCharacter(*characters
)) {
/* Close the gap left by the surrogate unit; the size is in bytes, hence the sizeof factor. */
2233 memmove(characters
, characters
+ 1, (charactersLimit
- (characters
+ 1)) * sizeof(*characters
));
/* Reduce newLength by the number of units removed from the copied region. */
2241 newLength
-= (length
- (charactersLimit
- buffer
));
2244 if (0 == newLength
) {
/* Nothing left to store: release the buffer only if it was allocated above and never attached to the set. */
2245 if (NULL
== __CFCSetStringBuffer(theSet
)) CFAllocatorDeallocate(CFGetAllocator(theSet
), buffer
);
2247 if (NULL
== __CFCSetStringBuffer(theSet
)) {
2248 __CFCSetPutClassType(theSet
, __kCFCharSetClassString
);
2249 __CFCSetPutStringBuffer(theSet
, buffer
);
2251 __CFCSetPutStringLength(theSet
, newLength
);
2252 qsort(__CFCSetStringBuffer(theSet
), newLength
, sizeof(UniChar
), chcompar
);
2254 __CFCSetPutHasHashValue(theSet
, false);
2256 if (hasSurrogate
) __CFApplySurrogatesInString(theSet
, theString
, &CFCharacterSetAddCharactersInRange
);
2262 // OK, I have to be a bitmap
2263 __CFCSetMakeBitmap(theSet
);
2264 CFStringInlineBuffer inlineBuffer
;
2267 CFStringInitInlineBuffer(theString
, &inlineBuffer
, CFRangeMake(0, length
));
2269 for (idx
= 0;idx
< length
;idx
++) {
2270 UTF16Char character
= __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer
, idx
);
2272 if (CFStringIsSurrogateHighCharacter(character
) || CFStringIsSurrogateLowCharacter(character
)) {
2275 __CFCSetBitmapAddCharacter(__CFCSetBitmapBits(theSet
), character
);
2279 __CFCSetPutHasHashValue(theSet
, false);
2281 if (__CFCheckForExapendedSet
) __CFCheckForExpandedSet(theSet
);
2283 if (hasSurrogate
) __CFApplySurrogatesInString(theSet
, theString
, &CFCharacterSetAddCharactersInRange
);
2286 void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet
, CFStringRef theString
) {
2289 BOOL hasSurrogate
= NO
;
2291 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, void, (NSMutableCharacterSet
*)theSet
, removeCharactersInString
:(NSString
*)theString
);
2293 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2295 if (__CFCSetIsBuiltin((CFCharacterSetRef
)theSet
) && !__CFCSetIsMutable((CFCharacterSetRef
)theSet
) && !__CFCSetIsInverted((CFCharacterSetRef
)theSet
)) {
2296 CFCharacterSetRef sharedSet
= CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef
)theSet
));
2297 if (sharedSet
== theSet
) { // We're trying to dealloc the builtin set
2298 CFAssert1(0, __kCFLogAssertion
, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__
);
2299 return; // We don't mutate builtin set
2303 if ((__CFCSetIsEmpty(theSet
) && !__CFCSetIsInverted(theSet
)) || !(length
= CFStringGetLength(theString
))) return;
2305 if (__CFCSetIsInverted(theSet
)) {
2306 CFIndex newLength
= length
+ (__CFCSetIsEmpty(theSet
) ? 0 : (__CFCSetIsString(theSet
) ? __CFCSetStringLength(theSet
) : __kCFStringCharSetMax
));
2308 if (newLength
< __kCFStringCharSetMax
) {
2309 buffer
= __CFCSetStringBuffer(theSet
);
2311 if (NULL
== buffer
) {
2312 buffer
= (UniChar
*)CFAllocatorAllocate(CFGetAllocator(theSet
), __kCFStringCharSetMax
* sizeof(UniChar
), 0);
2314 buffer
+= __CFCSetStringLength(theSet
);
2317 CFStringGetCharacters(theString
, CFRangeMake(0, length
), (UniChar
*)buffer
);
2320 UTF16Char
*characters
= buffer
;
2321 const UTF16Char
*charactersLimit
= characters
+ length
;
2323 while (characters
< charactersLimit
) {
2324 if (CFStringIsSurrogateHighCharacter(*characters
) || CFStringIsSurrogateLowCharacter(*characters
)) {
2325 memmove(characters
, characters
+ 1, charactersLimit
- (characters
+ 1));
2332 newLength
-= (length
- (charactersLimit
- buffer
));
2335 if (NULL
== __CFCSetStringBuffer(theSet
)) {
2336 __CFCSetPutClassType(theSet
, __kCFCharSetClassString
);
2337 __CFCSetPutStringBuffer(theSet
, buffer
);
2339 __CFCSetPutStringLength(theSet
, newLength
);
2340 qsort(__CFCSetStringBuffer(theSet
), newLength
, sizeof(UniChar
), chcompar
);
2341 __CFCSetPutHasHashValue(theSet
, false);
2343 if (hasSurrogate
) __CFApplySurrogatesInString(theSet
, theString
, &CFCharacterSetRemoveCharactersInRange
);
2349 // OK, I have to be a bitmap
2350 __CFCSetMakeBitmap(theSet
);
2351 CFStringInlineBuffer inlineBuffer
;
2354 CFStringInitInlineBuffer(theString
, &inlineBuffer
, CFRangeMake(0, length
));
2356 for (idx
= 0;idx
< length
;idx
++) {
2357 UTF16Char character
= __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer
, idx
);
2359 if (CFStringIsSurrogateHighCharacter(character
) || CFStringIsSurrogateLowCharacter(character
)) {
2362 __CFCSetBitmapRemoveCharacter(__CFCSetBitmapBits(theSet
), character
);
2366 __CFCSetPutHasHashValue(theSet
, false);
2367 if (__CFCheckForExapendedSet
) __CFCheckForExpandedSet(theSet
);
2369 if (hasSurrogate
) __CFApplySurrogatesInString(theSet
, theString
, &CFCharacterSetRemoveCharactersInRange
);
2372 void CFCharacterSetUnion(CFMutableCharacterSetRef theSet
, CFCharacterSetRef theOtherSet
) {
2373 CFCharacterSetRef expandedSet
= NULL
;
2375 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, void, (NSMutableCharacterSet
*)theSet
, formUnionWithCharacterSet
:(NSCharacterSet
*)theOtherSet
);
2377 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2379 if (__CFCSetIsBuiltin((CFCharacterSetRef
)theSet
) && !__CFCSetIsMutable((CFCharacterSetRef
)theSet
) && !__CFCSetIsInverted((CFCharacterSetRef
)theSet
)) {
2380 CFCharacterSetRef sharedSet
= CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef
)theSet
));
2381 if (sharedSet
== theSet
) { // We're trying to dealloc the builtin set
2382 CFAssert1(0, __kCFLogAssertion
, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__
);
2383 return; // We don't mutate builtin set
2387 if (__CFCSetIsEmpty(theSet
) && __CFCSetIsInverted(theSet
)) return; // Inverted empty set contains all char
2389 if (!CF_IS_OBJC(__kCFCharacterSetTypeID
, theOtherSet
) || (expandedSet
= __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet
))) { // Really CF, we can do some trick here
2390 if (expandedSet
) theOtherSet
= expandedSet
;
2392 if (__CFCSetIsEmpty(theOtherSet
)) {
2393 if (__CFCSetIsInverted(theOtherSet
)) {
2394 if (__CFCSetIsString(theSet
) && __CFCSetStringBuffer(theSet
)) {
2395 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetStringBuffer(theSet
));
2396 } else if (__CFCSetIsBitmap(theSet
) && __CFCSetBitmapBits(theSet
)) {
2397 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetBitmapBits(theSet
));
2398 } else if (__CFCSetIsCompactBitmap(theSet
) && __CFCSetCompactBitmapBits(theSet
)) {
2399 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetCompactBitmapBits(theSet
));
2401 __CFCSetPutClassType(theSet
, __kCFCharSetClassRange
);
2402 __CFCSetPutRangeLength(theSet
, 0);
2403 __CFCSetPutIsInverted(theSet
, true);
2404 __CFCSetPutHasHashValue(theSet
, false);
2405 __CFCSetDeallocateAnnexPlane(theSet
);
2407 } else if (__CFCSetIsBuiltin(theOtherSet
) && __CFCSetIsEmpty(theSet
)) { // theSet can be builtin set
2408 __CFCSetPutClassType(theSet
, __kCFCharSetClassBuiltin
);
2409 __CFCSetPutBuiltinType(theSet
, __CFCSetBuiltinType(theOtherSet
));
2410 if (__CFCSetIsInverted(theOtherSet
)) __CFCSetPutIsInverted(theSet
, true);
2411 if (__CFCSetAnnexIsInverted(theOtherSet
)) __CFCSetAnnexSetIsInverted(theSet
, true);
2412 __CFCSetPutHasHashValue(theSet
, false);
2414 if (__CFCSetIsRange(theOtherSet
)) {
2415 if (__CFCSetIsInverted(theOtherSet
)) {
2416 UTF32Char firstChar
= __CFCSetRangeFirstChar(theOtherSet
);
2417 CFIndex length
= __CFCSetRangeLength(theOtherSet
);
2419 if (firstChar
> 0) CFCharacterSetAddCharactersInRange(theSet
, CFRangeMake(0, firstChar
));
2420 firstChar
+= length
;
2421 length
= 0x110000 - firstChar
;
2422 CFCharacterSetAddCharactersInRange(theSet
, CFRangeMake(firstChar
, length
));
2424 CFCharacterSetAddCharactersInRange(theSet
, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet
), __CFCSetRangeLength(theOtherSet
)));
2426 } else if (__CFCSetIsString(theOtherSet
)) {
2427 CFStringRef string
= CFStringCreateWithCharactersNoCopy(CFGetAllocator(theSet
), __CFCSetStringBuffer(theOtherSet
), __CFCSetStringLength(theOtherSet
), kCFAllocatorNull
);
2428 CFCharacterSetAddCharactersInString(theSet
, string
);
2431 __CFCSetMakeBitmap(theSet
);
2432 if (__CFCSetIsBitmap(theOtherSet
)) {
2433 UInt32
*bitmap1
= (UInt32
*)__CFCSetBitmapBits(theSet
);
2434 UInt32
*bitmap2
= (UInt32
*)__CFCSetBitmapBits(theOtherSet
);
2435 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
2436 while (length
--) *bitmap1
++ |= *bitmap2
++;
2438 UInt32
*bitmap1
= (UInt32
*)__CFCSetBitmapBits(theSet
);
2440 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
2441 uint8_t bitmapBuffer
[__kCFBitmapSize
];
2442 __CFCSetGetBitmap(theOtherSet
, bitmapBuffer
);
2443 bitmap2
= (UInt32
*)bitmapBuffer
;
2444 while (length
--) *bitmap1
++ |= *bitmap2
++;
2446 __CFCSetPutHasHashValue(theSet
, false);
2448 if (__CFCSetHasNonBMPPlane(theOtherSet
)) {
2449 CFMutableCharacterSetRef otherSetPlane
;
2452 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2453 if ((otherSetPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet
, idx
))) {
2454 CFCharacterSetUnion((CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(theSet
, idx
), otherSetPlane
);
2457 } else if (__CFCSetAnnexIsInverted(theOtherSet
)) {
2458 if (__CFCSetHasNonBMPPlane(theSet
)) __CFCSetDeallocateAnnexPlane(theSet
);
2459 __CFCSetAnnexSetIsInverted(theSet
, true);
2460 } else if (__CFCSetIsBuiltin(theOtherSet
)) {
2461 CFMutableCharacterSetRef annexPlane
;
2462 uint8_t bitmapBuffer
[__kCFBitmapSize
];
2465 Boolean isOtherAnnexPlaneInverted
= __CFCSetAnnexIsInverted(theOtherSet
);
2470 for (planeIndex
= 1;planeIndex
<= MAX_ANNEX_PLANE
;planeIndex
++) {
2471 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet
), planeIndex
, bitmapBuffer
, (isOtherAnnexPlaneInverted
!= 0));
2472 if (result
!= kCFUniCharBitmapEmpty
) {
2473 annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(theSet
, planeIndex
);
2474 if (result
== kCFUniCharBitmapAll
) {
2475 CFCharacterSetAddCharactersInRange(annexPlane
, CFRangeMake(0x0000, 0x10000));
2477 __CFCSetMakeBitmap(annexPlane
);
2478 bitmap1
= (UInt32
*)__CFCSetBitmapBits(annexPlane
);
2479 length
= __kCFBitmapSize
/ sizeof(UInt32
);
2480 bitmap2
= (UInt32
*)bitmapBuffer
;
2481 while (length
--) *bitmap1
++ |= *bitmap2
++;
2487 if (__CFCheckForExapendedSet
) __CFCheckForExpandedSet(theSet
);
2488 } else { // It's NSCharacterSet
2489 CFDataRef bitmapRep
= CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault
, theOtherSet
);
2490 const UInt32
*bitmap2
= (bitmapRep
&& CFDataGetLength(bitmapRep
) ? (const UInt32
*)CFDataGetBytePtr(bitmapRep
) : NULL
);
2493 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
2494 __CFCSetMakeBitmap(theSet
);
2495 bitmap1
= (UInt32
*)__CFCSetBitmapBits(theSet
);
2496 while (length
--) *bitmap1
++ |= *bitmap2
++;
2497 __CFCSetPutHasHashValue(theSet
, false);
2499 CFRelease(bitmapRep
);
2503 void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet
, CFCharacterSetRef theOtherSet
) {
2504 CFCharacterSetRef expandedSet
= NULL
;
2506 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, void, (NSMutableCharacterSet
*)theSet
, formIntersectionWithCharacterSet
:(NSCharacterSet
*)theOtherSet
);
2508 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2510 if (__CFCSetIsBuiltin((CFCharacterSetRef
)theSet
) && !__CFCSetIsMutable((CFCharacterSetRef
)theSet
) && !__CFCSetIsInverted((CFCharacterSetRef
)theSet
)) {
2511 CFCharacterSetRef sharedSet
= CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef
)theSet
));
2512 if (sharedSet
== theSet
) { // We're trying to dealloc the builtin set
2513 CFAssert1(0, __kCFLogAssertion
, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__
);
2514 return; // We don't mutate builtin set
2518 if (__CFCSetIsEmpty(theSet
) && !__CFCSetIsInverted(theSet
)) return; // empty set
2520 if (!CF_IS_OBJC(__kCFCharacterSetTypeID
, theOtherSet
) || (expandedSet
= __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet
))) { // Really CF, we can do some trick here
2521 if (expandedSet
) theOtherSet
= expandedSet
;
2523 if (__CFCSetIsEmpty(theOtherSet
)) {
2524 if (!__CFCSetIsInverted(theOtherSet
)) {
2525 if (__CFCSetIsString(theSet
) && __CFCSetStringBuffer(theSet
)) {
2526 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetStringBuffer(theSet
));
2527 } else if (__CFCSetIsBitmap(theSet
) && __CFCSetBitmapBits(theSet
)) {
2528 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetBitmapBits(theSet
));
2529 } else if (__CFCSetIsCompactBitmap(theSet
) && __CFCSetCompactBitmapBits(theSet
)) {
2530 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetCompactBitmapBits(theSet
));
2532 __CFCSetPutClassType(theSet
, __kCFCharSetClassBitmap
);
2533 __CFCSetPutBitmapBits(theSet
, NULL
);
2534 __CFCSetPutIsInverted(theSet
, false);
2535 theSet
->_hashValue
= 0;
2536 __CFCSetPutHasHashValue(theSet
, true);
2537 __CFCSetDeallocateAnnexPlane(theSet
);
2539 } else if (__CFCSetIsEmpty(theSet
)) { // non inverted empty set contains all character
2540 __CFCSetPutClassType(theSet
, __CFCSetClassType(theOtherSet
));
2541 __CFCSetPutHasHashValue(theSet
, __CFCSetHasHashValue(theOtherSet
));
2542 __CFCSetPutIsInverted(theSet
, __CFCSetIsInverted(theOtherSet
));
2543 theSet
->_hashValue
= theOtherSet
->_hashValue
;
2544 if (__CFCSetHasNonBMPPlane(theOtherSet
)) {
2545 CFMutableCharacterSetRef otherSetPlane
;
2547 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2548 if ((otherSetPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet
, idx
))) {
2549 otherSetPlane
= (CFMutableCharacterSetRef
)CFCharacterSetCreateMutableCopy(CFGetAllocator(theSet
), otherSetPlane
);
2550 __CFCSetPutCharacterSetToAnnexPlane(theSet
, otherSetPlane
, idx
);
2551 CFRelease(otherSetPlane
);
2554 __CFCSetAnnexSetIsInverted(theSet
, __CFCSetAnnexIsInverted(theOtherSet
));
2557 switch (__CFCSetClassType(theOtherSet
)) {
2558 case __kCFCharSetClassBuiltin
:
2559 __CFCSetPutBuiltinType(theSet
, __CFCSetBuiltinType(theOtherSet
));
2562 case __kCFCharSetClassRange
:
2563 __CFCSetPutRangeFirstChar(theSet
, __CFCSetRangeFirstChar(theOtherSet
));
2564 __CFCSetPutRangeLength(theSet
, __CFCSetRangeLength(theOtherSet
));
2567 case __kCFCharSetClassString
:
2568 __CFCSetPutStringLength(theSet
, __CFCSetStringLength(theOtherSet
));
2569 if (!__CFCSetStringBuffer(theSet
))
2570 __CFCSetPutStringBuffer(theSet
, (UniChar
*)CFAllocatorAllocate(CFGetAllocator(theSet
), __kCFStringCharSetMax
* sizeof(UniChar
), 0));
2571 memmove(__CFCSetStringBuffer(theSet
), __CFCSetStringBuffer(theOtherSet
), __CFCSetStringLength(theSet
) * sizeof(UniChar
));
2574 case __kCFCharSetClassBitmap
:
2575 __CFCSetPutBitmapBits(theSet
, (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet
), sizeof(uint8_t) * __kCFBitmapSize
, 0));
2576 memmove(__CFCSetBitmapBits(theSet
), __CFCSetBitmapBits(theOtherSet
), __kCFBitmapSize
);
2579 case __kCFCharSetClassCompactBitmap
: {
2580 const uint8_t *cBitmap
= __CFCSetCompactBitmapBits(theOtherSet
);
2582 uint32_t size
= __CFCSetGetCompactBitmapSize(cBitmap
);
2583 newBitmap
= (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet
), sizeof(uint8_t) * size
, 0);
2584 __CFCSetPutBitmapBits(theSet
, newBitmap
);
2585 memmove(newBitmap
, cBitmap
, size
);
2590 CFAssert1(0, __kCFLogAssertion
, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__
); // We should never come here
2593 __CFCSetMakeBitmap(theSet
);
2594 if (__CFCSetIsBitmap(theOtherSet
)) {
2595 UInt32
*bitmap1
= (UInt32
*)__CFCSetBitmapBits(theSet
);
2596 UInt32
*bitmap2
= (UInt32
*)__CFCSetBitmapBits(theOtherSet
);
2597 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
2598 while (length
--) *bitmap1
++ &= *bitmap2
++;
2600 UInt32
*bitmap1
= (UInt32
*)__CFCSetBitmapBits(theSet
);
2602 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
2603 uint8_t bitmapBuffer
[__kCFBitmapSize
];
2604 __CFCSetGetBitmap(theOtherSet
, bitmapBuffer
);
2605 bitmap2
= (UInt32
*)bitmapBuffer
;
2606 while (length
--) *bitmap1
++ &= *bitmap2
++;
2608 __CFCSetPutHasHashValue(theSet
, false);
2609 if (__CFCSetHasNonBMPPlane(theOtherSet
)) {
2610 CFMutableCharacterSetRef annexPlane
;
2611 CFMutableCharacterSetRef otherSetPlane
;
2612 CFMutableCharacterSetRef emptySet
= CFCharacterSetCreateMutable(NULL
);
2614 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2615 if ((otherSetPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet
, idx
))) {
2616 if (__CFCSetAnnexIsInverted(theOtherSet
)) CFCharacterSetInvert(otherSetPlane
);
2617 annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(theSet
, idx
);
2618 if (__CFCSetAnnexIsInverted(theSet
)) CFCharacterSetInvert(annexPlane
);
2619 CFCharacterSetIntersect(annexPlane
, otherSetPlane
);
2620 if (__CFCSetAnnexIsInverted(theSet
)) CFCharacterSetInvert(annexPlane
);
2621 if (__CFCSetAnnexIsInverted(theOtherSet
)) CFCharacterSetInvert(otherSetPlane
);
2622 if (__CFCSetIsEmpty(annexPlane
) && !__CFCSetIsInverted(annexPlane
)) __CFCSetPutCharacterSetToAnnexPlane(theSet
, NULL
, idx
);
2623 } else if ((annexPlane
= (CFMutableCharacterSetRef
) __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
))) {
2624 if (__CFCSetAnnexIsInverted(theSet
)) { // if the annexPlane is inverted, we need to set the plane to empty
2625 CFCharacterSetInvert(annexPlane
);
2626 CFCharacterSetIntersect(annexPlane
, emptySet
);
2627 CFCharacterSetInvert(annexPlane
);
2628 } else { // the annexPlane is not inverted, we can clear the plane
2629 __CFCSetPutCharacterSetToAnnexPlane(theSet
, NULL
, idx
);
2631 } else if ((__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
) == NULL
) && __CFCSetAnnexIsInverted(theSet
)) {
2632 // the set has no such annex plane and the annex plane is inverted, it means the set contains everything in the annex plane
2633 annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(theSet
, idx
);
2634 if (__CFCSetAnnexIsInverted(theSet
)) CFCharacterSetInvert(annexPlane
);
2635 CFCharacterSetIntersect(annexPlane
, emptySet
);
2636 if (__CFCSetAnnexIsInverted(theSet
)) CFCharacterSetInvert(annexPlane
);
2639 CFRelease(emptySet
);
2640 if (!__CFCSetHasNonBMPPlane(theSet
)) __CFCSetDeallocateAnnexPlane(theSet
);
2641 } else if (__CFCSetIsBuiltin(theOtherSet
) && !__CFCSetAnnexIsInverted(theOtherSet
)) {
2642 CFMutableCharacterSetRef annexPlane
;
2643 uint8_t bitmapBuffer
[__kCFBitmapSize
];
2650 for (planeIndex
= 1;planeIndex
<= MAX_ANNEX_PLANE
;planeIndex
++) {
2651 annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, planeIndex
);
2653 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet
), planeIndex
, bitmapBuffer
, false);
2654 if (result
== kCFUniCharBitmapEmpty
) {
2655 __CFCSetPutCharacterSetToAnnexPlane(theSet
, NULL
, planeIndex
);
2656 } else if (result
== kCFUniCharBitmapFilled
) {
2657 Boolean isEmpty
= true;
2659 __CFCSetMakeBitmap(annexPlane
);
2660 bitmap1
= (UInt32
*)__CFCSetBitmapBits(annexPlane
);
2661 length
= __kCFBitmapSize
/ sizeof(UInt32
);
2662 bitmap2
= (UInt32
*)bitmapBuffer
;
2665 if ((*bitmap1
++ &= *bitmap2
++)) isEmpty
= false;
2667 if (isEmpty
) __CFCSetPutCharacterSetToAnnexPlane(theSet
, NULL
, planeIndex
);
2671 if (!__CFCSetHasNonBMPPlane(theSet
)) __CFCSetDeallocateAnnexPlane(theSet
);
2672 } else if (__CFCSetIsRange(theOtherSet
)) {
2673 CFMutableCharacterSetRef tempOtherSet
= CFCharacterSetCreateMutable(CFGetAllocator(theSet
));
2674 CFMutableCharacterSetRef annexPlane
;
2675 CFMutableCharacterSetRef otherSetPlane
;
2676 CFMutableCharacterSetRef emptySet
= CFCharacterSetCreateMutable(NULL
);
2679 __CFCSetAddNonBMPPlanesInRange(tempOtherSet
, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet
), __CFCSetRangeLength(theOtherSet
)));
2681 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2682 if ((otherSetPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(tempOtherSet
, idx
))) {
2683 annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(theSet
, idx
);
2684 if (__CFCSetAnnexIsInverted(tempOtherSet
)) CFCharacterSetInvert(otherSetPlane
);
2685 if (__CFCSetAnnexIsInverted(theSet
)) CFCharacterSetInvert(annexPlane
);
2686 CFCharacterSetIntersect(annexPlane
, otherSetPlane
);
2687 if (__CFCSetAnnexIsInverted(theSet
)) CFCharacterSetInvert(annexPlane
);
2688 if (__CFCSetAnnexIsInverted(tempOtherSet
)) CFCharacterSetInvert(otherSetPlane
);
2689 if (__CFCSetIsEmpty(annexPlane
) && !__CFCSetIsInverted(annexPlane
)) __CFCSetPutCharacterSetToAnnexPlane(theSet
, NULL
, idx
);
2690 } else if ((annexPlane
= (CFMutableCharacterSetRef
) __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
))) {
2691 if (__CFCSetAnnexIsInverted(theSet
)) {
2692 CFCharacterSetInvert(annexPlane
);
2693 CFCharacterSetIntersect(annexPlane
, emptySet
);
2694 CFCharacterSetInvert(annexPlane
);
2696 __CFCSetPutCharacterSetToAnnexPlane(theSet
, NULL
, idx
);
2698 } else if ((__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
) == NULL
) && __CFCSetAnnexIsInverted(theSet
)) {
2699 // the set has no such annex plane and the annex plane is inverted, it means the set contains everything in the annex plane
2700 annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(theSet
, idx
);
2701 if (__CFCSetAnnexIsInverted(theSet
)) CFCharacterSetInvert(annexPlane
);
2702 CFCharacterSetIntersect(annexPlane
, emptySet
);
2703 if (__CFCSetAnnexIsInverted(theSet
)) CFCharacterSetInvert(annexPlane
);
2706 if (!__CFCSetHasNonBMPPlane(theSet
)) __CFCSetDeallocateAnnexPlane(theSet
);
2707 CFRelease(tempOtherSet
);
2708 CFRelease(emptySet
);
2709 } else if ((__CFCSetHasNonBMPPlane(theSet
) || __CFCSetAnnexIsInverted(theSet
)) && !__CFCSetAnnexIsInverted(theOtherSet
)) {
2710 __CFCSetDeallocateAnnexPlane(theSet
);
2713 if (__CFCheckForExapendedSet
) __CFCheckForExpandedSet(theSet
);
2714 } else { // It's NSCharacterSet
2715 CFDataRef bitmapRep
= CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault
, theOtherSet
);
2716 const UInt32
*bitmap2
= (bitmapRep
&& CFDataGetLength(bitmapRep
) ? (const UInt32
*)CFDataGetBytePtr(bitmapRep
) : NULL
);
2719 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
2720 __CFCSetMakeBitmap(theSet
);
2721 bitmap1
= (UInt32
*)__CFCSetBitmapBits(theSet
);
2722 while (length
--) *bitmap1
++ &= *bitmap2
++;
2723 __CFCSetPutHasHashValue(theSet
, false);
2725 CFRelease(bitmapRep
);
2729 void CFCharacterSetInvert(CFMutableCharacterSetRef theSet
) {
2731 CF_OBJC_FUNCDISPATCHV(__kCFCharacterSetTypeID
, void, (NSMutableCharacterSet
*)theSet
, invert
);
2733 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2735 if (__CFCSetIsBuiltin((CFCharacterSetRef
)theSet
) && !__CFCSetIsMutable((CFCharacterSetRef
)theSet
) && !__CFCSetIsInverted((CFCharacterSetRef
)theSet
)) {
2736 CFCharacterSetRef sharedSet
= CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef
)theSet
));
2737 if (sharedSet
== theSet
) { // We're trying to dealloc the builtin set
2738 CFAssert1(0, __kCFLogAssertion
, "%s: Trying to mutable predefined set.", __PRETTY_FUNCTION__
);
2739 return; // We don't mutate builtin set
2743 __CFCSetPutHasHashValue(theSet
, false);
2745 if (__CFCSetClassType(theSet
) == __kCFCharSetClassBitmap
) {
2747 CFIndex count
= __kCFBitmapSize
/ sizeof(UInt32
);
2748 UInt32
*bitmap
= (UInt32
*) __CFCSetBitmapBits(theSet
);
2750 if (NULL
== bitmap
) {
2751 bitmap
= (UInt32
*)CFAllocatorAllocate(CFGetAllocator(theSet
), __kCFBitmapSize
, 0);
2752 __CFCSetPutBitmapBits(theSet
, (uint8_t *)bitmap
);
2753 for (idx
= 0;idx
< count
;idx
++) bitmap
[idx
] = ((UInt32
)0xFFFFFFFF);
2755 for (idx
= 0;idx
< count
;idx
++) bitmap
[idx
] = ~(bitmap
[idx
]);
2757 __CFCSetAllocateAnnexForPlane(theSet
, 0); // We need to alloc annex to invert
2758 } else if (__CFCSetClassType(theSet
) == __kCFCharSetClassCompactBitmap
) {
2759 uint8_t *bitmap
= __CFCSetCompactBitmapBits(theSet
);
2764 for (idx
= 0;idx
< __kCFCompactBitmapNumPages
;idx
++) {
2765 value
= bitmap
[idx
];
2768 bitmap
[idx
] = UINT8_MAX
;
2769 } else if (value
== UINT8_MAX
) {
2772 length
+= __kCFCompactBitmapPageSize
;
2775 bitmap
+= __kCFCompactBitmapNumPages
;
2776 for (idx
= 0;idx
< length
;idx
++) bitmap
[idx
] = ~(bitmap
[idx
]);
2777 __CFCSetAllocateAnnexForPlane(theSet
, 0); // We need to alloc annex to invert
2779 __CFCSetPutIsInverted(theSet
, !__CFCSetIsInverted(theSet
));
2781 __CFCSetAnnexSetIsInverted(theSet
, !__CFCSetAnnexIsInverted(theSet
));
2784 void CFCharacterSetCompact(CFMutableCharacterSetRef theSet
) {
2785 if (__CFCSetIsBitmap(theSet
) && __CFCSetBitmapBits(theSet
)) __CFCSetMakeCompact(theSet
);
2786 if (__CFCSetHasNonBMPPlane(theSet
)) {
2787 CFMutableCharacterSetRef annex
;
2790 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2791 if ((annex
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
)) && __CFCSetIsBitmap(annex
) && __CFCSetBitmapBits(annex
)) {
2792 __CFCSetMakeCompact(annex
);
2798 void CFCharacterSetFast(CFMutableCharacterSetRef theSet
) {
2799 if (__CFCSetIsCompactBitmap(theSet
) && __CFCSetCompactBitmapBits(theSet
)) __CFCSetMakeBitmap(theSet
);
2800 if (__CFCSetHasNonBMPPlane(theSet
)) {
2801 CFMutableCharacterSetRef annex
;
2804 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2805 if ((annex
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
)) && __CFCSetIsCompactBitmap(annex
) && __CFCSetCompactBitmapBits(annex
)) {
2806 __CFCSetMakeBitmap(annex
);
2812 /* Keyed-coding support
2814 CFCharacterSetKeyedCodingType
_CFCharacterSetGetKeyedCodingType(CFCharacterSetRef cset
) {
2815 if (CF_IS_OBJC(__kCFCharacterSetTypeID
, cset
)) return kCFCharacterSetKeyedCodingTypeBitmap
;
2817 switch (__CFCSetClassType(cset
)) {
2818 case __kCFCharSetClassBuiltin
: return ((__CFCSetBuiltinType(cset
) < kCFCharacterSetSymbol
) ? kCFCharacterSetKeyedCodingTypeBuiltin
: kCFCharacterSetKeyedCodingTypeBuiltinAndBitmap
);
2819 case __kCFCharSetClassRange
: return kCFCharacterSetKeyedCodingTypeRange
;
2821 case __kCFCharSetClassString
: // We have to check if we have non-BMP here
2822 if (!__CFCSetHasNonBMPPlane(cset
) && !__CFCSetAnnexIsInverted(cset
)) return kCFCharacterSetKeyedCodingTypeString
; // BMP only. we can archive the string
2826 return kCFCharacterSetKeyedCodingTypeBitmap
;
2830 CFCharacterSetPredefinedSet
_CFCharacterSetGetKeyedCodingBuiltinType(CFCharacterSetRef cset
) { return __CFCSetBuiltinType(cset
); }
2831 CFRange
_CFCharacterSetGetKeyedCodingRange(CFCharacterSetRef cset
) { return CFRangeMake(__CFCSetRangeFirstChar(cset
), __CFCSetRangeLength(cset
)); }
2832 CFStringRef
_CFCharacterSetCreateKeyedCodingString(CFCharacterSetRef cset
) { return CFStringCreateWithCharacters(kCFAllocatorSystemDefault
, __CFCSetStringBuffer(cset
), __CFCSetStringLength(cset
)); }
2834 bool _CFCharacterSetIsInverted(CFCharacterSetRef cset
) { return (__CFCSetIsInverted(cset
) != 0); }
2835 void _CFCharacterSetSetIsInverted(CFCharacterSetRef cset
, bool flag
) { __CFCSetPutIsInverted((CFMutableCharacterSetRef
)cset
, flag
); }
2837 /* Inline buffer support
2839 void CFCharacterSetInitInlineBuffer(CFCharacterSetRef cset
, CFCharacterSetInlineBuffer
*buffer
) {
2840 memset(buffer
, 0, sizeof(CFCharacterSetInlineBuffer
));
2841 buffer
->cset
= cset
;
2842 buffer
->rangeLimit
= 0x10000;
2844 if (CF_IS_OBJC(__kCFCharacterSetTypeID
, cset
)) {
2845 CFCharacterSetRef expandedSet
= __CFCharacterSetGetExpandedSetForNSCharacterSet(cset
);
2847 if (NULL
== expandedSet
) {
2848 buffer
->flags
= kCFCharacterSetNoBitmapAvailable
;
2849 buffer
->rangeLimit
= 0x110000;
2857 switch (__CFCSetClassType(cset
)) {
2858 case __kCFCharSetClassBuiltin
:
2859 buffer
->bitmap
= CFUniCharGetBitmapPtrForPlane(__CFCSetBuiltinType(cset
), 0);
2860 buffer
->rangeLimit
= 0x110000;
2861 if (NULL
== buffer
->bitmap
) {
2862 buffer
->flags
= kCFCharacterSetNoBitmapAvailable
;
2864 if (__CFCSetIsInverted(cset
)) buffer
->flags
= kCFCharacterSetIsInverted
;
2868 case __kCFCharSetClassRange
:
2869 buffer
->rangeStart
= __CFCSetRangeFirstChar(cset
);
2870 buffer
->rangeLimit
= __CFCSetRangeFirstChar(cset
) + __CFCSetRangeLength(cset
);
2871 if (__CFCSetIsInverted(cset
)) buffer
->flags
= kCFCharacterSetIsInverted
;
2874 case __kCFCharSetClassString
:
2875 buffer
->flags
= kCFCharacterSetNoBitmapAvailable
;
2876 if (__CFCSetStringLength(cset
) > 0) {
2877 buffer
->rangeStart
= *__CFCSetStringBuffer(cset
);
2878 buffer
->rangeLimit
= *(__CFCSetStringBuffer(cset
) + __CFCSetStringLength(cset
) - 1) + 1;
2880 if (__CFCSetIsInverted(cset
)) {
2881 if (0 == buffer
->rangeStart
) {
2882 buffer
->rangeStart
= buffer
->rangeLimit
;
2883 buffer
->rangeLimit
= 0x10000;
2884 } else if (0x10000 == buffer
->rangeLimit
) {
2885 buffer
->rangeLimit
= buffer
->rangeStart
;
2886 buffer
->rangeStart
= 0;
2888 buffer
->rangeStart
= 0;
2889 buffer
->rangeLimit
= 0x10000;
2895 case __kCFCharSetClassBitmap
:
2896 case __kCFCharSetClassCompactBitmap
:
2897 buffer
->bitmap
= __CFCSetCompactBitmapBits(cset
);
2898 if (NULL
== buffer
->bitmap
) {
2899 buffer
->flags
= kCFCharacterSetIsCompactBitmap
;
2900 if (__CFCSetIsInverted(cset
)) buffer
->flags
|= kCFCharacterSetIsInverted
;
2902 if (__kCFCharSetClassCompactBitmap
== __CFCSetClassType(cset
)) buffer
->flags
= kCFCharacterSetIsCompactBitmap
;
2907 CFAssert1(0, __kCFLogAssertion
, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__
); // We should never come here
2911 if (__CFCSetAnnexIsInverted(cset
)) {
2912 buffer
->rangeLimit
= 0x110000;
2913 } else if (__CFCSetHasNonBMPPlane(cset
)) {
2916 for (index
= MAX_ANNEX_PLANE
;index
> 0;index
--) {
2917 if (NULL
!= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset
, index
)) {
2918 buffer
->rangeLimit
= (index
+ 1) << 16;