2 * Copyright (c) 2011 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
25 Copyright (c) 1999-2011, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
29 #include <CoreFoundation/CFCharacterSet.h>
30 #include <CoreFoundation/CFByteOrder.h>
31 #include "CFCharacterSetPriv.h"
32 #include <CoreFoundation/CFData.h>
33 #include <CoreFoundation/CFString.h>
34 #include "CFInternal.h"
35 #include <CoreFoundation/CFUniChar.h>
36 #include "CFUniCharPriv.h"
#define BITSPERBYTE 8 /* (CHAR_BIT * sizeof(unsigned char)) */

/* Shift counts used throughout this file to turn a character code into a
   byte index (log2 of BITSPERBYTE) or a 32-bit word index (log2 of 32).
   Guarded so an existing definition elsewhere is not disturbed.
*/
#ifndef LOG_BPB
#define LOG_BPB 3
#endif
#ifndef LOG_BPLW
#define LOG_BPLW 5
#endif

/* Number of characters in one plane (the BMP). */
#define NUMCHARACTERS 65536

/* Highest non-BMP plane number that can be stored in the annex. */
#define MAX_ANNEX_PLANE (16)

/* Number of things in the array keeping the bits.
*/
#define __kCFBitmapSize (NUMCHARACTERS / BITSPERBYTE)

/* How many elements max can be in an __kCFCharSetClassString CFCharacterSet
*/
#define __kCFStringCharSetMax 64

/* The last builtin set ID number
*/
#define __kCFLastBuiltinSetID kCFCharacterSetNewline

/* How many elements in the "singles" array before we use binary search.
*/
#define __kCFSetBreakeven 10

/* This tells us, within 1k or so, whether a thing is POTENTIALLY in the set (in the bitmap blob of the private structure) before we bother to do specific checking.
   Both arguments are parenthesised so that expression arguments expand safely.
*/
#define __CFCSetBitsInRange(n, i) ((i)[(n) >> 15] & (1L << (((n) >> 10) % 32)))

/* Compact bitmap params
*/
#define __kCFCompactBitmapNumPages (256)

#define __kCFCompactBitmapMaxPages (128) // the max pages allocated

#define __kCFCompactBitmapPageSize (__kCFBitmapSize / __kCFCompactBitmapNumPages)
77 CFCharacterSetRef
*_nonBMPPlanes
;
78 unsigned int _validEntriesBitmap
;
79 unsigned char _numOfAllocEntries
;
80 unsigned char _isAnnexInverted
;
82 } CFCharSetAnnexStruct
;
84 struct __CFCharacterSet
{
86 CFHashCode _hashValue
;
106 CFCharSetAnnexStruct
*_annex
;
/* _base._info values interesting for CFCharacterSet
*/
enum {
    /* Storage class of the set (selects the active _variants member). */
    __kCFCharSetClassTypeMask = 0x0070,
    __kCFCharSetClassBuiltin = 0x0000,
    __kCFCharSetClassRange = 0x0010,
    __kCFCharSetClassString = 0x0020,
    __kCFCharSetClassBitmap = 0x0030,
    __kCFCharSetClassSet = 0x0040,              /* NOTE(review): same value as the compact bitmap class below */
    __kCFCharSetClassCompactBitmap = 0x0040,

    __kCFCharSetIsInvertedMask = 0x0008,
    __kCFCharSetIsInverted = 0x0008,

    __kCFCharSetHasHashValueMask = 0x00004,
    __kCFCharSetHasHashValue = 0x0004,

    /* Generic CFBase values */
    __kCFCharSetIsMutableMask = 0x0001,
    __kCFCharSetIsMutable = 0x0001,
};
/* Inline accessor macros for _base._info
*/
133 CF_INLINE Boolean
__CFCSetIsMutable(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetIsMutableMask
) == __kCFCharSetIsMutable
;}
134 CF_INLINE Boolean
__CFCSetIsBuiltin(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetClassTypeMask
) == __kCFCharSetClassBuiltin
;}
135 CF_INLINE Boolean
__CFCSetIsRange(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetClassTypeMask
) == __kCFCharSetClassRange
;}
136 CF_INLINE Boolean
__CFCSetIsString(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetClassTypeMask
) == __kCFCharSetClassString
;}
137 CF_INLINE Boolean
__CFCSetIsBitmap(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetClassTypeMask
) == __kCFCharSetClassBitmap
;}
138 CF_INLINE Boolean
__CFCSetIsCompactBitmap(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetClassTypeMask
) == __kCFCharSetClassCompactBitmap
;}
139 CF_INLINE Boolean
__CFCSetIsInverted(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetIsInvertedMask
) == __kCFCharSetIsInverted
;}
140 CF_INLINE Boolean
__CFCSetHasHashValue(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetHasHashValueMask
) == __kCFCharSetHasHashValue
;}
141 CF_INLINE UInt32
__CFCSetClassType(CFCharacterSetRef cset
) {return (cset
->_base
._cfinfo
[CF_INFO_BITS
] & __kCFCharSetClassTypeMask
);}
143 CF_INLINE
void __CFCSetPutIsMutable(CFMutableCharacterSetRef cset
, Boolean isMutable
) {(isMutable
? (cset
->_base
._cfinfo
[CF_INFO_BITS
] |= __kCFCharSetIsMutable
) : (cset
->_base
._cfinfo
[CF_INFO_BITS
] &= ~ __kCFCharSetIsMutable
));}
144 CF_INLINE
void __CFCSetPutIsInverted(CFMutableCharacterSetRef cset
, Boolean isInverted
) {(isInverted
? (cset
->_base
._cfinfo
[CF_INFO_BITS
] |= __kCFCharSetIsInverted
) : (cset
->_base
._cfinfo
[CF_INFO_BITS
] &= ~__kCFCharSetIsInverted
));}
145 CF_INLINE
void __CFCSetPutHasHashValue(CFMutableCharacterSetRef cset
, Boolean hasHash
) {(hasHash
? (cset
->_base
._cfinfo
[CF_INFO_BITS
] |= __kCFCharSetHasHashValue
) : (cset
->_base
._cfinfo
[CF_INFO_BITS
] &= ~__kCFCharSetHasHashValue
));}
146 CF_INLINE
void __CFCSetPutClassType(CFMutableCharacterSetRef cset
, UInt32 classType
) {cset
->_base
._cfinfo
[CF_INFO_BITS
] &= ~__kCFCharSetClassTypeMask
; cset
->_base
._cfinfo
[CF_INFO_BITS
] |= classType
;}
148 __private_extern__ Boolean
__CFCharacterSetIsMutable(CFCharacterSetRef cset
) {return __CFCSetIsMutable(cset
);}
/* Inline contents accessor macros
*/
152 CF_INLINE CFCharacterSetPredefinedSet
__CFCSetBuiltinType(CFCharacterSetRef cset
) {return cset
->_variants
._builtin
._type
;}
153 CF_INLINE UInt32
__CFCSetRangeFirstChar(CFCharacterSetRef cset
) {return cset
->_variants
._range
._firstChar
;}
154 CF_INLINE CFIndex
__CFCSetRangeLength(CFCharacterSetRef cset
) {return cset
->_variants
._range
._length
;}
155 CF_INLINE UniChar
*__CFCSetStringBuffer(CFCharacterSetRef cset
) {return (UniChar
*)(cset
->_variants
._string
._buffer
);}
156 CF_INLINE CFIndex
__CFCSetStringLength(CFCharacterSetRef cset
) {return cset
->_variants
._string
._length
;}
157 CF_INLINE
uint8_t *__CFCSetBitmapBits(CFCharacterSetRef cset
) {return cset
->_variants
._bitmap
._bits
;}
158 CF_INLINE
uint8_t *__CFCSetCompactBitmapBits(CFCharacterSetRef cset
) {return cset
->_variants
._compactBitmap
._cBits
;}
160 CF_INLINE
void __CFCSetPutBuiltinType(CFMutableCharacterSetRef cset
, CFCharacterSetPredefinedSet type
) {cset
->_variants
._builtin
._type
= type
;}
161 CF_INLINE
void __CFCSetPutRangeFirstChar(CFMutableCharacterSetRef cset
, UInt32 first
) {cset
->_variants
._range
._firstChar
= first
;}
162 CF_INLINE
void __CFCSetPutRangeLength(CFMutableCharacterSetRef cset
, CFIndex length
) {cset
->_variants
._range
._length
= length
;}
163 CF_INLINE
void __CFCSetPutStringBuffer(CFMutableCharacterSetRef cset
, UniChar
*theBuffer
) {cset
->_variants
._string
._buffer
= theBuffer
;}
164 CF_INLINE
void __CFCSetPutStringLength(CFMutableCharacterSetRef cset
, CFIndex length
) {cset
->_variants
._string
._length
= length
;}
165 CF_INLINE
void __CFCSetPutBitmapBits(CFMutableCharacterSetRef cset
, uint8_t *bits
) {cset
->_variants
._bitmap
._bits
= bits
;}
166 CF_INLINE
void __CFCSetPutCompactBitmapBits(CFMutableCharacterSetRef cset
, uint8_t *bits
) {cset
->_variants
._compactBitmap
._cBits
= bits
;}
/* Argument validators. They expand to real checks only when
   CF_ENABLE_ASSERTIONS is defined; otherwise they compile to nothing.
   The logged values are CFIndex-sized, so they are cast to long and printed
   with %ld rather than %d.
*/
#if defined(CF_ENABLE_ASSERTIONS)
/* Asserts that type is in 1..__kCFLastBuiltinSetID. */
CF_INLINE void __CFCSetValidateBuiltinType(CFCharacterSetPredefinedSet type, const char *func) {
    CFAssert2(type > 0 && type <= __kCFLastBuiltinSetID, __kCFLogAssertion, "%s: Unknowen builtin type %ld", func, (long)type);
}
/* Asserts that theRange starts at >= 0 and ends at or below 0x1FFFFF. */
CF_INLINE void __CFCSetValidateRange(CFRange theRange, const char *func) {
    CFAssert3(theRange.location >= 0 && theRange.location + theRange.length <= 0x1FFFFF, __kCFLogAssertion, "%s: Range out of Unicode range (location -> %ld length -> %ld)", func, (long)theRange.location, (long)theRange.length);
}
/* Asserts that cset is a character set and that its mutable flag is set. */
CF_INLINE void __CFCSetValidateTypeAndMutability(CFCharacterSetRef cset, const char *func) {
    __CFGenericValidateType(cset, __kCFCharacterSetTypeID);
    CFAssert1(__CFCSetIsMutable(cset), __kCFLogAssertion, "%s: Immutable character set passed to mutable function", func);
}
#else
#define __CFCSetValidateBuiltinType(t,f)
#define __CFCSetValidateRange(r,f)
#define __CFCSetValidateTypeAndMutability(r,f)
#endif /* CF_ENABLE_ASSERTIONS */
/* Inline utility funcs
*/
189 static Boolean
__CFCSetIsEqualBitmap(const UInt32
*bits1
, const UInt32
*bits2
) {
190 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
192 if (bits1
== bits2
) {
194 } else if (bits1
&& bits2
) {
195 if (bits1
== (const UInt32
*)-1) {
196 while (length
--) if ((UInt32
)-1 != *bits2
++) return false;
197 } else if (bits2
== (const UInt32
*)-1) {
198 while (length
--) if ((UInt32
)-1 != *bits1
++) return false;
200 while (length
--) if (*bits1
++ != *bits2
++) return false;
203 } else if (!bits1
&& !bits2
) { // empty set
206 if (bits2
) bits1
= bits2
;
207 if (bits1
== (const UInt32
*)-1) return false;
208 while (length
--) if (*bits1
++) return false;
213 CF_INLINE Boolean
__CFCSetIsEqualBitmapInverted(const UInt32
*bits1
, const UInt32
*bits2
) {
214 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
216 while (length
--) if (*bits1
++ != ~(*(bits2
++))) return false;
220 static Boolean
__CFCSetIsBitmapEqualToRange(const UInt32
*bits
, UniChar firstChar
, UniChar lastChar
, Boolean isInverted
) {
221 CFIndex firstCharIndex
= firstChar
>> LOG_BPB
;
222 CFIndex lastCharIndex
= lastChar
>> LOG_BPB
;
226 if (firstCharIndex
== lastCharIndex
) {
227 value
= ((((UInt32
)0xFF) << (firstChar
& (BITSPERBYTE
- 1))) & (((UInt32
)0xFF) >> ((BITSPERBYTE
- 1) - (lastChar
& (BITSPERBYTE
- 1))))) << (((sizeof(UInt32
) - 1) - (firstCharIndex
% sizeof(UInt32
))) * BITSPERBYTE
);
228 value
= CFSwapInt32HostToBig(value
);
229 firstCharIndex
= lastCharIndex
= firstChar
>> LOG_BPLW
;
230 if (*(bits
+ firstCharIndex
) != (isInverted
? ~value
: value
)) return FALSE
;
232 UInt32 firstCharMask
;
235 length
= firstCharIndex
% sizeof(UInt32
);
236 firstCharMask
= (((((UInt32
)0xFF) << (firstChar
& (BITSPERBYTE
- 1))) & 0xFF) << (((sizeof(UInt32
) - 1) - length
) * BITSPERBYTE
)) | (((UInt32
)0xFFFFFFFF) >> ((length
+ 1) * BITSPERBYTE
));
238 length
= lastCharIndex
% sizeof(UInt32
);
239 lastCharMask
= ((((UInt32
)0xFF) >> ((BITSPERBYTE
- 1) - (lastChar
& (BITSPERBYTE
- 1)))) << (((sizeof(UInt32
) - 1) - length
) * BITSPERBYTE
)) | (((UInt32
)0xFFFFFFFF) << ((sizeof(UInt32
) - length
) * BITSPERBYTE
));
241 firstCharIndex
= firstChar
>> LOG_BPLW
;
242 lastCharIndex
= lastChar
>> LOG_BPLW
;
244 if (firstCharIndex
== lastCharIndex
) {
245 firstCharMask
&= lastCharMask
;
246 value
= CFSwapInt32HostToBig(firstCharMask
& lastCharMask
);
247 if (*(bits
+ firstCharIndex
) != (isInverted
? ~value
: value
)) return FALSE
;
249 value
= CFSwapInt32HostToBig(firstCharMask
);
250 if (*(bits
+ firstCharIndex
) != (isInverted
? ~value
: value
)) return FALSE
;
252 value
= CFSwapInt32HostToBig(lastCharMask
);
253 if (*(bits
+ lastCharIndex
) != (isInverted
? ~value
: value
)) return FALSE
;
257 length
= firstCharIndex
;
258 value
= (isInverted
? ((UInt32
)0xFFFFFFFF) : 0);
260 if (*(bits
++) != value
) return FALSE
;
263 ++bits
; // Skip firstCharIndex
264 length
= (lastCharIndex
- (firstCharIndex
+ 1));
265 value
= (isInverted
? 0 : ((UInt32
)0xFFFFFFFF));
266 while (length
-- > 0) {
267 if (*(bits
++) != value
) return FALSE
;
269 if (firstCharIndex
!= lastCharIndex
) ++bits
;
271 length
= (0xFFFF >> LOG_BPLW
) - lastCharIndex
;
272 value
= (isInverted
? ((UInt32
)0xFFFFFFFF) : 0);
274 if (*(bits
++) != value
) return FALSE
;
280 CF_INLINE Boolean
__CFCSetIsBitmapSupersetOfBitmap(const UInt32
*bits1
, const UInt32
*bits2
, Boolean isInverted1
, Boolean isInverted2
) {
281 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
285 val2
= (isInverted2
? ~(*(bits2
++)) : *(bits2
++));
286 val1
= (isInverted1
? ~(*(bits1
++)) : *(bits1
++)) & val2
;
287 if (val1
!= val2
) return false;
293 CF_INLINE Boolean
__CFCSetHasNonBMPPlane(CFCharacterSetRef cset
) { return ((cset
)->_annex
&& (cset
)->_annex
->_validEntriesBitmap
? true : false); }
294 CF_INLINE Boolean
__CFCSetAnnexIsInverted (CFCharacterSetRef cset
) { return ((cset
)->_annex
&& (cset
)->_annex
->_isAnnexInverted
? true : false); }
295 CF_INLINE UInt32
__CFCSetAnnexValidEntriesBitmap(CFCharacterSetRef cset
) { return ((cset
)->_annex
? (cset
)->_annex
->_validEntriesBitmap
: 0); }
297 CF_INLINE Boolean
__CFCSetIsEmpty(CFCharacterSetRef cset
) {
298 if (__CFCSetHasNonBMPPlane(cset
) || __CFCSetAnnexIsInverted(cset
)) return false;
300 switch (__CFCSetClassType(cset
)) {
301 case __kCFCharSetClassRange
: if (!__CFCSetRangeLength(cset
)) return true; break;
302 case __kCFCharSetClassString
: if (!__CFCSetStringLength(cset
)) return true; break;
303 case __kCFCharSetClassBitmap
: if (!__CFCSetBitmapBits(cset
)) return true; break;
304 case __kCFCharSetClassCompactBitmap
: if (!__CFCSetCompactBitmapBits(cset
)) return true; break;
309 CF_INLINE
void __CFCSetBitmapAddCharacter(uint8_t *bitmap
, UniChar theChar
) {
310 bitmap
[(theChar
) >> LOG_BPB
] |= (((unsigned)1) << (theChar
& (BITSPERBYTE
- 1)));
313 CF_INLINE
void __CFCSetBitmapRemoveCharacter(uint8_t *bitmap
, UniChar theChar
) {
314 bitmap
[(theChar
) >> LOG_BPB
] &= ~(((unsigned)1) << (theChar
& (BITSPERBYTE
- 1)));
317 CF_INLINE Boolean
__CFCSetIsMemberBitmap(const uint8_t *bitmap
, UniChar theChar
) {
318 return ((bitmap
[(theChar
) >> LOG_BPB
] & (((unsigned)1) << (theChar
& (BITSPERBYTE
- 1)))) ? true : false);
321 #define NUM_32BIT_SLOTS (NUMCHARACTERS / 32)
323 CF_INLINE
void __CFCSetBitmapFastFillWithValue(UInt32
*bitmap
, uint8_t value
) {
324 UInt32 mask
= (value
<< 24) | (value
<< 16) | (value
<< 8) | value
;
325 UInt32 numSlots
= NUMCHARACTERS
/ 32;
327 while (numSlots
--) *(bitmap
++) = mask
;
330 CF_INLINE
void __CFCSetBitmapAddCharactersInRange(uint8_t *bitmap
, UniChar firstChar
, UniChar lastChar
) {
331 if (firstChar
== lastChar
) {
332 bitmap
[firstChar
>> LOG_BPB
] |= (((unsigned)1) << (firstChar
& (BITSPERBYTE
- 1)));
334 UInt32 idx
= firstChar
>> LOG_BPB
;
335 UInt32 max
= lastChar
>> LOG_BPB
;
338 bitmap
[idx
] |= (((unsigned)0xFF) << (firstChar
& (BITSPERBYTE
- 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE
- 1) - (lastChar
& (BITSPERBYTE
- 1))));
340 bitmap
[idx
] |= (((unsigned)0xFF) << (firstChar
& (BITSPERBYTE
- 1)));
341 bitmap
[max
] |= (((unsigned)0xFF) >> ((BITSPERBYTE
- 1) - (lastChar
& (BITSPERBYTE
- 1))));
344 while (idx
< max
) bitmap
[idx
++] = 0xFF;
349 CF_INLINE
void __CFCSetBitmapRemoveCharactersInRange(uint8_t *bitmap
, UniChar firstChar
, UniChar lastChar
) {
350 UInt32 idx
= firstChar
>> LOG_BPB
;
351 UInt32 max
= lastChar
>> LOG_BPB
;
354 bitmap
[idx
] &= ~((((unsigned)0xFF) << (firstChar
& (BITSPERBYTE
- 1))) & (((unsigned)0xFF) >> ((BITSPERBYTE
- 1) - (lastChar
& (BITSPERBYTE
- 1)))));
356 bitmap
[idx
] &= ~(((unsigned)0xFF) << (firstChar
& (BITSPERBYTE
- 1)));
357 bitmap
[max
] &= ~(((unsigned)0xFF) >> ((BITSPERBYTE
- 1) - (lastChar
& (BITSPERBYTE
- 1))));
360 while (idx
< max
) bitmap
[idx
++] = 0;
364 #define __CFCSetAnnexBitmapSetPlane(bitmap,plane) ((bitmap) |= (1 << (plane)))
365 #define __CFCSetAnnexBitmapClearPlane(bitmap,plane) ((bitmap) &= (~(1 << (plane))))
366 #define __CFCSetAnnexBitmapGetPlane(bitmap,plane) ((bitmap) & (1 << (plane)))
368 CF_INLINE
void __CFCSetAllocateAnnexForPlane(CFCharacterSetRef cset
, int plane
) {
369 if (cset
->_annex
== NULL
) {
370 ((CFMutableCharacterSetRef
)cset
)->_annex
= (CFCharSetAnnexStruct
*)CFAllocatorAllocate(CFGetAllocator(cset
), sizeof(CFCharSetAnnexStruct
), 0);
371 cset
->_annex
->_numOfAllocEntries
= plane
;
372 cset
->_annex
->_isAnnexInverted
= false;
373 cset
->_annex
->_validEntriesBitmap
= 0;
374 cset
->_annex
->_nonBMPPlanes
= ((plane
> 0) ? (CFCharacterSetRef
*)CFAllocatorAllocate(CFGetAllocator(cset
), sizeof(CFCharacterSetRef
) * plane
, 0) : NULL
);
375 } else if (cset
->_annex
->_numOfAllocEntries
< plane
) {
376 cset
->_annex
->_numOfAllocEntries
= plane
;
377 if (NULL
== cset
->_annex
->_nonBMPPlanes
) {
378 cset
->_annex
->_nonBMPPlanes
= (CFCharacterSetRef
*)CFAllocatorAllocate(CFGetAllocator(cset
), sizeof(CFCharacterSetRef
) * plane
, 0);
380 cset
->_annex
->_nonBMPPlanes
= (CFCharacterSetRef
*)CFAllocatorReallocate(CFGetAllocator(cset
), (void *)cset
->_annex
->_nonBMPPlanes
, sizeof(CFCharacterSetRef
) * plane
, 0);
385 CF_INLINE
void __CFCSetAnnexSetIsInverted(CFCharacterSetRef cset
, Boolean flag
) {
386 if (flag
) __CFCSetAllocateAnnexForPlane(cset
, 0);
387 if (cset
->_annex
) ((CFMutableCharacterSetRef
)cset
)->_annex
->_isAnnexInverted
= flag
;
390 CF_INLINE
void __CFCSetPutCharacterSetToAnnexPlane(CFCharacterSetRef cset
, CFCharacterSetRef annexCSet
, int plane
) {
391 __CFCSetAllocateAnnexForPlane(cset
, plane
);
392 if (__CFCSetAnnexBitmapGetPlane(cset
->_annex
->_validEntriesBitmap
, plane
)) CFRelease(cset
->_annex
->_nonBMPPlanes
[plane
- 1]);
394 cset
->_annex
->_nonBMPPlanes
[plane
- 1] = (CFCharacterSetRef
)CFRetain(annexCSet
);
395 __CFCSetAnnexBitmapSetPlane(cset
->_annex
->_validEntriesBitmap
, plane
);
397 __CFCSetAnnexBitmapClearPlane(cset
->_annex
->_validEntriesBitmap
, plane
);
401 CF_INLINE CFCharacterSetRef
__CFCSetGetAnnexPlaneCharacterSet(CFCharacterSetRef cset
, int plane
) {
402 __CFCSetAllocateAnnexForPlane(cset
, plane
);
403 if (!__CFCSetAnnexBitmapGetPlane(cset
->_annex
->_validEntriesBitmap
, plane
)) {
404 cset
->_annex
->_nonBMPPlanes
[plane
- 1] = (CFCharacterSetRef
)CFCharacterSetCreateMutable(CFGetAllocator(cset
));
405 __CFCSetAnnexBitmapSetPlane(cset
->_annex
->_validEntriesBitmap
, plane
);
407 return cset
->_annex
->_nonBMPPlanes
[plane
- 1];
410 CF_INLINE CFCharacterSetRef
__CFCSetGetAnnexPlaneCharacterSetNoAlloc(CFCharacterSetRef cset
, int plane
) {
411 return (cset
->_annex
&& __CFCSetAnnexBitmapGetPlane(cset
->_annex
->_validEntriesBitmap
, plane
) ? cset
->_annex
->_nonBMPPlanes
[plane
- 1] : NULL
);
414 CF_INLINE
void __CFCSetDeallocateAnnexPlane(CFCharacterSetRef cset
) {
418 for (idx
= 0;idx
< MAX_ANNEX_PLANE
;idx
++) {
419 if (__CFCSetAnnexBitmapGetPlane(cset
->_annex
->_validEntriesBitmap
, idx
+ 1)) {
420 CFRelease(cset
->_annex
->_nonBMPPlanes
[idx
]);
423 CFAllocatorDeallocate(CFGetAllocator(cset
), cset
->_annex
->_nonBMPPlanes
);
424 CFAllocatorDeallocate(CFGetAllocator(cset
), cset
->_annex
);
425 ((CFMutableCharacterSetRef
)cset
)->_annex
= NULL
;
429 CF_INLINE
uint8_t __CFCSetGetHeaderValue(const uint8_t *bitmap
, int *numPages
) {
430 uint8_t value
= *bitmap
;
432 if ((value
== 0) || (value
== UINT8_MAX
)) {
433 int numBytes
= __kCFCompactBitmapPageSize
- 1;
435 while (numBytes
> 0) {
436 if (*(++bitmap
) != value
) break;
439 if (numBytes
== 0) return value
;
441 return (uint8_t)(++(*numPages
));
444 CF_INLINE
bool __CFCSetIsMemberInCompactBitmap(const uint8_t *compactBitmap
, UTF16Char character
) {
445 uint8_t value
= compactBitmap
[(character
>> 8)]; // Assuming __kCFCompactBitmapNumPages == 256
449 } else if (value
== UINT8_MAX
) {
452 compactBitmap
+= (__kCFCompactBitmapNumPages
+ (__kCFCompactBitmapPageSize
* (value
- 1)));
453 character
&= 0xFF; // Assuming __kCFCompactBitmapNumPages == 256
454 return ((compactBitmap
[(character
/ BITSPERBYTE
)] & (1 << (character
% BITSPERBYTE
))) ? true : false);
458 CF_INLINE
uint32_t __CFCSetGetCompactBitmapSize(const uint8_t *compactBitmap
) {
459 uint32_t length
= __kCFCompactBitmapNumPages
;
460 uint32_t size
= __kCFCompactBitmapNumPages
;
463 while (length
-- > 0) {
464 value
= *(compactBitmap
++);
465 if ((value
!= 0) && (value
!= UINT8_MAX
)) size
+= __kCFCompactBitmapPageSize
;
/* Take a private "set" structure and make a bitmap from it. Return the bitmap. THE CALLER MUST RELEASE THE RETURNED MEMORY as necessary.
*/
473 CF_INLINE
void __CFCSetBitmapProcessManyCharacters(unsigned char *map
, unsigned n
, unsigned m
, Boolean isInverted
) {
475 __CFCSetBitmapRemoveCharactersInRange(map
, n
, m
);
477 __CFCSetBitmapAddCharactersInRange(map
, n
, m
);
481 CF_INLINE
void __CFExpandCompactBitmap(const uint8_t *src
, uint8_t *dst
) {
482 const uint8_t *srcBody
= src
+ __kCFCompactBitmapNumPages
;
486 for (i
= 0;i
< __kCFCompactBitmapNumPages
;i
++) {
488 if ((value
== 0) || (value
== UINT8_MAX
)) {
489 memset(dst
, value
, __kCFCompactBitmapPageSize
);
491 memmove(dst
, srcBody
, __kCFCompactBitmapPageSize
);
492 srcBody
+= __kCFCompactBitmapPageSize
;
494 dst
+= __kCFCompactBitmapPageSize
;
499 static void __CFCheckForExpandedSet(CFCharacterSetRef cset
) {
500 static int8_t __CFNumberOfPlanesForLogging
= -1;
501 static bool warnedOnce
= false;
503 if (0 > __CFNumberOfPlanesForLogging
) {
504 const char *envVar
= __CFgetenv("CFCharacterSetCheckForExpandedSet");
505 long value
= (envVar
? strtol_l(envVar
, NULL
, 0, NULL
) : 0);
506 __CFNumberOfPlanesForLogging
= (int8_t)(((value
> 0) && (value
<= 16)) ? value
: 0);
509 if (__CFNumberOfPlanesForLogging
) {
510 uint32_t entries
= __CFCSetAnnexValidEntriesBitmap(cset
);
514 if ((entries
& 1) && (++count
>= __CFNumberOfPlanesForLogging
)) {
516 CFLog(kCFLogLevelWarning
, CFSTR("An expanded CFMutableCharacter has been detected. Recommend to compact with CFCharacterSetCreateCopy"));
526 static void __CFCSetGetBitmap(CFCharacterSetRef cset
, uint8_t *bits
) {
528 CFIndex length
= __kCFBitmapSize
;
530 if (__CFCSetIsBitmap(cset
) && (bitmap
= __CFCSetBitmapBits(cset
))) {
531 memmove(bits
, bitmap
, __kCFBitmapSize
);
533 Boolean isInverted
= __CFCSetIsInverted(cset
);
534 uint8_t value
= (isInverted
? (uint8_t)-1 : 0);
537 while (length
--) *bitmap
++ = value
; // Initialize the buffer
539 if (!__CFCSetIsEmpty(cset
)) {
540 switch (__CFCSetClassType(cset
)) {
541 case __kCFCharSetClassBuiltin
: {
542 UInt8 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset
), 0, bits
, (isInverted
!= 0));
543 if (result
== kCFUniCharBitmapEmpty
&& isInverted
) {
544 length
= __kCFBitmapSize
;
546 while (length
--) *bitmap
++ = 0;
547 } else if (result
== kCFUniCharBitmapAll
&& !isInverted
) {
548 length
= __kCFBitmapSize
;
550 while (length
--) *bitmap
++ = (UInt8
)0xFF;
555 case __kCFCharSetClassRange
: {
556 UInt32 theChar
= __CFCSetRangeFirstChar(cset
);
557 if (theChar
< NUMCHARACTERS
) { // the range starts in BMP
558 length
= __CFCSetRangeLength(cset
);
559 if (theChar
+ length
>= NUMCHARACTERS
) length
= NUMCHARACTERS
- theChar
;
561 __CFCSetBitmapRemoveCharactersInRange(bits
, theChar
, (UniChar
)(theChar
+ length
) - 1);
563 __CFCSetBitmapAddCharactersInRange(bits
, theChar
, (UniChar
)(theChar
+ length
) - 1);
569 case __kCFCharSetClassString
: {
570 const UniChar
*buffer
= __CFCSetStringBuffer(cset
);
571 length
= __CFCSetStringLength(cset
);
572 while (length
--) (isInverted
? __CFCSetBitmapRemoveCharacter(bits
, *buffer
++) : __CFCSetBitmapAddCharacter(bits
, *buffer
++));
576 case __kCFCharSetClassCompactBitmap
:
577 __CFExpandCompactBitmap(__CFCSetCompactBitmapBits(cset
), bits
);
584 static Boolean
__CFCharacterSetEqual(CFTypeRef cf1
, CFTypeRef cf2
);
586 static Boolean
__CFCSetIsEqualAnnex(CFCharacterSetRef cf1
, CFCharacterSetRef cf2
) {
587 CFCharacterSetRef subSet1
;
588 CFCharacterSetRef subSet2
;
589 Boolean isAnnexInvertStateIdentical
= (__CFCSetAnnexIsInverted(cf1
) == __CFCSetAnnexIsInverted(cf2
) ? true: false);
592 if (isAnnexInvertStateIdentical
) {
593 if (__CFCSetAnnexValidEntriesBitmap(cf1
) != __CFCSetAnnexValidEntriesBitmap(cf2
)) return false;
594 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
595 subSet1
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1
, idx
);
596 subSet2
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2
, idx
);
598 if (subSet1
&& !__CFCharacterSetEqual(subSet1
, subSet2
)) return false;
601 uint8_t bitsBuf
[__kCFBitmapSize
];
602 uint8_t bitsBuf2
[__kCFBitmapSize
];
604 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
605 subSet1
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf1
, idx
);
606 subSet2
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cf2
, idx
);
608 if (subSet1
== NULL
&& subSet2
== NULL
) {
610 } else if (subSet1
== NULL
) {
611 if (__CFCSetIsBitmap(subSet2
)) {
612 if (!__CFCSetIsEqualBitmap((const UInt32
*)__CFCSetBitmapBits(subSet2
), (const UInt32
*)-1)) {
616 __CFCSetGetBitmap(subSet2
, bitsBuf
);
617 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)-1)) {
621 } else if (subSet2
== NULL
) {
622 if (__CFCSetIsBitmap(subSet1
)) {
623 if (!__CFCSetIsEqualBitmap((const UInt32
*)__CFCSetBitmapBits(subSet1
), (const UInt32
*)-1)) {
627 __CFCSetGetBitmap(subSet1
, bitsBuf
);
628 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)-1)) {
633 Boolean isBitmap1
= __CFCSetIsBitmap(subSet1
);
634 Boolean isBitmap2
= __CFCSetIsBitmap(subSet2
);
636 if (isBitmap1
&& isBitmap2
) {
637 if (!__CFCSetIsEqualBitmapInverted((const UInt32
*)__CFCSetBitmapBits(subSet1
), (const UInt32
*)__CFCSetBitmapBits(subSet2
))) {
640 } else if (!isBitmap1
&& !isBitmap2
) {
641 __CFCSetGetBitmap(subSet1
, bitsBuf
);
642 __CFCSetGetBitmap(subSet2
, bitsBuf2
);
643 if (!__CFCSetIsEqualBitmapInverted((const UInt32
*)bitsBuf
, (const UInt32
*)bitsBuf2
)) {
648 CFCharacterSetRef tmp
= subSet2
;
652 __CFCSetGetBitmap(subSet2
, bitsBuf
);
653 if (!__CFCSetIsEqualBitmapInverted((const UInt32
*)__CFCSetBitmapBits(subSet1
), (const UInt32
*)bitsBuf
)) {
665 static uint8_t *__CFCreateCompactBitmap(CFAllocatorRef allocator
, const uint8_t *bitmap
) {
670 uint8_t header
[__kCFCompactBitmapNumPages
];
673 for (i
= 0;i
< __kCFCompactBitmapNumPages
;i
++) {
674 header
[i
] = __CFCSetGetHeaderValue(src
, &numPages
);
676 // Allocating more pages is probably not interesting enough to be compact
677 if (numPages
> __kCFCompactBitmapMaxPages
) return NULL
;
678 src
+= __kCFCompactBitmapPageSize
;
681 dst
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFCompactBitmapNumPages
+ (__kCFCompactBitmapPageSize
* numPages
), 0);
684 uint8_t *dstBody
= dst
+ __kCFCompactBitmapNumPages
;
687 for (i
= 0;i
< __kCFCompactBitmapNumPages
;i
++) {
690 if ((dst
[i
] != 0) && (dst
[i
] != UINT8_MAX
)) {
691 memmove(dstBody
, src
, __kCFCompactBitmapPageSize
);
692 dstBody
+= __kCFCompactBitmapPageSize
;
694 src
+= __kCFCompactBitmapPageSize
;
697 memmove(dst
, header
, __kCFCompactBitmapNumPages
);
703 static void __CFCSetMakeCompact(CFMutableCharacterSetRef cset
) {
704 if (__CFCSetIsBitmap(cset
) && __CFCSetBitmapBits(cset
)) {
705 uint8_t *bitmap
= __CFCSetBitmapBits(cset
);
706 uint8_t *cBitmap
= __CFCreateCompactBitmap(CFGetAllocator(cset
), bitmap
);
709 CFAllocatorDeallocate(CFGetAllocator(cset
), bitmap
);
710 __CFCSetPutClassType(cset
, __kCFCharSetClassCompactBitmap
);
711 __CFCSetPutCompactBitmapBits(cset
, cBitmap
);
716 static void __CFCSetAddNonBMPPlanesInRange(CFMutableCharacterSetRef cset
, CFRange range
) {
717 int firstChar
= (range
.location
& 0xFFFF);
718 int maxChar
= range
.location
+ range
.length
;
719 int idx
= range
.location
>> 16; // first plane
720 int maxPlane
= (maxChar
- 1) >> 16; // last plane
722 CFMutableCharacterSetRef annexPlane
;
726 for (idx
= (idx
? idx
: 1);idx
<= maxPlane
;idx
++) {
727 planeRange
.location
= __CFMax(firstChar
, 0);
728 planeRange
.length
= (idx
== maxPlane
&& maxChar
? maxChar
: 0x10000) - planeRange
.location
;
729 if (__CFCSetAnnexIsInverted(cset
)) {
730 if ((annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset
, idx
))) {
731 CFCharacterSetRemoveCharactersInRange(annexPlane
, planeRange
);
732 if (__CFCSetIsEmpty(annexPlane
) && !__CFCSetIsInverted(annexPlane
)) {
733 CFRelease(annexPlane
);
734 __CFCSetAnnexBitmapClearPlane(cset
->_annex
->_validEntriesBitmap
, idx
);
738 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(cset
, idx
), planeRange
);
741 if (!__CFCSetHasNonBMPPlane(cset
) && !__CFCSetAnnexIsInverted(cset
)) __CFCSetDeallocateAnnexPlane(cset
);
744 static void __CFCSetRemoveNonBMPPlanesInRange(CFMutableCharacterSetRef cset
, CFRange range
) {
745 int firstChar
= (range
.location
& 0xFFFF);
746 int maxChar
= range
.location
+ range
.length
;
747 int idx
= range
.location
>> 16; // first plane
748 int maxPlane
= (maxChar
- 1) >> 16; // last plane
750 CFMutableCharacterSetRef annexPlane
;
754 for (idx
= (idx
? idx
: 1);idx
<= maxPlane
;idx
++) {
755 planeRange
.location
= __CFMax(firstChar
, 0);
756 planeRange
.length
= (idx
== maxPlane
&& maxChar
? maxChar
: 0x10000) - planeRange
.location
;
757 if (__CFCSetAnnexIsInverted(cset
)) {
758 CFCharacterSetAddCharactersInRange((CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(cset
, idx
), planeRange
);
760 if ((annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset
, idx
))) {
761 CFCharacterSetRemoveCharactersInRange(annexPlane
, planeRange
);
762 if(__CFCSetIsEmpty(annexPlane
) && !__CFCSetIsInverted(annexPlane
)) {
763 CFRelease(annexPlane
);
764 __CFCSetAnnexBitmapClearPlane(cset
->_annex
->_validEntriesBitmap
, idx
);
769 if (!__CFCSetHasNonBMPPlane(cset
) && !__CFCSetAnnexIsInverted(cset
)) __CFCSetDeallocateAnnexPlane(cset
);
772 static void __CFCSetMakeBitmap(CFMutableCharacterSetRef cset
) {
773 if (!__CFCSetIsBitmap(cset
) || !__CFCSetBitmapBits(cset
)) {
774 CFAllocatorRef allocator
= CFGetAllocator(cset
);
775 uint8_t *bitmap
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFBitmapSize
, 0);
776 __CFCSetGetBitmap(cset
, bitmap
);
778 if (__CFCSetIsBuiltin(cset
)) {
779 CFIndex numPlanes
= CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(cset
));
782 CFMutableCharacterSetRef annexSet
;
783 uint8_t *annexBitmap
= NULL
;
787 __CFCSetAllocateAnnexForPlane(cset
, numPlanes
- 1);
788 for (idx
= 1;idx
< numPlanes
;idx
++) {
789 if (NULL
== annexBitmap
) {
790 annexBitmap
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFBitmapSize
, 0);
792 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(cset
), idx
, annexBitmap
, false);
793 if (result
== kCFUniCharBitmapEmpty
) continue;
794 if (result
== kCFUniCharBitmapAll
) {
795 CFIndex bitmapLength
= __kCFBitmapSize
;
796 uint8_t *bytes
= annexBitmap
;
797 while (bitmapLength
-- > 0) *(bytes
++) = (uint8_t)0xFF;
799 annexSet
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(cset
, idx
);
800 __CFCSetPutClassType(annexSet
, __kCFCharSetClassBitmap
);
801 __CFCSetPutBitmapBits(annexSet
, annexBitmap
);
802 __CFCSetPutIsInverted(annexSet
, false);
803 __CFCSetPutHasHashValue(annexSet
, false);
806 if (annexBitmap
) CFAllocatorDeallocate(allocator
, annexBitmap
);
808 } else if (__CFCSetIsCompactBitmap(cset
) && __CFCSetCompactBitmapBits(cset
)) {
809 CFAllocatorDeallocate(allocator
, __CFCSetCompactBitmapBits(cset
));
810 __CFCSetPutCompactBitmapBits(cset
, NULL
);
811 } else if (__CFCSetIsString(cset
) && __CFCSetStringBuffer(cset
)) {
812 CFAllocatorDeallocate(allocator
, __CFCSetStringBuffer(cset
));
813 __CFCSetPutStringBuffer(cset
, NULL
);
814 } else if (__CFCSetIsRange(cset
)) { // We may have to allocate annex here
815 Boolean needsToInvert
= (!__CFCSetHasNonBMPPlane(cset
) && __CFCSetIsInverted(cset
) ? true : false);
816 __CFCSetAddNonBMPPlanesInRange(cset
, CFRangeMake(__CFCSetRangeFirstChar(cset
), __CFCSetRangeLength(cset
)));
817 if (needsToInvert
) __CFCSetAnnexSetIsInverted(cset
, true);
819 __CFCSetPutClassType(cset
, __kCFCharSetClassBitmap
);
820 __CFCSetPutBitmapBits(cset
, bitmap
);
821 __CFCSetPutIsInverted(cset
, false);
825 CF_INLINE CFMutableCharacterSetRef
__CFCSetGenericCreate(CFAllocatorRef allocator
, UInt32 flags
) {
826 CFMutableCharacterSetRef cset
;
827 CFIndex size
= sizeof(struct __CFCharacterSet
) - sizeof(CFRuntimeBase
);
829 cset
= (CFMutableCharacterSetRef
)_CFRuntimeCreateInstance(allocator
, CFCharacterSetGetTypeID(), size
, NULL
);
830 if (NULL
== cset
) return NULL
;
832 cset
->_base
._cfinfo
[CF_INFO_BITS
] |= flags
;
833 cset
->_hashValue
= 0;
839 static void __CFApplySurrogatesInString(CFMutableCharacterSetRef cset
, CFStringRef string
, void (*applyer
)(CFMutableCharacterSetRef
, CFRange
)) {
840 CFStringInlineBuffer buffer
;
841 CFIndex index
, length
= CFStringGetLength(string
);
842 CFRange range
= CFRangeMake(0, 0);
845 CFStringInitInlineBuffer(string
, &buffer
, CFRangeMake(0, length
));
847 for (index
= 0;index
< length
;index
++) {
848 character
= __CFStringGetCharacterFromInlineBufferQuick(&buffer
, index
);
850 if (CFStringIsSurrogateHighCharacter(character
) && ((index
+ 1) < length
)) {
851 UTF16Char other
= __CFStringGetCharacterFromInlineBufferQuick(&buffer
, index
+ 1);
853 if (CFStringIsSurrogateLowCharacter(other
)) {
854 character
= CFStringGetLongCharacterForSurrogatePair(character
, other
);
856 if ((range
.length
+ range
.location
) == character
) {
859 if (range
.length
> 0) applyer(cset
, range
);
860 range
.location
= character
;
865 ++index
; // skip the low surrogate
869 if (range
.length
> 0) applyer(cset
, range
);
873 /* Bsearch theChar for __kCFCharSetClassString
875 CF_INLINE Boolean
__CFCSetBsearchUniChar(const UniChar
*theTable
, CFIndex length
, UniChar theChar
) {
876 const UniChar
*p
, *q
, *divider
;
878 if ((theChar
< theTable
[0]) || (theChar
> theTable
[length
- 1])) return false;
881 q
= p
+ (length
- 1);
883 divider
= p
+ ((q
- p
) >> 1); /* divide by 2 */
884 if (theChar
< *divider
) q
= divider
- 1;
885 else if (theChar
> *divider
) p
= divider
+ 1;
891 /* Array of instantiated builtin set. Note builtin set ID starts with 1 so the array index is ID - 1
893 static CFCharacterSetRef
*__CFBuiltinSets
= NULL
;
895 /* Global lock for character set
897 static CFSpinLock_t __CFCharacterSetLock
= CFSpinLockInit
;
899 /* CFBase API functions
901 static Boolean
__CFCharacterSetEqual(CFTypeRef cf1
, CFTypeRef cf2
) {
902 Boolean isInvertStateIdentical
= (__CFCSetIsInverted((CFCharacterSetRef
)cf1
) == __CFCSetIsInverted((CFCharacterSetRef
)cf2
) ? true: false);
903 Boolean isAnnexInvertStateIdentical
= (__CFCSetAnnexIsInverted((CFCharacterSetRef
)cf1
) == __CFCSetAnnexIsInverted((CFCharacterSetRef
)cf2
) ? true: false);
905 CFCharacterSetRef subSet1
;
906 uint8_t bitsBuf
[__kCFBitmapSize
];
911 if (__CFCSetHasHashValue((CFCharacterSetRef
)cf1
) && __CFCSetHasHashValue((CFCharacterSetRef
)cf2
) && ((CFCharacterSetRef
)cf1
)->_hashValue
!= ((CFCharacterSetRef
)cf2
)->_hashValue
) return false;
912 if (__CFCSetIsEmpty((CFCharacterSetRef
)cf1
) && __CFCSetIsEmpty((CFCharacterSetRef
)cf2
) && !isInvertStateIdentical
) return false;
914 if (__CFCSetClassType((CFCharacterSetRef
)cf1
) == __CFCSetClassType((CFCharacterSetRef
)cf2
)) { // Types are identical, we can do it fast
915 switch (__CFCSetClassType((CFCharacterSetRef
)cf1
)) {
916 case __kCFCharSetClassBuiltin
:
917 return (__CFCSetBuiltinType((CFCharacterSetRef
)cf1
) == __CFCSetBuiltinType((CFCharacterSetRef
)cf2
) && isInvertStateIdentical
? true : false);
919 case __kCFCharSetClassRange
:
920 return (__CFCSetRangeFirstChar((CFCharacterSetRef
)cf1
) == __CFCSetRangeFirstChar((CFCharacterSetRef
)cf2
) && __CFCSetRangeLength((CFCharacterSetRef
)cf1
) && __CFCSetRangeLength((CFCharacterSetRef
)cf2
) && isInvertStateIdentical
? true : false);
922 case __kCFCharSetClassString
:
923 if (__CFCSetStringLength((CFCharacterSetRef
)cf1
) == __CFCSetStringLength((CFCharacterSetRef
)cf2
) && isInvertStateIdentical
) {
924 const UniChar
*buf1
= __CFCSetStringBuffer((CFCharacterSetRef
)cf1
);
925 const UniChar
*buf2
= __CFCSetStringBuffer((CFCharacterSetRef
)cf2
);
926 CFIndex length
= __CFCSetStringLength((CFCharacterSetRef
)cf1
);
928 while (length
--) if (*buf1
++ != *buf2
++) return false;
934 case __kCFCharSetClassBitmap
:
935 if (!__CFCSetIsEqualBitmap((const UInt32
*)__CFCSetBitmapBits((CFCharacterSetRef
)cf1
), (const UInt32
*)__CFCSetBitmapBits((CFCharacterSetRef
)cf2
))) return false;
938 return __CFCSetIsEqualAnnex((CFCharacterSetRef
)cf1
, (CFCharacterSetRef
)cf2
);
941 // Check for easy empty cases
942 if (__CFCSetIsEmpty((CFCharacterSetRef
)cf1
) || __CFCSetIsEmpty((CFCharacterSetRef
)cf2
)) {
943 CFCharacterSetRef emptySet
= (__CFCSetIsEmpty((CFCharacterSetRef
)cf1
) ? (CFCharacterSetRef
)cf1
: (CFCharacterSetRef
)cf2
);
944 CFCharacterSetRef nonEmptySet
= (emptySet
== cf1
? (CFCharacterSetRef
)cf2
: (CFCharacterSetRef
)cf1
);
946 if (__CFCSetIsBuiltin(nonEmptySet
)) {
948 } else if (__CFCSetIsRange(nonEmptySet
)) {
949 if (isInvertStateIdentical
) {
950 return (__CFCSetRangeLength(nonEmptySet
) ? false : true);
952 return (__CFCSetRangeLength(nonEmptySet
) == 0x110000 ? true : false);
955 if (__CFCSetAnnexIsInverted(nonEmptySet
)) {
956 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet
) != 0x1FFFE) return false;
958 if (__CFCSetAnnexValidEntriesBitmap(nonEmptySet
)) return false;
961 if (__CFCSetIsBitmap(nonEmptySet
)) {
962 bits
= __CFCSetBitmapBits(nonEmptySet
);
965 __CFCSetGetBitmap(nonEmptySet
, bitsBuf
);
968 if (__CFCSetIsEqualBitmap(NULL
, (const UInt32
*)bits
)) {
969 if (!__CFCSetAnnexIsInverted(nonEmptySet
)) return true;
974 // Annex set has to be CFRangeMake(0x10000, 0xfffff)
975 for (idx
= 1;idx
< MAX_ANNEX_PLANE
;idx
++) {
976 if (__CFCSetIsBitmap(nonEmptySet
)) {
977 if (!__CFCSetIsEqualBitmap((__CFCSetAnnexIsInverted(nonEmptySet
) ? NULL
: (const UInt32
*)-1), (const UInt32
*)bitsBuf
)) return false;
979 __CFCSetGetBitmap(__CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonEmptySet
, idx
), bitsBuf
);
980 if (!__CFCSetIsEqualBitmap((const UInt32
*)-1, (const UInt32
*)bitsBuf
)) return false;
987 if (__CFCSetIsBuiltin((CFCharacterSetRef
)cf1
) || __CFCSetIsBuiltin((CFCharacterSetRef
)cf2
)) {
988 CFCharacterSetRef builtinSet
= (__CFCSetIsBuiltin((CFCharacterSetRef
)cf1
) ? (CFCharacterSetRef
)cf1
: (CFCharacterSetRef
)cf2
);
989 CFCharacterSetRef nonBuiltinSet
= (builtinSet
== cf1
? (CFCharacterSetRef
)cf2
: (CFCharacterSetRef
)cf1
);
992 if (__CFCSetIsRange(nonBuiltinSet
)) {
993 UTF32Char firstChar
= __CFCSetRangeFirstChar(nonBuiltinSet
);
994 UTF32Char lastChar
= (firstChar
+ __CFCSetRangeLength(nonBuiltinSet
) - 1);
995 uint8_t firstPlane
= (firstChar
>> 16) & 0xFF;
996 uint8_t lastPlane
= (lastChar
>> 16) & 0xFF;
999 for (idx
= 0;idx
< MAX_ANNEX_PLANE
;idx
++) {
1000 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet
), idx
, bitsBuf
, (isInvertStateIdentical
!= 0));
1002 if (idx
< firstPlane
|| idx
> lastPlane
) {
1003 if (result
== kCFUniCharBitmapAll
) {
1005 } else if (result
== kCFUniCharBitmapFilled
) {
1006 if (!__CFCSetIsEqualBitmap(NULL
, (const UInt32
*)bitsBuf
)) return false;
1008 } else if (idx
> firstPlane
&& idx
< lastPlane
) {
1009 if (result
== kCFUniCharBitmapEmpty
) {
1011 } else if (result
== kCFUniCharBitmapFilled
) {
1012 if (!__CFCSetIsEqualBitmap((const UInt32
*)-1, (const UInt32
*)bitsBuf
)) return false;
1015 if (result
== kCFUniCharBitmapEmpty
) {
1017 } else if (result
== kCFUniCharBitmapAll
) {
1018 if (idx
== firstPlane
) {
1019 if (((firstChar
& 0xFFFF) != 0) || (firstPlane
== lastPlane
&& ((lastChar
& 0xFFFF) != 0xFFFF))) return false;
1021 if (((lastChar
& 0xFFFF) != 0xFFFF) || (firstPlane
== lastPlane
&& ((firstChar
& 0xFFFF) != 0))) return false;
1024 if (idx
== firstPlane
) {
1025 if (!__CFCSetIsBitmapEqualToRange((const UInt32
*)bitsBuf
, firstChar
& 0xFFFF, (firstPlane
== lastPlane
? lastChar
& 0xFFFF : 0xFFFF), false)) return false;
1027 if (!__CFCSetIsBitmapEqualToRange((const UInt32
*)bitsBuf
, (firstPlane
== lastPlane
? firstChar
& 0xFFFF : 0), lastChar
& 0xFFFF, false)) return false;
1034 uint8_t bitsBuf2
[__kCFBitmapSize
];
1037 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet
), 0, bitsBuf
, (__CFCSetIsInverted(builtinSet
) != 0));
1038 if (result
== kCFUniCharBitmapFilled
) {
1039 if (__CFCSetIsBitmap(nonBuiltinSet
)) {
1040 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)__CFCSetBitmapBits(nonBuiltinSet
))) return false;
1043 __CFCSetGetBitmap(nonBuiltinSet
, bitsBuf2
);
1044 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)bitsBuf2
)) {
1049 if (__CFCSetIsBitmap(nonBuiltinSet
)) {
1050 if (!__CFCSetIsEqualBitmap((result
== kCFUniCharBitmapAll
? (const UInt32
*)-1 : NULL
), (const UInt32
*)__CFCSetBitmapBits(nonBuiltinSet
))) return false;
1052 __CFCSetGetBitmap(nonBuiltinSet
, bitsBuf
);
1053 if (!__CFCSetIsEqualBitmap((result
== kCFUniCharBitmapAll
? (const UInt32
*)-1: NULL
), (const UInt32
*)bitsBuf
)) return false;
1057 isInvertStateIdentical
= (__CFCSetIsInverted(builtinSet
) == __CFCSetAnnexIsInverted(nonBuiltinSet
) ? true : false);
1059 for (idx
= 1;idx
< MAX_ANNEX_PLANE
;idx
++) {
1060 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(builtinSet
), idx
, bitsBuf
, !isInvertStateIdentical
);
1061 subSet1
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonBuiltinSet
, idx
);
1063 if (result
== kCFUniCharBitmapFilled
) {
1064 if (NULL
== subSet1
) {
1066 } else if (__CFCSetIsBitmap(subSet1
)) {
1067 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)__CFCSetBitmapBits(subSet1
))) {
1072 __CFCSetGetBitmap(subSet1
, bitsBuf2
);
1073 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)bitsBuf2
)) {
1078 if (NULL
== subSet1
) {
1079 if (result
== kCFUniCharBitmapAll
) {
1082 } else if (__CFCSetIsBitmap(subSet1
)) {
1083 if (!__CFCSetIsEqualBitmap((result
== kCFUniCharBitmapAll
? (const UInt32
*)-1: NULL
), (const UInt32
*)__CFCSetBitmapBits(subSet1
))) {
1087 __CFCSetGetBitmap(subSet1
, bitsBuf
);
1088 if (!__CFCSetIsEqualBitmap((result
== kCFUniCharBitmapAll
? (const UInt32
*)-1: NULL
), (const UInt32
*)bitsBuf
)) {
1098 if (__CFCSetIsRange((CFCharacterSetRef
)cf1
) || __CFCSetIsRange((CFCharacterSetRef
)cf2
)) {
1099 CFCharacterSetRef rangeSet
= (__CFCSetIsRange((CFCharacterSetRef
)cf1
) ? (CFCharacterSetRef
)cf1
: (CFCharacterSetRef
)cf2
);
1100 CFCharacterSetRef nonRangeSet
= (rangeSet
== cf1
? (CFCharacterSetRef
)cf2
: (CFCharacterSetRef
)cf1
);
1101 UTF32Char firstChar
= __CFCSetRangeFirstChar(rangeSet
);
1102 UTF32Char lastChar
= (firstChar
+ __CFCSetRangeLength(rangeSet
) - 1);
1103 uint8_t firstPlane
= (firstChar
>> 16) & 0xFF;
1104 uint8_t lastPlane
= (lastChar
>> 16) & 0xFF;
1105 Boolean isRangeSetInverted
= __CFCSetIsInverted(rangeSet
);
1107 if (__CFCSetIsBitmap(nonRangeSet
)) {
1108 bits
= __CFCSetBitmapBits(nonRangeSet
);
1111 __CFCSetGetBitmap(nonRangeSet
, bitsBuf
);
1113 if (firstPlane
== 0) {
1114 if (!__CFCSetIsBitmapEqualToRange((const UInt32
*)bits
, firstChar
, (lastPlane
== 0 ? lastChar
: 0xFFFF), isRangeSetInverted
)) return false;
1118 if (!__CFCSetIsEqualBitmap((const UInt32
*)bits
, (isRangeSetInverted
? (const UInt32
*)-1 : NULL
))) return false;
1119 firstChar
&= 0xFFFF;
1124 isAnnexInvertStateIdentical
= (isRangeSetInverted
== __CFCSetAnnexIsInverted(nonRangeSet
) ? true : false);
1126 for (idx
= 1;idx
< MAX_ANNEX_PLANE
;idx
++) {
1127 subSet1
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(nonRangeSet
, idx
);
1128 if (NULL
== subSet1
) {
1129 if (idx
< firstPlane
|| idx
> lastPlane
) {
1130 if (!isAnnexInvertStateIdentical
) return false;
1131 } else if (idx
> firstPlane
&& idx
< lastPlane
) {
1132 if (isAnnexInvertStateIdentical
) return false;
1133 } else if (idx
== firstPlane
) {
1134 if (isAnnexInvertStateIdentical
|| firstChar
|| (idx
== lastPlane
&& lastChar
!= 0xFFFF)) return false;
1135 } else if (idx
== lastPlane
) {
1136 if (isAnnexInvertStateIdentical
|| (idx
== firstPlane
&& firstChar
) || (lastChar
!= 0xFFFF)) return false;
1139 if (__CFCSetIsBitmap(subSet1
)) {
1140 bits
= __CFCSetBitmapBits(subSet1
);
1142 __CFCSetGetBitmap(subSet1
, bitsBuf
);
1146 if (idx
< firstPlane
|| idx
> lastPlane
) {
1147 if (!__CFCSetIsEqualBitmap((const UInt32
*)bits
, (isAnnexInvertStateIdentical
? NULL
: (const UInt32
*)-1))) return false;
1148 } else if (idx
> firstPlane
&& idx
< lastPlane
) {
1149 if (!__CFCSetIsEqualBitmap((const UInt32
*)bits
, (isAnnexInvertStateIdentical
? (const UInt32
*)-1 : NULL
))) return false;
1150 } else if (idx
== firstPlane
) {
1151 if (!__CFCSetIsBitmapEqualToRange((const UInt32
*)bits
, firstChar
, (idx
== lastPlane
? lastChar
: 0xFFFF), !isAnnexInvertStateIdentical
)) return false;
1152 } else if (idx
== lastPlane
) {
1153 if (!__CFCSetIsBitmapEqualToRange((const UInt32
*)bits
, (idx
== firstPlane
? firstChar
: 0), lastChar
, !isAnnexInvertStateIdentical
)) return false;
1160 isBitmap1
= __CFCSetIsBitmap((CFCharacterSetRef
)cf1
);
1161 isBitmap2
= __CFCSetIsBitmap((CFCharacterSetRef
)cf2
);
1163 if (isBitmap1
&& isBitmap2
) {
1164 if (!__CFCSetIsEqualBitmap((const UInt32
*)__CFCSetBitmapBits((CFCharacterSetRef
)cf1
), (const UInt32
*)__CFCSetBitmapBits((CFCharacterSetRef
)cf2
))) return false;
1165 } else if (!isBitmap1
&& !isBitmap2
) {
1166 uint8_t bitsBuf2
[__kCFBitmapSize
];
1168 __CFCSetGetBitmap((CFCharacterSetRef
)cf1
, bitsBuf
);
1169 __CFCSetGetBitmap((CFCharacterSetRef
)cf2
, bitsBuf2
);
1171 if (!__CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)bitsBuf2
)) {
1176 CFCharacterSetRef tmp
= (CFCharacterSetRef
)cf2
;
1181 __CFCSetGetBitmap((CFCharacterSetRef
)cf2
, bitsBuf
);
1183 if (!__CFCSetIsEqualBitmap((const UInt32
*)__CFCSetBitmapBits((CFCharacterSetRef
)cf1
), (const UInt32
*)bitsBuf
)) return false;
1185 return __CFCSetIsEqualAnnex((CFCharacterSetRef
)cf1
, (CFCharacterSetRef
)cf2
);
1188 static CFHashCode
__CFCharacterSetHash(CFTypeRef cf
) {
1189 if (!__CFCSetHasHashValue((CFCharacterSetRef
)cf
)) {
1190 if (__CFCSetIsEmpty((CFCharacterSetRef
)cf
)) {
1191 ((CFMutableCharacterSetRef
)cf
)->_hashValue
= (__CFCSetIsInverted((CFCharacterSetRef
)cf
) ? ((UInt32
)0xFFFFFFFF) : 0);
1192 } else if (__CFCSetIsBitmap( (CFCharacterSetRef
) cf
)) {
1193 ((CFMutableCharacterSetRef
)cf
)->_hashValue
= CFHashBytes(__CFCSetBitmapBits((CFCharacterSetRef
)cf
), __kCFBitmapSize
);
1195 uint8_t bitsBuf
[__kCFBitmapSize
];
1196 __CFCSetGetBitmap((CFCharacterSetRef
)cf
, bitsBuf
);
1197 ((CFMutableCharacterSetRef
)cf
)->_hashValue
= CFHashBytes(bitsBuf
, __kCFBitmapSize
);
1199 __CFCSetPutHasHashValue((CFMutableCharacterSetRef
)cf
, true);
1201 return ((CFCharacterSetRef
)cf
)->_hashValue
;
1204 static CFStringRef
__CFCharacterSetCopyDescription(CFTypeRef cf
) {
1205 CFMutableStringRef string
;
1209 if (__CFCSetIsEmpty((CFCharacterSetRef
)cf
)) {
1210 return (CFStringRef
)(__CFCSetIsInverted((CFCharacterSetRef
)cf
) ? CFRetain(CFSTR("<CFCharacterSet All>")) : CFRetain(CFSTR("<CFCharacterSet Empty>")));
1213 switch (__CFCSetClassType((CFCharacterSetRef
)cf
)) {
1214 case __kCFCharSetClassBuiltin
:
1215 switch (__CFCSetBuiltinType((CFCharacterSetRef
)cf
)) {
1216 case kCFCharacterSetControl
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Control Set>"));
1217 case kCFCharacterSetWhitespace
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Whitespace Set>"));
1218 case kCFCharacterSetWhitespaceAndNewline
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined WhitespaceAndNewline Set>"));
1219 case kCFCharacterSetDecimalDigit
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined DecimalDigit Set>"));
1220 case kCFCharacterSetLetter
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Letter Set>"));
1221 case kCFCharacterSetLowercaseLetter
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined LowercaseLetter Set>"));
1222 case kCFCharacterSetUppercaseLetter
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined UppercaseLetter Set>"));
1223 case kCFCharacterSetNonBase
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined NonBase Set>"));
1224 case kCFCharacterSetDecomposable
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Decomposable Set>"));
1225 case kCFCharacterSetAlphaNumeric
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined AlphaNumeric Set>"));
1226 case kCFCharacterSetPunctuation
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Punctuation Set>"));
1227 case kCFCharacterSetIllegal
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Illegal Set>"));
1228 case kCFCharacterSetCapitalizedLetter
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined CapitalizedLetter Set>"));
1229 case kCFCharacterSetSymbol
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Symbol Set>"));
1230 case kCFCharacterSetNewline
: return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Predefined Newline Set>"));
1234 case __kCFCharSetClassRange
:
1235 return CFStringCreateWithFormat(CFGetAllocator((CFCharacterSetRef
)cf
), NULL
, CFSTR("<CFCharacterSet Range(%d, %d)>"), __CFCSetRangeFirstChar((CFCharacterSetRef
)cf
), __CFCSetRangeLength((CFCharacterSetRef
)cf
));
1237 case __kCFCharSetClassString
: {
1238 CFStringRef format
= CFSTR("<CFCharacterSet Items(");
1240 length
= __CFCSetStringLength((CFCharacterSetRef
)cf
);
1241 string
= CFStringCreateMutable(CFGetAllocator(cf
), CFStringGetLength(format
) + 7 * length
+ 2); // length of format + "U+XXXX "(7) * length + ")>"(2)
1242 CFStringAppend(string
, format
);
1243 for (idx
= 0;idx
< length
;idx
++) {
1244 CFStringAppendFormat(string
, NULL
, CFSTR("%sU+%04X"), (idx
> 0 ? " " : ""), (UInt32
)((__CFCSetStringBuffer((CFCharacterSetRef
)cf
))[idx
]));
1246 CFStringAppend(string
, CFSTR(")>"));
1250 case __kCFCharSetClassBitmap
:
1251 case __kCFCharSetClassCompactBitmap
:
1252 return (CFStringRef
)CFRetain(CFSTR("<CFCharacterSet Bitmap>")); // ??? Should generate description for 8k bitmap ?
1254 CFAssert1(0, __kCFLogAssertion
, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__
); // We should never come here
1258 static void __CFCharacterSetDeallocate(CFTypeRef cf
) {
1259 CFAllocatorRef allocator
= CFGetAllocator(cf
);
1261 if (__CFCSetIsBuiltin((CFCharacterSetRef
)cf
) && !__CFCSetIsMutable((CFCharacterSetRef
)cf
) && !__CFCSetIsInverted((CFCharacterSetRef
)cf
)) {
1262 CFCharacterSetRef sharedSet
= CFCharacterSetGetPredefined(__CFCSetBuiltinType((CFCharacterSetRef
)cf
));
1263 if (sharedSet
== cf
) { // We're trying to dealloc the builtin set
1264 CFAssert1(0, __kCFLogAssertion
, "%s: Trying to deallocate predefined set. The process is likely to crash.", __PRETTY_FUNCTION__
);
1265 return; // We never deallocate builtin set
1269 if (__CFCSetIsString((CFCharacterSetRef
)cf
) && __CFCSetStringBuffer((CFCharacterSetRef
)cf
)) CFAllocatorDeallocate(allocator
, __CFCSetStringBuffer((CFCharacterSetRef
)cf
));
1270 else if (__CFCSetIsBitmap((CFCharacterSetRef
)cf
) && __CFCSetBitmapBits((CFCharacterSetRef
)cf
)) CFAllocatorDeallocate(allocator
, __CFCSetBitmapBits((CFCharacterSetRef
)cf
));
1271 else if (__CFCSetIsCompactBitmap((CFCharacterSetRef
)cf
) && __CFCSetCompactBitmapBits((CFCharacterSetRef
)cf
)) CFAllocatorDeallocate(allocator
, __CFCSetCompactBitmapBits((CFCharacterSetRef
)cf
));
1272 __CFCSetDeallocateAnnexPlane((CFCharacterSetRef
)cf
);
1275 static CFTypeID __kCFCharacterSetTypeID
= _kCFRuntimeNotATypeID
;
1277 static const CFRuntimeClass __CFCharacterSetClass
= {
1282 __CFCharacterSetDeallocate
,
1283 __CFCharacterSetEqual
,
1284 __CFCharacterSetHash
,
1286 __CFCharacterSetCopyDescription
1289 static bool __CFCheckForExapendedSet
= false;
1291 __private_extern__
void __CFCharacterSetInitialize(void) {
1292 const char *checkForExpandedSet
= __CFgetenv("__CF_DEBUG_EXPANDED_SET");
1294 __kCFCharacterSetTypeID
= _CFRuntimeRegisterClass(&__CFCharacterSetClass
);
1296 if (checkForExpandedSet
&& (*checkForExpandedSet
== 'Y')) __CFCheckForExapendedSet
= true;
1302 CFTypeID
CFCharacterSetGetTypeID(void) {
1303 return __kCFCharacterSetTypeID
;
1306 /*** CharacterSet creation ***/
1307 /* Functions to create basic immutable characterset.
1309 CFCharacterSetRef
CFCharacterSetGetPredefined(CFCharacterSetPredefinedSet theSetIdentifier
) {
1310 CFCharacterSetRef cset
;
1312 __CFCSetValidateBuiltinType(theSetIdentifier
, __PRETTY_FUNCTION__
);
1314 __CFSpinLock(&__CFCharacterSetLock
);
1315 cset
= ((NULL
!= __CFBuiltinSets
) ? __CFBuiltinSets
[theSetIdentifier
- 1] : NULL
);
1316 __CFSpinUnlock(&__CFCharacterSetLock
);
1318 if (NULL
!= cset
) return cset
;
1320 if (!(cset
= __CFCSetGenericCreate(kCFAllocatorSystemDefault
, __kCFCharSetClassBuiltin
))) return NULL
;
1321 __CFCSetPutBuiltinType((CFMutableCharacterSetRef
)cset
, theSetIdentifier
);
1323 __CFSpinLock(&__CFCharacterSetLock
);
1324 if (!__CFBuiltinSets
) {
1325 __CFBuiltinSets
= (CFCharacterSetRef
*)CFAllocatorAllocate((CFAllocatorRef
)CFRetain(__CFGetDefaultAllocator()), sizeof(CFCharacterSetRef
) * __kCFLastBuiltinSetID
, 0);
1326 memset(__CFBuiltinSets
, 0, sizeof(CFCharacterSetRef
) * __kCFLastBuiltinSetID
);
1329 __CFBuiltinSets
[theSetIdentifier
- 1] = cset
;
1330 __CFSpinUnlock(&__CFCharacterSetLock
);
1335 CFCharacterSetRef
CFCharacterSetCreateWithCharactersInRange(CFAllocatorRef allocator
, CFRange theRange
) {
1336 CFMutableCharacterSetRef cset
;
1338 __CFCSetValidateRange(theRange
, __PRETTY_FUNCTION__
);
1340 if (theRange
.length
) {
1341 if (!(cset
= __CFCSetGenericCreate(allocator
, __kCFCharSetClassRange
))) return NULL
;
1342 __CFCSetPutRangeFirstChar(cset
, theRange
.location
);
1343 __CFCSetPutRangeLength(cset
, theRange
.length
);
1345 if (!(cset
= __CFCSetGenericCreate(allocator
, __kCFCharSetClassBitmap
))) return NULL
;
1346 __CFCSetPutBitmapBits(cset
, NULL
);
1347 __CFCSetPutHasHashValue(cset
, true); // _hashValue is 0
1353 static int chcompar(const void *a
, const void *b
) {
1354 return -(int)(*(UniChar
*)b
- *(UniChar
*)a
);
1357 CFCharacterSetRef
CFCharacterSetCreateWithCharactersInString(CFAllocatorRef allocator
, CFStringRef theString
) {
1360 length
= CFStringGetLength(theString
);
1361 if (length
< __kCFStringCharSetMax
) {
1362 CFMutableCharacterSetRef cset
;
1364 if (!(cset
= __CFCSetGenericCreate(allocator
, __kCFCharSetClassString
))) return NULL
;
1365 __CFCSetPutStringBuffer(cset
, (UniChar
*)CFAllocatorAllocate(CFGetAllocator(cset
), __kCFStringCharSetMax
* sizeof(UniChar
), 0));
1366 __CFCSetPutStringLength(cset
, length
);
1367 CFStringGetCharacters(theString
, CFRangeMake(0, length
), __CFCSetStringBuffer(cset
));
1368 qsort(__CFCSetStringBuffer(cset
), length
, sizeof(UniChar
), chcompar
);
1371 __CFCSetPutHasHashValue(cset
, true); // _hashValue is 0
1372 } else if (length
> 1) { // Check for surrogate
1373 const UTF16Char
*characters
= __CFCSetStringBuffer(cset
);
1374 const UTF16Char
*charactersLimit
= characters
+ length
;
1376 if ((*characters
< 0xDC00UL
) && (*(charactersLimit
- 1) > 0xDBFFUL
)) { // might have surrogate chars
1377 while (characters
< charactersLimit
) {
1378 if (CFStringIsSurrogateHighCharacter(*characters
) || CFStringIsSurrogateLowCharacter(*characters
)) {
1387 if (NULL
!= cset
) return cset
;
1390 CFMutableCharacterSetRef mcset
= CFCharacterSetCreateMutable(allocator
);
1391 CFCharacterSetAddCharactersInString(mcset
, theString
);
1392 __CFCSetMakeCompact(mcset
);
1393 __CFCSetPutIsMutable(mcset
, false);
1397 CFCharacterSetRef
CFCharacterSetCreateWithBitmapRepresentation(CFAllocatorRef allocator
, CFDataRef theData
) {
1398 CFMutableCharacterSetRef cset
;
1401 if (!(cset
= __CFCSetGenericCreate(allocator
, __kCFCharSetClassBitmap
))) return NULL
;
1403 if (theData
&& (length
= CFDataGetLength(theData
)) > 0) {
1407 if (length
< __kCFBitmapSize
) {
1408 bitmap
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFBitmapSize
, 0);
1409 memmove(bitmap
, CFDataGetBytePtr(theData
), length
);
1410 memset(bitmap
+ length
, 0, __kCFBitmapSize
- length
);
1412 cBitmap
= __CFCreateCompactBitmap(allocator
, bitmap
);
1414 if (cBitmap
== NULL
) {
1415 __CFCSetPutBitmapBits(cset
, bitmap
);
1417 CFAllocatorDeallocate(allocator
, bitmap
);
1418 __CFCSetPutCompactBitmapBits(cset
, cBitmap
);
1419 __CFCSetPutClassType(cset
, __kCFCharSetClassCompactBitmap
);
1422 cBitmap
= __CFCreateCompactBitmap(allocator
, CFDataGetBytePtr(theData
));
1424 if (cBitmap
== NULL
) {
1425 bitmap
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFBitmapSize
, 0);
1426 memmove(bitmap
, CFDataGetBytePtr(theData
), __kCFBitmapSize
);
1428 __CFCSetPutBitmapBits(cset
, bitmap
);
1430 __CFCSetPutCompactBitmapBits(cset
, cBitmap
);
1431 __CFCSetPutClassType(cset
, __kCFCharSetClassCompactBitmap
);
1434 if (length
> __kCFBitmapSize
) {
1435 CFMutableCharacterSetRef annexSet
;
1436 const uint8_t *bytes
= CFDataGetBytePtr(theData
) + __kCFBitmapSize
;
1438 length
-= __kCFBitmapSize
;
1440 while (length
> 1) {
1441 annexSet
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(cset
, *(bytes
++));
1442 --length
; // Decrement the plane no byte
1444 if (length
< __kCFBitmapSize
) {
1445 bitmap
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFBitmapSize
, 0);
1446 memmove(bitmap
, bytes
, length
);
1447 memset(bitmap
+ length
, 0, __kCFBitmapSize
- length
);
1449 cBitmap
= __CFCreateCompactBitmap(allocator
, bitmap
);
1451 if (cBitmap
== NULL
) {
1452 __CFCSetPutBitmapBits(annexSet
, bitmap
);
1454 CFAllocatorDeallocate(allocator
, bitmap
);
1455 __CFCSetPutCompactBitmapBits(annexSet
, cBitmap
);
1456 __CFCSetPutClassType(annexSet
, __kCFCharSetClassCompactBitmap
);
1459 cBitmap
= __CFCreateCompactBitmap(allocator
, bytes
);
1461 if (cBitmap
== NULL
) {
1462 bitmap
= (uint8_t *)CFAllocatorAllocate(allocator
, __kCFBitmapSize
, 0);
1463 memmove(bitmap
, bytes
, __kCFBitmapSize
);
1465 __CFCSetPutBitmapBits(annexSet
, bitmap
);
1467 __CFCSetPutCompactBitmapBits(annexSet
, cBitmap
);
1468 __CFCSetPutClassType(annexSet
, __kCFCharSetClassCompactBitmap
);
1471 length
-= __kCFBitmapSize
;
1472 bytes
+= __kCFBitmapSize
;
1477 __CFCSetPutBitmapBits(cset
, NULL
);
1478 __CFCSetPutHasHashValue(cset
, true); // Hash value is 0
1484 CFCharacterSetRef
CFCharacterSetCreateInvertedSet(CFAllocatorRef alloc
, CFCharacterSetRef theSet
) {
1485 CFMutableCharacterSetRef cset
;
1487 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID
, CFCharacterSetRef
, theSet
, "invertedSet");
1489 cset
= CFCharacterSetCreateMutableCopy(alloc
, theSet
);
1490 CFCharacterSetInvert(cset
);
1491 __CFCSetPutIsMutable(cset
, false);
1496 /* Functions to create mutable characterset.
1498 CFMutableCharacterSetRef
CFCharacterSetCreateMutable(CFAllocatorRef allocator
) {
1499 CFMutableCharacterSetRef cset
;
1501 if (!(cset
= __CFCSetGenericCreate(allocator
, __kCFCharSetClassBitmap
| __kCFCharSetIsMutable
))) return NULL
;
1502 __CFCSetPutBitmapBits(cset
, NULL
);
1503 __CFCSetPutHasHashValue(cset
, true); // Hash value is 0
1508 static CFMutableCharacterSetRef
__CFCharacterSetCreateCopy(CFAllocatorRef alloc
, CFCharacterSetRef theSet
, bool isMutable
) {
1509 CFMutableCharacterSetRef cset
;
1511 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID
, CFMutableCharacterSetRef
, theSet
, "mutableCopy");
1513 __CFGenericValidateType(theSet
, __kCFCharacterSetTypeID
);
1515 if (!isMutable
&& !__CFCSetIsMutable(theSet
)) {
1516 return (CFMutableCharacterSetRef
)CFRetain(theSet
);
1519 cset
= CFCharacterSetCreateMutable(alloc
);
1521 __CFCSetPutClassType(cset
, __CFCSetClassType(theSet
));
1522 __CFCSetPutHasHashValue(cset
, __CFCSetHasHashValue(theSet
));
1523 __CFCSetPutIsInverted(cset
, __CFCSetIsInverted(theSet
));
1524 cset
->_hashValue
= theSet
->_hashValue
;
1526 switch (__CFCSetClassType(theSet
)) {
1527 case __kCFCharSetClassBuiltin
:
1528 __CFCSetPutBuiltinType(cset
, __CFCSetBuiltinType(theSet
));
1531 case __kCFCharSetClassRange
:
1532 __CFCSetPutRangeFirstChar(cset
, __CFCSetRangeFirstChar(theSet
));
1533 __CFCSetPutRangeLength(cset
, __CFCSetRangeLength(theSet
));
1536 case __kCFCharSetClassString
:
1537 __CFCSetPutStringBuffer(cset
, (UniChar
*)CFAllocatorAllocate(alloc
, __kCFStringCharSetMax
* sizeof(UniChar
), 0));
1539 __CFCSetPutStringLength(cset
, __CFCSetStringLength(theSet
));
1540 memmove(__CFCSetStringBuffer(cset
), __CFCSetStringBuffer(theSet
), __CFCSetStringLength(theSet
) * sizeof(UniChar
));
1543 case __kCFCharSetClassBitmap
:
1544 if (__CFCSetBitmapBits(theSet
)) {
1545 uint8_t * bitmap
= (isMutable
? NULL
: __CFCreateCompactBitmap(alloc
, __CFCSetBitmapBits(theSet
)));
1547 if (bitmap
== NULL
) {
1548 bitmap
= (uint8_t *)CFAllocatorAllocate(alloc
, sizeof(uint8_t) * __kCFBitmapSize
, 0);
1549 memmove(bitmap
, __CFCSetBitmapBits(theSet
), __kCFBitmapSize
);
1550 __CFCSetPutBitmapBits(cset
, bitmap
);
1552 __CFCSetPutCompactBitmapBits(cset
, bitmap
);
1553 __CFCSetPutClassType(cset
, __kCFCharSetClassCompactBitmap
);
1556 __CFCSetPutBitmapBits(cset
, NULL
);
1560 case __kCFCharSetClassCompactBitmap
: {
1561 const uint8_t *compactBitmap
= __CFCSetCompactBitmapBits(theSet
);
1563 if (compactBitmap
) {
1564 uint32_t size
= __CFCSetGetCompactBitmapSize(compactBitmap
);
1565 uint8_t *newBitmap
= (uint8_t *)CFAllocatorAllocate(alloc
, size
, 0);
1567 memmove(newBitmap
, compactBitmap
, size
);
1568 __CFCSetPutCompactBitmapBits(cset
, newBitmap
);
1574 CFAssert1(0, __kCFLogAssertion
, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__
); // We should never come here
1576 if (__CFCSetHasNonBMPPlane(theSet
)) {
1577 CFMutableCharacterSetRef annexPlane
;
1580 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
1581 if ((annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
))) {
1582 annexPlane
= __CFCharacterSetCreateCopy(alloc
, annexPlane
, isMutable
);
1583 __CFCSetPutCharacterSetToAnnexPlane(cset
, annexPlane
, idx
);
1584 CFRelease(annexPlane
);
1587 __CFCSetAnnexSetIsInverted(cset
, __CFCSetAnnexIsInverted(theSet
));
1588 } else if (__CFCSetAnnexIsInverted(theSet
)) {
1589 __CFCSetAnnexSetIsInverted(cset
, true);
1595 CFCharacterSetRef
CFCharacterSetCreateCopy(CFAllocatorRef alloc
, CFCharacterSetRef theSet
) {
1596 return __CFCharacterSetCreateCopy(alloc
, theSet
, false);
1599 CFMutableCharacterSetRef
CFCharacterSetCreateMutableCopy(CFAllocatorRef alloc
, CFCharacterSetRef theSet
) {
1600 return __CFCharacterSetCreateCopy(alloc
, theSet
, true);
1603 /*** Basic accessors ***/
1604 Boolean
CFCharacterSetIsCharacterMember(CFCharacterSetRef theSet
, UniChar theChar
) {
1607 Boolean result
= false;
1609 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID
, Boolean
, theSet
, "longCharacterIsMember:", theChar
);
1611 __CFGenericValidateType(theSet
, __kCFCharacterSetTypeID
);
1613 isInverted
= __CFCSetIsInverted(theSet
);
1615 switch (__CFCSetClassType(theSet
)) {
1616 case __kCFCharSetClassBuiltin
:
1617 result
= (CFUniCharIsMemberOf(theChar
, __CFCSetBuiltinType(theSet
)) ? !isInverted
: isInverted
);
1620 case __kCFCharSetClassRange
:
1621 length
= __CFCSetRangeLength(theSet
);
1622 result
= (length
&& __CFCSetRangeFirstChar(theSet
) <= theChar
&& theChar
< __CFCSetRangeFirstChar(theSet
) + length
? !isInverted
: isInverted
);
1625 case __kCFCharSetClassString
:
1626 result
= ((length
= __CFCSetStringLength(theSet
)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet
), length
, theChar
) ? !isInverted
: isInverted
) : isInverted
);
1629 case __kCFCharSetClassBitmap
:
1630 result
= (__CFCSetCompactBitmapBits(theSet
) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet
), theChar
) ? true : false) : isInverted
);
1633 case __kCFCharSetClassCompactBitmap
:
1634 result
= (__CFCSetCompactBitmapBits(theSet
) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet
), theChar
) ? true : false) : isInverted
);
1638 CFAssert1(0, __kCFLogAssertion
, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__
); // We should never come here
1645 Boolean
CFCharacterSetIsLongCharacterMember(CFCharacterSetRef theSet
, UTF32Char theChar
) {
1647 UInt32 plane
= (theChar
>> 16);
1648 Boolean isAnnexInverted
= false;
1650 Boolean result
= false;
1652 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID
, Boolean
, theSet
, "longCharacterIsMember:", theChar
);
1654 __CFGenericValidateType(theSet
, __kCFCharacterSetTypeID
);
1657 CFCharacterSetRef annexPlane
;
1659 if (__CFCSetIsBuiltin(theSet
)) {
1660 isInverted
= __CFCSetIsInverted(theSet
);
1661 return (CFUniCharIsMemberOf(theChar
, __CFCSetBuiltinType(theSet
)) ? !isInverted
: isInverted
);
1664 isAnnexInverted
= __CFCSetAnnexIsInverted(theSet
);
1666 if ((annexPlane
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, plane
)) == NULL
) {
1667 if (!__CFCSetHasNonBMPPlane(theSet
) && __CFCSetIsRange(theSet
)) {
1668 isInverted
= __CFCSetIsInverted(theSet
);
1669 length
= __CFCSetRangeLength(theSet
);
1670 return (length
&& __CFCSetRangeFirstChar(theSet
) <= theChar
&& theChar
< __CFCSetRangeFirstChar(theSet
) + length
? !isInverted
: isInverted
);
1672 return (isAnnexInverted
? true : false);
1675 theSet
= annexPlane
;
1680 isInverted
= __CFCSetIsInverted(theSet
);
1682 switch (__CFCSetClassType(theSet
)) {
1683 case __kCFCharSetClassBuiltin
:
1684 result
= (CFUniCharIsMemberOf(theChar
, __CFCSetBuiltinType(theSet
)) ? !isInverted
: isInverted
);
1687 case __kCFCharSetClassRange
:
1688 length
= __CFCSetRangeLength(theSet
);
1689 result
= (length
&& __CFCSetRangeFirstChar(theSet
) <= theChar
&& theChar
< __CFCSetRangeFirstChar(theSet
) + length
? !isInverted
: isInverted
);
1692 case __kCFCharSetClassString
:
1693 result
= ((length
= __CFCSetStringLength(theSet
)) ? (__CFCSetBsearchUniChar(__CFCSetStringBuffer(theSet
), length
, theChar
) ? !isInverted
: isInverted
) : isInverted
);
1696 case __kCFCharSetClassBitmap
:
1697 result
= (__CFCSetCompactBitmapBits(theSet
) ? (__CFCSetIsMemberBitmap(__CFCSetBitmapBits(theSet
), theChar
) ? true : false) : isInverted
);
1700 case __kCFCharSetClassCompactBitmap
:
1701 result
= (__CFCSetCompactBitmapBits(theSet
) ? (__CFCSetIsMemberInCompactBitmap(__CFCSetCompactBitmapBits(theSet
), theChar
) ? true : false) : isInverted
);
1705 CFAssert1(0, __kCFLogAssertion
, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__
); // We should never come here
1706 return false; // To make compiler happy
1709 return (result
? !isAnnexInverted
: isAnnexInverted
);
1712 Boolean
CFCharacterSetIsSurrogatePairMember(CFCharacterSetRef theSet
, UniChar surrogateHigh
, UniChar surrogateLow
) {
1713 return CFCharacterSetIsLongCharacterMember(theSet
, CFCharacterSetGetLongCharacterForSurrogatePair(surrogateHigh
, surrogateLow
));
1717 static inline CFCharacterSetRef
__CFCharacterSetGetExpandedSetForNSCharacterSet(const void *characterSet
) {
1718 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID
, CFCharacterSetRef
, characterSet
, "_expandedCFCharacterSet");
1722 Boolean
CFCharacterSetIsSupersetOfSet(CFCharacterSetRef theSet
, CFCharacterSetRef theOtherSet
) {
1723 CFMutableCharacterSetRef copy
;
1724 CFCharacterSetRef expandedSet
= NULL
;
1725 CFCharacterSetRef expandedOtherSet
= NULL
;
1728 if ((!CF_IS_OBJC(__kCFCharacterSetTypeID
, theSet
) || (expandedSet
= __CFCharacterSetGetExpandedSetForNSCharacterSet(theSet
))) && (!CF_IS_OBJC(__kCFCharacterSetTypeID
, theOtherSet
) || (expandedOtherSet
= __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet
)))) { // Really CF, we can do some trick here
1729 if (expandedSet
) theSet
= expandedSet
;
1730 if (expandedOtherSet
) theOtherSet
= expandedOtherSet
;
1732 __CFGenericValidateType(theSet
, __kCFCharacterSetTypeID
);
1733 __CFGenericValidateType(theOtherSet
, __kCFCharacterSetTypeID
);
1735 if (__CFCSetIsEmpty(theSet
)) {
1736 if (__CFCSetIsInverted(theSet
)) {
1737 return TRUE
; // Inverted empty set covers all range
1738 } else if (!__CFCSetIsEmpty(theOtherSet
) || __CFCSetIsInverted(theOtherSet
)) {
1741 } else if (__CFCSetIsEmpty(theOtherSet
) && !__CFCSetIsInverted(theOtherSet
)) {
1744 if (__CFCSetIsBuiltin(theSet
) || __CFCSetIsBuiltin(theOtherSet
)) {
1745 if (__CFCSetClassType(theSet
) == __CFCSetClassType(theOtherSet
) && __CFCSetBuiltinType(theSet
) == __CFCSetBuiltinType(theOtherSet
) && !__CFCSetIsInverted(theSet
) && !__CFCSetIsInverted(theOtherSet
)) return TRUE
;
1746 } else if (__CFCSetIsRange(theSet
) || __CFCSetIsRange(theOtherSet
)) {
1747 if (__CFCSetClassType(theSet
) == __CFCSetClassType(theOtherSet
)) {
1748 if (__CFCSetIsInverted(theSet
)) {
1749 if (__CFCSetIsInverted(theOtherSet
)) {
1750 return (__CFCSetRangeFirstChar(theOtherSet
) > __CFCSetRangeFirstChar(theSet
) || (__CFCSetRangeFirstChar(theSet
) + __CFCSetRangeLength(theSet
)) > (__CFCSetRangeFirstChar(theOtherSet
) + __CFCSetRangeLength(theOtherSet
)) ? FALSE
: TRUE
);
1752 return ((__CFCSetRangeFirstChar(theOtherSet
) + __CFCSetRangeLength(theOtherSet
)) <= __CFCSetRangeFirstChar(theSet
) || (__CFCSetRangeFirstChar(theSet
) + __CFCSetRangeLength(theSet
)) <= __CFCSetRangeFirstChar(theOtherSet
) ? TRUE
: FALSE
);
1755 if (__CFCSetIsInverted(theOtherSet
)) {
1756 return ((__CFCSetRangeFirstChar(theSet
) == 0 && __CFCSetRangeLength(theSet
) == 0x110000) || (__CFCSetRangeFirstChar(theOtherSet
) == 0 && (UInt32
)__CFCSetRangeLength(theOtherSet
) <= __CFCSetRangeFirstChar(theSet
)) || ((__CFCSetRangeFirstChar(theSet
) + __CFCSetRangeLength(theSet
)) <= __CFCSetRangeFirstChar(theOtherSet
) && (__CFCSetRangeFirstChar(theOtherSet
) + __CFCSetRangeLength(theOtherSet
)) == 0x110000) ? TRUE
: FALSE
);
1758 return (__CFCSetRangeFirstChar(theOtherSet
) < __CFCSetRangeFirstChar(theSet
) || (__CFCSetRangeFirstChar(theSet
) + __CFCSetRangeLength(theSet
)) < (__CFCSetRangeFirstChar(theOtherSet
) + __CFCSetRangeLength(theOtherSet
)) ? FALSE
: TRUE
);
1763 UInt32 theSetAnnexMask
= __CFCSetAnnexValidEntriesBitmap(theSet
);
1764 UInt32 theOtherSetAnnexMask
= __CFCSetAnnexValidEntriesBitmap(theOtherSet
);
1765 Boolean isTheSetAnnexInverted
= __CFCSetAnnexIsInverted(theSet
);
1766 Boolean isTheOtherSetAnnexInverted
= __CFCSetAnnexIsInverted(theOtherSet
);
1767 uint8_t theSetBuffer
[__kCFBitmapSize
];
1768 uint8_t theOtherSetBuffer
[__kCFBitmapSize
];
1770 // We mask plane 1 to plane 16
1771 if (isTheSetAnnexInverted
) theSetAnnexMask
= (~theSetAnnexMask
) & (0xFFFF << 1);
1772 if (isTheOtherSetAnnexInverted
) theOtherSetAnnexMask
= (~theOtherSetAnnexMask
) & (0xFFFF << 1);
1774 __CFCSetGetBitmap(theSet
, theSetBuffer
);
1775 __CFCSetGetBitmap(theOtherSet
, theOtherSetBuffer
);
1777 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32
*)theSetBuffer
, (const UInt32
*)theOtherSetBuffer
, FALSE
, FALSE
)) return FALSE
;
1779 if (theOtherSetAnnexMask
) {
1780 CFCharacterSetRef theSetAnnex
;
1781 CFCharacterSetRef theOtherSetAnnex
;
1784 if ((theSetAnnexMask
& theOtherSetAnnexMask
) != theOtherSetAnnexMask
) return FALSE
;
1786 for (idx
= 1;idx
<= 16;idx
++) {
1787 theSetAnnex
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
);
1788 if (NULL
== theSetAnnex
) continue; // This case is already handled by the mask above
1790 theOtherSetAnnex
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet
, idx
);
1792 if (NULL
== theOtherSetAnnex
) {
1793 if (isTheOtherSetAnnexInverted
) {
1794 __CFCSetGetBitmap(theSetAnnex
, theSetBuffer
);
1795 if (!__CFCSetIsEqualBitmap((const UInt32
*)theSetBuffer
, (isTheSetAnnexInverted
? NULL
: (const UInt32
*)-1))) return FALSE
;
1798 __CFCSetGetBitmap(theSetAnnex
, theSetBuffer
);
1799 __CFCSetGetBitmap(theOtherSetAnnex
, theOtherSetBuffer
);
1800 if (!__CFCSetIsBitmapSupersetOfBitmap((const UInt32
*)theSetBuffer
, (const UInt32
*)theOtherSetBuffer
, isTheSetAnnexInverted
, isTheOtherSetAnnexInverted
)) return FALSE
;
1810 copy
= CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault
, theSet
);
1811 CFCharacterSetIntersect(copy
, theOtherSet
);
1812 result
= __CFCharacterSetEqual(copy
, theOtherSet
);
1818 Boolean
CFCharacterSetHasMemberInPlane(CFCharacterSetRef theSet
, CFIndex thePlane
) {
1819 Boolean isInverted
= __CFCSetIsInverted(theSet
);
1821 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID
, Boolean
, theSet
, "hasMemberInPlane:", thePlane
);
1823 if (__CFCSetIsEmpty(theSet
)) {
1824 return (isInverted
? TRUE
: FALSE
);
1825 } else if (__CFCSetIsBuiltin(theSet
)) {
1826 CFCharacterSetPredefinedSet type
= __CFCSetBuiltinType(theSet
);
1828 if (type
== kCFCharacterSetControl
) {
1829 if (isInverted
|| (thePlane
== 14)) {
1830 return TRUE
; // There is no plane that covers all values || Plane 14 has language tags
1832 return (CFUniCharGetBitmapPtrForPlane(type
, thePlane
) ? TRUE
: FALSE
);
1834 } else if ((type
< kCFCharacterSetDecimalDigit
) || (type
== kCFCharacterSetNewline
)) {
1835 return (thePlane
&& !isInverted
? FALSE
: TRUE
);
1836 } else if (__CFCSetBuiltinType(theSet
) == kCFCharacterSetIllegal
) {
1837 return (isInverted
? (thePlane
< 3 || thePlane
> 13 ? TRUE
: FALSE
) : TRUE
); // This is according to Unicode 3.1
1840 return TRUE
; // There is no plane that covers all values
1842 return (CFUniCharGetBitmapPtrForPlane(type
, thePlane
) ? TRUE
: FALSE
);
1845 } else if (__CFCSetIsRange(theSet
)) {
1846 UTF32Char firstChar
= __CFCSetRangeFirstChar(theSet
);
1847 UTF32Char lastChar
= (firstChar
+ __CFCSetRangeLength(theSet
) - 1);
1848 CFIndex firstPlane
= firstChar
>> 16;
1849 CFIndex lastPlane
= lastChar
>> 16;
1852 if (thePlane
< firstPlane
|| thePlane
> lastPlane
) {
1854 } else if (thePlane
> firstPlane
&& thePlane
< lastPlane
) {
1857 firstChar
&= 0xFFFF;
1859 if (thePlane
== firstPlane
) {
1860 return (firstChar
|| (firstPlane
== lastPlane
&& lastChar
!= 0xFFFF) ? TRUE
: FALSE
);
1862 return (lastChar
!= 0xFFFF || (firstPlane
== lastPlane
&& firstChar
) ? TRUE
: FALSE
);
1866 return (thePlane
< firstPlane
|| thePlane
> lastPlane
? FALSE
: TRUE
);
1869 if (thePlane
== 0) {
1870 switch (__CFCSetClassType(theSet
)) {
1871 case __kCFCharSetClassString
: if (!__CFCSetStringLength(theSet
)) return isInverted
; break;
1872 case __kCFCharSetClassCompactBitmap
: return (__CFCSetCompactBitmapBits(theSet
) ? TRUE
: FALSE
); break;
1873 case __kCFCharSetClassBitmap
: return (__CFCSetBitmapBits(theSet
) ? TRUE
: FALSE
); break;
1877 CFCharacterSetRef annex
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, thePlane
);
1879 if (__CFCSetIsRange(annex
)) {
1880 return (__CFCSetAnnexIsInverted(theSet
) && (__CFCSetRangeFirstChar(annex
) == 0) && (__CFCSetRangeLength(annex
) == 0x10000) ? FALSE
: TRUE
);
1881 } else if (__CFCSetIsBitmap(annex
)) {
1882 return (__CFCSetAnnexIsInverted(theSet
) && __CFCSetIsEqualBitmap((const UInt32
*)__CFCSetBitmapBits(annex
), (const UInt32
*)-1) ? FALSE
: TRUE
);
1884 uint8_t bitsBuf
[__kCFBitmapSize
];
1885 __CFCSetGetBitmap(annex
, bitsBuf
);
1886 return (__CFCSetAnnexIsInverted(theSet
) && __CFCSetIsEqualBitmap((const UInt32
*)bitsBuf
, (const UInt32
*)-1) ? FALSE
: TRUE
);
1889 return __CFCSetAnnexIsInverted(theSet
);
1898 CFDataRef
CFCharacterSetCreateBitmapRepresentation(CFAllocatorRef alloc
, CFCharacterSetRef theSet
) {
1899 CFMutableDataRef data
;
1900 int numNonBMPPlanes
= 0;
1901 int planeIndices
[MAX_ANNEX_PLANE
];
1904 bool isAnnexInverted
;
1906 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID
, CFDataRef
, theSet
, "_retainedBitmapRepresentation");
1908 __CFGenericValidateType(theSet
, __kCFCharacterSetTypeID
);
1910 isAnnexInverted
= (__CFCSetAnnexIsInverted(theSet
) != 0);
1912 if (__CFCSetHasNonBMPPlane(theSet
)) {
1913 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
1914 if (isAnnexInverted
|| __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
)) {
1915 planeIndices
[numNonBMPPlanes
++] = idx
;
1918 } else if (__CFCSetIsBuiltin(theSet
)) {
1919 numNonBMPPlanes
= (__CFCSetIsInverted(theSet
) ? MAX_ANNEX_PLANE
: CFUniCharGetNumberOfPlanes(__CFCSetBuiltinType(theSet
)) - 1);
1920 } else if (__CFCSetIsRange(theSet
)) {
1921 UInt32 firstChar
= __CFCSetRangeFirstChar(theSet
);
1922 UInt32 lastChar
= __CFCSetRangeFirstChar(theSet
) + __CFCSetRangeLength(theSet
) - 1;
1923 int firstPlane
= (firstChar
>> 16);
1924 int lastPlane
= (lastChar
>> 16);
1925 bool isInverted
= (__CFCSetIsInverted(theSet
) != 0);
1927 if (lastPlane
> 0) {
1928 if (firstPlane
== 0) {
1930 firstChar
= 0x10000;
1932 numNonBMPPlanes
= (lastPlane
- firstPlane
) + 1;
1934 numNonBMPPlanes
= MAX_ANNEX_PLANE
- numNonBMPPlanes
;
1935 if (firstPlane
== lastPlane
) {
1936 if (((firstChar
& 0xFFFF) > 0) || ((lastChar
& 0xFFFF) < 0xFFFF)) ++numNonBMPPlanes
;
1938 if ((firstChar
& 0xFFFF) > 0) ++numNonBMPPlanes
;
1939 if ((lastChar
& 0xFFFF) < 0xFFFF) ++numNonBMPPlanes
;
1942 } else if (isInverted
) {
1943 numNonBMPPlanes
= MAX_ANNEX_PLANE
;
1945 } else if (isAnnexInverted
) {
1946 numNonBMPPlanes
= MAX_ANNEX_PLANE
;
1949 length
= __kCFBitmapSize
+ ((__kCFBitmapSize
+ 1) * numNonBMPPlanes
);
1950 data
= CFDataCreateMutable(alloc
, length
);
1951 CFDataSetLength(data
, length
);
1952 __CFCSetGetBitmap(theSet
, CFDataGetMutableBytePtr(data
));
1954 if (numNonBMPPlanes
> 0) {
1955 uint8_t *bytes
= CFDataGetMutableBytePtr(data
) + __kCFBitmapSize
;
1957 if (__CFCSetHasNonBMPPlane(theSet
)) {
1958 CFCharacterSetRef subset
;
1960 for (idx
= 0;idx
< numNonBMPPlanes
;idx
++) {
1961 *(bytes
++) = planeIndices
[idx
];
1962 if ((subset
= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, planeIndices
[idx
])) == NULL
) {
1963 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, (isAnnexInverted
? 0xFF : 0));
1965 __CFCSetGetBitmap(subset
, bytes
);
1966 if (isAnnexInverted
) {
1967 uint32_t count
= __kCFBitmapSize
/ sizeof(uint32_t);
1968 uint32_t *bits
= (uint32_t *)bytes
;
1970 while (count
-- > 0) {
1976 bytes
+= __kCFBitmapSize
;
1978 } else if (__CFCSetIsBuiltin(theSet
)) {
1981 Boolean isInverted
= __CFCSetIsInverted(theSet
);
1983 for (idx
= 0;idx
< numNonBMPPlanes
;idx
++) {
1984 if ((result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theSet
), idx
+ 1, bytes
+ 1, (isInverted
!= 0))) == kCFUniCharBitmapEmpty
) continue;
1985 *(bytes
++) = idx
+ 1;
1986 if (result
== kCFUniCharBitmapAll
) {
1987 CFIndex bitmapLength
= __kCFBitmapSize
;
1988 while (bitmapLength
-- > 0) *(bytes
++) = (uint8_t)0xFF;
1990 bytes
+= __kCFBitmapSize
;
1993 delta
= bytes
- (const uint8_t *)CFDataGetBytePtr(data
);
1994 if (delta
< length
) CFDataSetLength(data
, delta
);
1995 } else if (__CFCSetIsRange(theSet
)) {
1996 UInt32 firstChar
= __CFCSetRangeFirstChar(theSet
);
1997 UInt32 lastChar
= __CFCSetRangeFirstChar(theSet
) + __CFCSetRangeLength(theSet
) - 1;
1998 int firstPlane
= (firstChar
>> 16);
1999 int lastPlane
= (lastChar
>> 16);
2001 if (firstPlane
== 0) {
2003 firstChar
= 0x10000;
2005 if (__CFCSetIsInverted(theSet
)) {
2006 // Mask out the plane byte
2007 firstChar
&= 0xFFFF;
2010 for (idx
= 1;idx
< firstPlane
;idx
++) { // Fill up until the first plane
2012 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, 0xFF);
2013 bytes
+= __kCFBitmapSize
;
2015 if (firstPlane
== lastPlane
) {
2016 if ((firstChar
> 0) || (lastChar
< 0xFFFF)) {
2018 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, 0xFF);
2019 __CFCSetBitmapRemoveCharactersInRange(bytes
, firstChar
, lastChar
);
2020 bytes
+= __kCFBitmapSize
;
2022 } else if (firstPlane
< lastPlane
) {
2023 if (firstChar
> 0) {
2025 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, 0);
2026 __CFCSetBitmapAddCharactersInRange(bytes
, 0, firstChar
- 1);
2027 bytes
+= __kCFBitmapSize
;
2029 if (lastChar
< 0xFFFF) {
2031 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, 0);
2032 __CFCSetBitmapAddCharactersInRange(bytes
, lastChar
, 0xFFFF);
2033 bytes
+= __kCFBitmapSize
;
2036 for (idx
= lastPlane
+ 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2038 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, 0xFF);
2039 bytes
+= __kCFBitmapSize
;
2042 for (idx
= firstPlane
;idx
<= lastPlane
;idx
++) {
2044 __CFCSetBitmapAddCharactersInRange(bytes
, (idx
== firstPlane
? firstChar
: 0), (idx
== lastPlane
? lastChar
: 0xFFFF));
2045 bytes
+= __kCFBitmapSize
;
2048 } else if (isAnnexInverted
) {
2049 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2051 __CFCSetBitmapFastFillWithValue((UInt32
*)bytes
, 0xFF);
2052 bytes
+= __kCFBitmapSize
;
2060 /*** MutableCharacterSet functions ***/
2061 void CFCharacterSetAddCharactersInRange(CFMutableCharacterSetRef theSet
, CFRange theRange
) {
2062 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID
, void, theSet
, "addCharactersInRange:", theRange
);
2064 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2065 __CFCSetValidateRange(theRange
, __PRETTY_FUNCTION__
);
2067 if (!theRange
.length
|| (__CFCSetIsInverted(theSet
) && __CFCSetIsEmpty(theSet
))) return; // Inverted && empty set contains all char
2069 if (!__CFCSetIsInverted(theSet
)) {
2070 if (__CFCSetIsEmpty(theSet
)) {
2071 __CFCSetPutClassType(theSet
, __kCFCharSetClassRange
);
2072 __CFCSetPutRangeFirstChar(theSet
, theRange
.location
);
2073 __CFCSetPutRangeLength(theSet
, theRange
.length
);
2074 __CFCSetPutHasHashValue(theSet
, false);
2076 } else if (__CFCSetIsRange(theSet
)) {
2077 CFIndex firstChar
= __CFCSetRangeFirstChar(theSet
);
2078 CFIndex length
= __CFCSetRangeLength(theSet
);
2080 if (firstChar
== theRange
.location
) {
2081 __CFCSetPutRangeLength(theSet
, __CFMin(length
, theRange
.length
));
2082 __CFCSetPutHasHashValue(theSet
, false);
2084 } else if (firstChar
< theRange
.location
&& theRange
.location
<= firstChar
+ length
) {
2085 if (firstChar
+ length
< theRange
.location
+ theRange
.length
) __CFCSetPutRangeLength(theSet
, theRange
.length
+ (theRange
.location
- firstChar
));
2086 __CFCSetPutHasHashValue(theSet
, false);
2088 } else if (theRange
.location
< firstChar
&& firstChar
<= theRange
.location
+ theRange
.length
) {
2089 __CFCSetPutRangeFirstChar(theSet
, theRange
.location
);
2090 __CFCSetPutRangeLength(theSet
, length
+ (firstChar
- theRange
.location
));
2091 __CFCSetPutHasHashValue(theSet
, false);
2094 } else if (__CFCSetIsString(theSet
) && __CFCSetStringLength(theSet
) + theRange
.length
< __kCFStringCharSetMax
) {
2096 if (!__CFCSetStringBuffer(theSet
))
2097 __CFCSetPutStringBuffer(theSet
, (UniChar
*)CFAllocatorAllocate(CFGetAllocator(theSet
), __kCFStringCharSetMax
* sizeof(UniChar
), 0));
2098 buffer
= __CFCSetStringBuffer(theSet
) + __CFCSetStringLength(theSet
);
2099 __CFCSetPutStringLength(theSet
, __CFCSetStringLength(theSet
) + theRange
.length
);
2100 while (theRange
.length
--) *buffer
++ = (UniChar
)theRange
.location
++;
2101 qsort(__CFCSetStringBuffer(theSet
), __CFCSetStringLength(theSet
), sizeof(UniChar
), chcompar
);
2102 __CFCSetPutHasHashValue(theSet
, false);
2107 // OK, I have to be a bitmap
2108 __CFCSetMakeBitmap(theSet
);
2109 __CFCSetAddNonBMPPlanesInRange(theSet
, theRange
);
2110 if (theRange
.location
< 0x10000) { // theRange is in BMP
2111 if (theRange
.location
+ theRange
.length
>= NUMCHARACTERS
) theRange
.length
= NUMCHARACTERS
- theRange
.location
;
2112 __CFCSetBitmapAddCharactersInRange(__CFCSetBitmapBits(theSet
), (UniChar
)theRange
.location
, (UniChar
)(theRange
.location
+ theRange
.length
- 1));
2114 __CFCSetPutHasHashValue(theSet
, false);
2116 if (__CFCheckForExapendedSet
) __CFCheckForExpandedSet(theSet
);
2119 void CFCharacterSetRemoveCharactersInRange(CFMutableCharacterSetRef theSet
, CFRange theRange
) {
2120 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID
, void, theSet
, "removeCharactersInRange:", theRange
);
2122 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2123 __CFCSetValidateRange(theRange
, __PRETTY_FUNCTION__
);
2125 if (!theRange
.length
|| (!__CFCSetIsInverted(theSet
) && __CFCSetIsEmpty(theSet
))) return; // empty set
2127 if (__CFCSetIsInverted(theSet
)) {
2128 if (__CFCSetIsEmpty(theSet
)) {
2129 __CFCSetPutClassType(theSet
, __kCFCharSetClassRange
);
2130 __CFCSetPutRangeFirstChar(theSet
, theRange
.location
);
2131 __CFCSetPutRangeLength(theSet
, theRange
.length
);
2132 __CFCSetPutHasHashValue(theSet
, false);
2134 } else if (__CFCSetIsRange(theSet
)) {
2135 CFIndex firstChar
= __CFCSetRangeFirstChar(theSet
);
2136 CFIndex length
= __CFCSetRangeLength(theSet
);
2138 if (firstChar
== theRange
.location
) {
2139 __CFCSetPutRangeLength(theSet
, __CFMin(length
, theRange
.length
));
2140 __CFCSetPutHasHashValue(theSet
, false);
2142 } else if (firstChar
< theRange
.location
&& theRange
.location
<= firstChar
+ length
) {
2143 if (firstChar
+ length
< theRange
.location
+ theRange
.length
) __CFCSetPutRangeLength(theSet
, theRange
.length
+ (theRange
.location
- firstChar
));
2144 __CFCSetPutHasHashValue(theSet
, false);
2146 } else if (theRange
.location
< firstChar
&& firstChar
<= theRange
.location
+ theRange
.length
) {
2147 __CFCSetPutRangeFirstChar(theSet
, theRange
.location
);
2148 __CFCSetPutRangeLength(theSet
, length
+ (firstChar
- theRange
.location
));
2149 __CFCSetPutHasHashValue(theSet
, false);
2152 } else if (__CFCSetIsString(theSet
) && __CFCSetStringLength(theSet
) + theRange
.length
< __kCFStringCharSetMax
) {
2154 if (!__CFCSetStringBuffer(theSet
))
2155 __CFCSetPutStringBuffer(theSet
, (UniChar
*)CFAllocatorAllocate(CFGetAllocator(theSet
), __kCFStringCharSetMax
* sizeof(UniChar
), 0));
2156 buffer
= __CFCSetStringBuffer(theSet
) + __CFCSetStringLength(theSet
);
2157 __CFCSetPutStringLength(theSet
, __CFCSetStringLength(theSet
) + theRange
.length
);
2158 while (theRange
.length
--) *buffer
++ = (UniChar
)theRange
.location
++;
2159 qsort(__CFCSetStringBuffer(theSet
), __CFCSetStringLength(theSet
), sizeof(UniChar
), chcompar
);
2160 __CFCSetPutHasHashValue(theSet
, false);
2165 // OK, I have to be a bitmap
2166 __CFCSetMakeBitmap(theSet
);
2167 __CFCSetRemoveNonBMPPlanesInRange(theSet
, theRange
);
2168 if (theRange
.location
< 0x10000) { // theRange is in BMP
2169 if (theRange
.location
+ theRange
.length
> NUMCHARACTERS
) theRange
.length
= NUMCHARACTERS
- theRange
.location
;
2170 if (theRange
.location
== 0 && theRange
.length
== NUMCHARACTERS
) { // Remove all
2171 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetBitmapBits(theSet
));
2172 __CFCSetPutBitmapBits(theSet
, NULL
);
2174 __CFCSetBitmapRemoveCharactersInRange(__CFCSetBitmapBits(theSet
), (UniChar
)theRange
.location
, (UniChar
)(theRange
.location
+ theRange
.length
- 1));
2178 __CFCSetPutHasHashValue(theSet
, false);
2179 if (__CFCheckForExapendedSet
) __CFCheckForExpandedSet(theSet
);
2182 void CFCharacterSetAddCharactersInString(CFMutableCharacterSetRef theSet
, CFStringRef theString
) {
2185 BOOL hasSurrogate
= NO
;
2187 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID
, void, theSet
, "addCharactersInString:", theString
);
2189 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2191 if ((__CFCSetIsEmpty(theSet
) && __CFCSetIsInverted(theSet
)) || !(length
= CFStringGetLength(theString
))) return;
2193 if (!__CFCSetIsInverted(theSet
)) {
2194 CFIndex newLength
= length
+ (__CFCSetIsEmpty(theSet
) ? 0 : (__CFCSetIsString(theSet
) ? __CFCSetStringLength(theSet
) : __kCFStringCharSetMax
));
2196 if (newLength
< __kCFStringCharSetMax
) {
2197 buffer
= __CFCSetStringBuffer(theSet
);
2199 if (NULL
== buffer
) {
2200 buffer
= (UniChar
*)CFAllocatorAllocate(CFGetAllocator(theSet
), __kCFStringCharSetMax
* sizeof(UniChar
), 0);
2202 buffer
+= __CFCSetStringLength(theSet
);
2205 CFStringGetCharacters(theString
, CFRangeMake(0, length
), (UniChar
*)buffer
);
2208 UTF16Char
*characters
= buffer
;
2209 const UTF16Char
*charactersLimit
= characters
+ length
;
2211 while (characters
< charactersLimit
) {
2212 if (CFStringIsSurrogateHighCharacter(*characters
) || CFStringIsSurrogateLowCharacter(*characters
)) {
2213 memmove(characters
, characters
+ 1, (charactersLimit
- (characters
+ 1)) * sizeof(*characters
));
2221 newLength
-= (length
- (charactersLimit
- buffer
));
2224 if (0 == newLength
) {
2225 if (NULL
== __CFCSetStringBuffer(theSet
)) CFAllocatorDeallocate(CFGetAllocator(theSet
), buffer
);
2227 if (NULL
== __CFCSetStringBuffer(theSet
)) {
2228 __CFCSetPutClassType(theSet
, __kCFCharSetClassString
);
2229 __CFCSetPutStringBuffer(theSet
, buffer
);
2231 __CFCSetPutStringLength(theSet
, newLength
);
2232 qsort(__CFCSetStringBuffer(theSet
), newLength
, sizeof(UniChar
), chcompar
);
2234 __CFCSetPutHasHashValue(theSet
, false);
2236 if (hasSurrogate
) __CFApplySurrogatesInString(theSet
, theString
, &CFCharacterSetAddCharactersInRange
);
2242 // OK, I have to be a bitmap
2243 __CFCSetMakeBitmap(theSet
);
2244 CFStringInlineBuffer inlineBuffer
;
2247 CFStringInitInlineBuffer(theString
, &inlineBuffer
, CFRangeMake(0, length
));
2249 for (idx
= 0;idx
< length
;idx
++) {
2250 UTF16Char character
= __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer
, idx
);
2252 if (CFStringIsSurrogateHighCharacter(character
) || CFStringIsSurrogateLowCharacter(character
)) {
2255 __CFCSetBitmapAddCharacter(__CFCSetBitmapBits(theSet
), character
);
2259 __CFCSetPutHasHashValue(theSet
, false);
2261 if (__CFCheckForExapendedSet
) __CFCheckForExpandedSet(theSet
);
2263 if (hasSurrogate
) __CFApplySurrogatesInString(theSet
, theString
, &CFCharacterSetAddCharactersInRange
);
2266 void CFCharacterSetRemoveCharactersInString(CFMutableCharacterSetRef theSet
, CFStringRef theString
) {
2269 BOOL hasSurrogate
= NO
;
2271 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID
, void, theSet
, "removeCharactersInString:", theString
);
2273 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2275 if ((__CFCSetIsEmpty(theSet
) && !__CFCSetIsInverted(theSet
)) || !(length
= CFStringGetLength(theString
))) return;
2277 if (__CFCSetIsInverted(theSet
)) {
2278 CFIndex newLength
= length
+ (__CFCSetIsEmpty(theSet
) ? 0 : (__CFCSetIsString(theSet
) ? __CFCSetStringLength(theSet
) : __kCFStringCharSetMax
));
2280 if (newLength
< __kCFStringCharSetMax
) {
2281 buffer
= __CFCSetStringBuffer(theSet
);
2283 if (NULL
== buffer
) {
2284 buffer
= (UniChar
*)CFAllocatorAllocate(CFGetAllocator(theSet
), __kCFStringCharSetMax
* sizeof(UniChar
), 0);
2286 buffer
+= __CFCSetStringLength(theSet
);
2289 CFStringGetCharacters(theString
, CFRangeMake(0, length
), (UniChar
*)buffer
);
2292 UTF16Char
*characters
= buffer
;
2293 const UTF16Char
*charactersLimit
= characters
+ length
;
2295 while (characters
< charactersLimit
) {
2296 if (CFStringIsSurrogateHighCharacter(*characters
) || CFStringIsSurrogateLowCharacter(*characters
)) {
2297 memmove(characters
, characters
+ 1, charactersLimit
- (characters
+ 1));
2304 newLength
-= (length
- (charactersLimit
- buffer
));
2307 if (NULL
== __CFCSetStringBuffer(theSet
)) {
2308 __CFCSetPutClassType(theSet
, __kCFCharSetClassString
);
2309 __CFCSetPutStringBuffer(theSet
, buffer
);
2311 __CFCSetPutStringLength(theSet
, newLength
);
2312 qsort(__CFCSetStringBuffer(theSet
), newLength
, sizeof(UniChar
), chcompar
);
2313 __CFCSetPutHasHashValue(theSet
, false);
2315 if (hasSurrogate
) __CFApplySurrogatesInString(theSet
, theString
, &CFCharacterSetRemoveCharactersInRange
);
2321 // OK, I have to be a bitmap
2322 __CFCSetMakeBitmap(theSet
);
2323 CFStringInlineBuffer inlineBuffer
;
2326 CFStringInitInlineBuffer(theString
, &inlineBuffer
, CFRangeMake(0, length
));
2328 for (idx
= 0;idx
< length
;idx
++) {
2329 UTF16Char character
= __CFStringGetCharacterFromInlineBufferQuick(&inlineBuffer
, idx
);
2331 if (CFStringIsSurrogateHighCharacter(character
) || CFStringIsSurrogateLowCharacter(character
)) {
2334 __CFCSetBitmapRemoveCharacter(__CFCSetBitmapBits(theSet
), character
);
2338 __CFCSetPutHasHashValue(theSet
, false);
2339 if (__CFCheckForExapendedSet
) __CFCheckForExpandedSet(theSet
);
2341 if (hasSurrogate
) __CFApplySurrogatesInString(theSet
, theString
, &CFCharacterSetRemoveCharactersInRange
);
2344 void CFCharacterSetUnion(CFMutableCharacterSetRef theSet
, CFCharacterSetRef theOtherSet
) {
2345 CFCharacterSetRef expandedSet
= NULL
;
2347 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID
, void, theSet
, "formUnionWithCharacterSet:", theOtherSet
);
2349 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2351 if (__CFCSetIsEmpty(theSet
) && __CFCSetIsInverted(theSet
)) return; // Inverted empty set contains all char
2353 if (!CF_IS_OBJC(__kCFCharacterSetTypeID
, theOtherSet
) || (expandedSet
= __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet
))) { // Really CF, we can do some trick here
2354 if (expandedSet
) theOtherSet
= expandedSet
;
2356 if (__CFCSetIsEmpty(theOtherSet
)) {
2357 if (__CFCSetIsInverted(theOtherSet
)) {
2358 if (__CFCSetIsString(theSet
) && __CFCSetStringBuffer(theSet
)) {
2359 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetStringBuffer(theSet
));
2360 } else if (__CFCSetIsBitmap(theSet
) && __CFCSetBitmapBits(theSet
)) {
2361 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetBitmapBits(theSet
));
2362 } else if (__CFCSetIsCompactBitmap(theSet
) && __CFCSetCompactBitmapBits(theSet
)) {
2363 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetCompactBitmapBits(theSet
));
2365 __CFCSetPutClassType(theSet
, __kCFCharSetClassRange
);
2366 __CFCSetPutRangeLength(theSet
, 0);
2367 __CFCSetPutIsInverted(theSet
, true);
2368 __CFCSetPutHasHashValue(theSet
, false);
2369 __CFCSetDeallocateAnnexPlane(theSet
);
2371 } else if (__CFCSetIsBuiltin(theOtherSet
) && __CFCSetIsEmpty(theSet
)) { // theSet can be builtin set
2372 __CFCSetPutClassType(theSet
, __kCFCharSetClassBuiltin
);
2373 __CFCSetPutBuiltinType(theSet
, __CFCSetBuiltinType(theOtherSet
));
2374 if (__CFCSetIsInverted(theOtherSet
)) __CFCSetPutIsInverted(theSet
, true);
2375 if (__CFCSetAnnexIsInverted(theOtherSet
)) __CFCSetAnnexSetIsInverted(theSet
, true);
2376 __CFCSetPutHasHashValue(theSet
, false);
2378 if (__CFCSetIsRange(theOtherSet
)) {
2379 if (__CFCSetIsInverted(theOtherSet
)) {
2380 UTF32Char firstChar
= __CFCSetRangeFirstChar(theOtherSet
);
2381 CFIndex length
= __CFCSetRangeLength(theOtherSet
);
2383 if (firstChar
> 0) CFCharacterSetAddCharactersInRange(theSet
, CFRangeMake(0, firstChar
));
2384 firstChar
+= length
;
2385 length
= 0x110000 - firstChar
;
2386 CFCharacterSetAddCharactersInRange(theSet
, CFRangeMake(firstChar
, length
));
2388 CFCharacterSetAddCharactersInRange(theSet
, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet
), __CFCSetRangeLength(theOtherSet
)));
2390 } else if (__CFCSetIsString(theOtherSet
)) {
2391 CFStringRef string
= CFStringCreateWithCharactersNoCopy(CFGetAllocator(theSet
), __CFCSetStringBuffer(theOtherSet
), __CFCSetStringLength(theOtherSet
), kCFAllocatorNull
);
2392 CFCharacterSetAddCharactersInString(theSet
, string
);
2395 __CFCSetMakeBitmap(theSet
);
2396 if (__CFCSetIsBitmap(theOtherSet
)) {
2397 UInt32
*bitmap1
= (UInt32
*)__CFCSetBitmapBits(theSet
);
2398 UInt32
*bitmap2
= (UInt32
*)__CFCSetBitmapBits(theOtherSet
);
2399 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
2400 while (length
--) *bitmap1
++ |= *bitmap2
++;
2402 UInt32
*bitmap1
= (UInt32
*)__CFCSetBitmapBits(theSet
);
2404 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
2405 uint8_t bitmapBuffer
[__kCFBitmapSize
];
2406 __CFCSetGetBitmap(theOtherSet
, bitmapBuffer
);
2407 bitmap2
= (UInt32
*)bitmapBuffer
;
2408 while (length
--) *bitmap1
++ |= *bitmap2
++;
2410 __CFCSetPutHasHashValue(theSet
, false);
2412 if (__CFCSetHasNonBMPPlane(theOtherSet
)) {
2413 CFMutableCharacterSetRef otherSetPlane
;
2416 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2417 if ((otherSetPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet
, idx
))) {
2418 CFCharacterSetUnion((CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(theSet
, idx
), otherSetPlane
);
2421 } else if (__CFCSetAnnexIsInverted(theOtherSet
)) {
2422 if (__CFCSetHasNonBMPPlane(theSet
)) __CFCSetDeallocateAnnexPlane(theSet
);
2423 __CFCSetAnnexSetIsInverted(theSet
, true);
2424 } else if (__CFCSetIsBuiltin(theOtherSet
)) {
2425 CFMutableCharacterSetRef annexPlane
;
2426 uint8_t bitmapBuffer
[__kCFBitmapSize
];
2429 Boolean isOtherAnnexPlaneInverted
= __CFCSetAnnexIsInverted(theOtherSet
);
2434 for (planeIndex
= 1;planeIndex
<= MAX_ANNEX_PLANE
;planeIndex
++) {
2435 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet
), planeIndex
, bitmapBuffer
, (isOtherAnnexPlaneInverted
!= 0));
2436 if (result
!= kCFUniCharBitmapEmpty
) {
2437 annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(theSet
, planeIndex
);
2438 if (result
== kCFUniCharBitmapAll
) {
2439 CFCharacterSetAddCharactersInRange(annexPlane
, CFRangeMake(0x0000, 0x10000));
2441 __CFCSetMakeBitmap(annexPlane
);
2442 bitmap1
= (UInt32
*)__CFCSetBitmapBits(annexPlane
);
2443 length
= __kCFBitmapSize
/ sizeof(UInt32
);
2444 bitmap2
= (UInt32
*)bitmapBuffer
;
2445 while (length
--) *bitmap1
++ |= *bitmap2
++;
2451 if (__CFCheckForExapendedSet
) __CFCheckForExpandedSet(theSet
);
2452 } else { // It's NSCharacterSet
2453 CFDataRef bitmapRep
= CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault
, theOtherSet
);
2454 const UInt32
*bitmap2
= (bitmapRep
&& CFDataGetLength(bitmapRep
) ? (const UInt32
*)CFDataGetBytePtr(bitmapRep
) : NULL
);
2457 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
2458 __CFCSetMakeBitmap(theSet
);
2459 bitmap1
= (UInt32
*)__CFCSetBitmapBits(theSet
);
2460 while (length
--) *bitmap1
++ |= *bitmap2
++;
2461 __CFCSetPutHasHashValue(theSet
, false);
2463 CFRelease(bitmapRep
);
2467 void CFCharacterSetIntersect(CFMutableCharacterSetRef theSet
, CFCharacterSetRef theOtherSet
) {
2468 CFCharacterSetRef expandedSet
= NULL
;
2470 CF_OBJC_FUNCDISPATCH1(__kCFCharacterSetTypeID
, void, theSet
, "formIntersectionWithCharacterSet:", theOtherSet
);
2472 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2474 if (__CFCSetIsEmpty(theSet
) && !__CFCSetIsInverted(theSet
)) return; // empty set
2476 if (!CF_IS_OBJC(__kCFCharacterSetTypeID
, theOtherSet
) || (expandedSet
= __CFCharacterSetGetExpandedSetForNSCharacterSet(theOtherSet
))) { // Really CF, we can do some trick here
2477 if (expandedSet
) theOtherSet
= expandedSet
;
2479 if (__CFCSetIsEmpty(theOtherSet
)) {
2480 if (!__CFCSetIsInverted(theOtherSet
)) {
2481 if (__CFCSetIsString(theSet
) && __CFCSetStringBuffer(theSet
)) {
2482 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetStringBuffer(theSet
));
2483 } else if (__CFCSetIsBitmap(theSet
) && __CFCSetBitmapBits(theSet
)) {
2484 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetBitmapBits(theSet
));
2485 } else if (__CFCSetIsCompactBitmap(theSet
) && __CFCSetCompactBitmapBits(theSet
)) {
2486 CFAllocatorDeallocate(CFGetAllocator(theSet
), __CFCSetCompactBitmapBits(theSet
));
2488 __CFCSetPutClassType(theSet
, __kCFCharSetClassBitmap
);
2489 __CFCSetPutBitmapBits(theSet
, NULL
);
2490 __CFCSetPutIsInverted(theSet
, false);
2491 theSet
->_hashValue
= 0;
2492 __CFCSetPutHasHashValue(theSet
, true);
2493 __CFCSetDeallocateAnnexPlane(theSet
);
2495 } else if (__CFCSetIsEmpty(theSet
)) { // non inverted empty set contains all character
2496 __CFCSetPutClassType(theSet
, __CFCSetClassType(theOtherSet
));
2497 __CFCSetPutHasHashValue(theSet
, __CFCSetHasHashValue(theOtherSet
));
2498 __CFCSetPutIsInverted(theSet
, __CFCSetIsInverted(theOtherSet
));
2499 theSet
->_hashValue
= theOtherSet
->_hashValue
;
2500 if (__CFCSetHasNonBMPPlane(theOtherSet
)) {
2501 CFMutableCharacterSetRef otherSetPlane
;
2503 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2504 if ((otherSetPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet
, idx
))) {
2505 otherSetPlane
= (CFMutableCharacterSetRef
)CFCharacterSetCreateMutableCopy(CFGetAllocator(theSet
), otherSetPlane
);
2506 __CFCSetPutCharacterSetToAnnexPlane(theSet
, otherSetPlane
, idx
);
2507 CFRelease(otherSetPlane
);
2510 __CFCSetAnnexSetIsInverted(theSet
, __CFCSetAnnexIsInverted(theOtherSet
));
2513 switch (__CFCSetClassType(theOtherSet
)) {
2514 case __kCFCharSetClassBuiltin
:
2515 __CFCSetPutBuiltinType(theSet
, __CFCSetBuiltinType(theOtherSet
));
2518 case __kCFCharSetClassRange
:
2519 __CFCSetPutRangeFirstChar(theSet
, __CFCSetRangeFirstChar(theOtherSet
));
2520 __CFCSetPutRangeLength(theSet
, __CFCSetRangeLength(theOtherSet
));
2523 case __kCFCharSetClassString
:
2524 __CFCSetPutStringLength(theSet
, __CFCSetStringLength(theOtherSet
));
2525 if (!__CFCSetStringBuffer(theSet
))
2526 __CFCSetPutStringBuffer(theSet
, (UniChar
*)CFAllocatorAllocate(CFGetAllocator(theSet
), __kCFStringCharSetMax
* sizeof(UniChar
), 0));
2527 memmove(__CFCSetStringBuffer(theSet
), __CFCSetStringBuffer(theOtherSet
), __CFCSetStringLength(theSet
) * sizeof(UniChar
));
2530 case __kCFCharSetClassBitmap
:
2531 __CFCSetPutBitmapBits(theSet
, (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet
), sizeof(uint8_t) * __kCFBitmapSize
, 0));
2532 memmove(__CFCSetBitmapBits(theSet
), __CFCSetBitmapBits(theOtherSet
), __kCFBitmapSize
);
2535 case __kCFCharSetClassCompactBitmap
: {
2536 const uint8_t *cBitmap
= __CFCSetCompactBitmapBits(theOtherSet
);
2538 uint32_t size
= __CFCSetGetCompactBitmapSize(cBitmap
);
2539 newBitmap
= (uint8_t *)CFAllocatorAllocate(CFGetAllocator(theSet
), sizeof(uint8_t) * size
, 0);
2540 __CFCSetPutBitmapBits(theSet
, newBitmap
);
2541 memmove(newBitmap
, cBitmap
, size
);
2546 CFAssert1(0, __kCFLogAssertion
, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__
); // We should never come here
2549 __CFCSetMakeBitmap(theSet
);
2550 if (__CFCSetIsBitmap(theOtherSet
)) {
2551 UInt32
*bitmap1
= (UInt32
*)__CFCSetBitmapBits(theSet
);
2552 UInt32
*bitmap2
= (UInt32
*)__CFCSetBitmapBits(theOtherSet
);
2553 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
2554 while (length
--) *bitmap1
++ &= *bitmap2
++;
2556 UInt32
*bitmap1
= (UInt32
*)__CFCSetBitmapBits(theSet
);
2558 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
2559 uint8_t bitmapBuffer
[__kCFBitmapSize
];
2560 __CFCSetGetBitmap(theOtherSet
, bitmapBuffer
);
2561 bitmap2
= (UInt32
*)bitmapBuffer
;
2562 while (length
--) *bitmap1
++ &= *bitmap2
++;
2564 __CFCSetPutHasHashValue(theSet
, false);
2565 if (__CFCSetHasNonBMPPlane(theOtherSet
)) {
2566 CFMutableCharacterSetRef annexPlane
;
2567 CFMutableCharacterSetRef otherSetPlane
;
2569 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2570 if ((otherSetPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theOtherSet
, idx
))) {
2571 annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(theSet
, idx
);
2572 CFCharacterSetIntersect(annexPlane
, otherSetPlane
);
2573 if (__CFCSetIsEmpty(annexPlane
) && !__CFCSetIsInverted(annexPlane
)) __CFCSetPutCharacterSetToAnnexPlane(theSet
, NULL
, idx
);
2574 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
)) {
2575 __CFCSetPutCharacterSetToAnnexPlane(theSet
, NULL
, idx
);
2578 if (!__CFCSetHasNonBMPPlane(theSet
)) __CFCSetDeallocateAnnexPlane(theSet
);
2579 } else if (__CFCSetIsBuiltin(theOtherSet
) && !__CFCSetAnnexIsInverted(theOtherSet
)) {
2580 CFMutableCharacterSetRef annexPlane
;
2581 uint8_t bitmapBuffer
[__kCFBitmapSize
];
2588 for (planeIndex
= 1;planeIndex
<= MAX_ANNEX_PLANE
;planeIndex
++) {
2589 annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, planeIndex
);
2591 result
= CFUniCharGetBitmapForPlane(__CFCSetBuiltinType(theOtherSet
), planeIndex
, bitmapBuffer
, false);
2592 if (result
== kCFUniCharBitmapEmpty
) {
2593 __CFCSetPutCharacterSetToAnnexPlane(theSet
, NULL
, planeIndex
);
2594 } else if (result
== kCFUniCharBitmapFilled
) {
2595 Boolean isEmpty
= true;
2597 __CFCSetMakeBitmap(annexPlane
);
2598 bitmap1
= (UInt32
*)__CFCSetBitmapBits(annexPlane
);
2599 length
= __kCFBitmapSize
/ sizeof(UInt32
);
2600 bitmap2
= (UInt32
*)bitmapBuffer
;
2603 if ((*bitmap1
++ &= *bitmap2
++)) isEmpty
= false;
2605 if (isEmpty
) __CFCSetPutCharacterSetToAnnexPlane(theSet
, NULL
, planeIndex
);
2609 if (!__CFCSetHasNonBMPPlane(theSet
)) __CFCSetDeallocateAnnexPlane(theSet
);
2610 } else if (__CFCSetIsRange(theOtherSet
)) {
2611 CFMutableCharacterSetRef tempOtherSet
= CFCharacterSetCreateMutable(CFGetAllocator(theSet
));
2612 CFMutableCharacterSetRef annexPlane
;
2613 CFMutableCharacterSetRef otherSetPlane
;
2616 __CFCSetAddNonBMPPlanesInRange(tempOtherSet
, CFRangeMake(__CFCSetRangeFirstChar(theOtherSet
), __CFCSetRangeLength(theOtherSet
)));
2618 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2619 if ((otherSetPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(tempOtherSet
, idx
))) {
2620 annexPlane
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSet(theSet
, idx
);
2621 CFCharacterSetIntersect(annexPlane
, otherSetPlane
);
2622 if (__CFCSetIsEmpty(annexPlane
) && !__CFCSetIsInverted(annexPlane
)) __CFCSetPutCharacterSetToAnnexPlane(theSet
, NULL
, idx
);
2623 } else if (__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
)) {
2624 __CFCSetPutCharacterSetToAnnexPlane(theSet
, NULL
, idx
);
2627 if (!__CFCSetHasNonBMPPlane(theSet
)) __CFCSetDeallocateAnnexPlane(theSet
);
2628 CFRelease(tempOtherSet
);
2629 } else if ((__CFCSetHasNonBMPPlane(theSet
) || __CFCSetAnnexIsInverted(theSet
)) && !__CFCSetAnnexIsInverted(theOtherSet
)) {
2630 __CFCSetDeallocateAnnexPlane(theSet
);
2633 if (__CFCheckForExapendedSet
) __CFCheckForExpandedSet(theSet
);
2634 } else { // It's NSCharacterSet
2635 CFDataRef bitmapRep
= CFCharacterSetCreateBitmapRepresentation(kCFAllocatorSystemDefault
, theOtherSet
);
2636 const UInt32
*bitmap2
= (bitmapRep
&& CFDataGetLength(bitmapRep
) ? (const UInt32
*)CFDataGetBytePtr(bitmapRep
) : NULL
);
2639 CFIndex length
= __kCFBitmapSize
/ sizeof(UInt32
);
2640 __CFCSetMakeBitmap(theSet
);
2641 bitmap1
= (UInt32
*)__CFCSetBitmapBits(theSet
);
2642 while (length
--) *bitmap1
++ &= *bitmap2
++;
2643 __CFCSetPutHasHashValue(theSet
, false);
2645 CFRelease(bitmapRep
);
2649 void CFCharacterSetInvert(CFMutableCharacterSetRef theSet
) {
2651 CF_OBJC_FUNCDISPATCH0(__kCFCharacterSetTypeID
, void, theSet
, "invert");
2653 __CFCSetValidateTypeAndMutability(theSet
, __PRETTY_FUNCTION__
);
2655 __CFCSetPutHasHashValue(theSet
, false);
2657 if (__CFCSetClassType(theSet
) == __kCFCharSetClassBitmap
) {
2659 CFIndex count
= __kCFBitmapSize
/ sizeof(UInt32
);
2660 UInt32
*bitmap
= (UInt32
*) __CFCSetBitmapBits(theSet
);
2662 if (NULL
== bitmap
) {
2663 bitmap
= (UInt32
*)CFAllocatorAllocate(CFGetAllocator(theSet
), __kCFBitmapSize
, 0);
2664 __CFCSetPutBitmapBits(theSet
, (uint8_t *)bitmap
);
2665 for (idx
= 0;idx
< count
;idx
++) bitmap
[idx
] = ((UInt32
)0xFFFFFFFF);
2667 for (idx
= 0;idx
< count
;idx
++) bitmap
[idx
] = ~(bitmap
[idx
]);
2669 __CFCSetAllocateAnnexForPlane(theSet
, 0); // We need to alloc annex to invert
2670 } else if (__CFCSetClassType(theSet
) == __kCFCharSetClassCompactBitmap
) {
2671 uint8_t *bitmap
= __CFCSetCompactBitmapBits(theSet
);
2676 for (idx
= 0;idx
< __kCFCompactBitmapNumPages
;idx
++) {
2677 value
= bitmap
[idx
];
2680 bitmap
[idx
] = UINT8_MAX
;
2681 } else if (value
== UINT8_MAX
) {
2684 length
+= __kCFCompactBitmapPageSize
;
2687 bitmap
+= __kCFCompactBitmapNumPages
;
2688 for (idx
= 0;idx
< length
;idx
++) bitmap
[idx
] = ~(bitmap
[idx
]);
2689 __CFCSetAllocateAnnexForPlane(theSet
, 0); // We need to alloc annex to invert
2691 __CFCSetPutIsInverted(theSet
, !__CFCSetIsInverted(theSet
));
2693 __CFCSetAnnexSetIsInverted(theSet
, !__CFCSetAnnexIsInverted(theSet
));
2696 void CFCharacterSetCompact(CFMutableCharacterSetRef theSet
) {
2697 if (__CFCSetIsBitmap(theSet
) && __CFCSetBitmapBits(theSet
)) __CFCSetMakeCompact(theSet
);
2698 if (__CFCSetHasNonBMPPlane(theSet
)) {
2699 CFMutableCharacterSetRef annex
;
2702 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2703 if ((annex
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
)) && __CFCSetIsBitmap(annex
) && __CFCSetBitmapBits(annex
)) {
2704 __CFCSetMakeCompact(annex
);
2710 void CFCharacterSetFast(CFMutableCharacterSetRef theSet
) {
2711 if (__CFCSetIsCompactBitmap(theSet
) && __CFCSetCompactBitmapBits(theSet
)) __CFCSetMakeBitmap(theSet
);
2712 if (__CFCSetHasNonBMPPlane(theSet
)) {
2713 CFMutableCharacterSetRef annex
;
2716 for (idx
= 1;idx
<= MAX_ANNEX_PLANE
;idx
++) {
2717 if ((annex
= (CFMutableCharacterSetRef
)__CFCSetGetAnnexPlaneCharacterSetNoAlloc(theSet
, idx
)) && __CFCSetIsCompactBitmap(annex
) && __CFCSetCompactBitmapBits(annex
)) {
2718 __CFCSetMakeBitmap(annex
);
/* Keyed-coding support */
2726 CFCharacterSetKeyedCodingType
_CFCharacterSetGetKeyedCodingType(CFCharacterSetRef cset
) {
2727 if (CF_IS_OBJC(__kCFCharacterSetTypeID
, cset
)) return kCFCharacterSetKeyedCodingTypeBitmap
;
2729 switch (__CFCSetClassType(cset
)) {
2730 case __kCFCharSetClassBuiltin
: return ((__CFCSetBuiltinType(cset
) < kCFCharacterSetSymbol
) ? kCFCharacterSetKeyedCodingTypeBuiltin
: kCFCharacterSetKeyedCodingTypeBuiltinAndBitmap
);
2731 case __kCFCharSetClassRange
: return kCFCharacterSetKeyedCodingTypeRange
;
2733 case __kCFCharSetClassString
: // We have to check if we have non-BMP here
2734 if (!__CFCSetHasNonBMPPlane(cset
) && !__CFCSetAnnexIsInverted(cset
)) return kCFCharacterSetKeyedCodingTypeString
; // BMP only. we can archive the string
2738 return kCFCharacterSetKeyedCodingTypeBitmap
;
2742 CFCharacterSetPredefinedSet
_CFCharacterSetGetKeyedCodingBuiltinType(CFCharacterSetRef cset
) { return __CFCSetBuiltinType(cset
); }
2743 CFRange
_CFCharacterSetGetKeyedCodingRange(CFCharacterSetRef cset
) { return CFRangeMake(__CFCSetRangeFirstChar(cset
), __CFCSetRangeLength(cset
)); }
2744 CFStringRef
_CFCharacterSetCreateKeyedCodingString(CFCharacterSetRef cset
) { return CFStringCreateWithCharacters(kCFAllocatorSystemDefault
, __CFCSetStringBuffer(cset
), __CFCSetStringLength(cset
)); }
2746 bool _CFCharacterSetIsInverted(CFCharacterSetRef cset
) { return (__CFCSetIsInverted(cset
) != 0); }
2747 void _CFCharacterSetSetIsInverted(CFCharacterSetRef cset
, bool flag
) { __CFCSetPutIsInverted((CFMutableCharacterSetRef
)cset
, flag
); }
/* Inline buffer support */
2751 void CFCharacterSetInitInlineBuffer(CFCharacterSetRef cset
, CFCharacterSetInlineBuffer
*buffer
) {
2752 memset(buffer
, 0, sizeof(CFCharacterSetInlineBuffer
));
2753 buffer
->cset
= cset
;
2754 buffer
->rangeLimit
= 0x10000;
2756 if (CF_IS_OBJC(__kCFCharacterSetTypeID
, cset
)) {
2757 CFCharacterSetRef expandedSet
= __CFCharacterSetGetExpandedSetForNSCharacterSet(cset
);
2759 if (NULL
== expandedSet
) {
2760 buffer
->flags
= kCFCharacterSetNoBitmapAvailable
;
2761 buffer
->rangeLimit
= 0x110000;
2769 switch (__CFCSetClassType(cset
)) {
2770 case __kCFCharSetClassBuiltin
:
2771 buffer
->bitmap
= CFUniCharGetBitmapPtrForPlane(__CFCSetBuiltinType(cset
), 0);
2772 buffer
->rangeLimit
= 0x110000;
2773 if (NULL
== buffer
->bitmap
) {
2774 buffer
->flags
= kCFCharacterSetNoBitmapAvailable
;
2776 if (__CFCSetIsInverted(cset
)) buffer
->flags
= kCFCharacterSetIsInverted
;
2780 case __kCFCharSetClassRange
:
2781 buffer
->rangeStart
= __CFCSetRangeFirstChar(cset
);
2782 buffer
->rangeLimit
= __CFCSetRangeFirstChar(cset
) + __CFCSetRangeLength(cset
);
2783 if (__CFCSetIsInverted(cset
)) buffer
->flags
= kCFCharacterSetIsInverted
;
2786 case __kCFCharSetClassString
:
2787 buffer
->flags
= kCFCharacterSetNoBitmapAvailable
;
2788 if (__CFCSetStringLength(cset
) > 0) {
2789 buffer
->rangeStart
= *__CFCSetStringBuffer(cset
);
2790 buffer
->rangeLimit
= *(__CFCSetStringBuffer(cset
) + __CFCSetStringLength(cset
) - 1) + 1;
2792 if (__CFCSetIsInverted(cset
)) {
2793 if (0 == buffer
->rangeStart
) {
2794 buffer
->rangeStart
= buffer
->rangeLimit
;
2795 buffer
->rangeLimit
= 0x10000;
2796 } else if (0x10000 == buffer
->rangeLimit
) {
2797 buffer
->rangeLimit
= buffer
->rangeStart
;
2798 buffer
->rangeStart
= 0;
2800 buffer
->rangeStart
= 0;
2801 buffer
->rangeLimit
= 0x10000;
2807 case __kCFCharSetClassBitmap
:
2808 case __kCFCharSetClassCompactBitmap
:
2809 buffer
->bitmap
= __CFCSetCompactBitmapBits(cset
);
2810 if (NULL
== buffer
->bitmap
) {
2811 buffer
->flags
= kCFCharacterSetIsCompactBitmap
;
2812 if (__CFCSetIsInverted(cset
)) buffer
->flags
|= kCFCharacterSetIsInverted
;
2814 if (__kCFCharSetClassCompactBitmap
== __CFCSetClassType(cset
)) buffer
->flags
= kCFCharacterSetIsCompactBitmap
;
2819 CFAssert1(0, __kCFLogAssertion
, "%s: Internal inconsistency error: unknown character set type", __PRETTY_FUNCTION__
); // We should never come here
2823 if (__CFCSetAnnexIsInverted(cset
)) {
2824 buffer
->rangeLimit
= 0x110000;
2825 } else if (__CFCSetHasNonBMPPlane(cset
)) {
2828 for (index
= MAX_ANNEX_PLANE
;index
> 0;index
--) {
2829 if (NULL
!= __CFCSetGetAnnexPlaneCharacterSetNoAlloc(cset
, index
)) {
2830 buffer
->rangeLimit
= (index
+ 1) << 16;