2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 Copyright 1998-2002, Apple, Inc. All rights reserved.
25 Responsibility: Ali Ozer
27 !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined.
30 #include <CoreFoundation/CFBase.h>
31 #include <CoreFoundation/CFString.h>
32 #include <CoreFoundation/CFDictionary.h>
33 #include "CFStringEncodingConverterExt.h"
34 #include "CFUniChar.h"
35 #include "CFUnicodeDecomposition.h"
36 #include "CFUnicodePrecomposition.h"
37 #include "CFUtilitiesPriv.h"
38 #include "CFInternal.h"
42 #if defined (__MACOS8__)
43 #include <Script.h> // For GetScriptManagerVariable
44 #include <Processes.h> // For logging
46 #include <UnicodeConverter.h>
47 #include <TextEncodingConverter.h>
48 #elif defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
51 #if defined(__WIN32__)
53 #endif /* __WIN32__ */
56 extern size_t malloc_good_size(size_t size
);
58 extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes
, UniChar
*buffer
, CFIndex numChars
);
62 // Special allocator used by CFSTRs to catch deallocations
63 static CFAllocatorRef constantStringAllocatorForDebugging
= NULL
;
65 // We put this into C & Pascal strings if we can't convert
66 #define CONVERSIONFAILURESTR "CFString conversion failed"
68 // We set this to true when purging the constant string table, so CFStringDeallocate doesn't assert
69 static Boolean __CFConstantStringTableBeingFreed
= false;
75 // This section is for CFString compatibility and other behaviors...
77 static CFOptionFlags _CFStringCompatibilityMask
= 0;
81 void _CFStringSetCompatibility(CFOptionFlags mask
) {
82 _CFStringCompatibilityMask
|= mask
;
85 CF_INLINE Boolean
__CFStringGetCompatibility(CFOptionFlags mask
) {
86 return (_CFStringCompatibilityMask
& mask
) == mask
;
91 // Two constant strings used by CFString; these are initialized in CFStringInitialize
92 CONST_STRING_DECL(kCFEmptyString
, "")
93 CONST_STRING_DECL(kCFNSDecimalSeparatorKey
, "NSDecimalSeparator")
96 /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
100 union { // In many cases the allocated structs are smaller than these
108 CFAllocatorRef contentsDeallocator
; // Just the dealloc func is used
109 } notInlineImmutable1
;
112 CFAllocatorRef contentsDeallocator
; // Just the dealloc func is used
113 } notInlineImmutable2
;
117 UInt32 capacityFields
; // Currently only stores capacity
118 UInt32 gapEtc
; // Stores some bits, plus desired or fixed capacity
119 CFAllocatorRef contentsAllocator
; // Optional
126 E = not inline contents
130 D = explicit deallocator for contents (for mutable objects, allocator)
133 Also need (only for mutable)
136 Cap, DesCap = capacity
138 B7 B6 B5 B4 B3 B2 B1 B0
143 0 1 E (freed with default allocator)
147 !!! Note: Constant CFStrings use the bit patterns:
148 C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable)
149 D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable)
150 The bit usages should not be modified in a way that would affect these bit patterns.
154 __kCFFreeContentsWhenDoneMask
= 0x020,
155 __kCFFreeContentsWhenDone
= 0x020,
156 __kCFContentsMask
= 0x060,
157 __kCFHasInlineContents
= 0x000,
158 __kCFNotInlineContentsNoFree
= 0x040, // Don't free
159 __kCFNotInlineContentsDefaultFree
= 0x020, // Use allocator's free function
160 __kCFNotInlineContentsCustomFree
= 0x060, // Use a specially provided free function
161 __kCFHasContentsAllocatorMask
= 0x060,
162 __kCFHasContentsAllocator
= 0x060, // (For mutable strings) use a specially provided allocator
163 __kCFHasContentsDeallocatorMask
= 0x060,
164 __kCFHasContentsDeallocator
= 0x060,
165 __kCFIsMutableMask
= 0x01,
166 __kCFIsMutable
= 0x01,
167 __kCFIsUnicodeMask
= 0x10,
168 __kCFIsUnicode
= 0x10,
169 __kCFHasNullByteMask
= 0x08,
170 __kCFHasNullByte
= 0x08,
171 __kCFHasLengthByteMask
= 0x04,
172 __kCFHasLengthByte
= 0x04,
173 // !!! Bit 0x02 has been freed up
174 // These are in variants.notInlineMutable.gapEtc
175 __kCFGapMask
= 0x00ffffff,
176 __kCFGapBitNumber
= 24,
177 __kCFDesiredCapacityMask
= 0x00ffffff, // Currently gap and fixed share same bits as gap not implemented
178 __kCFDesiredCapacityBitNumber
= 24,
179 __kCFIsFixedMask
= 0x80000000,
180 __kCFIsFixed
= 0x80000000,
181 __kCFHasGapMask
= 0x40000000,
182 __kCFHasGap
= 0x40000000,
183 __kCFCapacityProvidedExternallyMask
= 0x20000000, // Set if the external buffer capacity is set explicitly by the developer
184 __kCFCapacityProvidedExternally
= 0x20000000,
185 __kCFIsExternalMutableMask
= 0x10000000, // Determines whether the buffer is controlled by the developer
186 __kCFIsExternalMutable
= 0x10000000
187 // 0x0f000000: 4 additional bits available for use in mutable strings
192 // Mutable strings are not inline
193 // Compile-time constant strings are not inline
194 // Mutable strings always have explicit length (but they might also have length byte and null byte)
195 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
196 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
198 /* The following set of functions and macros need to be updated on change to the bit configuration */
200 CF_INLINE Boolean
__CFStrIsMutable(CFStringRef str
) {return (str
->base
._info
& __kCFIsMutableMask
) == __kCFIsMutable
;}
201 CF_INLINE Boolean
__CFStrIsInline(CFStringRef str
) {return (str
->base
._info
& __kCFContentsMask
) == __kCFHasInlineContents
;}
202 CF_INLINE Boolean
__CFStrFreeContentsWhenDone(CFStringRef str
) {return (str
->base
._info
& __kCFFreeContentsWhenDoneMask
) == __kCFFreeContentsWhenDone
;}
203 CF_INLINE Boolean
__CFStrHasContentsDeallocator(CFStringRef str
) {return (str
->base
._info
& __kCFHasContentsDeallocatorMask
) == __kCFHasContentsDeallocator
;}
204 CF_INLINE Boolean
__CFStrIsUnicode(CFStringRef str
) {return (str
->base
._info
& __kCFIsUnicodeMask
) == __kCFIsUnicode
;}
205 CF_INLINE Boolean
__CFStrIsEightBit(CFStringRef str
) {return (str
->base
._info
& __kCFIsUnicodeMask
) != __kCFIsUnicode
;}
206 CF_INLINE Boolean
__CFStrHasNullByte(CFStringRef str
) {return (str
->base
._info
& __kCFHasNullByteMask
) == __kCFHasNullByte
;}
207 CF_INLINE Boolean
__CFStrHasLengthByte(CFStringRef str
) {return (str
->base
._info
& __kCFHasLengthByteMask
) == __kCFHasLengthByte
;}
208 CF_INLINE Boolean
__CFStrHasExplicitLength(CFStringRef str
) {return (str
->base
._info
& (__kCFIsMutableMask
| __kCFHasLengthByteMask
)) != __kCFHasLengthByte
;} // Has explicit length if (1) mutable or (2) not mutable and no length byte
209 CF_INLINE Boolean
__CFStrIsConstant(CFStringRef str
) {return (str
->base
._rc
) == 0;}
211 CF_INLINE SInt32
__CFStrSkipAnyLengthByte(CFStringRef str
) {return ((str
->base
._info
& __kCFHasLengthByteMask
) == __kCFHasLengthByte
) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents
213 /* Returns ptr to the buffer (which might include the length byte)
215 CF_INLINE
const void *__CFStrContents(CFStringRef str
) {
216 if (__CFStrIsInline(str
)) {
217 return (const void *)(((UInt32
)&(str
->variants
)) + (__CFStrHasExplicitLength(str
) ? sizeof(UInt32
) : 0));
218 } else { // Not inline; pointer is always word 2
219 return str
->variants
.notInlineImmutable1
.buffer
;
223 static CFAllocatorRef
*__CFStrContentsDeallocatorPtr(CFStringRef str
) {
224 return __CFStrHasExplicitLength(str
) ? &(((CFMutableStringRef
)str
)->variants
.notInlineImmutable1
.contentsDeallocator
) : &(((CFMutableStringRef
)str
)->variants
.notInlineImmutable2
.contentsDeallocator
); }
226 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
227 CF_INLINE CFAllocatorRef
__CFStrContentsDeallocator(CFStringRef str
) {
228 return *__CFStrContentsDeallocatorPtr(str
);
231 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
232 CF_INLINE
void __CFStrSetContentsDeallocator(CFStringRef str
, CFAllocatorRef contentsAllocator
) {
233 *__CFStrContentsDeallocatorPtr(str
) = contentsAllocator
;
236 static CFAllocatorRef
*__CFStrContentsAllocatorPtr(CFStringRef str
) {
237 CFAssert(!__CFStrIsInline(str
), __kCFLogAssertion
, "Asking for contents allocator of inline string");
238 CFAssert(__CFStrIsMutable(str
), __kCFLogAssertion
, "Asking for contents allocator of an immutable string");
239 return (CFAllocatorRef
*)&(str
->variants
.notInlineMutable
.contentsAllocator
);
242 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
243 CF_INLINE CFAllocatorRef
__CFStrContentsAllocator(CFMutableStringRef str
) {
244 return *(__CFStrContentsAllocatorPtr(str
));
247 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
248 CF_INLINE
void __CFStrSetContentsAllocator(CFMutableStringRef str
, CFAllocatorRef alloc
) {
249 *(__CFStrContentsAllocatorPtr(str
)) = alloc
;
252 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
254 CF_INLINE CFIndex
__CFStrLength(CFStringRef str
) {
255 if (__CFStrHasExplicitLength(str
)) {
256 if (__CFStrIsInline(str
)) {
257 return str
->variants
.inline1
.length
;
259 return str
->variants
.notInlineImmutable1
.length
;
262 return (CFIndex
)(*((uint8_t *)__CFStrContents(str
)));
266 CF_INLINE CFIndex
__CFStrLength2(CFStringRef str
, const void *buffer
) {
267 if (__CFStrHasExplicitLength(str
)) {
268 if (__CFStrIsInline(str
)) {
269 return str
->variants
.inline1
.length
;
271 return str
->variants
.notInlineImmutable1
.length
;
274 return (CFIndex
)(*((uint8_t *)buffer
));
279 Boolean
__CFStringIsEightBit(CFStringRef str
) {
280 return __CFStrIsEightBit(str
);
283 /* Sets the content pointer for immutable or mutable strings.
285 CF_INLINE
void __CFStrSetContentPtr(CFStringRef str
, const void *p
)
287 // XXX_PCB catch all writes for mutable string case.
288 CF_WRITE_BARRIER_BASE_ASSIGN(__CFGetAllocator(str
), str
, ((CFMutableStringRef
)str
)->variants
.notInlineImmutable1
.buffer
, (void *)p
);
290 CF_INLINE
void __CFStrSetInfoBits(CFStringRef str
, UInt32 v
) {__CFBitfieldSetValue(((CFMutableStringRef
)str
)->base
._info
, 6, 0, v
);}
292 CF_INLINE
void __CFStrSetExplicitLength(CFStringRef str
, CFIndex v
) {
293 if (__CFStrIsInline(str
)) {
294 ((CFMutableStringRef
)str
)->variants
.inline1
.length
= v
;
296 ((CFMutableStringRef
)str
)->variants
.notInlineImmutable1
.length
= v
;
300 // Assumption: Called with mutable strings only
301 CF_INLINE Boolean
__CFStrIsFixed(CFStringRef str
) {return (str
->variants
.notInlineMutable
.gapEtc
& __kCFIsFixedMask
) == __kCFIsFixed
;}
302 CF_INLINE Boolean
__CFStrHasContentsAllocator(CFStringRef str
) {return (str
->base
._info
& __kCFHasContentsAllocatorMask
) == __kCFHasContentsAllocator
;}
303 CF_INLINE Boolean
__CFStrIsExternalMutable(CFStringRef str
) {return (str
->variants
.notInlineMutable
.gapEtc
& __kCFIsExternalMutableMask
) == __kCFIsExternalMutable
;}
305 // If capacity is provided externally, we only change it when we need to grow beyond it
306 CF_INLINE Boolean
__CFStrCapacityProvidedExternally(CFStringRef str
) {return (str
->variants
.notInlineMutable
.gapEtc
& __kCFCapacityProvidedExternallyMask
) == __kCFCapacityProvidedExternally
;}
307 CF_INLINE
void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.gapEtc
|= __kCFCapacityProvidedExternally
;}
308 CF_INLINE
void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.gapEtc
&= ~__kCFCapacityProvidedExternally
;}
311 CF_INLINE
void __CFStrSetIsFixed(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.gapEtc
|= __kCFIsFixed
;}
312 CF_INLINE
void __CFStrSetIsExternalMutable(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.gapEtc
|= __kCFIsExternalMutable
;}
313 CF_INLINE
void __CFStrSetHasGap(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.gapEtc
|= __kCFHasGap
;}
314 CF_INLINE
void __CFStrSetUnicode(CFMutableStringRef str
) {str
->base
._info
|= __kCFIsUnicode
;}
315 CF_INLINE
void __CFStrClearUnicode(CFMutableStringRef str
) {str
->base
._info
&= ~__kCFIsUnicode
;}
316 CF_INLINE
void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str
) {str
->base
._info
|= (__kCFHasLengthByte
| __kCFHasNullByte
);}
317 CF_INLINE
void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str
) {str
->base
._info
&= ~(__kCFHasLengthByte
| __kCFHasNullByte
);}
320 static void *__CFStrAllocateMutableContents(CFMutableStringRef str
, CFIndex size
) {
322 CFAllocatorRef alloc
= (__CFStrHasContentsAllocator(str
)) ? __CFStrContentsAllocator(str
) : __CFGetAllocator(str
);
323 ptr
= CFAllocatorAllocate(alloc
, size
, 0);
324 if (__CFOASafe
) __CFSetLastAllocationEventName(ptr
, "CFString (store)");
328 static void __CFStrDeallocateMutableContents(CFMutableStringRef str
, void *buffer
) {
329 CFAllocatorRef alloc
= (__CFStrHasContentsAllocator(str
)) ? __CFStrContentsAllocator(str
) : __CFGetAllocator(str
);
330 if (CF_IS_COLLECTABLE_ALLOCATOR(alloc
)) {
331 // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle.
332 auto_zone_release(__CFCollectableZone
, buffer
);
334 CFAllocatorDeallocate(alloc
, buffer
);
339 // The following set of functions should only be called on mutable strings
341 /* "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
342 "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
344 CF_INLINE CFIndex
__CFStrCapacity(CFStringRef str
) {return str
->variants
.notInlineMutable
.capacityFields
;}
345 CF_INLINE
void __CFStrSetCapacity(CFMutableStringRef str
, CFIndex cap
) {str
->variants
.notInlineMutable
.capacityFields
= cap
;}
346 CF_INLINE CFIndex
__CFStrDesiredCapacity(CFStringRef str
) {return __CFBitfieldGetValue(str
->variants
.notInlineMutable
.gapEtc
, __kCFDesiredCapacityBitNumber
, 0);}
347 CF_INLINE
void __CFStrSetDesiredCapacity(CFMutableStringRef str
, CFIndex size
) {__CFBitfieldSetValue(str
->variants
.notInlineMutable
.gapEtc
, __kCFDesiredCapacityBitNumber
, 0, size
);}
352 /* CFString specific init flags
353 Note that you cannot count on the external buffer not being copied.
354 Also, if you specify an external buffer, you should not change it behind the CFString's back.
357 __kCFThinUnicodeIfPossible
= 0x1000000, /* See if the Unicode contents can be thinned down to 8-bit */
358 kCFStringPascal
= 0x10000, /* Indicating that the string data has a Pascal string structure (length byte at start) */
359 kCFStringNoCopyProvidedContents
= 0x20000, /* Don't copy the provided string contents if possible; free it when no longer needed */
360 kCFStringNoCopyNoFreeProvidedContents
= 0x30000 /* Don't copy the provided string contents if possible; don't free it when no longer needed */
365 static CFStringEncoding __CFDefaultSystemEncoding
= kCFStringEncodingInvalidId
;
366 static CFStringEncoding __CFDefaultFileSystemEncoding
= kCFStringEncodingInvalidId
;
367 CFStringEncoding __CFDefaultEightBitStringEncoding
= kCFStringEncodingInvalidId
;
369 CFStringEncoding
CFStringGetSystemEncoding(void) {
371 if (__CFDefaultSystemEncoding
== kCFStringEncodingInvalidId
) {
372 const CFStringEncodingConverter
*converter
= NULL
;
373 #if defined(__MACOS8__) || defined(__MACH__)
374 __CFDefaultSystemEncoding
= kCFStringEncodingMacRoman
; // MacRoman is built-in so always available
375 #elif defined(__WIN32__)
376 __CFDefaultSystemEncoding
= kCFStringEncodingWindowsLatin1
; // WinLatin1 is built-in so always available
377 #elif defined(__LINUX__) || defined(__FREEBSD__)
378 __CFDefaultSystemEncoding
= kCFStringEncodingISOLatin1
; // a reasonable default
379 #else // Solaris && HP-UX ?
380 __CFDefaultSystemEncoding
= kCFStringEncodingISOLatin1
; // a reasonable default
382 converter
= CFStringEncodingGetConverter(__CFDefaultSystemEncoding
);
384 __CFSetCharToUniCharFunc(converter
->encodingClass
== kCFStringEncodingConverterCheapEightBit
? converter
->toUnicode
: NULL
);
387 return __CFDefaultSystemEncoding
;
390 // Fast version for internal use
392 CF_INLINE CFStringEncoding
__CFStringGetSystemEncoding(void) {
393 if (__CFDefaultSystemEncoding
== kCFStringEncodingInvalidId
) (void)CFStringGetSystemEncoding();
394 return __CFDefaultSystemEncoding
;
397 CFStringEncoding
CFStringFileSystemEncoding(void) {
398 if (__CFDefaultFileSystemEncoding
== kCFStringEncodingInvalidId
) {
399 #if defined(__MACH__)
400 __CFDefaultFileSystemEncoding
= kCFStringEncodingUTF8
;
402 __CFDefaultFileSystemEncoding
= CFStringGetSystemEncoding();
406 return __CFDefaultFileSystemEncoding
;
409 /* ??? Is returning length when no other answer is available the right thing?
411 CFIndex
CFStringGetMaximumSizeForEncoding(CFIndex length
, CFStringEncoding encoding
) {
412 if (encoding
== kCFStringEncodingUTF8
) {
413 return _CFExecutableLinkedOnOrAfter(CFSystemVersionPanther
) ? (length
* 3) : (length
* 6); // 1 Unichar could expand to 3 bytes; we return 6 for older apps for compatibility
414 } else if ((encoding
== kCFStringEncodingUTF32
) || (encoding
== kCFStringEncodingUTF32BE
) || (encoding
== kCFStringEncodingUTF32LE
)) { // UTF-32
415 return length
* sizeof(UTF32Char
);
417 encoding
&= 0xFFF; // Mask off non-base part
420 case kCFStringEncodingUnicode
:
421 return length
* sizeof(UniChar
);
423 case kCFStringEncodingNonLossyASCII
:
424 return length
* 6; // 1 Unichar could expand to 6 bytes
426 case kCFStringEncodingMacRoman
:
427 case kCFStringEncodingWindowsLatin1
:
428 case kCFStringEncodingISOLatin1
:
429 case kCFStringEncodingNextStepLatin
:
430 case kCFStringEncodingASCII
:
431 return length
/ sizeof(uint8_t);
434 return length
/ sizeof(uint8_t);
439 /* Returns whether the indicated encoding can be stored in 8-bit chars
441 CF_INLINE Boolean
__CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding
) {
442 switch (encoding
& 0xFFF) { // just use encoding base
443 case kCFStringEncodingInvalidId
:
444 case kCFStringEncodingUnicode
:
445 case kCFStringEncodingNonLossyASCII
:
448 case kCFStringEncodingMacRoman
:
449 case kCFStringEncodingWindowsLatin1
:
450 case kCFStringEncodingISOLatin1
:
451 case kCFStringEncodingNextStepLatin
:
452 case kCFStringEncodingASCII
:
455 default: return false;
459 /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
460 ??? Perhaps only ASCII fits the bill due to Unicode decomposition.
462 CFStringEncoding
__CFStringComputeEightBitStringEncoding(void) {
463 if (__CFDefaultEightBitStringEncoding
== kCFStringEncodingInvalidId
) {
464 CFStringEncoding systemEncoding
= CFStringGetSystemEncoding();
465 if (systemEncoding
== kCFStringEncodingInvalidId
) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
466 return kCFStringEncodingASCII
;
467 } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding
)) {
468 __CFDefaultEightBitStringEncoding
= systemEncoding
;
470 __CFDefaultEightBitStringEncoding
= kCFStringEncodingASCII
;
474 return __CFDefaultEightBitStringEncoding
;
477 /* Returns whether the provided bytes can be stored in ASCII
479 CF_INLINE Boolean
__CFBytesInASCII(const uint8_t *bytes
, CFIndex len
) {
480 while (len
--) if ((uint8_t)(*bytes
++) >= 128) return false;
484 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
486 CF_INLINE Boolean
__CFCanUseEightBitCFStringForBytes(const uint8_t *bytes
, CFIndex len
, CFStringEncoding encoding
) {
487 if (encoding
== __CFStringGetEightBitStringEncoding()) return true;
488 if (__CFStringEncodingIsSupersetOfASCII(encoding
) && __CFBytesInASCII(bytes
, len
)) return true;
493 /* Returns whether a length byte can be tacked on to a string of the indicated length.
495 CF_INLINE Boolean
__CFCanUseLengthByte(CFIndex len
) {
496 #define __kCFMaxPascalStrLen 255
497 return (len
<= __kCFMaxPascalStrLen
) ? true : false;
/* Various string assertions.
   Macro parameters used in comparisons and arithmetic are parenthesized so that arguments which are
   themselves expressions (e.g. "a + b" or "c ? x : y") keep their intended grouping.
*/
#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && ((idx) + (count)) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
#define __CFAssertLengthIsOK(len) CFAssert2((len) < __kCFMaxLength, __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, len)
#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsNotNegative(idx) CFAssert2((idx) >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
512 /* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity.
513 Additional complications are applied in the following order:
514 - desiredCapacity, which is the minimum (except initially things can be at zero)
515 - rounding up to factor of 8
516 - compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256 */
/* SHRINKFACTOR(c): half of c; a buffer is shrunk when the required capacity drops below this.
   GROWFACTOR(c): c grown by 3/2, rounded up; used when extra room is requested.
   The parameter is parenthesized so that expression arguments such as "a + b" are scaled as a whole.
*/
#define SHRINKFACTOR(c) ((c) / 2)
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
521 CF_INLINE CFIndex
__CFStrNewCapacity(CFMutableStringRef str
, CFIndex reqCapacity
, CFIndex capacity
, Boolean leaveExtraRoom
, CFIndex charSize
) {
522 if (capacity
!= 0 || reqCapacity
!= 0) { /* If initially zero, and space not needed, leave it at that... */
523 if ((capacity
< reqCapacity
) || /* We definitely need the room... */
524 (!__CFStrCapacityProvidedExternally(str
) && /* Assuming we control the capacity... */
525 ((reqCapacity
< SHRINKFACTOR(capacity
)) || /* ...we have too much room! */
526 (!leaveExtraRoom
&& (reqCapacity
< capacity
))))) { /* ...we need to eliminate the extra space... */
527 CFIndex newCapacity
= leaveExtraRoom
? GROWFACTOR(reqCapacity
) : reqCapacity
; /* Grow by 3/2 if extra room is desired */
528 CFIndex desiredCapacity
= __CFStrDesiredCapacity(str
) * charSize
;
529 if (newCapacity
< desiredCapacity
) { /* If less than desired, bump up to desired */
530 newCapacity
= desiredCapacity
;
531 } else if (__CFStrIsFixed(str
)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
532 newCapacity
= __CFMax(desiredCapacity
, reqCapacity
); /* !!! So, fixed is not really fixed, but "tight" */
534 if (__CFStrHasContentsAllocator(str
)) { /* Also apply any preferred size from the allocator; should we do something for */
535 newCapacity
= CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str
), newCapacity
, 0);
536 #if defined(__MACH__)
538 newCapacity
= malloc_good_size(newCapacity
);
541 return newCapacity
; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
548 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
549 numBlocks is current total number of blocks within buffer.
550 blockSize is the size of each block in bytes
551 ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
552 insertLength is the final spacing between the remaining blocks
554 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO
555 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
556 if insertLength = 0, result = A B D G H
558 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
559 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
562 typedef struct _CFStringDeferredRange
{
566 } CFStringDeferredRange
;
568 typedef struct _CFStringStackInfo
{
569 int capacity
; // Capacity (if capacity == count, need to realloc to add another)
570 int count
; // Number of elements actually stored
571 CFStringDeferredRange
*stack
;
572 Boolean hasMalloced
; // Indicates "stack" is allocated and needs to be deallocated when done
576 CF_INLINE
void pop (CFStringStackInfo
*si
, CFStringDeferredRange
*topRange
) {
577 si
->count
= si
->count
- 1;
578 *topRange
= si
->stack
[si
->count
];
581 CF_INLINE
void push (CFStringStackInfo
*si
, const CFStringDeferredRange
*newRange
) {
582 if (si
->count
== si
->capacity
) {
583 // increase size of the stack
584 si
->capacity
= (si
->capacity
+ 4) * 2;
585 if (si
->hasMalloced
) {
586 si
->stack
= CFAllocatorReallocate(NULL
, si
->stack
, si
->capacity
* sizeof(CFStringDeferredRange
), 0);
588 CFStringDeferredRange
*newStack
= (CFStringDeferredRange
*)CFAllocatorAllocate(NULL
, si
->capacity
* sizeof(CFStringDeferredRange
), 0);
589 memmove(newStack
, si
->stack
, si
->count
* sizeof(CFStringDeferredRange
));
590 si
->stack
= newStack
;
591 si
->hasMalloced
= true;
594 si
->stack
[si
->count
] = *newRange
;
595 si
->count
= si
->count
+ 1;
598 static void rearrangeBlocks(
602 const CFRange
*ranges
,
604 CFIndex insertLength
) {
606 #define origStackSize 10
607 CFStringDeferredRange origStack
[origStackSize
];
608 CFStringStackInfo si
= {origStackSize
, 0, origStack
, false, {0, 0, 0}};
609 CFStringDeferredRange currentNonRange
= {0, 0, 0};
610 int currentRange
= 0;
611 int amountShifted
= 0;
613 // must have at least 1 range left.
615 while (currentRange
< numRanges
) {
616 currentNonRange
.beginning
= (ranges
[currentRange
].location
+ ranges
[currentRange
].length
) * blockSize
;
617 if ((numRanges
- currentRange
) == 1) {
619 currentNonRange
.length
= numBlocks
* blockSize
- currentNonRange
.beginning
;
620 if (currentNonRange
.length
== 0) break;
622 currentNonRange
.length
= (ranges
[currentRange
+ 1].location
* blockSize
) - currentNonRange
.beginning
;
624 currentNonRange
.shift
= amountShifted
+ (insertLength
* blockSize
) - (ranges
[currentRange
].length
* blockSize
);
625 amountShifted
= currentNonRange
.shift
;
626 if (amountShifted
<= 0) {
627 // process current item and rest of stack
628 if (currentNonRange
.shift
&& currentNonRange
.length
) memmove (&buffer
[currentNonRange
.beginning
+ currentNonRange
.shift
], &buffer
[currentNonRange
.beginning
], currentNonRange
.length
);
629 while (si
.count
> 0) {
630 pop (&si
, ¤tNonRange
); // currentNonRange now equals the top element of the stack.
631 if (currentNonRange
.shift
&& currentNonRange
.length
) memmove (&buffer
[currentNonRange
.beginning
+ currentNonRange
.shift
], &buffer
[currentNonRange
.beginning
], currentNonRange
.length
);
634 // add currentNonRange to stack.
635 push (&si
, ¤tNonRange
);
640 // no more ranges. if anything is on the stack, process.
642 while (si
.count
> 0) {
643 pop (&si
, ¤tNonRange
); // currentNonRange now equals the top element of the stack.
644 if (currentNonRange
.shift
&& currentNonRange
.length
) memmove (&buffer
[currentNonRange
.beginning
+ currentNonRange
.shift
], &buffer
[currentNonRange
.beginning
], currentNonRange
.length
);
646 if (si
.hasMalloced
) CFAllocatorDeallocate (NULL
, si
.stack
);
649 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
651 static void copyBlocks(
652 const uint8_t *srcBuffer
,
655 Boolean srcIsUnicode
,
656 Boolean dstIsUnicode
,
657 const CFRange
*ranges
,
659 CFIndex insertLength
) {
661 CFIndex srcLocationInBytes
= 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
662 CFIndex dstLocationInBytes
= 0; // ditto
663 CFIndex srcBlockSize
= srcIsUnicode
? sizeof(UniChar
) : sizeof(uint8_t);
664 CFIndex insertLengthInBytes
= insertLength
* (dstIsUnicode
? sizeof(UniChar
) : sizeof(uint8_t));
665 CFIndex rangeIndex
= 0;
666 CFIndex srcToDstMultiplier
= (srcIsUnicode
== dstIsUnicode
) ? 1 : (sizeof(UniChar
) / sizeof(uint8_t));
668 // Loop over the ranges, copying the range to be preserved (right before each range)
669 while (rangeIndex
< numRanges
) {
670 CFIndex srcLengthInBytes
= ranges
[rangeIndex
].location
* srcBlockSize
- srcLocationInBytes
; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
671 if (srcLengthInBytes
> 0) {
672 if (srcIsUnicode
== dstIsUnicode
) {
673 memmove(dstBuffer
+ dstLocationInBytes
, srcBuffer
+ srcLocationInBytes
, srcLengthInBytes
);
675 __CFStrConvertBytesToUnicode(srcBuffer
+ srcLocationInBytes
, (UniChar
*)(dstBuffer
+ dstLocationInBytes
), srcLengthInBytes
);
678 srcLocationInBytes
+= srcLengthInBytes
+ ranges
[rangeIndex
].length
* srcBlockSize
; // Skip over the just-copied and to-be-deleted stuff
679 dstLocationInBytes
+= srcLengthInBytes
* srcToDstMultiplier
+ insertLengthInBytes
;
683 // Do last range (the one beyond last range)
684 if (srcLocationInBytes
< srcLength
* srcBlockSize
) {
685 if (srcIsUnicode
== dstIsUnicode
) {
686 memmove(dstBuffer
+ dstLocationInBytes
, srcBuffer
+ srcLocationInBytes
, srcLength
* srcBlockSize
- srcLocationInBytes
);
688 __CFStrConvertBytesToUnicode(srcBuffer
+ srcLocationInBytes
, (UniChar
*)(dstBuffer
+ dstLocationInBytes
), srcLength
* srcBlockSize
- srcLocationInBytes
);
694 /* Reallocates the backing store of the string to accommodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)
696 static void __CFStringChangeSizeMultiple(CFMutableStringRef str
, const CFRange
*deleteRanges
, CFIndex numDeleteRanges
, CFIndex insertLength
, Boolean makeUnicode
) {
697 const uint8_t *curContents
= __CFStrContents(str
);
698 CFIndex curLength
= curContents
? __CFStrLength2(str
, curContents
) : 0;
701 // Compute new length of the string
702 if (numDeleteRanges
== 1) {
703 newLength
= curLength
+ insertLength
- deleteRanges
[0].length
;
706 newLength
= curLength
+ insertLength
* numDeleteRanges
;
707 for (cnt
= 0; cnt
< numDeleteRanges
; cnt
++) newLength
-= deleteRanges
[cnt
].length
;
710 __CFAssertIfFixedLengthIsOK(str
, newLength
);
712 if (newLength
== 0) {
713 // An somewhat optimized code-path for this special case, with the following implicit values:
714 // newIsUnicode = false
715 // useLengthAndNullBytes = false
716 // newCharSize = sizeof(uint8_t)
717 // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
718 // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
719 CFIndex curCapacity
= __CFStrCapacity(str
);
720 CFIndex newCapacity
= __CFStrNewCapacity(str
, 0, curCapacity
, true, sizeof(uint8_t));
721 if (newCapacity
!= curCapacity
) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
722 if (curContents
) __CFStrDeallocateMutableContents(str
, (uint8_t *)curContents
);
723 __CFStrSetContentPtr(str
, NULL
);
724 __CFStrSetCapacity(str
, 0);
725 __CFStrClearCapacityProvidedExternally(str
);
726 __CFStrClearHasLengthAndNullBytes(str
);
727 if (!__CFStrIsExternalMutable(str
)) __CFStrClearUnicode(str
); // External mutable implies Unicode
729 if (!__CFStrIsExternalMutable(str
)) {
730 __CFStrClearUnicode(str
);
731 if (curCapacity
>= (int)(sizeof(uint8_t) * 2)) { // If there's room
732 __CFStrSetHasLengthAndNullBytes(str
);
733 ((uint8_t *)curContents
)[0] = ((uint8_t *)curContents
)[1] = 0;
735 __CFStrClearHasLengthAndNullBytes(str
);
739 __CFStrSetExplicitLength(str
, 0);
740 } else { /* This else-clause assumes newLength > 0 */
741 Boolean oldIsUnicode
= __CFStrIsUnicode(str
);
742 Boolean newIsUnicode
= makeUnicode
|| (oldIsUnicode
/* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str
);
743 CFIndex newCharSize
= newIsUnicode
? sizeof(UniChar
) : sizeof(uint8_t);
744 Boolean useLengthAndNullBytes
= !newIsUnicode
/* && (newLength > 0) - implicit */;
745 CFIndex numExtraBytes
= useLengthAndNullBytes
? 2 : 0; /* 2 extra bytes to keep the length byte & null... */
746 CFIndex curCapacity
= __CFStrCapacity(str
);
747 CFIndex newCapacity
= __CFStrNewCapacity(str
, newLength
* newCharSize
+ numExtraBytes
, curCapacity
, true, newCharSize
);
748 Boolean allocNewBuffer
= (newCapacity
!= curCapacity
) || (curLength
> 0 && !oldIsUnicode
&& newIsUnicode
); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
749 uint8_t *newContents
= allocNewBuffer
? __CFStrAllocateMutableContents(str
, newCapacity
) : (uint8_t *)curContents
;
750 Boolean hasLengthAndNullBytes
= __CFStrHasLengthByte(str
);
752 CFAssert1(hasLengthAndNullBytes
== __CFStrHasNullByte(str
), __kCFLogAssertion
, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__
);
754 if (hasLengthAndNullBytes
) curContents
++;
755 if (useLengthAndNullBytes
) newContents
++;
758 if (oldIsUnicode
== newIsUnicode
) {
759 if (newContents
== curContents
) {
760 rearrangeBlocks(newContents
, curLength
, newCharSize
, deleteRanges
, numDeleteRanges
, insertLength
);
762 copyBlocks(curContents
, newContents
, curLength
, oldIsUnicode
, newIsUnicode
, deleteRanges
, numDeleteRanges
, insertLength
);
764 } else if (newIsUnicode
) { /* this implies we have a new buffer */
765 copyBlocks(curContents
, newContents
, curLength
, oldIsUnicode
, newIsUnicode
, deleteRanges
, numDeleteRanges
, insertLength
);
767 if (hasLengthAndNullBytes
) curContents
--; /* Undo the damage from above */
768 if (allocNewBuffer
) __CFStrDeallocateMutableContents(str
, (void *)curContents
);
772 if (useLengthAndNullBytes
) {
773 newContents
[newLength
] = 0; /* Always have null byte, if not unicode */
774 newContents
--; /* Undo the damage from above */
775 newContents
[0] = __CFCanUseLengthByte(newLength
) ? (uint8_t)newLength
: 0;
776 if (!hasLengthAndNullBytes
) __CFStrSetHasLengthAndNullBytes(str
);
778 if (hasLengthAndNullBytes
) __CFStrClearHasLengthAndNullBytes(str
);
780 if (oldIsUnicode
) __CFStrClearUnicode(str
);
781 } else { // New is unicode...
782 if (!oldIsUnicode
) __CFStrSetUnicode(str
);
783 if (hasLengthAndNullBytes
) __CFStrClearHasLengthAndNullBytes(str
);
785 __CFStrSetExplicitLength(str
, newLength
);
787 if (allocNewBuffer
) {
788 __CFStrSetCapacity(str
, newCapacity
);
789 __CFStrClearCapacityProvidedExternally(str
);
790 __CFStrSetContentPtr(str
, newContents
);
795 /* Same as above, but takes one range (very common case)
797 CF_INLINE
void __CFStringChangeSize(CFMutableStringRef str
, CFRange range
, CFIndex insertLength
, Boolean makeUnicode
) {
798 __CFStringChangeSizeMultiple(str
, &range
, 1, insertLength
, makeUnicode
);
802 static void __CFStringDeallocate(CFTypeRef cf
) {
803 CFStringRef str
= cf
;
805 // constantStringAllocatorForDebugging is not around unless DEBUG is defined, but neither is CFAssert2()...
806 CFAssert1(__CFConstantStringTableBeingFreed
|| CFGetAllocator(str
) != constantStringAllocatorForDebugging
, __kCFLogAssertion
, "Tried to deallocate CFSTR(\"%@\")", str
);
808 if (!__CFStrIsInline(str
)) {
810 Boolean
mutable = __CFStrIsMutable(str
);
811 if (__CFStrFreeContentsWhenDone(str
) && (contents
= (uint8_t *)__CFStrContents(str
))) {
813 __CFStrDeallocateMutableContents((CFMutableStringRef
)str
, contents
);
815 if (__CFStrHasContentsDeallocator(str
)) {
816 CFAllocatorRef contentsDeallocator
= __CFStrContentsDeallocator(str
);
817 CFAllocatorDeallocate(contentsDeallocator
, contents
);
818 CFRelease(contentsDeallocator
);
820 CFAllocatorRef alloc
= __CFGetAllocator(str
);
821 CFAllocatorDeallocate(alloc
, contents
);
825 if (mutable && __CFStrHasContentsAllocator(str
)) CFRelease(__CFStrContentsAllocator((CFMutableStringRef
)str
));
829 static Boolean
__CFStringEqual(CFTypeRef cf1
, CFTypeRef cf2
) {
830 CFStringRef str1
= cf1
;
831 CFStringRef str2
= cf2
;
832 const uint8_t *contents1
;
833 const uint8_t *contents2
;
836 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
837 /* !!! We do not need == test, as the CFBase runtime assures this */
839 contents1
= __CFStrContents(str1
);
840 contents2
= __CFStrContents(str2
);
841 len1
= __CFStrLength2(str1
, contents1
);
843 if (len1
!= __CFStrLength2(str2
, contents2
)) return false;
845 contents1
+= __CFStrSkipAnyLengthByte(str1
);
846 contents2
+= __CFStrSkipAnyLengthByte(str2
);
848 if (__CFStrIsEightBit(str1
) && __CFStrIsEightBit(str2
)) {
849 return memcmp((const char *)contents1
, (const char *)contents2
, len1
) ? false : true;
850 } else if (__CFStrIsEightBit(str1
)) { /* One string has Unicode contents */
851 CFStringInlineBuffer buf
;
854 CFStringInitInlineBuffer(str1
, &buf
, CFRangeMake(0, len1
));
855 for (buf_idx
= 0; buf_idx
< len1
; buf_idx
++) {
856 if (__CFStringGetCharacterFromInlineBufferQuick(&buf
, buf_idx
) != ((UniChar
*)contents2
)[buf_idx
]) return false;
858 } else if (__CFStrIsEightBit(str2
)) { /* One string has Unicode contents */
859 CFStringInlineBuffer buf
;
862 CFStringInitInlineBuffer(str2
, &buf
, CFRangeMake(0, len1
));
863 for (buf_idx
= 0; buf_idx
< len1
; buf_idx
++) {
864 if (__CFStringGetCharacterFromInlineBufferQuick(&buf
, buf_idx
) != ((UniChar
*)contents1
)[buf_idx
]) return false;
866 } else { /* Both strings have Unicode contents */
868 for (idx
= 0; idx
< len1
; idx
++) {
869 if (((UniChar
*)contents1
)[idx
] != ((UniChar
*)contents2
)[idx
]) return false;
/* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
If the length is less than or equal to 24, then the hash function is simply the
following (n is the nth UniChar character, starting from 0):

  hash(-1) = length
  hash(n) = hash(n-1) * 257 + unichar(n);
  Hash = hash(length-1) + (hash(length-1) << (length & 31)), i.e. hash(length-1) * (2^(length & 31) + 1)

If the length is greater than 24, then the above algorithm applies to
characters 0..7 and length-16..length-1; thus the first 8 and last 16 characters.

Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401; 67503105 is 257^4 - 256^4
If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.

NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.
*/
893 /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing.
895 CF_INLINE CFHashCode
__CFStrHashCharacters(const UniChar
*uContents
, CFIndex len
, CFIndex actualLen
) {
896 CFHashCode result
= actualLen
;
898 const UniChar
*end4
= uContents
+ (len
& ~3);
899 const UniChar
*end
= uContents
+ len
;
900 while (uContents
< end4
) { // First count in fours
901 result
= result
* 67503105 + uContents
[0] * 16974593 + uContents
[1] * 66049 + uContents
[2] * 257 + uContents
[3];
904 while (uContents
< end
) { // Then for the last <4 chars, count in ones...
905 result
= result
* 257 + *uContents
++;
908 result
= result
* 67503105 + uContents
[0] * 16974593 + uContents
[1] * 66049 + uContents
[2] * 257 + uContents
[3];
909 result
= result
* 67503105 + uContents
[4] * 16974593 + uContents
[5] * 66049 + uContents
[6] * 257 + uContents
[7];
910 uContents
+= (len
- 16);
911 result
= result
* 67503105 + uContents
[0] * 16974593 + uContents
[1] * 66049 + uContents
[2] * 257 + uContents
[3];
912 result
= result
* 67503105 + uContents
[4] * 16974593 + uContents
[5] * 66049 + uContents
[6] * 257 + uContents
[7];
913 result
= result
* 67503105 + uContents
[8] * 16974593 + uContents
[9] * 66049 + uContents
[10] * 257 + uContents
[11];
914 result
= result
* 67503105 + uContents
[12] * 16974593 + uContents
[13] * 66049 + uContents
[14] * 257 + uContents
[15];
916 return result
+ (result
<< (actualLen
& 31));
919 /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check.
921 CF_INLINE CFHashCode
__CFStrHashEightBit(const uint8_t *contents
, CFIndex len
) {
923 const uint8_t *origContents
= contents
;
925 CFHashCode result
= len
;
927 const uint8_t *end4
= contents
+ (len
& ~3);
928 const uint8_t *end
= contents
+ len
;
929 while (contents
< end4
) { // First count in fours
930 result
= result
* 67503105 + __CFCharToUniCharTable
[contents
[0]] * 16974593 + __CFCharToUniCharTable
[contents
[1]] * 66049 + __CFCharToUniCharTable
[contents
[2]] * 257 + __CFCharToUniCharTable
[contents
[3]];
933 while (contents
< end
) { // Then for the last <4 chars, count single chars
934 result
= result
* 257 + __CFCharToUniCharTable
[*contents
++];
937 result
= result
* 67503105 + __CFCharToUniCharTable
[contents
[0]] * 16974593 + __CFCharToUniCharTable
[contents
[1]] * 66049 + __CFCharToUniCharTable
[contents
[2]] * 257 + __CFCharToUniCharTable
[contents
[3]];
938 result
= result
* 67503105 + __CFCharToUniCharTable
[contents
[4]] * 16974593 + __CFCharToUniCharTable
[contents
[5]] * 66049 + __CFCharToUniCharTable
[contents
[6]] * 257 + __CFCharToUniCharTable
[contents
[7]];
939 contents
+= (len
- 16);
940 result
= result
* 67503105 + __CFCharToUniCharTable
[contents
[0]] * 16974593 + __CFCharToUniCharTable
[contents
[1]] * 66049 + __CFCharToUniCharTable
[contents
[2]] * 257 + __CFCharToUniCharTable
[contents
[3]];
941 result
= result
* 67503105 + __CFCharToUniCharTable
[contents
[4]] * 16974593 + __CFCharToUniCharTable
[contents
[5]] * 66049 + __CFCharToUniCharTable
[contents
[6]] * 257 + __CFCharToUniCharTable
[contents
[7]];
942 result
= result
* 67503105 + __CFCharToUniCharTable
[contents
[8]] * 16974593 + __CFCharToUniCharTable
[contents
[9]] * 66049 + __CFCharToUniCharTable
[contents
[10]] * 257 + __CFCharToUniCharTable
[contents
[11]];
943 result
= result
* 67503105 + __CFCharToUniCharTable
[contents
[12]] * 16974593 + __CFCharToUniCharTable
[contents
[13]] * 66049 + __CFCharToUniCharTable
[contents
[14]] * 257 + __CFCharToUniCharTable
[contents
[15]];
946 if (!__CFCharToUniCharFunc
) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
949 contents
= origContents
;
951 for (cnt
= 0; cnt
< len
; cnt
++) if (contents
[cnt
] >= 128) err
= true;
953 for (cnt
= 0; cnt
< 8; cnt
++) if (contents
[cnt
] >= 128) err
= true;
954 for (cnt
= len
- 16; cnt
< len
; cnt
++) if (contents
[cnt
] >= 128) err
= true;
957 // Can't do log here, as it might be too early
958 fprintf(stderr
, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
962 return result
+ (result
<< (len
& 31));
965 CFHashCode
CFStringHashISOLatin1CString(const uint8_t *bytes
, CFIndex len
) {
966 CFHashCode result
= len
;
968 const uint8_t *end4
= bytes
+ (len
& ~3);
969 const uint8_t *end
= bytes
+ len
;
970 while (bytes
< end4
) { // First count in fours
971 result
= result
* 67503105 + bytes
[0] * 16974593 + bytes
[1] * 66049 + bytes
[2] * 257 + bytes
[3];
974 while (bytes
< end
) { // Then for the last <4 chars, count in ones...
975 result
= result
* 257 + *bytes
++;
978 result
= result
* 67503105 + bytes
[0] * 16974593 + bytes
[1] * 66049 + bytes
[2] * 257 + bytes
[3];
979 result
= result
* 67503105 + bytes
[4] * 16974593 + bytes
[5] * 66049 + bytes
[6] * 257 + bytes
[7];
981 result
= result
* 67503105 + bytes
[0] * 16974593 + bytes
[1] * 66049 + bytes
[2] * 257 + bytes
[3];
982 result
= result
* 67503105 + bytes
[4] * 16974593 + bytes
[5] * 66049 + bytes
[6] * 257 + bytes
[7];
983 result
= result
* 67503105 + bytes
[8] * 16974593 + bytes
[9] * 66049 + bytes
[10] * 257 + bytes
[11];
984 result
= result
* 67503105 + bytes
[12] * 16974593 + bytes
[13] * 66049 + bytes
[14] * 257 + bytes
[15];
986 return result
+ (result
<< (len
& 31));
989 CFHashCode
CFStringHashCString(const uint8_t *bytes
, CFIndex len
) {
990 return __CFStrHashEightBit(bytes
, len
);
993 CFHashCode
CFStringHashCharacters(const UniChar
*characters
, CFIndex len
) {
994 return __CFStrHashCharacters(characters
, len
, len
);
997 /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash.
999 CFHashCode
CFStringHashNSString(CFStringRef str
) {
1001 CFIndex bufLen
; // Number of characters in the buffer for hashing
1002 CFIndex len
; // Actual length of the string
1004 CF_OBJC_CALL0(CFIndex
, len
, str
, "length");
1006 CF_OBJC_VOIDCALL2(str
, "getCharacters:range:", buffer
, CFRangeMake(0, len
));
1009 CF_OBJC_VOIDCALL2(str
, "getCharacters:range:", buffer
, CFRangeMake(0, 8));
1010 CF_OBJC_VOIDCALL2(str
, "getCharacters:range:", buffer
+8, CFRangeMake(len
-16, 16));
1013 return __CFStrHashCharacters(buffer
, bufLen
, len
);
1016 CFHashCode
__CFStringHash(CFTypeRef cf
) {
1017 /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
1018 CFStringRef str
= cf
;
1019 const uint8_t *contents
= __CFStrContents(str
);
1020 CFIndex len
= __CFStrLength2(str
, contents
);
1022 if (__CFStrIsEightBit(str
)) {
1023 contents
+= __CFStrSkipAnyLengthByte(str
);
1024 return __CFStrHashEightBit(contents
, len
);
1026 return __CFStrHashCharacters((const UniChar
*)contents
, len
, len
);
1031 static CFStringRef
__CFStringCopyDescription(CFTypeRef cf
) {
1032 return CFStringCreateWithFormat(kCFAllocatorDefault
, NULL
, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf
, __CFGetAllocator(cf
), cf
);
1035 static CFStringRef
__CFStringCopyFormattingDescription(CFTypeRef cf
, CFDictionaryRef formatOptions
) {
1036 return CFStringCreateCopy(__CFGetAllocator(cf
), cf
);
1039 static CFTypeID __kCFStringTypeID
= _kCFRuntimeNotATypeID
;
1041 static const CFRuntimeClass __CFStringClass
= {
1045 (void *)CFStringCreateCopy
,
1046 __CFStringDeallocate
,
1049 __CFStringCopyFormattingDescription
,
1050 __CFStringCopyDescription
1053 __private_extern__
void __CFStringInitialize(void) {
1054 __kCFStringTypeID
= _CFRuntimeRegisterClass(&__CFStringClass
);
1057 CFTypeID
CFStringGetTypeID(void) {
1058 return __kCFStringTypeID
;
1062 static Boolean
CFStrIsUnicode(CFStringRef str
) {
1063 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, Boolean
, str
, "_encodingCantBeStoredInEightBitCFString");
1064 return __CFStrIsUnicode(str
);
1069 #define ALLOCATORSFREEFUNC ((void *)-1)
1071 /* contentsDeallocator indicates how to free the data if it's noCopy == true:
1072 kCFAllocatorNull: don't free
1073 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
1074 NULL: default allocator
1075 otherwise it's the allocator that should be used (it will be explicitly stored)
1076 if noCopy == false, then freeFunc should be ALLOCATORSFREEFUNC
1077 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
1078 possiblyExternalFormat indicates that the bytes might have BOM and be swapped
1079 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
1080 numBytes contains the actual number of bytes in "bytes", including Length byte,
1081 BUT not the NULL byte at the end
1082 bytes should not contain BOM characters
1083 !!! Various flags should be combined to reduce number of arguments, if possible
1085 __private_extern__ CFStringRef
__CFStringCreateImmutableFunnel3(
1086 CFAllocatorRef alloc
, const void *bytes
, CFIndex numBytes
, CFStringEncoding encoding
,
1087 Boolean possiblyExternalFormat
, Boolean tryToReduceUnicode
, Boolean hasLengthByte
, Boolean hasNullByte
, Boolean noCopy
,
1088 CFAllocatorRef contentsDeallocator
, UInt32 converterFlags
) {
1090 CFMutableStringRef str
;
1091 CFVarWidthCharBuffer vBuf
;
1093 Boolean useLengthByte
= false;
1094 Boolean useNullByte
= false;
1095 Boolean useInlineData
= false;
1097 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
1099 if (contentsDeallocator
== ALLOCATORSFREEFUNC
) {
1100 contentsDeallocator
= alloc
;
1101 } else if (contentsDeallocator
== NULL
) {
1102 contentsDeallocator
= __CFGetDefaultAllocator();
1105 if ((NULL
!= kCFEmptyString
) && (numBytes
== 0) && (alloc
== kCFAllocatorSystemDefault
)) { // If we are using the system default allocator, and the string is empty, then use the empty string!
1106 if (noCopy
&& (contentsDeallocator
!= kCFAllocatorNull
)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
1107 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1109 return CFRetain(kCFEmptyString
); // Quick exit; won't catch all empty strings, but most
1112 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1114 vBuf
.shouldFreeChars
= false; // We use this to remember to free the buffer possibly allocated by decode
1116 // First check to see if the data needs to be converted...
1117 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
1119 if ((encoding
== kCFStringEncodingUnicode
&& possiblyExternalFormat
) || (encoding
!= kCFStringEncodingUnicode
&& !__CFCanUseEightBitCFStringForBytes(bytes
, numBytes
, encoding
))) {
1120 const void *realBytes
= (uint8_t*) bytes
+ (hasLengthByte
? 1 : 0);
1121 CFIndex realNumBytes
= numBytes
- (hasLengthByte
? 1 : 0);
1122 Boolean usingPassedInMemory
= false;
1124 vBuf
.allocator
= __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
1125 vBuf
.chars
.unicode
= NULL
; // This will cause the decode function to allocate memory if necessary
1127 if (!__CFStringDecodeByteStream3(realBytes
, realNumBytes
, encoding
, false, &vBuf
, &usingPassedInMemory
, converterFlags
)) {
1128 return NULL
; // !!! Is this acceptable failure mode?
1131 encoding
= vBuf
.isASCII
? kCFStringEncodingASCII
: kCFStringEncodingUnicode
;
1133 if (!usingPassedInMemory
) {
1135 // Make the parameters fit the new situation
1136 numBytes
= vBuf
.isASCII
? vBuf
.numChars
: (vBuf
.numChars
* sizeof(UniChar
));
1137 hasLengthByte
= hasNullByte
= false;
1139 // Get rid of the original buffer if its not being used
1140 if (noCopy
&& contentsDeallocator
!= kCFAllocatorNull
) {
1141 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1143 contentsDeallocator
= alloc
; // At this point we are using the string's allocator, as the original buffer is gone...
1145 // See if we can reuse any storage the decode func might have allocated
1146 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1148 if (vBuf
.shouldFreeChars
&& (alloc
== vBuf
.allocator
) && encoding
== kCFStringEncodingUnicode
) {
1149 vBuf
.shouldFreeChars
= false; // Transferring ownership to the CFString
1150 bytes
= CFAllocatorReallocate(vBuf
.allocator
, (void *)vBuf
.chars
.unicode
, numBytes
, 0); // Tighten up the storage
1153 bytes
= vBuf
.chars
.unicode
;
1154 noCopy
= false; // Can't do noCopy anymore
1155 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1160 // At this point, all necessary input arguments have been changed to reflect the new state
1162 } else if (encoding
== kCFStringEncodingUnicode
&& tryToReduceUnicode
) { // Check to see if we can reduce Unicode to ASCII
1164 CFIndex len
= numBytes
/ sizeof(UniChar
);
1165 Boolean allASCII
= true;
1167 for (cnt
= 0; cnt
< len
; cnt
++) if (((const UniChar
*)bytes
)[cnt
] > 127) {
1172 if (allASCII
) { // Yes we can!
1174 hasLengthByte
= __CFCanUseLengthByte(len
);
1176 numBytes
= (len
+ 1 + (hasLengthByte
? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte
1177 // See if we can use that temporary local buffer in vBuf...
1178 if (numBytes
>= __kCFVarWidthLocalBufferSize
) {
1179 mem
= ptr
= (uint8_t *)CFAllocatorAllocate(alloc
, numBytes
, 0);
1180 if (__CFOASafe
) __CFSetLastAllocationEventName(mem
, "CFString (store)");
1182 mem
= ptr
= (uint8_t *)(vBuf
.localBuffer
);
1184 // Copy the Unicode bytes into the new ASCII buffer
1185 if (hasLengthByte
) *ptr
++ = len
;
1186 for (cnt
= 0; cnt
< len
; cnt
++) ptr
[cnt
] = ((const UniChar
*)bytes
)[cnt
];
1188 if (noCopy
&& contentsDeallocator
!= kCFAllocatorNull
) {
1189 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1191 // Now make everything look like we had an ASCII buffer to start with
1193 encoding
= kCFStringEncodingASCII
;
1194 contentsDeallocator
= alloc
; // At this point we are using the string's allocator, as the original buffer is gone...
1195 noCopy
= (numBytes
>= __kCFVarWidthLocalBufferSize
); // If we had to allocate it, make sure it's kept around
1196 numBytes
--; // Should not contain the NULL byte at end...
1199 // At this point, all necessary input arguments have been changed to reflect the new state
1202 // Now determine the necessary size
1206 size
= sizeof(void *); // Pointer to the buffer
1207 if (contentsDeallocator
!= alloc
&& contentsDeallocator
!= kCFAllocatorNull
) {
1208 size
+= sizeof(void *); // The contentsDeallocator
1210 if (!hasLengthByte
) size
+= sizeof(SInt32
); // Explicit length
1211 useLengthByte
= hasLengthByte
;
1212 useNullByte
= hasNullByte
;
1214 } else { // Inline data; reserve space for it
1216 useInlineData
= true;
1219 if (hasLengthByte
|| (encoding
!= kCFStringEncodingUnicode
&& __CFCanUseLengthByte(numBytes
))) {
1220 useLengthByte
= true;
1221 if (!hasLengthByte
) size
+= 1;
1223 size
+= sizeof(SInt32
); // Explicit length
1225 if (hasNullByte
|| encoding
!= kCFStringEncodingUnicode
) {
1231 #ifdef STRING_SIZE_STATS
1232 // Dump alloced CFString size info every so often
1234 static unsigned sizes
[256] = {0};
1235 int allocedSize
= size
+ sizeof(CFRuntimeBase
);
1236 if (allocedSize
< 255) sizes
[allocedSize
]++; else sizes
[255]++;
1237 if ((++cnt
% 1000) == 0) {
1238 printf ("\nTotal: %d\n", cnt
);
1239 int i
; for (i
= 0; i
< 256; i
++) printf("%03d: %5d%s", i
, sizes
[i
], ((i
% 8) == 7) ? "\n" : " ");
1243 // Finally, allocate!
1245 str
= (CFMutableStringRef
)_CFRuntimeCreateInstance(alloc
, __kCFStringTypeID
, size
, NULL
);
1247 if (__CFOASafe
) __CFSetLastAllocationEventName(str
, "CFString (immutable)");
1249 __CFStrSetInfoBits(str
,
1250 (useInlineData
? __kCFHasInlineContents
: (contentsDeallocator
== alloc
? __kCFNotInlineContentsDefaultFree
: (contentsDeallocator
== kCFAllocatorNull
? __kCFNotInlineContentsNoFree
: __kCFNotInlineContentsCustomFree
))) |
1251 ((encoding
== kCFStringEncodingUnicode
) ? __kCFIsUnicode
: 0) |
1252 (useNullByte
? __kCFHasNullByte
: 0) |
1253 (useLengthByte
? __kCFHasLengthByte
: 0));
1255 if (!useLengthByte
) {
1256 CFIndex length
= numBytes
- (hasLengthByte
? 1 : 0);
1257 if (encoding
== kCFStringEncodingUnicode
) length
/= sizeof(UniChar
);
1258 __CFStrSetExplicitLength(str
, length
);
1261 if (useInlineData
) {
1262 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
1263 if (useLengthByte
&& !hasLengthByte
) *contents
++ = numBytes
;
1264 memmove(contents
, bytes
, numBytes
);
1265 if (useNullByte
) contents
[numBytes
] = 0;
1267 __CFStrSetContentPtr(str
, bytes
);
1268 if (contentsDeallocator
!= alloc
&& contentsDeallocator
!= kCFAllocatorNull
) __CFStrSetContentsDeallocator(str
, CFRetain(contentsDeallocator
));
1271 if (contentsDeallocator
!= kCFAllocatorNull
) CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1273 if (vBuf
.shouldFreeChars
) CFAllocatorDeallocate(vBuf
.allocator
, (void *)bytes
);
1278 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1280 CFStringRef
__CFStringCreateImmutableFunnel2(
1281 CFAllocatorRef alloc
, const void *bytes
, CFIndex numBytes
, CFStringEncoding encoding
,
1282 Boolean possiblyExternalFormat
, Boolean tryToReduceUnicode
, Boolean hasLengthByte
, Boolean hasNullByte
, Boolean noCopy
,
1283 CFAllocatorRef contentsDeallocator
) {
1284 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, possiblyExternalFormat
, tryToReduceUnicode
, hasLengthByte
, hasNullByte
, noCopy
, contentsDeallocator
, 0);
1289 CFStringRef
CFStringCreateWithPascalString(CFAllocatorRef alloc
, ConstStringPtr pStr
, CFStringEncoding encoding
) {
1290 CFIndex len
= (CFIndex
)(*(uint8_t *)pStr
);
1291 return __CFStringCreateImmutableFunnel3(alloc
, pStr
, len
+1, encoding
, false, false, true, false, false, ALLOCATORSFREEFUNC
, 0);
1295 CFStringRef
CFStringCreateWithCString(CFAllocatorRef alloc
, const char *cStr
, CFStringEncoding encoding
) {
1296 CFIndex len
= strlen(cStr
);
1297 return __CFStringCreateImmutableFunnel3(alloc
, cStr
, len
, encoding
, false, false, false, true, false, ALLOCATORSFREEFUNC
, 0);
1300 CFStringRef
CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc
, ConstStringPtr pStr
, CFStringEncoding encoding
, CFAllocatorRef contentsDeallocator
) {
1301 CFIndex len
= (CFIndex
)(*(uint8_t *)pStr
);
1302 return __CFStringCreateImmutableFunnel3(alloc
, pStr
, len
+1, encoding
, false, false, true, false, true, contentsDeallocator
, 0);
1306 CFStringRef
CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc
, const char *cStr
, CFStringEncoding encoding
, CFAllocatorRef contentsDeallocator
) {
1307 CFIndex len
= strlen(cStr
);
1308 return __CFStringCreateImmutableFunnel3(alloc
, cStr
, len
, encoding
, false, false, false, true, true, contentsDeallocator
, 0);
1312 CFStringRef
CFStringCreateWithCharacters(CFAllocatorRef alloc
, const UniChar
*chars
, CFIndex numChars
) {
1313 return __CFStringCreateImmutableFunnel3(alloc
, chars
, numChars
* sizeof(UniChar
), kCFStringEncodingUnicode
, false, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1317 CFStringRef
CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc
, const UniChar
*chars
, CFIndex numChars
, CFAllocatorRef contentsDeallocator
) {
1318 return __CFStringCreateImmutableFunnel3(alloc
, chars
, numChars
* sizeof(UniChar
), kCFStringEncodingUnicode
, false, false, false, false, true, contentsDeallocator
, 0);
1322 CFStringRef
CFStringCreateWithBytes(CFAllocatorRef alloc
, const uint8_t *bytes
, CFIndex numBytes
, CFStringEncoding encoding
, Boolean externalFormat
) {
1323 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, externalFormat
, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1326 CFStringRef
_CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc
, const uint8_t *bytes
, CFIndex numBytes
, CFStringEncoding encoding
, Boolean externalFormat
, CFAllocatorRef contentsDeallocator
) {
1327 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, externalFormat
, true, false, false, true, contentsDeallocator
, 0);
1330 CFStringRef
CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc
, const uint8_t *bytes
, CFIndex numBytes
, CFStringEncoding encoding
, Boolean externalFormat
, CFAllocatorRef contentsDeallocator
) {
1331 return _CFStringCreateWithBytesNoCopy(alloc
, bytes
, numBytes
, encoding
, externalFormat
, contentsDeallocator
);
1334 CFStringRef
CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc
, CFDictionaryRef formatOptions
, CFStringRef format
, va_list arguments
) {
1335 return _CFStringCreateWithFormatAndArgumentsAux(alloc
, NULL
, formatOptions
, format
, arguments
);
1338 CFStringRef
_CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc
, CFStringRef (*copyDescFunc
)(void *, CFDictionaryRef
), CFDictionaryRef formatOptions
, CFStringRef format
, va_list arguments
) {
1340 CFMutableStringRef outputString
= CFStringCreateMutable(__CFGetDefaultAllocator(), 0); //should use alloc if no copy/release
1341 __CFStrSetDesiredCapacity(outputString
, 120); // Given this will be tightened later, choosing a larger working string is fine
1342 _CFStringAppendFormatAndArgumentsAux(outputString
, copyDescFunc
, formatOptions
, format
, arguments
);
1343 // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1344 // (However, this does make the string inline, and cause the supplied allocator to be used...)
1345 str
= CFStringCreateCopy(alloc
, outputString
);
1346 CFRelease(outputString
);
1350 CFStringRef
CFStringCreateWithFormat(CFAllocatorRef alloc
, CFDictionaryRef formatOptions
, CFStringRef format
, ...) {
1354 va_start(argList
, format
);
1355 result
= CFStringCreateWithFormatAndArguments(alloc
, formatOptions
, format
, argList
);
1361 CFStringRef
CFStringCreateWithSubstring(CFAllocatorRef alloc
, CFStringRef str
, CFRange range
) {
1362 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) {
1363 static SEL s
= NULL
;
1364 CFStringRef (*func
)(void *, SEL
, ...) = (void *)__CFSendObjCMsg
;
1365 if (!s
) s
= sel_registerName("_createSubstringWithRange:");
1366 CFStringRef result
= func((void *)str
, s
, CFRangeMake(range
.location
, range
.length
));
1367 if (result
&& CF_USING_COLLECTABLE_MEMORY
) CFRetain(result
); // needs hard retain.
1370 // CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, CFStringRef , str, "_createSubstringWithRange:", CFRangeMake(range.location, range.length));
1372 __CFAssertIsString(str
);
1373 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
1375 if ((range
.location
== 0) && (range
.length
== __CFStrLength(str
))) { /* The substring is the whole string... */
1376 return CFStringCreateCopy(alloc
, str
);
1377 } else if (__CFStrIsEightBit(str
)) {
1378 const uint8_t *contents
= __CFStrContents(str
);
1379 return __CFStringCreateImmutableFunnel3(alloc
, contents
+ range
.location
+ __CFStrSkipAnyLengthByte(str
), range
.length
, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC
, 0);
1381 const UniChar
*contents
= __CFStrContents(str
);
1382 return __CFStringCreateImmutableFunnel3(alloc
, contents
+ range
.location
, range
.length
* sizeof(UniChar
), kCFStringEncodingUnicode
, false, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1386 CFStringRef
CFStringCreateCopy(CFAllocatorRef alloc
, CFStringRef str
) {
1387 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) {
1388 static SEL s
= NULL
;
1389 CFStringRef (*func
)(void *, SEL
, ...) = (void *)__CFSendObjCMsg
;
1390 if (!s
) s
= sel_registerName("copy");
1391 CFStringRef result
= func((void *)str
, s
);
1392 if (result
&& CF_USING_COLLECTABLE_MEMORY
) CFRetain(result
); // needs hard retain.
1395 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringRef, str, "copy");
1397 __CFAssertIsString(str
);
1398 if (!__CFStrIsMutable(str
) && // If the string is not mutable
1399 ((alloc
? alloc
: __CFGetDefaultAllocator()) == __CFGetAllocator(str
)) && // and it has the same allocator as the one we're using
1400 (__CFStrIsInline(str
) || __CFStrFreeContentsWhenDone(str
) || __CFStrIsConstant(str
))) { // and the characters are inline, or are owned by the string, or the string is constant
1401 CFRetain(str
); // Then just retain instead of making a true copy
1404 if (__CFStrIsEightBit(str
)) {
1405 const uint8_t *contents
= __CFStrContents(str
);
1406 return __CFStringCreateImmutableFunnel3(alloc
, contents
+ __CFStrSkipAnyLengthByte(str
), __CFStrLength2(str
, contents
), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC
, 0);
1408 const UniChar
*contents
= __CFStrContents(str
);
1409 return __CFStringCreateImmutableFunnel3(alloc
, contents
, __CFStrLength2(str
, contents
) * sizeof(UniChar
), kCFStringEncodingUnicode
, false, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1415 /*** Constant string stuff... ***/
1417 static CFMutableDictionaryRef constantStringTable
= NULL
;
1419 /* For now we call a function to create a constant string and keep previously created constant strings in a dictionary. The keys are the 8-bit constant C-strings from the compiler; the values are the CFStrings created for them.
1422 static CFStringRef
__cStrCopyDescription(const void *ptr
) {
1423 return CFStringCreateWithCStringNoCopy(NULL
, (const char *)ptr
, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull
);
1426 static Boolean
__cStrEqual(const void *ptr1
, const void *ptr2
) {
1427 return (strcmp((const char *)ptr1
, (const char *)ptr2
) == 0);
1430 static CFHashCode
__cStrHash(const void *ptr
) {
1431 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1432 const unsigned char *cStr
= (const unsigned char *)ptr
;
1433 CFIndex len
= strlen(cStr
);
1434 CFHashCode result
= 0;
1435 if (len
<= 4) { // All chars
1437 while (cnt
--) result
+= (result
<< 8) + *cStr
++;
1438 } else { // First and last 2 chars
1439 result
+= (result
<< 8) + cStr
[0];
1440 result
+= (result
<< 8) + cStr
[1];
1441 result
+= (result
<< 8) + cStr
[len
-2];
1442 result
+= (result
<< 8) + cStr
[len
-1];
1444 result
+= (result
<< (len
& 31));
1449 /* We use a special allocator (which simply calls through to the default) for constant strings so that we can catch them being freed...
1451 static void *csRealloc(void *oPtr
, CFIndex size
, CFOptionFlags hint
, void *info
) {
1452 return CFAllocatorReallocate(NULL
, oPtr
, size
, hint
);
1455 static void *csAlloc(CFIndex size
, CFOptionFlags hint
, void *info
) {
1456 return CFAllocatorAllocate(NULL
, size
, hint
);
/* Deallocate callback: forwards to CFAllocatorDeallocate() with a NULL allocator; info is unused. */
static void csDealloc(void *ptr, void *info) {
    (void)info;
    CFAllocatorDeallocate(NULL, ptr);
}
1463 static CFStringRef
csCopyDescription(const void *info
) {
1464 return CFRetain(CFSTR("Debug allocator for CFSTRs"));
1468 static CFSpinLock_t _CFSTRLock
= 0;
1470 CFStringRef
__CFStringMakeConstantString(const char *cStr
) {
1473 //StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging
1474 if ('\0' == *cStr
) return kCFEmptyString
;
1476 if (constantStringTable
== NULL
) {
1477 CFDictionaryKeyCallBacks constantStringCallBacks
= {0, NULL
, NULL
, __cStrCopyDescription
, __cStrEqual
, __cStrHash
};
1478 CFMutableDictionaryRef table
= CFDictionaryCreateMutable(NULL
, 0, &constantStringCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1479 _CFDictionarySetCapacity(table
, 2500); // avoid lots of rehashing
1480 __CFSpinLock(&_CFSTRLock
);
1481 if (constantStringTable
== NULL
) constantStringTable
= table
;
1482 __CFSpinUnlock(&_CFSTRLock
);
1483 if (constantStringTable
!= table
) CFRelease(table
);
1486 CFAllocatorContext context
= {0, NULL
, NULL
, NULL
, csCopyDescription
, csAlloc
, csRealloc
, csDealloc
, NULL
};
1487 constantStringAllocatorForDebugging
= _CFAllocatorCreateGC(NULL
, &context
);
1490 #define constantStringAllocatorForDebugging NULL
1494 __CFSpinLock(&_CFSTRLock
);
1495 if ((result
= (CFStringRef
)CFDictionaryGetValue(constantStringTable
, cStr
))) {
1496 __CFSpinUnlock(&_CFSTRLock
);
1498 __CFSpinUnlock(&_CFSTRLock
);
1502 Boolean isASCII
= true;
1503 // Given this code path is rarer these days, OK to do this extra work to verify the strings
1504 const unsigned char *tmp
= cStr
;
1512 CFMutableStringRef ms
= CFStringCreateMutable(NULL
, 0);
1515 CFStringAppendFormat(ms
, NULL
, (*tmp
> 127) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp
);
1518 CFLog(0, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms
);
1521 // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility
1522 result
= CFStringCreateWithCString(constantStringAllocatorForDebugging
, cStr
, kCFStringEncodingMacRoman
);
1523 if (result
== NULL
) {
1524 CFLog(__kCFLogAssertion
, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1527 if (__CFOASafe
) __CFSetLastAllocationEventName((void *)result
, "CFString (CFSTR)");
1528 if (__CFStrIsEightBit(result
)) {
1529 key
= (char *)__CFStrContents(result
) + __CFStrSkipAnyLengthByte(result
);
1530 } else { // For some reason the string is not 8-bit!
1531 key
= CFAllocatorAllocate(NULL
, strlen(cStr
) + 1, 0);
1532 if (__CFOASafe
) __CFSetLastAllocationEventName((void *)key
, "CFString (CFSTR key)");
1533 strcpy(key
, cStr
); // !!! We will leak this, if the string is removed from the table (or table is freed)
1538 CFStringRef resultToBeReleased
= result
;
1541 __CFSpinLock(&_CFSTRLock
);
1542 count
= CFDictionaryGetCount(constantStringTable
);
1543 CFDictionaryAddValue(constantStringTable
, key
, result
);
1544 if (CFDictionaryGetCount(constantStringTable
) == count
) { // add did nothing, someone already put it there
1545 result
= (CFStringRef
)CFDictionaryGetValue(constantStringTable
, key
);
1547 __CFSpinUnlock(&_CFSTRLock
);
1549 // Can't release this in the DEBUG case; will get assertion failure
1550 CFRelease(resultToBeReleased
);
#if defined(__MACOS8__) || defined(__WIN32__)

/* Releases the constant string table (and, in DEBUG builds, the debugging allocator).
   In DEBUG builds __CFConstantStringTableBeingFreed is raised for the duration of the release.
*/
void __CFStringCleanup (void) {
    /* in case library is unloaded, release store for the constant string table */
    if (constantStringTable != NULL) {
#if defined(DEBUG)
        __CFConstantStringTableBeingFreed = true;
#endif
        CFRelease(constantStringTable);
#if defined(DEBUG)
        __CFConstantStringTableBeingFreed = false;
#endif
    }
#if defined(DEBUG)
    CFAllocatorDeallocate( constantStringAllocatorForDebugging, (void*) constantStringAllocatorForDebugging );
#endif
}

#endif
1579 // Can pass in NSString as replacement string
1580 // Call with numRanges > 0, and incrementing ranges
1582 static void __CFStringReplaceMultiple(CFMutableStringRef str
, CFRange
*ranges
, CFIndex numRanges
, CFStringRef replacement
) {
1584 CFStringRef copy
= NULL
;
1585 if (replacement
== str
) copy
= replacement
= CFStringCreateCopy(NULL
, replacement
); // Very special and hopefully rare case
1586 CFIndex replacementLength
= CFStringGetLength(replacement
);
1588 __CFStringChangeSizeMultiple(str
, ranges
, numRanges
, replacementLength
, (replacementLength
> 0) && CFStrIsUnicode(replacement
));
1590 if (__CFStrIsUnicode(str
)) {
1591 UniChar
*contents
= (UniChar
*)__CFStrContents(str
);
1592 UniChar
*firstReplacement
= contents
+ ranges
[0].location
;
1593 // Extract the replacementString into the first location, then copy from there
1594 CFStringGetCharacters(replacement
, CFRangeMake(0, replacementLength
), firstReplacement
);
1595 for (cnt
= 1; cnt
< numRanges
; cnt
++) {
1596 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1597 contents
+= replacementLength
- ranges
[cnt
- 1].length
;
1598 memmove(contents
+ ranges
[cnt
].location
, firstReplacement
, replacementLength
* sizeof(UniChar
));
1601 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
1602 uint8_t *firstReplacement
= contents
+ ranges
[0].location
+ __CFStrSkipAnyLengthByte(str
);
1603 // Extract the replacementString into the first location, then copy from there
1604 CFStringGetBytes(replacement
, CFRangeMake(0, replacementLength
), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement
, replacementLength
, NULL
);
1605 contents
+= __CFStrSkipAnyLengthByte(str
); // Now contents will simply track the location to insert next string into
1606 for (cnt
= 1; cnt
< numRanges
; cnt
++) {
1607 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1608 contents
+= replacementLength
- ranges
[cnt
- 1].length
;
1609 memmove(contents
+ ranges
[cnt
].location
, firstReplacement
, replacementLength
);
1612 if (copy
) CFRelease(copy
);
1615 // Can pass in NSString as replacement string
1617 CF_INLINE
void __CFStringReplace(CFMutableStringRef str
, CFRange range
, CFStringRef replacement
) {
1618 CFStringRef copy
= NULL
;
1619 if (replacement
== str
) copy
= replacement
= CFStringCreateCopy(NULL
, replacement
); // Very special and hopefully rare case
1620 CFIndex replacementLength
= CFStringGetLength(replacement
);
1622 __CFStringChangeSize(str
, range
, replacementLength
, (replacementLength
> 0) && CFStrIsUnicode(replacement
));
1624 if (__CFStrIsUnicode(str
)) {
1625 UniChar
*contents
= (UniChar
*)__CFStrContents(str
);
1626 CFStringGetCharacters(replacement
, CFRangeMake(0, replacementLength
), contents
+ range
.location
);
1628 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
1629 CFStringGetBytes(replacement
, CFRangeMake(0, replacementLength
), __CFStringGetEightBitStringEncoding(), 0, false, contents
+ range
.location
+ __CFStrSkipAnyLengthByte(str
), replacementLength
, NULL
);
1632 if (copy
) CFRelease(copy
);
1635 /* If client does not provide a minimum capacity
1637 #define DEFAULTMINCAPACITY 32
1639 CF_INLINE CFMutableStringRef
__CFStringCreateMutableFunnel(CFAllocatorRef alloc
, CFIndex maxLength
, UInt32 additionalInfoBits
) {
1640 CFMutableStringRef str
;
1641 Boolean hasExternalContentsAllocator
= (additionalInfoBits
& __kCFHasContentsAllocator
) ? true : false;
1643 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
1645 // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
1646 str
= (CFMutableStringRef
)_CFRuntimeCreateInstance(alloc
, __kCFStringTypeID
, sizeof(void *) + sizeof(UInt32
) * 3 + (hasExternalContentsAllocator
? sizeof(CFAllocatorRef
) : 0), NULL
);
1648 if (__CFOASafe
) __CFSetLastAllocationEventName(str
, "CFString (mutable)");
1650 __CFStrSetInfoBits(str
, __kCFIsMutable
| additionalInfoBits
);
1651 str
->variants
.notInlineMutable
.buffer
= NULL
;
1652 __CFStrSetExplicitLength(str
, 0);
1653 str
->variants
.notInlineMutable
.gapEtc
= 0;
1654 if (maxLength
!= 0) __CFStrSetIsFixed(str
);
1655 __CFStrSetDesiredCapacity(str
, (maxLength
== 0) ? DEFAULTMINCAPACITY
: maxLength
);
1656 __CFStrSetCapacity(str
, 0);
1661 CFMutableStringRef
CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc
, UniChar
*chars
, CFIndex numChars
, CFIndex capacity
, CFAllocatorRef externalCharactersAllocator
) {
1662 CFOptionFlags contentsAllocationBits
= externalCharactersAllocator
? ((externalCharactersAllocator
== kCFAllocatorNull
) ? __kCFNotInlineContentsNoFree
: __kCFHasContentsAllocator
) : __kCFNotInlineContentsDefaultFree
;
1663 CFMutableStringRef string
= __CFStringCreateMutableFunnel(alloc
, 0, contentsAllocationBits
| __kCFIsUnicode
);
1665 __CFStrSetIsExternalMutable(string
);
1666 if (contentsAllocationBits
== __kCFHasContentsAllocator
) __CFStrSetContentsAllocator(string
, CFRetain(externalCharactersAllocator
));
1667 CFStringSetExternalCharactersNoCopy(string
, chars
, numChars
, capacity
);
1672 CFMutableStringRef
CFStringCreateMutable(CFAllocatorRef alloc
, CFIndex maxLength
) {
1673 return __CFStringCreateMutableFunnel(alloc
, maxLength
, __kCFNotInlineContentsDefaultFree
);
1676 CFMutableStringRef
CFStringCreateMutableCopy(CFAllocatorRef alloc
, CFIndex maxLength
, CFStringRef string
) {
1677 CFMutableStringRef newString
;
1679 if (CF_IS_OBJC(__kCFStringTypeID
, string
)) {
1680 static SEL s
= NULL
;
1681 CFMutableStringRef (*func
)(void *, SEL
, ...) = (void *)__CFSendObjCMsg
;
1682 if (!s
) s
= sel_registerName("mutableCopy");
1683 newString
= func((void *)string
, s
);
1684 if (CF_USING_COLLECTABLE_MEMORY
) auto_zone_retain(__CFCollectableZone
, newString
); // needs hard retain IF using GC
1687 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFMutableStringRef, string, "mutableCopy");
1689 __CFAssertIsString(string
);
1691 newString
= CFStringCreateMutable(alloc
, maxLength
);
1692 __CFStringReplace(newString
, CFRangeMake(0, 0), string
);
1698 __private_extern__
void _CFStrSetDesiredCapacity(CFMutableStringRef str
, CFIndex len
) {
1699 __CFAssertIsStringAndMutable(str
);
1700 __CFStrSetDesiredCapacity(str
, len
);
1704 /* This one is for CF
1706 CFIndex
CFStringGetLength(CFStringRef str
) {
1707 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFIndex
, str
, "length");
1709 __CFAssertIsString(str
);
1710 return __CFStrLength(str
);
1713 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1715 CFIndex
_CFStringGetLength2(CFStringRef str
) {
1716 return __CFStrLength(str
);
1720 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere.
1722 CF_INLINE UniChar
__CFStringGetCharacterAtIndexGuts(CFStringRef str
, CFIndex idx
, const uint8_t *contents
) {
1723 if (__CFStrIsEightBit(str
)) {
1724 contents
+= __CFStrSkipAnyLengthByte(str
);
1726 if (!__CFCharToUniCharFunc
&& (contents
[idx
] >= 128)) {
1727 // Can't do log here, as it might be too early
1728 fprintf(stderr
, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
1731 return __CFCharToUniCharTable
[contents
[idx
]];
1734 return ((UniChar
*)contents
)[idx
];
1737 /* This one is for the CF API
1739 UniChar
CFStringGetCharacterAtIndex(CFStringRef str
, CFIndex idx
) {
1740 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, UniChar
, str
, "characterAtIndex:", idx
);
1742 __CFAssertIsString(str
);
1743 __CFAssertIndexIsInStringBounds(str
, idx
);
1744 return __CFStringGetCharacterAtIndexGuts(str
, idx
, __CFStrContents(str
));
1747 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1749 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str
, CFIndex idx
, UniChar
*ch
) {
1750 const uint8_t *contents
= __CFStrContents(str
);
1751 if (idx
>= __CFStrLength2(str
, contents
) && __CFStringNoteErrors()) return _CFStringErrBounds
;
1752 *ch
= __CFStringGetCharacterAtIndexGuts(str
, idx
, contents
);
1753 return _CFStringErrNone
;
1757 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
1759 CF_INLINE
void __CFStringGetCharactersGuts(CFStringRef str
, CFRange range
, UniChar
*buffer
, const uint8_t *contents
) {
1760 if (__CFStrIsEightBit(str
)) {
1761 __CFStrConvertBytesToUnicode(((uint8_t *)contents
) + (range
.location
+ __CFStrSkipAnyLengthByte(str
)), buffer
, range
.length
);
1763 const UniChar
*uContents
= ((UniChar
*)contents
) + range
.location
;
1764 memmove(buffer
, uContents
, range
.length
* sizeof(UniChar
));
1768 /* This one is for the CF API
1770 void CFStringGetCharacters(CFStringRef str
, CFRange range
, UniChar
*buffer
) {
1771 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "getCharacters:range:", buffer
, CFRangeMake(range
.location
, range
.length
));
1773 __CFAssertIsString(str
);
1774 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
1775 __CFStringGetCharactersGuts(str
, range
, buffer
, __CFStrContents(str
));
1778 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1780 int _CFStringCheckAndGetCharacters(CFStringRef str
, CFRange range
, UniChar
*buffer
) {
1781 const uint8_t *contents
= __CFStrContents(str
);
1782 if (range
.location
+ range
.length
> __CFStrLength2(str
, contents
) && __CFStringNoteErrors()) return _CFStringErrBounds
;
1783 __CFStringGetCharactersGuts(str
, range
, buffer
, contents
);
1784 return _CFStringErrNone
;
1788 CFIndex
CFStringGetBytes(CFStringRef str
, CFRange range
, CFStringEncoding encoding
, uint8_t lossByte
, Boolean isExternalRepresentation
, uint8_t *buffer
, CFIndex maxBufLen
, CFIndex
*usedBufLen
) {
1790 /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */
1791 __CFAssertIsNotNegative(maxBufLen
);
1793 if (!CF_IS_OBJC(__kCFStringTypeID
, str
)) { // If we can grope the ivars, let's do it...
1794 __CFAssertIsString(str
);
1795 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
1797 if (__CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
1798 const unsigned char *contents
= __CFStrContents(str
);
1799 CFIndex cLength
= range
.length
;
1802 if (cLength
> maxBufLen
) cLength
= maxBufLen
;
1803 memmove(buffer
, contents
+ __CFStrSkipAnyLengthByte(str
) + range
.location
, cLength
);
1805 if (usedBufLen
) *usedBufLen
= cLength
;
1811 return __CFStringEncodeByteStream(str
, range
.location
, range
.length
, isExternalRepresentation
, encoding
, lossByte
, buffer
, maxBufLen
, usedBufLen
);
1815 ConstStringPtr
CFStringGetPascalStringPtr (CFStringRef str
, CFStringEncoding encoding
) {
1817 if (!CF_IS_OBJC(__kCFStringTypeID
, str
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1818 __CFAssertIsString(str
);
1819 if (__CFStrHasLengthByte(str
) && __CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII
1820 const uint8_t *contents
= __CFStrContents(str
);
1821 if (__CFStrHasExplicitLength(str
) && (__CFStrLength2(str
, contents
) != (SInt32
)(*contents
))) return NULL
; // Invalid length byte
1822 return (ConstStringPtr
)contents
;
1824 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1830 const char * CFStringGetCStringPtr(CFStringRef str
, CFStringEncoding encoding
) {
1832 if (encoding
!= __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII
!= __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding
))) return NULL
;
1833 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1835 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, const char *, str
, "_fastCStringContents:", true);
1837 __CFAssertIsString(str
);
1839 if (__CFStrHasNullByte(str
)) {
1840 return (const char *)__CFStrContents(str
) + __CFStrSkipAnyLengthByte(str
);
1847 const UniChar
*CFStringGetCharactersPtr(CFStringRef str
) {
1849 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, const UniChar
*, str
, "_fastCharacterContents");
1851 __CFAssertIsString(str
);
1852 if (__CFStrIsUnicode(str
)) return (const UniChar
*)__CFStrContents(str
);
1857 Boolean
CFStringGetPascalString(CFStringRef str
, Str255 buffer
, CFIndex bufferSize
, CFStringEncoding encoding
) {
1861 __CFAssertIsNotNegative(bufferSize
);
1862 if (bufferSize
< 1) return false;
1864 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1865 length
= CFStringGetLength(str
);
1866 if (!__CFCanUseLengthByte(length
)) return false; // Can't fit into pstring
1868 const uint8_t *contents
;
1870 __CFAssertIsString(str
);
1872 contents
= __CFStrContents(str
);
1873 length
= __CFStrLength2(str
, contents
);
1875 if (!__CFCanUseLengthByte(length
)) return false; // Can't fit into pstring
1877 if (__CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
1878 if (length
>= bufferSize
) return false;
1879 memmove((void*)(1 + (const char*)buffer
), (__CFStrSkipAnyLengthByte(str
) + contents
), length
);
1885 if (__CFStringEncodeByteStream(str
, 0, length
, false, encoding
, false, (void*)(1 + (uint8_t*)buffer
), bufferSize
- 1, &usedLen
) != length
) {
1887 if (bufferSize
> 0) {
1888 strncpy((char *)buffer
+ 1, CONVERSIONFAILURESTR
, bufferSize
- 1);
1889 buffer
[0] = (CFIndex
)sizeof(CONVERSIONFAILURESTR
) < (bufferSize
- 1) ? (CFIndex
)sizeof(CONVERSIONFAILURESTR
) : (bufferSize
- 1);
1892 if (bufferSize
> 0) buffer
[0] = 0;
1900 Boolean
CFStringGetCString(CFStringRef str
, char *buffer
, CFIndex bufferSize
, CFStringEncoding encoding
) {
1901 const uint8_t *contents
;
1904 __CFAssertIsNotNegative(bufferSize
);
1905 if (bufferSize
< 1) return false;
1907 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID
, Boolean
, str
, "_getCString:maxLength:encoding:", buffer
, bufferSize
- 1, encoding
);
1909 __CFAssertIsString(str
);
1911 contents
= __CFStrContents(str
);
1912 len
= __CFStrLength2(str
, contents
);
1914 if (__CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
1915 if (len
>= bufferSize
) return false;
1916 memmove(buffer
, contents
+ __CFStrSkipAnyLengthByte(str
), len
);
1922 if (__CFStringEncodeByteStream(str
, 0, len
, false, encoding
, false, (unsigned char*) buffer
, bufferSize
- 1, &usedLen
) == len
) {
1923 buffer
[usedLen
] = '\0';
1927 strncpy(buffer
, CONVERSIONFAILURESTR
, bufferSize
);
1929 if (bufferSize
> 0) buffer
[0] = 0;
// Takes a CFLocaleRef and yields a bool; presumably tells callers whether locale-sensitive
// processing can be used for `locale`.
// NOTE(review): the body is not present in this extract -- confirm the actual result against the full file.
1937 CF_INLINE
bool _CFCanUseLocale(CFLocaleRef locale
) {
// Takes a CFLocaleRef and yields a C string; presumably the language identifier for `locale`.
// NOTE(review): the body is not present in this extract -- confirm the actual result (and whether
// it can be NULL) against the full file.
1941 static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale
) {
1945 #define MAX_CASE_MAPPING_BUF (8)
1946 #define ZERO_WIDTH_JOINER (0x200D)
1947 #define COMBINING_GRAPHEME_JOINER (0x034F)
1949 #define HANGUL_CHOSEONG_START (0x1100)
1950 #define HANGUL_CHOSEONG_END (0x115F)
1951 #define HANGUL_JUNGSEONG_START (0x1160)
1952 #define HANGUL_JUNGSEONG_END (0x11A2)
1953 #define HANGUL_JONGSEONG_START (0x11A8)
1954 #define HANGUL_JONGSEONG_END (0x11F9)
1956 #define HANGUL_SYLLABLE_START (0xAC00)
1957 #define HANGUL_SYLLABLE_END (0xD7AF)
1960 // Returns the length of characters filled into outCharacters. If no change, returns 0. maxBufLen shoule be at least 8
1961 static inline CFIndex
__CFStringFoldCharacterClusterAtIndex(UTF32Char character
, CFStringInlineBuffer
*buffer
, CFIndex index
, CFOptionFlags flags
, const uint8_t *langCode
, UTF32Char
*outCharacters
, CFIndex maxBufferLength
, CFIndex
*consumedLength
) {
1962 CFIndex filledLength
= 0, currentIndex
= index
;
1964 if (0 != character
) {
1965 UTF16Char lowSurrogate
;
1966 CFIndex planeNo
= (character
>> 16);
1967 bool isTurkikCapitalI
= false;
1968 static const uint8_t *decompBMP
= NULL
;
1969 static const uint8_t *nonBaseBMP
= NULL
;
1971 if (NULL
== decompBMP
) {
1972 decompBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, 0);
1973 nonBaseBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
1978 if ((character
< 0x0080) && ((NULL
== langCode
) || (character
!= 'I'))) { // ASCII
1979 if ((flags
& kCFCompareCaseInsensitive
) && (character
>= 'A') && (character
<= 'Z')) {
1980 character
+= ('a' - 'A');
1981 *outCharacters
= character
;
1985 // do width-insensitive mapping
1986 if ((flags
& kCFCompareWidthInsensitive
) && (character
>= 0xFF00) && (character
<= 0xFFEF)) {
1987 (void)CFUniCharCompatibilityDecompose(&character
, 1, 1);
1988 *outCharacters
= character
;
1993 if ((0 == planeNo
) && CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((lowSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
)))) {
1994 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, lowSurrogate
);
1996 planeNo
= (character
>> 16);
2000 if (flags
& (kCFCompareDiacriticsInsensitive
|kCFCompareNonliteral
)) {
2001 if (CFUniCharIsMemberOfBitmap(character
, ((0 == planeNo
) ? decompBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, planeNo
)))) {
2002 filledLength
= CFUniCharDecomposeCharacter(character
, outCharacters
, maxBufferLength
);
2003 character
= *outCharacters
;
2004 if ((flags
& kCFCompareDiacriticsInsensitive
) && (character
< 0x0510)) filledLength
= 1; // reset if Roman, Greek, Cyrillic
2009 if (flags
& kCFCompareCaseInsensitive
) {
2010 const uint8_t *nonBaseBitmap
;
2011 bool filterNonBase
= (((flags
& kCFCompareDiacriticsInsensitive
) && (character
< 0x0510)) ? true : false);
2012 static const uint8_t *lowerBMP
= NULL
;
2013 static const uint8_t *caseFoldBMP
= NULL
;
2015 if (NULL
== lowerBMP
) {
2016 lowerBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet
, 0);
2017 caseFoldBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet
, 0);
2020 if ((NULL
!= langCode
) && ('I' == character
) && ((0 == strcmp(langCode
, "tr")) || (0 == strcmp(langCode
, "az")))) { // do Turkik special-casing
2021 if (filledLength
> 1) {
2022 if (0x0307 == outCharacters
[1]) {
2023 memmove(&(outCharacters
[index
]), &(outCharacters
[index
+ 1]), sizeof(UTF32Char
) * (--filledLength
));
2024 character
= *outCharacters
= 'i';
2025 isTurkikCapitalI
= true;
2027 } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
)) {
2028 character
= *outCharacters
= 'i';
2031 isTurkikCapitalI
= true;
2034 if (!isTurkikCapitalI
&& (CFUniCharIsMemberOfBitmap(character
, ((0 == planeNo
) ? lowerBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet
, planeNo
))) || CFUniCharIsMemberOfBitmap(character
, ((0 == planeNo
) ? caseFoldBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet
, planeNo
))))) {
2035 UTF16Char caseFoldBuffer
[MAX_CASE_MAPPING_BUF
];
2036 const UTF16Char
*bufferP
= caseFoldBuffer
, *bufferLimit
;
2037 UTF32Char
*outCharactersP
= outCharacters
;
2038 uint32_t bufferLength
= CFUniCharMapCaseTo(character
, caseFoldBuffer
, MAX_CASE_MAPPING_BUF
, kCFUniCharCaseFold
, 0, langCode
);
2040 bufferLimit
= bufferP
+ bufferLength
;
2042 if (filledLength
> 0) --filledLength
; // decrement filledLength (will add back later)
2044 // make space for casefold characters
2045 if ((filledLength
> 0) && (bufferLength
> 1)) {
2046 CFIndex totalScalerLength
= 0;
2048 while (bufferP
< bufferLimit
) {
2049 if (CFUniCharIsSurrogateHighCharacter(*(bufferP
++)) && (bufferP
< bufferLimit
) && CFUniCharIsSurrogateLowCharacter(*bufferP
)) ++bufferP
;
2050 ++totalScalerLength
;
2052 memmove(outCharacters
+ totalScalerLength
, outCharacters
+ 1, filledLength
* sizeof(UTF32Char
));
2053 bufferP
= caseFoldBuffer
;
2057 while (bufferP
< bufferLimit
) {
2058 character
= *(bufferP
++);
2059 if (CFUniCharIsSurrogateHighCharacter(character
) && (bufferP
< bufferLimit
) && CFUniCharIsSurrogateLowCharacter(*bufferP
)) {
2060 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, *(bufferP
++));
2061 nonBaseBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (character
>> 16));
2063 nonBaseBitmap
= nonBaseBMP
;
2066 if (!filterNonBase
|| !CFUniCharIsMemberOfBitmap(character
, nonBaseBitmap
)) {
2067 *(outCharactersP
++) = character
;
2075 // collect following combining marks
2076 if (flags
& (kCFCompareDiacriticsInsensitive
|kCFCompareNonliteral
)) {
2077 const uint8_t *nonBaseBitmap
;
2078 const uint8_t *decompBitmap
;
2079 bool doFill
= (((flags
& kCFCompareDiacriticsInsensitive
) && (character
< 0x0510)) ? false : true);
2081 if (doFill
&& (0 == filledLength
)) { // check if really needs to fill
2082 UTF32Char nonBaseCharacter
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
);
2084 if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter
) && CFUniCharIsSurrogateLowCharacter((lowSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
+ 1)))) {
2085 nonBaseCharacter
= CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter
, lowSurrogate
);
2086 nonBaseBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (nonBaseCharacter
>> 16));
2087 decompBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (nonBaseCharacter
>> 16));
2089 nonBaseBitmap
= nonBaseBMP
;
2090 decompBitmap
= decompBMP
;
2093 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter
, nonBaseBitmap
)) {
2094 outCharacters
[filledLength
++] = character
;
2096 if ((0 == (flags
& kCFCompareDiacriticsInsensitive
)) || (nonBaseCharacter
> 0x050F)) {
2097 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter
, decompBitmap
)) {
2098 filledLength
+= CFUniCharDecomposeCharacter(nonBaseCharacter
, &(outCharacters
[filledLength
]), maxBufferLength
- filledLength
);
2100 outCharacters
[filledLength
++] = nonBaseCharacter
;
2103 currentIndex
+= ((nonBaseBitmap
== nonBaseBMP
) ? 1 : 2);
2109 while (filledLength
< maxBufferLength
) { // do the rest
2110 character
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
);
2112 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((lowSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
+ 1)))) {
2113 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, lowSurrogate
);
2114 nonBaseBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (character
>> 16));
2115 decompBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (character
>> 16));
2117 nonBaseBitmap
= nonBaseBMP
;
2118 decompBitmap
= decompBMP
;
2120 if (isTurkikCapitalI
) {
2121 isTurkikCapitalI
= false;
2122 } else if (CFUniCharIsMemberOfBitmap(character
, nonBaseBitmap
)) {
2123 if (doFill
&& ((0 == (flags
& kCFCompareDiacriticsInsensitive
)) || (character
> 0x050F))) {
2124 if (CFUniCharIsMemberOfBitmap(character
, decompBitmap
)) {
2125 CFIndex currentLength
= CFUniCharDecomposeCharacter(character
, &(outCharacters
[filledLength
]), maxBufferLength
- filledLength
);
2127 if (0 == currentLength
) break; // didn't fit
2129 filledLength
+= currentLength
;
2131 outCharacters
[filledLength
++] = character
;
2134 currentIndex
+= ((nonBaseBitmap
== nonBaseBMP
) ? 1 : 2);
2140 if (filledLength
> 1) CFUniCharPrioritySort(outCharacters
, filledLength
); // priority sort
2144 if ((filledLength
> 0) && (NULL
!= consumedLength
)) *consumedLength
= (currentIndex
- index
);
2146 return filledLength
;
/* File-scope state for the punctuation-ignoring compare path, compiled only when
   DO_IGNORE_PUNCTUATION is non-zero: __CFLocaleChecked starts out false and __CFPunctSetBMP
   starts out NULL; UKRAINIAN_LANG_CODE is the language code value that path tests for. */
2149 /* Special casing for Uk sorting */
2150 #define DO_IGNORE_PUNCTUATION 1
2151 #if DO_IGNORE_PUNCTUATION
2152 #define UKRAINIAN_LANG_CODE (45)
2153 static bool __CFLocaleChecked
= false;
2154 static const uint8_t *__CFPunctSetBMP
= NULL
;
2155 #endif /* DO_IGNORE_PUNCTUATION */
2157 /* ??? We need to implement some additional flags here
2158 ??? Also, pay attention to flag 2, which is the NS flag (which CF has as flag 16, w/opposite meaning).
2160 CFComparisonResult
CFStringCompareWithOptions(CFStringRef string
, CFStringRef string2
, CFRange rangeToCompare
, CFOptionFlags compareOptions
) {
2161 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2162 CFStringInlineBuffer strBuf1
, strBuf2
;
2164 const uint8_t *punctBMP
= NULL
;
2165 Boolean caseInsensitive
= (compareOptions
& kCFCompareCaseInsensitive
? true : false);
2166 Boolean decompose
= (compareOptions
& kCFCompareNonliteral
? true : false);
2167 Boolean numerically
= (compareOptions
& kCFCompareNumerically
? true : false);
2168 Boolean localized
= (compareOptions
& kCFCompareLocalized
? true : false);
2170 #if DO_IGNORE_PUNCTUATION
2172 if (!__CFLocaleChecked
) {
2173 CFArrayRef locales
= _CFBundleCopyUserLanguages(false);
2175 if (locales
&& (CFArrayGetCount(locales
) > 0)) {
2178 if (CFBundleGetLocalizationInfoForLocalization((CFStringRef
)CFArrayGetValueAtIndex(locales
, 0), &langCode
, NULL
, NULL
, NULL
) && (langCode
== UKRAINIAN_LANG_CODE
)) {
2179 __CFPunctSetBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, 0);
2184 __CFLocaleChecked
= true;
2187 punctBMP
= __CFPunctSetBMP
;
2189 #endif /* DO_IGNORE_PUNCTUATION */
2191 CFStringInitInlineBuffer(string
, &strBuf1
, CFRangeMake(rangeToCompare
.location
, rangeToCompare
.length
));
2192 CFIndex strBuf1_idx
= 0;
2193 CFIndex string2_len
= CFStringGetLength(string2
);
2194 CFStringInitInlineBuffer(string2
, &strBuf2
, CFRangeMake(0, string2_len
));
2195 CFIndex strBuf2_idx
= 0;
2197 while (strBuf1_idx
< rangeToCompare
.length
&& strBuf2_idx
< string2_len
) {
2198 ch1
= CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
);
2199 ch2
= CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
);
2201 if (numerically
&& (ch1
<= '9' && ch1
>= '0') && (ch2
<= '9' && ch2
>= '0')) { // If both are not digits, then don't do numerical comparison
2202 uint64_t n1
= 0; // !!! Doesn't work if numbers are > max uint64_t
2205 n1
= n1
* 10 + (ch1
- '0');
2207 if (rangeToCompare
.length
<= strBuf1_idx
) break;
2208 ch1
= CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
);
2209 } while (ch1
<= '9' && ch1
>= '0');
2211 n2
= n2
* 10 + (ch2
- '0');
2213 if (string2_len
<= strBuf2_idx
) break;
2214 ch2
= CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
);
2215 } while (ch2
<= '9' && ch2
>= '0');
2216 if (n1
< n2
) return kCFCompareLessThan
; else if (n1
> n2
) return kCFCompareGreaterThan
;
2217 continue; // If numbers were equal, go back to top without incrementing the buffer pointers
2220 if (CFUniCharIsSurrogateHighCharacter(ch1
)) {
2222 if (strBuf1_idx
< rangeToCompare
.length
&& CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
))) {
2223 ch1
= CFUniCharGetLongCharacterForSurrogatePair(ch1
, CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
));
2228 if (CFUniCharIsSurrogateHighCharacter(ch2
)) {
2230 if (strBuf2_idx
< string2_len
&& CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
))) {
2231 ch2
= CFUniCharGetLongCharacterForSurrogatePair(ch2
, CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
));
2238 #if DO_IGNORE_PUNCTUATION
2240 if (CFUniCharIsMemberOfBitmap(ch1
, (ch1
< 0x10000 ? punctBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, (ch1
>> 16))))) {
2241 ++strBuf1_idx
; continue;
2243 if (CFUniCharIsMemberOfBitmap(ch2
, (ch2
< 0x10000 ? punctBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, (ch2
>> 16))))) {
2244 ++strBuf2_idx
; continue;
2247 #endif /* DO_IGNORE_PUNCTUATION */
2248 // We standardize to lowercase here since currently, as of Unicode 3.1.1, it's one-to-one mapping.
2249 // Note we map to uppercase for both SMALL LETTER SIGMA and SMALL LETTER FINAL SIGMA
2250 if (caseInsensitive
) {
2252 ch1
-= ((ch1
>= 'A' && ch1
<= 'Z') ? 'A' - 'a' : 0);
2253 } else if (ch1
== 0x03C2 || ch1
== 0x03C3 || ch1
== 0x03A3) { // SMALL SIGMA
2256 UniChar buffer
[MAX_CASE_MAPPING_BUF
];
2258 if (CFUniCharMapCaseTo(ch1
, buffer
, MAX_CASE_MAPPING_BUF
, kCFUniCharToLowercase
, 0, NULL
) > 1) { // It's supposed to be surrogates
2259 ch1
= CFUniCharGetLongCharacterForSurrogatePair(buffer
[0], buffer
[1]);
2265 ch2
-= ((ch2
>= 'A' && ch2
<= 'Z') ? 'A' - 'a' : 0);
2266 } else if (ch2
== 0x03C2 || ch2
== 0x03C3 || ch2
== 0x03A3) { // SMALL SIGMA
2269 UniChar buffer
[MAX_CASE_MAPPING_BUF
];
2271 if (CFUniCharMapCaseTo(ch2
, buffer
, MAX_CASE_MAPPING_BUF
, kCFUniCharToLowercase
, 0, NULL
) > 1) { // It's supposed to be surrogates
2272 ch2
= CFUniCharGetLongCharacterForSurrogatePair(buffer
[0], buffer
[1]);
2279 if (ch1
!= ch2
) { // still different
2280 if (decompose
) { // ??? This is not exactly the canonical comparison (We need to do priority sort)
2281 Boolean isCh1Decomposable
= (ch1
> 0x7F && CFUniCharIsMemberOf(ch1
, kCFUniCharDecomposableCharacterSet
));
2282 Boolean isCh2Decomposable
= (ch2
> 0x7F && CFUniCharIsMemberOf(ch2
, kCFUniCharDecomposableCharacterSet
));
2284 if (isCh1Decomposable
!= isCh2Decomposable
) {
2285 UTF32Char decomposedCharater
[MAX_DECOMPOSED_LENGTH
];
2286 UInt32 decomposedCharacterLength
;
2289 if (isCh1Decomposable
) {
2290 decomposedCharacterLength
= CFUniCharDecomposeCharacter(ch1
, decomposedCharater
, MAX_DECOMPOSED_LENGTH
);
2291 if ((string2_len
- strBuf2_idx
) < decomposedCharacterLength
) { // the remaining other length is shorter
2292 if (ch1
< ch2
) return kCFCompareLessThan
; else if (ch1
> ch2
) return kCFCompareGreaterThan
;
2294 for (idx
= 0; idx
< decomposedCharacterLength
; idx
++) {
2295 ch1
= decomposedCharater
[idx
];
2296 if (ch1
< ch2
) return kCFCompareLessThan
; else if (ch1
> ch2
) return kCFCompareGreaterThan
;
2297 strBuf2_idx
++; ch2
= (strBuf2_idx
< string2_len
? CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
) : 0xffff);
2298 if (CFUniCharIsSurrogateHighCharacter(ch2
)) {
2300 if (strBuf2_idx
< string2_len
&& CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
))) {
2301 ch2
= CFUniCharGetLongCharacterForSurrogatePair(ch2
, CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
));
2307 strBuf1_idx
++; continue;
2308 } else { // ch2 is decomposable, then
2309 decomposedCharacterLength
= CFUniCharDecomposeCharacter(ch2
, decomposedCharater
, MAX_DECOMPOSED_LENGTH
);
2310 if ((rangeToCompare
.length
- strBuf1_idx
) < decomposedCharacterLength
) { // the remaining other length is shorter
2311 if (ch1
< ch2
) return kCFCompareLessThan
; else if (ch1
> ch2
) return kCFCompareGreaterThan
;
2313 for (idx
= 0; idx
< decomposedCharacterLength
&& strBuf1_idx
< rangeToCompare
.length
; idx
++) {
2314 ch2
= decomposedCharater
[idx
];
2315 if (ch1
< ch2
) return kCFCompareLessThan
; else if (ch1
> ch2
) return kCFCompareGreaterThan
;
2316 strBuf1_idx
++; ch1
= (strBuf1_idx
< rangeToCompare
.length
? CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
) : 0xffff);
2317 if (CFUniCharIsSurrogateHighCharacter(ch1
)) {
2319 if (strBuf1_idx
< rangeToCompare
.length
&& CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
))) {
2320 ch1
= CFUniCharGetLongCharacterForSurrogatePair(ch1
, CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
));
2326 strBuf2_idx
++; continue;
2330 if (ch1
< ch2
) return kCFCompareLessThan
; else if (ch1
> ch2
) return kCFCompareGreaterThan
;
2333 strBuf1_idx
++; strBuf2_idx
++;
2335 if (strBuf1_idx
< rangeToCompare
.length
) {
2336 return kCFCompareGreaterThan
;
2337 } else if (strBuf2_idx
< string2_len
) {
2338 return kCFCompareLessThan
;
2340 return kCFCompareEqualTo
;
2345 CFComparisonResult
CFStringCompare(CFStringRef string
, CFStringRef str2
, CFOptionFlags options
) {
2346 return CFStringCompareWithOptions(string
, str2
, CFRangeMake(0, CFStringGetLength(string
)), options
);
2349 #define kCFStringStackBufferLength (64)
2351 Boolean
CFStringFindWithOptions(CFStringRef string
, CFStringRef stringToFind
, CFRange rangeToSearch
, CFOptionFlags compareOptions
, CFRange
*result
) {
2352 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2353 CFIndex findStrLen
= CFStringGetLength(stringToFind
);
2354 Boolean didFind
= false;
2355 bool lengthVariants
= ((compareOptions
& (kCFCompareCaseInsensitive
|kCFCompareNonliteral
|kCFCompareDiacriticsInsensitive
)) ? true : false);
2357 if ((findStrLen
> 0) && (rangeToSearch
.length
> 0) && ((findStrLen
<= rangeToSearch
.length
) || lengthVariants
)) {
2358 UTF32Char strBuf1
[kCFStringStackBufferLength
];
2359 UTF32Char strBuf2
[kCFStringStackBufferLength
];
2360 CFStringInlineBuffer inlineBuf1
, inlineBuf2
;
2361 UTF32Char str1Char
, str2Char
;
2362 CFStringEncoding eightBitEncoding
= __CFStringGetEightBitStringEncoding();
2363 const uint8_t *str1Bytes
= CFStringGetCStringPtr(string
, eightBitEncoding
);
2364 const uint8_t *str2Bytes
= CFStringGetCStringPtr(stringToFind
, eightBitEncoding
);
2365 const UTF32Char
*characters
, *charactersLimit
;
2366 const uint8_t *langCode
= NULL
;
2367 CFIndex fromLoc
, toLoc
;
2368 CFIndex str1Index
, str2Index
;
2369 CFIndex strBuf1Len
, strBuf2Len
;
2370 bool equalityOptions
= ((lengthVariants
|| (compareOptions
& kCFCompareWidthInsensitive
)) ? true : false);
2371 bool caseInsensitive
= ((compareOptions
& kCFCompareCaseInsensitive
) ? true : false);
2375 CFStringInitInlineBuffer(string
, &inlineBuf1
, CFRangeMake(0, rangeToSearch
.location
+ rangeToSearch
.length
));
2376 CFStringInitInlineBuffer(stringToFind
, &inlineBuf2
, CFRangeMake(0, findStrLen
));
2378 if (compareOptions
& kCFCompareBackwards
) {
2379 fromLoc
= rangeToSearch
.location
+ rangeToSearch
.length
- (lengthVariants
? 1 : findStrLen
);
2380 toLoc
= (((compareOptions
& kCFCompareAnchored
) && !lengthVariants
) ? fromLoc
: rangeToSearch
.location
);
2382 fromLoc
= rangeToSearch
.location
;
2383 toLoc
= ((compareOptions
& kCFCompareAnchored
) ? fromLoc
: rangeToSearch
.location
+ rangeToSearch
.length
- (lengthVariants
? 1 : findStrLen
));
2386 delta
= ((fromLoc
<= toLoc
) ? 1 : -1);
2388 if ((NULL
!= str1Bytes
) && (NULL
!= str2Bytes
)) {
2389 CFIndex maxStr1Index
= (rangeToSearch
.location
+ rangeToSearch
.length
);
2390 uint8_t str1Byte
, str2Byte
;
2393 str1Index
= fromLoc
;
2396 while ((str1Index
< maxStr1Index
) && (str2Index
< findStrLen
)) {
2397 str1Byte
= str1Bytes
[str1Index
];
2398 str2Byte
= str2Bytes
[str2Index
];
2400 if (str1Byte
!= str2Byte
) {
2401 if (equalityOptions
) {
2402 if ((str1Byte
< 0x80) && ((NULL
== langCode
) || ('I' != str1Byte
))) {
2403 if (caseInsensitive
&& (str1Byte
>= 'A') && (str1Byte
<= 'Z')) str1Byte
+= ('a' - 'A');
2404 *strBuf1
= str1Byte
;
2407 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
2408 strBuf1Len
= __CFStringFoldCharacterClusterAtIndex(str1Char
, &inlineBuf1
, str1Index
, compareOptions
, langCode
, strBuf1
, kCFStringStackBufferLength
, NULL
);
2409 if (1 > strBuf1Len
) {
2410 *strBuf1
= str1Char
;
2414 if ((str2Byte
< 0x80) && ((NULL
== langCode
) || ('I' != str2Byte
))) {
2415 if (caseInsensitive
&& (str2Byte
>= 'A') && (str2Byte
<= 'Z')) str2Byte
+= ('a' - 'A');
2416 *strBuf2
= str2Byte
;
2419 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
);
2420 strBuf2Len
= __CFStringFoldCharacterClusterAtIndex(str2Char
, &inlineBuf2
, str2Index
, compareOptions
, langCode
, strBuf2
, kCFStringStackBufferLength
, NULL
);
2421 if (1 > strBuf2Len
) {
2422 *strBuf2
= str2Char
;
2427 if ((1 == strBuf1Len
) && (1 == strBuf2Len
)) { // normal case
2428 if (*strBuf1
!= *strBuf2
) break;
2432 if (!caseInsensitive
&& (strBuf1Len
!= strBuf2Len
)) break;
2433 if (memcmp(strBuf1
, strBuf2
, sizeof(UTF32Char
) * __CFMin(strBuf1Len
, strBuf2Len
))) break;
2435 if (strBuf1Len
< strBuf2Len
) {
2436 delta
= strBuf2Len
- strBuf1Len
;
2438 if ((str1Index
+ strBuf1Len
+ delta
) > (rangeToSearch
.location
+ rangeToSearch
.length
)) break;
2440 characters
= &(strBuf2
[strBuf1Len
]);
2441 charactersLimit
= characters
+ delta
;
2443 while (characters
< charactersLimit
) {
2444 strBuf1Len
= __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1), &inlineBuf1
, str1Index
+ 1, compareOptions
, langCode
, strBuf1
, kCFStringStackBufferLength
, NULL
);
2445 if ((strBuf1Len
> 0) || (*characters
!= *strBuf1
)) break;
2446 ++characters
; ++str1Index
;
2448 if (characters
< charactersLimit
) break;
2449 } else if (strBuf2Len
< strBuf1Len
) {
2450 delta
= strBuf1Len
- strBuf2Len
;
2452 if ((str2Index
+ strBuf2Len
+ delta
) > findStrLen
) break;
2454 characters
= &(strBuf1
[strBuf2Len
]);
2455 charactersLimit
= characters
+ delta
;
2457 while (characters
< charactersLimit
) {
2458 strBuf2Len
= __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str1Index
+ 1), &inlineBuf2
, str2Index
+ 1, compareOptions
, langCode
, strBuf2
, kCFStringStackBufferLength
, NULL
);
2459 if ((strBuf2Len
> 0) || (*characters
!= *strBuf2
)) break;
2460 ++characters
; ++str2Index
;
2462 if (characters
< charactersLimit
) break;
2469 ++str1Index
; ++str2Index
;
2472 if (str2Index
== findStrLen
) {
2473 if (((kCFCompareBackwards
|kCFCompareAnchored
) != (compareOptions
& (kCFCompareBackwards
|kCFCompareAnchored
))) || (str1Index
== (rangeToSearch
.location
+ rangeToSearch
.length
))) {
2475 if (NULL
!= result
) *result
= CFRangeMake(fromLoc
, str1Index
- fromLoc
);
2480 if (fromLoc
== toLoc
) break;
2483 } else if (equalityOptions
) {
2484 UTF16Char otherChar
;
2485 CFIndex str1UsedLen
, str2UsedLen
, strBuf1Index
= 0, strBuf2Index
= 0;
2486 bool diacriticsInsensitive
= ((compareOptions
& kCFCompareDiacriticsInsensitive
) ? true : false);
2487 static const uint8_t *nonBaseBMP
= NULL
;
2488 static const uint8_t *combClassBMP
= NULL
;
2490 if (NULL
== nonBaseBMP
) {
2491 nonBaseBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
2492 combClassBMP
= CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
2496 str1Index
= fromLoc
;
2499 strBuf1Len
= strBuf2Len
= 0;
2501 while (str2Index
< findStrLen
) {
2502 if (strBuf1Len
== 0) {
2503 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
2504 if (caseInsensitive
&& (str1Char
>= 'A') && (str1Char
<= 'Z') && ((NULL
== langCode
) || (str1Char
!= 'I'))) str1Char
+= ('a' - 'A');
2507 str1Char
= strBuf1
[strBuf1Index
++];
2509 if (strBuf2Len
== 0) {
2510 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
);
2511 if (caseInsensitive
&& (str2Char
>= 'A') && (str2Char
<= 'Z') && ((NULL
== langCode
) || (str2Char
!= 'I'))) str2Char
+= ('a' - 'A');
2514 str2Char
= strBuf2
[strBuf2Index
++];
2517 if (str1Char
!= str2Char
) {
2518 if ((str1Char
< 0x80) && (str2Char
< 0x80) && ((NULL
== langCode
) || !caseInsensitive
)) break;
2520 if (CFUniCharIsSurrogateHighCharacter(str1Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1)))) {
2521 str1Char
= CFUniCharGetLongCharacterForSurrogatePair(str1Char
, otherChar
);
2525 if (CFUniCharIsSurrogateHighCharacter(str2Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
+ 1)))) {
2526 str2Char
= CFUniCharGetLongCharacterForSurrogatePair(str2Char
, otherChar
);
2530 if (diacriticsInsensitive
&& (str1Index
> fromLoc
)) {
2531 if ((0 == strBuf1Len
) && CFUniCharIsMemberOfBitmap(str1Char
, ((str1Char
< 0x10000) ? nonBaseBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (str1Char
>> 16))))) str1Char
= str2Char
;
2532 if ((0 == strBuf2Len
) && CFUniCharIsMemberOfBitmap(str2Char
, ((str2Char
< 0x10000) ? nonBaseBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (str2Char
>> 16))))) str2Char
= str1Char
;
2535 if (str1Char
!= str2Char
) {
2536 if (0 == strBuf1Len
) {
2537 strBuf1Len
= __CFStringFoldCharacterClusterAtIndex(str1Char
, &inlineBuf1
, str1Index
, compareOptions
, langCode
, strBuf1
, kCFStringStackBufferLength
, &str1UsedLen
);
2538 if (strBuf1Len
> 0) {
2539 str1Char
= *strBuf1
;
2544 if ((0 == strBuf1Len
) && (0 < strBuf2Len
)) break;
2546 if ((0 == strBuf2Len
) && ((0 == strBuf1Len
) || (str1Char
!= str2Char
))) {
2547 strBuf2Len
= __CFStringFoldCharacterClusterAtIndex(str2Char
, &inlineBuf2
, str2Index
, compareOptions
, langCode
, strBuf2
, kCFStringStackBufferLength
, &str2UsedLen
);
2548 if ((0 == strBuf2Len
) || (str1Char
!= *strBuf2
)) break;
2553 if ((strBuf1Len
> 0) && (strBuf2Len
> 0)) {
2554 while ((strBuf1Index
< strBuf1Len
) && (strBuf2Index
< strBuf2Len
)) {
2555 if (strBuf1
[strBuf1Index
] != strBuf2
[strBuf2Index
]) break;
2556 ++strBuf1Index
; ++strBuf2Index
;
2558 if ((strBuf1Index
< strBuf1Len
) && (strBuf2Index
< strBuf2Len
)) break;
2562 if ((strBuf1Len
> 0) && (strBuf1Index
== strBuf1Len
)) strBuf1Len
= 0;
2563 if ((strBuf2Len
> 0) && (strBuf2Index
== strBuf2Len
)) strBuf2Len
= 0;
2565 if (strBuf1Len
== 0) str1Index
+= str1UsedLen
;
2566 if (strBuf2Len
== 0) str2Index
+= str2UsedLen
;
2569 if (str2Index
== findStrLen
) {
2572 if (strBuf1Len
> 0) {
2575 if ((compareOptions
& kCFCompareDiacriticsInsensitive
) && (strBuf1
[0] < 0x0510)) {
2576 while (strBuf1Index
< strBuf1Len
) {
2577 if (!CFUniCharIsMemberOfBitmap(strBuf1
[strBuf1Index
], ((strBuf1
[strBuf1Index
] < 0x10000) ? nonBaseBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (strBuf1
[strBuf1Index
] >> 16))))) break;
2581 if (strBuf1Index
== strBuf1Len
) {
2582 str1Index
+= str1UsedLen
;
2588 if (match
&& (compareOptions
& (kCFCompareDiacriticsInsensitive
|kCFCompareNonliteral
)) && (str1Index
< (rangeToSearch
.location
+ rangeToSearch
.length
))) {
2589 const uint8_t *nonBaseBitmap
;
2591 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
2593 if (CFUniCharIsSurrogateHighCharacter(str1Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1)))) {
2594 str1Char
= CFUniCharGetLongCharacterForSurrogatePair(str1Char
, otherChar
);
2595 nonBaseBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (str1Char
>> 16));
2597 nonBaseBitmap
= nonBaseBMP
;
2600 if (CFUniCharIsMemberOfBitmap(str1Char
, nonBaseBitmap
)) {
2601 if (diacriticsInsensitive
) {
2602 if (str1Char
< 0x10000) {
2603 CFIndex index
= str1Index
;
2606 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, --index
);
2607 } while (CFUniCharIsMemberOfBitmap(str1Char
, nonBaseBMP
), (rangeToSearch
.location
< index
));
2609 if (str1Char
< 0x0510) {
2610 CFIndex maxIndex
= (rangeToSearch
.location
+ rangeToSearch
.length
);
2612 while (++str1Index
< maxIndex
) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
), nonBaseBMP
)) break;
2618 } else if (!diacriticsInsensitive
) {
2619 otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
- 1);
2621 // this is assuming viramas are only in BMP ???
2622 if ((str1Char
== COMBINING_GRAPHEME_JOINER
) || (otherChar
== COMBINING_GRAPHEME_JOINER
) || (otherChar
== ZERO_WIDTH_JOINER
) || ((otherChar
>= HANGUL_CHOSEONG_START
) && (otherChar
<= HANGUL_JONGSEONG_END
)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar
, combClassBMP
) == 9)) {
2623 CFRange clusterRange
= CFStringGetRangeOfCharacterClusterAtIndex(string
, str1Index
- 1, kCFStringGramphemeCluster
);
2625 if (str1Index
< (clusterRange
.location
+ clusterRange
.length
)) match
= false;
2631 if (((kCFCompareBackwards
|kCFCompareAnchored
) != (compareOptions
& (kCFCompareBackwards
|kCFCompareAnchored
))) || (str1Index
== (rangeToSearch
.location
+ rangeToSearch
.length
))) {
2633 if (NULL
!= result
) *result
= CFRangeMake(fromLoc
, str1Index
- fromLoc
);
2639 if (fromLoc
== toLoc
) break;
2644 str1Index
= fromLoc
;
2647 while (str2Index
< findStrLen
) {
2648 if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
)) break;
2650 ++str1Index
; ++str2Index
;
2653 if (str2Index
== findStrLen
) {
2655 if (NULL
!= result
) *result
= CFRangeMake(fromLoc
, findStrLen
);
2659 if (fromLoc
== toLoc
) break;
2668 // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
2670 static const void *__rangeRetain(CFAllocatorRef allocator
, const void *ptr
) {
2671 CFRetain(*(CFDataRef
*)((uint8_t *)ptr
+ sizeof(CFRange
)));
2675 static void __rangeRelease(CFAllocatorRef allocator
, const void *ptr
) {
2676 CFRelease(*(CFDataRef
*)((uint8_t *)ptr
+ sizeof(CFRange
)));
2679 static CFStringRef
__rangeCopyDescription(const void *ptr
) {
2680 CFRange range
= *(CFRange
*)ptr
;
2681 return CFStringCreateWithFormat(NULL
/* ??? allocator */, NULL
, CFSTR("{%d, %d}"), range
.location
, range
.length
);
2684 static Boolean
__rangeEqual(const void *ptr1
, const void *ptr2
) {
2685 CFRange range1
= *(CFRange
*)ptr1
;
2686 CFRange range2
= *(CFRange
*)ptr2
;
2687 return (range1
.location
== range2
.location
) && (range1
.length
== range2
.length
);
2691 CFArrayRef
CFStringCreateArrayWithFindResults(CFAllocatorRef alloc
, CFStringRef string
, CFStringRef stringToFind
, CFRange rangeToSearch
, CFOptionFlags compareOptions
) {
2693 Boolean backwards
= compareOptions
& kCFCompareBackwards
;
2694 UInt32 endIndex
= rangeToSearch
.location
+ rangeToSearch
.length
;
2695 CFMutableDataRef rangeStorage
= NULL
; // Basically an array of CFRange, CFDataRef (packed)
2696 uint8_t *rangeStorageBytes
= NULL
;
2697 CFIndex foundCount
= 0;
2698 CFIndex capacity
= 0; // Number of CFRange, CFDataRef element slots in rangeStorage
2700 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
2702 while ((rangeToSearch
.length
> 0) && CFStringFindWithOptions(string
, stringToFind
, rangeToSearch
, compareOptions
, &foundRange
)) {
2703 // Determine the next range
2705 rangeToSearch
.length
= foundRange
.location
- rangeToSearch
.location
;
2707 rangeToSearch
.location
= foundRange
.location
+ foundRange
.length
;
2708 rangeToSearch
.length
= endIndex
- rangeToSearch
.location
;
2711 // If necessary, grow the data and squirrel away the found range
2712 if (foundCount
>= capacity
) {
2713 if (rangeStorage
== NULL
) rangeStorage
= CFDataCreateMutable(alloc
, 0);
2714 capacity
= (capacity
+ 4) * 2;
2715 CFDataSetLength(rangeStorage
, capacity
* (sizeof(CFRange
) + sizeof(CFDataRef
)));
2716 rangeStorageBytes
= (uint8_t *)CFDataGetMutableBytePtr(rangeStorage
) + foundCount
* (sizeof(CFRange
) + sizeof(CFDataRef
));
2718 memmove(rangeStorageBytes
, &foundRange
, sizeof(CFRange
)); // The range
2719 memmove(rangeStorageBytes
+ sizeof(CFRange
), &rangeStorage
, sizeof(CFDataRef
)); // The data
2720 rangeStorageBytes
+= (sizeof(CFRange
) + sizeof(CFDataRef
));
2724 if (foundCount
> 0) {
2726 CFMutableArrayRef array
;
2727 const CFArrayCallBacks callbacks
= {0, __rangeRetain
, __rangeRelease
, __rangeCopyDescription
, __rangeEqual
};
2729 CFDataSetLength(rangeStorage
, foundCount
* (sizeof(CFRange
) + sizeof(CFDataRef
))); // Tighten storage up
2730 rangeStorageBytes
= (uint8_t *)CFDataGetMutableBytePtr(rangeStorage
);
2732 array
= CFArrayCreateMutable(alloc
, foundCount
* sizeof(CFRange
*), &callbacks
);
2733 for (cnt
= 0; cnt
< foundCount
; cnt
++) {
2734 // Each element points to the appropriate CFRange in the CFData
2735 CFArrayAppendValue(array
, rangeStorageBytes
+ cnt
* (sizeof(CFRange
) + sizeof(CFDataRef
)));
2737 CFRelease(rangeStorage
); // We want the data to go away when all CFRanges inside it are released...
2745 CFRange
CFStringFind(CFStringRef string
, CFStringRef stringToFind
, CFOptionFlags compareOptions
) {
2748 if (CFStringFindWithOptions(string
, stringToFind
, CFRangeMake(0, CFStringGetLength(string
)), compareOptions
, &foundRange
)) {
2751 return CFRangeMake(kCFNotFound
, 0);
2755 Boolean
CFStringHasPrefix(CFStringRef string
, CFStringRef prefix
) {
2756 return CFStringFindWithOptions(string
, prefix
, CFRangeMake(0, CFStringGetLength(string
)), kCFCompareAnchored
, NULL
);
2759 Boolean
CFStringHasSuffix(CFStringRef string
, CFStringRef suffix
) {
2760 return CFStringFindWithOptions(string
, suffix
, CFRangeMake(0, CFStringGetLength(string
)), kCFCompareAnchored
|kCFCompareBackwards
, NULL
);
2763 #define MAX_TRANSCODING_LENGTH 4
2765 #define HANGUL_JONGSEONG_COUNT (28)
2767 CF_INLINE
bool _CFStringIsHangulLVT(UTF32Char character
) {
2768 return (((character
- HANGUL_SYLLABLE_START
) % HANGUL_JONGSEONG_COUNT
) ? true : false);
/* Table of 16 small lengths (values 0, 2, 3 or 4). NOTE(review): presumably indexed by the low 4 bits of a transcoding-hint character and bounded by MAX_TRANSCODING_LENGTH; confirm against the code that reads it. */
2771 static uint8_t __CFTranscodingHintLength
[] = {
2772 2, 3, 4, 4, 4, 4, 4, 2, 2, 2, 2, 4, 0, 0, 0, 0
/* States of the Hangul scan in _CFStringInlineBufferGetComposedRange(). That function assigns L to characters below HANGUL_JUNGSEONG_START, V to those below HANGUL_JONGSEONG_START, T to those below HANGUL_SYLLABLE_START, and LV / LVT to precomposed syllables according to _CFStringIsHangulLVT(). Break ends the scan. */
2776 kCFStringHangulStateL
,
2777 kCFStringHangulStateV
,
2778 kCFStringHangulStateT
,
2779 kCFStringHangulStateLV
,
2780 kCFStringHangulStateLVT
,
2781 kCFStringHangulStateBreak
2784 static CFRange
_CFStringInlineBufferGetComposedRange(CFStringInlineBuffer
*buffer
, CFIndex start
, CFStringCharacterClusterType type
, const uint8_t *nonBaseBMP
) {
2785 CFIndex end
= start
+ 1;
2786 const uint8_t *nonBase
= nonBaseBMP
;
2787 UTF32Char character
;
2788 UTF16Char otherSurrogate
;
2791 character
= CFStringGetCharacterFromInlineBuffer(buffer
, start
);
2794 // We don't combine characters in Armenian ~ Limbu range for backward deletion
2795 if ((type
!= kCFStringBackwardDeletionCluster
) || (character
< 0x0530) || (character
> 0x194F)) {
2796 // Check if the current is surrogate
2797 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, start
+ 1)))) {
2799 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
2800 nonBase
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (character
>> 16));
2805 if ((type
== kCFStringBackwardDeletionCluster
) && (character
>= 0x0530) && (character
< 0x1950)) break;
2807 if (character
< 0x10000) { // the first round could be already be non-BMP
2808 if (CFUniCharIsSurrogateLowCharacter(character
) && CFUniCharIsSurrogateHighCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, start
- 1)))) {
2809 character
= CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate
, character
);
2810 nonBase
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (character
>> 16));
2813 nonBase
= nonBaseBMP
;
2817 if (!CFUniCharIsMemberOfBitmap(character
, nonBase
) && (character
!= 0xFF9E) && (character
!= 0xFF9F) && ((character
& 0x1FFFF0) != 0xF870)) break;
2821 character
= CFStringGetCharacterFromInlineBuffer(buffer
, start
);
2826 if (((character
>= HANGUL_CHOSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) || ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
))) {
2828 uint8_t initialState
;
2830 if (character
< HANGUL_JUNGSEONG_START
) {
2831 state
= kCFStringHangulStateL
;
2832 } else if (character
< HANGUL_JONGSEONG_START
) {
2833 state
= kCFStringHangulStateV
;
2834 } else if (character
< HANGUL_SYLLABLE_START
) {
2835 state
= kCFStringHangulStateT
;
2837 state
= (_CFStringIsHangulLVT(character
) ? kCFStringHangulStateLVT
: kCFStringHangulStateLV
);
2839 initialState
= state
;
2842 while (((character
= CFStringGetCharacterFromInlineBuffer(buffer
, start
- 1)) >= HANGUL_CHOSEONG_START
) && (character
<= HANGUL_SYLLABLE_END
) && ((character
<= HANGUL_JONGSEONG_END
) || (character
>= HANGUL_SYLLABLE_START
))) {
2844 case kCFStringHangulStateV
:
2845 if (character
<= HANGUL_CHOSEONG_END
) {
2846 state
= kCFStringHangulStateL
;
2847 } else if ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
) && !_CFStringIsHangulLVT(character
)) {
2848 state
= kCFStringHangulStateLV
;
2849 } else if (character
> HANGUL_JUNGSEONG_END
) {
2850 state
= kCFStringHangulStateBreak
;
2854 case kCFStringHangulStateT
:
2855 if ((character
>= HANGUL_JUNGSEONG_START
) && (character
<= HANGUL_JUNGSEONG_END
)) {
2856 state
= kCFStringHangulStateV
;
2857 } else if ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
)) {
2858 state
= (_CFStringIsHangulLVT(character
) ? kCFStringHangulStateLVT
: kCFStringHangulStateLV
);
2859 } else if (character
< HANGUL_JUNGSEONG_START
) {
2860 state
= kCFStringHangulStateBreak
;
2865 state
= ((character
< HANGUL_JUNGSEONG_START
) ? kCFStringHangulStateL
: kCFStringHangulStateBreak
);
2869 if (state
== kCFStringHangulStateBreak
) break;
2874 state
= initialState
;
2875 while (((character
= CFStringGetCharacterFromInlineBuffer(buffer
, end
)) > 0) && (((character
>= HANGUL_CHOSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) || ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
)))) {
2877 case kCFStringHangulStateLV
:
2878 case kCFStringHangulStateV
:
2879 if ((character
>= HANGUL_JUNGSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) {
2880 state
= ((character
< HANGUL_JONGSEONG_START
) ? kCFStringHangulStateV
: kCFStringHangulStateT
);
2882 state
= kCFStringHangulStateBreak
;
2886 case kCFStringHangulStateLVT
:
2887 case kCFStringHangulStateT
:
2888 state
= (((character
>= HANGUL_JONGSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) ? kCFStringHangulStateT
: kCFStringHangulStateBreak
);
2892 if (character
< HANGUL_JUNGSEONG_START
) {
2893 state
= kCFStringHangulStateL
;
2894 } else if (character
< HANGUL_JONGSEONG_START
) {
2895 state
= kCFStringHangulStateV
;
2896 } else if (character
>= HANGUL_SYLLABLE_START
) {
2897 state
= (_CFStringIsHangulLVT(character
) ? kCFStringHangulStateLVT
: kCFStringHangulStateLV
);
2899 state
= kCFStringHangulStateBreak
;
2904 if (state
== kCFStringHangulStateBreak
) break;
2910 while ((character
= CFStringGetCharacterFromInlineBuffer(buffer
, end
)) > 0) {
2911 if ((type
== kCFStringBackwardDeletionCluster
) && (character
>= 0x0530) && (character
< 0x1950)) break;
2913 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, end
+ 1)))) {
2914 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
2915 nonBase
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (character
>> 16));
2918 nonBase
= nonBaseBMP
;
2922 if (!CFUniCharIsMemberOfBitmap(character
, nonBase
) && (character
!= 0xFF9E) && (character
!= 0xFF9F) && ((character
& 0x1FFFF0) != 0xF870)) break;
2927 return CFRangeMake(start
, end
- start
);
2930 CF_INLINE
bool _CFStringIsVirama(UTF32Char character
, const uint8_t *combClassBMP
) {
2931 return ((character
== COMBINING_GRAPHEME_JOINER
) || (CFUniCharGetCombiningPropertyForCharacter(character
, ((character
< 0x10000) ? combClassBMP
: CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (character
>> 16)))) == 9) ? true : false);
2934 CFRange
CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string
, CFIndex charIndex
, CFStringCharacterClusterType type
) {
2936 CFIndex currentIndex
;
2937 CFIndex length
= CFStringGetLength(string
);
2938 CFStringInlineBuffer stringBuffer
;
2939 UTF32Char character
;
2940 UTF16Char otherSurrogate
;
2941 static const uint8_t *nonBaseBMP
= NULL
;
2942 static const uint8_t *letterBMP
= NULL
;
2943 static const uint8_t *combClassBMP
= NULL
;
2945 if (charIndex
>= length
) return CFRangeMake(kCFNotFound
, 0);
2947 /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
2949 if (!CF_IS_OBJC(__kCFStringTypeID
, string
) && __CFStrIsEightBit(string
)) return CFRangeMake(charIndex
, 1);
2951 if (NULL
== nonBaseBMP
) {
2952 nonBaseBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
2953 letterBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, 0);
2954 combClassBMP
= CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
2957 CFStringInitInlineBuffer(string
, &stringBuffer
, CFRangeMake(0, length
));
2959 // Get composed character sequence first
2960 range
= _CFStringInlineBufferGetComposedRange(&stringBuffer
, charIndex
, type
, nonBaseBMP
);
2962 // Do grapheme joiners
2963 if (type
< kCFStringCursorMovementCluster
) {
2964 const uint8_t *letter
= letterBMP
;
2966 // Check to see if we have a letter at the beginning of initial cluster
2967 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, range
.location
);
2969 if ((range
.length
> 1) && CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, range
.location
+ 1)))) {
2970 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
2971 letter
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, (character
>> 16));
2974 if ((character
== ZERO_WIDTH_JOINER
) || CFUniCharIsMemberOfBitmap(character
, letter
)) {
2977 // Check if preceded by grapheme joiners (U034F and viramas)
2978 otherRange
.location
= currentIndex
= range
.location
;
2980 while (currentIndex
> 1) {
2981 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, --currentIndex
);
2983 // ??? We're assuming viramas only in BMP
2984 if ((_CFStringIsVirama(character
, combClassBMP
) || ((character
== ZERO_WIDTH_JOINER
) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, --currentIndex
), combClassBMP
))) && (currentIndex
> 0)) {
2990 currentIndex
= _CFStringInlineBufferGetComposedRange(&stringBuffer
, currentIndex
, type
, nonBaseBMP
).location
;
2992 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
);
2994 if (CFUniCharIsSurrogateLowCharacter(character
) && CFUniCharIsSurrogateHighCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
- 1)))) {
2995 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
2996 letter
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, (character
>> 16));
3002 if (!CFUniCharIsMemberOfBitmap(character
, letter
)) break;
3003 range
.location
= currentIndex
;
3006 range
.length
+= otherRange
.location
- range
.location
;
3008 // Check if followed by grapheme joiners
3009 if ((range
.length
> 1) && ((range
.location
+ range
.length
) < length
)) {
3013 currentIndex
= otherRange
.location
+ otherRange
.length
;
3014 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
- 1);
3016 // ??? We're assuming viramas only in BMP
3017 if ((character
!= ZERO_WIDTH_JOINER
) && !_CFStringIsVirama(character
, combClassBMP
)) break;
3019 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
);
3021 if (character
== ZERO_WIDTH_JOINER
) character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, ++currentIndex
);
3023 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
+ 1)))) {
3024 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
3025 letter
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, (character
>> 16));
3030 // We only conjoin letters
3031 if (!CFUniCharIsMemberOfBitmap(character
, letter
)) break;
3032 otherRange
= _CFStringInlineBufferGetComposedRange(&stringBuffer
, currentIndex
, type
, nonBaseBMP
);
3033 } while ((otherRange
.location
+ otherRange
.length
) < length
);
3034 range
.length
= currentIndex
- range
.location
;
3039 // Check if we're part of prefix transcoding hints
3042 currentIndex
= (range
.location
+ range
.length
) - (MAX_TRANSCODING_LENGTH
+ 1);
3043 if (currentIndex
< 0) currentIndex
= 0;
3045 while (currentIndex
<= range
.location
) {
3046 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
);
3048 if ((character
& 0x1FFFF0) == 0xF860) { // transcoding hint
3049 otherIndex
= currentIndex
+ __CFTranscodingHintLength
[(character
- 0xF860)] + 1;
3050 if (otherIndex
>= (range
.location
+ range
.length
)) {
3051 if (otherIndex
<= length
) {
3052 range
.location
= currentIndex
;
3053 range
.length
= otherIndex
- currentIndex
;
3064 #if 1 /* Using the new implementation. Leaving the old implementation if'ed out for testing purposes for now */
3065 CFRange
CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString
, CFIndex theIndex
) {
3066 return CFStringGetRangeOfCharacterClusterAtIndex(theString
, theIndex
, kCFStringComposedCharacterCluster
);
/*!
	@function CFStringGetRangeOfComposedCharactersAtIndex
	Returns the range of the composed character sequence at the specified index.
	@param theString The CFString which is to be searched.  If this
			parameter is not a valid CFString, the behavior is
			undefined.
	@param theIndex The index of the character contained in the
			composed character sequence.  If the index is
			outside the index space of the string (0 to N-1 inclusive,
			where N is the length of the string), the behavior is
			undefined.
	@result The range of the composed character sequence.
*/
3082 #define ExtHighHalfZoneLow 0xD800
3083 #define ExtHighHalfZoneHigh 0xDBFF
3084 #define ExtLowHalfZoneLow 0xDC00
3085 #define ExtLowHalfZoneHigh 0xDFFF
3086 #define JunseongStart 0x1160
3087 #define JonseongEnd 0x11F9
3088 CF_INLINE Boolean
IsHighCode(UniChar X
) { return (X
>= ExtHighHalfZoneLow
&& X
<= ExtHighHalfZoneHigh
); }
3089 CF_INLINE Boolean
IsLowCode(UniChar X
) { return (X
>= ExtLowHalfZoneLow
&& X
<= ExtLowHalfZoneHigh
); }
3090 #define IsHangulConjoiningJamo(X) (X >= JunseongStart && X <= JonseongEnd)
3091 #define IsHalfwidthKanaVoicedMark(X) ((X == 0xFF9E) || (X == 0xFF9F))
3092 CF_INLINE Boolean
IsNonBaseChar(UniChar X
, CFCharacterSetRef nonBaseSet
) { return (CFCharacterSetIsCharacterMember(nonBaseSet
, X
) || IsHangulConjoiningJamo(X
) || IsHalfwidthKanaVoicedMark(X
) || (X
& 0x1FFFF0) == 0xF870); } // combining char, hangul jamo, or Apple corporate variant tag
3095 #define COMBINING_GRAPHEME_JOINER (0x034F)
3097 static CFCharacterSetRef nonBaseChars
= NULL
;
3098 static CFCharacterSetRef letterChars
= NULL
;
3099 static const void *__CFCombiningClassBMP
= NULL
;
3101 CF_INLINE
bool IsVirama(UTF32Char character
) {
3102 return ((character
== COMBINING_GRAPHEME_JOINER
) ? true : ((character
< 0x10000) && (CFUniCharGetCombiningPropertyForCharacter(character
, __CFCombiningClassBMP
) == 9) ? true : false));
3105 CFRange
CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString
, CFIndex theIndex
) {
3106 CFIndex left
, current
, save
;
3107 CFIndex len
= CFStringGetLength(theString
);
3108 CFStringInlineBuffer stringBuffer
;
3109 static volatile Boolean _isInited
= false;
3111 if (theIndex
>= len
) return CFRangeMake(kCFNotFound
, 0);
3114 nonBaseChars
= CFCharacterSetGetPredefined(kCFCharacterSetNonBase
);
3115 letterChars
= CFCharacterSetGetPredefined(kCFCharacterSetLetter
);
3116 __CFCombiningClassBMP
= CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
3120 save
= current
= theIndex
;
3122 CFStringInitInlineBuffer(theString
, &stringBuffer
, CFRangeMake(0, len
));
3125 * First check for transcoding hints
3128 CFRange theRange
= (current
> MAX_TRANSCODING_LENGTH
? CFRangeMake(current
- MAX_TRANSCODING_LENGTH
, MAX_TRANSCODING_LENGTH
+ 1) : CFRangeMake(0, current
+ 1));
3130 // Should check the next loc ?
3131 if (current
+ 1 < len
) ++theRange
.length
;
3133 if (theRange
.length
> 1) {
3134 UniChar characterBuffer
[MAX_TRANSCODING_LENGTH
+ 2]; // Transcoding hint length + current loc + next loc
3136 if (stringBuffer
.directBuffer
) {
3137 memmove(characterBuffer
, stringBuffer
.directBuffer
+ theRange
.location
, theRange
.length
* sizeof(UniChar
));
3139 CFStringGetCharacters(theString
, theRange
, characterBuffer
);
3142 while (current
>= theRange
.location
) {
3143 if ((characterBuffer
[current
- theRange
.location
] & 0x1FFFF0) == 0xF860) {
3144 theRange
= CFRangeMake(current
, __CFTranscodingHintLength
[characterBuffer
[current
- theRange
.location
] - 0xF860] + 1);
3145 if ((theRange
.location
+ theRange
.length
) <= theIndex
) break;
3146 if ((theRange
.location
+ theRange
.length
) >= len
) theRange
.length
= len
- theRange
.location
;
3149 if (current
== 0) break;
3152 current
= theIndex
; // Reset current
3156 //#warning Aki 5/29/01 This does not support non-base chars in non-BMP planes (i.e. musical symbol combining stem in Unicode 3.1)
3158 * if we start NOT on a base, first move back to a base as appropriate.
3163 while ((current
> 0) && IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
), nonBaseChars
)) --current
;
3165 if (current
>= 1 && current
< len
&& CFCharacterSetIsCharacterMember(letterChars
, CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
)) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1))) {
3168 } else if ((current
>= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1) == ZWJ
) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 2))) {
3174 * Set the left position, then jump back to the saved original position.
3177 if (current
>= 1 && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
)) && IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1))) --current
;
3182 * Now, presume we are on a base; move forward & look for the next base.
3183 * Handle jumping over H/L codes.
3185 if (IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
)) && (current
+ 1) < len
&& IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
+ 1))) ++current
;
3190 if (current
< len
) {
3191 while (IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
), nonBaseChars
)) {
3193 if (current
>= len
) break;
3195 if ((current
< len
) && CFCharacterSetIsCharacterMember(letterChars
, CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
))) {
3196 if (IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1))) {
3197 ++current
; goto round2Again
;
3198 } else if ((current
>= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1) == ZWJ
) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 2))) {
3199 ++current
; goto round2Again
;
3204 * Now, "current" is a base, and "left" is a base.
3205 * The junk between had better contain "save"!
3207 if ((! (left
<= save
)) || (! (save
<= current
))) {
3208 CFLog(0, CFSTR("CFString: CFStringGetRangeOfComposedCharactersAtIndex:%d returned invalid\n"), save
);
3210 return CFRangeMake(left
, current
- left
);
/*!
	@function CFStringFindCharacterFromSet
	Query the range of characters contained in the specified character set.
	@param theString The CFString which is to be searched.  If this
			parameter is not a valid CFString, the behavior is
			undefined.
	@param theSet The CFCharacterSet against which the membership
			of characters is checked.  If this parameter is not a valid
			CFCharacterSet, the behavior is undefined.
	@param range The range of characters within the string to search. If
			the range location or end point (defined by the location
			plus length minus 1) are outside the index space of the
			string (0 to N-1 inclusive, where N is the length of the
			string), the behavior is undefined. If the range length is
			negative, the behavior is undefined. The range may be empty
			(length 0), in which case no search is performed.
	@param searchOptions The bitwise-or'ed option flags to control
			the search behavior.  The supported options are
			kCFCompareBackwards and kCFCompareAnchored.
			If other option flags are specified, the behavior
			is undefined.
	@param result The pointer to a CFRange supplied by the caller in
			which the search result is stored.  If a pointer to an invalid
			memory is specified, the behavior is undefined.
	@result true, if at least a character which is a member of the character
			set is found and result is filled, otherwise, false.
*/
3241 #define SURROGATE_START 0xD800
3242 #define SURROGATE_END 0xDFFF
3244 CF_EXPORT Boolean
CFStringFindCharacterFromSet(CFStringRef theString
, CFCharacterSetRef theSet
, CFRange rangeToSearch
, CFOptionFlags searchOptions
, CFRange
*result
) {
3245 CFStringInlineBuffer stringBuffer
;
3248 CFIndex fromLoc
, toLoc
, cnt
; // fromLoc and toLoc are inclusive
3249 Boolean found
= false;
3250 Boolean done
= false;
3252 //#warning FIX ME !! Should support kCFCompareNonliteral
3254 if ((rangeToSearch
.location
+ rangeToSearch
.length
> CFStringGetLength(theString
)) || (rangeToSearch
.length
== 0)) return false;
3256 if (searchOptions
& kCFCompareBackwards
) {
3257 fromLoc
= rangeToSearch
.location
+ rangeToSearch
.length
- 1;
3258 toLoc
= rangeToSearch
.location
;
3260 fromLoc
= rangeToSearch
.location
;
3261 toLoc
= rangeToSearch
.location
+ rangeToSearch
.length
- 1;
3263 if (searchOptions
& kCFCompareAnchored
) {
3267 step
= (fromLoc
<= toLoc
) ? 1 : -1;
3270 CFStringInitInlineBuffer(theString
, &stringBuffer
, rangeToSearch
);
3273 ch
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, cnt
- rangeToSearch
.location
);
3274 if ((ch
>= SURROGATE_START
) && (ch
<= SURROGATE_END
)) {
3275 int otherCharIndex
= cnt
+ step
;
3277 if (((step
< 0) && (otherCharIndex
< toLoc
)) || ((step
> 0) && (otherCharIndex
> toLoc
))) {
3281 UniChar lowChar
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, otherCharIndex
- rangeToSearch
.location
);
3283 if (cnt
< otherCharIndex
) {
3290 if (CFUniCharIsSurrogateHighCharacter(highChar
) && CFUniCharIsSurrogateLowCharacter(lowChar
) && CFCharacterSetIsLongCharacterMember(theSet
, CFUniCharGetLongCharacterForSurrogatePair(highChar
, lowChar
))) {
3291 if (result
) *result
= CFRangeMake((cnt
< otherCharIndex
? cnt
: otherCharIndex
), 2);
3293 } else if (otherCharIndex
== toLoc
) {
3296 cnt
= otherCharIndex
+ step
;
3299 } else if (CFCharacterSetIsCharacterMember(theSet
, ch
)) {
3300 done
= found
= true;
3301 } else if (cnt
== toLoc
) {
3308 if (found
&& result
) *result
= CFRangeMake(cnt
, 1);
3312 /* Line range code */
3314 #define CarriageReturn '\r' /* 0x0d */
3315 #define NewLine '\n' /* 0x0a */
3316 #define NextLine 0x0085
3317 #define LineSeparator 0x2028
3318 #define ParaSeparator 0x2029
3320 CF_INLINE Boolean
isALineSeparatorTypeCharacter(UniChar ch
) {
3321 if (ch
> CarriageReturn
&& ch
< NextLine
) return false; /* Quick test to cover most chars */
3322 return (ch
== NewLine
|| ch
== CarriageReturn
|| ch
== NextLine
|| ch
== LineSeparator
|| ch
== ParaSeparator
) ? true : false;
3325 void CFStringGetLineBounds(CFStringRef string
, CFRange range
, CFIndex
*lineBeginIndex
, CFIndex
*lineEndIndex
, CFIndex
*contentsEndIndex
) {
3327 CFStringInlineBuffer buf
;
3330 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID
, void, string
, "getLineStart:end:contentsEnd:forRange:", lineBeginIndex
, lineEndIndex
, contentsEndIndex
, CFRangeMake(range
.location
, range
.length
));
3332 __CFAssertIsString(string
);
3333 __CFAssertRangeIsInStringBounds(string
, range
.location
, range
.length
);
3335 len
= __CFStrLength(string
);
3337 if (lineBeginIndex
) {
3339 if (range
.location
== 0) {
3342 CFStringInitInlineBuffer(string
, &buf
, CFRangeMake(0, len
));
3343 CFIndex buf_idx
= range
.location
;
3345 /* Take care of the special case where start happens to fall right between \r and \n */
3346 ch
= CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
);
3348 if ((ch
== NewLine
) && (CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
) == CarriageReturn
)) {
3355 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
))) {
3356 start
= buf_idx
+ 1;
3363 *lineBeginIndex
= start
;
3366 /* Now find the ending point */
3367 if (lineEndIndex
|| contentsEndIndex
) {
3368 CFIndex endOfContents
, lineSeparatorLength
= 1; /* 1 by default */
3369 CFStringInitInlineBuffer(string
, &buf
, CFRangeMake(0, len
));
3370 CFIndex buf_idx
= range
.location
+ range
.length
- (range
.length
? 1 : 0);
3371 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
3372 ch
= __CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
);
3373 if (ch
== NewLine
) {
3374 endOfContents
= buf_idx
;
3376 if (__CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
) == CarriageReturn
) {
3377 lineSeparatorLength
= 2;
3382 if (isALineSeparatorTypeCharacter(ch
)) {
3383 endOfContents
= buf_idx
; /* This is actually end of contentsRange */
3384 buf_idx
++; /* OK for this to go past the end */
3385 if ((ch
== CarriageReturn
) && (__CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
) == NewLine
)) {
3386 lineSeparatorLength
= 2;
3389 } else if (buf_idx
>= len
) {
3390 endOfContents
= len
;
3391 lineSeparatorLength
= 0;
3395 ch
= __CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
);
3399 if (contentsEndIndex
) *contentsEndIndex
= endOfContents
;
3400 if (lineEndIndex
) *lineEndIndex
= endOfContents
+ lineSeparatorLength
;
3405 CFStringRef
CFStringCreateByCombiningStrings(CFAllocatorRef alloc
, CFArrayRef array
, CFStringRef separatorString
) {
3407 CFIndex separatorNumByte
;
3408 CFIndex stringCount
= CFArrayGetCount(array
);
3409 Boolean isSepCFString
= !CF_IS_OBJC(__kCFStringTypeID
, separatorString
);
3410 Boolean canBeEightbit
= isSepCFString
&& __CFStrIsEightBit(separatorString
);
3412 CFStringRef otherString
;
3415 const void *separatorContents
= NULL
;
3417 if (stringCount
== 0) {
3418 return CFStringCreateWithCharacters(alloc
, NULL
, 0);
3419 } else if (stringCount
== 1) {
3420 return CFStringCreateCopy(alloc
, CFArrayGetValueAtIndex(array
, 0));
3423 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
3425 numChars
= CFStringGetLength(separatorString
) * (stringCount
- 1);
3426 for (idx
= 0; idx
< stringCount
; idx
++) {
3427 otherString
= (CFStringRef
)CFArrayGetValueAtIndex(array
, idx
);
3428 numChars
+= CFStringGetLength(otherString
);
3429 // canBeEightbit is already false if the separator is an NSString...
3430 if (!CF_IS_OBJC(__kCFStringTypeID
, otherString
) && __CFStrIsUnicode(otherString
)) canBeEightbit
= false;
3433 bufPtr
= buffer
= CFAllocatorAllocate(alloc
, canBeEightbit
? ((numChars
+ 1) * sizeof(uint8_t)) : (numChars
* sizeof(UniChar
)), 0);
3434 if (__CFOASafe
) __CFSetLastAllocationEventName(buffer
, "CFString (store)");
3435 separatorNumByte
= CFStringGetLength(separatorString
) * (canBeEightbit
? sizeof(uint8_t) : sizeof(UniChar
));
3437 for (idx
= 0; idx
< stringCount
; idx
++) {
3438 if (idx
) { // add separator here unless first string
3439 if (separatorContents
) {
3440 memmove(bufPtr
, separatorContents
, separatorNumByte
);
3442 if (!isSepCFString
) { // NSString
3443 CFStringGetCharacters(separatorString
, CFRangeMake(0, CFStringGetLength(separatorString
)), (UniChar
*)bufPtr
);
3444 } else if (canBeEightbit
|| __CFStrIsUnicode(separatorString
)) {
3445 memmove(bufPtr
, (const uint8_t *)__CFStrContents(separatorString
) + __CFStrSkipAnyLengthByte(separatorString
), separatorNumByte
);
3447 __CFStrConvertBytesToUnicode((uint8_t*)__CFStrContents(separatorString
) + __CFStrSkipAnyLengthByte(separatorString
), (UniChar
*)bufPtr
, __CFStrLength(separatorString
));
3449 separatorContents
= bufPtr
;
3451 bufPtr
+= separatorNumByte
;
3454 otherString
= (CFStringRef
)CFArrayGetValueAtIndex(array
, idx
);
3455 if (CF_IS_OBJC(__kCFStringTypeID
, otherString
)) {
3456 CFIndex otherLength
= CFStringGetLength(otherString
);
3457 CFStringGetCharacters(otherString
, CFRangeMake(0, otherLength
), (UniChar
*)bufPtr
);
3458 bufPtr
+= otherLength
* sizeof(UniChar
);
3460 const uint8_t* otherContents
= __CFStrContents(otherString
);
3461 CFIndex otherNumByte
= __CFStrLength2(otherString
, otherContents
) * (canBeEightbit
? sizeof(uint8_t) : sizeof(UniChar
));
3463 if (canBeEightbit
|| __CFStrIsUnicode(otherString
)) {
3464 memmove(bufPtr
, otherContents
+ __CFStrSkipAnyLengthByte(otherString
), otherNumByte
);
3466 __CFStrConvertBytesToUnicode(otherContents
+ __CFStrSkipAnyLengthByte(otherString
), (UniChar
*)bufPtr
, __CFStrLength2(otherString
, otherContents
));
3468 bufPtr
+= otherNumByte
;
3471 if (canBeEightbit
) *bufPtr
= 0; // NULL byte;
3473 return canBeEightbit
?
3474 CFStringCreateWithCStringNoCopy(alloc
, buffer
, __CFStringGetEightBitStringEncoding(), alloc
) :
3475 CFStringCreateWithCharactersNoCopy(alloc
, buffer
, numChars
, alloc
);
3479 CFArrayRef
CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc
, CFStringRef string
, CFStringRef separatorString
) {
3480 CFArrayRef separatorRanges
;
3481 CFIndex length
= CFStringGetLength(string
);
3482 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
3483 if (!(separatorRanges
= CFStringCreateArrayWithFindResults(alloc
, string
, separatorString
, CFRangeMake(0, length
), 0))) {
3484 return CFArrayCreate(alloc
, (const void**)&string
, 1, & kCFTypeArrayCallBacks
);
3487 CFIndex count
= CFArrayGetCount(separatorRanges
);
3488 CFIndex startIndex
= 0;
3490 CFMutableArrayRef array
= CFArrayCreateMutable(alloc
, count
+ 2, & kCFTypeArrayCallBacks
);
3491 const CFRange
*currentRange
;
3492 CFStringRef substring
;
3494 for (idx
= 0;idx
< count
;idx
++) {
3495 currentRange
= CFArrayGetValueAtIndex(separatorRanges
, idx
);
3496 numChars
= currentRange
->location
- startIndex
;
3497 substring
= CFStringCreateWithSubstring(alloc
, string
, CFRangeMake(startIndex
, numChars
));
3498 CFArrayAppendValue(array
, substring
);
3499 CFRelease(substring
);
3500 startIndex
= currentRange
->location
+ currentRange
->length
;
3502 substring
= CFStringCreateWithSubstring(alloc
, string
, CFRangeMake(startIndex
, length
- startIndex
));
3503 CFArrayAppendValue(array
, substring
);
3504 CFRelease(substring
);
3506 CFRelease(separatorRanges
);
3512 CFStringRef
CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc
, CFDataRef data
, CFStringEncoding encoding
) {
3513 return CFStringCreateWithBytes(alloc
, CFDataGetBytePtr(data
), CFDataGetLength(data
), encoding
, true);
3517 CFDataRef
CFStringCreateExternalRepresentation(CFAllocatorRef alloc
, CFStringRef string
, CFStringEncoding encoding
, uint8_t lossByte
) {
3519 CFIndex guessedByteLength
;
3524 if (CF_IS_OBJC(__kCFStringTypeID
, string
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3525 length
= CFStringGetLength(string
);
3527 __CFAssertIsString(string
);
3528 length
= __CFStrLength(string
);
3529 if (__CFStrIsEightBit(string
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
3530 return CFDataCreate(alloc
, ((char *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
)), __CFStrLength(string
));
3534 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
3536 if (encoding
== kCFStringEncodingUnicode
) {
3537 guessedByteLength
= (length
+ 1) * sizeof(UniChar
);
3538 } else if (((guessedByteLength
= CFStringGetMaximumSizeForEncoding(length
, encoding
)) > length
) && !CF_IS_OBJC(__kCFStringTypeID
, string
)) { // Multi byte encoding
3539 #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
3540 if (__CFStrIsUnicode(string
)) {
3541 guessedByteLength
= CFStringEncodingByteLengthForCharacters(encoding
, kCFStringEncodingPrependBOM
, __CFStrContents(string
), __CFStrLength(string
));
3544 result
= __CFStringEncodeByteStream(string
, 0, length
, true, encoding
, lossByte
, NULL
, 0x7FFFFFFF, &guessedByteLength
);
3545 // if result == length, we always succeed
3546 // otherwise, if result == 0, we fail
3547 // otherwise, if there was a lossByte but still result != length, we fail
3548 if ((result
!= length
) && (!result
|| !lossByte
)) return NULL
;
3549 if (guessedByteLength
== length
&& __CFStrIsEightBit(string
) && __CFStringEncodingIsSupersetOfASCII(encoding
)) { // It's all ASCII !!
3550 return CFDataCreate(alloc
, ((char *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
)), __CFStrLength(string
));
3552 #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
3556 bytes
= CFAllocatorAllocate(alloc
, guessedByteLength
, 0);
3557 if (__CFOASafe
) __CFSetLastAllocationEventName(bytes
, "CFData (store)");
3559 result
= __CFStringEncodeByteStream(string
, 0, length
, true, encoding
, lossByte
, bytes
, guessedByteLength
, &usedLength
);
3561 if ((result
!= length
) && (!result
|| !lossByte
)) { // see comment above about what this means
3562 CFAllocatorDeallocate(alloc
, bytes
);
3566 return CFDataCreateWithBytesNoCopy(alloc
, (char const *)bytes
, usedLength
, alloc
);
3570 CFStringEncoding
CFStringGetSmallestEncoding(CFStringRef str
) {
3572 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFStringEncoding
, str
, "_smallestEncodingInCFStringEncoding");
3573 __CFAssertIsString(str
);
3575 if (__CFStrIsEightBit(str
)) return __CFStringGetEightBitStringEncoding();
3576 len
= __CFStrLength(str
);
3577 if (__CFStringEncodeByteStream(str
, 0, len
, false, __CFStringGetEightBitStringEncoding(), 0, NULL
, 0x7fffffff, NULL
) == len
) return __CFStringGetEightBitStringEncoding();
3578 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str
, 0, len
, false, __CFStringGetSystemEncoding(), 0, NULL
, 0x7fffffff, NULL
) == len
)) return __CFStringGetSystemEncoding();
3579 return kCFStringEncodingUnicode
; /* ??? */
3583 CFStringEncoding
CFStringGetFastestEncoding(CFStringRef str
) {
3584 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFStringEncoding
, str
, "_fastestEncodingInCFStringEncoding");
3585 __CFAssertIsString(str
);
3586 return __CFStrIsEightBit(str
) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode
; /* ??? */
3590 SInt32
CFStringGetIntValue(CFStringRef str
) {
3594 CFStringInlineBuffer buf
;
3595 CFStringInitInlineBuffer(str
, &buf
, CFRangeMake(0, CFStringGetLength(str
)));
3596 success
= __CFStringScanInteger(&buf
, NULL
, &idx
, false, &result
);
3597 return success
? result
: 0;
3601 double CFStringGetDoubleValue(CFStringRef str
) {
3605 CFStringInlineBuffer buf
;
3606 CFStringInitInlineBuffer(str
, &buf
, CFRangeMake(0, CFStringGetLength(str
)));
3607 success
= __CFStringScanDouble(&buf
, NULL
, &idx
, &result
);
3608 return success
? result
: 0.0;
3612 /*** Mutable functions... ***/
3614 void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string
, UniChar
*chars
, CFIndex length
, CFIndex capacity
) {
3615 __CFAssertIsNotNegative(length
);
3616 __CFAssertIsStringAndExternalMutable(string
);
3617 CFAssert4((length
<= capacity
) && ((capacity
== 0) || ((capacity
> 0) && chars
)), __kCFLogAssertion
, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__
, chars
, length
, capacity
);
3618 __CFStrSetContentPtr(string
, chars
);
3619 __CFStrSetExplicitLength(string
, length
);
3620 __CFStrSetCapacity(string
, capacity
* sizeof(UniChar
));
3621 __CFStrSetCapacityProvidedExternally(string
);
3626 void CFStringInsert(CFMutableStringRef str
, CFIndex idx
, CFStringRef insertedStr
) {
3627 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "insertString:atIndex:", insertedStr
, idx
);
3628 __CFAssertIsStringAndMutable(str
);
3629 CFAssert3(idx
>= 0 && idx
<= __CFStrLength(str
), __kCFLogAssertion
, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__
, idx
, __CFStrLength(str
));
3630 __CFStringReplace(str
, CFRangeMake(idx
, 0), insertedStr
);
3634 void CFStringDelete(CFMutableStringRef str
, CFRange range
) {
3635 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, str
, "deleteCharactersInRange:", range
);
3636 __CFAssertIsStringAndMutable(str
);
3637 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
3638 __CFStringChangeSize(str
, range
, 0, false);
3642 void CFStringReplace(CFMutableStringRef str
, CFRange range
, CFStringRef replacement
) {
3643 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "replaceCharactersInRange:withString:", range
, replacement
);
3644 __CFAssertIsStringAndMutable(str
);
3645 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
3646 __CFStringReplace(str
, range
, replacement
);
3650 void CFStringReplaceAll(CFMutableStringRef str
, CFStringRef replacement
) {
3651 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, str
, "setString:", replacement
);
3652 __CFAssertIsStringAndMutable(str
);
3653 __CFStringReplace(str
, CFRangeMake(0, __CFStrLength(str
)), replacement
);
3657 void CFStringAppend(CFMutableStringRef str
, CFStringRef appended
) {
3658 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, str
, "appendString:", appended
);
3659 __CFAssertIsStringAndMutable(str
);
3660 __CFStringReplace(str
, CFRangeMake(__CFStrLength(str
), 0), appended
);
3664 void CFStringAppendCharacters(CFMutableStringRef str
, const UniChar
*chars
, CFIndex appendedLength
) {
3665 CFIndex strLength
, idx
;
3667 __CFAssertIsNotNegative(appendedLength
);
3669 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "appendCharacters:length:", chars
, appendedLength
);
3671 __CFAssertIsStringAndMutable(str
);
3673 strLength
= __CFStrLength(str
);
3674 if (__CFStringGetCompatibility(Bug2967272
) || __CFStrIsUnicode(str
)) {
3675 __CFStringChangeSize(str
, CFRangeMake(strLength
, 0), appendedLength
, true);
3676 memmove((UniChar
*)__CFStrContents(str
) + strLength
, chars
, appendedLength
* sizeof(UniChar
));
3679 bool isASCII
= true;
3680 for (idx
= 0; isASCII
&& idx
< appendedLength
; idx
++) isASCII
= (chars
[idx
] < 0x80);
3681 __CFStringChangeSize(str
, CFRangeMake(strLength
, 0), appendedLength
, !isASCII
);
3683 memmove((UniChar
*)__CFStrContents(str
) + strLength
, chars
, appendedLength
* sizeof(UniChar
));
3685 contents
= (uint8_t *)__CFStrContents(str
) + strLength
+ __CFStrSkipAnyLengthByte(str
);
3686 for (idx
= 0; idx
< appendedLength
; idx
++) contents
[idx
] = (uint8_t)chars
[idx
];
3692 static void __CFStringAppendBytes(CFMutableStringRef str
, const char *cStr
, CFIndex appendedLength
, CFStringEncoding encoding
) {
3693 Boolean appendedIsUnicode
= false;
3694 Boolean freeCStrWhenDone
= false;
3695 Boolean demoteAppendedUnicode
= false;
3696 CFVarWidthCharBuffer vBuf
;
3698 __CFAssertIsNotNegative(appendedLength
);
3700 if (encoding
== kCFStringEncodingASCII
|| encoding
== __CFStringGetEightBitStringEncoding()) {
3701 // appendedLength now denotes length in UniChars
3702 } else if (encoding
== kCFStringEncodingUnicode
) {
3703 UniChar
*chars
= (UniChar
*)cStr
;
3704 CFIndex idx
, length
= appendedLength
/ sizeof(UniChar
);
3705 bool isASCII
= true;
3706 for (idx
= 0; isASCII
&& idx
< length
; idx
++) isASCII
= (chars
[idx
] < 0x80);
3708 appendedIsUnicode
= true;
3710 demoteAppendedUnicode
= true;
3712 appendedLength
= length
;
3714 Boolean usingPassedInMemory
= false;
3716 vBuf
.allocator
= __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
3717 vBuf
.chars
.unicode
= NULL
; // This will cause the decode function to allocate memory if necessary
3719 if (!__CFStringDecodeByteStream3(cStr
, appendedLength
, encoding
, __CFStrIsUnicode(str
), &vBuf
, &usingPassedInMemory
, 0)) {
3720 CFAssert1(0, __kCFLogAssertion
, "Supplied bytes could not be converted specified encoding %d", encoding
);
3724 // If not ASCII, appendedLength now denotes length in UniChars
3725 appendedLength
= vBuf
.numChars
;
3726 appendedIsUnicode
= !vBuf
.isASCII
;
3727 cStr
= vBuf
.chars
.ascii
;
3728 freeCStrWhenDone
= !usingPassedInMemory
&& vBuf
.shouldFreeChars
;
3731 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) {
3732 if (!appendedIsUnicode
&& !demoteAppendedUnicode
) {
3733 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "_cfAppendCString:length:", cStr
, appendedLength
);
3735 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "appendCharacters:length:", cStr
, appendedLength
);
3739 __CFAssertIsStringAndMutable(str
);
3740 strLength
= __CFStrLength(str
);
3742 __CFStringChangeSize(str
, CFRangeMake(strLength
, 0), appendedLength
, appendedIsUnicode
|| __CFStrIsUnicode(str
));
3744 if (__CFStrIsUnicode(str
)) {
3745 UniChar
*contents
= (UniChar
*)__CFStrContents(str
);
3746 if (appendedIsUnicode
) {
3747 memmove(contents
+ strLength
, cStr
, appendedLength
* sizeof(UniChar
));
3749 __CFStrConvertBytesToUnicode(cStr
, contents
+ strLength
, appendedLength
);
3752 if (demoteAppendedUnicode
) {
3753 UniChar
*chars
= (UniChar
*)cStr
;
3755 uint8_t *contents
= (uint8_t *)__CFStrContents(str
) + strLength
+ __CFStrSkipAnyLengthByte(str
);
3756 for (idx
= 0; idx
< appendedLength
; idx
++) contents
[idx
] = (uint8_t)chars
[idx
];
3758 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
3759 memmove(contents
+ strLength
+ __CFStrSkipAnyLengthByte(str
), cStr
, appendedLength
);
3764 if (freeCStrWhenDone
) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr
);
3767 void CFStringAppendPascalString(CFMutableStringRef str
, ConstStringPtr pStr
, CFStringEncoding encoding
) {
3768 __CFStringAppendBytes(str
, pStr
+ 1, (CFIndex
)*pStr
, encoding
);
3771 void CFStringAppendCString(CFMutableStringRef str
, const char *cStr
, CFStringEncoding encoding
) {
3772 __CFStringAppendBytes(str
, cStr
, strlen(cStr
), encoding
);
3776 void CFStringAppendFormat(CFMutableStringRef str
, CFDictionaryRef formatOptions
, CFStringRef format
, ...) {
3779 va_start(argList
, format
);
3780 CFStringAppendFormatAndArguments(str
, formatOptions
, format
, argList
);
3785 CFIndex
CFStringFindAndReplace(CFMutableStringRef string
, CFStringRef stringToFind
, CFStringRef replacementString
, CFRange rangeToSearch
, CFOptionFlags compareOptions
) {
3787 Boolean backwards
= compareOptions
& kCFCompareBackwards
;
3788 UInt32 endIndex
= rangeToSearch
.location
+ rangeToSearch
.length
;
3789 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
3790 CFRange rangeBuffer
[MAX_RANGES_ON_STACK
]; // Used to avoid allocating memory
3791 CFRange
*ranges
= rangeBuffer
;
3792 CFIndex foundCount
= 0;
3793 CFIndex capacity
= MAX_RANGES_ON_STACK
;
3795 __CFAssertIsStringAndMutable(string
);
3796 __CFAssertRangeIsInStringBounds(string
, rangeToSearch
.location
, rangeToSearch
.length
);
3798 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
3799 while ((rangeToSearch
.length
> 0) && CFStringFindWithOptions(string
, stringToFind
, rangeToSearch
, compareOptions
, &foundRange
)) {
3800 // Determine the next range
3802 rangeToSearch
.length
= foundRange
.location
- rangeToSearch
.location
;
3804 rangeToSearch
.location
= foundRange
.location
+ foundRange
.length
;
3805 rangeToSearch
.length
= endIndex
- rangeToSearch
.location
;
3808 // If necessary, grow the array
3809 if (foundCount
>= capacity
) {
3810 bool firstAlloc
= (ranges
== rangeBuffer
) ? true : false;
3811 capacity
= (capacity
+ 4) * 2;
3812 // Note that reallocate with NULL previous pointer is same as allocate
3813 ranges
= CFAllocatorReallocate(NULL
, firstAlloc
? NULL
: ranges
, capacity
* sizeof(CFRange
), 0);
3814 if (firstAlloc
) memmove(ranges
, rangeBuffer
, MAX_RANGES_ON_STACK
* sizeof(CFRange
));
3816 ranges
[foundCount
] = foundRange
;
3820 if (foundCount
> 0) {
3821 if (backwards
) { // Reorder the ranges to be incrementing (better to do this here, then to check other places)
3823 int tail
= foundCount
- 1;
3824 while (head
< tail
) {
3825 CFRange temp
= ranges
[head
];
3826 ranges
[head
] = ranges
[tail
];
3827 ranges
[tail
] = temp
;
3832 __CFStringReplaceMultiple(string
, ranges
, foundCount
, replacementString
);
3833 if (ranges
!= rangeBuffer
) CFAllocatorDeallocate(NULL
, ranges
);
3840 // This function is here for NSString purposes
3841 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
3843 int __CFStringCheckAndReplace(CFMutableStringRef str
, CFRange range
, CFStringRef replacement
) {
3844 if (!__CFStrIsMutable(str
)) return _CFStringErrNotMutable
; // These three ifs are always here, for NSString usage
3845 if (!replacement
&& __CFStringNoteErrors()) return _CFStringErrNilArg
;
3846 // We use unsigneds as that is what NSRanges do; we use uint64_t do make sure the sum doesn't wrap (otherwise we'd need to do 3 separate checks). This allows catching bad ranges as described in 3375535. (-1,1)
3847 if (((uint64_t)((unsigned)range
.location
)) + ((uint64_t)((unsigned)range
.length
)) > (uint64_t)__CFStrLength(str
) && __CFStringNoteErrors()) return _CFStringErrBounds
;
3848 __CFAssertIsStringAndMutable(str
);
3849 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
3850 __CFStringReplace(str
, range
, replacement
);
3851 return _CFStringErrNone
;
3854 // This function determines whether errors which would cause string exceptions should
3855 // be ignored or not
3857 Boolean
__CFStringNoteErrors(void) {
3858 return _CFExecutableLinkedOnOrAfter(CFSystemVersionJaguar
) ? true : false;
3863 void CFStringPad(CFMutableStringRef string
, CFStringRef padString
, CFIndex length
, CFIndex indexIntoPad
) {
3864 CFIndex originalLength
;
3866 __CFAssertIsNotNegative(length
);
3867 __CFAssertIsNotNegative(indexIntoPad
);
3869 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID
, void, string
, "_cfPad:length:padIndex:", padString
, length
, indexIntoPad
);
3871 __CFAssertIsStringAndMutable(string
);
3873 originalLength
= __CFStrLength(string
);
3874 if (length
< originalLength
) {
3875 __CFStringChangeSize(string
, CFRangeMake(length
, originalLength
- length
), 0, false);
3876 } else if (originalLength
< length
) {
3880 CFIndex padStringLength
;
3882 CFIndex padRemaining
= length
- originalLength
;
3884 if (CF_IS_OBJC(__kCFStringTypeID
, padString
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3885 padStringLength
= CFStringGetLength(padString
);
3886 isUnicode
= true; /* !!! Bad for now */
3888 __CFAssertIsString(padString
);
3889 padStringLength
= __CFStrLength(padString
);
3890 isUnicode
= __CFStrIsUnicode(string
) || __CFStrIsUnicode(padString
);
3893 charSize
= isUnicode
? sizeof(UniChar
) : sizeof(uint8_t);
3895 __CFStringChangeSize(string
, CFRangeMake(originalLength
, 0), padRemaining
, isUnicode
);
3897 contents
= (uint8_t*)__CFStrContents(string
) + charSize
* originalLength
+ __CFStrSkipAnyLengthByte(string
);
3898 padLength
= padStringLength
- indexIntoPad
;
3899 padLength
= padRemaining
< padLength
? padRemaining
: padLength
;
3901 while (padRemaining
> 0) {
3903 CFStringGetCharacters(padString
, CFRangeMake(indexIntoPad
, padLength
), (UniChar
*)contents
);
3905 CFStringGetBytes(padString
, CFRangeMake(indexIntoPad
, padLength
), __CFStringGetEightBitStringEncoding(), 0, false, contents
, padRemaining
* charSize
, NULL
);
3907 contents
+= padLength
* charSize
;
3908 padRemaining
-= padLength
;
3910 padLength
= padRemaining
< padLength
? padRemaining
: padStringLength
;
3915 void CFStringTrim(CFMutableStringRef string
, CFStringRef trimString
) {
3917 CFIndex newStartIndex
;
3920 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfTrim:", trimString
);
3922 __CFAssertIsStringAndMutable(string
);
3923 __CFAssertIsString(trimString
);
3926 length
= __CFStrLength(string
);
3928 while (CFStringFindWithOptions(string
, trimString
, CFRangeMake(newStartIndex
, length
- newStartIndex
), kCFCompareAnchored
, &range
)) {
3929 newStartIndex
= range
.location
+ range
.length
;
3932 if (newStartIndex
< length
) {
3933 CFIndex charSize
= __CFStrIsUnicode(string
) ? sizeof(UniChar
) : sizeof(uint8_t);
3934 uint8_t *contents
= (uint8_t*)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
3936 length
-= newStartIndex
;
3937 if (__CFStrLength(trimString
) < length
) {
3938 while (CFStringFindWithOptions(string
, trimString
, CFRangeMake(newStartIndex
, length
), kCFCompareAnchored
|kCFCompareBackwards
, &range
)) {
3939 length
= range
.location
- newStartIndex
;
3942 memmove(contents
, contents
+ newStartIndex
* charSize
, length
* charSize
);
3943 __CFStringChangeSize(string
, CFRangeMake(length
, __CFStrLength(string
) - length
), 0, false);
3944 } else { // Only trimString in string, trim all
3945 __CFStringChangeSize(string
, CFRangeMake(0, length
), 0, false);
3949 void CFStringTrimWhitespace(CFMutableStringRef string
) {
3950 CFIndex newStartIndex
;
3952 CFStringInlineBuffer buffer
;
3954 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, void, string
, "_cfTrimWS");
3956 __CFAssertIsStringAndMutable(string
);
3959 length
= __CFStrLength(string
);
3961 CFStringInitInlineBuffer(string
, &buffer
, CFRangeMake(0, length
));
3962 CFIndex buffer_idx
= 0;
3964 while (buffer_idx
< length
&& CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer
, buffer_idx
), kCFUniCharWhitespaceAndNewlineCharacterSet
))
3966 newStartIndex
= buffer_idx
;
3968 if (newStartIndex
< length
) {
3969 uint8_t *contents
= (uint8_t*)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
3970 CFIndex charSize
= (__CFStrIsUnicode(string
) ? sizeof(UniChar
) : sizeof(uint8_t));
3972 buffer_idx
= length
- 1;
3973 while (0 <= buffer_idx
&& CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer
, buffer_idx
), kCFUniCharWhitespaceAndNewlineCharacterSet
))
3975 length
= buffer_idx
- newStartIndex
+ 1;
3977 memmove(contents
, contents
+ newStartIndex
* charSize
, length
* charSize
);
3978 __CFStringChangeSize(string
, CFRangeMake(length
, __CFStrLength(string
) - length
), 0, false);
3979 } else { // Whitespace only string
3980 __CFStringChangeSize(string
, CFRangeMake(0, length
), 0, false);
// CFStringLowercase: converts the mutable `string` to lowercase in place.
//
// Two passes are visible below:
//   1. If no language code is obtained for `locale` and the string is stored
//      8-bit, 'A'..'Z' are folded directly in the byte buffer. Bytes above 127
//      are detected by a separate test (the body of that branch is not shown
//      in this listing; presumably it leaves the loop so pass 2 takes over).
//   2. Whatever is left (currentIndex < length) is processed as UniChars with
//      CFUniCharMapCaseTo(kCFUniCharToLowercase), resizing the string when a
//      mapping is shorter or longer than the character it replaces.
3984 void CFStringLowercase(CFMutableStringRef string
, CFLocaleRef locale
) {
3985 CFIndex currentIndex
= 0;
3987 const char *langCode
;
3988 Boolean isEightBit
= __CFStrIsEightBit(string
);
3990 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfLowercase:", locale
);
3992 __CFAssertIsStringAndMutable(string
);
3994 length
= __CFStrLength(string
);
// langCode stays NULL when _CFCanUseLocale() rejects the locale.
3996 langCode
= (_CFCanUseLocale(locale
) ? _CFStrGetLanguageIdentifierForLocale(locale
) : NULL
);
// Pass 1: ASCII-only folding directly on the 8-bit buffer.
3998 if (!langCode
&& isEightBit
) {
3999 uint8_t *contents
= (uint8_t*)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4000 for (;currentIndex
< length
;currentIndex
++) {
4001 if (contents
[currentIndex
] >= 'A' && contents
[currentIndex
] <= 'Z') {
4002 contents
[currentIndex
] += 'a' - 'A';
4003 } else if (contents
[currentIndex
] > 127) {
// Pass 2: general Unicode case mapping for the remaining characters.
4009 if (currentIndex
< length
) {
4011 UniChar mappedCharacters
[MAX_CASE_MAPPING_BUF
];
4012 CFIndex mappedLength
;
4013 UTF32Char currentChar
;
// A zero-length change with the last argument true is used here on an 8-bit
// string before its contents are read as UniChars below.
4016 if (isEightBit
) __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true);
4018 contents
= (UniChar
*)__CFStrContents(string
);
4020 for (;currentIndex
< length
;currentIndex
++) {
// Combine a high/low surrogate pair into one UTF-32 character when present.
4022 if (CFUniCharIsSurrogateHighCharacter(contents
[currentIndex
]) && (currentIndex
+ 1 < length
) && CFUniCharIsSurrogateLowCharacter(contents
[currentIndex
+ 1])) {
4023 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(contents
[currentIndex
], contents
[currentIndex
+ 1]);
4025 currentChar
= contents
[currentIndex
];
// Conditional mapping flags are only computed when a language code is set or
// the character is U+03A3; otherwise flags is 0.
4027 flags
= ((langCode
|| (currentChar
== 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar
, contents
, currentIndex
, length
, kCFUniCharToLowercase
, langCode
, flags
) : 0);
4029 mappedLength
= CFUniCharMapCaseTo(currentChar
, mappedCharacters
, MAX_CASE_MAPPING_BUF
, kCFUniCharToLowercase
, flags
, langCode
);
// The first mapped unit always overwrites the current slot.
4030 if (mappedLength
> 0) contents
[currentIndex
] = *mappedCharacters
;
// The source character occupied two UniChars; the visible statements below
// remove both, remove one, reuse both, or insert extra room.
// (The `case` labels themselves are not shown in this listing.)
4032 if (currentChar
> 0xFFFF) { // Non-BMP char
4033 switch (mappedLength
) {
4035 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 2), 0, true);
4036 contents
= (UniChar
*)__CFStrContents(string
);
4041 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 1), 0, true);
4042 contents
= (UniChar
*)__CFStrContents(string
);
4047 contents
[++currentIndex
] = mappedCharacters
[1];
4051 --mappedLength
; // Skip the current char
4052 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
- 1, true);
4053 contents
= (UniChar
*)__CFStrContents(string
);
4054 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4055 length
+= (mappedLength
- 1);
4056 currentIndex
+= mappedLength
;
// BMP character that maps to nothing: delete its single UniChar.
4059 } else if (mappedLength
== 0) {
4060 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 1), 0, true);
4061 contents
= (UniChar
*)__CFStrContents(string
);
// BMP character that maps to several UniChars: make room after the current
// slot and copy the remaining mapped units in.
4063 } else if (mappedLength
> 1) {
4064 --mappedLength
; // Skip the current char
4065 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
, true);
4066 contents
= (UniChar
*)__CFStrContents(string
);
4067 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4068 length
+= mappedLength
;
4069 currentIndex
+= mappedLength
;
// CFStringUppercase: converts the mutable `string` to uppercase in place.
//
// Same two-pass structure as the lowercase routine:
//   1. If no language code is obtained for `locale` and the string is stored
//      8-bit, 'a'..'z' are folded directly in the byte buffer. Bytes above 127
//      are detected by a separate test (the body of that branch is not shown
//      in this listing; presumably it leaves the loop so pass 2 takes over).
//   2. Whatever is left (currentIndex < length) is processed as UniChars with
//      CFUniCharMapCaseTo(kCFUniCharToUppercase), resizing the string when a
//      mapping is shorter or longer than the character it replaces.
4075 void CFStringUppercase(CFMutableStringRef string
, CFLocaleRef locale
) {
4076 CFIndex currentIndex
= 0;
4078 const char *langCode
;
4079 Boolean isEightBit
= __CFStrIsEightBit(string
);
4081 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfUppercase:", locale
);
4083 __CFAssertIsStringAndMutable(string
);
4085 length
= __CFStrLength(string
);
// langCode stays NULL when _CFCanUseLocale() rejects the locale.
4087 langCode
= (_CFCanUseLocale(locale
) ? _CFStrGetLanguageIdentifierForLocale(locale
) : NULL
);
// Pass 1: ASCII-only folding directly on the 8-bit buffer.
4089 if (!langCode
&& isEightBit
) {
4090 uint8_t *contents
= (uint8_t*)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4091 for (;currentIndex
< length
;currentIndex
++) {
4092 if (contents
[currentIndex
] >= 'a' && contents
[currentIndex
] <= 'z') {
4093 contents
[currentIndex
] -= 'a' - 'A';
4094 } else if (contents
[currentIndex
] > 127) {
// Pass 2: general Unicode case mapping for the remaining characters.
4100 if (currentIndex
< length
) {
4102 UniChar mappedCharacters
[MAX_CASE_MAPPING_BUF
];
4103 CFIndex mappedLength
;
4104 UTF32Char currentChar
;
// A zero-length change with the last argument true is used here on an 8-bit
// string before its contents are read as UniChars below.
4107 if (isEightBit
) __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true);
4109 contents
= (UniChar
*)__CFStrContents(string
);
4111 for (;currentIndex
< length
;currentIndex
++) {
// Combine a high/low surrogate pair into one UTF-32 character when present.
4112 if (CFUniCharIsSurrogateHighCharacter(contents
[currentIndex
]) && (currentIndex
+ 1 < length
) && CFUniCharIsSurrogateLowCharacter(contents
[currentIndex
+ 1])) {
4113 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(contents
[currentIndex
], contents
[currentIndex
+ 1]);
4115 currentChar
= contents
[currentIndex
];
// Conditional mapping flags are only computed when a language code is set;
// otherwise flags is 0.
4118 flags
= (langCode
? CFUniCharGetConditionalCaseMappingFlags(currentChar
, contents
, currentIndex
, length
, kCFUniCharToUppercase
, langCode
, flags
) : 0);
4120 mappedLength
= CFUniCharMapCaseTo(currentChar
, mappedCharacters
, MAX_CASE_MAPPING_BUF
, kCFUniCharToUppercase
, flags
, langCode
);
// The first mapped unit always overwrites the current slot.
4121 if (mappedLength
> 0) contents
[currentIndex
] = *mappedCharacters
;
// The source character occupied two UniChars; the visible statements below
// remove both, remove one, reuse both, or insert extra room.
// (The `case` labels themselves are not shown in this listing.)
4123 if (currentChar
> 0xFFFF) { // Non-BMP char
4124 switch (mappedLength
) {
4126 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 2), 0, true);
4127 contents
= (UniChar
*)__CFStrContents(string
);
4132 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 1), 0, true);
4133 contents
= (UniChar
*)__CFStrContents(string
);
4138 contents
[++currentIndex
] = mappedCharacters
[1];
4142 --mappedLength
; // Skip the current char
4143 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
- 1, true);
4144 contents
= (UniChar
*)__CFStrContents(string
);
4145 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4146 length
+= (mappedLength
- 1);
4147 currentIndex
+= mappedLength
;
// BMP character that maps to nothing: delete its single UniChar.
4150 } else if (mappedLength
== 0) {
4151 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 1), 0, true);
4152 contents
= (UniChar
*)__CFStrContents(string
);
// BMP character that maps to several UniChars: make room after the current
// slot and copy the remaining mapped units in.
4154 } else if (mappedLength
> 1) {
4155 --mappedLength
; // Skip the current char
4156 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
, true);
4157 contents
= (UniChar
*)__CFStrContents(string
);
4158 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4159 length
+= mappedLength
;
4160 currentIndex
+= mappedLength
;
// CFStringCapitalize: capitalizes the mutable `string` in place.
//
// `isLastCased` tracks whether the previous significant character was a cased
// letter: while it is false the next letter is mapped with
// kCFUniCharToTitlecase (or uppercased in the 8-bit pass), while it is true
// the next letter is mapped with kCFUniCharToLowercase.
// Characters in the case-ignorable set leave the state untouched.
//
// As in the lowercase/uppercase routines there is an 8-bit ASCII pass followed
// by a general UniChar pass for whatever remains.
4167 void CFStringCapitalize(CFMutableStringRef string
, CFLocaleRef locale
) {
4168 CFIndex currentIndex
= 0;
4170 const char *langCode
;
4171 Boolean isEightBit
= __CFStrIsEightBit(string
);
4172 Boolean isLastCased
= false;
// Cached across calls; filled in on first use further down.
4173 static const uint8_t *caseIgnorableForBMP
= NULL
;
4175 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfCapitalize:", locale
);
4177 __CFAssertIsStringAndMutable(string
);
4179 length
= __CFStrLength(string
);
// NOTE(review): unsynchronized lazy initialization of a function-level static;
// confirm callers tolerate concurrent first use.
4181 if (NULL
== caseIgnorableForBMP
) caseIgnorableForBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet
, 0);
// langCode stays NULL when _CFCanUseLocale() rejects the locale.
4183 langCode
= (_CFCanUseLocale(locale
) ? _CFStrGetLanguageIdentifierForLocale(locale
) : NULL
);
// Pass 1: ASCII-only handling directly on the 8-bit buffer.
4185 if (!langCode
&& isEightBit
) {
4186 uint8_t *contents
= (uint8_t*)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4187 for (;currentIndex
< length
;currentIndex
++) {
// Bytes above 127 are tested first (branch body not shown in this listing;
// presumably it leaves the loop so pass 2 takes over).
4188 if (contents
[currentIndex
] > 127) {
// Uppercase letter: lowered only when it follows a cased letter.
4190 } else if (contents
[currentIndex
] >= 'A' && contents
[currentIndex
] <= 'Z') {
4191 contents
[currentIndex
] += (isLastCased
? 'a' - 'A' : 0);
// Lowercase letter: raised only when it does not follow a cased letter.
4193 } else if (contents
[currentIndex
] >= 'a' && contents
[currentIndex
] <= 'z') {
4194 contents
[currentIndex
] -= (!isLastCased
? 'a' - 'A' : 0);
// Any other character that is not case-ignorable ends the current word.
4196 } else if (!CFUniCharIsMemberOfBitmap(contents
[currentIndex
], caseIgnorableForBMP
)) {
4197 isLastCased
= false;
// Pass 2: general Unicode case mapping for the remaining characters.
4202 if (currentIndex
< length
) {
4204 UniChar mappedCharacters
[MAX_CASE_MAPPING_BUF
];
4205 CFIndex mappedLength
;
4206 UTF32Char currentChar
;
// A zero-length change with the last argument true is used here on an 8-bit
// string before its contents are read as UniChars below.
4209 if (isEightBit
) __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true);
4211 contents
= (UniChar
*)__CFStrContents(string
);
4213 for (;currentIndex
< length
;currentIndex
++) {
// Combine a high/low surrogate pair into one UTF-32 character when present.
4214 if (CFUniCharIsSurrogateHighCharacter(contents
[currentIndex
]) && (currentIndex
+ 1 < length
) && CFUniCharIsSurrogateLowCharacter(contents
[currentIndex
+ 1])) {
4215 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(contents
[currentIndex
], contents
[currentIndex
+ 1]);
4217 currentChar
= contents
[currentIndex
];
// Conditional flags are computed when a language code is set, or for U+03A3
// when it follows a cased letter; otherwise flags is 0.
4219 flags
= ((langCode
|| ((currentChar
== 0x03A3) && isLastCased
)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar
, contents
, currentIndex
, length
, (isLastCased
? kCFUniCharToLowercase
: kCFUniCharToTitlecase
), langCode
, flags
) : 0);
// Lowercase inside a word, titlecase at the start of one.
4221 mappedLength
= CFUniCharMapCaseTo(currentChar
, mappedCharacters
, MAX_CASE_MAPPING_BUF
, (isLastCased
? kCFUniCharToLowercase
: kCFUniCharToTitlecase
), flags
, langCode
);
// The first mapped unit always overwrites the current slot.
4222 if (mappedLength
> 0) contents
[currentIndex
] = *mappedCharacters
;
// The source character occupied two UniChars; the visible statements below
// remove both, remove one, reuse both, or insert extra room.
// (The `case` labels themselves are not shown in this listing.)
4224 if (currentChar
> 0xFFFF) { // Non-BMP char
4225 switch (mappedLength
) {
4227 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 2), 0, true);
4228 contents
= (UniChar
*)__CFStrContents(string
);
4233 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 1), 0, true);
4234 contents
= (UniChar
*)__CFStrContents(string
);
4239 contents
[++currentIndex
] = mappedCharacters
[1];
4243 --mappedLength
; // Skip the current char
4244 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
- 1, true);
4245 contents
= (UniChar
*)__CFStrContents(string
);
4246 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4247 length
+= (mappedLength
- 1);
4248 currentIndex
+= mappedLength
;
// BMP character that maps to nothing: delete its single UniChar.
4251 } else if (mappedLength
== 0) {
4252 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 1), 0, true);
4253 contents
= (UniChar
*)__CFStrContents(string
);
// BMP character that maps to several UniChars: make room after the current
// slot and copy the remaining mapped units in.
4255 } else if (mappedLength
> 1) {
4256 --mappedLength
; // Skip the current char
4257 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
, true);
4258 contents
= (UniChar
*)__CFStrContents(string
);
4259 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4260 length
+= mappedLength
;
4261 currentIndex
+= mappedLength
;
// Update the word state: case-ignorable characters are skipped, anything else
// sets isLastCased according to whether the original character is a letter
// from the uppercase or lowercase letter sets.
4264 if (!((currentChar
> 0xFFFF) ? CFUniCharIsMemberOf(currentChar
, kCFUniCharCaseIgnorableCharacterSet
) : CFUniCharIsMemberOfBitmap(currentChar
, caseIgnorableForBMP
))) { // We have non-caseignorable here
4265 isLastCased
= ((CFUniCharIsMemberOf(currentChar
, kCFUniCharUppercaseLetterCharacterSet
) || CFUniCharIsMemberOf(currentChar
, kCFUniCharLowercaseLetterCharacterSet
)) ? true : false);
// Number of UTF32Char slots in the on-stack decomposition buffer used by
// CFStringNormalize(); also the step by which that buffer is grown.
4272 #define MAX_DECOMP_BUF 64
// Constants for composing Hangul syllables arithmetically from leading (L),
// vowel (V) and trailing (T) jamo, used below as
//   (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE
// *BASE values are the first code point of each range, *COUNT the range sizes.
4274 #define HANGUL_SBASE 0xAC00
4275 #define HANGUL_LBASE 0x1100
4276 #define HANGUL_VBASE 0x1161
4277 #define HANGUL_TBASE 0x11A7
4278 #define HANGUL_SCOUNT 11172
4279 #define HANGUL_LCOUNT 19
4280 #define HANGUL_VCOUNT 21
4281 #define HANGUL_TCOUNT 28
4282 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
4284 CF_INLINE
uint32_t __CFGetUTF16Length(const UTF32Char
*characters
, uint32_t utf32Length
) {
4285 const UTF32Char
*limit
= characters
+ utf32Length
;
4286 uint32_t length
= 0;
4288 while (characters
< limit
) length
+= (*(characters
++) > 0xFFFF ? 2 : 1);
4293 CF_INLINE
void __CFFillInUTF16(const UTF32Char
*characters
, UTF16Char
*dst
, uint32_t utf32Length
) {
4294 const UTF32Char
*limit
= characters
+ utf32Length
;
4295 UTF32Char currentChar
;
4297 while (characters
< limit
) {
4298 currentChar
= *(characters
++);
4299 if (currentChar
> 0xFFFF) {
4300 currentChar
-= 0x10000;
4301 *(dst
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
4302 *(dst
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
4304 *(dst
++) = currentChar
;
// CFStringNormalize: rewrites the mutable `string` in place in the Unicode
// normalization form selected by `theForm`.
//
// Outline of what is visible below:
//   - 8-bit strings: form C returns immediately; otherwise the bytes are
//     scanned and the string is switched to Unicode at the first byte > 127.
//   - Unicode strings: each base character is decomposed into a UTF-32 work
//     buffer together with the non-base (combining) characters that follow it,
//     the run is priority-sorted when needed, optionally compatibility-
//     decomposed (form bit KD) and optionally recomposed (form bit C,
//     including arithmetic Hangul composition), and then written back as
//     UTF-16, resizing the string if the length changed.
// The work buffer starts on the stack (`buffer`) and moves to the heap when a
// run outgrows it; it is released at the end.
4309 void CFStringNormalize(CFMutableStringRef string
, CFStringNormalizationForm theForm
) {
4310 CFIndex currentIndex
= 0;
4312 bool needToReorder
= true;
4314 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfNormalize:", theForm
);
4316 __CFAssertIsStringAndMutable(string
);
4318 length
= __CFStrLength(string
);
// 8-bit storage: look for the first byte that is not plain ASCII.
4320 if (__CFStrIsEightBit(string
)) {
4323 if (theForm
== kCFStringNormalizationFormC
) return; // 8bit form has no decomposition
4325 contents
= (uint8_t*)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4327 for (;currentIndex
< length
;currentIndex
++) {
4328 if (contents
[currentIndex
] > 127) {
4329 __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true); // need to do harm way
4330 needToReorder
= false;
// General path over UTF-16 contents, starting at currentIndex.
4336 if (currentIndex
< length
) {
4337 UTF16Char
*limit
= (UTF16Char
*)__CFStrContents(string
) + length
;
4338 UTF16Char
*contents
= (UTF16Char
*)__CFStrContents(string
) + currentIndex
;
4339 UTF32Char buffer
[MAX_DECOMP_BUF
];
4340 UTF32Char
*mappedCharacters
= buffer
;
4341 CFIndex allocatedLength
= MAX_DECOMP_BUF
;
4342 CFIndex mappedLength
;
4343 CFIndex currentLength
;
4344 UTF32Char currentChar
;
4346 while (contents
< limit
) {
// Fetch the next character, joining a surrogate pair when present.
4347 if (CFUniCharIsSurrogateHighCharacter(*contents
) && (contents
+ 1 < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
4348 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*contents
, *(contents
+ 1));
4352 currentChar
= *(contents
++);
// Canonically decompose base characters that have a decomposition.
4358 if (CFUniCharIsMemberOf(currentChar
, kCFUniCharCanonicalDecomposableCharacterSet
) && !CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) {
4359 if ((theForm
& kCFStringNormalizationFormC
) == 0 || currentChar
< HANGUL_SBASE
|| currentChar
> (HANGUL_SBASE
+ HANGUL_SCOUNT
)) { // We don't have to decompose Hangul Syllables if we're precomposing again
4360 mappedLength
= CFUniCharDecomposeCharacter(currentChar
, mappedCharacters
, MAX_DECOMP_BUF
);
// Collect the combining characters that follow the base character.
4364 if ((needToReorder
|| (theForm
& kCFStringNormalizationFormC
)) && ((contents
< limit
) || (mappedLength
== 0))) {
4365 if (mappedLength
> 0) {
4366 if (CFUniCharIsSurrogateHighCharacter(*contents
) && (contents
+ 1 < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
4367 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*contents
, *(contents
+ 1));
4369 currentChar
= *contents
;
4373 if (CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) {
4374 uint32_t decompLength
;
// Nothing was decomposed yet: step back so the character preceding this
// combining mark becomes the first entry of the work buffer.
4376 if (mappedLength
== 0) {
4377 contents
-= (currentChar
& 0xFFFF0000 ? 2 : 1);
4378 if (currentIndex
> 0) {
4379 if (CFUniCharIsSurrogateLowCharacter(*(contents
- 1)) && (currentIndex
> 1) && CFUniCharIsSurrogateHighCharacter(*(contents
- 2))) {
4380 *mappedCharacters
= CFUniCharGetLongCharacterForSurrogatePair(*(contents
- 2), *(contents
- 1));
4384 *mappedCharacters
= *(contents
- 1);
4391 currentLength
+= (currentChar
& 0xFFFF0000 ? 2 : 1);
4393 contents
+= (currentChar
& 0xFFFF0000 ? 2 : 1);
4395 if (CFUniCharIsMemberOf(currentChar
, kCFUniCharDecomposableCharacterSet
)) { // Vietnamese accent, etc.
4396 decompLength
= CFUniCharDecomposeCharacter(currentChar
, mappedCharacters
+ mappedLength
, MAX_DECOMP_BUF
- mappedLength
);
4397 mappedLength
+= decompLength
;
4399 mappedCharacters
[mappedLength
++] = currentChar
;
// Keep appending while further non-base characters follow.
4402 while (contents
< limit
) {
4403 if (CFUniCharIsSurrogateHighCharacter(*contents
) && (contents
+ 1 < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
4404 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*contents
, *(contents
+ 1));
4406 currentChar
= *contents
;
4408 if (!CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) break;
4409 if (currentChar
& 0xFFFF0000) {
// Grow the work buffer by MAX_DECOMP_BUF when it is exactly full; the first
// growth copies the stack buffer to the heap.
4416 if (mappedLength
== allocatedLength
) {
4417 allocatedLength
+= MAX_DECOMP_BUF
;
4418 if (mappedCharacters
== buffer
) {
4419 mappedCharacters
= (UTF32Char
*)CFAllocatorAllocate(NULL
, allocatedLength
* sizeof(UTF32Char
), 0);
4420 memmove(mappedCharacters
, buffer
, MAX_DECOMP_BUF
* sizeof(UTF32Char
));
4422 mappedCharacters
= (UTF32Char
*)CFAllocatorReallocate(NULL
, mappedCharacters
, allocatedLength
* sizeof(UTF32Char
), 0);
// NOTE(review): the capacity passed here is MAX_DECOMP_BUF - mappedLength,
// not allocatedLength - mappedLength; once the buffer has grown past
// MAX_DECOMP_BUF this value can reach zero or go negative. Confirm how
// CFUniCharDecomposeCharacter() treats that.
4425 if (CFUniCharIsMemberOf(currentChar
, kCFUniCharDecomposableCharacterSet
)) { // Vietnamese accent, etc.
4426 decompLength
= CFUniCharDecomposeCharacter(currentChar
, mappedCharacters
+ mappedLength
, MAX_DECOMP_BUF
- mappedLength
);
4427 mappedLength
+= decompLength
;
4429 mappedCharacters
[mappedLength
++] = currentChar
;
// Put the collected run into canonical order.
4433 if (needToReorder
&& mappedLength
> 1) CFUniCharPrioritySort(mappedCharacters
, mappedLength
);
// Compatibility decomposition (form bit KD), retried with a larger buffer
// whenever CFUniCharCompatibilityDecompose() returns 0.
4436 if (theForm
& kCFStringNormalizationFormKD
) {
4437 CFIndex newLength
= 0;
4439 if (mappedLength
== 0 && CFUniCharIsMemberOf(currentChar
, kCFUniCharCompatibilityDecomposableCharacterSet
)) {
4440 mappedCharacters
[mappedLength
++] = currentChar
;
4442 while (newLength
< mappedLength
) {
4443 newLength
= CFUniCharCompatibilityDecompose(mappedCharacters
, mappedLength
, allocatedLength
);
4444 if (newLength
== 0) {
4445 allocatedLength
+= MAX_DECOMP_BUF
;
4446 if (mappedCharacters
== buffer
) {
4447 mappedCharacters
= (UTF32Char
*)CFAllocatorAllocate(NULL
, allocatedLength
* sizeof(UTF32Char
), 0);
4448 memmove(mappedCharacters
, buffer
, MAX_DECOMP_BUF
* sizeof(UTF32Char
));
4450 mappedCharacters
= (UTF32Char
*)CFAllocatorReallocate(NULL
, mappedCharacters
, allocatedLength
* sizeof(UTF32Char
), 0);
4454 mappedLength
= newLength
;
// Recomposition (form bit C). Slots consumed by a composition are marked
// with 0xFFFD and squeezed out afterwards.
4457 if (theForm
& kCFStringNormalizationFormC
) {
4458 if (mappedLength
> 1) {
4459 CFIndex consumedLength
= 1;
4461 UTF32Char
*currentBase
= mappedCharacters
;
4462 uint8_t currentClass
, lastClass
= 0;
4463 const uint8_t *bmpClassTable
= CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
4464 bool didCombine
= false;
4466 currentChar
= *mappedCharacters
;
4468 while (consumedLength
< mappedLength
) {
4469 nextChar
= mappedCharacters
[consumedLength
];
4470 currentClass
= (nextChar
& 0xFFFF0000 ? CFUniCharGetUnicodeProperty(nextChar
, kCFUniCharCombiningProperty
) : CFUniCharGetCombiningPropertyForCharacter(nextChar
, bmpClassTable
));
// Hangul jamo inside the work buffer (only reached with the KD bit set).
4472 if (theForm
& kCFStringNormalizationFormKD
) {
4473 if ((currentChar
>= HANGUL_LBASE
) && (currentChar
< (HANGUL_LBASE
+ 0xFF))) {
4474 SInt8 lIndex
= currentChar
- HANGUL_LBASE
;
// NOTE(review): the index checks here and below use `<= HANGUL_xCOUNT`
// (and `> HANGUL_TCOUNT`), which admit an index equal to the count. If the
// valid indexes are 0..COUNT-1 this is an off-by-one; confirm against the
// Hangul composition algorithm before changing.
4476 if ((0 <= lIndex
) && (lIndex
<= HANGUL_LCOUNT
)) {
4477 SInt16 vIndex
= nextChar
- HANGUL_VBASE
;
4479 if ((vIndex
>= 0) && (vIndex
<= HANGUL_VCOUNT
)) {
4481 CFIndex usedLength
= mappedLength
;
4483 mappedCharacters
[consumedLength
++] = 0xFFFD;
4485 if (consumedLength
< mappedLength
) {
4486 tIndex
= mappedCharacters
[consumedLength
] - HANGUL_TBASE
;
4487 if ((tIndex
< 0) || (tIndex
> HANGUL_TCOUNT
)) {
4490 mappedCharacters
[consumedLength
++] = 0xFFFD;
4493 *currentBase
= (lIndex
* HANGUL_VCOUNT
+ vIndex
) * HANGUL_TCOUNT
+ tIndex
+ HANGUL_SBASE
;
// Remove the 0xFFFD placeholders left by the composition above.
4495 while (--usedLength
> 0) {
4496 if (mappedCharacters
[usedLength
] == 0xFFFD) {
4499 memmove(mappedCharacters
+ usedLength
, mappedCharacters
+ usedLength
+ 1, (mappedLength
- usedLength
) * sizeof(UTF32Char
));
4502 currentBase
= mappedCharacters
+ consumedLength
;
4503 currentChar
= *currentBase
;
// A new base character starts a new composition target.
4510 if (!CFUniCharIsMemberOf(nextChar
, kCFUniCharNonBaseCharacterSet
)) {
4511 *currentBase
= currentChar
;
4512 currentBase
= mappedCharacters
+ consumedLength
;
4513 currentChar
= nextChar
;
// Try to combine only when the combining class differs from the last one
// that failed to combine (or none failed yet).
4518 if ((lastClass
== 0) || (currentClass
!= lastClass
)) {
4519 nextChar
= CFUniCharPrecomposeCharacter(currentChar
, nextChar
);
4520 if (nextChar
== 0xFFFD) {
4521 lastClass
= currentClass
;
4523 mappedCharacters
[consumedLength
] = 0xFFFD;
4525 currentChar
= nextChar
;
4532 *currentBase
= currentChar
;
// Remove the 0xFFFD placeholders left by successful compositions.
4534 consumedLength
= mappedLength
;
4535 while (--consumedLength
> 0) {
4536 if (mappedCharacters
[consumedLength
] == 0xFFFD) {
4538 memmove(mappedCharacters
+ consumedLength
, mappedCharacters
+ consumedLength
+ 1, (mappedLength
- consumedLength
) * sizeof(UTF32Char
));
// Single Hangul jamo read directly from the string: compose it with the
// following vowel (and optional trailing consonant) in the string itself.
// NOTE(review): same `<=` / `>` index bounds as in the buffer case above.
4542 } else if ((currentChar
>= HANGUL_LBASE
) && (currentChar
< (HANGUL_LBASE
+ 0xFF))) { // Hangul Jamo
4543 SInt8 lIndex
= currentChar
- HANGUL_LBASE
;
4545 if ((contents
< limit
) && (0 <= lIndex
) && (lIndex
<= HANGUL_LCOUNT
)) {
4546 SInt16 vIndex
= *contents
- HANGUL_VBASE
;
4548 if ((vIndex
>= 0) && (vIndex
<= HANGUL_VCOUNT
)) {
4551 ++contents
; ++currentLength
;
4553 if (contents
< limit
) {
4554 tIndex
= *contents
- HANGUL_TBASE
;
4555 if ((tIndex
< 0) || (tIndex
> HANGUL_TCOUNT
)) {
4558 ++contents
; ++currentLength
;
4561 *mappedCharacters
= (lIndex
* HANGUL_VCOUNT
+ vIndex
) * HANGUL_TCOUNT
+ tIndex
+ HANGUL_SBASE
;
// Write the work buffer back as UTF-16. If the encoded length differs from
// the span it replaces, the string is resized and contents/limit are
// re-derived from the string afterwards.
4568 if (mappedLength
> 0) {
4569 CFIndex utf16Length
= __CFGetUTF16Length(mappedCharacters
, mappedLength
);
4571 if (utf16Length
!= currentLength
) {
4572 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, currentLength
), utf16Length
, true);
4573 currentLength
= utf16Length
;
4575 contents
= (UTF16Char
*)__CFStrContents(string
);
4576 limit
= contents
+ __CFStrLength(string
);
4577 contents
+= currentIndex
;
4578 __CFFillInUTF16(mappedCharacters
, contents
, mappedLength
);
4579 contents
+= utf16Length
;
4581 currentIndex
+= currentLength
;
// Release the work buffer if it was moved to the heap.
4584 if (mappedCharacters
!= buffer
) CFAllocatorDeallocate(NULL
, mappedCharacters
);
// Bit flags stored in a format spec's `flags` field by __CFParseFormatSpec().
// Each flag occupies its own bit so they can be OR-ed together.
4590 kCFStringFormatZeroFlag
= (1 << 0), // if not, padding is space char
4591 kCFStringFormatMinusFlag
= (1 << 1), // if not, no flag implied
4592 kCFStringFormatPlusFlag
= (1 << 2), // if not, no flag implied, overrides space
4593 kCFStringFormatSpaceFlag
= (1 << 3) // if not, no flag implied
// Size and type codes that __CFParseFormatSpec() stores in a format spec's
// `size` and `type` fields. Only some enumerators are visible in this listing
// (for example, the CFFormatSize1/2/4/8 values used by the parser are not shown).
// Size codes:
4621 CFFormatDefaultSize
= 0,
4626 CFFormatSize16
= 5, /* unused */
// Type codes; values from 32 upward:
4630 CFFormatLiteralType
= 32,
4631 CFFormatLongType
= 33,
4632 CFFormatDoubleType
= 34,
4633 CFFormatPointerType
= 35,
4634 CFFormatObjectType
= 36, /* handled specially */ /* ??? not used anymore, can be removed? */
4635 CFFormatCFType
= 37, /* handled specially */
4636 CFFormatUnicharsType
= 38, /* handled specially */
4637 CFFormatCharsType
= 39, /* handled specially */
4638 CFFormatPascalCharsType
= 40, /* handled specially */
4639 CFFormatSingleUnicharType
= 41 /* handled specially */
4642 CF_INLINE
void __CFParseFormatSpec(const UniChar
*uformat
, const uint8_t *cformat
, SInt32
*fmtIdx
, SInt32 fmtLen
, CFFormatSpec
*spec
) {
4643 Boolean seenDot
= false;
4646 if (fmtLen
<= *fmtIdx
) return; /* no type */
4647 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)++]; else ch
= uformat
[(*fmtIdx
)++];
4648 reswtch
:switch (ch
) {
4649 case '#': // ignored for now
4652 if (!(spec
->flags
& kCFStringFormatPlusFlag
)) spec
->flags
|= kCFStringFormatSpaceFlag
;
4655 spec
->flags
|= kCFStringFormatMinusFlag
;
4656 spec
->flags
&= ~kCFStringFormatZeroFlag
; // remove zero flag
4659 spec
->flags
|= kCFStringFormatPlusFlag
;
4660 spec
->flags
&= ~kCFStringFormatSpaceFlag
; // remove space flag
4663 if (!(spec
->flags
& kCFStringFormatMinusFlag
)) spec
->flags
|= kCFStringFormatZeroFlag
;
4666 spec
->size
= CFFormatSize2
;
4669 if (*fmtIdx
< fmtLen
) {
4670 // fetch next character, don't increment fmtIdx
4671 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)]; else ch
= uformat
[(*fmtIdx
)];
4672 if ('l' == ch
) { // 'll' for long long, like 'q'
4674 spec
->size
= CFFormatSize8
;
4678 spec
->size
= CFFormatSize4
;
4681 spec
->size
= CFFormatSize8
;
4684 spec
->type
= CFFormatLongType
;
4685 spec
->size
= CFFormatSize1
;
4687 case 'O': case 'o': case 'D': case 'd': case 'i': case 'U': case 'u': case 'x': case 'X':
4688 spec
->type
= CFFormatLongType
;
4690 case 'e': case 'E': case 'f': case 'g': case 'G':
4691 spec
->type
= CFFormatDoubleType
;
4692 spec
->size
= CFFormatSize8
;
4694 case 'n': case 'p': /* %n is not handled correctly currently */
4695 spec
->type
= CFFormatPointerType
;
4696 spec
->size
= CFFormatSize4
;
4699 spec
->type
= CFFormatCharsType
;
4700 spec
->size
= CFFormatSize4
;
4703 spec
->type
= CFFormatUnicharsType
;
4704 spec
->size
= CFFormatSize4
;
4707 spec
->type
= CFFormatSingleUnicharType
;
4708 spec
->size
= CFFormatSize2
;
4711 spec
->type
= CFFormatPascalCharsType
;
4712 spec
->size
= CFFormatSize4
;
4715 spec
->type
= CFFormatCFType
;
4716 spec
->size
= CFFormatSize4
;
4718 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
4721 number
= 10 * number
+ (ch
- '0');
4722 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)++]; else ch
= uformat
[(*fmtIdx
)++];
4723 } while ((UInt32
)(ch
- '0') <= 9);
4725 if (-2 == spec
->precArgNum
) {
4726 spec
->precArgNum
= number
- 1; // Arg numbers start from 1
4727 } else if (-2 == spec
->widthArgNum
) {
4728 spec
->widthArgNum
= number
- 1; // Arg numbers start from 1
4730 spec
->mainArgNum
= number
- 1; // Arg numbers start from 1
4733 } else if (seenDot
) { /* else it's either precision or width */
4734 spec
->precArg
= (SInt32
)number
;
4736 spec
->widthArg
= (SInt32
)number
;
4741 spec
->widthArgNum
= -2;
4745 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)++]; else ch
= uformat
[(*fmtIdx
)++];
4747 spec
->precArgNum
= -2;
4752 spec
->type
= CFFormatLiteralType
;
#if defined(__WIN32__)
/* Minimal snprintf() for Win32, layered on _vsnprintf().

   b: destination buffer; n: its size in bytes; f: printf-style format.
   Returns whatever _vsnprintf() returns (negative when the output did not
   fit).  Unlike a bare _vsnprintf(), the buffer is always NUL-terminated when
   n > 0: _vsnprintf() writes no terminator once the output fills or exceeds
   the buffer. */
static int snprintf(char *b, size_t n, const char * f, ...) {
    int retval;
    va_list args;
    va_start(args, f);
    retval = _vsnprintf(b, n, f, args);
    va_end(args);
    if (n > 0 && (retval < 0 || (size_t)retval >= n)) {
        b[n - 1] = '\0';
    }
    return retval;
}
#endif /* __WIN32__ */
4769 /* ??? It ignores the formatOptions argument.
4770 ??? %s depends on handling of encodings by __CFStringAppendBytes
4772 void CFStringAppendFormatAndArguments(CFMutableStringRef outputString
, CFDictionaryRef formatOptions
, CFStringRef formatString
, va_list args
) {
4773 _CFStringAppendFormatAndArgumentsAux(outputString
, NULL
, formatOptions
, formatString
, args
);
/* Formats one numeric argument into `buffer` with snprintf_l().

   Expands to a braced block (use it WITHOUT a trailing semicolon) and relies
   on these names from the expansion site: specs, curSpec, buffer,
   formatBuffer, width, precision.  WHAT is converted to TYPE first.  `width`
   and/or `precision` are passed as extra arguments only when the spec takes
   them from the argument list (widthArgNum / precArgNum != -1), i.e. when
   formatBuffer still contains the corresponding '*'.  At most 255 bytes,
   terminator included, are written. */
#define SNPRINTF(TYPE, WHAT) {								\
    TYPE value = (TYPE) WHAT;								\
    if ((-1 != specs[curSpec].widthArgNum) && (-1 != specs[curSpec].precArgNum)) {	\
        snprintf_l(buffer, 255, NULL, formatBuffer, width, precision, value);		\
    } else if (-1 != specs[curSpec].widthArgNum) {					\
        snprintf_l(buffer, 255, NULL, formatBuffer, width, value);			\
    } else if (-1 != specs[curSpec].precArgNum) {					\
        snprintf_l(buffer, 255, NULL, formatBuffer, precision, value);			\
    } else {										\
        snprintf_l(buffer, 255, NULL, formatBuffer, value);				\
    }}
4792 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString
, CFStringRef (*copyDescFunc
)(void *, CFDictionaryRef
), CFDictionaryRef formatOptions
, CFStringRef formatString
, va_list args
) {
4793 SInt32 numSpecs
, sizeSpecs
, sizeArgNum
, formatIdx
, curSpec
, argNum
;
4795 #define FORMAT_BUFFER_LEN 400
4796 const uint8_t *cformat
= NULL
;
4797 const UniChar
*uformat
= NULL
;
4798 UniChar
*formatChars
= NULL
;
4799 UniChar localFormatBuffer
[FORMAT_BUFFER_LEN
];
4801 #define VPRINTF_BUFFER_LEN 61
4802 CFFormatSpec localSpecsBuffer
[VPRINTF_BUFFER_LEN
];
4803 CFFormatSpec
*specs
;
4804 CFPrintValue localValuesBuffer
[VPRINTF_BUFFER_LEN
];
4805 CFPrintValue
*values
;
4806 CFAllocatorRef tmpAlloc
= NULL
;
4814 formatLen
= CFStringGetLength(formatString
);
4815 if (!CF_IS_OBJC(__kCFStringTypeID
, formatString
)) {
4816 __CFAssertIsString(formatString
);
4817 if (!__CFStrIsUnicode(formatString
)) {
4818 cformat
= __CFStrContents(formatString
);
4819 if (cformat
) cformat
+= __CFStrSkipAnyLengthByte(formatString
);
4821 uformat
= __CFStrContents(formatString
);
4824 if (!cformat
&& !uformat
) {
4825 formatChars
= (formatLen
> FORMAT_BUFFER_LEN
) ? CFAllocatorAllocate(tmpAlloc
= __CFGetDefaultAllocator(), formatLen
* sizeof(UniChar
), 0) : localFormatBuffer
;
4826 if (formatChars
!= localFormatBuffer
&& __CFOASafe
) __CFSetLastAllocationEventName(formatChars
, "CFString (temp)");
4827 CFStringGetCharacters(formatString
, CFRangeMake(0, formatLen
), formatChars
);
4828 uformat
= formatChars
;
4831 /* Compute an upper bound for the number of format specifications */
4833 for (formatIdx
= 0; formatIdx
< formatLen
; formatIdx
++) if ('%' == cformat
[formatIdx
]) sizeSpecs
++;
4835 for (formatIdx
= 0; formatIdx
< formatLen
; formatIdx
++) if ('%' == uformat
[formatIdx
]) sizeSpecs
++;
4837 tmpAlloc
= __CFGetDefaultAllocator();
4838 specs
= ((2 * sizeSpecs
+ 1) > VPRINTF_BUFFER_LEN
) ? CFAllocatorAllocate(tmpAlloc
, (2 * sizeSpecs
+ 1) * sizeof(CFFormatSpec
), 0) : localSpecsBuffer
;
4839 if (specs
!= localSpecsBuffer
&& __CFOASafe
) __CFSetLastAllocationEventName(specs
, "CFString (temp)");
4841 /* Collect format specification information from the format string */
4842 for (curSpec
= 0, formatIdx
= 0; formatIdx
< formatLen
; curSpec
++) {
4844 specs
[curSpec
].loc
= formatIdx
;
4845 specs
[curSpec
].len
= 0;
4846 specs
[curSpec
].size
= 0;
4847 specs
[curSpec
].type
= 0;
4848 specs
[curSpec
].flags
= 0;
4849 specs
[curSpec
].widthArg
= -1;
4850 specs
[curSpec
].precArg
= -1;
4851 specs
[curSpec
].mainArgNum
= -1;
4852 specs
[curSpec
].precArgNum
= -1;
4853 specs
[curSpec
].widthArgNum
= -1;
4855 for (newFmtIdx
= formatIdx
; newFmtIdx
< formatLen
&& '%' != cformat
[newFmtIdx
]; newFmtIdx
++);
4857 for (newFmtIdx
= formatIdx
; newFmtIdx
< formatLen
&& '%' != uformat
[newFmtIdx
]; newFmtIdx
++);
4859 if (newFmtIdx
!= formatIdx
) { /* Literal chunk */
4860 specs
[curSpec
].type
= CFFormatLiteralType
;
4861 specs
[curSpec
].len
= newFmtIdx
- formatIdx
;
4863 newFmtIdx
++; /* Skip % */
4864 __CFParseFormatSpec(uformat
, cformat
, &newFmtIdx
, formatLen
, &(specs
[curSpec
]));
4865 if (CFFormatLiteralType
== specs
[curSpec
].type
) {
4866 specs
[curSpec
].loc
= formatIdx
+ 1;
4867 specs
[curSpec
].len
= 1;
4869 specs
[curSpec
].len
= newFmtIdx
- formatIdx
;
4872 formatIdx
= newFmtIdx
;
4874 // fprintf(stderr, "specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
4878 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
4879 values
= ((3 * sizeSpecs
+ 1) > VPRINTF_BUFFER_LEN
) ? CFAllocatorAllocate(tmpAlloc
, (3 * sizeSpecs
+ 1) * sizeof(CFPrintValue
), 0) : localValuesBuffer
;
4880 if (values
!= localValuesBuffer
&& __CFOASafe
) __CFSetLastAllocationEventName(values
, "CFString (temp)");
4881 memset(values
, 0, (3 * sizeSpecs
+ 1) * sizeof(CFPrintValue
));
4882 sizeArgNum
= (3 * sizeSpecs
+ 1);
4884 /* Compute values array */
4886 for (curSpec
= 0; curSpec
< numSpecs
; curSpec
++) {
4887 SInt32 newMaxArgNum
;
4888 if (0 == specs
[curSpec
].type
) continue;
4889 if (CFFormatLiteralType
== specs
[curSpec
].type
) continue;
4890 newMaxArgNum
= sizeArgNum
;
4891 if (newMaxArgNum
< specs
[curSpec
].mainArgNum
) {
4892 newMaxArgNum
= specs
[curSpec
].mainArgNum
;
4894 if (newMaxArgNum
< specs
[curSpec
].precArgNum
) {
4895 newMaxArgNum
= specs
[curSpec
].precArgNum
;
4897 if (newMaxArgNum
< specs
[curSpec
].widthArgNum
) {
4898 newMaxArgNum
= specs
[curSpec
].widthArgNum
;
4900 if (sizeArgNum
< newMaxArgNum
) {
4901 if (specs
!= localSpecsBuffer
) CFAllocatorDeallocate(tmpAlloc
, specs
);
4902 if (values
!= localValuesBuffer
) CFAllocatorDeallocate(tmpAlloc
, values
);
4903 if (formatChars
&& (formatChars
!= localFormatBuffer
)) CFAllocatorDeallocate(tmpAlloc
, formatChars
);
4904 return; // more args than we expected!
4906 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
4907 if (-2 == specs
[curSpec
].widthArgNum
) {
4908 specs
[curSpec
].widthArgNum
= argNum
++;
4910 if (-2 == specs
[curSpec
].precArgNum
) {
4911 specs
[curSpec
].precArgNum
= argNum
++;
4913 if (-1 == specs
[curSpec
].mainArgNum
) {
4914 specs
[curSpec
].mainArgNum
= argNum
++;
4916 values
[specs
[curSpec
].mainArgNum
].size
= specs
[curSpec
].size
;
4917 values
[specs
[curSpec
].mainArgNum
].type
= specs
[curSpec
].type
;
4918 if (-1 != specs
[curSpec
].widthArgNum
) {
4919 values
[specs
[curSpec
].widthArgNum
].size
= 0;
4920 values
[specs
[curSpec
].widthArgNum
].type
= CFFormatLongType
;
4922 if (-1 != specs
[curSpec
].precArgNum
) {
4923 values
[specs
[curSpec
].precArgNum
].size
= 0;
4924 values
[specs
[curSpec
].precArgNum
].type
= CFFormatLongType
;
4928 /* Collect the arguments in correct type from vararg list */
4929 for (argNum
= 0; argNum
< sizeArgNum
; argNum
++) {
4930 switch (values
[argNum
].type
) {
4932 case CFFormatLiteralType
:
4934 case CFFormatLongType
:
4935 case CFFormatSingleUnicharType
:
4936 if (CFFormatSize1
== values
[argNum
].size
) {
4937 values
[argNum
].value
.int64Value
= (int64_t)(int8_t)va_arg(args
, int);
4938 } else if (CFFormatSize2
== values
[argNum
].size
) {
4939 values
[argNum
].value
.int64Value
= (int64_t)(int16_t)va_arg(args
, int);
4940 } else if (CFFormatSize4
== values
[argNum
].size
) {
4941 values
[argNum
].value
.int64Value
= (int64_t)va_arg(args
, int32_t);
4942 } else if (CFFormatSize8
== values
[argNum
].size
) {
4943 values
[argNum
].value
.int64Value
= (int64_t)va_arg(args
, int64_t);
4945 values
[argNum
].value
.int64Value
= (int64_t)va_arg(args
, int);
4948 case CFFormatDoubleType
:
4949 values
[argNum
].value
.doubleValue
= va_arg(args
, double);
4951 case CFFormatPointerType
:
4952 case CFFormatObjectType
:
4953 case CFFormatCFType
:
4954 case CFFormatUnicharsType
:
4955 case CFFormatCharsType
:
4956 case CFFormatPascalCharsType
:
4957 values
[argNum
].value
.pointerValue
= va_arg(args
, void *);
4963 /* Format the pieces together */
4964 for (curSpec
= 0; curSpec
< numSpecs
; curSpec
++) {
4965 SInt32 width
= 0, precision
= 0;
4967 Boolean hasWidth
= false, hasPrecision
= false;
4969 // widthArgNum and widthArg are never set at the same time; same for precArg*
4970 if (-1 != specs
[curSpec
].widthArgNum
) {
4971 width
= (SInt32
)values
[specs
[curSpec
].widthArgNum
].value
.int64Value
;
4974 if (-1 != specs
[curSpec
].precArgNum
) {
4975 precision
= (SInt32
)values
[specs
[curSpec
].precArgNum
].value
.int64Value
;
4976 hasPrecision
= true;
4978 if (-1 != specs
[curSpec
].widthArg
) {
4979 width
= specs
[curSpec
].widthArg
;
4982 if (-1 != specs
[curSpec
].precArg
) {
4983 precision
= specs
[curSpec
].precArg
;
4984 hasPrecision
= true;
4987 switch (specs
[curSpec
].type
) {
4988 case CFFormatLongType
:
4989 case CFFormatDoubleType
:
4990 case CFFormatPointerType
: {
4991 int8_t formatBuffer
[128];
4992 #if defined(__GNUC__)
4993 int8_t buffer
[256 + width
+ precision
];
4995 int8_t stackBuffer
[512];
4996 int8_t *dynamicBuffer
= NULL
;
4997 int8_t *buffer
= stackBuffer
;
4998 if (256+width
+precision
> 512) {
4999 dynamicBuffer
= CFAllocatorAllocate(NULL
, 256+width
+precision
, 0);
5000 buffer
= dynamicBuffer
;
5003 SInt32 cidx
, idx
, loc
;
5004 Boolean appended
= false;
5005 loc
= specs
[curSpec
].loc
;
5006 // In preparation to call snprintf(), copy the format string out
5008 for (idx
= 0, cidx
= 0; cidx
< specs
[curSpec
].len
; idx
++, cidx
++) {
5009 if ('$' == cformat
[loc
+ cidx
]) {
5010 for (idx
--; '0' <= formatBuffer
[idx
] && formatBuffer
[idx
] <= '9'; idx
--);
5012 formatBuffer
[idx
] = cformat
[loc
+ cidx
];
5016 for (idx
= 0, cidx
= 0; cidx
< specs
[curSpec
].len
; idx
++, cidx
++) {
5017 if ('$' == uformat
[loc
+ cidx
]) {
5018 for (idx
--; '0' <= formatBuffer
[idx
] && formatBuffer
[idx
] <= '9'; idx
--);
5020 formatBuffer
[idx
] = (int8_t)uformat
[loc
+ cidx
];
5024 formatBuffer
[idx
] = '\0';
5025 // Should modify format buffer here if necessary; for example, to translate %qd to
5026 // the equivalent, on architectures which do not have %q.
5027 buffer
[sizeof(buffer
) - 1] = '\0';
5028 switch (specs
[curSpec
].type
) {
5029 case CFFormatLongType
:
5030 if (CFFormatSize8
== specs
[curSpec
].size
) {
5031 SNPRINTF(int64_t, values
[specs
[curSpec
].mainArgNum
].value
.int64Value
)
5033 SNPRINTF(SInt32
, values
[specs
[curSpec
].mainArgNum
].value
.int64Value
)
5036 case CFFormatPointerType
:
5037 SNPRINTF(void *, values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
)
5040 case CFFormatDoubleType
:
5041 SNPRINTF(double, values
[specs
[curSpec
].mainArgNum
].value
.doubleValue
)
5042 // See if we need to localize the decimal point
5043 if (formatOptions
) { // We have a localization dictionary
5044 CFStringRef decimalSeparator
= CFDictionaryGetValue(formatOptions
, kCFNSDecimalSeparatorKey
);
5045 if (decimalSeparator
!= NULL
) { // We have a decimal separator in there
5046 CFIndex decimalPointLoc
= 0;
5047 while (buffer
[decimalPointLoc
] != 0 && buffer
[decimalPointLoc
] != '.') decimalPointLoc
++;
5048 if (buffer
[decimalPointLoc
] == '.') { // And we have a decimal point in the formatted string
5049 buffer
[decimalPointLoc
] = 0;
5050 CFStringAppendCString(outputString
, buffer
, __CFStringGetEightBitStringEncoding());
5051 CFStringAppend(outputString
, decimalSeparator
);
5052 CFStringAppendCString(outputString
, buffer
+ decimalPointLoc
+ 1, __CFStringGetEightBitStringEncoding());
5059 if (!appended
) CFStringAppendCString(outputString
, buffer
, __CFStringGetEightBitStringEncoding());
5061 #if !defined(__GNUC__)
5062 if (dynamicBuffer
) {
5063 CFAllocatorDeallocate(NULL
, dynamicBuffer
);
5067 case CFFormatLiteralType
:
5069 __CFStringAppendBytes(outputString
, cformat
+specs
[curSpec
].loc
, specs
[curSpec
].len
, __CFStringGetEightBitStringEncoding());
5071 CFStringAppendCharacters(outputString
, uformat
+specs
[curSpec
].loc
, specs
[curSpec
].len
);
5074 case CFFormatPascalCharsType
:
5075 case CFFormatCharsType
:
5076 if (values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
== NULL
) {
5077 CFStringAppendCString(outputString
, "(null)", kCFStringEncodingASCII
);
5080 const char *str
= values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
;
5081 if (specs
[curSpec
].type
== CFFormatPascalCharsType
) { // Pascal string case
5082 len
= ((unsigned char *)str
)[0];
5084 if (hasPrecision
&& precision
< len
) len
= precision
;
5085 } else { // C-string case
5086 if (!hasPrecision
) { // No precision, so rely on the terminating null character
5088 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
5089 const char *terminatingNull
= memchr(str
, 0, precision
); // Basically strlen() on only the first precision characters of str
5090 if (terminatingNull
) { // There was a null in the first precision characters
5091 len
= terminatingNull
- str
;
5097 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5098 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5099 // to ignore those flags (and, say, never pad with '0' instead of space).
5100 if (specs
[curSpec
].flags
& kCFStringFormatMinusFlag
) {
5101 __CFStringAppendBytes(outputString
, str
, len
, __CFStringGetSystemEncoding());
5102 if (hasWidth
&& width
> len
) {
5103 int w
= width
- len
; // We need this many spaces; do it ten at a time
5104 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5107 if (hasWidth
&& width
> len
) {
5108 int w
= width
- len
; // We need this many spaces; do it ten at a time
5109 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5111 __CFStringAppendBytes(outputString
, str
, len
, __CFStringGetSystemEncoding());
5115 case CFFormatSingleUnicharType
:
5116 ch
= values
[specs
[curSpec
].mainArgNum
].value
.int64Value
;
5117 CFStringAppendCharacters(outputString
, &ch
, 1);
5119 case CFFormatUnicharsType
:
5120 //??? need to handle width, precision, and padding arguments
5121 up
= values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
;
5123 CFStringAppendCString(outputString
, "(null)", kCFStringEncodingASCII
);
5126 for (len
= 0; 0 != up
[len
]; len
++);
5127 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5128 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5129 // to ignore those flags (and, say, never pad with '0' instead of space).
5130 if (hasPrecision
&& precision
< len
) len
= precision
;
5131 if (specs
[curSpec
].flags
& kCFStringFormatMinusFlag
) {
5132 CFStringAppendCharacters(outputString
, up
, len
);
5133 if (hasWidth
&& width
> len
) {
5134 int w
= width
- len
; // We need this many spaces; do it ten at a time
5135 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5138 if (hasWidth
&& width
> len
) {
5139 int w
= width
- len
; // We need this many spaces; do it ten at a time
5140 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5142 CFStringAppendCharacters(outputString
, up
, len
);
5146 case CFFormatCFType
:
5147 case CFFormatObjectType
:
5148 if (NULL
!= values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
) {
5149 CFStringRef str
= NULL
;
5151 str
= copyDescFunc(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
, formatOptions
);
5153 str
= __CFCopyFormattingDescription(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
, formatOptions
);
5155 str
= CFCopyDescription(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
);
5159 CFStringAppend(outputString
, str
);
5162 CFStringAppendCString(outputString
, "(null description)", kCFStringEncodingASCII
);
5165 CFStringAppendCString(outputString
, "(null)", kCFStringEncodingASCII
);
5171 if (specs
!= localSpecsBuffer
) CFAllocatorDeallocate(tmpAlloc
, specs
);
5172 if (values
!= localValuesBuffer
) CFAllocatorDeallocate(tmpAlloc
, values
);
5173 if (formatChars
&& (formatChars
!= localFormatBuffer
)) CFAllocatorDeallocate(tmpAlloc
, formatChars
);
5179 void CFShowStr(CFStringRef str
) {
5180 CFAllocatorRef alloc
;
5183 fprintf(stdout
, "(null)\n");
5187 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) {
5188 fprintf(stdout
, "This is an NSString, not CFString\n");
5192 alloc
= CFGetAllocator(str
);
5194 fprintf(stdout
, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str
), __CFStrIsEightBit(str
));
5195 fprintf(stdout
, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
5196 __CFStrHasLengthByte(str
), __CFStrHasNullByte(str
), __CFStrIsInline(str
));
5198 fprintf(stdout
, "Allocator ");
5199 if (alloc
!= kCFAllocatorSystemDefault
) {
5200 fprintf(stdout
, "%p\n", (void *)alloc
);
5202 fprintf(stdout
, "SystemDefault\n");
5204 fprintf(stdout
, "Mutable %d\n", __CFStrIsMutable(str
));
5205 if (!__CFStrIsMutable(str
) && __CFStrHasContentsDeallocator(str
)) {
5206 if (__CFStrContentsDeallocator(str
)) fprintf(stdout
, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str
));
5207 else fprintf(stdout
, "ContentsDeallocatorFunc None\n");
5208 } else if (__CFStrIsMutable(str
) && __CFStrHasContentsAllocator(str
)) {
5209 fprintf(stdout
, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef
)str
));
5212 if (__CFStrIsMutable(str
)) {
5213 fprintf(stdout
, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str
), __CFStrIsFixed(str
) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str
));
5215 fprintf(stdout
, "Contents %p\n", (void *)__CFStrContents(str
));