2 * Copyright (c) 2011 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
25 Copyright (c) 1998-2011, Apple Inc. All rights reserved.
26 Responsibility: Ali Ozer
28 !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined.
31 #include <CoreFoundation/CFBase.h>
32 #include <CoreFoundation/CFString.h>
33 #include <CoreFoundation/CFDictionary.h>
34 #include <CoreFoundation/CFStringEncodingConverterExt.h>
35 #include <CoreFoundation/CFUniChar.h>
36 #include <CoreFoundation/CFUnicodeDecomposition.h>
37 #include <CoreFoundation/CFUnicodePrecomposition.h>
38 #include <CoreFoundation/CFPriv.h>
39 #include <CoreFoundation/CFNumber.h>
40 #include "CFInternal.h"
41 #include "CFLocaleInternal.h"
45 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
50 #define LONG_DOUBLE_SUPPORT 1
52 #define LONG_DOUBLE_SUPPORT 0
57 #define USE_STRING_ROM 0
60 #ifndef INSTRUMENT_SHARED_STRINGS
61 #define INSTRUMENT_SHARED_STRINGS 0
64 __private_extern__
const CFStringRef __kCFLocaleCollatorID
;
66 #if INSTRUMENT_SHARED_STRINGS
67 #include <sys/stat.h> /* for umask() */
69 static void __CFRecordStringAllocationEvent(const char *encoding
, const char *bytes
, CFIndex byteCount
) {
70 static CFSpinLock_t lock
= CFSpinLockInit
;
72 if (memchr(bytes
, '\n', byteCount
)) return; //never record string allocation events for strings with newlines, because those confuse our parser and because they'll never go into the ROM
77 extern char **_NSGetProgname(void);
78 const char *name
= *_NSGetProgname();
79 if (! name
) name
= "UNKNOWN";
82 snprintf(path
, sizeof(path
), "/tmp/CFSharedStringInstrumentation_%s_%d.txt", name
, getpid());
83 fd
= open(path
, O_WRONLY
| O_APPEND
| O_CREAT
, 0666);
86 const char *errString
= strerror(error
);
87 fprintf(stderr
, "open() failed with error %d (%s)\n", error
, errString
);
92 char formatString
[256];
93 snprintf(formatString
, sizeof(formatString
), "%%-8d\t%%-16s\t%%.%lds\n", byteCount
);
94 int resultCount
= asprintf(&buffer
, formatString
, getpid(), encoding
, bytes
);
95 if (buffer
&& resultCount
> 0) write(fd
, buffer
, resultCount
);
96 else puts("Couldn't record allocation event");
99 __CFSpinUnlock(&lock
);
101 #endif //INSTRUMENT_SHARED_STRINGS
105 typedef Boolean (*UNI_CHAR_FUNC
)(UInt32 flags
, UInt8 ch
, UniChar
*unicodeChar
);
107 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
108 extern size_t malloc_good_size(size_t size
);
110 extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes
, UniChar
*buffer
, CFIndex numChars
);
112 static void __CFStringAppendFormatCore(CFMutableStringRef outputString
, CFStringRef (*copyDescFunc
)(void *, const void *), CFDictionaryRef formatOptions
, CFStringRef formatString
, CFIndex initialArgPosition
, const void *origValues
, CFIndex originalValuesSize
, va_list args
);
116 // We put this into C & Pascal strings if we can't convert
117 #define CONVERSIONFAILURESTR "CFString conversion failed"
119 // We set this to true when purging the constant string table, so CFStringDeallocate doesn't assert
120 static Boolean __CFConstantStringTableBeingFreed
= false;
126 // This section is for CFString compatibility and other behaviors...
128 static CFOptionFlags _CFStringCompatibilityMask
= 0;
130 void _CFStringSetCompatibility(CFOptionFlags mask
) {
131 _CFStringCompatibilityMask
|= mask
;
134 CF_INLINE Boolean
__CFStringGetCompatibility(CFOptionFlags mask
) {
135 return (_CFStringCompatibilityMask
& mask
) == mask
;
140 // Two constant strings used by CFString; these are initialized in CFStringInitialize
141 CONST_STRING_DECL(kCFEmptyString
, "")
143 // This is separate for C++
144 struct __notInlineMutable
{
147 CFIndex capacity
; // Capacity in bytes
148 unsigned int hasGap
:1; // Currently unused
149 unsigned int isFixedCapacity
:1;
150 unsigned int isExternalMutable
:1;
151 unsigned int capacityProvidedExternally
:1;
153 unsigned long desiredCapacity
:60;
155 unsigned long desiredCapacity
:28;
157 CFAllocatorRef contentsAllocator
; // Optional
158 }; // The only mutable variant for CFString
161 /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
165 union { // In many cases the allocated structs are smaller than these
168 } inline1
; // Bytes follow the length
169 struct __notInlineImmutable1
{
170 void *buffer
; // Note that the buffer is in the same place for all non-inline variants of CFString
172 CFAllocatorRef contentsDeallocator
; // Optional; just the dealloc func is used
173 } notInlineImmutable1
; // This is the usual not-inline immutable CFString
174 struct __notInlineImmutable2
{
176 CFAllocatorRef contentsDeallocator
; // Optional; just the dealloc func is used
177 } notInlineImmutable2
; // This is the not-inline immutable CFString when length is stored with the contents (first byte)
178 struct __notInlineMutable notInlineMutable
;
/*
I = is immutable
E = not inline contents
U = is Unicode
N = has NULL byte
L = has length byte
D = explicit deallocator for contents (for mutable objects, allocator)
C = length field is CFIndex (rather than UInt32); only meaningful for 64-bit, really
    if needed this bit (valuable real-estate) can be given up for another bit elsewhere, since this info is needed just for 64-bit

Also need (only for mutable)
F = is fixed
G = has gap
Cap, DesCap = capacity

B7 B6 B5 B4 B3 B2 B1 B0
         U  N  L  C  I

B6 B5
 0  0   inline contents
 0  1   E (freed with default allocator)
 1  0   E (not freed)
 1  1   E D

!!! Note: Constant CFStrings use the bit patterns:
C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable)
D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable)
The bit usages should not be modified in a way that would affect these bit patterns.
*/
/* Masks and values for the low bits of base._cfinfo[CF_INFO_BITS].
   Each "...Mask" selects a field; the indented entries below it are the values that field can hold.
*/
enum {
    __kCFFreeContentsWhenDoneMask = 0x020,
        __kCFFreeContentsWhenDone = 0x020,
    __kCFContentsMask = 0x060,
        __kCFHasInlineContents = 0x000,
        __kCFNotInlineContentsNoFree = 0x040,           // Don't free
        __kCFNotInlineContentsDefaultFree = 0x020,      // Use allocator's free function
        __kCFNotInlineContentsCustomFree = 0x060,       // Use a specially provided free function
    __kCFHasContentsAllocatorMask = 0x060,
        __kCFHasContentsAllocator = 0x060,              // (For mutable strings) use a specially provided allocator
    __kCFHasContentsDeallocatorMask = 0x060,
        __kCFHasContentsDeallocator = 0x060,
    __kCFIsMutableMask = 0x01,
        __kCFIsMutable = 0x01,
    __kCFIsUnicodeMask = 0x10,
        __kCFIsUnicode = 0x10,
    __kCFHasNullByteMask = 0x08,
        __kCFHasNullByte = 0x08,
    __kCFHasLengthByteMask = 0x04,
        __kCFHasLengthByte = 0x04,
    // !!! Bit 0x02 has been freed up
};
237 // Mutable strings are not inline
238 // Compile-time constant strings are not inline
239 // Mutable strings always have explicit length (but they might also have length byte and null byte)
240 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
241 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
243 /* The following set of functions and macros need to be updated on change to the bit configuration
245 CF_INLINE Boolean
__CFStrIsMutable(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFIsMutableMask
) == __kCFIsMutable
;}
246 CF_INLINE Boolean
__CFStrIsInline(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFContentsMask
) == __kCFHasInlineContents
;}
247 CF_INLINE Boolean
__CFStrFreeContentsWhenDone(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFFreeContentsWhenDoneMask
) == __kCFFreeContentsWhenDone
;}
248 CF_INLINE Boolean
__CFStrHasContentsDeallocator(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFHasContentsDeallocatorMask
) == __kCFHasContentsDeallocator
;}
249 CF_INLINE Boolean
__CFStrIsUnicode(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFIsUnicodeMask
) == __kCFIsUnicode
;}
250 CF_INLINE Boolean
__CFStrIsEightBit(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFIsUnicodeMask
) != __kCFIsUnicode
;}
251 CF_INLINE Boolean
__CFStrHasNullByte(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFHasNullByteMask
) == __kCFHasNullByte
;}
252 CF_INLINE Boolean
__CFStrHasLengthByte(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFHasLengthByteMask
) == __kCFHasLengthByte
;}
253 CF_INLINE Boolean
__CFStrHasExplicitLength(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & (__kCFIsMutableMask
| __kCFHasLengthByteMask
)) != __kCFHasLengthByte
;} // Has explicit length if (1) mutable or (2) not mutable and no length byte
254 CF_INLINE Boolean
__CFStrIsConstant(CFStringRef str
) {
256 return str
->base
._rc
== 0;
258 return (str
->base
._cfinfo
[CF_RC_BITS
]) == 0;
262 CF_INLINE SInt32
__CFStrSkipAnyLengthByte(CFStringRef str
) {return ((str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFHasLengthByteMask
) == __kCFHasLengthByte
) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents
264 /* Returns ptr to the buffer (which might include the length byte)
266 CF_INLINE
const void *__CFStrContents(CFStringRef str
) {
267 if (__CFStrIsInline(str
)) {
268 return (const void *)(((uintptr_t)&(str
->variants
)) + (__CFStrHasExplicitLength(str
) ? sizeof(CFIndex
) : 0));
269 } else { // Not inline; pointer is always word 2
270 return str
->variants
.notInlineImmutable1
.buffer
;
274 static CFAllocatorRef
*__CFStrContentsDeallocatorPtr(CFStringRef str
) {
275 return __CFStrHasExplicitLength(str
) ? &(((CFMutableStringRef
)str
)->variants
.notInlineImmutable1
.contentsDeallocator
) : &(((CFMutableStringRef
)str
)->variants
.notInlineImmutable2
.contentsDeallocator
); }
277 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
278 CF_INLINE CFAllocatorRef
__CFStrContentsDeallocator(CFStringRef str
) {
279 return *__CFStrContentsDeallocatorPtr(str
);
282 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
283 CF_INLINE
void __CFStrSetContentsDeallocator(CFStringRef str
, CFAllocatorRef allocator
) {
284 allocator
= kCFUseCollectableAllocator
? allocator
: _CFConvertAllocatorToNonGCRefZeroEquivalent(allocator
);
285 if (!(kCFAllocatorSystemDefaultGCRefZero
== allocator
|| kCFAllocatorDefaultGCRefZero
== allocator
)) CFRetain(allocator
);
286 *__CFStrContentsDeallocatorPtr(str
) = allocator
;
289 static CFAllocatorRef
*__CFStrContentsAllocatorPtr(CFStringRef str
) {
290 CFAssert(!__CFStrIsInline(str
), __kCFLogAssertion
, "Asking for contents allocator of inline string");
291 CFAssert(__CFStrIsMutable(str
), __kCFLogAssertion
, "Asking for contents allocator of an immutable string");
292 return (CFAllocatorRef
*)&(str
->variants
.notInlineMutable
.contentsAllocator
);
295 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
296 CF_INLINE CFAllocatorRef
__CFStrContentsAllocator(CFMutableStringRef str
) {
297 return *(__CFStrContentsAllocatorPtr(str
));
300 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
301 CF_INLINE
void __CFStrSetContentsAllocator(CFMutableStringRef str
, CFAllocatorRef allocator
) {
302 allocator
= kCFUseCollectableAllocator
? allocator
: _CFConvertAllocatorToNonGCRefZeroEquivalent(allocator
);
303 if (!(kCFAllocatorSystemDefaultGCRefZero
== allocator
|| kCFAllocatorDefaultGCRefZero
== allocator
)) CFRetain(allocator
);
304 *(__CFStrContentsAllocatorPtr(str
)) = allocator
;
307 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
309 CF_INLINE CFIndex
__CFStrLength(CFStringRef str
) {
310 if (__CFStrHasExplicitLength(str
)) {
311 if (__CFStrIsInline(str
)) {
312 return str
->variants
.inline1
.length
;
314 return str
->variants
.notInlineImmutable1
.length
;
317 return (CFIndex
)(*((uint8_t *)__CFStrContents(str
)));
321 CF_INLINE CFIndex
__CFStrLength2(CFStringRef str
, const void *buffer
) {
322 if (__CFStrHasExplicitLength(str
)) {
323 if (__CFStrIsInline(str
)) {
324 return str
->variants
.inline1
.length
;
326 return str
->variants
.notInlineImmutable1
.length
;
329 return (CFIndex
)(*((uint8_t *)buffer
));
334 Boolean
__CFStringIsEightBit(CFStringRef str
) {
335 return __CFStrIsEightBit(str
);
338 /* Sets the content pointer for immutable or mutable strings.
340 CF_INLINE
void __CFStrSetContentPtr(CFStringRef str
, const void *p
) {
341 // XXX_PCB catch all writes for mutable string case.
342 __CFAssignWithWriteBarrier((void **)&((CFMutableStringRef
)str
)->variants
.notInlineImmutable1
.buffer
, (void *)p
);
344 CF_INLINE
void __CFStrSetInfoBits(CFStringRef str
, UInt32 v
) {__CFBitfieldSetValue(((CFMutableStringRef
)str
)->base
._cfinfo
[CF_INFO_BITS
], 6, 0, v
);}
346 CF_INLINE
void __CFStrSetExplicitLength(CFStringRef str
, CFIndex v
) {
347 if (__CFStrIsInline(str
)) {
348 ((CFMutableStringRef
)str
)->variants
.inline1
.length
= v
;
350 ((CFMutableStringRef
)str
)->variants
.notInlineImmutable1
.length
= v
;
354 CF_INLINE
void __CFStrSetUnicode(CFMutableStringRef str
) {str
->base
._cfinfo
[CF_INFO_BITS
] |= __kCFIsUnicode
;}
355 CF_INLINE
void __CFStrClearUnicode(CFMutableStringRef str
) {str
->base
._cfinfo
[CF_INFO_BITS
] &= ~__kCFIsUnicode
;}
356 CF_INLINE
void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str
) {str
->base
._cfinfo
[CF_INFO_BITS
] |= (__kCFHasLengthByte
| __kCFHasNullByte
);}
357 CF_INLINE
void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str
) {str
->base
._cfinfo
[CF_INFO_BITS
] &= ~(__kCFHasLengthByte
| __kCFHasNullByte
);}
360 // Assumption: The following set of inlines (using str->variants.notInlineMutable) are called with mutable strings only
361 CF_INLINE Boolean
__CFStrIsFixed(CFStringRef str
) {return str
->variants
.notInlineMutable
.isFixedCapacity
;}
362 CF_INLINE Boolean
__CFStrIsExternalMutable(CFStringRef str
) {return str
->variants
.notInlineMutable
.isExternalMutable
;}
363 CF_INLINE Boolean
__CFStrHasContentsAllocator(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFHasContentsAllocatorMask
) == __kCFHasContentsAllocator
;}
364 CF_INLINE
void __CFStrSetIsFixed(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.isFixedCapacity
= 1;}
365 CF_INLINE
void __CFStrSetIsExternalMutable(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.isExternalMutable
= 1;}
366 CF_INLINE
void __CFStrSetHasGap(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.hasGap
= 1;}
368 // If capacity is provided externally, we only change it when we need to grow beyond it
369 CF_INLINE Boolean
__CFStrCapacityProvidedExternally(CFStringRef str
) {return str
->variants
.notInlineMutable
.capacityProvidedExternally
;}
370 CF_INLINE
void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.capacityProvidedExternally
= 1;}
371 CF_INLINE
void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.capacityProvidedExternally
= 0;}
373 // "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
374 CF_INLINE CFIndex
__CFStrCapacity(CFStringRef str
) {return str
->variants
.notInlineMutable
.capacity
;}
375 CF_INLINE
void __CFStrSetCapacity(CFMutableStringRef str
, CFIndex cap
) {str
->variants
.notInlineMutable
.capacity
= cap
;}
377 // "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
378 CF_INLINE CFIndex
__CFStrDesiredCapacity(CFStringRef str
) {return str
->variants
.notInlineMutable
.desiredCapacity
;}
379 CF_INLINE
void __CFStrSetDesiredCapacity(CFMutableStringRef str
, CFIndex size
) {str
->variants
.notInlineMutable
.desiredCapacity
= size
;}
382 static void *__CFStrAllocateMutableContents(CFMutableStringRef str
, CFIndex size
) {
384 CFAllocatorRef alloc
= (__CFStrHasContentsAllocator(str
)) ? __CFStrContentsAllocator(str
) : __CFGetAllocator(str
);
385 ptr
= CFAllocatorAllocate(alloc
, size
, 0);
386 if (__CFOASafe
) __CFSetLastAllocationEventName(ptr
, "CFString (store)");
390 static void __CFStrDeallocateMutableContents(CFMutableStringRef str
, void *buffer
) {
391 CFAllocatorRef alloc
= (__CFStrHasContentsAllocator(str
)) ? __CFStrContentsAllocator(str
) : __CFGetAllocator(str
);
392 if (__CFStrIsMutable(str
) && __CFStrHasContentsAllocator(str
) && _CFAllocatorIsGCRefZero(alloc
)) {
394 } else if (CF_IS_COLLECTABLE_ALLOCATOR(alloc
)) {
395 // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle.
396 auto_zone_release(objc_collectableZone(), buffer
);
398 CFAllocatorDeallocate(alloc
, buffer
);
/* CFString specific init flags
   Note that you cannot count on the external buffer not being copied.
   Also, if you specify an external buffer, you should not change it behind the CFString's back.
*/
enum {
    __kCFThinUnicodeIfPossible            = 0x1000000,  /* See if the Unicode contents can be thinned down to 8-bit */
    kCFStringPascal                       = 0x10000,    /* Indicating that the string data has a Pascal string structure (length byte at start) */
    kCFStringNoCopyProvidedContents       = 0x20000,    /* Don't copy the provided string contents if possible; free it when no longer needed */
    kCFStringNoCopyNoFreeProvidedContents = 0x30000     /* Don't copy the provided string contents if possible; don't free it when no longer needed */
};
418 static CFStringEncoding __CFDefaultSystemEncoding
= kCFStringEncodingInvalidId
;
419 static CFStringEncoding __CFDefaultFileSystemEncoding
= kCFStringEncodingInvalidId
;
420 CFStringEncoding __CFDefaultEightBitStringEncoding
= kCFStringEncodingInvalidId
;
423 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX
424 #define __defaultEncoding kCFStringEncodingMacRoman
425 #elif DEPLOYMENT_TARGET_WINDOWS
426 #define __defaultEncoding kCFStringEncodingWindowsLatin1
428 #warning This value must match __CFGetConverter condition in CFStringEncodingConverter.c
429 #define __defaultEncoding kCFStringEncodingISOLatin1
432 CFStringEncoding
CFStringGetSystemEncoding(void) {
433 if (__CFDefaultSystemEncoding
== kCFStringEncodingInvalidId
) {
434 __CFDefaultSystemEncoding
= __defaultEncoding
;
435 const CFStringEncodingConverter
*converter
= CFStringEncodingGetConverter(__CFDefaultSystemEncoding
);
436 __CFSetCharToUniCharFunc(converter
->encodingClass
== kCFStringEncodingConverterCheapEightBit
? (UNI_CHAR_FUNC
)converter
->toUnicode
: NULL
);
438 return __CFDefaultSystemEncoding
;
441 // Fast version for internal use
443 CF_INLINE CFStringEncoding
__CFStringGetSystemEncoding(void) {
444 if (__CFDefaultSystemEncoding
== kCFStringEncodingInvalidId
) (void)CFStringGetSystemEncoding();
445 return __CFDefaultSystemEncoding
;
448 CFStringEncoding
CFStringFileSystemEncoding(void) {
449 if (__CFDefaultFileSystemEncoding
== kCFStringEncodingInvalidId
) {
450 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
451 __CFDefaultFileSystemEncoding
= kCFStringEncodingUTF8
;
453 __CFDefaultFileSystemEncoding
= CFStringGetSystemEncoding();
457 return __CFDefaultFileSystemEncoding
;
460 /* ??? Is returning length when no other answer is available the right thing?
461 !!! All of the (length > (LONG_MAX / N)) type checks are to avoid wrap-around and eventual malloc overflow in the client
463 CFIndex
CFStringGetMaximumSizeForEncoding(CFIndex length
, CFStringEncoding encoding
) {
464 if (encoding
== kCFStringEncodingUTF8
) {
465 return (length
> (LONG_MAX
/ 3)) ? kCFNotFound
: (length
* 3);
466 } else if ((encoding
== kCFStringEncodingUTF32
) || (encoding
== kCFStringEncodingUTF32BE
) || (encoding
== kCFStringEncodingUTF32LE
)) { // UTF-32
467 return (length
> (LONG_MAX
/ sizeof(UTF32Char
))) ? kCFNotFound
: (length
* sizeof(UTF32Char
));
469 encoding
&= 0xFFF; // Mask off non-base part
472 case kCFStringEncodingUnicode
:
473 return (length
> (LONG_MAX
/ sizeof(UniChar
))) ? kCFNotFound
: (length
* sizeof(UniChar
));
475 case kCFStringEncodingNonLossyASCII
:
476 return (length
> (LONG_MAX
/ 6)) ? kCFNotFound
: (length
* 6); // 1 Unichar can expand to 6 bytes
478 case kCFStringEncodingMacRoman
:
479 case kCFStringEncodingWindowsLatin1
:
480 case kCFStringEncodingISOLatin1
:
481 case kCFStringEncodingNextStepLatin
:
482 case kCFStringEncodingASCII
:
483 return length
/ sizeof(uint8_t);
486 return length
/ sizeof(uint8_t);
491 /* Returns whether the indicated encoding can be stored in 8-bit chars
493 CF_INLINE Boolean
__CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding
) {
494 switch (encoding
& 0xFFF) { // just use encoding base
495 case kCFStringEncodingInvalidId
:
496 case kCFStringEncodingUnicode
:
497 case kCFStringEncodingNonLossyASCII
:
500 case kCFStringEncodingMacRoman
:
501 case kCFStringEncodingWindowsLatin1
:
502 case kCFStringEncodingISOLatin1
:
503 case kCFStringEncodingNextStepLatin
:
504 case kCFStringEncodingASCII
:
507 default: return false;
511 /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
512 ??? Perhaps only ASCII fits the bill due to Unicode decomposition.
514 CFStringEncoding
__CFStringComputeEightBitStringEncoding(void) {
515 if (__CFDefaultEightBitStringEncoding
== kCFStringEncodingInvalidId
) {
516 CFStringEncoding systemEncoding
= CFStringGetSystemEncoding();
517 if (systemEncoding
== kCFStringEncodingInvalidId
) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
518 return kCFStringEncodingASCII
;
519 } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding
)) {
520 __CFDefaultEightBitStringEncoding
= systemEncoding
;
522 __CFDefaultEightBitStringEncoding
= kCFStringEncodingASCII
;
526 return __CFDefaultEightBitStringEncoding
;
529 /* Returns whether the provided bytes can be stored in ASCII
531 CF_INLINE Boolean
__CFBytesInASCII(const uint8_t *bytes
, CFIndex len
) {
533 /* Go by 8s in 64 bit */
535 uint64_t val
= *(const uint64_t *)bytes
;
536 if (val
& 0x8080808080808080ULL
) return false;
543 uint32_t val
= *(const uint32_t *)bytes
;
544 if (val
& 0x80808080U
) return false;
548 /* Handle the rest one byte at a time */
550 if (*bytes
++ & 0x80) return false;
556 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
558 CF_INLINE Boolean
__CFCanUseEightBitCFStringForBytes(const uint8_t *bytes
, CFIndex len
, CFStringEncoding encoding
) {
559 // If the encoding is the same as the 8-bit CFString encoding, we can just use the bytes as-is.
560 // One exception is ASCII, which unfortunately needs to mean ISOLatin1 for compatibility reasons <rdar://problem/5458321>.
561 if (encoding
== __CFStringGetEightBitStringEncoding() && encoding
!= kCFStringEncodingASCII
) return true;
562 if (__CFStringEncodingIsSupersetOfASCII(encoding
) && __CFBytesInASCII(bytes
, len
)) return true;
567 /* Returns whether a length byte can be tacked on to a string of the indicated length.
569 CF_INLINE Boolean
__CFCanUseLengthByte(CFIndex len
) {
570 #define __kCFMaxPascalStrLen 255
571 return (len
<= __kCFMaxPascalStrLen
) ? true : false;
/* Various string assertions.
   Macro arguments are parenthesized wherever they appear inside an expression, so that
   passing a compound expression (e.g. "a ? b : c" or "x - y") cannot change the meaning
   of the check. The messages and the values handed to the logger are unchanged.
*/
#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && ((idx) + (count)) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsNotNegative(idx) CFAssert2((idx) >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
/* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity.  This function will return -1 if the new capacity is just too big (> LONG_MAX).
   Additional complications are applied in the following order:
   - desiredCapacity, which is the minimum (except initially things can be at zero)
   - rounding up to factor of 8
   - compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256
   - we need to make sure GROWFACTOR computation doesn't suffer from overflow issues on 32-bit, hence the casting to unsigned. Normally for required capacity of C bytes, the allocated space is (3C+1)/2. If C > ULONG_MAX/3, we instead simply return LONG_MAX

   The macro argument is parenthesized at every use so that a compound argument
   (e.g. "a + b") is scaled as a whole.
*/
#define SHRINKFACTOR(c) ((c) / 2)

#if __LP64__
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
#else
#define GROWFACTOR(c) (((c) >= (ULONG_MAX / 3UL)) ? __CFMax(LONG_MAX - 4095, (c)) : (((unsigned long)(c) * 3 + 1) / 2))
#endif
600 CF_INLINE CFIndex
__CFStrNewCapacity(CFMutableStringRef str
, unsigned long reqCapacity
, CFIndex capacity
, Boolean leaveExtraRoom
, CFIndex charSize
) {
601 if (capacity
!= 0 || reqCapacity
!= 0) { /* If initially zero, and space not needed, leave it at that... */
602 if ((capacity
< reqCapacity
) || /* We definitely need the room... */
603 (!__CFStrCapacityProvidedExternally(str
) && /* Assuming we control the capacity... */
604 ((reqCapacity
< SHRINKFACTOR(capacity
)) || /* ...we have too much room! */
605 (!leaveExtraRoom
&& (reqCapacity
< capacity
))))) { /* ...we need to eliminate the extra space... */
606 if (reqCapacity
> LONG_MAX
) return -1; /* Too big any way you cut it */
607 unsigned long newCapacity
= leaveExtraRoom
? GROWFACTOR(reqCapacity
) : reqCapacity
; /* Grow by 3/2 if extra room is desired */
608 CFIndex desiredCapacity
= __CFStrDesiredCapacity(str
) * charSize
;
609 if (newCapacity
< desiredCapacity
) { /* If less than desired, bump up to desired */
610 newCapacity
= desiredCapacity
;
611 } else if (__CFStrIsFixed(str
)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
612 newCapacity
= __CFMax(desiredCapacity
, reqCapacity
); /* !!! So, fixed is not really fixed, but "tight" */
614 if (__CFStrHasContentsAllocator(str
)) { /* Also apply any preferred size from the allocator */
615 newCapacity
= CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str
), newCapacity
, 0);
616 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
618 newCapacity
= malloc_good_size(newCapacity
);
621 return (newCapacity
> LONG_MAX
) ? -1 : (CFIndex
)newCapacity
; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
628 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
629 numBlocks is current total number of blocks within buffer.
630 blockSize is the size of each block in bytes
631 ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
632 insertLength is the final spacing between the remaining blocks
634 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO
635 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
636 if insertLength = 0, result = A B D G H
638 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
639 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
642 typedef struct _CFStringDeferredRange
{
646 } CFStringDeferredRange
;
648 typedef struct _CFStringStackInfo
{
649 CFIndex capacity
; // Capacity (if capacity == count, need to realloc to add another)
650 CFIndex count
; // Number of elements actually stored
651 CFStringDeferredRange
*stack
;
652 Boolean hasMalloced
; // Indicates "stack" is allocated and needs to be deallocated when done
656 CF_INLINE
void pop (CFStringStackInfo
*si
, CFStringDeferredRange
*topRange
) {
657 si
->count
= si
->count
- 1;
658 *topRange
= si
->stack
[si
->count
];
661 CF_INLINE
void push (CFStringStackInfo
*si
, const CFStringDeferredRange
*newRange
) {
662 if (si
->count
== si
->capacity
) {
663 // increase size of the stack
664 si
->capacity
= (si
->capacity
+ 4) * 2;
665 if (si
->hasMalloced
) {
666 si
->stack
= (CFStringDeferredRange
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, si
->stack
, si
->capacity
* sizeof(CFStringDeferredRange
), 0);
668 CFStringDeferredRange
*newStack
= (CFStringDeferredRange
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, si
->capacity
* sizeof(CFStringDeferredRange
), 0);
669 memmove(newStack
, si
->stack
, si
->count
* sizeof(CFStringDeferredRange
));
670 si
->stack
= newStack
;
671 si
->hasMalloced
= true;
674 si
->stack
[si
->count
] = *newRange
;
675 si
->count
= si
->count
+ 1;
678 static void rearrangeBlocks(
682 const CFRange
*ranges
,
684 CFIndex insertLength
) {
686 #define origStackSize 10
687 CFStringDeferredRange origStack
[origStackSize
];
688 CFStringStackInfo si
= {origStackSize
, 0, origStack
, false, {0, 0, 0}};
689 CFStringDeferredRange currentNonRange
= {0, 0, 0};
690 CFIndex currentRange
= 0;
691 CFIndex amountShifted
= 0;
693 // must have at least 1 range left.
695 while (currentRange
< numRanges
) {
696 currentNonRange
.beginning
= (ranges
[currentRange
].location
+ ranges
[currentRange
].length
) * blockSize
;
697 if ((numRanges
- currentRange
) == 1) {
699 currentNonRange
.length
= numBlocks
* blockSize
- currentNonRange
.beginning
;
700 if (currentNonRange
.length
== 0) break;
702 currentNonRange
.length
= (ranges
[currentRange
+ 1].location
* blockSize
) - currentNonRange
.beginning
;
704 currentNonRange
.shift
= amountShifted
+ (insertLength
* blockSize
) - (ranges
[currentRange
].length
* blockSize
);
705 amountShifted
= currentNonRange
.shift
;
706 if (amountShifted
<= 0) {
707 // process current item and rest of stack
708 if (currentNonRange
.shift
&& currentNonRange
.length
) memmove (&buffer
[currentNonRange
.beginning
+ currentNonRange
.shift
], &buffer
[currentNonRange
.beginning
], currentNonRange
.length
);
709 while (si
.count
> 0) {
710 pop (&si
, ¤tNonRange
); // currentNonRange now equals the top element of the stack.
711 if (currentNonRange
.shift
&& currentNonRange
.length
) memmove (&buffer
[currentNonRange
.beginning
+ currentNonRange
.shift
], &buffer
[currentNonRange
.beginning
], currentNonRange
.length
);
714 // add currentNonRange to stack.
715 push (&si
, ¤tNonRange
);
720 // no more ranges. if anything is on the stack, process.
722 while (si
.count
> 0) {
723 pop (&si
, ¤tNonRange
); // currentNonRange now equals the top element of the stack.
724 if (currentNonRange
.shift
&& currentNonRange
.length
) memmove (&buffer
[currentNonRange
.beginning
+ currentNonRange
.shift
], &buffer
[currentNonRange
.beginning
], currentNonRange
.length
);
726 if (si
.hasMalloced
) CFAllocatorDeallocate (kCFAllocatorSystemDefault
, si
.stack
);
729 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
731 static void copyBlocks(
732 const uint8_t *srcBuffer
,
735 Boolean srcIsUnicode
,
736 Boolean dstIsUnicode
,
737 const CFRange
*ranges
,
739 CFIndex insertLength
) {
741 CFIndex srcLocationInBytes
= 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
742 CFIndex dstLocationInBytes
= 0; // ditto
743 CFIndex srcBlockSize
= srcIsUnicode
? sizeof(UniChar
) : sizeof(uint8_t);
744 CFIndex insertLengthInBytes
= insertLength
* (dstIsUnicode
? sizeof(UniChar
) : sizeof(uint8_t));
745 CFIndex rangeIndex
= 0;
746 CFIndex srcToDstMultiplier
= (srcIsUnicode
== dstIsUnicode
) ? 1 : (sizeof(UniChar
) / sizeof(uint8_t));
748 // Loop over the ranges, copying the range to be preserved (right before each range)
749 while (rangeIndex
< numRanges
) {
750 CFIndex srcLengthInBytes
= ranges
[rangeIndex
].location
* srcBlockSize
- srcLocationInBytes
; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
751 if (srcLengthInBytes
> 0) {
752 if (srcIsUnicode
== dstIsUnicode
) {
753 memmove(dstBuffer
+ dstLocationInBytes
, srcBuffer
+ srcLocationInBytes
, srcLengthInBytes
);
755 __CFStrConvertBytesToUnicode(srcBuffer
+ srcLocationInBytes
, (UniChar
*)(dstBuffer
+ dstLocationInBytes
), srcLengthInBytes
);
758 srcLocationInBytes
+= srcLengthInBytes
+ ranges
[rangeIndex
].length
* srcBlockSize
; // Skip over the just-copied and to-be-deleted stuff
759 dstLocationInBytes
+= srcLengthInBytes
* srcToDstMultiplier
+ insertLengthInBytes
;
763 // Do last range (the one beyond last range)
764 if (srcLocationInBytes
< srcLength
* srcBlockSize
) {
765 if (srcIsUnicode
== dstIsUnicode
) {
766 memmove(dstBuffer
+ dstLocationInBytes
, srcBuffer
+ srcLocationInBytes
, srcLength
* srcBlockSize
- srcLocationInBytes
);
768 __CFStrConvertBytesToUnicode(srcBuffer
+ srcLocationInBytes
, (UniChar
*)(dstBuffer
+ dstLocationInBytes
), srcLength
* srcBlockSize
- srcLocationInBytes
);
773 /* Call the callback; if it doesn't exist or returns false, then log
775 static void __CFStringHandleOutOfMemory(CFTypeRef obj
) {
776 CFStringRef msg
= CFSTR("Out of memory. We suggest restarting the application. If you have an unsaved document, create a backup copy in Finder, then try to save.");
778 CFLog(kCFLogLevelCritical
, CFSTR("%@"), msg
);
782 /* Reallocates the backing store of the string to accomodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)
784 static void __CFStringChangeSizeMultiple(CFMutableStringRef str
, const CFRange
*deleteRanges
, CFIndex numDeleteRanges
, CFIndex insertLength
, Boolean makeUnicode
) {
785 const uint8_t *curContents
= (uint8_t *)__CFStrContents(str
);
786 CFIndex curLength
= curContents
? __CFStrLength2(str
, curContents
) : 0;
787 unsigned long newLength
; // We use unsigned to better keep track of overflow
789 // Compute new length of the string
790 if (numDeleteRanges
== 1) {
791 newLength
= curLength
+ insertLength
- deleteRanges
[0].length
;
794 newLength
= curLength
+ insertLength
* numDeleteRanges
;
795 for (cnt
= 0; cnt
< numDeleteRanges
; cnt
++) newLength
-= deleteRanges
[cnt
].length
;
798 __CFAssertIfFixedLengthIsOK(str
, newLength
);
800 if (newLength
== 0) {
801 // An somewhat optimized code-path for this special case, with the following implicit values:
802 // newIsUnicode = false
803 // useLengthAndNullBytes = false
804 // newCharSize = sizeof(uint8_t)
805 // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
806 // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
807 CFIndex curCapacity
= __CFStrCapacity(str
);
808 CFIndex newCapacity
= __CFStrNewCapacity(str
, 0, curCapacity
, true, sizeof(uint8_t));
809 if (newCapacity
!= curCapacity
) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
810 if (curContents
) __CFStrDeallocateMutableContents(str
, (uint8_t *)curContents
);
811 __CFStrSetContentPtr(str
, NULL
);
812 __CFStrSetCapacity(str
, 0);
813 __CFStrClearCapacityProvidedExternally(str
);
814 __CFStrClearHasLengthAndNullBytes(str
);
815 if (!__CFStrIsExternalMutable(str
)) __CFStrClearUnicode(str
); // External mutable implies Unicode
817 if (!__CFStrIsExternalMutable(str
)) {
818 __CFStrClearUnicode(str
);
819 if (curCapacity
>= (int)(sizeof(uint8_t) * 2)) { // If there's room
820 __CFStrSetHasLengthAndNullBytes(str
);
821 ((uint8_t *)curContents
)[0] = ((uint8_t *)curContents
)[1] = 0;
823 __CFStrClearHasLengthAndNullBytes(str
);
827 __CFStrSetExplicitLength(str
, 0);
828 } else { /* This else-clause assumes newLength > 0 */
829 Boolean oldIsUnicode
= __CFStrIsUnicode(str
);
830 Boolean newIsUnicode
= makeUnicode
|| (oldIsUnicode
/* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str
);
831 CFIndex newCharSize
= newIsUnicode
? sizeof(UniChar
) : sizeof(uint8_t);
832 Boolean useLengthAndNullBytes
= !newIsUnicode
/* && (newLength > 0) - implicit */;
833 CFIndex numExtraBytes
= useLengthAndNullBytes
? 2 : 0; /* 2 extra bytes to keep the length byte & null... */
834 CFIndex curCapacity
= __CFStrCapacity(str
);
835 if (newLength
> (LONG_MAX
- numExtraBytes
) / newCharSize
) __CFStringHandleOutOfMemory(str
); // Does not return
836 CFIndex newCapacity
= __CFStrNewCapacity(str
, newLength
* newCharSize
+ numExtraBytes
, curCapacity
, true, newCharSize
);
837 if (newCapacity
== -1) __CFStringHandleOutOfMemory(str
); // Does not return
838 Boolean allocNewBuffer
= (newCapacity
!= curCapacity
) || (curLength
> 0 && !oldIsUnicode
&& newIsUnicode
); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
839 uint8_t *newContents
;
840 if (allocNewBuffer
) {
841 newContents
= (uint8_t *)__CFStrAllocateMutableContents(str
, newCapacity
);
842 if (!newContents
) { // Try allocating without extra room
843 newCapacity
= __CFStrNewCapacity(str
, newLength
* newCharSize
+ numExtraBytes
, curCapacity
, false, newCharSize
);
844 // Since we checked for this above, it shouldn't be the case here, but just in case
845 if (newCapacity
== -1) __CFStringHandleOutOfMemory(str
); // Does not return
846 newContents
= (uint8_t *)__CFStrAllocateMutableContents(str
, newCapacity
);
847 if (!newContents
) __CFStringHandleOutOfMemory(str
); // Does not return
850 newContents
= (uint8_t *)curContents
;
853 Boolean hasLengthAndNullBytes
= __CFStrHasLengthByte(str
);
855 CFAssert1(hasLengthAndNullBytes
== __CFStrHasNullByte(str
), __kCFLogAssertion
, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__
);
857 if (hasLengthAndNullBytes
) curContents
++;
858 if (useLengthAndNullBytes
) newContents
++;
861 if (oldIsUnicode
== newIsUnicode
) {
862 if (newContents
== curContents
) {
863 rearrangeBlocks(newContents
, curLength
, newCharSize
, deleteRanges
, numDeleteRanges
, insertLength
);
865 copyBlocks(curContents
, newContents
, curLength
, oldIsUnicode
, newIsUnicode
, deleteRanges
, numDeleteRanges
, insertLength
);
867 } else if (newIsUnicode
) { /* this implies we have a new buffer */
868 copyBlocks(curContents
, newContents
, curLength
, oldIsUnicode
, newIsUnicode
, deleteRanges
, numDeleteRanges
, insertLength
);
870 if (hasLengthAndNullBytes
) curContents
--; /* Undo the damage from above */
871 if (allocNewBuffer
&& __CFStrFreeContentsWhenDone(str
)) __CFStrDeallocateMutableContents(str
, (void *)curContents
);
875 if (useLengthAndNullBytes
) {
876 newContents
[newLength
] = 0; /* Always have null byte, if not unicode */
877 newContents
--; /* Undo the damage from above */
878 newContents
[0] = __CFCanUseLengthByte(newLength
) ? (uint8_t)newLength
: 0;
879 if (!hasLengthAndNullBytes
) __CFStrSetHasLengthAndNullBytes(str
);
881 if (hasLengthAndNullBytes
) __CFStrClearHasLengthAndNullBytes(str
);
883 if (oldIsUnicode
) __CFStrClearUnicode(str
);
884 } else { // New is unicode...
885 if (!oldIsUnicode
) __CFStrSetUnicode(str
);
886 if (hasLengthAndNullBytes
) __CFStrClearHasLengthAndNullBytes(str
);
888 __CFStrSetExplicitLength(str
, newLength
);
890 if (allocNewBuffer
) {
891 __CFStrSetCapacity(str
, newCapacity
);
892 __CFStrClearCapacityProvidedExternally(str
);
893 __CFStrSetContentPtr(str
, newContents
);
898 /* Same as above, but takes one range (very common case)
900 CF_INLINE
void __CFStringChangeSize(CFMutableStringRef str
, CFRange range
, CFIndex insertLength
, Boolean makeUnicode
) {
901 __CFStringChangeSizeMultiple(str
, &range
, 1, insertLength
, makeUnicode
);
906 static Boolean
__CFStrIsConstantString(CFStringRef str
);
909 static void __CFStringDeallocate(CFTypeRef cf
) {
910 CFStringRef str
= (CFStringRef
)cf
;
912 // If in DEBUG mode, check to see if the string a CFSTR, and complain.
913 CFAssert1(__CFConstantStringTableBeingFreed
|| !__CFStrIsConstantString((CFStringRef
)cf
), __kCFLogAssertion
, "Tried to deallocate CFSTR(\"%@\")", str
);
915 if (!__CFStrIsInline(str
)) {
917 Boolean isMutable
= __CFStrIsMutable(str
);
918 if (__CFStrFreeContentsWhenDone(str
) && (contents
= (uint8_t *)__CFStrContents(str
))) {
920 __CFStrDeallocateMutableContents((CFMutableStringRef
)str
, contents
);
922 if (__CFStrHasContentsDeallocator(str
)) {
923 CFAllocatorRef allocator
= __CFStrContentsDeallocator(str
);
924 CFAllocatorDeallocate(allocator
, contents
);
925 if (!(kCFAllocatorSystemDefaultGCRefZero
== allocator
|| kCFAllocatorDefaultGCRefZero
== allocator
)) CFRelease(allocator
);
927 CFAllocatorRef alloc
= __CFGetAllocator(str
);
928 CFAllocatorDeallocate(alloc
, contents
);
932 if (isMutable
&& __CFStrHasContentsAllocator(str
)) {
933 CFAllocatorRef allocator
= __CFStrContentsAllocator((CFMutableStringRef
)str
);
934 if (!(kCFAllocatorSystemDefaultGCRefZero
== allocator
|| kCFAllocatorDefaultGCRefZero
== allocator
)) CFRelease(allocator
);
939 static Boolean
__CFStringEqual(CFTypeRef cf1
, CFTypeRef cf2
) {
940 CFStringRef str1
= (CFStringRef
)cf1
;
941 CFStringRef str2
= (CFStringRef
)cf2
;
942 const uint8_t *contents1
;
943 const uint8_t *contents2
;
946 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
947 /* !!! We do not need == test, as the CFBase runtime assures this */
949 contents1
= (uint8_t *)__CFStrContents(str1
);
950 contents2
= (uint8_t *)__CFStrContents(str2
);
951 len1
= __CFStrLength2(str1
, contents1
);
953 if (len1
!= __CFStrLength2(str2
, contents2
)) return false;
955 contents1
+= __CFStrSkipAnyLengthByte(str1
);
956 contents2
+= __CFStrSkipAnyLengthByte(str2
);
958 if (__CFStrIsEightBit(str1
) && __CFStrIsEightBit(str2
)) {
959 return memcmp((const char *)contents1
, (const char *)contents2
, len1
) ? false : true;
960 } else if (__CFStrIsEightBit(str1
)) { /* One string has Unicode contents */
961 CFStringInlineBuffer buf
;
964 CFStringInitInlineBuffer(str1
, &buf
, CFRangeMake(0, len1
));
965 for (buf_idx
= 0; buf_idx
< len1
; buf_idx
++) {
966 if (__CFStringGetCharacterFromInlineBufferQuick(&buf
, buf_idx
) != ((UniChar
*)contents2
)[buf_idx
]) return false;
968 } else if (__CFStrIsEightBit(str2
)) { /* One string has Unicode contents */
969 CFStringInlineBuffer buf
;
972 CFStringInitInlineBuffer(str2
, &buf
, CFRangeMake(0, len1
));
973 for (buf_idx
= 0; buf_idx
< len1
; buf_idx
++) {
974 if (__CFStringGetCharacterFromInlineBufferQuick(&buf
, buf_idx
) != ((UniChar
*)contents1
)[buf_idx
]) return false;
976 } else { /* Both strings have Unicode contents */
978 for (idx
= 0; idx
< len1
; idx
++) {
979 if (((UniChar
*)contents1
)[idx
] != ((UniChar
*)contents2
)[idx
]) return false;
/* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
If the length is less than or equal to 96, then the hash function is simply the
following (n is the nth UniChar character, starting from 0):

  hash(-1) = length
  hash(n) = hash(n-1) * 257 + unichar(n);
  Hash = hash(length-1) + (hash(length-1) << (length & 31))

If the length is greater than 96, then the above algorithm applies to
characters 0..31, (length/2)-16..(length/2)+15, and length-32..length-1, inclusive;
thus the first, middle, and last 32 characters.

Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401; 67503105 is 257^4 - 256^4
If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.
!!! We haven't updated for LP64 yet

NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.

Hash function was changed between Panther and Tiger, and Tiger and Leopard.
*/
/* Strings up to this many characters are hashed in full; longer ones are sampled (see the hashing notes). */
#define HashEverythingLimit 96

/* Both macros below update a variable named "result" that must be in scope at the point of use, and advance
   "pointer". The element at offset k is spelled (accessStart k accessEnd), so that callers can wrap the access,
   e.g. HashNextUniChar(table[p[, ]], p) reads table[p[0]].
   They are wrapped in do { } while (0) so that "MACRO(...);" is exactly one statement and can be used
   as the body of an if/else without the trailing semicolon terminating the if.
*/

/* Folds the next four elements into result (four steps of result = result * 257 + c, unrolled) and advances pointer by 4. */
#define HashNextFourUniChars(accessStart, accessEnd, pointer) \
    do {result = result * 67503105 + (accessStart 0 accessEnd) * 16974593 + (accessStart 1 accessEnd) * 66049 + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;} while (0)

/* Folds the next element into result and advances pointer by 1. */
#define HashNextUniChar(accessStart, accessEnd, pointer) \
    do {result = result * 257 + (accessStart 0 accessEnd); pointer++;} while (0)
1015 /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing.
1017 CF_INLINE CFHashCode
__CFStrHashCharacters(const UniChar
*uContents
, CFIndex len
, CFIndex actualLen
) {
1018 CFHashCode result
= actualLen
;
1019 if (len
<= HashEverythingLimit
) {
1020 const UniChar
*end4
= uContents
+ (len
& ~3);
1021 const UniChar
*end
= uContents
+ len
;
1022 while (uContents
< end4
) HashNextFourUniChars(uContents
[, ], uContents
); // First count in fours
1023 while (uContents
< end
) HashNextUniChar(uContents
[, ], uContents
); // Then for the last <4 chars, count in ones...
1025 const UniChar
*contents
, *end
;
1026 contents
= uContents
;
1027 end
= contents
+ 32;
1028 while (contents
< end
) HashNextFourUniChars(contents
[, ], contents
);
1029 contents
= uContents
+ (len
>> 1) - 16;
1030 end
= contents
+ 32;
1031 while (contents
< end
) HashNextFourUniChars(contents
[, ], contents
);
1032 end
= uContents
+ len
;
1033 contents
= end
- 32;
1034 while (contents
< end
) HashNextFourUniChars(contents
[, ], contents
);
1036 return result
+ (result
<< (actualLen
& 31));
1039 /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check.
1041 CF_INLINE CFHashCode
__CFStrHashEightBit(const uint8_t *cContents
, CFIndex len
) {
1043 if (!__CFCharToUniCharFunc
) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
1045 Boolean err
= false;
1046 if (len
<= HashEverythingLimit
) {
1047 for (cnt
= 0; cnt
< len
; cnt
++) if (cContents
[cnt
] >= 128) err
= true;
1049 for (cnt
= 0; cnt
< 32; cnt
++) if (cContents
[cnt
] >= 128) err
= true;
1050 for (cnt
= (len
>> 1) - 16; cnt
< (len
>> 1) + 16; cnt
++) if (cContents
[cnt
] >= 128) err
= true;
1051 for (cnt
= (len
- 32); cnt
< len
; cnt
++) if (cContents
[cnt
] >= 128) err
= true;
1054 // Can't do log here, as it might be too early
1055 fprintf(stderr
, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
1059 CFHashCode result
= len
;
1060 if (len
<= HashEverythingLimit
) {
1061 const uint8_t *end4
= cContents
+ (len
& ~3);
1062 const uint8_t *end
= cContents
+ len
;
1063 while (cContents
< end4
) HashNextFourUniChars(__CFCharToUniCharTable
[cContents
[, ]], cContents
); // First count in fours
1064 while (cContents
< end
) HashNextUniChar(__CFCharToUniCharTable
[cContents
[, ]], cContents
); // Then for the last <4 chars, count in ones...
1066 const uint8_t *contents
, *end
;
1067 contents
= cContents
;
1068 end
= contents
+ 32;
1069 while (contents
< end
) HashNextFourUniChars(__CFCharToUniCharTable
[contents
[, ]], contents
);
1070 contents
= cContents
+ (len
>> 1) - 16;
1071 end
= contents
+ 32;
1072 while (contents
< end
) HashNextFourUniChars(__CFCharToUniCharTable
[contents
[, ]], contents
);
1073 end
= cContents
+ len
;
1074 contents
= end
- 32;
1075 while (contents
< end
) HashNextFourUniChars(__CFCharToUniCharTable
[contents
[, ]], contents
);
1077 return result
+ (result
<< (len
& 31));
1080 CFHashCode
CFStringHashISOLatin1CString(const uint8_t *bytes
, CFIndex len
) {
1081 CFHashCode result
= len
;
1082 if (len
<= HashEverythingLimit
) {
1083 const uint8_t *end4
= bytes
+ (len
& ~3);
1084 const uint8_t *end
= bytes
+ len
;
1085 while (bytes
< end4
) HashNextFourUniChars(bytes
[, ], bytes
); // First count in fours
1086 while (bytes
< end
) HashNextUniChar(bytes
[, ], bytes
); // Then for the last <4 chars, count in ones...
1088 const uint8_t *contents
, *end
;
1090 end
= contents
+ 32;
1091 while (contents
< end
) HashNextFourUniChars(contents
[, ], contents
);
1092 contents
= bytes
+ (len
>> 1) - 16;
1093 end
= contents
+ 32;
1094 while (contents
< end
) HashNextFourUniChars(contents
[, ], contents
);
1096 contents
= end
- 32;
1097 while (contents
< end
) HashNextFourUniChars(contents
[, ], contents
);
1099 return result
+ (result
<< (len
& 31));
1102 CFHashCode
CFStringHashCString(const uint8_t *bytes
, CFIndex len
) {
1103 return __CFStrHashEightBit(bytes
, len
);
1106 CFHashCode
CFStringHashCharacters(const UniChar
*characters
, CFIndex len
) {
1107 return __CFStrHashCharacters(characters
, len
, len
);
1110 /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash.
1112 CFHashCode
CFStringHashNSString(CFStringRef str
) {
1113 UniChar buffer
[HashEverythingLimit
];
1114 CFIndex bufLen
; // Number of characters in the buffer for hashing
1115 CFIndex len
= 0; // Actual length of the string
1117 CF_OBJC_CALL0(CFIndex
, len
, str
, "length");
1118 if (len
<= HashEverythingLimit
) {
1119 CF_OBJC_VOIDCALL2(str
, "getCharacters:range:", buffer
, CFRangeMake(0, len
));
1122 CF_OBJC_VOIDCALL2(str
, "getCharacters:range:", buffer
, CFRangeMake(0, 32));
1123 CF_OBJC_VOIDCALL2(str
, "getCharacters:range:", buffer
+32, CFRangeMake((len
>> 1) - 16, 32));
1124 CF_OBJC_VOIDCALL2(str
, "getCharacters:range:", buffer
+64, CFRangeMake(len
- 32, 32));
1125 bufLen
= HashEverythingLimit
;
1127 return __CFStrHashCharacters(buffer
, bufLen
, len
);
1130 CFHashCode
__CFStringHash(CFTypeRef cf
) {
1131 /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
1132 CFStringRef str
= (CFStringRef
)cf
;
1133 const uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
1134 CFIndex len
= __CFStrLength2(str
, contents
);
1136 if (__CFStrIsEightBit(str
)) {
1137 contents
+= __CFStrSkipAnyLengthByte(str
);
1138 return __CFStrHashEightBit(contents
, len
);
1140 return __CFStrHashCharacters((const UniChar
*)contents
, len
, len
);
1145 static CFStringRef
__CFStringCopyDescription(CFTypeRef cf
) {
1146 return CFStringCreateWithFormat(kCFAllocatorSystemDefault
, NULL
, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf
, __CFGetAllocator(cf
), cf
);
1149 static CFStringRef
__CFStringCopyFormattingDescription(CFTypeRef cf
, CFDictionaryRef formatOptions
) {
1150 return (CFStringRef
)CFStringCreateCopy(__CFGetAllocator(cf
), (CFStringRef
)cf
);
1153 static CFTypeID __kCFStringTypeID
= _kCFRuntimeNotATypeID
;
1155 typedef CFTypeRef (*CF_STRING_CREATE_COPY
)(CFAllocatorRef alloc
, CFTypeRef theString
);
1157 static const CFRuntimeClass __CFStringClass
= {
1158 _kCFRuntimeScannedObject
,
1161 (CF_STRING_CREATE_COPY
)CFStringCreateCopy
,
1162 __CFStringDeallocate
,
1165 __CFStringCopyFormattingDescription
,
1166 __CFStringCopyDescription
1169 __private_extern__
void __CFStringInitialize(void) {
1170 __kCFStringTypeID
= _CFRuntimeRegisterClass(&__CFStringClass
);
1173 CFTypeID
CFStringGetTypeID(void) {
1174 return __kCFStringTypeID
;
1178 static Boolean
CFStrIsUnicode(CFStringRef str
) {
1179 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, Boolean
, str
, "_encodingCantBeStoredInEightBitCFString");
1180 return __CFStrIsUnicode(str
);
// Sentinel value for contentsDeallocator; the funnel below replaces it with the string's own allocator.
1185 #define ALLOCATORSFREEFUNC ((CFAllocatorRef)-1)
1187 /* contentsDeallocator indicates how to free the data if it's noCopy == true:
1188 kCFAllocatorNull: don't free
1189 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
1190 NULL: default allocator
1191 otherwise it's the allocator that should be used (it will be explicitly stored)
1192 if noCopy == false, then freeFunc should be ALLOCATORSFREEFUNC
1193 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
1194 possiblyExternalFormat indicates that the bytes might have BOM and be swapped
1195 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
1196 numBytes contains the actual number of bytes in "bytes", including Length byte,
1197 BUT not the NULL byte at the end
1198 bytes should not contain BOM characters
1199 !!! Various flags should be combined to reduce number of arguments, if possible
// Common funnel used by the immutable CFString creation functions in this file.
// Stages visible below: normalize the allocators, short-circuit to kCFEmptyString for empty
// input, decode or reduce the incoming bytes when needed, optionally consult the string ROM,
// compute the instance size, create the instance, set its info bits, and attach or copy the bytes.
// NOTE(review): several lines of this function (closing braces, some declarations, else
// branches and preprocessor directives) are not visible in this listing; the comments added
// here describe only what is shown.
1201 __private_extern__ CFStringRef
__CFStringCreateImmutableFunnel3(
1202 CFAllocatorRef alloc
, const void *bytes
, CFIndex numBytes
, CFStringEncoding encoding
,
1203 Boolean possiblyExternalFormat
, Boolean tryToReduceUnicode
, Boolean hasLengthByte
, Boolean hasNullByte
, Boolean noCopy
,
1204 CFAllocatorRef contentsDeallocator
, UInt32 converterFlags
) {
1206 CFMutableStringRef str
;
1207 CFVarWidthCharBuffer vBuf
;
1209 Boolean useLengthByte
= false;
1210 Boolean useNullByte
= false;
1211 Boolean useInlineData
= false;
// Instrumentation only: pick a label for the incoming encoding, falling back to its hex value.
1213 #if INSTRUMENT_SHARED_STRINGS
1214 const char *recordedEncoding
;
1215 char encodingBuffer
[128];
1216 if (encoding
== kCFStringEncodingUnicode
) recordedEncoding
= "Unicode";
1217 else if (encoding
== kCFStringEncodingASCII
) recordedEncoding
= "ASCII";
1218 else if (encoding
== kCFStringEncodingUTF8
) recordedEncoding
= "UTF8";
1219 else if (encoding
== kCFStringEncodingMacRoman
) recordedEncoding
= "MacRoman";
1221 snprintf(encodingBuffer
, sizeof(encodingBuffer
), "0x%lX", (unsigned long)encoding
);
1222 recordedEncoding
= encodingBuffer
;
// A NULL alloc is replaced by the current default allocator.
1226 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
// Normalize contentsDeallocator: the ALLOCATORSFREEFUNC sentinel becomes alloc, NULL becomes the default allocator.
1228 if (contentsDeallocator
== ALLOCATORSFREEFUNC
) {
1229 contentsDeallocator
= alloc
;
1230 } else if (contentsDeallocator
== NULL
) {
1231 contentsDeallocator
= __CFGetDefaultAllocator();
// Empty-input fast path: hand back a retained kCFEmptyString, releasing a noCopy buffer first.
1234 if ((NULL
!= kCFEmptyString
) && (numBytes
== 0) && _CFAllocatorIsSystemDefault(alloc
)) { // If we are using the system default allocator, and the string is empty, then use the empty string!
1235 if (noCopy
&& (contentsDeallocator
!= kCFAllocatorNull
)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
1236 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1238 return (CFStringRef
)CFRetain(kCFEmptyString
); // Quick exit; won't catch all empty strings, but most
1241 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1243 vBuf
.shouldFreeChars
= false; // We use this to remember to free the buffer possibly allocated by decode
1245 // Record whether we're starting out with an ASCII-superset string, because we need to know this later for the string ROM; this may get changed later if we successfully convert down from Unicode. We only record this once because __CFCanUseEightBitCFStringForBytes() can be expensive.
1246 Boolean stringSupportsEightBitCFRepresentation
= encoding
!= kCFStringEncodingUnicode
&& __CFCanUseEightBitCFStringForBytes((const uint8_t *)bytes
, numBytes
, encoding
);
1248 // We may also change noCopy within this function if we have to decode the string into an external buffer. We do not want to avoid the use of the string ROM merely because we tried to be efficient and reuse the decoded buffer for the CFString's external storage. Therefore, we use this variable to track whether we actually can ignore the noCopy flag (which may or may not be set anyways).
1249 Boolean stringROMShouldIgnoreNoCopy
= false;
1251 // First check to see if the data needs to be converted...
1252 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
// Decode path: taken for Unicode input flagged as possibly external, or for non-Unicode input
// that did not pass the eight-bit representation check above.
1254 if ((encoding
== kCFStringEncodingUnicode
&& possiblyExternalFormat
) || encoding
!= kCFStringEncodingUnicode
&& ! stringSupportsEightBitCFRepresentation
) {
// Skip the length byte, if any, before handing the bytes to the decoder.
1255 const void *realBytes
= (uint8_t *) bytes
+ (hasLengthByte
? 1 : 0);
1256 CFIndex realNumBytes
= numBytes
- (hasLengthByte
? 1 : 0);
1257 Boolean usingPassedInMemory
= false;
1259 vBuf
.allocator
= kCFAllocatorSystemDefault
; // We don't want to use client's allocator for temp stuff
1260 vBuf
.chars
.unicode
= NULL
; // This will cause the decode function to allocate memory if necessary
1262 if (!__CFStringDecodeByteStream3((const uint8_t *)realBytes
, realNumBytes
, encoding
, false, &vBuf
, &usingPassedInMemory
, converterFlags
)) {
1263 // Note that if the string can't be created, we don't free the buffer, even if there is a contents deallocator. This is on purpose.
// After decoding, the data is treated as either ASCII or Unicode, per vBuf.isASCII.
1267 encoding
= vBuf
.isASCII
? kCFStringEncodingASCII
: kCFStringEncodingUnicode
;
1269 // Update our flag according to whether the decoded buffer is ASCII
1270 stringSupportsEightBitCFRepresentation
= vBuf
.isASCII
;
1272 if (!usingPassedInMemory
) {
1274 // Because __CFStringDecodeByteStream3() allocated our buffer, it's OK for us to free it if we can get the string from the ROM.
1275 stringROMShouldIgnoreNoCopy
= true;
1277 // Make the parameters fit the new situation
1278 numBytes
= vBuf
.isASCII
? vBuf
.numChars
: (vBuf
.numChars
* sizeof(UniChar
));
1279 hasLengthByte
= hasNullByte
= false;
1281 // Get rid of the original buffer if its not being used
1282 if (noCopy
&& (contentsDeallocator
!= kCFAllocatorNull
)) {
1283 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1285 contentsDeallocator
= alloc
; // At this point we are using the string's allocator, as the original buffer is gone...
1287 // See if we can reuse any storage the decode func might have allocated
1288 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1290 if (vBuf
.shouldFreeChars
&& (alloc
== vBuf
.allocator
) && encoding
== kCFStringEncodingUnicode
) {
1291 vBuf
.shouldFreeChars
= false; // Transferring ownership to the CFString
1292 bytes
= CFAllocatorReallocate(vBuf
.allocator
, (void *)vBuf
.chars
.unicode
, numBytes
, 0); // Tighten up the storage
1294 #if INSTRUMENT_SHARED_STRINGS
1295 if (encoding
== kCFStringEncodingASCII
) recordedEncoding
= "ForeignASCII-NoCopy";
1296 else recordedEncoding
= "ForeignUnicode-NoCopy";
1299 #if INSTRUMENT_SHARED_STRINGS
1300 if (encoding
== kCFStringEncodingASCII
) recordedEncoding
= "ForeignASCII-Copy";
1301 else recordedEncoding
= "ForeignUnicode-Copy";
1303 bytes
= vBuf
.chars
.unicode
;
1304 noCopy
= false; // Can't do noCopy anymore
1305 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1310 // At this point, all necessary input arguments have been changed to reflect the new state
1312 } else if (encoding
== kCFStringEncodingUnicode
&& tryToReduceUnicode
) { // Check to see if we can reduce Unicode to ASCII
1314 CFIndex len
= numBytes
/ sizeof(UniChar
);
1315 Boolean allASCII
= true;
// Scan for any character above 127.
1317 for (cnt
= 0; cnt
< len
; cnt
++) if (((const UniChar
*)bytes
)[cnt
] > 127) {
1322 if (allASCII
) { // Yes we can!
1324 Boolean newHasLengthByte
= __CFCanUseLengthByte(len
);
1325 numBytes
= (len
+ 1 + (newHasLengthByte
? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte
1326 // See if we can use that temporary local buffer in vBuf...
1327 if (numBytes
>= __kCFVarWidthLocalBufferSize
) {
1328 mem
= ptr
= (uint8_t *)CFAllocatorAllocate(alloc
, numBytes
, 0);
1329 if (__CFOASafe
) __CFSetLastAllocationEventName(mem
, "CFString (store)");
1331 mem
= ptr
= (uint8_t *)(vBuf
.localBuffer
);
1333 if (mem
) { // If we can't allocate memory for some reason, use what we had (that is, as if we didn't have all ASCII)
1334 // Copy the Unicode bytes into the new ASCII buffer
1335 hasLengthByte
= newHasLengthByte
;
1337 if (hasLengthByte
) *ptr
++ = (uint8_t)len
;
// Narrow each UniChar to one byte.
1338 for (cnt
= 0; cnt
< len
; cnt
++) ptr
[cnt
] = (uint8_t)(((const UniChar
*)bytes
)[cnt
]);
1340 if (noCopy
&& (contentsDeallocator
!= kCFAllocatorNull
)) {
1341 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1343 // Now make everything look like we had an ASCII buffer to start with
1345 encoding
= kCFStringEncodingASCII
;
1346 contentsDeallocator
= alloc
; // At this point we are using the string's allocator, as the original buffer is gone...
1347 noCopy
= (numBytes
>= __kCFVarWidthLocalBufferSize
); // If we had to allocate it, make sure it's kept around
1348 numBytes
--; // Should not contain the NULL byte at end...
1349 stringSupportsEightBitCFRepresentation
= true; // We're ASCII now!
1350 stringROMShouldIgnoreNoCopy
= true; // We allocated this buffer, so we should feel free to get rid of it if we can use the string ROM
1351 #if INSTRUMENT_SHARED_STRINGS
1352 recordedEncoding
= "U->A";
1357 // At this point, all necessary input arguments have been changed to reflect the new state
1360 // Now determine the necessary size
1361 #if INSTRUMENT_SHARED_STRINGS || USE_STRING_ROM
1362 Boolean stringSupportsROM
= stringSupportsEightBitCFRepresentation
;
1365 #if INSTRUMENT_SHARED_STRINGS
1366 if (stringSupportsROM
) {
1367 const void *realBytes
= (uint8_t *) bytes
+ (hasLengthByte
? 1 : 0);
1368 CFIndex realNumBytes
= numBytes
- !! hasLengthByte
;
1369 __CFRecordStringAllocationEvent(recordedEncoding
, realBytes
, realNumBytes
);
// String ROM lookup.
// NOTE(review): the conditional-compilation directives guarding this section are not visible here — confirm which build flags enable it.
1373 CFStringRef romResult
= NULL
;
1377 if (stringSupportsROM
) {
1378 // Disable the string ROM if necessary
// The environment variable is read once; -1 marks "not yet checked".
1379 static char sDisableStringROM
= -1;
1380 if (sDisableStringROM
== -1) sDisableStringROM
= !! __CFgetenv("CFStringDisableROM");
1382 if (sDisableStringROM
== 0) romResult
= _CFSearchStringROM(bytes
+ !! hasLengthByte
, numBytes
- !! hasLengthByte
);
1384 /* if we get a result from our ROM, and noCopy is set, then deallocate the buffer immediately */
1386 if (noCopy
&& (contentsDeallocator
!= kCFAllocatorNull
)) {
1387 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1390 /* these don't get used again, but clear them for consistency */
1394 /* set our result to the ROM result which is not really mutable, of course, but that's OK because we don't try to modify it. */
1395 str
= (CFMutableStringRef
)romResult
;
1400 // Now determine the necessary size
// External-storage layout: a buffer pointer, plus a deallocator slot when one must be stored, plus an explicit length when there is no length byte.
1404 size
= sizeof(void *); // Pointer to the buffer
1405 // special GCRefZero allocator usage always needs saving
1406 if (_CFAllocatorIsGCRefZero(contentsDeallocator
) || (contentsDeallocator
!= alloc
&& contentsDeallocator
!= kCFAllocatorNull
)) {
1407 size
+= sizeof(void *); // The contentsDeallocator
1409 if (!hasLengthByte
) size
+= sizeof(CFIndex
); // Explicit length
1410 useLengthByte
= hasLengthByte
;
1411 useNullByte
= hasNullByte
;
1413 } else { // Inline data; reserve space for it
1415 useInlineData
= true;
1418 if (hasLengthByte
|| (encoding
!= kCFStringEncodingUnicode
&& __CFCanUseLengthByte(numBytes
))) {
1419 useLengthByte
= true;
1420 if (!hasLengthByte
) size
+= 1;
1422 size
+= sizeof(CFIndex
); // Explicit length
1424 if (hasNullByte
|| encoding
!= kCFStringEncodingUnicode
) {
1430 #ifdef STRING_SIZE_STATS
1431 // Dump alloced CFString size info every so often
1433 static unsigned sizes
[256] = {0};
1434 int allocedSize
= size
+ sizeof(CFRuntimeBase
);
1435 if (allocedSize
< 255) sizes
[allocedSize
]++; else sizes
[255]++;
1436 if ((++cnt
% 1000) == 0) {
1437 printf ("\nTotal: %d\n", cnt
);
1438 int i
; for (i
= 0; i
< 256; i
++) printf("%03d: %5d%s", i
, sizes
[i
], ((i
% 8) == 7) ? "\n" : " ");
1442 // Finally, allocate!
1444 str
= (CFMutableStringRef
)_CFRuntimeCreateInstance(alloc
, __kCFStringTypeID
, size
, NULL
);
1446 if (__CFOASafe
) __CFSetLastAllocationEventName(str
, "CFString (immutable)");
// Choose the contents-ownership bits from how contentsDeallocator relates to alloc; they are only used when the data is not inline.
1448 CFOptionFlags allocBits
= _CFAllocatorIsGCRefZero(contentsDeallocator
) ? __kCFHasContentsDeallocator
: (contentsDeallocator
== alloc
? __kCFNotInlineContentsDefaultFree
: (contentsDeallocator
== kCFAllocatorNull
? __kCFNotInlineContentsNoFree
: __kCFNotInlineContentsCustomFree
));
1449 __CFStrSetInfoBits(str
,
1450 (useInlineData
? __kCFHasInlineContents
: allocBits
) |
1451 ((encoding
== kCFStringEncodingUnicode
) ? __kCFIsUnicode
: 0) |
1452 (useNullByte
? __kCFHasNullByte
: 0) |
1453 (useLengthByte
? __kCFHasLengthByte
: 0));
// Without a length byte the length is stored explicitly, in characters (bytes divided by sizeof(UniChar) for Unicode).
1455 if (!useLengthByte
) {
1456 CFIndex length
= numBytes
- (hasLengthByte
? 1 : 0);
1457 if (encoding
== kCFStringEncodingUnicode
) length
/= sizeof(UniChar
);
1458 __CFStrSetExplicitLength(str
, length
);
// Inline data is copied into the instance (adding a length byte and terminator as chosen above); otherwise the instance points at bytes.
1461 if (useInlineData
) {
1462 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
1463 if (useLengthByte
&& !hasLengthByte
) *contents
++ = (uint8_t)numBytes
;
1464 memmove(contents
, bytes
, numBytes
);
1465 if (useNullByte
) contents
[numBytes
] = 0;
1467 __CFStrSetContentPtr(str
, bytes
);
1468 if (__CFStrHasContentsDeallocator(str
)) __CFStrSetContentsDeallocator(str
, contentsDeallocator
);
// NOTE(review): this deallocation appears to belong to a failure branch (e.g. instance creation returning NULL) whose control-flow lines are not visible in this listing — confirm.
1471 if (noCopy
&& (contentsDeallocator
!= kCFAllocatorNull
)) {
1472 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
// Release the temporary decode buffer if ownership was not transferred to the string.
1476 if (vBuf
.shouldFreeChars
) CFAllocatorDeallocate(vBuf
.allocator
, (void *)bytes
);
1481 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1483 CFStringRef
__CFStringCreateImmutableFunnel2(
1484 CFAllocatorRef alloc
, const void *bytes
, CFIndex numBytes
, CFStringEncoding encoding
,
1485 Boolean possiblyExternalFormat
, Boolean tryToReduceUnicode
, Boolean hasLengthByte
, Boolean hasNullByte
, Boolean noCopy
,
1486 CFAllocatorRef contentsDeallocator
) {
1487 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, possiblyExternalFormat
, tryToReduceUnicode
, hasLengthByte
, hasNullByte
, noCopy
, contentsDeallocator
, 0);
1492 CFStringRef
CFStringCreateWithPascalString(CFAllocatorRef alloc
, ConstStringPtr pStr
, CFStringEncoding encoding
) {
1493 CFIndex len
= (CFIndex
)(*(uint8_t *)pStr
);
1494 return __CFStringCreateImmutableFunnel3(alloc
, pStr
, len
+1, encoding
, false, false, true, false, false, ALLOCATORSFREEFUNC
, 0);
1498 CFStringRef
CFStringCreateWithCString(CFAllocatorRef alloc
, const char *cStr
, CFStringEncoding encoding
) {
1499 CFIndex len
= strlen(cStr
);
1500 return __CFStringCreateImmutableFunnel3(alloc
, cStr
, len
, encoding
, false, false, false, true, false, ALLOCATORSFREEFUNC
, 0);
1503 CFStringRef
CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc
, ConstStringPtr pStr
, CFStringEncoding encoding
, CFAllocatorRef contentsDeallocator
) {
1504 CFIndex len
= (CFIndex
)(*(uint8_t *)pStr
);
1505 return __CFStringCreateImmutableFunnel3(alloc
, pStr
, len
+1, encoding
, false, false, true, false, true, contentsDeallocator
, 0);
1509 CFStringRef
CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc
, const char *cStr
, CFStringEncoding encoding
, CFAllocatorRef contentsDeallocator
) {
1510 CFIndex len
= strlen(cStr
);
1511 return __CFStringCreateImmutableFunnel3(alloc
, cStr
, len
, encoding
, false, false, false, true, true, contentsDeallocator
, 0);
1515 CFStringRef
CFStringCreateWithCharacters(CFAllocatorRef alloc
, const UniChar
*chars
, CFIndex numChars
) {
1516 return __CFStringCreateImmutableFunnel3(alloc
, chars
, numChars
* sizeof(UniChar
), kCFStringEncodingUnicode
, false, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1520 CFStringRef
CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc
, const UniChar
*chars
, CFIndex numChars
, CFAllocatorRef contentsDeallocator
) {
1521 return __CFStringCreateImmutableFunnel3(alloc
, chars
, numChars
* sizeof(UniChar
), kCFStringEncodingUnicode
, false, false, false, false, true, contentsDeallocator
, 0);
1525 CFStringRef
CFStringCreateWithBytes(CFAllocatorRef alloc
, const uint8_t *bytes
, CFIndex numBytes
, CFStringEncoding encoding
, Boolean externalFormat
) {
1526 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, externalFormat
, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1529 CFStringRef
_CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc
, const uint8_t *bytes
, CFIndex numBytes
, CFStringEncoding encoding
, Boolean externalFormat
, CFAllocatorRef contentsDeallocator
) {
1530 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, externalFormat
, true, false, false, true, contentsDeallocator
, 0);
1533 CFStringRef
CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc
, const uint8_t *bytes
, CFIndex numBytes
, CFStringEncoding encoding
, Boolean externalFormat
, CFAllocatorRef contentsDeallocator
) {
1534 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, externalFormat
, true, false, false, true, contentsDeallocator
, 0);
1537 CFStringRef
CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc
, CFDictionaryRef formatOptions
, CFStringRef format
, va_list arguments
) {
1538 return _CFStringCreateWithFormatAndArgumentsAux(alloc
, NULL
, formatOptions
, format
, arguments
);
1541 CFStringRef
_CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc
, CFStringRef (*copyDescFunc
)(void *, const void *), CFDictionaryRef formatOptions
, CFStringRef format
, va_list arguments
) {
1543 CFMutableStringRef outputString
= CFStringCreateMutable(kCFAllocatorSystemDefault
, 0); //should use alloc if no copy/release
1544 __CFStrSetDesiredCapacity(outputString
, 120); // Given this will be tightened later, choosing a larger working string is fine
1545 __CFStringAppendFormatCore(outputString
, copyDescFunc
, formatOptions
, format
, 0, NULL
, 0, arguments
);
1546 // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1547 // (However, this does make the string inline, and cause the supplied allocator to be used...)
1548 str
= (CFStringRef
)CFStringCreateCopy(alloc
, outputString
);
1549 CFRelease(outputString
);
1553 CFStringRef
CFStringCreateWithFormat(CFAllocatorRef alloc
, CFDictionaryRef formatOptions
, CFStringRef format
, ...) {
1557 va_start(argList
, format
);
1558 result
= CFStringCreateWithFormatAndArguments(alloc
, formatOptions
, format
, argList
);
1564 CFStringRef
CFStringCreateWithSubstring(CFAllocatorRef alloc
, CFStringRef str
, CFRange range
) {
1565 // CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, CFStringRef , str, "_createSubstringWithRange:", CFRangeMake(range.location, range.length));
1567 __CFAssertIsString(str
);
1568 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
1570 if ((range
.location
== 0) && (range
.length
== __CFStrLength(str
))) { /* The substring is the whole string... */
1571 return (CFStringRef
)CFStringCreateCopy(alloc
, str
);
1572 } else if (__CFStrIsEightBit(str
)) {
1573 const uint8_t *contents
= (const uint8_t *)__CFStrContents(str
);
1574 return __CFStringCreateImmutableFunnel3(alloc
, contents
+ range
.location
+ __CFStrSkipAnyLengthByte(str
), range
.length
, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC
, 0);
1576 const UniChar
*contents
= (UniChar
*)__CFStrContents(str
);
1577 return __CFStringCreateImmutableFunnel3(alloc
, contents
+ range
.location
, range
.length
* sizeof(UniChar
), kCFStringEncodingUnicode
, false, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1581 CFStringRef
CFStringCreateCopy(CFAllocatorRef alloc
, CFStringRef str
) {
1582 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringRef, str, "copy");
1584 __CFAssertIsString(str
);
1585 if (!__CFStrIsMutable((CFStringRef
)str
) && // If the string is not mutable
1586 ((alloc
? _CFConvertAllocatorToNonGCRefZeroEquivalent(alloc
) : __CFGetDefaultAllocator()) == __CFGetAllocator(str
)) && // and it has the same allocator as the one we're using
1587 (__CFStrIsInline((CFStringRef
)str
) || __CFStrFreeContentsWhenDone((CFStringRef
)str
) || __CFStrIsConstant((CFStringRef
)str
))) { // and the characters are inline, or are owned by the string, or the string is constant
1588 if (!(kCFUseCollectableAllocator
&& _CFAllocatorIsGCRefZero(alloc
))) CFRetain(str
); // Then just retain instead of making a true copy
1591 if (__CFStrIsEightBit((CFStringRef
)str
)) {
1592 const uint8_t *contents
= (const uint8_t *)__CFStrContents((CFStringRef
)str
);
1593 return __CFStringCreateImmutableFunnel3(alloc
, contents
+ __CFStrSkipAnyLengthByte((CFStringRef
)str
), __CFStrLength2((CFStringRef
)str
, contents
), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC
, 0);
1595 const UniChar
*contents
= (const UniChar
*)__CFStrContents((CFStringRef
)str
);
1596 return __CFStringCreateImmutableFunnel3(alloc
, contents
, __CFStrLength2((CFStringRef
)str
, contents
) * sizeof(UniChar
), kCFStringEncodingUnicode
, false, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1602 /*** Constant string stuff... ***/
1604 /* Table which holds constant strings created with CFSTR, when -fconstant-cfstrings option is not used. These dynamically created constant strings are stored in constantStringTable. The keys are the 8-bit constant C-strings from the compiler; the values are the CFStrings created for them. _CFSTRLock protects this table.
1606 static CFMutableDictionaryRef constantStringTable
= NULL
;
1607 static CFSpinLock_t _CFSTRLock
= CFSpinLockInit
;
1609 static CFStringRef
__cStrCopyDescription(const void *ptr
) {
1610 return CFStringCreateWithCStringNoCopy(kCFAllocatorSystemDefault
, (const char *)ptr
, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull
);
1613 static Boolean
__cStrEqual(const void *ptr1
, const void *ptr2
) {
1614 return (strcmp((const char *)ptr1
, (const char *)ptr2
) == 0);
1617 static CFHashCode
__cStrHash(const void *ptr
) {
1618 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1619 const char *cStr
= (const char *)ptr
;
1620 CFIndex len
= strlen(cStr
);
1621 CFHashCode result
= 0;
1622 if (len
<= 4) { // All chars
1624 while (cnt
--) result
+= (result
<< 8) + *cStr
++;
1625 } else { // First and last 2 chars
1626 result
+= (result
<< 8) + cStr
[0];
1627 result
+= (result
<< 8) + cStr
[1];
1628 result
+= (result
<< 8) + cStr
[len
-2];
1629 result
+= (result
<< 8) + cStr
[len
-1];
1631 result
+= (result
<< (len
& 31));
// Returns the CFString registered in constantStringTable for the C string cStr, creating and
// registering one on first use. The table is created lazily; lookups and insertions happen
// under _CFSTRLock.
// NOTE(review): several lines of this function (declarations of result/key/count, loop headers,
// else branches, closing braces, returns and preprocessor directives) are not visible in this
// listing; the comments added here describe only what is shown.
1636 CFStringRef
__CFStringMakeConstantString(const char *cStr
) {
1639 // StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging
// NOTE(review): this early return is presumably limited to certain builds by a directive not visible here — confirm.
1640 if ('\0' == *cStr
) return kCFEmptyString
;
// Lazy table creation: build a candidate table outside the lock, install it only if none exists yet, and release the candidate if another one was installed first.
1642 if (constantStringTable
== NULL
) {
// Keys are C strings compared and hashed by the __cStr* callbacks above; no key retain/release callbacks are supplied.
1643 CFDictionaryKeyCallBacks constantStringCallBacks
= {0, NULL
, NULL
, __cStrCopyDescription
, __cStrEqual
, __cStrHash
};
1644 CFDictionaryValueCallBacks constantStringValueCallBacks
= kCFTypeDictionaryValueCallBacks
;
1645 constantStringValueCallBacks
.equal
= NULL
; // So that we only find strings that are ==
1646 CFMutableDictionaryRef table
= CFDictionaryCreateMutable(kCFAllocatorSystemDefault
, 0, &constantStringCallBacks
, &constantStringValueCallBacks
);
1647 _CFDictionarySetCapacity(table
, 2500); // avoid lots of rehashing
1648 __CFSpinLock(&_CFSTRLock
);
1649 if (constantStringTable
== NULL
) constantStringTable
= table
;
1650 __CFSpinUnlock(&_CFSTRLock
);
1651 if (constantStringTable
!= table
) CFRelease(table
);
// Look for an existing entry first; the lock is dropped on both outcomes.
1654 __CFSpinLock(&_CFSTRLock
);
1655 if ((result
= (CFStringRef
)CFDictionaryGetValue(constantStringTable
, cStr
))) {
1656 __CFSpinUnlock(&_CFSTRLock
);
1658 __CFSpinUnlock(&_CFSTRLock
);
1662 Boolean isASCII
= true;
1663 // Given this code path is rarer these days, OK to do this extra work to verify the strings
1664 const char *tmp
= cStr
;
// A byte with the high bit set marks the input as non-ASCII.
1666 if (*(tmp
++) & 0x80) {
// Build a printable rendering of cStr for the warning: high-bit bytes appear as octal escapes.
1672 CFMutableStringRef ms
= CFStringCreateMutable(kCFAllocatorSystemDefault
, 0);
1675 CFStringAppendFormat(ms
, NULL
, (*tmp
& 0x80) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp
);
1678 CFLog(kCFLogLevelWarning
, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms
);
1681 // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility
1682 result
= CFStringCreateWithCString(kCFAllocatorSystemDefault
, cStr
, kCFStringEncodingMacRoman
);
1683 if (result
== NULL
) {
1684 CFLog(__kCFLogAssertion
, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1687 if (__CFOASafe
) __CFSetLastAllocationEventName((void *)result
, "CFString (CFSTR)");
// The dictionary key is the new string's own eight-bit contents when available; otherwise a separately allocated copy of cStr.
1688 if (__CFStrIsEightBit(result
)) {
1689 key
= (char *)__CFStrContents(result
) + __CFStrSkipAnyLengthByte(result
);
1690 } else { // For some reason the string is not 8-bit!
1691 key
= (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault
, strlen(cStr
) + 1, 0);
1692 if (__CFOASafe
) __CFSetLastAllocationEventName((void *)key
, "CFString (CFSTR key)");
1693 strlcpy(key
, cStr
, strlen(cStr
) + 1); // !!! We will leak this, if the string is removed from the table (or table is freed)
1697 CFStringRef resultToBeReleased
= result
;
// Insert under the lock; an unchanged count after the add means an entry for this key already existed.
1699 __CFSpinLock(&_CFSTRLock
);
1700 count
= CFDictionaryGetCount(constantStringTable
);
1701 CFDictionaryAddValue(constantStringTable
, key
, result
);
1702 if (CFDictionaryGetCount(constantStringTable
) == count
) { // add did nothing, someone already put it there
1703 result
= (CFStringRef
)CFDictionaryGetValue(constantStringTable
, key
);
// NOTE(review): the next two statements zero two different reference-count fields; they look like alternate branches of a conditional-compilation block whose directives are not visible here — confirm.
1706 ((struct __CFString
*)result
)->base
._rc
= 0;
1708 ((struct __CFString
*)result
)->base
._cfinfo
[CF_RC_BITS
] = 0;
1711 __CFSpinUnlock(&_CFSTRLock
);
1712 // This either eliminates the extra retain on the freshly created string, or frees it, if it was actually not inserted into the table
1713 CFRelease(resultToBeReleased
);
1721 static Boolean
__CFStrIsConstantString(CFStringRef str
) {
1722 Boolean found
= false;
1723 if (constantStringTable
) {
1724 __CFSpinLock(&_CFSTRLock
);
1725 found
= CFDictionaryContainsValue(constantStringTable
, str
);
1726 __CFSpinUnlock(&_CFSTRLock
);
// Windows-only teardown hook: releases the constant string table and clears the pointer to it.
// NOTE(review): some lines of this function (closing braces and preprocessor directives,
// including the directive closing the #if below) are not visible in this listing.
1733 #if DEPLOYMENT_TARGET_WINDOWS
1734 void __CFStringCleanup (void) {
1735 /* in case library is unloaded, release store for the constant string table */
1736 if (constantStringTable
!= NULL
) {
// NOTE(review): the flagged release below and the plain release after it look like alternate branches of a conditional-compilation block whose directives are not visible here; as shown, the table would be released twice — confirm.
1738 __CFConstantStringTableBeingFreed
= true;
1739 CFRelease(constantStringTable
);
1740 __CFConstantStringTableBeingFreed
= false;
1742 CFRelease(constantStringTable
);
// Clear the global so later callers see "no table".
1744 constantStringTable
= NULL
;
1750 // Can pass in NSString as replacement string
1751 // Call with numRanges > 0, and incrementing ranges
1753 static void __CFStringReplaceMultiple(CFMutableStringRef str
, CFRange
*ranges
, CFIndex numRanges
, CFStringRef replacement
) {
1755 CFStringRef copy
= NULL
;
1756 if (replacement
== str
) copy
= replacement
= CFStringCreateCopy(kCFAllocatorSystemDefault
, replacement
); // Very special and hopefully rare case
1757 CFIndex replacementLength
= CFStringGetLength(replacement
);
1759 __CFStringChangeSizeMultiple(str
, ranges
, numRanges
, replacementLength
, (replacementLength
> 0) && CFStrIsUnicode(replacement
));
1761 if (__CFStrIsUnicode(str
)) {
1762 UniChar
*contents
= (UniChar
*)__CFStrContents(str
);
1763 UniChar
*firstReplacement
= contents
+ ranges
[0].location
;
1764 // Extract the replacementString into the first location, then copy from there
1765 CFStringGetCharacters(replacement
, CFRangeMake(0, replacementLength
), firstReplacement
);
1766 for (cnt
= 1; cnt
< numRanges
; cnt
++) {
1767 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1768 contents
+= replacementLength
- ranges
[cnt
- 1].length
;
1769 memmove(contents
+ ranges
[cnt
].location
, firstReplacement
, replacementLength
* sizeof(UniChar
));
1772 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
1773 uint8_t *firstReplacement
= contents
+ ranges
[0].location
+ __CFStrSkipAnyLengthByte(str
);
1774 // Extract the replacementString into the first location, then copy from there
1775 CFStringGetBytes(replacement
, CFRangeMake(0, replacementLength
), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement
, replacementLength
, NULL
);
1776 contents
+= __CFStrSkipAnyLengthByte(str
); // Now contents will simply track the location to insert next string into
1777 for (cnt
= 1; cnt
< numRanges
; cnt
++) {
1778 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1779 contents
+= replacementLength
- ranges
[cnt
- 1].length
;
1780 memmove(contents
+ ranges
[cnt
].location
, firstReplacement
, replacementLength
);
1783 if (copy
) CFRelease(copy
);
1786 // Can pass in NSString as replacement string
1788 CF_INLINE
void __CFStringReplace(CFMutableStringRef str
, CFRange range
, CFStringRef replacement
) {
1789 CFStringRef copy
= NULL
;
1790 if (replacement
== str
) copy
= replacement
= (CFStringRef
)CFStringCreateCopy(kCFAllocatorSystemDefault
, replacement
); // Very special and hopefully rare case
1791 CFIndex replacementLength
= CFStringGetLength(replacement
);
1793 __CFStringChangeSize(str
, range
, replacementLength
, (replacementLength
> 0) && CFStrIsUnicode(replacement
));
1795 if (__CFStrIsUnicode(str
)) {
1796 UniChar
*contents
= (UniChar
*)__CFStrContents(str
);
1797 CFStringGetCharacters(replacement
, CFRangeMake(0, replacementLength
), contents
+ range
.location
);
1799 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
1800 CFStringGetBytes(replacement
, CFRangeMake(0, replacementLength
), __CFStringGetEightBitStringEncoding(), 0, false, contents
+ range
.location
+ __CFStrSkipAnyLengthByte(str
), replacementLength
, NULL
);
1803 if (copy
) CFRelease(copy
);
1806 /* If client does not provide a minimum capacity
1808 #define DEFAULTMINCAPACITY 32
1810 CF_INLINE CFMutableStringRef
__CFStringCreateMutableFunnel(CFAllocatorRef alloc
, CFIndex maxLength
, UInt32 additionalInfoBits
) {
1811 CFMutableStringRef str
;
1812 if (_CFAllocatorIsGCRefZero(alloc
)) additionalInfoBits
|= __kCFHasContentsAllocator
;
1813 Boolean hasExternalContentsAllocator
= (additionalInfoBits
& __kCFHasContentsAllocator
) ? true : false;
1815 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
1817 // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
1818 str
= (CFMutableStringRef
)_CFRuntimeCreateInstance(alloc
, __kCFStringTypeID
, sizeof(struct __notInlineMutable
) - (hasExternalContentsAllocator
? 0 : sizeof(CFAllocatorRef
)), NULL
);
1820 if (__CFOASafe
) __CFSetLastAllocationEventName(str
, "CFString (mutable)");
1822 __CFStrSetInfoBits(str
, __kCFIsMutable
| additionalInfoBits
);
1823 str
->variants
.notInlineMutable
.buffer
= NULL
;
1824 __CFStrSetExplicitLength(str
, 0);
1825 str
->variants
.notInlineMutable
.hasGap
= str
->variants
.notInlineMutable
.isFixedCapacity
= str
->variants
.notInlineMutable
.isExternalMutable
= str
->variants
.notInlineMutable
.capacityProvidedExternally
= 0;
1826 if (maxLength
!= 0) __CFStrSetIsFixed(str
);
1827 __CFStrSetDesiredCapacity(str
, (maxLength
== 0) ? DEFAULTMINCAPACITY
: maxLength
);
1828 __CFStrSetCapacity(str
, 0);
1829 if (__CFStrHasContentsAllocator(str
)) {
1830 // contents allocator starts out as the string's own allocator
1831 __CFStrSetContentsAllocator(str
, alloc
);
1837 CFMutableStringRef
CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc
, UniChar
*chars
, CFIndex numChars
, CFIndex capacity
, CFAllocatorRef externalCharactersAllocator
) {
1838 CFOptionFlags contentsAllocationBits
= externalCharactersAllocator
? ((externalCharactersAllocator
== kCFAllocatorNull
) ? __kCFNotInlineContentsNoFree
: __kCFHasContentsAllocator
) : __kCFNotInlineContentsDefaultFree
;
1839 CFMutableStringRef string
= __CFStringCreateMutableFunnel(alloc
, 0, contentsAllocationBits
| __kCFIsUnicode
);
1841 __CFStrSetIsExternalMutable(string
);
1842 if (__CFStrHasContentsAllocator(string
)) {
1843 CFAllocatorRef allocator
= __CFStrContentsAllocator((CFMutableStringRef
)string
);
1844 if (!(kCFAllocatorSystemDefaultGCRefZero
== allocator
|| kCFAllocatorDefaultGCRefZero
== allocator
)) CFRelease(allocator
);
1845 __CFStrSetContentsAllocator(string
, externalCharactersAllocator
);
1847 CFStringSetExternalCharactersNoCopy(string
, chars
, numChars
, capacity
);
1852 CFMutableStringRef
CFStringCreateMutable(CFAllocatorRef alloc
, CFIndex maxLength
) {
1853 return __CFStringCreateMutableFunnel(alloc
, maxLength
, __kCFNotInlineContentsDefaultFree
);
1856 CFMutableStringRef
CFStringCreateMutableCopy(CFAllocatorRef alloc
, CFIndex maxLength
, CFStringRef string
) {
1857 CFMutableStringRef newString
;
1859 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFMutableStringRef, string, "mutableCopy");
1861 __CFAssertIsString(string
);
1863 newString
= CFStringCreateMutable(alloc
, maxLength
);
1864 __CFStringReplace(newString
, CFRangeMake(0, 0), string
);
1870 __private_extern__
void _CFStrSetDesiredCapacity(CFMutableStringRef str
, CFIndex len
) {
1871 __CFAssertIsStringAndMutable(str
);
1872 __CFStrSetDesiredCapacity(str
, len
);
1876 /* This one is for CF
1878 CFIndex
CFStringGetLength(CFStringRef str
) {
1879 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFIndex
, str
, "length");
1881 __CFAssertIsString(str
);
1882 return __CFStrLength(str
);
1885 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1887 CFIndex
_CFStringGetLength2(CFStringRef str
) {
1888 return __CFStrLength(str
);
1892 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere.
1894 CF_INLINE UniChar
__CFStringGetCharacterAtIndexGuts(CFStringRef str
, CFIndex idx
, const uint8_t *contents
) {
1895 if (__CFStrIsEightBit(str
)) {
1896 contents
+= __CFStrSkipAnyLengthByte(str
);
1898 if (!__CFCharToUniCharFunc
&& (contents
[idx
] >= 128)) {
1899 // Can't do log here, as it might be too early
1900 fprintf(stderr
, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
1903 return __CFCharToUniCharTable
[contents
[idx
]];
1906 return ((UniChar
*)contents
)[idx
];
1909 /* This one is for the CF API
1911 UniChar
CFStringGetCharacterAtIndex(CFStringRef str
, CFIndex idx
) {
1912 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, UniChar
, str
, "characterAtIndex:", idx
);
1914 __CFAssertIsString(str
);
1915 __CFAssertIndexIsInStringBounds(str
, idx
);
1916 return __CFStringGetCharacterAtIndexGuts(str
, idx
, (const uint8_t *)__CFStrContents(str
));
1919 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1921 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str
, CFIndex idx
, UniChar
*ch
) {
1922 const uint8_t *contents
= (const uint8_t *)__CFStrContents(str
);
1923 if (idx
>= __CFStrLength2(str
, contents
) && __CFStringNoteErrors()) return _CFStringErrBounds
;
1924 *ch
= __CFStringGetCharacterAtIndexGuts(str
, idx
, contents
);
1925 return _CFStringErrNone
;
1929 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
1931 CF_INLINE
void __CFStringGetCharactersGuts(CFStringRef str
, CFRange range
, UniChar
*buffer
, const uint8_t *contents
) {
1932 if (__CFStrIsEightBit(str
)) {
1933 __CFStrConvertBytesToUnicode(((uint8_t *)contents
) + (range
.location
+ __CFStrSkipAnyLengthByte(str
)), buffer
, range
.length
);
1935 const UniChar
*uContents
= ((UniChar
*)contents
) + range
.location
;
1936 memmove(buffer
, uContents
, range
.length
* sizeof(UniChar
));
1940 /* This one is for the CF API
1942 void CFStringGetCharacters(CFStringRef str
, CFRange range
, UniChar
*buffer
) {
1943 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "getCharacters:range:", buffer
, range
);
1945 __CFAssertIsString(str
);
1946 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
1947 __CFStringGetCharactersGuts(str
, range
, buffer
, (const uint8_t *)__CFStrContents(str
));
1950 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1952 int _CFStringCheckAndGetCharacters(CFStringRef str
, CFRange range
, UniChar
*buffer
) {
1953 const uint8_t *contents
= (const uint8_t *)__CFStrContents(str
);
1954 if (range
.location
+ range
.length
> __CFStrLength2(str
, contents
) && __CFStringNoteErrors()) return _CFStringErrBounds
;
1955 __CFStringGetCharactersGuts(str
, range
, buffer
, contents
);
1956 return _CFStringErrNone
;
1960 CFIndex
CFStringGetBytes(CFStringRef str
, CFRange range
, CFStringEncoding encoding
, uint8_t lossByte
, Boolean isExternalRepresentation
, uint8_t *buffer
, CFIndex maxBufLen
, CFIndex
*usedBufLen
) {
1962 /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */
1963 __CFAssertIsNotNegative(maxBufLen
);
1965 if (!CF_IS_OBJC(__kCFStringTypeID
, str
)) { // If we can grope the ivars, let's do it...
1966 __CFAssertIsString(str
);
1967 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
1969 if (__CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
1970 const unsigned char *contents
= (const unsigned char *)__CFStrContents(str
);
1971 CFIndex cLength
= range
.length
;
1974 if (cLength
> maxBufLen
) cLength
= maxBufLen
;
1975 memmove(buffer
, contents
+ __CFStrSkipAnyLengthByte(str
) + range
.location
, cLength
);
1977 if (usedBufLen
) *usedBufLen
= cLength
;
1983 return __CFStringEncodeByteStream(str
, range
.location
, range
.length
, isExternalRepresentation
, encoding
, lossByte
, buffer
, maxBufLen
, usedBufLen
);
1987 ConstStringPtr
CFStringGetPascalStringPtr (CFStringRef str
, CFStringEncoding encoding
) {
1989 if (!CF_IS_OBJC(__kCFStringTypeID
, str
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1990 __CFAssertIsString(str
);
1991 if (__CFStrHasLengthByte(str
) && __CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII
1992 const uint8_t *contents
= (const uint8_t *)__CFStrContents(str
);
1993 if (__CFStrHasExplicitLength(str
) && (__CFStrLength2(str
, contents
) != (SInt32
)(*contents
))) return NULL
; // Invalid length byte
1994 return (ConstStringPtr
)contents
;
1996 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
2002 const char * CFStringGetCStringPtr(CFStringRef str
, CFStringEncoding encoding
) {
2004 if (encoding
!= __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII
!= __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding
))) return NULL
;
2005 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
2007 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, const char *, str
, "_fastCStringContents:", true);
2009 __CFAssertIsString(str
);
2011 if (__CFStrHasNullByte(str
)) {
2012 // Note: this is called a lot, 27000 times to open a small xcode project with one file open.
2013 // Of these uses about 1500 are for cStrings/utf8strings.
2015 // Only sometimes when the stars are aligned will this call return a gc pointer
2016 // under GC we can only really return a pointer to the start of a GC buffer for cString use
2017 // (Is there a simpler way to ask if contents isGC?)
2018 CFAllocatorRef alloc
= (__CFStrHasContentsAllocator(str
)) ? __CFStrContentsAllocator(str
) : __CFGetAllocator(str
);
2019 if (CF_IS_COLLECTABLE_ALLOCATOR(alloc
)) {
2020 if (__CFStrSkipAnyLengthByte(str
) != 0 || !__CFStrIsMutable(str
)) {
2021 static int counter
= 0;
2022 printf("CFString %dth unsafe safe string %s\n", ++counter
, __CFStrContents(str
) + __CFStrSkipAnyLengthByte(str
));
2027 return (const char *)__CFStrContents(str
) + __CFStrSkipAnyLengthByte(str
);
2034 const UniChar
*CFStringGetCharactersPtr(CFStringRef str
) {
2036 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, const UniChar
*, str
, "_fastCharacterContents");
2038 __CFAssertIsString(str
);
2039 if (__CFStrIsUnicode(str
)) return (const UniChar
*)__CFStrContents(str
);
2044 Boolean
CFStringGetPascalString(CFStringRef str
, Str255 buffer
, CFIndex bufferSize
, CFStringEncoding encoding
) {
2048 __CFAssertIsNotNegative(bufferSize
);
2049 if (bufferSize
< 1) return false;
2051 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
2052 length
= CFStringGetLength(str
);
2053 if (!__CFCanUseLengthByte(length
)) return false; // Can't fit into pstring
2055 const uint8_t *contents
;
2057 __CFAssertIsString(str
);
2059 contents
= (const uint8_t *)__CFStrContents(str
);
2060 length
= __CFStrLength2(str
, contents
);
2062 if (!__CFCanUseLengthByte(length
)) return false; // Can't fit into pstring
2064 if (__CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
2065 if (length
>= bufferSize
) return false;
2066 memmove((void*)(1 + (const char*)buffer
), (__CFStrSkipAnyLengthByte(str
) + contents
), length
);
2067 *buffer
= (unsigned char)length
;
2072 if (__CFStringEncodeByteStream(str
, 0, length
, false, encoding
, false, (UInt8
*)(1 + (uint8_t *)buffer
), bufferSize
- 1, &usedLen
) != length
) {
2075 if (bufferSize
> 0) {
2076 strlcpy((char *)buffer
+ 1, CONVERSIONFAILURESTR
, bufferSize
- 1);
2077 buffer
[0] = (unsigned char)((CFIndex
)sizeof(CONVERSIONFAILURESTR
) < (bufferSize
- 1) ? (CFIndex
)sizeof(CONVERSIONFAILURESTR
) : (bufferSize
- 1));
2080 if (bufferSize
> 0) buffer
[0] = 0;
2084 *buffer
= (unsigned char)usedLen
;
2088 Boolean
CFStringGetCString(CFStringRef str
, char *buffer
, CFIndex bufferSize
, CFStringEncoding encoding
) {
2089 const uint8_t *contents
;
2092 __CFAssertIsNotNegative(bufferSize
);
2093 if (bufferSize
< 1) return false;
2095 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID
, Boolean
, str
, "_getCString:maxLength:encoding:", buffer
, bufferSize
- 1, encoding
);
2097 __CFAssertIsString(str
);
2099 contents
= (const uint8_t *)__CFStrContents(str
);
2100 len
= __CFStrLength2(str
, contents
);
2102 if (__CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
2103 if (len
>= bufferSize
) return false;
2104 memmove(buffer
, contents
+ __CFStrSkipAnyLengthByte(str
), len
);
2110 if (__CFStringEncodeByteStream(str
, 0, len
, false, encoding
, false, (unsigned char*) buffer
, bufferSize
- 1, &usedLen
) == len
) {
2111 buffer
[usedLen
] = '\0';
2115 strlcpy(buffer
, CONVERSIONFAILURESTR
, bufferSize
);
2117 if (bufferSize
> 0) buffer
[0] = 0;
2124 extern Boolean
__CFLocaleGetNullLocale(struct __CFLocale
*locale
);
2125 extern void __CFLocaleSetNullLocale(struct __CFLocale
*locale
);
2127 static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale
) {
2128 CFStringRef collatorID
;
2129 const char *langID
= NULL
;
2130 static const void *lastLocale
= NULL
;
2131 static const char *lastLangID
= NULL
;
2132 static CFSpinLock_t lock
= CFSpinLockInit
;
2134 if (__CFLocaleGetNullLocale((struct __CFLocale
*)locale
)) return NULL
;
2136 __CFSpinLock(&lock
);
2137 if ((NULL
!= lastLocale
) && (lastLocale
== locale
)) {
2138 __CFSpinUnlock(&lock
);
2141 __CFSpinUnlock(&lock
);
2143 collatorID
= (CFStringRef
)CFLocaleGetValue(locale
, __kCFLocaleCollatorID
);
2145 // This is somewhat depending on CFLocale implementation always creating CFString for locale identifer ???
2146 if (__CFStrLength(collatorID
) > 1) {
2147 const void *contents
= __CFStrContents(collatorID
);
2151 if (__CFStrIsEightBit(collatorID
)) {
2152 string
= ((const char *)contents
) + __CFStrSkipAnyLengthByte(collatorID
);
2154 const UTF16Char
*characters
= (const UTF16Char
*)contents
;
2156 buffer
[0] = (char)*(characters
++);
2157 buffer
[1] = (char)*characters
;
2161 if (!strncmp(string
, "az", 2)) { // Azerbaijani
2163 } else if (!strncmp(string
, "lt", 2)) { // Lithuanian
2165 } else if (!strncmp(string
, "tr", 2)) { // Turkish
2167 } else if (!strncmp(string
, "nl", 2)) { // Dutch
2173 if (langID
== NULL
) __CFLocaleSetNullLocale((struct __CFLocale
*)locale
);
2175 __CFSpinLock(&lock
);
2176 lastLocale
= locale
;
2177 lastLangID
= langID
;
2178 __CFSpinUnlock(&lock
);
2183 CF_INLINE
bool _CFCanUseLocale(CFLocaleRef locale
) {
2190 #define MAX_CASE_MAPPING_BUF (8)
2191 #define ZERO_WIDTH_JOINER (0x200D)
2192 #define COMBINING_GRAPHEME_JOINER (0x034F)
2194 #define HANGUL_CHOSEONG_START (0x1100)
2195 #define HANGUL_CHOSEONG_END (0x115F)
2196 #define HANGUL_JUNGSEONG_START (0x1160)
2197 #define HANGUL_JUNGSEONG_END (0x11A2)
2198 #define HANGUL_JONGSEONG_START (0x11A8)
2199 #define HANGUL_JONGSEONG_END (0x11F9)
2201 #define HANGUL_SYLLABLE_START (0xAC00)
2202 #define HANGUL_SYLLABLE_END (0xD7AF)
2205 // Returns the length of characters filled into outCharacters. If no change, returns 0. maxBufLen shoule be at least 8
2206 static CFIndex
__CFStringFoldCharacterClusterAtIndex(UTF32Char character
, CFStringInlineBuffer
*buffer
, CFIndex index
, CFOptionFlags flags
, const uint8_t *langCode
, UTF32Char
*outCharacters
, CFIndex maxBufferLength
, CFIndex
*consumedLength
) {
2207 CFIndex filledLength
= 0, currentIndex
= index
;
2209 if (0 != character
) {
2210 UTF16Char lowSurrogate
;
2211 CFIndex planeNo
= (character
>> 16);
2212 bool isTurkikCapitalI
= false;
2213 static const uint8_t *decompBMP
= NULL
;
2214 static const uint8_t *graphemeBMP
= NULL
;
2216 if (NULL
== decompBMP
) {
2217 decompBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, 0);
2218 graphemeBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, 0);
2223 if ((character
< 0x0080) && ((NULL
== langCode
) || (character
!= 'I'))) { // ASCII
2224 if ((flags
& kCFCompareCaseInsensitive
) && (character
>= 'A') && (character
<= 'Z')) {
2225 character
+= ('a' - 'A');
2226 *outCharacters
= character
;
2230 // do width-insensitive mapping
2231 if ((flags
& kCFCompareWidthInsensitive
) && (character
>= 0xFF00) && (character
<= 0xFFEF)) {
2232 (void)CFUniCharCompatibilityDecompose(&character
, 1, 1);
2233 *outCharacters
= character
;
2238 if ((0 == planeNo
) && CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((lowSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
)))) {
2239 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, lowSurrogate
);
2241 planeNo
= (character
>> 16);
2245 if (flags
& (kCFCompareDiacriticInsensitive
|kCFCompareNonliteral
)) {
2246 if (CFUniCharIsMemberOfBitmap(character
, ((0 == planeNo
) ? decompBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, planeNo
)))) {
2247 UTF32Char original
= character
;
2249 filledLength
= CFUniCharDecomposeCharacter(character
, outCharacters
, maxBufferLength
);
2250 character
= *outCharacters
;
2252 if ((flags
& kCFCompareDiacriticInsensitive
) && (character
< 0x0510)) {
2253 filledLength
= 1; // reset if Roman, Greek, Cyrillic
2254 } else if (0 == (flags
& kCFCompareNonliteral
)) {
2255 character
= original
;
2262 if (flags
& kCFCompareCaseInsensitive
) {
2263 const uint8_t *nonBaseBitmap
;
2264 bool filterNonBase
= (((flags
& kCFCompareDiacriticInsensitive
) && (character
< 0x0510)) ? true : false);
2265 static const uint8_t *lowerBMP
= NULL
;
2266 static const uint8_t *caseFoldBMP
= NULL
;
2268 if (NULL
== lowerBMP
) {
2269 lowerBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet
, 0);
2270 caseFoldBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet
, 0);
2273 if ((NULL
!= langCode
) && ('I' == character
) && ((0 == strcmp((const char *)langCode
, "tr")) || (0 == strcmp((const char *)langCode
, "az")))) { // do Turkik special-casing
2274 if (filledLength
> 1) {
2275 if (0x0307 == outCharacters
[1]) {
2276 if (--filledLength
> 1) memmove((outCharacters
+ 1), (outCharacters
+ 2), sizeof(UTF32Char
) * (filledLength
- 1));
2277 character
= *outCharacters
= 'i';
2278 isTurkikCapitalI
= true;
2280 } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
)) {
2281 character
= *outCharacters
= 'i';
2284 isTurkikCapitalI
= true;
2287 if (!isTurkikCapitalI
&& (CFUniCharIsMemberOfBitmap(character
, ((0 == planeNo
) ? lowerBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet
, planeNo
))) || CFUniCharIsMemberOfBitmap(character
, ((0 == planeNo
) ? caseFoldBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet
, planeNo
))))) {
2288 UTF16Char caseFoldBuffer
[MAX_CASE_MAPPING_BUF
];
2289 const UTF16Char
*bufferP
= caseFoldBuffer
, *bufferLimit
;
2290 UTF32Char
*outCharactersP
= outCharacters
;
2291 uint32_t bufferLength
= CFUniCharMapCaseTo(character
, caseFoldBuffer
, MAX_CASE_MAPPING_BUF
, kCFUniCharCaseFold
, 0, langCode
);
2293 bufferLimit
= bufferP
+ bufferLength
;
2295 if (filledLength
> 0) --filledLength
; // decrement filledLength (will add back later)
2297 // make space for casefold characters
2298 if ((filledLength
> 0) && (bufferLength
> 1)) {
2299 CFIndex totalScalerLength
= 0;
2301 while (bufferP
< bufferLimit
) {
2302 if (CFUniCharIsSurrogateHighCharacter(*(bufferP
++)) && (bufferP
< bufferLimit
) && CFUniCharIsSurrogateLowCharacter(*bufferP
)) ++bufferP
;
2303 ++totalScalerLength
;
2305 memmove(outCharacters
+ totalScalerLength
, outCharacters
+ 1, filledLength
* sizeof(UTF32Char
));
2306 bufferP
= caseFoldBuffer
;
2310 while (bufferP
< bufferLimit
) {
2311 character
= *(bufferP
++);
2312 if (CFUniCharIsSurrogateHighCharacter(character
) && (bufferP
< bufferLimit
) && CFUniCharIsSurrogateLowCharacter(*bufferP
)) {
2313 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, *(bufferP
++));
2314 nonBaseBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (character
>> 16));
2316 nonBaseBitmap
= graphemeBMP
;
2319 if (!filterNonBase
|| !CFUniCharIsMemberOfBitmap(character
, nonBaseBitmap
)) {
2320 *(outCharactersP
++) = character
;
2328 // collect following combining marks
2329 if (flags
& (kCFCompareDiacriticInsensitive
|kCFCompareNonliteral
)) {
2330 const uint8_t *nonBaseBitmap
;
2331 const uint8_t *decompBitmap
;
2332 bool doFill
= (((flags
& kCFCompareDiacriticInsensitive
) && (character
< 0x0510)) ? false : true);
2334 if (0 == filledLength
) {
2335 *outCharacters
= character
; // filledLength will be updated below on demand
2337 if (doFill
) { // check if really needs to fill
2338 UTF32Char nonBaseCharacter
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
);
2340 if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter
) && CFUniCharIsSurrogateLowCharacter((lowSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
+ 1)))) {
2341 nonBaseCharacter
= CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter
, lowSurrogate
);
2342 nonBaseBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (nonBaseCharacter
>> 16));
2343 decompBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (nonBaseCharacter
>> 16));
2345 nonBaseBitmap
= graphemeBMP
;
2346 decompBitmap
= decompBMP
;
2349 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter
, nonBaseBitmap
)) {
2350 filledLength
= 1; // For the base character
2352 if ((0 == (flags
& kCFCompareDiacriticInsensitive
)) || (nonBaseCharacter
> 0x050F)) {
2353 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter
, decompBitmap
)) {
2354 filledLength
+= CFUniCharDecomposeCharacter(nonBaseCharacter
, &(outCharacters
[filledLength
]), maxBufferLength
- filledLength
);
2356 outCharacters
[filledLength
++] = nonBaseCharacter
;
2359 currentIndex
+= ((nonBaseBitmap
== graphemeBMP
) ? 1 : 2);
2366 while (filledLength
< maxBufferLength
) { // do the rest
2367 character
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
);
2369 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((lowSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
+ 1)))) {
2370 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, lowSurrogate
);
2371 nonBaseBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (character
>> 16));
2372 decompBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (character
>> 16));
2374 nonBaseBitmap
= graphemeBMP
;
2375 decompBitmap
= decompBMP
;
2377 if (isTurkikCapitalI
) {
2378 isTurkikCapitalI
= false;
2379 } else if (CFUniCharIsMemberOfBitmap(character
, nonBaseBitmap
)) {
2381 if (CFUniCharIsMemberOfBitmap(character
, decompBitmap
)) {
2382 CFIndex currentLength
= CFUniCharDecomposeCharacter(character
, &(outCharacters
[filledLength
]), maxBufferLength
- filledLength
);
2384 if (0 == currentLength
) break; // didn't fit
2386 filledLength
+= currentLength
;
2388 outCharacters
[filledLength
++] = character
;
2390 } else if (0 == filledLength
) {
2391 filledLength
= 1; // For the base character
2393 currentIndex
+= ((nonBaseBitmap
== graphemeBMP
) ? 1 : 2);
2399 if (filledLength
> 1) {
2400 UTF32Char
*sortCharactersLimit
= outCharacters
+ filledLength
;
2401 UTF32Char
*sortCharacters
= sortCharactersLimit
- 1;
2403 while ((outCharacters
< sortCharacters
) && CFUniCharIsMemberOfBitmap(*sortCharacters
, ((*sortCharacters
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (*sortCharacters
>> 16))))) --sortCharacters
;
2405 if ((sortCharactersLimit
- sortCharacters
) > 1) CFUniCharPrioritySort(sortCharacters
, (sortCharactersLimit
- sortCharacters
)); // priority sort
2410 if ((filledLength
> 0) && (NULL
!= consumedLength
)) *consumedLength
= (currentIndex
- index
);
2412 return filledLength
;
2415 static bool __CFStringFillCharacterSetInlineBuffer(CFCharacterSetInlineBuffer
*buffer
, CFStringCompareFlags compareOptions
) {
2416 if (0 != (compareOptions
& kCFCompareIgnoreNonAlphanumeric
)) {
2417 static CFCharacterSetRef nonAlnumChars
= NULL
;
2419 if (NULL
== nonAlnumChars
) {
2420 CFMutableCharacterSetRef cset
= CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault
, CFCharacterSetGetPredefined(kCFCharacterSetAlphaNumeric
));
2421 CFCharacterSetInvert(cset
);
2422 if (!OSAtomicCompareAndSwapPtrBarrier(NULL
, cset
, (void **)&nonAlnumChars
)) CFRelease(cset
);
2425 CFCharacterSetInitInlineBuffer(nonAlnumChars
, buffer
);
2433 #define kCFStringStackBufferLength (__kCFStringInlineBufferLength)
2435 CFComparisonResult
CFStringCompareWithOptionsAndLocale(CFStringRef string
, CFStringRef string2
, CFRange rangeToCompare
, CFStringCompareFlags compareOptions
, CFLocaleRef locale
) {
2436 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2437 UTF32Char strBuf1
[kCFStringStackBufferLength
];
2438 UTF32Char strBuf2
[kCFStringStackBufferLength
];
2439 CFStringInlineBuffer inlineBuf1
, inlineBuf2
;
2440 UTF32Char str1Char
, str2Char
;
2441 CFIndex str1UsedLen
, str2UsedLen
;
2442 CFIndex str1Index
= 0, str2Index
= 0, strBuf1Index
= 0, strBuf2Index
= 0, strBuf1Len
= 0, strBuf2Len
= 0;
2443 CFIndex str1LocalizedIndex
= 0, str2LocalizedIndex
= 0;
2444 CFIndex forcedIndex1
= 0, forcedIndex2
= 0;
2445 CFIndex str2Len
= CFStringGetLength(string2
);
2446 bool caseInsensitive
= ((compareOptions
& kCFCompareCaseInsensitive
) ? true : false);
2447 bool diacriticsInsensitive
= ((compareOptions
& kCFCompareDiacriticInsensitive
) ? true : false);
2448 bool equalityOptions
= ((compareOptions
& (kCFCompareCaseInsensitive
|kCFCompareNonliteral
|kCFCompareDiacriticInsensitive
|kCFCompareWidthInsensitive
)) ? true : false);
2449 bool numerically
= ((compareOptions
& kCFCompareNumerically
) ? true : false);
2450 bool forceOrdering
= ((compareOptions
& kCFCompareForcedOrdering
) ? true : false);
2451 const uint8_t *graphemeBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, 0);
2452 const uint8_t *langCode
;
2453 CFComparisonResult compareResult
= kCFCompareEqualTo
;
2454 UTF16Char otherChar
;
2455 Boolean freeLocale
= false;
2456 CFCharacterSetInlineBuffer
*ignoredChars
= NULL
;
2457 CFCharacterSetInlineBuffer csetBuffer
;
2458 bool numericEquivalence
= false;
2460 if ((compareOptions
& kCFCompareLocalized
) && (NULL
== locale
)) {
2461 locale
= CFLocaleCopyCurrent();
2465 langCode
= ((NULL
== locale
) ? NULL
: (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale
));
2467 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer
, compareOptions
)) {
2468 ignoredChars
= &csetBuffer
;
2469 equalityOptions
= true;
2472 if ((NULL
== locale
) && (NULL
== ignoredChars
) && !numerically
) { // could do binary comp (be careful when adding new flags)
2473 CFStringEncoding eightBitEncoding
= __CFStringGetEightBitStringEncoding();
2474 const uint8_t *str1Bytes
= (const uint8_t *)CFStringGetCStringPtr(string
, eightBitEncoding
);
2475 const uint8_t *str2Bytes
= (const uint8_t *)CFStringGetCStringPtr(string2
, eightBitEncoding
);
2476 CFIndex factor
= sizeof(uint8_t);
2478 if ((NULL
!= str1Bytes
) && (NULL
!= str2Bytes
)) {
2479 compareOptions
&= ~kCFCompareNonliteral
; // remove non-literal
2481 if ((kCFStringEncodingASCII
== eightBitEncoding
) && (false == forceOrdering
)) {
2482 if (caseInsensitive
) {
2483 int cmpResult
= strncasecmp_l((const char *)str1Bytes
+ rangeToCompare
.location
, (const char *)str2Bytes
, __CFMin(rangeToCompare
.length
, str2Len
), NULL
);
2485 if (0 == cmpResult
) cmpResult
= rangeToCompare
.length
- str2Len
;
2487 return ((0 == cmpResult
) ? kCFCompareEqualTo
: ((cmpResult
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
2489 } else if (caseInsensitive
|| diacriticsInsensitive
) {
2490 CFIndex limitLength
= __CFMin(rangeToCompare
.length
, str2Len
);
2492 str1Bytes
+= rangeToCompare
.location
;
2494 while (str1Index
< limitLength
) {
2495 str1Char
= str1Bytes
[str1Index
];
2496 str2Char
= str2Bytes
[str1Index
];
2498 if (str1Char
!= str2Char
) {
2499 if ((str1Char
< 0x80) && (str2Char
< 0x80)) {
2500 if (forceOrdering
&& (kCFCompareEqualTo
== compareResult
) && (str1Char
!= str2Char
)) compareResult
= ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
2501 if (caseInsensitive
) {
2502 if ((str1Char
>= 'A') && (str1Char
<= 'Z')) str1Char
+= ('a' - 'A');
2503 if ((str2Char
>= 'A') && (str2Char
<= 'Z')) str2Char
+= ('a' - 'A');
2506 if (str1Char
!= str2Char
) return ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
2515 str2Index
= str1Index
;
2517 if (str1Index
== limitLength
) {
2518 int cmpResult
= rangeToCompare
.length
- str2Len
;
2520 return ((0 == cmpResult
) ? compareResult
: ((cmpResult
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
2523 } else if (!equalityOptions
&& (NULL
== str1Bytes
) && (NULL
== str2Bytes
)) {
2524 str1Bytes
= (const uint8_t *)CFStringGetCharactersPtr(string
);
2525 str2Bytes
= (const uint8_t *)CFStringGetCharactersPtr(string2
);
2526 factor
= sizeof(UTF16Char
);
2527 #if __LITTLE_ENDIAN__
2528 if ((NULL
!= str1Bytes
) && (NULL
!= str2Bytes
)) { // we cannot use memcmp
2529 const UTF16Char
*str1
= ((const UTF16Char
*)str1Bytes
) + rangeToCompare
.location
;
2530 const UTF16Char
*str1Limit
= str1
+ __CFMin(rangeToCompare
.length
, str2Len
);
2531 const UTF16Char
*str2
= (const UTF16Char
*)str2Bytes
;
2532 CFIndex cmpResult
= 0;
2534 while ((0 == cmpResult
) && (str1
< str1Limit
)) cmpResult
= (CFIndex
)*(str1
++) - (CFIndex
)*(str2
++);
2536 if (0 == cmpResult
) cmpResult
= rangeToCompare
.length
- str2Len
;
2538 return ((0 == cmpResult
) ? kCFCompareEqualTo
: ((cmpResult
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
2540 #endif /* __LITTLE_ENDIAN__ */
2542 if ((NULL
!= str1Bytes
) && (NULL
!= str2Bytes
)) {
2543 int cmpResult
= memcmp(str1Bytes
+ (rangeToCompare
.location
* factor
), str2Bytes
, __CFMin(rangeToCompare
.length
, str2Len
) * factor
);
2545 if (0 == cmpResult
) cmpResult
= rangeToCompare
.length
- str2Len
;
2547 return ((0 == cmpResult
) ? kCFCompareEqualTo
: ((cmpResult
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
2551 CFStringInitInlineBuffer(string
, &inlineBuf1
, rangeToCompare
);
2552 CFStringInitInlineBuffer(string2
, &inlineBuf2
, CFRangeMake(0, str2Len
));
2554 if (NULL
!= locale
) {
2555 str1LocalizedIndex
= str1Index
;
2556 str2LocalizedIndex
= str2Index
;
2558 // We temporarily disable kCFCompareDiacriticInsensitive for SL <rdar://problem/6767096>. Should be revisited in NMOS <rdar://problem/7003830>
2559 if (forceOrdering
) {
2560 diacriticsInsensitive
= false;
2561 compareOptions
&= ~kCFCompareDiacriticInsensitive
;
2564 while ((str1Index
< rangeToCompare
.length
) && (str2Index
< str2Len
)) {
2565 if (strBuf1Len
== 0) {
2566 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
2567 if (caseInsensitive
&& (str1Char
>= 'A') && (str1Char
<= 'Z') && ((NULL
== langCode
) || (str1Char
!= 'I')) && ((false == forceOrdering
) || (kCFCompareEqualTo
!= compareResult
))) str1Char
+= ('a' - 'A');
2570 str1Char
= strBuf1
[strBuf1Index
++];
2572 if (strBuf2Len
== 0) {
2573 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
);
2574 if (caseInsensitive
&& (str2Char
>= 'A') && (str2Char
<= 'Z') && ((NULL
== langCode
) || (str2Char
!= 'I')) && ((false == forceOrdering
) || (kCFCompareEqualTo
!= compareResult
))) str2Char
+= ('a' - 'A');
2577 str2Char
= strBuf2
[strBuf2Index
++];
2580 if (numerically
&& ((0 == strBuf1Len
) && (str1Char
<= '9') && (str1Char
>= '0')) && ((0 == strBuf2Len
) && (str2Char
<= '9') && (str2Char
>= '0'))) { // If both are not ASCII digits, then don't do numerical comparison here
2581 uint64_t intValue1
= 0, intValue2
= 0; // !!! Doesn't work if numbers are > max uint64_t
2582 CFIndex str1NumRangeIndex
= str1Index
;
2583 CFIndex str2NumRangeIndex
= str2Index
;
2586 intValue1
= (intValue1
* 10) + (str1Char
- '0');
2587 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, ++str1Index
);
2588 } while ((str1Char
<= '9') && (str1Char
>= '0'));
2591 intValue2
= intValue2
* 10 + (str2Char
- '0');
2592 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, ++str2Index
);
2593 } while ((str2Char
<= '9') && (str2Char
>= '0'));
2595 if (intValue1
== intValue2
) {
2596 if (forceOrdering
&& (kCFCompareEqualTo
== compareResult
) && ((str1Index
- str1NumRangeIndex
) != (str2Index
- str2NumRangeIndex
))) {
2597 compareResult
= (((str1Index
- str1NumRangeIndex
) < (str2Index
- str2NumRangeIndex
)) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
2598 numericEquivalence
= true;
2599 forcedIndex1
= str1NumRangeIndex
;
2600 forcedIndex2
= str2NumRangeIndex
;
2604 } else if (intValue1
< intValue2
) {
2605 if (freeLocale
&& locale
) {
2608 return kCFCompareLessThan
;
2610 if (freeLocale
&& locale
) {
2613 return kCFCompareGreaterThan
;
2617 if (str1Char
!= str2Char
) {
2618 if (!equalityOptions
) {
2619 compareResult
= ((NULL
== locale
) ? ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
) : _CFCompareStringsWithLocale(&inlineBuf1
, CFRangeMake(str1Index
, rangeToCompare
.length
- str1Index
), &inlineBuf2
, CFRangeMake(str2Index
, str2Len
- str2Index
), compareOptions
, locale
));
2620 if (freeLocale
&& locale
) {
2623 return compareResult
;
2626 if (forceOrdering
&& (kCFCompareEqualTo
== compareResult
)) {
2627 compareResult
= ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
2628 forcedIndex1
= str1LocalizedIndex
;
2629 forcedIndex2
= str2LocalizedIndex
;
2632 if ((str1Char
< 0x80) && (str2Char
< 0x80) && (NULL
== ignoredChars
)) {
2633 if (NULL
!= locale
) {
2634 compareResult
= _CFCompareStringsWithLocale(&inlineBuf1
, CFRangeMake(str1Index
, rangeToCompare
.length
- str1Index
), &inlineBuf2
, CFRangeMake(str2Index
, str2Len
- str2Index
), compareOptions
, locale
);
2635 if (freeLocale
&& locale
) {
2638 return compareResult
;
2639 } else if (!caseInsensitive
) {
2640 if (freeLocale
&& locale
) {
2643 return ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
2647 if (CFUniCharIsSurrogateHighCharacter(str1Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1)))) {
2648 str1Char
= CFUniCharGetLongCharacterForSurrogatePair(str1Char
, otherChar
);
2652 if (CFUniCharIsSurrogateHighCharacter(str2Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
+ 1)))) {
2653 str2Char
= CFUniCharGetLongCharacterForSurrogatePair(str2Char
, otherChar
);
2657 if (NULL
!= ignoredChars
) {
2658 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars
, str1Char
)) {
2659 if ((strBuf1Len
> 0) && (strBuf1Index
== strBuf1Len
)) strBuf1Len
= 0;
2660 if (strBuf1Len
== 0) str1Index
+= str1UsedLen
;
2661 if (strBuf2Len
> 0) --strBuf2Index
;
2664 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars
, str2Char
)) {
2665 if ((strBuf2Len
> 0) && (strBuf2Index
== strBuf2Len
)) strBuf2Len
= 0;
2666 if (strBuf2Len
== 0) str2Index
+= str2UsedLen
;
2667 if (strBuf1Len
> 0) -- strBuf1Index
;
2672 if (diacriticsInsensitive
&& (str1Index
> 0)) {
2673 bool str1Skip
= false;
2674 bool str2Skip
= false;
2676 if ((0 == strBuf1Len
) && CFUniCharIsMemberOfBitmap(str1Char
, ((str1Char
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str1Char
>> 16))))) {
2677 str1Char
= str2Char
;
2680 if ((0 == strBuf2Len
) && CFUniCharIsMemberOfBitmap(str2Char
, ((str2Char
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str2Char
>> 16))))) {
2681 str2Char
= str1Char
;
2685 if (str1Skip
!= str2Skip
) {
2686 if (str1Skip
) str2Index
-= str2UsedLen
;
2687 if (str2Skip
) str1Index
-= str1UsedLen
;
2691 if (str1Char
!= str2Char
) {
2692 if (0 == strBuf1Len
) {
2693 strBuf1Len
= __CFStringFoldCharacterClusterAtIndex(str1Char
, &inlineBuf1
, str1Index
, compareOptions
, langCode
, strBuf1
, kCFStringStackBufferLength
, &str1UsedLen
);
2694 if (strBuf1Len
> 0) {
2695 str1Char
= *strBuf1
;
2700 if ((0 == strBuf1Len
) && (0 < strBuf2Len
)) {
2701 compareResult
= ((NULL
== locale
) ? ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
) : _CFCompareStringsWithLocale(&inlineBuf1
, CFRangeMake(str1LocalizedIndex
, rangeToCompare
.length
- str1LocalizedIndex
), &inlineBuf2
, CFRangeMake(str2LocalizedIndex
, str2Len
- str2LocalizedIndex
), compareOptions
, locale
));
2702 if (freeLocale
&& locale
) {
2705 return compareResult
;
2708 if ((0 == strBuf2Len
) && ((0 == strBuf1Len
) || (str1Char
!= str2Char
))) {
2709 strBuf2Len
= __CFStringFoldCharacterClusterAtIndex(str2Char
, &inlineBuf2
, str2Index
, compareOptions
, langCode
, strBuf2
, kCFStringStackBufferLength
, &str2UsedLen
);
2710 if (strBuf2Len
> 0) {
2711 str2Char
= *strBuf2
;
2714 if ((0 == strBuf2Len
) || (str1Char
!= str2Char
)) {
2715 compareResult
= ((NULL
== locale
) ? ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
) : _CFCompareStringsWithLocale(&inlineBuf1
, CFRangeMake(str1LocalizedIndex
, rangeToCompare
.length
- str1LocalizedIndex
), &inlineBuf2
, CFRangeMake(str2LocalizedIndex
, str2Len
- str2LocalizedIndex
), compareOptions
, locale
));
2716 if (freeLocale
&& locale
) {
2719 return compareResult
;
2724 if ((strBuf1Len
> 0) && (strBuf2Len
> 0)) {
2725 while ((strBuf1Index
< strBuf1Len
) && (strBuf2Index
< strBuf2Len
)) {
2726 if (strBuf1
[strBuf1Index
] != strBuf2
[strBuf2Index
]) break;
2727 ++strBuf1Index
; ++strBuf2Index
;
2729 if ((strBuf1Index
< strBuf1Len
) && (strBuf2Index
< strBuf2Len
)) {
2730 CFComparisonResult res
= ((NULL
== locale
) ? ((strBuf1
[strBuf1Index
] < strBuf2
[strBuf2Index
]) ? kCFCompareLessThan
: kCFCompareGreaterThan
) : _CFCompareStringsWithLocale(&inlineBuf1
, CFRangeMake(str1LocalizedIndex
, rangeToCompare
.length
- str1LocalizedIndex
), &inlineBuf2
, CFRangeMake(str2LocalizedIndex
, str2Len
- str2LocalizedIndex
), compareOptions
, locale
));
2731 if (freeLocale
&& locale
) {
2739 if ((strBuf1Len
> 0) && (strBuf1Index
== strBuf1Len
)) strBuf1Len
= 0;
2740 if ((strBuf2Len
> 0) && (strBuf2Index
== strBuf2Len
)) strBuf2Len
= 0;
2742 if (strBuf1Len
== 0) str1Index
+= str1UsedLen
;
2743 if (strBuf2Len
== 0) str2Index
+= str2UsedLen
;
2744 if ((strBuf1Len
== 0) && (strBuf2Len
== 0)) {
2745 str1LocalizedIndex
= str1Index
;
2746 str2LocalizedIndex
= str2Index
;
2750 if (diacriticsInsensitive
|| (NULL
!= ignoredChars
)) {
2751 while (str1Index
< rangeToCompare
.length
) {
2752 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
2753 if ((str1Char
< 0x80) && (NULL
== ignoredChars
)) break; // found ASCII
2755 if (CFUniCharIsSurrogateHighCharacter(str1Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1)))) str1Char
= CFUniCharGetLongCharacterForSurrogatePair(str1Char
, otherChar
);
2757 if ((!diacriticsInsensitive
|| !CFUniCharIsMemberOfBitmap(str1Char
, ((str1Char
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str1Char
>> 16))))) && ((NULL
== ignoredChars
) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars
, str1Char
))) break;
2759 str1Index
+= ((str1Char
< 0x10000) ? 1 : 2);
2762 while (str2Index
< str2Len
) {
2763 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
);
2764 if ((str2Char
< 0x80) && (NULL
== ignoredChars
)) break; // found ASCII
2766 if (CFUniCharIsSurrogateHighCharacter(str2Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
+ 1)))) str2Char
= CFUniCharGetLongCharacterForSurrogatePair(str2Char
, otherChar
);
2768 if ((!diacriticsInsensitive
|| !CFUniCharIsMemberOfBitmap(str2Char
, ((str2Char
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str2Char
>> 16))))) && ((NULL
== ignoredChars
) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars
, str2Char
))) break;
2770 str2Index
+= ((str2Char
< 0x10000) ? 1 : 2);
2773 // Need to recalc localized result here for forced ordering, ICU cannot do numericEquivalence
2774 if (!numericEquivalence
&& (NULL
!= locale
) && (kCFCompareEqualTo
!= compareResult
) && (str1Index
== rangeToCompare
.length
) && (str2Index
== str2Len
)) compareResult
= _CFCompareStringsWithLocale(&inlineBuf1
, CFRangeMake(forcedIndex1
, rangeToCompare
.length
- forcedIndex1
), &inlineBuf2
, CFRangeMake(forcedIndex2
, str2Len
- forcedIndex2
), compareOptions
, locale
);
2776 if (freeLocale
&& locale
) {
2780 return ((str1Index
< rangeToCompare
.length
) ? kCFCompareGreaterThan
: ((str2Index
< str2Len
) ? kCFCompareLessThan
: compareResult
));
2784 CFComparisonResult
CFStringCompareWithOptions(CFStringRef string
, CFStringRef string2
, CFRange rangeToCompare
, CFStringCompareFlags compareOptions
) { return CFStringCompareWithOptionsAndLocale(string
, string2
, rangeToCompare
, compareOptions
, NULL
); }
2786 CFComparisonResult
CFStringCompare(CFStringRef string
, CFStringRef str2
, CFOptionFlags options
) {
2787 return CFStringCompareWithOptions(string
, str2
, CFRangeMake(0, CFStringGetLength(string
)), options
);
2790 Boolean
CFStringFindWithOptionsAndLocale(CFStringRef string
, CFStringRef stringToFind
, CFRange rangeToSearch
, CFStringCompareFlags compareOptions
, CFLocaleRef locale
, CFRange
*result
) {
2791 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2792 CFIndex findStrLen
= CFStringGetLength(stringToFind
);
2793 Boolean didFind
= false;
2794 bool lengthVariants
= ((compareOptions
& (kCFCompareCaseInsensitive
|kCFCompareNonliteral
|kCFCompareDiacriticInsensitive
)) ? true : false);
2795 CFCharacterSetInlineBuffer
*ignoredChars
= NULL
;
2796 CFCharacterSetInlineBuffer csetBuffer
;
2798 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer
, compareOptions
)) {
2799 ignoredChars
= &csetBuffer
;
2800 lengthVariants
= true;
2803 if ((findStrLen
> 0) && (rangeToSearch
.length
> 0) && ((findStrLen
<= rangeToSearch
.length
) || lengthVariants
)) {
2804 UTF32Char strBuf1
[kCFStringStackBufferLength
];
2805 UTF32Char strBuf2
[kCFStringStackBufferLength
];
2806 CFStringInlineBuffer inlineBuf1
, inlineBuf2
;
2807 UTF32Char str1Char
= 0, str2Char
= 0;
2808 CFStringEncoding eightBitEncoding
= __CFStringGetEightBitStringEncoding();
2809 const uint8_t *str1Bytes
= (const uint8_t *)CFStringGetCStringPtr(string
, eightBitEncoding
);
2810 const uint8_t *str2Bytes
= (const uint8_t *)CFStringGetCStringPtr(stringToFind
, eightBitEncoding
);
2811 const UTF32Char
*characters
, *charactersLimit
;
2812 const uint8_t *langCode
= NULL
;
2813 CFIndex fromLoc
, toLoc
;
2814 CFIndex str1Index
, str2Index
;
2815 CFIndex strBuf1Len
, strBuf2Len
;
2816 CFIndex maxStr1Index
= (rangeToSearch
.location
+ rangeToSearch
.length
);
2817 bool equalityOptions
= ((lengthVariants
|| (compareOptions
& kCFCompareWidthInsensitive
)) ? true : false);
2818 bool caseInsensitive
= ((compareOptions
& kCFCompareCaseInsensitive
) ? true : false);
2819 bool forwardAnchor
= ((kCFCompareAnchored
== (compareOptions
& (kCFCompareBackwards
|kCFCompareAnchored
))) ? true : false);
2820 bool backwardAnchor
= (((kCFCompareBackwards
|kCFCompareAnchored
) == (compareOptions
& (kCFCompareBackwards
|kCFCompareAnchored
))) ? true : false);
2823 if (NULL
== locale
) {
2824 if (compareOptions
& kCFCompareLocalized
) {
2825 CFLocaleRef currentLocale
= CFLocaleCopyCurrent();
2826 langCode
= (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(currentLocale
);
2827 CFRelease(currentLocale
);
2830 langCode
= (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale
);
2833 CFStringInitInlineBuffer(string
, &inlineBuf1
, CFRangeMake(0, rangeToSearch
.location
+ rangeToSearch
.length
));
2834 CFStringInitInlineBuffer(stringToFind
, &inlineBuf2
, CFRangeMake(0, findStrLen
));
2836 if (compareOptions
& kCFCompareBackwards
) {
2837 fromLoc
= rangeToSearch
.location
+ rangeToSearch
.length
- (lengthVariants
? 1 : findStrLen
);
2838 toLoc
= (((compareOptions
& kCFCompareAnchored
) && !lengthVariants
) ? fromLoc
: rangeToSearch
.location
);
2840 fromLoc
= rangeToSearch
.location
;
2841 toLoc
= ((compareOptions
& kCFCompareAnchored
) ? fromLoc
: rangeToSearch
.location
+ rangeToSearch
.length
- (lengthVariants
? 1 : findStrLen
));
2844 delta
= ((fromLoc
<= toLoc
) ? 1 : -1);
2846 if ((NULL
!= str1Bytes
) && (NULL
!= str2Bytes
)) {
2847 uint8_t str1Byte
, str2Byte
;
2850 str1Index
= fromLoc
;
2853 while ((str1Index
< maxStr1Index
) && (str2Index
< findStrLen
)) {
2854 str1Byte
= str1Bytes
[str1Index
];
2855 str2Byte
= str2Bytes
[str2Index
];
2857 if (str1Byte
!= str2Byte
) {
2858 if (equalityOptions
) {
2859 if ((str1Byte
< 0x80) && ((NULL
== langCode
) || ('I' != str1Byte
))) {
2860 if (caseInsensitive
&& (str1Byte
>= 'A') && (str1Byte
<= 'Z')) str1Byte
+= ('a' - 'A');
2861 *strBuf1
= str1Byte
;
2864 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
2865 strBuf1Len
= __CFStringFoldCharacterClusterAtIndex(str1Char
, &inlineBuf1
, str1Index
, compareOptions
, langCode
, strBuf1
, kCFStringStackBufferLength
, NULL
);
2866 if (1 > strBuf1Len
) {
2867 *strBuf1
= str1Char
;
2872 if ((NULL
!= ignoredChars
) && (forwardAnchor
|| (str1Index
!= fromLoc
)) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars
, ((str1Byte
< 0x80) ? str1Byte
: str1Char
))) {
2877 if ((str2Byte
< 0x80) && ((NULL
== langCode
) || ('I' != str2Byte
))) {
2878 if (caseInsensitive
&& (str2Byte
>= 'A') && (str2Byte
<= 'Z')) str2Byte
+= ('a' - 'A');
2879 *strBuf2
= str2Byte
;
2882 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
);
2883 strBuf2Len
= __CFStringFoldCharacterClusterAtIndex(str2Char
, &inlineBuf2
, str2Index
, compareOptions
, langCode
, strBuf2
, kCFStringStackBufferLength
, NULL
);
2884 if (1 > strBuf2Len
) {
2885 *strBuf2
= str2Char
;
2890 if ((NULL
!= ignoredChars
) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars
, ((str2Byte
< 0x80) ? str2Byte
: str2Char
))) {
2895 if ((1 == strBuf1Len
) && (1 == strBuf2Len
)) { // normal case
2896 if (*strBuf1
!= *strBuf2
) break;
2900 if (!caseInsensitive
&& (strBuf1Len
!= strBuf2Len
)) break;
2901 if (memcmp(strBuf1
, strBuf2
, sizeof(UTF32Char
) * __CFMin(strBuf1Len
, strBuf2Len
))) break;
2903 if (strBuf1Len
< strBuf2Len
) {
2904 delta
= strBuf2Len
- strBuf1Len
;
2906 if ((str1Index
+ strBuf1Len
+ delta
) > maxStr1Index
) break;
2908 characters
= &(strBuf2
[strBuf1Len
]);
2909 charactersLimit
= characters
+ delta
;
2911 while (characters
< charactersLimit
) {
2912 strBuf1Len
= __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1), &inlineBuf1
, str1Index
+ 1, compareOptions
, langCode
, strBuf1
, kCFStringStackBufferLength
, NULL
);
2913 if ((strBuf1Len
> 0) || (*characters
!= *strBuf1
)) break;
2914 ++characters
; ++str1Index
;
2916 if (characters
< charactersLimit
) break;
2917 } else if (strBuf2Len
< strBuf1Len
) {
2918 delta
= strBuf1Len
- strBuf2Len
;
2920 if ((str2Index
+ strBuf2Len
+ delta
) > findStrLen
) break;
2922 characters
= &(strBuf1
[strBuf2Len
]);
2923 charactersLimit
= characters
+ delta
;
2925 while (characters
< charactersLimit
) {
2926 strBuf2Len
= __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str1Index
+ 1), &inlineBuf2
, str2Index
+ 1, compareOptions
, langCode
, strBuf2
, kCFStringStackBufferLength
, NULL
);
2927 if ((strBuf2Len
> 0) || (*characters
!= *strBuf2
)) break;
2928 ++characters
; ++str2Index
;
2930 if (characters
< charactersLimit
) break;
2937 ++str1Index
; ++str2Index
;
2940 if ((NULL
!= ignoredChars
) && (str1Index
== maxStr1Index
) && (str2Index
< findStrLen
)) { // Process the stringToFind tail
2941 while (str2Index
< findStrLen
) {
2942 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
);
2944 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars
, str2Char
)) break;
2949 if (str2Index
== findStrLen
) {
2950 if ((NULL
!= ignoredChars
) && backwardAnchor
&& (str1Index
< maxStr1Index
)) { // Process the anchor tail
2951 while (str1Index
< maxStr1Index
) {
2952 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
2954 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars
, str1Char
)) break;
2959 if (!backwardAnchor
|| (str1Index
== maxStr1Index
)) {
2961 if (NULL
!= result
) *result
= CFRangeMake(fromLoc
, str1Index
- fromLoc
);
2966 if (fromLoc
== toLoc
) break;
2969 } else if (equalityOptions
) {
2970 UTF16Char otherChar
;
2971 CFIndex str1UsedLen
, str2UsedLen
, strBuf1Index
= 0, strBuf2Index
= 0;
2972 bool diacriticsInsensitive
= ((compareOptions
& kCFCompareDiacriticInsensitive
) ? true : false);
2973 const uint8_t *graphemeBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, 0);
2974 const uint8_t *combClassBMP
= (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
2977 str1Index
= fromLoc
;
2980 strBuf1Len
= strBuf2Len
= 0;
2982 while (str2Index
< findStrLen
) {
2983 if (strBuf1Len
== 0) {
2984 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
2985 if (caseInsensitive
&& (str1Char
>= 'A') && (str1Char
<= 'Z') && ((NULL
== langCode
) || (str1Char
!= 'I'))) str1Char
+= ('a' - 'A');
2988 str1Char
= strBuf1
[strBuf1Index
++];
2990 if (strBuf2Len
== 0) {
2991 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
);
2992 if (caseInsensitive
&& (str2Char
>= 'A') && (str2Char
<= 'Z') && ((NULL
== langCode
) || (str2Char
!= 'I'))) str2Char
+= ('a' - 'A');
2995 str2Char
= strBuf2
[strBuf2Index
++];
2998 if (str1Char
!= str2Char
) {
2999 if ((str1Char
< 0x80) && (str2Char
< 0x80) && (NULL
== ignoredChars
) && ((NULL
== langCode
) || !caseInsensitive
)) break;
3001 if (CFUniCharIsSurrogateHighCharacter(str1Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1)))) {
3002 str1Char
= CFUniCharGetLongCharacterForSurrogatePair(str1Char
, otherChar
);
3006 if (CFUniCharIsSurrogateHighCharacter(str2Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
+ 1)))) {
3007 str2Char
= CFUniCharGetLongCharacterForSurrogatePair(str2Char
, otherChar
);
3011 if (NULL
!= ignoredChars
) {
3012 if ((forwardAnchor
|| (str1Index
!= fromLoc
)) && (str1Index
< maxStr1Index
) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars
, str1Char
)) {
3013 if ((strBuf1Len
> 0) && (strBuf1Index
== strBuf1Len
)) strBuf1Len
= 0;
3014 if (strBuf1Len
== 0) str1Index
+= str1UsedLen
;
3015 if (strBuf2Len
> 0) --strBuf2Index
;
3018 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars
, str2Char
)) {
3019 if ((strBuf2Len
> 0) && (strBuf2Index
== strBuf2Len
)) strBuf2Len
= 0;
3020 if (strBuf2Len
== 0) str2Index
+= str2UsedLen
;
3021 if (strBuf1Len
> 0) -- strBuf1Index
;
3026 if (diacriticsInsensitive
&& (str1Index
> fromLoc
)) {
3027 bool str1Skip
= false;
3028 bool str2Skip
= false;
3030 if ((0 == strBuf1Len
) && CFUniCharIsMemberOfBitmap(str1Char
, ((str1Char
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str1Char
>> 16))))) {
3031 str1Char
= str2Char
;
3034 if ((0 == strBuf2Len
) && CFUniCharIsMemberOfBitmap(str2Char
, ((str2Char
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str2Char
>> 16))))) {
3035 str2Char
= str1Char
;
3039 if (str1Skip
!= str2Skip
) {
3040 if (str1Skip
) str2Index
-= str2UsedLen
;
3041 if (str2Skip
) str1Index
-= str1UsedLen
;
3045 if (str1Char
!= str2Char
) {
3046 if (0 == strBuf1Len
) {
3047 strBuf1Len
= __CFStringFoldCharacterClusterAtIndex(str1Char
, &inlineBuf1
, str1Index
, compareOptions
, langCode
, strBuf1
, kCFStringStackBufferLength
, &str1UsedLen
);
3048 if (strBuf1Len
> 0) {
3049 str1Char
= *strBuf1
;
3054 if ((0 == strBuf1Len
) && (0 < strBuf2Len
)) break;
3056 if ((0 == strBuf2Len
) && ((0 == strBuf1Len
) || (str1Char
!= str2Char
))) {
3057 strBuf2Len
= __CFStringFoldCharacterClusterAtIndex(str2Char
, &inlineBuf2
, str2Index
, compareOptions
, langCode
, strBuf2
, kCFStringStackBufferLength
, &str2UsedLen
);
3058 if ((0 == strBuf2Len
) || (str1Char
!= *strBuf2
)) break;
3063 if ((strBuf1Len
> 0) && (strBuf2Len
> 0)) {
3064 while ((strBuf1Index
< strBuf1Len
) && (strBuf2Index
< strBuf2Len
)) {
3065 if (strBuf1
[strBuf1Index
] != strBuf2
[strBuf2Index
]) break;
3066 ++strBuf1Index
; ++strBuf2Index
;
3068 if ((strBuf1Index
< strBuf1Len
) && (strBuf2Index
< strBuf2Len
)) break;
3072 if ((strBuf1Len
> 0) && (strBuf1Index
== strBuf1Len
)) strBuf1Len
= 0;
3073 if ((strBuf2Len
> 0) && (strBuf2Index
== strBuf2Len
)) strBuf2Len
= 0;
3075 if (strBuf1Len
== 0) str1Index
+= str1UsedLen
;
3076 if (strBuf2Len
== 0) str2Index
+= str2UsedLen
;
3079 if ((NULL
!= ignoredChars
) && (str1Index
== maxStr1Index
) && (str2Index
< findStrLen
)) { // Process the stringToFind tail
3080 while (str2Index
< findStrLen
) {
3081 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
);
3082 if (CFUniCharIsSurrogateHighCharacter(str2Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
+ 1)))) {
3083 str2Char
= CFUniCharGetLongCharacterForSurrogatePair(str2Char
, otherChar
);
3085 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars
, str2Char
)) break;
3086 str2Index
+= ((str2Char
< 0x10000) ? 1 : 2);
3090 if (str2Index
== findStrLen
) {
3093 if (strBuf1Len
> 0) {
3096 if (diacriticsInsensitive
&& (strBuf1
[0] < 0x0510)) {
3097 while (strBuf1Index
< strBuf1Len
) {
3098 if (!CFUniCharIsMemberOfBitmap(strBuf1
[strBuf1Index
], ((strBuf1
[strBuf1Index
] < 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (strBuf1
[strBuf1Index
] >> 16))))) break;
3102 if (strBuf1Index
== strBuf1Len
) {
3103 str1Index
+= str1UsedLen
;
3109 if (match
&& (compareOptions
& (kCFCompareDiacriticInsensitive
|kCFCompareNonliteral
)) && (str1Index
< maxStr1Index
)) {
3110 const uint8_t *nonBaseBitmap
;
3112 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
3114 if (CFUniCharIsSurrogateHighCharacter(str1Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1)))) {
3115 str1Char
= CFUniCharGetLongCharacterForSurrogatePair(str1Char
, otherChar
);
3116 nonBaseBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str1Char
>> 16));
3118 nonBaseBitmap
= graphemeBMP
;
3121 if (CFUniCharIsMemberOfBitmap(str1Char
, nonBaseBitmap
)) {
3122 if (diacriticsInsensitive
) {
3123 if (str1Char
< 0x10000) {
3124 CFIndex index
= str1Index
;
3127 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, --index
);
3128 } while (CFUniCharIsMemberOfBitmap(str1Char
, graphemeBMP
), (rangeToSearch
.location
< index
));
3130 if (str1Char
< 0x0510) {
3131 while (++str1Index
< maxStr1Index
) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
), graphemeBMP
)) break;
3137 } else if (!diacriticsInsensitive
) {
3138 otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
- 1);
3140 // this is assuming viramas are only in BMP ???
3141 if ((str1Char
== COMBINING_GRAPHEME_JOINER
) || (otherChar
== COMBINING_GRAPHEME_JOINER
) || (otherChar
== ZERO_WIDTH_JOINER
) || ((otherChar
>= HANGUL_CHOSEONG_START
) && (otherChar
<= HANGUL_JONGSEONG_END
)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar
, combClassBMP
) == 9)) {
3142 CFRange clusterRange
= CFStringGetRangeOfCharacterClusterAtIndex(string
, str1Index
- 1, kCFStringGraphemeCluster
);
3144 if (str1Index
< (clusterRange
.location
+ clusterRange
.length
)) match
= false;
3150 if ((NULL
!= ignoredChars
) && backwardAnchor
&& (str1Index
< maxStr1Index
)) { // Process the anchor tail
3151 while (str1Index
< maxStr1Index
) {
3152 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
3153 if (CFUniCharIsSurrogateHighCharacter(str1Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1)))) {
3154 str1Char
= CFUniCharGetLongCharacterForSurrogatePair(str1Char
, otherChar
);
3156 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars
, str1Char
)) break;
3157 str1Index
+= ((str1Char
< 0x10000) ? 1 : 2);
3161 if (!backwardAnchor
|| (str1Index
== maxStr1Index
)) {
3163 if (NULL
!= result
) *result
= CFRangeMake(fromLoc
, str1Index
- fromLoc
);
3169 if (fromLoc
== toLoc
) break;
3174 str1Index
= fromLoc
;
3177 while (str2Index
< findStrLen
) {
3178 if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
)) break;
3180 ++str1Index
; ++str2Index
;
3183 if (str2Index
== findStrLen
) {
3185 if (NULL
!= result
) *result
= CFRangeMake(fromLoc
, findStrLen
);
3189 if (fromLoc
== toLoc
) break;
3199 Boolean
CFStringFindWithOptions(CFStringRef string
, CFStringRef stringToFind
, CFRange rangeToSearch
, CFStringCompareFlags compareOptions
, CFRange
*result
) { return CFStringFindWithOptionsAndLocale(string
, stringToFind
, rangeToSearch
, compareOptions
, NULL
, result
); }
// Callbacks for the special arrays of packed (CFRange, CFDataRef) elements created by CFStringCreateArrayWithFindResults()
3203 static const void *__rangeRetain(CFAllocatorRef allocator
, const void *ptr
) {
3204 CFRetain(*(CFDataRef
*)((uint8_t *)ptr
+ sizeof(CFRange
)));
3208 static void __rangeRelease(CFAllocatorRef allocator
, const void *ptr
) {
3209 CFRelease(*(CFDataRef
*)((uint8_t *)ptr
+ sizeof(CFRange
)));
3212 static CFStringRef
__rangeCopyDescription(const void *ptr
) {
3213 CFRange range
= *(CFRange
*)ptr
;
3214 return CFStringCreateWithFormat(kCFAllocatorSystemDefault
, NULL
, CFSTR("{%d, %d}"), range
.location
, range
.length
);
3217 static Boolean
__rangeEqual(const void *ptr1
, const void *ptr2
) {
3218 CFRange range1
= *(CFRange
*)ptr1
;
3219 CFRange range2
= *(CFRange
*)ptr2
;
3220 return (range1
.location
== range2
.location
) && (range1
.length
== range2
.length
);
3224 CFArrayRef
CFStringCreateArrayWithFindResults(CFAllocatorRef alloc
, CFStringRef string
, CFStringRef stringToFind
, CFRange rangeToSearch
, CFStringCompareFlags compareOptions
) {
3226 Boolean backwards
= ((compareOptions
& kCFCompareBackwards
) != 0);
3227 UInt32 endIndex
= rangeToSearch
.location
+ rangeToSearch
.length
;
3228 CFMutableDataRef rangeStorage
= NULL
; // Basically an array of CFRange, CFDataRef (packed)
3229 uint8_t *rangeStorageBytes
= NULL
;
3230 CFIndex foundCount
= 0;
3231 CFIndex capacity
= 0; // Number of CFRange, CFDataRef element slots in rangeStorage
3233 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
3235 while ((rangeToSearch
.length
> 0) && CFStringFindWithOptions(string
, stringToFind
, rangeToSearch
, compareOptions
, &foundRange
)) {
3236 // Determine the next range
3238 rangeToSearch
.length
= foundRange
.location
- rangeToSearch
.location
;
3240 rangeToSearch
.location
= foundRange
.location
+ foundRange
.length
;
3241 rangeToSearch
.length
= endIndex
- rangeToSearch
.location
;
3244 // If necessary, grow the data and squirrel away the found range
3245 if (foundCount
>= capacity
) {
3246 // Note that rangeStorage is not allowed to be allocated from one of the GCRefZero allocators
3247 if (rangeStorage
== NULL
) rangeStorage
= CFDataCreateMutable(_CFConvertAllocatorToNonGCRefZeroEquivalent(alloc
), 0);
3248 capacity
= (capacity
+ 4) * 2;
3249 CFDataSetLength(rangeStorage
, capacity
* (sizeof(CFRange
) + sizeof(CFDataRef
)));
3250 rangeStorageBytes
= (uint8_t *)CFDataGetMutableBytePtr(rangeStorage
) + foundCount
* (sizeof(CFRange
) + sizeof(CFDataRef
));
3252 memmove(rangeStorageBytes
, &foundRange
, sizeof(CFRange
)); // The range
3253 memmove(rangeStorageBytes
+ sizeof(CFRange
), &rangeStorage
, sizeof(CFDataRef
)); // The data
3254 rangeStorageBytes
+= (sizeof(CFRange
) + sizeof(CFDataRef
));
3258 if (foundCount
> 0) {
3260 CFMutableArrayRef array
;
3261 const CFArrayCallBacks callbacks
= {0, __rangeRetain
, __rangeRelease
, __rangeCopyDescription
, __rangeEqual
};
3263 CFDataSetLength(rangeStorage
, foundCount
* (sizeof(CFRange
) + sizeof(CFDataRef
))); // Tighten storage up
3264 rangeStorageBytes
= (uint8_t *)CFDataGetMutableBytePtr(rangeStorage
);
3266 array
= CFArrayCreateMutable(alloc
, foundCount
* sizeof(CFRange
*), &callbacks
);
3267 for (cnt
= 0; cnt
< foundCount
; cnt
++) {
3268 // Each element points to the appropriate CFRange in the CFData
3269 CFArrayAppendValue(array
, rangeStorageBytes
+ cnt
* (sizeof(CFRange
) + sizeof(CFDataRef
)));
3271 CFRelease(rangeStorage
); // We want the data to go away when all CFRanges inside it are released...
3279 CFRange
CFStringFind(CFStringRef string
, CFStringRef stringToFind
, CFStringCompareFlags compareOptions
) {
3282 if (CFStringFindWithOptions(string
, stringToFind
, CFRangeMake(0, CFStringGetLength(string
)), compareOptions
, &foundRange
)) {
3285 return CFRangeMake(kCFNotFound
, 0);
3289 Boolean
CFStringHasPrefix(CFStringRef string
, CFStringRef prefix
) {
3290 return CFStringFindWithOptions(string
, prefix
, CFRangeMake(0, CFStringGetLength(string
)), kCFCompareAnchored
, NULL
);
3293 Boolean
CFStringHasSuffix(CFStringRef string
, CFStringRef suffix
) {
3294 return CFStringFindWithOptions(string
, suffix
, CFRangeMake(0, CFStringGetLength(string
)), kCFCompareAnchored
|kCFCompareBackwards
, NULL
);
// NOTE(review): no use of MAX_TRANSCODING_LENGTH is visible near this definition — confirm its consumers.
#define MAX_TRANSCODING_LENGTH 4

// Modulus applied to a Hangul syllable's offset from HANGUL_SYLLABLE_START in _CFStringIsHangulLVT()
#define HANGUL_JONGSEONG_COUNT (28)
3301 CF_INLINE
bool _CFStringIsHangulLVT(UTF32Char character
) {
3302 return (((character
- HANGUL_SYLLABLE_START
) % HANGUL_JONGSEONG_COUNT
) ? true : false);
// Sixteen-entry table of lengths, each at most MAX_TRANSCODING_LENGTH.
// NOTE(review): the indexing scheme is not visible near this definition — confirm against its users.
static uint8_t __CFTranscodingHintLength[] = {
    2, 3, 4, 4, 4, 4, 4, 2, 2, 2, 2, 4, 0, 0, 0, 0
};
// States of the Hangul syllable scanner in _CFStringInlineBufferGetComposedRange():
// the kind of the most recently accepted element, or Break when the sequence ends.
enum {
    kCFStringHangulStateL,      // leading consonant (choseong)
    kCFStringHangulStateV,      // vowel (jungseong)
    kCFStringHangulStateT,      // trailing consonant (jongseong)
    kCFStringHangulStateLV,     // precomposed syllable without trailing consonant
    kCFStringHangulStateLVT,    // precomposed syllable with trailing consonant
    kCFStringHangulStateBreak   // the next character does not continue the syllable
};
3318 static CFRange
_CFStringInlineBufferGetComposedRange(CFStringInlineBuffer
*buffer
, CFIndex start
, CFStringCharacterClusterType type
, const uint8_t *bmpBitmap
, CFIndex csetType
) {
3319 CFIndex end
= start
+ 1;
3320 const uint8_t *bitmap
= bmpBitmap
;
3321 UTF32Char character
;
3322 UTF16Char otherSurrogate
;
3325 character
= CFStringGetCharacterFromInlineBuffer(buffer
, start
);
3327 // We don't combine characters in Armenian ~ Limbu range for backward deletion
3328 if ((type
!= kCFStringBackwardDeletionCluster
) || (character
< 0x0530) || (character
> 0x194F)) {
3329 // Check if the current is surrogate
3330 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, start
+ 1)))) {
3332 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
3333 bitmap
= CFUniCharGetBitmapPtrForPlane(csetType
, (character
>> 16));
3338 if ((type
== kCFStringBackwardDeletionCluster
) && (character
>= 0x0530) && (character
< 0x1950)) break;
3340 if (character
< 0x10000) { // the first round could be already be non-BMP
3341 if (CFUniCharIsSurrogateLowCharacter(character
) && CFUniCharIsSurrogateHighCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, start
- 1)))) {
3342 character
= CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate
, character
);
3343 bitmap
= CFUniCharGetBitmapPtrForPlane(csetType
, (character
>> 16));
3344 if (--start
== 0) break; // starting with non-BMP combining mark
3350 if (!CFUniCharIsMemberOfBitmap(character
, bitmap
) && (character
!= 0xFF9E) && (character
!= 0xFF9F) && ((character
& 0x1FFFF0) != 0xF870)) break;
3354 character
= CFStringGetCharacterFromInlineBuffer(buffer
, start
);
3359 if (((character
>= HANGUL_CHOSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) || ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
))) {
3361 uint8_t initialState
;
3363 if (character
< HANGUL_JUNGSEONG_START
) {
3364 state
= kCFStringHangulStateL
;
3365 } else if (character
< HANGUL_JONGSEONG_START
) {
3366 state
= kCFStringHangulStateV
;
3367 } else if (character
< HANGUL_SYLLABLE_START
) {
3368 state
= kCFStringHangulStateT
;
3370 state
= (_CFStringIsHangulLVT(character
) ? kCFStringHangulStateLVT
: kCFStringHangulStateLV
);
3372 initialState
= state
;
3375 while (((character
= CFStringGetCharacterFromInlineBuffer(buffer
, start
- 1)) >= HANGUL_CHOSEONG_START
) && (character
<= HANGUL_SYLLABLE_END
) && ((character
<= HANGUL_JONGSEONG_END
) || (character
>= HANGUL_SYLLABLE_START
))) {
3377 case kCFStringHangulStateV
:
3378 if (character
<= HANGUL_CHOSEONG_END
) {
3379 state
= kCFStringHangulStateL
;
3380 } else if ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
) && !_CFStringIsHangulLVT(character
)) {
3381 state
= kCFStringHangulStateLV
;
3382 } else if (character
> HANGUL_JUNGSEONG_END
) {
3383 state
= kCFStringHangulStateBreak
;
3387 case kCFStringHangulStateT
:
3388 if ((character
>= HANGUL_JUNGSEONG_START
) && (character
<= HANGUL_JUNGSEONG_END
)) {
3389 state
= kCFStringHangulStateV
;
3390 } else if ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
)) {
3391 state
= (_CFStringIsHangulLVT(character
) ? kCFStringHangulStateLVT
: kCFStringHangulStateLV
);
3392 } else if (character
< HANGUL_JUNGSEONG_START
) {
3393 state
= kCFStringHangulStateBreak
;
3398 state
= ((character
< HANGUL_JUNGSEONG_START
) ? kCFStringHangulStateL
: kCFStringHangulStateBreak
);
3402 if (state
== kCFStringHangulStateBreak
) break;
3407 state
= initialState
;
3408 while (((character
= CFStringGetCharacterFromInlineBuffer(buffer
, end
)) > 0) && (((character
>= HANGUL_CHOSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) || ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
)))) {
3410 case kCFStringHangulStateLV
:
3411 case kCFStringHangulStateV
:
3412 if ((character
>= HANGUL_JUNGSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) {
3413 state
= ((character
< HANGUL_JONGSEONG_START
) ? kCFStringHangulStateV
: kCFStringHangulStateT
);
3415 state
= kCFStringHangulStateBreak
;
3419 case kCFStringHangulStateLVT
:
3420 case kCFStringHangulStateT
:
3421 state
= (((character
>= HANGUL_JONGSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) ? kCFStringHangulStateT
: kCFStringHangulStateBreak
);
3425 if (character
< HANGUL_JUNGSEONG_START
) {
3426 state
= kCFStringHangulStateL
;
3427 } else if (character
< HANGUL_JONGSEONG_START
) {
3428 state
= kCFStringHangulStateV
;
3429 } else if (character
>= HANGUL_SYLLABLE_START
) {
3430 state
= (_CFStringIsHangulLVT(character
) ? kCFStringHangulStateLVT
: kCFStringHangulStateLV
);
3432 state
= kCFStringHangulStateBreak
;
3437 if (state
== kCFStringHangulStateBreak
) break;
3443 while ((character
= CFStringGetCharacterFromInlineBuffer(buffer
, end
)) > 0) {
3444 if ((type
== kCFStringBackwardDeletionCluster
) && (character
>= 0x0530) && (character
< 0x1950)) break;
3446 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, end
+ 1)))) {
3447 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
3448 bitmap
= CFUniCharGetBitmapPtrForPlane(csetType
, (character
>> 16));
3455 if (!CFUniCharIsMemberOfBitmap(character
, bitmap
) && (character
!= 0xFF9E) && (character
!= 0xFF9F) && ((character
& 0x1FFFF0) != 0xF870)) break;
3460 return CFRangeMake(start
, end
- start
);
3463 CF_INLINE
bool _CFStringIsVirama(UTF32Char character
, const uint8_t *combClassBMP
) {
3464 return ((character
== COMBINING_GRAPHEME_JOINER
) || (CFUniCharGetCombiningPropertyForCharacter(character
, (const uint8_t *)((character
< 0x10000) ? combClassBMP
: CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (character
>> 16)))) == 9) ? true : false);
3467 CFRange
CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string
, CFIndex charIndex
, CFStringCharacterClusterType type
) {
3469 CFIndex currentIndex
;
3470 CFIndex length
= CFStringGetLength(string
);
3471 CFIndex csetType
= ((kCFStringGraphemeCluster
== type
) ? kCFUniCharGraphemeExtendCharacterSet
: kCFUniCharNonBaseCharacterSet
);
3472 CFStringInlineBuffer stringBuffer
;
3473 const uint8_t *bmpBitmap
;
3474 const uint8_t *letterBMP
;
3475 static const uint8_t *combClassBMP
= NULL
;
3476 UTF32Char character
;
3477 UTF16Char otherSurrogate
;
3479 if (charIndex
>= length
) return CFRangeMake(kCFNotFound
, 0);
3481 /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
3483 if (!CF_IS_OBJC(__kCFStringTypeID
, string
) && __CFStrIsEightBit(string
)) return CFRangeMake(charIndex
, 1);
3485 bmpBitmap
= CFUniCharGetBitmapPtrForPlane(csetType
, 0);
3486 letterBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, 0);
3487 if (NULL
== combClassBMP
) combClassBMP
= (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
3489 CFStringInitInlineBuffer(string
, &stringBuffer
, CFRangeMake(0, length
));
3491 // Get composed character sequence first
3492 range
= _CFStringInlineBufferGetComposedRange(&stringBuffer
, charIndex
, type
, bmpBitmap
, csetType
);
3494 // Do grapheme joiners
3495 if (type
< kCFStringCursorMovementCluster
) {
3496 const uint8_t *letter
= letterBMP
;
3498 // Check to see if we have a letter at the beginning of initial cluster
3499 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, range
.location
);
3501 if ((range
.length
> 1) && CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, range
.location
+ 1)))) {
3502 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
3503 letter
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, (character
>> 16));
3506 if ((character
== ZERO_WIDTH_JOINER
) || CFUniCharIsMemberOfBitmap(character
, letter
)) {
3509 // Check if preceded by grapheme joiners (U034F and viramas)
3510 otherRange
.location
= currentIndex
= range
.location
;
3512 while (currentIndex
> 1) {
3513 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, --currentIndex
);
3515 // ??? We're assuming viramas only in BMP
3516 if ((_CFStringIsVirama(character
, combClassBMP
) || ((character
== ZERO_WIDTH_JOINER
) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, --currentIndex
), combClassBMP
))) && (currentIndex
> 0)) {
3522 currentIndex
= _CFStringInlineBufferGetComposedRange(&stringBuffer
, currentIndex
, type
, bmpBitmap
, csetType
).location
;
3524 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
);
3526 if (CFUniCharIsSurrogateLowCharacter(character
) && CFUniCharIsSurrogateHighCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
- 1)))) {
3527 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
3528 letter
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, (character
>> 16));
3534 if (!CFUniCharIsMemberOfBitmap(character
, letter
)) break;
3535 range
.location
= currentIndex
;
3538 range
.length
+= otherRange
.location
- range
.location
;
3540 // Check if followed by grapheme joiners
3541 if ((range
.length
> 1) && ((range
.location
+ range
.length
) < length
)) {
3543 currentIndex
= otherRange
.location
+ otherRange
.length
;
3546 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
- 1);
3548 // ??? We're assuming viramas only in BMP
3549 if ((character
!= ZERO_WIDTH_JOINER
) && !_CFStringIsVirama(character
, combClassBMP
)) break;
3551 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
);
3553 if (character
== ZERO_WIDTH_JOINER
) character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, ++currentIndex
);
3555 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
+ 1)))) {
3556 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
3557 letter
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, (character
>> 16));
3562 // We only conjoin letters
3563 if (!CFUniCharIsMemberOfBitmap(character
, letter
)) break;
3564 otherRange
= _CFStringInlineBufferGetComposedRange(&stringBuffer
, currentIndex
, type
, bmpBitmap
, csetType
);
3565 currentIndex
= otherRange
.location
+ otherRange
.length
;
3566 } while ((otherRange
.location
+ otherRange
.length
) < length
);
3567 range
.length
= currentIndex
- range
.location
;
3572 // Check if we're part of prefix transcoding hints
3575 currentIndex
= (range
.location
+ range
.length
) - (MAX_TRANSCODING_LENGTH
+ 1);
3576 if (currentIndex
< 0) currentIndex
= 0;
3578 while (currentIndex
<= range
.location
) {
3579 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
);
3581 if ((character
& 0x1FFFF0) == 0xF860) { // transcoding hint
3582 otherIndex
= currentIndex
+ __CFTranscodingHintLength
[(character
- 0xF860)] + 1;
3583 if (otherIndex
>= (range
.location
+ range
.length
)) {
3584 if (otherIndex
<= length
) {
3585 range
.location
= currentIndex
;
3586 range
.length
= otherIndex
- currentIndex
;
3597 CFRange
CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString
, CFIndex theIndex
) {
3598 return CFStringGetRangeOfCharacterClusterAtIndex(theString
, theIndex
, kCFStringComposedCharacterCluster
);
3602 @function CFStringFindCharacterFromSet
3603 Query the range of characters contained in the specified character set.
3604 @param theString The CFString which is to be searched. If this
3605 parameter is not a valid CFString, the behavior is undefined.
3607 @param theSet The CFCharacterSet against which the membership
3608 of characters is checked. If this parameter is not a valid
3609 CFCharacterSet, the behavior is undefined.
3610 @param range The range of characters within the string to search. If
3611 the range location or end point (defined by the location
3612 plus length minus 1) are outside the index space of the
3613 string (0 to N-1 inclusive, where N is the length of the
3614 string), the behavior is undefined. If the range length is
3615 negative, the behavior is undefined. The range may be empty
3616 (length 0), in which case no search is performed.
3617 @param searchOptions The bitwise-or'ed option flags to control
3618 the search behavior. The supported options are
3619 kCFCompareBackwards and kCFCompareAnchored.
3620 If other option flags are specified, the behavior is undefined.
3622 @param result The pointer to a CFRange supplied by the caller in
3623 which the search result is stored. If a pointer to an invalid
3624 memory is specified, the behavior is undefined.
3625 @result true, if at least a character which is a member of the character
3626 set is found and result is filled, otherwise, false.
3628 #define SURROGATE_START 0xD800
3629 #define SURROGATE_END 0xDFFF
3631 CF_EXPORT Boolean
CFStringFindCharacterFromSet(CFStringRef theString
, CFCharacterSetRef theSet
, CFRange rangeToSearch
, CFStringCompareFlags searchOptions
, CFRange
*result
) {
3632 CFStringInlineBuffer stringBuffer
;
3633 CFCharacterSetInlineBuffer csetBuffer
;
3636 CFIndex fromLoc
, toLoc
, cnt
; // fromLoc and toLoc are inclusive
3637 Boolean found
= false;
3638 Boolean done
= false;
3640 //#warning FIX ME !! Should support kCFCompareNonliteral
3642 if ((rangeToSearch
.location
+ rangeToSearch
.length
> CFStringGetLength(theString
)) || (rangeToSearch
.length
== 0)) return false;
3644 if (searchOptions
& kCFCompareBackwards
) {
3645 fromLoc
= rangeToSearch
.location
+ rangeToSearch
.length
- 1;
3646 toLoc
= rangeToSearch
.location
;
3648 fromLoc
= rangeToSearch
.location
;
3649 toLoc
= rangeToSearch
.location
+ rangeToSearch
.length
- 1;
3651 if (searchOptions
& kCFCompareAnchored
) {
3655 step
= (fromLoc
<= toLoc
) ? 1 : -1;
3658 CFStringInitInlineBuffer(theString
, &stringBuffer
, rangeToSearch
);
3659 CFCharacterSetInitInlineBuffer(theSet
, &csetBuffer
);
3662 ch
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, cnt
- rangeToSearch
.location
);
3663 if ((ch
>= SURROGATE_START
) && (ch
<= SURROGATE_END
)) {
3664 int otherCharIndex
= cnt
+ step
;
3666 if (((step
< 0) && (otherCharIndex
< toLoc
)) || ((step
> 0) && (otherCharIndex
> toLoc
))) {
3670 UniChar lowChar
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, otherCharIndex
- rangeToSearch
.location
);
3672 if (cnt
< otherCharIndex
) {
3679 if (CFUniCharIsSurrogateHighCharacter(highChar
) && CFUniCharIsSurrogateLowCharacter(lowChar
) && CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer
, CFUniCharGetLongCharacterForSurrogatePair(highChar
, lowChar
))) {
3680 if (result
) *result
= CFRangeMake((cnt
< otherCharIndex
? cnt
: otherCharIndex
), 2);
3682 } else if (otherCharIndex
== toLoc
) {
3685 cnt
= otherCharIndex
+ step
;
3688 } else if (CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer
, ch
)) {
3689 done
= found
= true;
3690 } else if (cnt
== toLoc
) {
3697 if (found
&& result
) *result
= CFRangeMake(cnt
, 1);
3701 /* Line range code */
3703 #define CarriageReturn '\r' /* 0x0d */
3704 #define NewLine '\n' /* 0x0a */
3705 #define NextLine 0x0085
3706 #define LineSeparator 0x2028
3707 #define ParaSeparator 0x2029
3709 CF_INLINE Boolean
isALineSeparatorTypeCharacter(UniChar ch
, Boolean includeLineEndings
) {
3710 if (ch
> CarriageReturn
&& ch
< NextLine
) return false; /* Quick test to cover most chars */
3711 return (ch
== NewLine
|| ch
== CarriageReturn
|| ch
== ParaSeparator
|| (includeLineEndings
&& (ch
== NextLine
|| ch
== LineSeparator
))) ? true : false;
3714 static void __CFStringGetLineOrParagraphBounds(CFStringRef string
, CFRange range
, CFIndex
*lineBeginIndex
, CFIndex
*lineEndIndex
, CFIndex
*contentsEndIndex
, Boolean includeLineEndings
) {
3716 CFStringInlineBuffer buf
;
3719 __CFAssertIsString(string
);
3720 __CFAssertRangeIsInStringBounds(string
, range
.location
, range
.length
);
3722 len
= __CFStrLength(string
);
3724 if (lineBeginIndex
) {
3726 if (range
.location
== 0) {
3729 CFStringInitInlineBuffer(string
, &buf
, CFRangeMake(0, len
));
3730 CFIndex buf_idx
= range
.location
;
3732 /* Take care of the special case where start happens to fall right between \r and \n */
3733 ch
= CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
);
3735 if ((ch
== NewLine
) && (CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
) == CarriageReturn
)) {
3742 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
), includeLineEndings
)) {
3743 start
= buf_idx
+ 1;
3750 *lineBeginIndex
= start
;
3753 /* Now find the ending point */
3754 if (lineEndIndex
|| contentsEndIndex
) {
3755 CFIndex endOfContents
, lineSeparatorLength
= 1; /* 1 by default */
3756 CFStringInitInlineBuffer(string
, &buf
, CFRangeMake(0, len
));
3757 CFIndex buf_idx
= range
.location
+ range
.length
- (range
.length
? 1 : 0);
3758 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
3759 ch
= __CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
);
3760 if (ch
== NewLine
) {
3761 endOfContents
= buf_idx
;
3763 if (__CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
) == CarriageReturn
) {
3764 lineSeparatorLength
= 2;
3769 if (isALineSeparatorTypeCharacter(ch
, includeLineEndings
)) {
3770 endOfContents
= buf_idx
; /* This is actually end of contentsRange */
3771 buf_idx
++; /* OK for this to go past the end */
3772 if ((ch
== CarriageReturn
) && (__CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
) == NewLine
)) {
3773 lineSeparatorLength
= 2;
3776 } else if (buf_idx
>= len
) {
3777 endOfContents
= len
;
3778 lineSeparatorLength
= 0;
3782 ch
= __CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
);
3786 if (contentsEndIndex
) *contentsEndIndex
= endOfContents
;
3787 if (lineEndIndex
) *lineEndIndex
= endOfContents
+ lineSeparatorLength
;
3791 void CFStringGetLineBounds(CFStringRef string
, CFRange range
, CFIndex
*lineBeginIndex
, CFIndex
*lineEndIndex
, CFIndex
*contentsEndIndex
) {
3792 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID
, void, string
, "getLineStart:end:contentsEnd:forRange:", lineBeginIndex
, lineEndIndex
, contentsEndIndex
, range
);
3793 __CFStringGetLineOrParagraphBounds(string
, range
, lineBeginIndex
, lineEndIndex
, contentsEndIndex
, true);
3796 void CFStringGetParagraphBounds(CFStringRef string
, CFRange range
, CFIndex
*parBeginIndex
, CFIndex
*parEndIndex
, CFIndex
*contentsEndIndex
) {
3797 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID
, void, string
, "getParagraphStart:end:contentsEnd:forRange:", parBeginIndex
, parEndIndex
, contentsEndIndex
, range
);
3798 __CFStringGetLineOrParagraphBounds(string
, range
, parBeginIndex
, parEndIndex
, contentsEndIndex
, false);
3802 CFStringRef
CFStringCreateByCombiningStrings(CFAllocatorRef alloc
, CFArrayRef array
, CFStringRef separatorString
) {
3804 CFIndex separatorNumByte
;
3805 CFIndex stringCount
= CFArrayGetCount(array
);
3806 Boolean isSepCFString
= !CF_IS_OBJC(__kCFStringTypeID
, separatorString
);
3807 Boolean canBeEightbit
= isSepCFString
&& __CFStrIsEightBit(separatorString
);
3809 CFStringRef otherString
;
3812 const void *separatorContents
= NULL
;
3814 if (stringCount
== 0) {
3815 return CFStringCreateWithCharacters(alloc
, NULL
, 0);
3816 } else if (stringCount
== 1) {
3817 return (CFStringRef
)CFStringCreateCopy(alloc
, (CFStringRef
)CFArrayGetValueAtIndex(array
, 0));
3820 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
3822 numChars
= CFStringGetLength(separatorString
) * (stringCount
- 1);
3823 for (idx
= 0; idx
< stringCount
; idx
++) {
3824 otherString
= (CFStringRef
)CFArrayGetValueAtIndex(array
, idx
);
3825 numChars
+= CFStringGetLength(otherString
);
3826 // canBeEightbit is already false if the separator is an NSString...
3827 if (CF_IS_OBJC(__kCFStringTypeID
, otherString
) || ! __CFStrIsEightBit(otherString
)) canBeEightbit
= false;
3830 buffer
= (uint8_t *)CFAllocatorAllocate(alloc
, canBeEightbit
? ((numChars
+ 1) * sizeof(uint8_t)) : (numChars
* sizeof(UniChar
)), 0);
3831 bufPtr
= (uint8_t *)buffer
;
3832 if (__CFOASafe
) __CFSetLastAllocationEventName(buffer
, "CFString (store)");
3833 separatorNumByte
= CFStringGetLength(separatorString
) * (canBeEightbit
? sizeof(uint8_t) : sizeof(UniChar
));
3835 for (idx
= 0; idx
< stringCount
; idx
++) {
3836 if (idx
) { // add separator here unless first string
3837 if (separatorContents
) {
3838 memmove(bufPtr
, separatorContents
, separatorNumByte
);
3840 if (!isSepCFString
) { // NSString
3841 CFStringGetCharacters(separatorString
, CFRangeMake(0, CFStringGetLength(separatorString
)), (UniChar
*)bufPtr
);
3842 } else if (canBeEightbit
) {
3843 memmove(bufPtr
, (const uint8_t *)__CFStrContents(separatorString
) + __CFStrSkipAnyLengthByte(separatorString
), separatorNumByte
);
3845 __CFStrConvertBytesToUnicode((uint8_t *)__CFStrContents(separatorString
) + __CFStrSkipAnyLengthByte(separatorString
), (UniChar
*)bufPtr
, __CFStrLength(separatorString
));
3847 separatorContents
= bufPtr
;
3849 bufPtr
+= separatorNumByte
;
3852 otherString
= (CFStringRef
)CFArrayGetValueAtIndex(array
, idx
);
3853 if (CF_IS_OBJC(__kCFStringTypeID
, otherString
)) {
3854 CFIndex otherLength
= CFStringGetLength(otherString
);
3855 CFStringGetCharacters(otherString
, CFRangeMake(0, otherLength
), (UniChar
*)bufPtr
);
3856 bufPtr
+= otherLength
* sizeof(UniChar
);
3858 const uint8_t * otherContents
= (const uint8_t *)__CFStrContents(otherString
);
3859 CFIndex otherNumByte
= __CFStrLength2(otherString
, otherContents
) * (canBeEightbit
? sizeof(uint8_t) : sizeof(UniChar
));
3861 if (canBeEightbit
|| __CFStrIsUnicode(otherString
)) {
3862 memmove(bufPtr
, otherContents
+ __CFStrSkipAnyLengthByte(otherString
), otherNumByte
);
3864 __CFStrConvertBytesToUnicode(otherContents
+ __CFStrSkipAnyLengthByte(otherString
), (UniChar
*)bufPtr
, __CFStrLength2(otherString
, otherContents
));
3866 bufPtr
+= otherNumByte
;
3869 if (canBeEightbit
) *bufPtr
= 0; // NULL byte;
3871 return canBeEightbit
?
3872 CFStringCreateWithCStringNoCopy(alloc
, (const char*)buffer
, __CFStringGetEightBitStringEncoding(), alloc
) :
3873 CFStringCreateWithCharactersNoCopy(alloc
, (UniChar
*)buffer
, numChars
, alloc
);
3877 CFArrayRef
CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc
, CFStringRef string
, CFStringRef separatorString
) {
3878 CFArrayRef separatorRanges
;
3879 CFIndex length
= CFStringGetLength(string
);
3880 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
3881 if (!(separatorRanges
= CFStringCreateArrayWithFindResults(alloc
, string
, separatorString
, CFRangeMake(0, length
), 0))) {
3882 return CFArrayCreate(alloc
, (const void **)&string
, 1, & kCFTypeArrayCallBacks
);
3885 CFIndex count
= CFArrayGetCount(separatorRanges
);
3886 CFIndex startIndex
= 0;
3888 CFMutableArrayRef array
= CFArrayCreateMutable(alloc
, count
+ 2, & kCFTypeArrayCallBacks
);
3889 const CFRange
*currentRange
;
3890 CFStringRef substring
;
3892 for (idx
= 0;idx
< count
;idx
++) {
3893 currentRange
= (const CFRange
*)CFArrayGetValueAtIndex(separatorRanges
, idx
);
3894 numChars
= currentRange
->location
- startIndex
;
3895 substring
= CFStringCreateWithSubstring(alloc
, string
, CFRangeMake(startIndex
, numChars
));
3896 CFArrayAppendValue(array
, substring
);
3897 if (!_CFAllocatorIsGCRefZero(alloc
)) CFRelease(substring
);
3898 startIndex
= currentRange
->location
+ currentRange
->length
;
3900 substring
= CFStringCreateWithSubstring(alloc
, string
, CFRangeMake(startIndex
, length
- startIndex
));
3901 CFArrayAppendValue(array
, substring
);
3902 if (!_CFAllocatorIsGCRefZero(alloc
)) CFRelease(substring
);
3904 if (!_CFAllocatorIsGCRefZero(alloc
)) CFRelease(separatorRanges
);
3910 CFStringRef
CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc
, CFDataRef data
, CFStringEncoding encoding
) {
3911 return CFStringCreateWithBytes(alloc
, CFDataGetBytePtr(data
), CFDataGetLength(data
), encoding
, true);
3915 CFDataRef
CFStringCreateExternalRepresentation(CFAllocatorRef alloc
, CFStringRef string
, CFStringEncoding encoding
, uint8_t lossByte
) {
3917 CFIndex guessedByteLength
;
3922 if (CF_IS_OBJC(__kCFStringTypeID
, string
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3923 length
= CFStringGetLength(string
);
3925 __CFAssertIsString(string
);
3926 length
= __CFStrLength(string
);
3927 if (__CFStrIsEightBit(string
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
3928 return CFDataCreate(alloc
, ((uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
)), __CFStrLength(string
));
3932 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
3934 if (((encoding
& 0x0FFF) == kCFStringEncodingUnicode
) && ((encoding
== kCFStringEncodingUnicode
) || ((encoding
> kCFStringEncodingUTF8
) && (encoding
<= kCFStringEncodingUTF32LE
)))) {
3935 guessedByteLength
= (length
+ 1) * ((((encoding
>> 26) & 2) == 0) ? sizeof(UTF16Char
) : sizeof(UTF32Char
)); // UTF32 format has the bit set
3936 } else if (((guessedByteLength
= CFStringGetMaximumSizeForEncoding(length
, encoding
)) > length
) && !CF_IS_OBJC(__kCFStringTypeID
, string
)) { // Multi byte encoding
3937 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
3938 if (__CFStrIsUnicode(string
)) {
3939 CFIndex aLength
= CFStringEncodingByteLengthForCharacters(encoding
, kCFStringEncodingPrependBOM
, __CFStrContents(string
), __CFStrLength(string
));
3940 if (aLength
> 0) guessedByteLength
= aLength
;
3943 result
= __CFStringEncodeByteStream(string
, 0, length
, true, encoding
, lossByte
, NULL
, LONG_MAX
, &guessedByteLength
);
3944 // if result == length, we always succeed
3945 // otherwise, if result == 0, we fail
3946 // otherwise, if there was a lossByte but still result != length, we fail
3947 if ((result
!= length
) && (!result
|| !lossByte
)) return NULL
;
3948 if (guessedByteLength
== length
&& __CFStrIsEightBit(string
) && __CFStringEncodingIsSupersetOfASCII(encoding
)) { // It's all ASCII !!
3949 return CFDataCreate(alloc
, ((uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
)), __CFStrLength(string
));
3951 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
3955 bytes
= (uint8_t *)CFAllocatorAllocate(alloc
, guessedByteLength
, 0);
3956 if (__CFOASafe
) __CFSetLastAllocationEventName(bytes
, "CFData (store)");
3958 result
= __CFStringEncodeByteStream(string
, 0, length
, true, encoding
, lossByte
, bytes
, guessedByteLength
, &usedLength
);
3960 if ((result
!= length
) && (!result
|| !lossByte
)) { // see comment above about what this means
3961 CFAllocatorDeallocate(alloc
, bytes
);
3965 return CFDataCreateWithBytesNoCopy(alloc
, (uint8_t *)bytes
, usedLength
, alloc
);
3969 CFStringEncoding
CFStringGetSmallestEncoding(CFStringRef str
) {
3971 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFStringEncoding
, str
, "_smallestEncodingInCFStringEncoding");
3972 __CFAssertIsString(str
);
3974 if (__CFStrIsEightBit(str
)) return __CFStringGetEightBitStringEncoding();
3975 len
= __CFStrLength(str
);
3976 if (__CFStringEncodeByteStream(str
, 0, len
, false, __CFStringGetEightBitStringEncoding(), 0, NULL
, LONG_MAX
, NULL
) == len
) return __CFStringGetEightBitStringEncoding();
3977 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str
, 0, len
, false, __CFStringGetSystemEncoding(), 0, NULL
, LONG_MAX
, NULL
) == len
)) return __CFStringGetSystemEncoding();
3978 return kCFStringEncodingUnicode
; /* ??? */
3982 CFStringEncoding
CFStringGetFastestEncoding(CFStringRef str
) {
3983 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFStringEncoding
, str
, "_fastestEncodingInCFStringEncoding");
3984 __CFAssertIsString(str
);
3985 return __CFStrIsEightBit(str
) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode
; /* ??? */
3989 SInt32
CFStringGetIntValue(CFStringRef str
) {
3993 CFStringInlineBuffer buf
;
3994 CFStringInitInlineBuffer(str
, &buf
, CFRangeMake(0, CFStringGetLength(str
)));
3995 success
= __CFStringScanInteger(&buf
, NULL
, &idx
, false, &result
);
3996 return success
? result
: 0;
4000 double CFStringGetDoubleValue(CFStringRef str
) {
4004 CFStringInlineBuffer buf
;
4005 CFStringInitInlineBuffer(str
, &buf
, CFRangeMake(0, CFStringGetLength(str
)));
4006 success
= __CFStringScanDouble(&buf
, NULL
, &idx
, &result
);
4007 return success
? result
: 0.0;
4011 /*** Mutable functions... ***/
/* Points an externally-backed mutable string at a caller-supplied UniChar
   buffer without copying it.
   chars:    the buffer (the assertion below allows NULL only when capacity is 0)
   length:   number of characters currently valid in the buffer (must be <= capacity)
   capacity: buffer size in UniChars; it is stored in bytes (capacity * sizeof(UniChar))
   The string is finally flagged as having externally provided capacity.
*/
4013 void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string
, UniChar
*chars
, CFIndex length
, CFIndex capacity
) {
4014 __CFAssertIsNotNegative(length
);
4015 __CFAssertIsStringAndExternalMutable(string
);
/* Argument sanity: length fits in capacity, and a non-zero capacity needs a buffer */
4016 CFAssert4((length
<= capacity
) && ((capacity
== 0) || ((capacity
> 0) && chars
)), __kCFLogAssertion
, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__
, chars
, length
, capacity
);
/* Install the new backing store, then its length and its capacity */
4017 __CFStrSetContentPtr(string
, chars
);
4018 __CFStrSetExplicitLength(string
, length
);
4019 __CFStrSetCapacity(string
, capacity
* sizeof(UniChar
));
4020 __CFStrSetCapacityProvidedExternally(string
);
4025 void CFStringInsert(CFMutableStringRef str
, CFIndex idx
, CFStringRef insertedStr
) {
4026 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "insertString:atIndex:", insertedStr
, idx
);
4027 __CFAssertIsStringAndMutable(str
);
4028 CFAssert3(idx
>= 0 && idx
<= __CFStrLength(str
), __kCFLogAssertion
, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__
, idx
, __CFStrLength(str
));
4029 __CFStringReplace(str
, CFRangeMake(idx
, 0), insertedStr
);
4033 void CFStringDelete(CFMutableStringRef str
, CFRange range
) {
4034 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, str
, "deleteCharactersInRange:", range
);
4035 __CFAssertIsStringAndMutable(str
);
4036 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
4037 __CFStringChangeSize(str
, range
, 0, false);
4041 void CFStringReplace(CFMutableStringRef str
, CFRange range
, CFStringRef replacement
) {
4042 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "replaceCharactersInRange:withString:", range
, replacement
);
4043 __CFAssertIsStringAndMutable(str
);
4044 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
4045 __CFStringReplace(str
, range
, replacement
);
4049 void CFStringReplaceAll(CFMutableStringRef str
, CFStringRef replacement
) {
4050 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, str
, "setString:", replacement
);
4051 __CFAssertIsStringAndMutable(str
);
4052 __CFStringReplace(str
, CFRangeMake(0, __CFStrLength(str
)), replacement
);
4056 void CFStringAppend(CFMutableStringRef str
, CFStringRef appended
) {
4057 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, str
, "appendString:", appended
);
4058 __CFAssertIsStringAndMutable(str
);
4059 __CFStringReplace(str
, CFRangeMake(__CFStrLength(str
), 0), appended
);
4063 void CFStringAppendCharacters(CFMutableStringRef str
, const UniChar
*chars
, CFIndex appendedLength
) {
4064 CFIndex strLength
, idx
;
4066 __CFAssertIsNotNegative(appendedLength
);
4068 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "appendCharacters:length:", chars
, appendedLength
);
4070 __CFAssertIsStringAndMutable(str
);
4072 strLength
= __CFStrLength(str
);
4073 if (__CFStrIsUnicode(str
)) {
4074 __CFStringChangeSize(str
, CFRangeMake(strLength
, 0), appendedLength
, true);
4075 memmove((UniChar
*)__CFStrContents(str
) + strLength
, chars
, appendedLength
* sizeof(UniChar
));
4078 bool isASCII
= true;
4079 for (idx
= 0; isASCII
&& idx
< appendedLength
; idx
++) isASCII
= (chars
[idx
] < 0x80);
4080 __CFStringChangeSize(str
, CFRangeMake(strLength
, 0), appendedLength
, !isASCII
);
4082 memmove((UniChar
*)__CFStrContents(str
) + strLength
, chars
, appendedLength
* sizeof(UniChar
));
4084 contents
= (uint8_t *)__CFStrContents(str
) + strLength
+ __CFStrSkipAnyLengthByte(str
);
4085 for (idx
= 0; idx
< appendedLength
; idx
++) contents
[idx
] = (uint8_t)chars
[idx
];
4091 void __CFStringAppendBytes(CFMutableStringRef str
, const char *cStr
, CFIndex appendedLength
, CFStringEncoding encoding
) {
4092 Boolean appendedIsUnicode
= false;
4093 Boolean freeCStrWhenDone
= false;
4094 Boolean demoteAppendedUnicode
= false;
4095 CFVarWidthCharBuffer vBuf
;
4097 __CFAssertIsNotNegative(appendedLength
);
4099 if (encoding
== kCFStringEncodingASCII
|| encoding
== __CFStringGetEightBitStringEncoding()) {
4100 // appendedLength now denotes length in UniChars
4101 } else if (encoding
== kCFStringEncodingUnicode
) {
4102 UniChar
*chars
= (UniChar
*)cStr
;
4103 CFIndex idx
, length
= appendedLength
/ sizeof(UniChar
);
4104 bool isASCII
= true;
4105 for (idx
= 0; isASCII
&& idx
< length
; idx
++) isASCII
= (chars
[idx
] < 0x80);
4107 appendedIsUnicode
= true;
4109 demoteAppendedUnicode
= true;
4111 appendedLength
= length
;
4113 Boolean usingPassedInMemory
= false;
4115 vBuf
.allocator
= __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
4116 vBuf
.chars
.unicode
= NULL
; // This will cause the decode function to allocate memory if necessary
4118 if (!__CFStringDecodeByteStream3((const uint8_t *)cStr
, appendedLength
, encoding
, __CFStrIsUnicode(str
), &vBuf
, &usingPassedInMemory
, 0)) {
4119 CFAssert1(0, __kCFLogAssertion
, "Supplied bytes could not be converted specified encoding %d", encoding
);
4123 // If not ASCII, appendedLength now denotes length in UniChars
4124 appendedLength
= vBuf
.numChars
;
4125 appendedIsUnicode
= !vBuf
.isASCII
;
4126 cStr
= (const char *)vBuf
.chars
.ascii
;
4127 freeCStrWhenDone
= !usingPassedInMemory
&& vBuf
.shouldFreeChars
;
4130 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) {
4131 if (!appendedIsUnicode
&& !demoteAppendedUnicode
) {
4132 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "_cfAppendCString:length:", cStr
, appendedLength
);
4134 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "appendCharacters:length:", cStr
, appendedLength
);
4138 __CFAssertIsStringAndMutable(str
);
4139 strLength
= __CFStrLength(str
);
4141 __CFStringChangeSize(str
, CFRangeMake(strLength
, 0), appendedLength
, appendedIsUnicode
|| __CFStrIsUnicode(str
));
4143 if (__CFStrIsUnicode(str
)) {
4144 UniChar
*contents
= (UniChar
*)__CFStrContents(str
);
4145 if (appendedIsUnicode
) {
4146 memmove(contents
+ strLength
, cStr
, appendedLength
* sizeof(UniChar
));
4148 __CFStrConvertBytesToUnicode((const uint8_t *)cStr
, contents
+ strLength
, appendedLength
);
4151 if (demoteAppendedUnicode
) {
4152 UniChar
*chars
= (UniChar
*)cStr
;
4154 uint8_t *contents
= (uint8_t *)__CFStrContents(str
) + strLength
+ __CFStrSkipAnyLengthByte(str
);
4155 for (idx
= 0; idx
< appendedLength
; idx
++) contents
[idx
] = (uint8_t)chars
[idx
];
4157 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
4158 memmove(contents
+ strLength
+ __CFStrSkipAnyLengthByte(str
), cStr
, appendedLength
);
4163 if (freeCStrWhenDone
) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr
);
4166 void CFStringAppendPascalString(CFMutableStringRef str
, ConstStringPtr pStr
, CFStringEncoding encoding
) {
4167 __CFStringAppendBytes(str
, (const char *)(pStr
+ 1), (CFIndex
)*pStr
, encoding
);
4170 void CFStringAppendCString(CFMutableStringRef str
, const char *cStr
, CFStringEncoding encoding
) {
4171 __CFStringAppendBytes(str
, cStr
, strlen(cStr
), encoding
);
4175 void CFStringAppendFormat(CFMutableStringRef str
, CFDictionaryRef formatOptions
, CFStringRef format
, ...) {
4178 va_start(argList
, format
);
4179 CFStringAppendFormatAndArguments(str
, formatOptions
, format
, argList
);
4184 CFIndex
CFStringFindAndReplace(CFMutableStringRef string
, CFStringRef stringToFind
, CFStringRef replacementString
, CFRange rangeToSearch
, CFStringCompareFlags compareOptions
) {
4185 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID
, CFIndex
, string
, "replaceOccurrencesOfString:withString:options:range:", stringToFind
, replacementString
, compareOptions
, rangeToSearch
);
4187 Boolean backwards
= ((compareOptions
& kCFCompareBackwards
) != 0);
4188 UInt32 endIndex
= rangeToSearch
.location
+ rangeToSearch
.length
;
4189 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
4190 CFRange rangeBuffer
[MAX_RANGES_ON_STACK
]; // Used to avoid allocating memory
4191 CFRange
*ranges
= rangeBuffer
;
4192 CFIndex foundCount
= 0;
4193 CFIndex capacity
= MAX_RANGES_ON_STACK
;
4195 __CFAssertIsStringAndMutable(string
);
4196 __CFAssertRangeIsInStringBounds(string
, rangeToSearch
.location
, rangeToSearch
.length
);
4198 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
4199 while ((rangeToSearch
.length
> 0) && CFStringFindWithOptions(string
, stringToFind
, rangeToSearch
, compareOptions
, &foundRange
)) {
4200 // Determine the next range
4202 rangeToSearch
.length
= foundRange
.location
- rangeToSearch
.location
;
4204 rangeToSearch
.location
= foundRange
.location
+ foundRange
.length
;
4205 rangeToSearch
.length
= endIndex
- rangeToSearch
.location
;
4208 // If necessary, grow the array
4209 if (foundCount
>= capacity
) {
4210 bool firstAlloc
= (ranges
== rangeBuffer
) ? true : false;
4211 capacity
= (capacity
+ 4) * 2;
4212 // Note that reallocate with NULL previous pointer is same as allocate
4213 ranges
= (CFRange
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, firstAlloc
? NULL
: ranges
, capacity
* sizeof(CFRange
), 0);
4214 if (firstAlloc
) memmove(ranges
, rangeBuffer
, MAX_RANGES_ON_STACK
* sizeof(CFRange
));
4216 ranges
[foundCount
] = foundRange
;
4220 if (foundCount
> 0) {
4221 if (backwards
) { // Reorder the ranges to be incrementing (better to do this here, then to check other places)
4223 int tail
= foundCount
- 1;
4224 while (head
< tail
) {
4225 CFRange temp
= ranges
[head
];
4226 ranges
[head
] = ranges
[tail
];
4227 ranges
[tail
] = temp
;
4232 __CFStringReplaceMultiple(string
, ranges
, foundCount
, replacementString
);
4233 if (ranges
!= rangeBuffer
) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, ranges
);
4240 // This function is here for NSString purposes
4241 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
4243 int __CFStringCheckAndReplace(CFMutableStringRef str
, CFRange range
, CFStringRef replacement
) {
4244 if (!__CFStrIsMutable(str
)) return _CFStringErrNotMutable
; // These three ifs are always here, for NSString usage
4245 if (!replacement
&& __CFStringNoteErrors()) return _CFStringErrNilArg
;
4246 // This attempts to catch bad ranges including those described in 3375535 (-1,1)
4247 unsigned long endOfRange
= (unsigned long)(range
.location
) + (unsigned long)(range
.length
); // NSRange uses unsigned quantities, hence the casting
4248 if (((endOfRange
> (unsigned long)__CFStrLength(str
)) || (endOfRange
< (unsigned long)(range
.location
))) && __CFStringNoteErrors()) return _CFStringErrBounds
;
4250 __CFAssertIsStringAndMutable(str
);
4251 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
4252 __CFStringReplace(str
, range
, replacement
);
4253 return _CFStringErrNone
;
// __CFStringNoteErrors takes no arguments and returns a Boolean.
// __CFStringCheckAndReplace() calls it before returning _CFStringErrNilArg or
// _CFStringErrBounds: those codes are only returned when this function returns true.
// NOTE(review): the body is not visible in this excerpt, so the value actually
// returned cannot be stated here - confirm against the full file.
4256 // This function determines whether errors which would cause string exceptions should
4257 // be ignored or not
4259 Boolean
__CFStringNoteErrors(void) {
4265 void CFStringPad(CFMutableStringRef string
, CFStringRef padString
, CFIndex length
, CFIndex indexIntoPad
) {
4266 CFIndex originalLength
;
4268 __CFAssertIsNotNegative(length
);
4269 __CFAssertIsNotNegative(indexIntoPad
);
4271 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID
, void, string
, "_cfPad:length:padIndex:", padString
, length
, indexIntoPad
);
4273 __CFAssertIsStringAndMutable(string
);
4275 originalLength
= __CFStrLength(string
);
4276 if (length
< originalLength
) {
4277 __CFStringChangeSize(string
, CFRangeMake(length
, originalLength
- length
), 0, false);
4278 } else if (originalLength
< length
) {
4282 CFIndex padStringLength
;
4284 CFIndex padRemaining
= length
- originalLength
;
4286 if (CF_IS_OBJC(__kCFStringTypeID
, padString
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
4287 padStringLength
= CFStringGetLength(padString
);
4288 isUnicode
= true; /* !!! Bad for now */
4290 __CFAssertIsString(padString
);
4291 padStringLength
= __CFStrLength(padString
);
4292 isUnicode
= __CFStrIsUnicode(string
) || __CFStrIsUnicode(padString
);
4295 charSize
= isUnicode
? sizeof(UniChar
) : sizeof(uint8_t);
4297 __CFStringChangeSize(string
, CFRangeMake(originalLength
, 0), padRemaining
, isUnicode
);
4299 contents
= (uint8_t *)__CFStrContents(string
) + charSize
* originalLength
+ __CFStrSkipAnyLengthByte(string
);
4300 padLength
= padStringLength
- indexIntoPad
;
4301 padLength
= padRemaining
< padLength
? padRemaining
: padLength
;
4303 while (padRemaining
> 0) {
4305 CFStringGetCharacters(padString
, CFRangeMake(indexIntoPad
, padLength
), (UniChar
*)contents
);
4307 CFStringGetBytes(padString
, CFRangeMake(indexIntoPad
, padLength
), __CFStringGetEightBitStringEncoding(), 0, false, contents
, padRemaining
* charSize
, NULL
);
4309 contents
+= padLength
* charSize
;
4310 padRemaining
-= padLength
;
4312 padLength
= padRemaining
< padLength
? padRemaining
: padStringLength
;
4317 void CFStringTrim(CFMutableStringRef string
, CFStringRef trimString
) {
4319 CFIndex newStartIndex
;
4322 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfTrim:", trimString
);
4324 __CFAssertIsStringAndMutable(string
);
4325 __CFAssertIsString(trimString
);
4328 length
= __CFStrLength(string
);
4330 while (CFStringFindWithOptions(string
, trimString
, CFRangeMake(newStartIndex
, length
- newStartIndex
), kCFCompareAnchored
, &range
)) {
4331 newStartIndex
= range
.location
+ range
.length
;
4334 if (newStartIndex
< length
) {
4335 CFIndex charSize
= __CFStrIsUnicode(string
) ? sizeof(UniChar
) : sizeof(uint8_t);
4336 uint8_t *contents
= (uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4338 length
-= newStartIndex
;
4339 if (__CFStrLength(trimString
) < length
) {
4340 while (CFStringFindWithOptions(string
, trimString
, CFRangeMake(newStartIndex
, length
), kCFCompareAnchored
|kCFCompareBackwards
, &range
)) {
4341 length
= range
.location
- newStartIndex
;
4344 memmove(contents
, contents
+ newStartIndex
* charSize
, length
* charSize
);
4345 __CFStringChangeSize(string
, CFRangeMake(length
, __CFStrLength(string
) - length
), 0, false);
4346 } else { // Only trimString in string, trim all
4347 __CFStringChangeSize(string
, CFRangeMake(0, length
), 0, false);
4351 void CFStringTrimWhitespace(CFMutableStringRef string
) {
4352 CFIndex newStartIndex
;
4354 CFStringInlineBuffer buffer
;
4356 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, void, string
, "_cfTrimWS");
4358 __CFAssertIsStringAndMutable(string
);
4361 length
= __CFStrLength(string
);
4363 CFStringInitInlineBuffer(string
, &buffer
, CFRangeMake(0, length
));
4364 CFIndex buffer_idx
= 0;
4366 while (buffer_idx
< length
&& CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer
, buffer_idx
), kCFUniCharWhitespaceAndNewlineCharacterSet
))
4368 newStartIndex
= buffer_idx
;
4370 if (newStartIndex
< length
) {
4371 uint8_t *contents
= (uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4372 CFIndex charSize
= (__CFStrIsUnicode(string
) ? sizeof(UniChar
) : sizeof(uint8_t));
4374 buffer_idx
= length
- 1;
4375 while (0 <= buffer_idx
&& CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer
, buffer_idx
), kCFUniCharWhitespaceAndNewlineCharacterSet
))
4377 length
= buffer_idx
- newStartIndex
+ 1;
4379 memmove(contents
, contents
+ newStartIndex
* charSize
, length
* charSize
);
4380 __CFStringChangeSize(string
, CFRangeMake(length
, __CFStrLength(string
) - length
), 0, false);
4381 } else { // Whitespace only string
4382 __CFStringChangeSize(string
, CFRangeMake(0, length
), 0, false);
// CFStringLowercase: converts the characters of the mutable string `string`
// to lowercase in place.
//   string - mutable string to modify
//   locale - when _CFCanUseLocale(locale) is true, its language identifier is
//            passed to the case-mapping routines; otherwise NULL is passed
// Two stages: an in-place ASCII pass for eight-bit strings without a language
// code, then a general pass that maps each UTF-16 character (or surrogate
// pair) with CFUniCharMapCaseTo() and resizes the string when a mapping is
// shorter or longer than its source.
// NOTE(review): several lines of this function (declarations, switch labels,
// closing braces) are not visible in this excerpt.
4386 void CFStringLowercase(CFMutableStringRef string
, CFLocaleRef locale
) {
4387 CFIndex currentIndex
= 0;
4389 const uint8_t *langCode
;
4390 Boolean isEightBit
= __CFStrIsEightBit(string
);
4392 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfLowercase:", locale
);
4394 __CFAssertIsStringAndMutable(string
);
4396 length
= __CFStrLength(string
);
// langCode stays NULL unless the locale is usable.
4398 langCode
= (const uint8_t *)(_CFCanUseLocale(locale
) ? _CFStrGetLanguageIdentifierForLocale(locale
) : NULL
);
// Eight-bit pass: bytes 'A'..'Z' are shifted to 'a'..'z' directly in the buffer.
4400 if (!langCode
&& isEightBit
) {
4401 uint8_t *contents
= (uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4402 for (;currentIndex
< length
;currentIndex
++) {
4403 if (contents
[currentIndex
] >= 'A' && contents
[currentIndex
] <= 'Z') {
4404 contents
[currentIndex
] += 'a' - 'A';
// A byte above 127 is handled separately; presumably the scan stops here so the
// general pass below resumes at currentIndex (branch body not visible - confirm).
4405 } else if (contents
[currentIndex
] > 127) {
// General pass, entered only if the eight-bit pass did not consume the whole string.
4411 if (currentIndex
< length
) {
4412 UTF16Char
*contents
;
4413 UniChar mappedCharacters
[MAX_CASE_MAPPING_BUF
];
4414 CFIndex mappedLength
;
4415 UTF32Char currentChar
;
// Zero-length size change with the last argument true; CFStringPad passes its
// isUnicode flag in that position, so this presumably switches an eight-bit
// string to Unicode storage (confirm against __CFStringChangeSize).
4418 if (isEightBit
) __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true);
4420 contents
= (UniChar
*)__CFStrContents(string
);
4422 for (;currentIndex
< length
;currentIndex
++) {
// Combine a high/low surrogate pair into one UTF-32 value; otherwise take the single unit.
4424 if (CFUniCharIsSurrogateHighCharacter(contents
[currentIndex
]) && (currentIndex
+ 1 < length
) && CFUniCharIsSurrogateLowCharacter(contents
[currentIndex
+ 1])) {
4425 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(contents
[currentIndex
], contents
[currentIndex
+ 1]);
4427 currentChar
= contents
[currentIndex
];
// Conditional-mapping flags are computed only with a language code or for U+03A3; otherwise 0.
4429 flags
= ((langCode
|| (currentChar
== 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar
, contents
, currentIndex
, length
, kCFUniCharToLowercase
, langCode
, flags
) : 0);
// Map the character; the first mapped unit overwrites the current position.
4431 mappedLength
= CFUniCharMapCaseTo(currentChar
, mappedCharacters
, MAX_CASE_MAPPING_BUF
, kCFUniCharToLowercase
, flags
, langCode
);
4432 if (mappedLength
> 0) contents
[currentIndex
] = *mappedCharacters
;
// The source occupied two UTF-16 units here, so the string is shrunk or grown
// according to mappedLength. `contents` is re-read after every size change,
// presumably because the storage may move.
4434 if (currentChar
> 0xFFFF) { // Non-BMP char
4435 switch (mappedLength
) {
4437 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 2), 0, true);
4438 contents
= (UniChar
*)__CFStrContents(string
);
4443 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 1), 0, true);
4444 contents
= (UniChar
*)__CFStrContents(string
);
4449 contents
[++currentIndex
] = mappedCharacters
[1];
4453 --mappedLength
; // Skip the current char
4454 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
- 1, true);
4455 contents
= (UniChar
*)__CFStrContents(string
);
4456 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4457 length
+= (mappedLength
- 1);
4458 currentIndex
+= mappedLength
;
// Single-unit source with an empty mapping: the character is removed.
4461 } else if (mappedLength
== 0) {
4462 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 1), 0, true);
4463 contents
= (UniChar
*)__CFStrContents(string
);
// Single-unit source with a multi-unit mapping: make room after the current
// position and copy in the remaining mapped units.
4465 } else if (mappedLength
> 1) {
4466 --mappedLength
; // Skip the current char
4467 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
, true);
4468 contents
= (UniChar
*)__CFStrContents(string
);
4469 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4470 length
+= mappedLength
;
4471 currentIndex
+= mappedLength
;
// CFStringUppercase: converts the characters of the mutable string `string`
// to uppercase in place.
//   string - mutable string to modify
//   locale - when _CFCanUseLocale(locale) is true, its language identifier is
//            passed to the case-mapping routines; otherwise NULL is passed
// Same two-stage shape as CFStringLowercase: an in-place ASCII pass for
// eight-bit strings without a language code, then a general pass using
// CFUniCharMapCaseTo() with kCFUniCharToUppercase.
// NOTE(review): several lines of this function (declarations, switch labels,
// closing braces) are not visible in this excerpt.
4477 void CFStringUppercase(CFMutableStringRef string
, CFLocaleRef locale
) {
4478 CFIndex currentIndex
= 0;
4480 const uint8_t *langCode
;
4481 Boolean isEightBit
= __CFStrIsEightBit(string
);
4483 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfUppercase:", locale
);
4485 __CFAssertIsStringAndMutable(string
);
4487 length
= __CFStrLength(string
);
// langCode stays NULL unless the locale is usable.
4489 langCode
= (const uint8_t *)(_CFCanUseLocale(locale
) ? _CFStrGetLanguageIdentifierForLocale(locale
) : NULL
);
// Eight-bit pass: bytes 'a'..'z' are shifted to 'A'..'Z' directly in the buffer.
4491 if (!langCode
&& isEightBit
) {
4492 uint8_t *contents
= (uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4493 for (;currentIndex
< length
;currentIndex
++) {
4494 if (contents
[currentIndex
] >= 'a' && contents
[currentIndex
] <= 'z') {
4495 contents
[currentIndex
] -= 'a' - 'A';
// A byte above 127 is handled separately; presumably the scan stops here so the
// general pass below resumes at currentIndex (branch body not visible - confirm).
4496 } else if (contents
[currentIndex
] > 127) {
// General pass, entered only if the eight-bit pass did not consume the whole string.
4502 if (currentIndex
< length
) {
4504 UniChar mappedCharacters
[MAX_CASE_MAPPING_BUF
];
4505 CFIndex mappedLength
;
4506 UTF32Char currentChar
;
// Zero-length size change with the last argument true; presumably switches an
// eight-bit string to Unicode storage (confirm against __CFStringChangeSize).
4509 if (isEightBit
) __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true);
4511 contents
= (UniChar
*)__CFStrContents(string
);
4513 for (;currentIndex
< length
;currentIndex
++) {
// Combine a high/low surrogate pair into one UTF-32 value; otherwise take the single unit.
4514 if (CFUniCharIsSurrogateHighCharacter(contents
[currentIndex
]) && (currentIndex
+ 1 < length
) && CFUniCharIsSurrogateLowCharacter(contents
[currentIndex
+ 1])) {
4515 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(contents
[currentIndex
], contents
[currentIndex
+ 1]);
4517 currentChar
= contents
[currentIndex
];
// Conditional-mapping flags are computed only when a language code is present; otherwise 0.
4520 flags
= (langCode
? CFUniCharGetConditionalCaseMappingFlags(currentChar
, contents
, currentIndex
, length
, kCFUniCharToUppercase
, langCode
, flags
) : 0);
// Map the character; the first mapped unit overwrites the current position.
4522 mappedLength
= CFUniCharMapCaseTo(currentChar
, mappedCharacters
, MAX_CASE_MAPPING_BUF
, kCFUniCharToUppercase
, flags
, langCode
);
4523 if (mappedLength
> 0) contents
[currentIndex
] = *mappedCharacters
;
// The source occupied two UTF-16 units here, so the string is shrunk or grown
// according to mappedLength. `contents` is re-read after every size change,
// presumably because the storage may move.
4525 if (currentChar
> 0xFFFF) { // Non-BMP char
4526 switch (mappedLength
) {
4528 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 2), 0, true);
4529 contents
= (UniChar
*)__CFStrContents(string
);
4534 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 1), 0, true);
4535 contents
= (UniChar
*)__CFStrContents(string
);
4540 contents
[++currentIndex
] = mappedCharacters
[1];
4544 --mappedLength
; // Skip the current char
4545 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
- 1, true);
4546 contents
= (UniChar
*)__CFStrContents(string
);
4547 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4548 length
+= (mappedLength
- 1);
4549 currentIndex
+= mappedLength
;
// Single-unit source with an empty mapping: the character is removed.
4552 } else if (mappedLength
== 0) {
4553 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 1), 0, true);
4554 contents
= (UniChar
*)__CFStrContents(string
);
// Single-unit source with a multi-unit mapping: make room after the current
// position and copy in the remaining mapped units.
4556 } else if (mappedLength
> 1) {
4557 --mappedLength
; // Skip the current char
4558 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
, true);
4559 contents
= (UniChar
*)__CFStrContents(string
);
4560 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4561 length
+= mappedLength
;
4562 currentIndex
+= mappedLength
;
// CFStringCapitalize: capitalizes the mutable string `string` in place.
//   string - mutable string to modify
//   locale - when _CFCanUseLocale(locale) is true, its language identifier is
//            passed to the case-mapping routines; otherwise NULL is passed
// isLastCased records whether the preceding character was a cased letter. A
// character following a cased letter is mapped to lowercase; any other
// character is mapped to titlecase. Characters in the case-ignorable set leave
// isLastCased unchanged.
// NOTE(review): several lines of this function (declarations, switch labels,
// some assignments, closing braces) are not visible in this excerpt.
4569 void CFStringCapitalize(CFMutableStringRef string
, CFLocaleRef locale
) {
4570 CFIndex currentIndex
= 0;
4572 const uint8_t *langCode
;
4573 Boolean isEightBit
= __CFStrIsEightBit(string
);
4574 Boolean isLastCased
= false;
4575 const uint8_t *caseIgnorableForBMP
;
4577 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfCapitalize:", locale
);
4579 __CFAssertIsStringAndMutable(string
);
4581 length
= __CFStrLength(string
);
// Plane-0 bitmap of the case-ignorable set, fetched once for the per-character tests below.
4583 caseIgnorableForBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet
, 0);
4585 langCode
= (const uint8_t *)(_CFCanUseLocale(locale
) ? _CFStrGetLanguageIdentifierForLocale(locale
) : NULL
);
// Eight-bit pass over ASCII letters, done directly in the buffer.
4587 if (!langCode
&& isEightBit
) {
4588 uint8_t *contents
= (uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4589 for (;currentIndex
< length
;currentIndex
++) {
// A byte above 127 is handled separately; presumably the scan stops here so the
// general pass below resumes at currentIndex (branch body not visible - confirm).
4590 if (contents
[currentIndex
] > 127) {
// An uppercase letter is lowered only when it follows a cased letter.
4592 } else if (contents
[currentIndex
] >= 'A' && contents
[currentIndex
] <= 'Z') {
4593 contents
[currentIndex
] += (isLastCased
? 'a' - 'A' : 0);
// A lowercase letter is raised only when it does not follow a cased letter.
4595 } else if (contents
[currentIndex
] >= 'a' && contents
[currentIndex
] <= 'z') {
4596 contents
[currentIndex
] -= (!isLastCased
? 'a' - 'A' : 0);
// Any other byte that is not case-ignorable ends the current run of cased letters.
4598 } else if (!CFUniCharIsMemberOfBitmap(contents
[currentIndex
], caseIgnorableForBMP
)) {
4599 isLastCased
= false;
// General pass, entered only if the eight-bit pass did not consume the whole string.
4604 if (currentIndex
< length
) {
4606 UniChar mappedCharacters
[MAX_CASE_MAPPING_BUF
];
4607 CFIndex mappedLength
;
4608 UTF32Char currentChar
;
// Zero-length size change with the last argument true; presumably switches an
// eight-bit string to Unicode storage (confirm against __CFStringChangeSize).
4611 if (isEightBit
) __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true);
4613 contents
= (UniChar
*)__CFStrContents(string
);
4615 for (;currentIndex
< length
;currentIndex
++) {
// Combine a high/low surrogate pair into one UTF-32 value; otherwise take the single unit.
4616 if (CFUniCharIsSurrogateHighCharacter(contents
[currentIndex
]) && (currentIndex
+ 1 < length
) && CFUniCharIsSurrogateLowCharacter(contents
[currentIndex
+ 1])) {
4617 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(contents
[currentIndex
], contents
[currentIndex
+ 1]);
4619 currentChar
= contents
[currentIndex
];
// Flags are computed with a language code, or for U+03A3 following a cased letter; otherwise 0.
4621 flags
= ((langCode
|| ((currentChar
== 0x03A3) && isLastCased
)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar
, contents
, currentIndex
, length
, (isLastCased
? kCFUniCharToLowercase
: kCFUniCharToTitlecase
), langCode
, flags
) : 0);
// Lowercase after a cased letter, titlecase otherwise; the first mapped unit
// overwrites the current position.
4623 mappedLength
= CFUniCharMapCaseTo(currentChar
, mappedCharacters
, MAX_CASE_MAPPING_BUF
, (isLastCased
? kCFUniCharToLowercase
: kCFUniCharToTitlecase
), flags
, langCode
);
4624 if (mappedLength
> 0) contents
[currentIndex
] = *mappedCharacters
;
// The source occupied two UTF-16 units here, so the string is shrunk or grown
// according to mappedLength. `contents` is re-read after every size change,
// presumably because the storage may move.
4626 if (currentChar
> 0xFFFF) { // Non-BMP char
4627 switch (mappedLength
) {
4629 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 2), 0, true);
4630 contents
= (UniChar
*)__CFStrContents(string
);
4635 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 1), 0, true);
4636 contents
= (UniChar
*)__CFStrContents(string
);
4641 contents
[++currentIndex
] = mappedCharacters
[1];
4645 --mappedLength
; // Skip the current char
4646 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
- 1, true);
4647 contents
= (UniChar
*)__CFStrContents(string
);
4648 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4649 length
+= (mappedLength
- 1);
4650 currentIndex
+= mappedLength
;
// Single-unit source with an empty mapping: the character is removed.
4653 } else if (mappedLength
== 0) {
4654 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 1), 0, true);
4655 contents
= (UniChar
*)__CFStrContents(string
);
// Single-unit source with a multi-unit mapping: make room after the current
// position and copy in the remaining mapped units.
4657 } else if (mappedLength
> 1) {
4658 --mappedLength
; // Skip the current char
4659 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
, true);
4660 contents
= (UniChar
*)__CFStrContents(string
);
4661 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4662 length
+= mappedLength
;
4663 currentIndex
+= mappedLength
;
// Update isLastCased from the original (pre-mapping) character, unless it is
// case-ignorable: cased means a member of the uppercase or lowercase letter set.
4666 if (!((currentChar
> 0xFFFF) ? CFUniCharIsMemberOf(currentChar
, kCFUniCharCaseIgnorableCharacterSet
) : CFUniCharIsMemberOfBitmap(currentChar
, caseIgnorableForBMP
))) { // We have non-caseignorable here
4667 isLastCased
= ((CFUniCharIsMemberOf(currentChar
, kCFUniCharUppercaseLetterCharacterSet
) || CFUniCharIsMemberOf(currentChar
, kCFUniCharLowercaseLetterCharacterSet
)) ? true : false);
// Number of UTF32Char slots in the on-stack buffer CFStringNormalize() starts
// with; also the step by which that buffer is grown.
4674 #define MAX_DECOMP_BUF 64
// Constants for arithmetic composition of Hangul syllables from conjoining
// jamo, used by CFStringNormalize(). The values match the Hangul syllable
// composition constants of the Unicode Standard:
//   *BASE  - first code point of the syllable block (S) and of the leading
//            consonants (L), vowels (V) and trailing consonants (T); TBASE is
//            one below the first trailing consonant, so index 0 means none
//   *COUNT - number of syllables (S), leading consonants (L), vowels (V) and
//            trailing-consonant slots including the empty one (T)
4676 #define HANGUL_SBASE 0xAC00
4677 #define HANGUL_LBASE 0x1100
4678 #define HANGUL_VBASE 0x1161
4679 #define HANGUL_TBASE 0x11A7
4680 #define HANGUL_SCOUNT 11172
4681 #define HANGUL_LCOUNT 19
4682 #define HANGUL_VCOUNT 21
4683 #define HANGUL_TCOUNT 28
// Number of syllables that share one leading consonant (vowels x trailing slots).
4684 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
4686 CF_INLINE
uint32_t __CFGetUTF16Length(const UTF32Char
*characters
, uint32_t utf32Length
) {
4687 const UTF32Char
*limit
= characters
+ utf32Length
;
4688 uint32_t length
= 0;
4690 while (characters
< limit
) length
+= (*(characters
++) > 0xFFFF ? 2 : 1);
4695 CF_INLINE
void __CFFillInUTF16(const UTF32Char
*characters
, UTF16Char
*dst
, uint32_t utf32Length
) {
4696 const UTF32Char
*limit
= characters
+ utf32Length
;
4697 UTF32Char currentChar
;
4699 while (characters
< limit
) {
4700 currentChar
= *(characters
++);
4701 if (currentChar
> 0xFFFF) {
4702 currentChar
-= 0x10000;
4703 *(dst
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
4704 *(dst
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
4706 *(dst
++) = currentChar
;
// CFStringNormalize: rewrites the mutable string `string` in place into the
// Unicode normalization form selected by theForm.
//   string  - mutable string to normalize
//   theForm - form selector; the code tests it against
//             kCFStringNormalizationFormC and kCFStringNormalizationFormKD as
//             bit masks to decide whether to precompose and whether to apply
//             compatibility decomposition
// Outline of the visible code: an eight-bit string is scanned for bytes above
// 127 first. The main loop then takes one character (or surrogate pair) at a
// time, gathers its decomposition and any following combining characters as
// UTF-32 in mappedCharacters, optionally sorts, compatibility-decomposes and
// precomposes that run, and writes the result back as UTF-16, resizing the
// string when the length changed.
// NOTE(review): many interior lines of this function (assignments, else
// branches, closing braces) are not visible in this excerpt; the comments below
// describe only what is shown.
4711 void CFStringNormalize(CFMutableStringRef string
, CFStringNormalizationForm theForm
) {
4712 CFIndex currentIndex
= 0;
4714 bool needToReorder
= true;
4716 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfNormalize:", theForm
);
4718 __CFAssertIsStringAndMutable(string
);
4720 length
= __CFStrLength(string
);
// Eight-bit strings: nothing to do for form C; otherwise look for the first
// byte above 127, and if one exists apply a zero-length size change with the
// last argument true (presumably a switch to Unicode storage - confirm) and
// turn off reordering.
4722 if (__CFStrIsEightBit(string
)) {
4725 if (theForm
== kCFStringNormalizationFormC
) return; // 8bit form has no decomposition
4727 contents
= (uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4729 for (;currentIndex
< length
;currentIndex
++) {
4730 if (contents
[currentIndex
] > 127) {
4731 __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true); // need to do harm way
4732 needToReorder
= false;
// Main pass over UTF-16 storage, starting at currentIndex.
4738 if (currentIndex
< length
) {
4739 UTF16Char
*limit
= (UTF16Char
*)__CFStrContents(string
) + length
;
4740 UTF16Char
*contents
= (UTF16Char
*)__CFStrContents(string
) + currentIndex
;
// mappedCharacters starts on the stack and moves to the heap if it must grow.
4741 UTF32Char buffer
[MAX_DECOMP_BUF
];
4742 UTF32Char
*mappedCharacters
= buffer
;
4743 CFIndex allocatedLength
= MAX_DECOMP_BUF
;
4744 CFIndex mappedLength
;
4745 CFIndex currentLength
;
4746 UTF32Char currentChar
;
// Plane-0 lookup tables fetched once; other planes are looked up per character.
4747 const uint8_t *decompBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, 0);
4748 const uint8_t *nonBaseBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
4749 const uint8_t *combiningBMP
= (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
4751 while (contents
< limit
) {
// Read the next character, combining a surrogate pair into one UTF-32 value.
4752 if (CFUniCharIsSurrogateHighCharacter(*contents
) && (contents
+ 1 < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
4753 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*contents
, *(contents
+ 1));
4757 currentChar
= *(contents
++);
// Decompose when the character is canonically decomposable and has combining property 0.
4763 if (CFUniCharIsMemberOfBitmap(currentChar
, ((currentChar
< 0x10000) ? decompBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (currentChar
>> 16)))) && (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar
, ((currentChar
< 0x10000) ? combiningBMP
: (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16)))))) {
// NOTE(review): the upper bound is HANGUL_SBASE + HANGUL_SCOUNT, tested with
// '>'; if HANGUL_SCOUNT is the number of syllables, that value is one past the
// last syllable - confirm whether '>=' was intended.
4764 if ((theForm
& kCFStringNormalizationFormC
) == 0 || currentChar
< HANGUL_SBASE
|| currentChar
> (HANGUL_SBASE
+ HANGUL_SCOUNT
)) { // We don't have to decompose Hangul Syllables if we're precomposing again
4765 mappedLength
= CFUniCharDecomposeCharacter(currentChar
, mappedCharacters
, MAX_DECOMP_BUF
);
// Look at what follows: combining characters after the current one are pulled
// into the same run so they can be reordered and/or composed together.
4769 if ((needToReorder
|| (theForm
& kCFStringNormalizationFormC
)) && ((contents
< limit
) || (mappedLength
== 0))) {
4770 if (mappedLength
> 0) {
4771 if (CFUniCharIsSurrogateHighCharacter(*contents
) && (contents
+ 1 < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
4772 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*contents
, *(contents
+ 1));
4774 currentChar
= *contents
;
4778 if (0 != CFUniCharGetCombiningPropertyForCharacter(currentChar
, (const uint8_t *)((currentChar
< 0x10000) ? combiningBMP
: CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16))))) {
4779 uint32_t decompLength
;
// Nothing was decomposed, so currentChar itself is the combining character:
// step back over it and seed the run with the character that precedes it.
4781 if (mappedLength
== 0) {
4782 contents
-= (currentChar
& 0xFFFF0000 ? 2 : 1);
4783 if (currentIndex
> 0) {
4784 if (CFUniCharIsSurrogateLowCharacter(*(contents
- 1)) && (currentIndex
> 1) && CFUniCharIsSurrogateHighCharacter(*(contents
- 2))) {
4785 *mappedCharacters
= CFUniCharGetLongCharacterForSurrogatePair(*(contents
- 2), *(contents
- 1));
4789 *mappedCharacters
= *(contents
- 1);
4796 currentLength
+= (currentChar
& 0xFFFF0000 ? 2 : 1);
4798 contents
+= (currentChar
& 0xFFFF0000 ? 2 : 1);
4800 if (CFUniCharIsMemberOfBitmap(currentChar
, ((currentChar
< 0x10000) ? decompBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (currentChar
>> 16))))) { // Vietnamese accent, etc.
4801 decompLength
= CFUniCharDecomposeCharacter(currentChar
, mappedCharacters
+ mappedLength
, MAX_DECOMP_BUF
- mappedLength
);
4802 mappedLength
+= decompLength
;
4804 mappedCharacters
[mappedLength
++] = currentChar
;
// Keep appending following characters until one with combining property 0 is reached.
4807 while (contents
< limit
) {
4808 if (CFUniCharIsSurrogateHighCharacter(*contents
) && (contents
+ 1 < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
4809 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*contents
, *(contents
+ 1));
4811 currentChar
= *contents
;
4813 if (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar
, (const uint8_t *)((currentChar
< 0x10000) ? combiningBMP
: CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16))))) break;
4814 if (currentChar
& 0xFFFF0000) {
// Buffer full: grow by MAX_DECOMP_BUF entries; the first growth copies the
// stack buffer into a new heap block, later ones reallocate that block.
4821 if (mappedLength
== allocatedLength
) {
4822 allocatedLength
+= MAX_DECOMP_BUF
;
4823 if (mappedCharacters
== buffer
) {
4824 mappedCharacters
= (UTF32Char
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, allocatedLength
* sizeof(UTF32Char
), 0);
4825 memmove(mappedCharacters
, buffer
, MAX_DECOMP_BUF
* sizeof(UTF32Char
));
4827 mappedCharacters
= (UTF32Char
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, mappedCharacters
, allocatedLength
* sizeof(UTF32Char
), 0);
// NOTE(review): the capacity passed below is MAX_DECOMP_BUF - mappedLength even
// though the buffer may already have been grown to allocatedLength entries;
// once mappedLength reaches MAX_DECOMP_BUF this is zero or negative - confirm
// whether allocatedLength - mappedLength was intended.
4830 if (CFUniCharIsMemberOfBitmap(currentChar
, ((currentChar
< 0x10000) ? decompBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (currentChar
>> 16))))) { // Vietnamese accent, etc.
4831 decompLength
= CFUniCharDecomposeCharacter(currentChar
, mappedCharacters
+ mappedLength
, MAX_DECOMP_BUF
- mappedLength
);
4832 mappedLength
+= decompLength
;
4834 mappedCharacters
[mappedLength
++] = currentChar
;
// Order the collected run with CFUniCharPrioritySort().
4838 if (needToReorder
&& mappedLength
> 1) CFUniCharPrioritySort(mappedCharacters
, mappedLength
);
// Compatibility forms: apply CFUniCharCompatibilityDecompose() to the run,
// growing the buffer and retrying whenever it reports 0.
4841 if (theForm
& kCFStringNormalizationFormKD
) {
4842 CFIndex newLength
= 0;
4844 if (mappedLength
== 0 && CFUniCharIsMemberOf(currentChar
, kCFUniCharCompatibilityDecomposableCharacterSet
)) {
4845 mappedCharacters
[mappedLength
++] = currentChar
;
4847 while (newLength
< mappedLength
) {
4848 newLength
= CFUniCharCompatibilityDecompose(mappedCharacters
, mappedLength
, allocatedLength
);
4849 if (newLength
== 0) {
4850 allocatedLength
+= MAX_DECOMP_BUF
;
4851 if (mappedCharacters
== buffer
) {
4852 mappedCharacters
= (UTF32Char
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, allocatedLength
* sizeof(UTF32Char
), 0);
4853 memmove(mappedCharacters
, buffer
, MAX_DECOMP_BUF
* sizeof(UTF32Char
));
4855 mappedCharacters
= (UTF32Char
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, mappedCharacters
, allocatedLength
* sizeof(UTF32Char
), 0);
4859 mappedLength
= newLength
;
// Composing forms: recombine the run. Slots that have been merged into a base
// are overwritten with 0xFFFD as a placeholder and squeezed out afterwards.
4862 if (theForm
& kCFStringNormalizationFormC
) {
4865 if (mappedLength
> 1) {
4866 CFIndex consumedLength
= 1;
4867 UTF32Char
*currentBase
= mappedCharacters
;
4868 uint8_t currentClass
, lastClass
= 0;
4869 bool didCombine
= false;
4871 currentChar
= *mappedCharacters
;
4873 while (consumedLength
< mappedLength
) {
4874 nextChar
= mappedCharacters
[consumedLength
];
4875 currentClass
= CFUniCharGetCombiningPropertyForCharacter(nextChar
, (const uint8_t *)((nextChar
< 0x10000) ? combiningBMP
: CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (nextChar
>> 16))));
// With compatibility decomposition the run may hold conjoining jamo: compose a
// leading consonant + vowel (+ optional trailing consonant) arithmetically.
4877 if (theForm
& kCFStringNormalizationFormKD
) {
4878 if ((currentChar
>= HANGUL_LBASE
) && (currentChar
< (HANGUL_LBASE
+ 0xFF))) {
4879 SInt8 lIndex
= currentChar
- HANGUL_LBASE
;
// NOTE(review): the index bounds here and below use '<=' against the *COUNT
// macros; if those are element counts, the largest valid index is COUNT - 1 -
// confirm whether '<' was intended.
4881 if ((0 <= lIndex
) && (lIndex
<= HANGUL_LCOUNT
)) {
4882 SInt16 vIndex
= nextChar
- HANGUL_VBASE
;
4884 if ((vIndex
>= 0) && (vIndex
<= HANGUL_VCOUNT
)) {
4886 CFIndex usedLength
= mappedLength
;
4888 mappedCharacters
[consumedLength
++] = 0xFFFD;
4890 if (consumedLength
< mappedLength
) {
4891 tIndex
= mappedCharacters
[consumedLength
] - HANGUL_TBASE
;
4892 if ((tIndex
< 0) || (tIndex
> HANGUL_TCOUNT
)) {
4895 mappedCharacters
[consumedLength
++] = 0xFFFD;
4898 *currentBase
= (lIndex
* HANGUL_VCOUNT
+ vIndex
) * HANGUL_TCOUNT
+ tIndex
+ HANGUL_SBASE
;
// Remove the 0xFFFD placeholders left by the jamo that were merged.
4900 while (--usedLength
> 0) {
4901 if (mappedCharacters
[usedLength
] == 0xFFFD) {
4904 memmove(mappedCharacters
+ usedLength
, mappedCharacters
+ usedLength
+ 1, (mappedLength
- usedLength
) * sizeof(UTF32Char
));
4907 currentBase
= mappedCharacters
+ consumedLength
;
4908 currentChar
= *currentBase
;
// A character outside the non-base set starts a new base: store the finished
// base and continue composing from this one.
4915 if (!CFUniCharIsMemberOfBitmap(nextChar
, ((nextChar
< 0x10000) ? nonBaseBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (nextChar
>> 16))))) {
4916 *currentBase
= currentChar
;
4917 currentBase
= mappedCharacters
+ consumedLength
;
4918 currentChar
= nextChar
;
// Try to compose only when no lower-or-equal combining class stands in between.
// CFUniCharPrecomposeCharacter() returning 0xFFFD is treated as "no composition".
4924 if ((lastClass
== 0) || (currentClass
> lastClass
)) {
4925 nextChar
= CFUniCharPrecomposeCharacter(currentChar
, nextChar
);
4926 if (nextChar
== 0xFFFD) {
4927 lastClass
= currentClass
;
4929 mappedCharacters
[consumedLength
] = 0xFFFD;
4931 currentChar
= nextChar
;
4937 *currentBase
= currentChar
;
// Remove the 0xFFFD placeholders left by characters that were composed away.
4939 consumedLength
= mappedLength
;
4940 while (--consumedLength
> 0) {
4941 if (mappedCharacters
[consumedLength
] == 0xFFFD) {
4943 memmove(mappedCharacters
+ consumedLength
, mappedCharacters
+ consumedLength
+ 1, (mappedLength
- consumedLength
) * sizeof(UTF32Char
));
// Run of at most one character: compose conjoining jamo straight from the string contents.
4947 } else if ((currentChar
>= HANGUL_LBASE
) && (currentChar
< (HANGUL_LBASE
+ 0xFF))) { // Hangul Jamo
4948 SInt8 lIndex
= currentChar
- HANGUL_LBASE
;
4950 if ((contents
< limit
) && (0 <= lIndex
) && (lIndex
<= HANGUL_LCOUNT
)) {
4951 SInt16 vIndex
= *contents
- HANGUL_VBASE
;
4953 if ((vIndex
>= 0) && (vIndex
<= HANGUL_VCOUNT
)) {
4956 ++contents
; ++currentLength
;
4958 if (contents
< limit
) {
4959 tIndex
= *contents
- HANGUL_TBASE
;
4960 if ((tIndex
< 0) || (tIndex
> HANGUL_TCOUNT
)) {
4963 ++contents
; ++currentLength
;
4966 *mappedCharacters
= (lIndex
* HANGUL_VCOUNT
+ vIndex
) * HANGUL_TCOUNT
+ tIndex
+ HANGUL_SBASE
;
// Otherwise fold following non-base characters with combining property 0 into
// currentChar for as long as CFUniCharPrecomposeCharacter() finds a composition.
4970 } else { // collect class 0 non-base characters
4971 while (contents
< limit
) {
4972 nextChar
= *contents
;
4973 if (CFUniCharIsSurrogateHighCharacter(nextChar
) && ((contents
+ 1) < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
4974 nextChar
= CFUniCharGetLongCharacterForSurrogatePair(nextChar
, *(contents
+ 1));
4975 if (!CFUniCharIsMemberOfBitmap(nextChar
, (const uint8_t *)CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (nextChar
>> 16))) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar
, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (nextChar
>> 16))))) break;
4977 if (!CFUniCharIsMemberOfBitmap(nextChar
, nonBaseBMP
) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar
, combiningBMP
))) break;
4979 currentChar
= CFUniCharPrecomposeCharacter(currentChar
, nextChar
);
4980 if (0xFFFD == currentChar
) break;
4982 if (nextChar
< 0x10000) {
4983 ++contents
; ++currentLength
;
4989 *mappedCharacters
= currentChar
;
// Write the run back as UTF-16. If its length differs from the source span the
// string is resized first; contents and limit are then re-derived from the
// string, presumably because the storage may have moved.
4995 if (mappedLength
> 0) {
4996 CFIndex utf16Length
= __CFGetUTF16Length(mappedCharacters
, mappedLength
);
4998 if (utf16Length
!= currentLength
) {
4999 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, currentLength
), utf16Length
, true);
5000 currentLength
= utf16Length
;
5002 contents
= (UTF16Char
*)__CFStrContents(string
);
5003 limit
= contents
+ __CFStrLength(string
);
5004 contents
+= currentIndex
;
5005 __CFFillInUTF16(mappedCharacters
, contents
, mappedLength
);
5006 contents
+= utf16Length
;
5008 currentIndex
+= currentLength
;
// Release the working buffer if it was moved to the heap.
5011 if (mappedCharacters
!= buffer
) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, mappedCharacters
);
/*
 * CFStringFold
 * Folds theString in place according to theFlags.
 *   theString - the mutable string to rewrite.
 *   theFlags  - compare flags; after the locale check below only the
 *               case-, diacritic- and width-insensitive bits are kept.
 *   locale    - locale to use; when NULL and kCFCompareLocalized is set,
 *               the current locale is copied and released again at the end.
 * Returns nothing; does no work when no folding flag remains or the string is empty.
 */
5015 void CFStringFold(CFMutableStringRef theString
, CFStringCompareFlags theFlags
, CFLocaleRef locale
) {
5016 CFStringInlineBuffer stringBuffer
;
5017 CFIndex length
= CFStringGetLength(theString
);
// currentIndex: position (in string units) of the next character still to be folded.
5018 CFIndex currentIndex
= 0;
// bufferLength: number of UTF-32 values currently held in `buffer` (result of the last fold call).
5019 CFIndex bufferLength
= 0;
5020 UTF32Char buffer
[kCFStringStackBufferLength
];
5021 const uint8_t *cString
;
5022 const uint8_t *langCode
;
5023 CFStringEncoding eightBitEncoding
;
5024 bool caseInsensitive
= ((theFlags
& kCFCompareCaseInsensitive
) ? true : false);
5025 bool isObjc
= CF_IS_OBJC(__kCFStringTypeID
, theString
);
// theLocale may later point at a locale copied here; `locale` keeps the caller's original value
// so the cleanup at the end can tell the two cases apart.
5026 CFLocaleRef theLocale
= locale
;
5028 if ((theFlags
& kCFCompareLocalized
) && (NULL
== locale
)) {
5029 theLocale
= CFLocaleCopyCurrent();
// Discard every flag except the three folding options handled below.
5032 theFlags
&= (kCFCompareCaseInsensitive
|kCFCompareDiacriticInsensitive
|kCFCompareWidthInsensitive
);
5034 if ((0 == theFlags
) || (0 == length
)) goto bail
; // nothing to do
// langCode is NULL when there is no locale; otherwise it is whatever
// _CFStrGetLanguageIdentifierForLocale returns for theLocale.
5036 langCode
= ((NULL
== theLocale
) ? NULL
: (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(theLocale
));
// cString is non-NULL only if the string can hand out its bytes in the 8-bit encoding.
5038 eightBitEncoding
= __CFStringGetEightBitStringEncoding();
5039 cString
= (const uint8_t *)CFStringGetCStringPtr(theString
, eightBitEncoding
);
// With an ASCII 8-bit representation and no case folding requested there is nothing to change.
5041 if ((NULL
!= cString
) && !caseInsensitive
&& (kCFStringEncodingASCII
== eightBitEncoding
)) goto bail
; // All ASCII
5043 CFStringInitInlineBuffer(theString
, &stringBuffer
, CFRangeMake(0, length
));
// 8-bit pass: walk the bytes and fold in place for as long as each result still fits in one ASCII byte.
5045 if ((NULL
!= cString
) && (theFlags
& (kCFCompareCaseInsensitive
|kCFCompareDiacriticInsensitive
))) {
5046 const uint8_t *cStringPtr
= cString
;
5047 const uint8_t *cStringLimit
= cString
+ length
;
// Writable view of the stored bytes (past any length byte); NULL for ObjC-backed strings,
// in which case no in-place byte edit is attempted.
5048 uint8_t *cStringContents
= (isObjc
? NULL
: (uint8_t *)__CFStrContents(theString
) + __CFStrSkipAnyLengthByte(theString
));
5050 while (cStringPtr
< cStringLimit
) {
// Plain ASCII byte and no language-specific rules: only 'A'..'Z' can need changing.
5051 if ((*cStringPtr
< 0x80) && (NULL
== langCode
)) {
5052 if (caseInsensitive
&& (*cStringPtr
>= 'A') && (*cStringPtr
<= 'Z')) {
5053 if (NULL
== cStringContents
) {
// Lowercase the ASCII letter directly in the string's storage.
5056 cStringContents
[cStringPtr
- cString
] += ('a' - 'A');
// Otherwise ask the general folding routine; the byte is first mapped to Unicode
// through __CFCharToUniCharTable.
5060 if ((bufferLength
= __CFStringFoldCharacterClusterAtIndex((UTF32Char
)__CFCharToUniCharTable
[*cStringPtr
], &stringBuffer
, cStringPtr
- cString
, theFlags
, langCode
, buffer
, kCFStringStackBufferLength
, NULL
)) > 0) {
// Leave the 8-bit pass when the result is non-ASCII, longer than one character,
// or the storage is not writable; bufferLength/buffer keep the pending result.
5061 if ((*buffer
> 0x7F) || (bufferLength
> 1) || (NULL
== cStringContents
)) break;
5062 cStringContents
[cStringPtr
- cString
] = *buffer
;
// Remember how far the 8-bit pass got.
5068 currentIndex
= cStringPtr
- cString
;
// Anything left over is handled below.
5071 if (currentIndex
< length
) {
5072 UTF16Char
*contents
;
// Copy-out path: extract the remaining range as UTF-16, wrap it in a mutable string backed
// by that buffer, fold that string recursively, then splice the result back into theString.
// NOTE(review): presumably this is the branch for ObjC-backed strings; confirm against the
// enclosing condition.
5075 CFMutableStringRef cfString
;
5076 CFRange range
= CFRangeMake(currentIndex
, length
- currentIndex
);
5078 contents
= (UTF16Char
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * range
.length
, 0);
5080 CFStringGetCharacters(theString
, range
, contents
);
5082 cfString
= CFStringCreateMutableWithExternalCharactersNoCopy(kCFAllocatorSystemDefault
, contents
, range
.length
, range
.length
, NULL
);
5084 CFStringFold(cfString
, theFlags
, theLocale
);
5086 CFStringReplace(theString
, range
, cfString
);
5088 CFRelease(cfString
);
// In-place path working directly on the string's own storage.
5090 const UTF32Char
*characters
;
5091 const UTF32Char
*charactersLimit
;
5092 UTF32Char character
;
5093 CFIndex consumedLength
;
// A fold result is still pending from the 8-bit pass: resize the string by bufferLength - 1
// at currentIndex + 1, refresh the cached length and inline buffer, then store the pending
// values as UTF-16 units starting at currentIndex.
5097 if (bufferLength
> 0) {
5098 __CFStringChangeSize(theString
, CFRangeMake(currentIndex
+ 1, 0), bufferLength
- 1, true);
5099 length
= __CFStrLength(theString
);
5100 CFStringInitInlineBuffer(theString
, &stringBuffer
, CFRangeMake(0, length
));
5102 contents
= (UTF16Char
*)__CFStrContents(theString
) + currentIndex
;
5103 characters
= buffer
;
5104 charactersLimit
= characters
+ bufferLength
;
5105 while (characters
< charactersLimit
) *(contents
++) = (UTF16Char
)*(characters
++);
// Main loop over the remaining characters.
5109 while (currentIndex
< length
) {
5110 character
= __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer
, currentIndex
);
// Shortcut for ASCII when no language rules apply and diacritic folding is off.
5114 if ((NULL
== langCode
) && (character
< 0x80) && (0 == (theFlags
& kCFCompareDiacriticInsensitive
))) {
5115 if (caseInsensitive
&& (character
>= 'A') && (character
<= 'Z')) {
5118 *buffer
= character
+ ('a' - 'A');
// Combine a high surrogate with the following low surrogate into one UTF-32 value.
5121 if (CFUniCharIsSurrogateHighCharacter(character
) && ((currentIndex
+ 1) < length
)) {
5122 UTF16Char lowSurrogate
= __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer
, currentIndex
+ 1);
5123 if (CFUniCharIsSurrogateLowCharacter(lowSurrogate
)) character
= CFUniCharGetLongCharacterForSurrogatePair(character
, lowSurrogate
);
// Fold the cluster at currentIndex; consumedLength receives how many string units it covered.
5126 bufferLength
= __CFStringFoldCharacterClusterAtIndex(character
, &stringBuffer
, currentIndex
, theFlags
, langCode
, buffer
, kCFStringStackBufferLength
, &consumedLength
);
5129 if (consumedLength
> 0) {
5130 CFIndex utf16Length
= bufferLength
;
5132 characters
= buffer
;
5133 charactersLimit
= characters
+ bufferLength
;
// Every value above 0xFFFF needs two UTF-16 units.
5135 while (characters
< charactersLimit
) if (*(characters
++) > 0xFFFF) ++utf16Length
; // Extend bufferLength to the UTF-16 length
// Resize when the replacement differs in length from what it replaces, or when the
// string is still stored as 8-bit.
5137 if ((utf16Length
!= consumedLength
) || __CFStrIsEightBit(theString
)) {
5139 CFIndex insertLength
;
5141 if (consumedLength
< utf16Length
) { // Need to expand
5142 range
= CFRangeMake(currentIndex
+ consumedLength
, 0);
5143 insertLength
= utf16Length
- consumedLength
;
// Shrinking case: the range covers the surplus units after the replacement.
5145 range
= CFRangeMake(currentIndex
+ utf16Length
, consumedLength
- utf16Length
);
// The storage may have moved, so the cached length and inline buffer are refreshed.
5148 __CFStringChangeSize(theString
, range
, insertLength
, true);
5149 length
= __CFStrLength(theString
);
5150 CFStringInitInlineBuffer(theString
, &stringBuffer
, CFRangeMake(0, length
));
// Write the folded UTF-32 values into the string as UTF-16 starting at currentIndex.
5153 (void)CFUniCharFromUTF32(buffer
, bufferLength
, (UTF16Char
*)__CFStrContents(theString
) + currentIndex
, true, __CF_BIG_ENDIAN__
);
5155 currentIndex
+= utf16Length
;
// Release the locale only if it was copied here (the caller passed NULL).
5164 if (NULL
== locale
&& theLocale
) {
5165 CFRelease(theLocale
);
/* Flag bits OR-ed into a CFFormatSpec's `flags` field while a format specification is parsed. */
5170 kCFStringFormatZeroFlag
= (1 << 0), // if not, padding is space char
5171 kCFStringFormatMinusFlag
= (1 << 1), // if not, no flag implied
5172 kCFStringFormatPlusFlag
= (1 << 2), // if not, no flag implied, overrides space
5173 kCFStringFormatSpaceFlag
= (1 << 3), // if not, no flag implied
5174 kCFStringFormatExternalSpecFlag
= (1 << 4) // using config dict
/* Initialized to -1 for every spec; a value other than -1 selects the config-dictionary
   path when a CF/object argument is formatted. */
5188 int8_t configDictIndex
;
/* Storage for a long double argument; filled from va_arg(args, long double) when the
   value's size is CFFormatSize16. Only present when LONG_DOUBLE_SUPPORT is non-zero. */
5197 #if LONG_DOUBLE_SUPPORT
5198 long double longDoubleValue
;
/* Argument size codes. CFFormatDefaultSize (0) means no explicit size was parsed.
   CFFormatSizeLong and CFFormatSizePointer are aliases: the first pair below maps them to
   CFFormatSize8 and the second pair to CFFormatSize4 (the two pairs are alternatives;
   users of these names describe them as "4 or 8 depending on LP64"). */
5205 CFFormatDefaultSize
= 0,
5212 CFFormatSizeLong
= CFFormatSize8
,
5213 CFFormatSizePointer
= CFFormatSize8
5215 CFFormatSizeLong
= CFFormatSize4
,
5216 CFFormatSizePointer
= CFFormatSize4
/* Argument type codes stored in a spec's / value's `type` field. Numbering starts at 32;
   0 is not one of these codes — specs start out with type 0 and entries still at 0 are
   skipped when argument values are collected. */
5223 CFFormatLiteralType
= 32,
5224 CFFormatLongType
= 33,
5225 CFFormatDoubleType
= 34,
5226 CFFormatPointerType
= 35,
5227 CFFormatObjectType
= 36, /* handled specially */ /* ??? not used anymore, can be removed? */
5228 CFFormatCFType
= 37, /* handled specially */
5229 CFFormatUnicharsType
= 38, /* handled specially */
5230 CFFormatCharsType
= 39, /* handled specially */
5231 CFFormatPascalCharsType
= 40, /* handled specially */
5232 CFFormatSingleUnicharType
= 41, /* handled specially */
5233 CFFormatDummyPointerType
= 42 /* special case for %n */
/*
 * __CFParseFormatSpec
 * Parses one format specification and records the result in *spec.
 *   uformat / cformat - the format text as UniChars or as 8-bit bytes; cformat is used
 *                       whenever it is non-NULL, otherwise uformat.
 *   fmtIdx            - in/out index of the next character to read; advanced as characters
 *                       are consumed (the caller steps past the '%' before calling).
 *   fmtLen            - total length of the format text.
 *   spec              - receives flags, size, type, literal width/precision and the
 *                       argument numbers for width, precision and the main value.
 *   configKeyPointer  - optional; receives a string built from the key characters when a
 *                       key terminated by '@' is found.
 * Returns nothing; returns early, leaving spec->type untouched, when the text is exhausted.
 */
5236 CF_INLINE
void __CFParseFormatSpec(const UniChar
*uformat
, const uint8_t *cformat
, SInt32
*fmtIdx
, SInt32 fmtLen
, CFFormatSpec
*spec
, CFStringRef
*configKeyPointer
) {
5237 Boolean seenDot
= false;
5238 Boolean seenSharp
= false;
// keyIndex >= 0 means a config key is being scanned and started at that index.
5239 CFIndex keyIndex
= kCFNotFound
;
5243 if (fmtLen
<= *fmtIdx
) return; /* no type */
// Fetch the next character and advance the index.
5244 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)++]; else ch
= uformat
[(*fmtIdx
)++];
5246 if (keyIndex
>= 0) {
// Any character outside [0-9A-Za-z_] ends the key.
5247 if ((ch
< '0') || ((ch
> '9') && (ch
< 'A')) || ((ch
> 'Z') && (ch
< 'a') && (ch
!= '_')) || (ch
> 'z')) {
5248 if (ch
== '@') { // found the key
// The key spans keyIndex up to, but not including, the '@' just consumed.
5249 CFIndex length
= (*fmtIdx
) - 1 - keyIndex
;
5251 spec
->flags
|= kCFStringFormatExternalSpecFlag
;
5252 spec
->type
= CFFormatCFType
;
5253 spec
->size
= CFFormatSizePointer
; // 4 or 8 depending on LP64
5255 if ((NULL
!= configKeyPointer
) && (length
> 0)) {
// 8-bit format text: build the key string from the bytes.
5257 *configKeyPointer
= CFStringCreateWithBytes(NULL
, cformat
+ keyIndex
, length
, __CFStringGetEightBitStringEncoding(), FALSE
);
// UniChar format text: the key string is created with the NoCopy variant and kCFAllocatorNull.
// NOTE(review): that means it presumably keeps pointing into the caller's format buffer — confirm lifetime.
5259 *configKeyPointer
= CFStringCreateWithCharactersNoCopy(NULL
, uformat
+ keyIndex
, length
, kCFAllocatorNull
);
5264 keyIndex
= kCFNotFound
;
// Dispatch on the current character; `reswtch` is re-entered by goto after a number has been read.
5269 reswtch
:switch (ch
) {
5270 case '#': // ignored for now
// The space flag is only recorded while the plus flag is not set.
5274 if (!(spec
->flags
& kCFStringFormatPlusFlag
)) spec
->flags
|= kCFStringFormatSpaceFlag
;
// The minus flag cancels the zero flag.
5277 spec
->flags
|= kCFStringFormatMinusFlag
;
5278 spec
->flags
&= ~kCFStringFormatZeroFlag
; // remove zero flag
// The plus flag cancels the space flag.
5281 spec
->flags
|= kCFStringFormatPlusFlag
;
5282 spec
->flags
&= ~kCFStringFormatSpaceFlag
; // remove space flag
// The zero flag is only recorded while the minus flag is not set.
5285 if (!(spec
->flags
& kCFStringFormatMinusFlag
)) spec
->flags
|= kCFStringFormatZeroFlag
;
// Size modifier handling: peek at the following character to tell a doubled modifier
// from a single one.
5288 if (*fmtIdx
< fmtLen
) {
5289 // fetch next character, don't increment fmtIdx
5290 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)]; else ch
= uformat
[(*fmtIdx
)];
5291 if ('h' == ch
) { // 'hh' for char, like 'c'
5293 spec
->size
= CFFormatSize1
;
5297 spec
->size
= CFFormatSize2
;
5300 if (*fmtIdx
< fmtLen
) {
5301 // fetch next character, don't increment fmtIdx
5302 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)]; else ch
= uformat
[(*fmtIdx
)];
5303 if ('l' == ch
) { // 'll' for long long, like 'q'
5305 spec
->size
= CFFormatSize8
;
5309 spec
->size
= CFFormatSizeLong
; // 4 or 8 depending on LP64
// The 16-byte size is only selectable when long double support is compiled in.
5311 #if LONG_DOUBLE_SUPPORT
5313 spec
->size
= CFFormatSize16
;
5317 spec
->size
= CFFormatSize8
;
5320 spec
->size
= CFFormatSizeLong
; // 4 or 8 depending on LP64
5323 spec
->size
= CFFormatSize8
;
// Integer value of size 1; such values are narrowed to int8_t when collected from the va_list.
5326 spec
->type
= CFFormatLongType
;
5327 spec
->size
= CFFormatSize1
;
// Integer conversions: the size parsed so far (possibly 0) is left as is.
5329 case 'O': case 'o': case 'D': case 'd': case 'i': case 'U': case 'u': case 'x': case 'X':
5330 spec
->type
= CFFormatLongType
;
5331 // Seems like if spec->size == 0, we should spec->size = CFFormatSize4. However, 0 is handled correctly.
// Floating-point conversions: size 8 unless the 16-byte size was selected above.
5333 case 'a': case 'A': case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
5334 spec
->type
= CFFormatDoubleType
;
5335 if (spec
->size
!= CFFormatSize16
) spec
->size
= CFFormatSize8
;
5337 case 'n': /* %n is not handled correctly; for Leopard or newer apps, we disable it further */
5338 spec
->type
= _CFExecutableLinkedOnOrAfter(CFSystemVersionLeopard
) ? CFFormatDummyPointerType
: CFFormatPointerType
;
5339 spec
->size
= CFFormatSizePointer
; // 4 or 8 depending on LP64
// Pointer-sized argument types follow: raw pointer, C chars, UniChars, Pascal chars, CF object.
5342 spec
->type
= CFFormatPointerType
;
5343 spec
->size
= CFFormatSizePointer
; // 4 or 8 depending on LP64
5346 spec
->type
= CFFormatCharsType
;
5347 spec
->size
= CFFormatSizePointer
; // 4 or 8 depending on LP64
5350 spec
->type
= CFFormatUnicharsType
;
5351 spec
->size
= CFFormatSizePointer
; // 4 or 8 depending on LP64
// A single UniChar is passed by value as a 2-byte integer.
5354 spec
->type
= CFFormatSingleUnicharType
;
5355 spec
->size
= CFFormatSize2
;
5358 spec
->type
= CFFormatPascalCharsType
;
5359 spec
->size
= CFFormatSizePointer
; // 4 or 8 depending on LP64
5367 spec
->type
= CFFormatCFType
;
5368 spec
->size
= CFFormatSizePointer
; // 4 or 8 depending on LP64
// Decimal number: accumulate digits, then decide what the number means.
5371 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
5374 number
= 10 * number
+ (ch
- '0');
// NOTE(review): the next character is read without comparing *fmtIdx against fmtLen, so a
// format that ends in digits reads one element past the end of the text.
5375 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)++]; else ch
= uformat
[(*fmtIdx
)++];
5376 } while ((UInt32
)(ch
- '0') <= 9);
// Explicit argument number: it fills whichever of precision / width is waiting for an
// argument (marked -2), otherwise it names the main argument. Stored zero-based.
// NOTE(review): the value is narrowed to int8_t, so argument numbers above 127 do not survive.
5378 if (-2 == spec
->precArgNum
) {
5379 spec
->precArgNum
= (int8_t)number
- 1; // Arg numbers start from 1
5380 } else if (-2 == spec
->widthArgNum
) {
5381 spec
->widthArgNum
= (int8_t)number
- 1; // Arg numbers start from 1
5383 spec
->mainArgNum
= (int8_t)number
- 1; // Arg numbers start from 1
5386 } else if (seenDot
) { /* else it's either precision or width */
5387 spec
->precArg
= (SInt32
)number
;
5389 spec
->widthArg
= (SInt32
)number
;
// -2 marks "width comes from an argument"; the caller later replaces -2 with the next
// sequential argument number.
5394 spec
->widthArgNum
= -2;
5398 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)++]; else ch
= uformat
[(*fmtIdx
)++];
// Same -2 marker for a precision supplied as an argument.
5400 spec
->precArgNum
= -2;
// Fallback: the specification is treated as literal text.
5405 spec
->type
= CFFormatLiteralType
;
5411 /* ??? It ignores the formatOptions argument.
5412 ??? %s depends on handling of encodings by __CFStringAppendBytes
// Appends text formatted from formatString and the va_list `args` to outputString.
//   outputString  - mutable string that receives the formatted text.
//   formatOptions - passed through unchanged to __CFStringAppendFormatCore.
//   formatString  - the format to interpret.
//   args          - the variadic arguments matching formatString.
// Forwards to __CFStringAppendFormatCore with no description callback (NULL), an initial
// argument position of 0 and no pre-collected values (NULL, 0).
5414 void CFStringAppendFormatAndArguments(CFMutableStringRef outputString
, CFDictionaryRef formatOptions
, CFStringRef formatString
, va_list args
) {
5415 __CFStringAppendFormatCore(outputString
, NULL
, formatOptions
, formatString
, 0, NULL
, 0, args
);
/*
 * SNPRINTF(TYPE, WHAT)
 * Formats WHAT, cast to TYPE, into `buffer` using `formatBuffer`. Depending on whether the
 * current spec's widthArgNum and precArgNum are set (!= -1), `width` and/or `precision` are
 * passed ahead of the value. The macro relies on specs, curSpec, buffer, formatBuffer, width
 * and precision being in scope where it is expanded.
 * Two definitions are given: the first uses snprintf_l with a limit of 255 and a NULL locale;
 * the second uses sprintf.
 * NOTE(review): the sprintf variant passes no length limit, unlike the snprintf_l variant.
 */
5418 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
5419 #define SNPRINTF(TYPE, WHAT) { \
5420 TYPE value = (TYPE) WHAT; \
5421 if (-1 != specs[curSpec].widthArgNum) { \
5422 if (-1 != specs[curSpec].precArgNum) { \
5423 snprintf_l(buffer, 255, NULL, formatBuffer, width, precision, value); \
5425 snprintf_l(buffer, 255, NULL, formatBuffer, width, value); \
5428 if (-1 != specs[curSpec].precArgNum) { \
5429 snprintf_l(buffer, 255, NULL, formatBuffer, precision, value); \
5431 snprintf_l(buffer, 255, NULL, formatBuffer, value); \
5435 #define SNPRINTF(TYPE, WHAT) { \
5436 TYPE value = (TYPE) WHAT; \
5437 if (-1 != specs[curSpec].widthArgNum) { \
5438 if (-1 != specs[curSpec].precArgNum) { \
5439 sprintf(buffer, formatBuffer, width, precision, value); \
5441 sprintf(buffer, formatBuffer, width, value); \
5444 if (-1 != specs[curSpec].precArgNum) { \
5445 sprintf(buffer, formatBuffer, precision, value); \
5447 sprintf(buffer, formatBuffer, value); \
5452 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString
, CFStringRef (*copyDescFunc
)(void *, const void *), CFDictionaryRef formatOptions
, CFStringRef formatString
, va_list args
) { __CFStringAppendFormatCore(outputString
, copyDescFunc
, formatOptions
, formatString
, 0, NULL
, 0, args
); }
5454 static void __CFStringAppendFormatCore(CFMutableStringRef outputString
, CFStringRef (*copyDescFunc
)(void *, const void *), CFDictionaryRef formatOptions
, CFStringRef formatString
, CFIndex initialArgPosition
, const void *origValues
, CFIndex originalValuesSize
, va_list args
) {
5455 SInt32 numSpecs
, sizeSpecs
, sizeArgNum
, formatIdx
, curSpec
, argNum
;
5457 #define FORMAT_BUFFER_LEN 400
5458 const uint8_t *cformat
= NULL
;
5459 const UniChar
*uformat
= NULL
;
5460 UniChar
*formatChars
= NULL
;
5461 UniChar localFormatBuffer
[FORMAT_BUFFER_LEN
];
5463 #define VPRINTF_BUFFER_LEN 61
5464 CFFormatSpec localSpecsBuffer
[VPRINTF_BUFFER_LEN
];
5465 CFFormatSpec
*specs
;
5466 CFPrintValue localValuesBuffer
[VPRINTF_BUFFER_LEN
];
5467 CFPrintValue
*values
;
5468 const CFPrintValue
*originalValues
= (const CFPrintValue
*)origValues
;
5469 CFDictionaryRef localConfigs
[VPRINTF_BUFFER_LEN
];
5470 CFDictionaryRef
*configs
;
5472 CFAllocatorRef tmpAlloc
= NULL
;
5473 intmax_t dummyLocation
; // A place for %n to do its thing in; should be the widest possible int value
5485 formatLen
= CFStringGetLength(formatString
);
5486 if (!CF_IS_OBJC(__kCFStringTypeID
, formatString
)) {
5487 __CFAssertIsString(formatString
);
5488 if (!__CFStrIsUnicode(formatString
)) {
5489 cformat
= (const uint8_t *)__CFStrContents(formatString
);
5490 if (cformat
) cformat
+= __CFStrSkipAnyLengthByte(formatString
);
5492 uformat
= (const UniChar
*)__CFStrContents(formatString
);
5495 if (!cformat
&& !uformat
) {
5496 formatChars
= (formatLen
> FORMAT_BUFFER_LEN
) ? (UniChar
*)CFAllocatorAllocate(tmpAlloc
= __CFGetDefaultAllocator(), formatLen
* sizeof(UniChar
), 0) : localFormatBuffer
;
5497 if (formatChars
!= localFormatBuffer
&& __CFOASafe
) __CFSetLastAllocationEventName(formatChars
, "CFString (temp)");
5498 CFStringGetCharacters(formatString
, CFRangeMake(0, formatLen
), formatChars
);
5499 uformat
= formatChars
;
5502 /* Compute an upper bound for the number of format specifications */
5504 for (formatIdx
= 0; formatIdx
< formatLen
; formatIdx
++) if ('%' == cformat
[formatIdx
]) sizeSpecs
++;
5506 for (formatIdx
= 0; formatIdx
< formatLen
; formatIdx
++) if ('%' == uformat
[formatIdx
]) sizeSpecs
++;
5508 tmpAlloc
= __CFGetDefaultAllocator();
5509 specs
= ((2 * sizeSpecs
+ 1) > VPRINTF_BUFFER_LEN
) ? (CFFormatSpec
*)CFAllocatorAllocate(tmpAlloc
, (2 * sizeSpecs
+ 1) * sizeof(CFFormatSpec
), 0) : localSpecsBuffer
;
5510 if (specs
!= localSpecsBuffer
&& __CFOASafe
) __CFSetLastAllocationEventName(specs
, "CFString (temp)");
5512 configs
= ((sizeSpecs
< VPRINTF_BUFFER_LEN
) ? localConfigs
: (CFDictionaryRef
*)CFAllocatorAllocate(tmpAlloc
, sizeof(CFStringRef
) * sizeSpecs
, 0));
5514 /* Collect format specification information from the format string */
5515 for (curSpec
= 0, formatIdx
= 0; formatIdx
< formatLen
; curSpec
++) {
5517 specs
[curSpec
].loc
= formatIdx
;
5518 specs
[curSpec
].len
= 0;
5519 specs
[curSpec
].size
= 0;
5520 specs
[curSpec
].type
= 0;
5521 specs
[curSpec
].flags
= 0;
5522 specs
[curSpec
].widthArg
= -1;
5523 specs
[curSpec
].precArg
= -1;
5524 specs
[curSpec
].mainArgNum
= -1;
5525 specs
[curSpec
].precArgNum
= -1;
5526 specs
[curSpec
].widthArgNum
= -1;
5527 specs
[curSpec
].configDictIndex
= -1;
5529 for (newFmtIdx
= formatIdx
; newFmtIdx
< formatLen
&& '%' != cformat
[newFmtIdx
]; newFmtIdx
++);
5531 for (newFmtIdx
= formatIdx
; newFmtIdx
< formatLen
&& '%' != uformat
[newFmtIdx
]; newFmtIdx
++);
5533 if (newFmtIdx
!= formatIdx
) { /* Literal chunk */
5534 specs
[curSpec
].type
= CFFormatLiteralType
;
5535 specs
[curSpec
].len
= newFmtIdx
- formatIdx
;
5537 CFStringRef configKey
= NULL
;
5538 newFmtIdx
++; /* Skip % */
5539 __CFParseFormatSpec(uformat
, cformat
, &newFmtIdx
, formatLen
, &(specs
[curSpec
]), &configKey
);
5540 if (CFFormatLiteralType
== specs
[curSpec
].type
) {
5541 specs
[curSpec
].loc
= formatIdx
+ 1;
5542 specs
[curSpec
].len
= 1;
5544 specs
[curSpec
].len
= newFmtIdx
- formatIdx
;
5547 formatIdx
= newFmtIdx
;
5549 // fprintf(stderr, "specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
5554 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
5555 sizeArgNum
= ((NULL
== originalValues
) ? (3 * sizeSpecs
+ 1) : originalValuesSize
);
5557 values
= (sizeArgNum
> VPRINTF_BUFFER_LEN
) ? (CFPrintValue
*)CFAllocatorAllocate(tmpAlloc
, sizeArgNum
* sizeof(CFPrintValue
), 0) : localValuesBuffer
;
5558 if (values
!= localValuesBuffer
&& __CFOASafe
) __CFSetLastAllocationEventName(values
, "CFString (temp)");
5559 memset(values
, 0, sizeArgNum
* sizeof(CFPrintValue
));
5561 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
5562 // va_copy is a C99 extension. No support on Windows
5563 if (numConfigs
> 0) va_copy(copiedArgs
, args
); // we need to preserve the original state for passing down
5564 #endif /* DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD */
5566 /* Compute values array */
5567 argNum
= initialArgPosition
;
5568 for (curSpec
= 0; curSpec
< numSpecs
; curSpec
++) {
5569 SInt32 newMaxArgNum
;
5570 if (0 == specs
[curSpec
].type
) continue;
5571 if (CFFormatLiteralType
== specs
[curSpec
].type
) continue;
5572 newMaxArgNum
= sizeArgNum
;
5573 if (newMaxArgNum
< specs
[curSpec
].mainArgNum
) {
5574 newMaxArgNum
= specs
[curSpec
].mainArgNum
;
5576 if (newMaxArgNum
< specs
[curSpec
].precArgNum
) {
5577 newMaxArgNum
= specs
[curSpec
].precArgNum
;
5579 if (newMaxArgNum
< specs
[curSpec
].widthArgNum
) {
5580 newMaxArgNum
= specs
[curSpec
].widthArgNum
;
5582 if (sizeArgNum
< newMaxArgNum
) {
5583 if (specs
!= localSpecsBuffer
) CFAllocatorDeallocate(tmpAlloc
, specs
);
5584 if (values
!= localValuesBuffer
) CFAllocatorDeallocate(tmpAlloc
, values
);
5585 if (formatChars
&& (formatChars
!= localFormatBuffer
)) CFAllocatorDeallocate(tmpAlloc
, formatChars
);
5586 return; // more args than we expected!
5588 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
5589 if (-2 == specs
[curSpec
].widthArgNum
) {
5590 specs
[curSpec
].widthArgNum
= argNum
++;
5592 if (-2 == specs
[curSpec
].precArgNum
) {
5593 specs
[curSpec
].precArgNum
= argNum
++;
5595 if (-1 == specs
[curSpec
].mainArgNum
) {
5596 specs
[curSpec
].mainArgNum
= argNum
++;
5599 values
[specs
[curSpec
].mainArgNum
].size
= specs
[curSpec
].size
;
5600 values
[specs
[curSpec
].mainArgNum
].type
= specs
[curSpec
].type
;
5603 if (-1 != specs
[curSpec
].widthArgNum
) {
5604 values
[specs
[curSpec
].widthArgNum
].size
= 0;
5605 values
[specs
[curSpec
].widthArgNum
].type
= CFFormatLongType
;
5607 if (-1 != specs
[curSpec
].precArgNum
) {
5608 values
[specs
[curSpec
].precArgNum
].size
= 0;
5609 values
[specs
[curSpec
].precArgNum
].type
= CFFormatLongType
;
5613 /* Collect the arguments in correct type from vararg list */
5614 for (argNum
= 0; argNum
< sizeArgNum
; argNum
++) {
5615 if ((NULL
!= originalValues
) && (0 == values
[argNum
].type
)) values
[argNum
] = originalValues
[argNum
];
5616 switch (values
[argNum
].type
) {
5618 case CFFormatLiteralType
:
5620 case CFFormatLongType
:
5621 case CFFormatSingleUnicharType
:
5622 if (CFFormatSize1
== values
[argNum
].size
) {
5623 values
[argNum
].value
.int64Value
= (int64_t)(int8_t)va_arg(args
, int);
5624 } else if (CFFormatSize2
== values
[argNum
].size
) {
5625 values
[argNum
].value
.int64Value
= (int64_t)(int16_t)va_arg(args
, int);
5626 } else if (CFFormatSize4
== values
[argNum
].size
) {
5627 values
[argNum
].value
.int64Value
= (int64_t)va_arg(args
, int32_t);
5628 } else if (CFFormatSize8
== values
[argNum
].size
) {
5629 values
[argNum
].value
.int64Value
= (int64_t)va_arg(args
, int64_t);
5631 values
[argNum
].value
.int64Value
= (int64_t)va_arg(args
, int);
5634 case CFFormatDoubleType
:
5635 #if LONG_DOUBLE_SUPPORT
5636 if (CFFormatSize16
== values
[argNum
].size
) {
5637 values
[argNum
].value
.longDoubleValue
= va_arg(args
, long double);
5641 values
[argNum
].value
.doubleValue
= va_arg(args
, double);
5644 case CFFormatPointerType
:
5645 case CFFormatObjectType
:
5646 case CFFormatCFType
:
5647 case CFFormatUnicharsType
:
5648 case CFFormatCharsType
:
5649 case CFFormatPascalCharsType
:
5650 values
[argNum
].value
.pointerValue
= va_arg(args
, void *);
5652 case CFFormatDummyPointerType
:
5653 (void)va_arg(args
, void *); // Skip the provided argument
5654 values
[argNum
].value
.pointerValue
= &dummyLocation
;
5660 /* Format the pieces together */
5662 if (NULL
== originalValues
) {
5663 originalValues
= values
;
5664 originalValuesSize
= sizeArgNum
;
5667 for (curSpec
= 0; curSpec
< numSpecs
; curSpec
++) {
5668 SInt32 width
= 0, precision
= 0;
5670 Boolean hasWidth
= false, hasPrecision
= false;
5672 // widthArgNum and widthArg are never set at the same time; same for precArg*
5673 if (-1 != specs
[curSpec
].widthArgNum
) {
5674 width
= (SInt32
)values
[specs
[curSpec
].widthArgNum
].value
.int64Value
;
5677 if (-1 != specs
[curSpec
].precArgNum
) {
5678 precision
= (SInt32
)values
[specs
[curSpec
].precArgNum
].value
.int64Value
;
5679 hasPrecision
= true;
5681 if (-1 != specs
[curSpec
].widthArg
) {
5682 width
= specs
[curSpec
].widthArg
;
5685 if (-1 != specs
[curSpec
].precArg
) {
5686 precision
= specs
[curSpec
].precArg
;
5687 hasPrecision
= true;
5690 switch (specs
[curSpec
].type
) {
5691 case CFFormatLongType
:
5692 case CFFormatDoubleType
:
5693 case CFFormatPointerType
: {
5694 char formatBuffer
[128];
5695 #if defined(__GNUC__)
5696 char buffer
[256 + width
+ precision
];
5698 char stackBuffer
[512];
5699 char *dynamicBuffer
= NULL
;
5700 char *buffer
= stackBuffer
;
5701 if (256+width
+precision
> 512) {
5702 dynamicBuffer
= (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault
, 256+width
+precision
, 0);
5703 buffer
= dynamicBuffer
;
5706 SInt32 cidx
, idx
, loc
;
5707 Boolean appended
= false;
5708 loc
= specs
[curSpec
].loc
;
5709 // In preparation to call snprintf(), copy the format string out
5711 for (idx
= 0, cidx
= 0; cidx
< specs
[curSpec
].len
; idx
++, cidx
++) {
5712 if ('$' == cformat
[loc
+ cidx
]) {
5713 for (idx
--; '0' <= formatBuffer
[idx
] && formatBuffer
[idx
] <= '9'; idx
--);
5715 formatBuffer
[idx
] = cformat
[loc
+ cidx
];
5719 for (idx
= 0, cidx
= 0; cidx
< specs
[curSpec
].len
; idx
++, cidx
++) {
5720 if ('$' == uformat
[loc
+ cidx
]) {
5721 for (idx
--; '0' <= formatBuffer
[idx
] && formatBuffer
[idx
] <= '9'; idx
--);
5723 formatBuffer
[idx
] = (int8_t)uformat
[loc
+ cidx
];
5727 formatBuffer
[idx
] = '\0';
5728 // Should modify format buffer here if necessary; for example, to translate %qd to
5729 // the equivalent, on architectures which do not have %q.
5730 buffer
[sizeof(buffer
) - 1] = '\0';
5731 switch (specs
[curSpec
].type
) {
5732 case CFFormatLongType
:
5733 if (CFFormatSize8
== specs
[curSpec
].size
) {
5734 SNPRINTF(int64_t, values
[specs
[curSpec
].mainArgNum
].value
.int64Value
)
5736 SNPRINTF(SInt32
, values
[specs
[curSpec
].mainArgNum
].value
.int64Value
)
5739 case CFFormatPointerType
:
5740 case CFFormatDummyPointerType
:
5741 SNPRINTF(void *, values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
)
5744 case CFFormatDoubleType
:
5745 #if LONG_DOUBLE_SUPPORT
5746 if (CFFormatSize16
== specs
[curSpec
].size
) {
5747 SNPRINTF(long double, values
[specs
[curSpec
].mainArgNum
].value
.longDoubleValue
)
5751 SNPRINTF(double, values
[specs
[curSpec
].mainArgNum
].value
.doubleValue
)
5753 // See if we need to localize the decimal point
5754 if (formatOptions
) { // We have localization info
5755 CFStringRef decimalSeparator
= (CFGetTypeID(formatOptions
) == CFLocaleGetTypeID()) ? (CFStringRef
)CFLocaleGetValue((CFLocaleRef
)formatOptions
, kCFLocaleDecimalSeparatorKey
) : (CFStringRef
)CFDictionaryGetValue(formatOptions
, CFSTR("NSDecimalSeparator"));
5756 if (decimalSeparator
!= NULL
) { // We have a decimal separator in there
5757 CFIndex decimalPointLoc
= 0;
5758 while (buffer
[decimalPointLoc
] != 0 && buffer
[decimalPointLoc
] != '.') decimalPointLoc
++;
5759 if (buffer
[decimalPointLoc
] == '.') { // And we have a decimal point in the formatted string
5760 buffer
[decimalPointLoc
] = 0;
5761 CFStringAppendCString(outputString
, (const char *)buffer
, __CFStringGetEightBitStringEncoding());
5762 CFStringAppend(outputString
, decimalSeparator
);
5763 CFStringAppendCString(outputString
, (const char *)(buffer
+ decimalPointLoc
+ 1), __CFStringGetEightBitStringEncoding());
5770 if (!appended
) CFStringAppendCString(outputString
, (const char *)buffer
, __CFStringGetEightBitStringEncoding());
5771 #if !defined(__GNUC__)
5772 if (dynamicBuffer
) {
5773 CFAllocatorDeallocate(kCFAllocatorSystemDefault
, dynamicBuffer
);
5778 case CFFormatLiteralType
:
5780 __CFStringAppendBytes(outputString
, (const char *)(cformat
+specs
[curSpec
].loc
), specs
[curSpec
].len
, __CFStringGetEightBitStringEncoding());
5782 CFStringAppendCharacters(outputString
, uformat
+specs
[curSpec
].loc
, specs
[curSpec
].len
);
5785 case CFFormatPascalCharsType
:
5786 case CFFormatCharsType
:
5787 if (values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
== NULL
) {
5788 CFStringAppendCString(outputString
, "(null)", kCFStringEncodingASCII
);
5791 const char *str
= (const char *)values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
;
5792 if (specs
[curSpec
].type
== CFFormatPascalCharsType
) { // Pascal string case
5793 len
= ((unsigned char *)str
)[0];
5795 if (hasPrecision
&& precision
< len
) len
= precision
;
5796 } else { // C-string case
5797 if (!hasPrecision
) { // No precision, so rely on the terminating null character
5799 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
5800 const char *terminatingNull
= (const char *)memchr(str
, 0, precision
); // Basically strlen() on only the first precision characters of str
5801 if (terminatingNull
) { // There was a null in the first precision characters
5802 len
= terminatingNull
- str
;
5808 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5809 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5810 // to ignore those flags (and, say, never pad with '0' instead of space).
5811 if (specs
[curSpec
].flags
& kCFStringFormatMinusFlag
) {
5812 __CFStringAppendBytes(outputString
, str
, len
, __CFStringGetSystemEncoding());
5813 if (hasWidth
&& width
> len
) {
5814 int w
= width
- len
; // We need this many spaces; do it ten at a time
5815 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5818 if (hasWidth
&& width
> len
) {
5819 int w
= width
- len
; // We need this many spaces; do it ten at a time
5820 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5822 __CFStringAppendBytes(outputString
, str
, len
, __CFStringGetSystemEncoding());
5826 case CFFormatSingleUnicharType
:
5827 ch
= (UniChar
)values
[specs
[curSpec
].mainArgNum
].value
.int64Value
;
5828 CFStringAppendCharacters(outputString
, &ch
, 1);
5830 case CFFormatUnicharsType
:
5831 //??? need to handle width, precision, and padding arguments
5832 up
= (UniChar
*)values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
;
5834 CFStringAppendCString(outputString
, "(null)", kCFStringEncodingASCII
);
5837 for (len
= 0; 0 != up
[len
]; len
++);
5838 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5839 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5840 // to ignore those flags (and, say, never pad with '0' instead of space).
5841 if (hasPrecision
&& precision
< len
) len
= precision
;
5842 if (specs
[curSpec
].flags
& kCFStringFormatMinusFlag
) {
5843 CFStringAppendCharacters(outputString
, up
, len
);
5844 if (hasWidth
&& width
> len
) {
5845 int w
= width
- len
; // We need this many spaces; do it ten at a time
5846 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5849 if (hasWidth
&& width
> len
) {
5850 int w
= width
- len
; // We need this many spaces; do it ten at a time
5851 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5853 CFStringAppendCharacters(outputString
, up
, len
);
5857 case CFFormatCFType
:
5858 case CFFormatObjectType
:
5859 if (specs
[curSpec
].configDictIndex
!= -1) { // config dict
5860 CFTypeRef object
= NULL
;
5861 CFStringRef innerFormat
= NULL
;
5863 switch (values
[specs
[curSpec
].mainArgNum
].type
) {
5864 case CFFormatLongType
:
5865 object
= CFNumberCreate(tmpAlloc
, kCFNumberSInt64Type
, &(values
[specs
[curSpec
].mainArgNum
].value
.int64Value
));
5868 case CFFormatDoubleType
:
5869 #if LONG_DOUBLE_SUPPORT
5870 if (CFFormatSize16
== values
[specs
[curSpec
].mainArgNum
].size
) {
5871 double aValue
= values
[specs
[curSpec
].mainArgNum
].value
.longDoubleValue
; // losing precision
5873 object
= CFNumberCreate(tmpAlloc
, kCFNumberDoubleType
, &aValue
);
5877 object
= CFNumberCreate(tmpAlloc
, kCFNumberDoubleType
, &(values
[specs
[curSpec
].mainArgNum
].value
.doubleValue
));
5881 case CFFormatPointerType
:
5882 object
= CFNumberCreate(tmpAlloc
, kCFNumberCFIndexType
, &(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
));
5885 case CFFormatPascalCharsType
:
5886 case CFFormatCharsType
:
5887 if (NULL
!= values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
) {
5888 CFMutableStringRef aString
= CFStringCreateMutable(tmpAlloc
, 0);
5890 const char *str
= (const char *)values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
;
5891 if (specs
[curSpec
].type
== CFFormatPascalCharsType
) { // Pascal string case
5892 len
= ((unsigned char *)str
)[0];
5894 if (hasPrecision
&& precision
< len
) len
= precision
;
5895 } else { // C-string case
5896 if (!hasPrecision
) { // No precision, so rely on the terminating null character
5898 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
5899 const char *terminatingNull
= (const char *)memchr(str
, 0, precision
); // Basically strlen() on only the first precision characters of str
5900 if (terminatingNull
) { // There was a null in the first precision characters
5901 len
= terminatingNull
- str
;
5907 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5908 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5909 // to ignore those flags (and, say, never pad with '0' instead of space).
5910 if (specs
[curSpec
].flags
& kCFStringFormatMinusFlag
) {
5911 __CFStringAppendBytes(aString
, str
, len
, __CFStringGetSystemEncoding());
5912 if (hasWidth
&& width
> len
) {
5913 int w
= width
- len
; // We need this many spaces; do it ten at a time
5914 do {__CFStringAppendBytes(aString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5917 if (hasWidth
&& width
> len
) {
5918 int w
= width
- len
; // We need this many spaces; do it ten at a time
5919 do {__CFStringAppendBytes(aString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5921 __CFStringAppendBytes(aString
, str
, len
, __CFStringGetSystemEncoding());
5928 case CFFormatSingleUnicharType
:
5929 ch
= (UniChar
)values
[specs
[curSpec
].mainArgNum
].value
.int64Value
;
5930 object
= CFStringCreateWithCharactersNoCopy(tmpAlloc
, &ch
, 1, kCFAllocatorNull
);
5933 case CFFormatUnicharsType
:
5934 //??? need to handle width, precision, and padding arguments
5935 up
= (UniChar
*)values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
;
5937 CFMutableStringRef aString
= CFStringCreateMutable(tmpAlloc
, 0);
5939 for (len
= 0; 0 != up
[len
]; len
++);
5940 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5941 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5942 // to ignore those flags (and, say, never pad with '0' instead of space).
5943 if (hasPrecision
&& precision
< len
) len
= precision
;
5944 if (specs
[curSpec
].flags
& kCFStringFormatMinusFlag
) {
5945 CFStringAppendCharacters(aString
, up
, len
);
5946 if (hasWidth
&& width
> len
) {
5947 int w
= width
- len
; // We need this many spaces; do it ten at a time
5948 do {__CFStringAppendBytes(aString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5951 if (hasWidth
&& width
> len
) {
5952 int w
= width
- len
; // We need this many spaces; do it ten at a time
5953 do {__CFStringAppendBytes(aString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5955 CFStringAppendCharacters(aString
, up
, len
);
5961 case CFFormatCFType
:
5962 case CFFormatObjectType
:
5963 if (NULL
!= values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
) object
= CFRetain(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
);
5967 if (NULL
!= object
) CFRelease(object
);
5969 } else if (NULL
!= values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
) {
5970 CFStringRef str
= NULL
;
5972 str
= copyDescFunc(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
, formatOptions
);
5974 str
= __CFCopyFormattingDescription(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
, formatOptions
);
5976 str
= CFCopyDescription(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
);
5980 CFStringAppend(outputString
, str
);
5983 CFStringAppendCString(outputString
, "(null description)", kCFStringEncodingASCII
);
5986 CFStringAppendCString(outputString
, "(null)", kCFStringEncodingASCII
);
5992 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
5993 // va_copy is a C99 extension. No support on Windows
5994 if (numConfigs
> 0) va_end(copiedArgs
);
5995 #endif /* DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD */
5996 if (specs
!= localSpecsBuffer
) CFAllocatorDeallocate(tmpAlloc
, specs
);
5997 if (values
!= localValuesBuffer
) CFAllocatorDeallocate(tmpAlloc
, values
);
5998 if (formatChars
&& (formatChars
!= localFormatBuffer
)) CFAllocatorDeallocate(tmpAlloc
, formatChars
);
5999 if (configs
!= localConfigs
) CFAllocatorDeallocate(tmpAlloc
, configs
);
// CFShowStr: debugging aid that writes a description of a string's internal
// state to stdout using fprintf.
//   str - the string to describe.
// The function is declared void; everything it reports goes to stdout.
// NOTE(review): this excerpt is missing some of the original lines (guards,
// returns, closing braces), so comments below that depend on them are hedged.
6004 void CFShowStr(CFStringRef str
) {
6005 CFAllocatorRef alloc
;
// Output used when there is no string to show.
// NOTE(review): the condition guarding this line is not visible here;
// presumably a NULL check on str - confirm.
6008 fprintf(stdout
, "(null)\n");
// Strings that CF_IS_OBJC reports for the CFString type ID only get a one-line
// notice. NOTE(review): presumably followed by an early return - confirm.
6012 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) {
6013 fprintf(stdout
, "This is an NSString, not CFString\n");
// Keep the allocator of str for the "Allocator" line printed further down.
6017 alloc
= CFGetAllocator(str
);
// The length is cast to int so that it matches the %d conversion.
6019 fprintf(stdout
, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str
), __CFStrIsEightBit(str
));
// Three more flags, each printed with %d.
6020 fprintf(stdout
, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
6021 __CFStrHasLengthByte(str
), __CFStrHasNullByte(str
), __CFStrIsInline(str
));
// Allocator line: the pointer value is printed when the allocator differs from
// kCFAllocatorSystemDefault.
// NOTE(review): the "SystemDefault" output is presumably the else arm; the
// else keyword is not visible in this excerpt - confirm.
6023 fprintf(stdout
, "Allocator ");
6024 if (alloc
!= kCFAllocatorSystemDefault
) {
6025 fprintf(stdout
, "%p\n", (void *)alloc
);
6027 fprintf(stdout
, "SystemDefault\n");
6029 fprintf(stdout
, "Mutable %d\n", __CFStrIsMutable(str
));
// Immutable string flagged as having a contents deallocator: print it, or
// "None" when __CFStrContentsDeallocator gives a null value.
// Mutable string flagged as having a contents allocator: print that instead.
6030 if (!__CFStrIsMutable(str
) && __CFStrHasContentsDeallocator(str
)) {
6031 if (__CFStrContentsDeallocator(str
)) fprintf(stdout
, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str
));
6032 else fprintf(stdout
, "ContentsDeallocatorFunc None\n");
6033 } else if (__CFStrIsMutable(str
) && __CFStrHasContentsAllocator(str
)) {
6034 fprintf(stdout
, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef
)str
));
// Capacity figures are printed only for mutable strings. The second label
// reads "Fixed" or "Desired" depending on __CFStrIsFixed; both numbers are
// cast to int for %d.
6037 if (__CFStrIsMutable(str
)) {
6038 fprintf(stdout
, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str
), __CFStrIsFixed(str
) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str
));
// Pointer value returned by __CFStrContents for str.
6040 fprintf(stdout
, "Contents %p\n", (void *)__CFStrContents(str
));
// Remove the HANGUL_*COUNT macro definitions so they are no longer defined
// past this point in the file.
// NOTE(review): the matching #define lines are outside this excerpt;
// presumably these are Hangul syllable/jamo count constants - confirm.
6049 #undef HANGUL_SCOUNT
6050 #undef HANGUL_LCOUNT
6051 #undef HANGUL_VCOUNT
6052 #undef HANGUL_TCOUNT
6053 #undef HANGUL_NCOUNT