2 * Copyright (c) 2008 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 Copyright 1998-2002, Apple, Inc. All rights reserved.
25 Responsibility: Ali Ozer
27 !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined.
30 #include <CoreFoundation/CFBase.h>
31 #include <CoreFoundation/CFString.h>
32 #include <CoreFoundation/CFDictionary.h>
33 #include "CFStringEncodingConverterExt.h"
34 #include "CFUniChar.h"
35 #include "CFUnicodeDecomposition.h"
36 #include "CFUnicodePrecomposition.h"
38 #include "CFInternal.h"
42 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
47 #define LONG_DOUBLE_SUPPORT 1
49 #define LONG_DOUBLE_SUPPORT 0
54 #define USE_STRING_ROM 0
57 #ifndef INSTRUMENT_SHARED_STRINGS
58 #define INSTRUMENT_SHARED_STRINGS 0
62 __private_extern__ CFStringRef __kCFLocaleCollatorID
;
64 #if INSTRUMENT_SHARED_STRINGS
65 #include <sys/stat.h> /* for umask() */
67 static void __CFRecordStringAllocationEvent(const char *encoding
, const char *bytes
, CFIndex byteCount
) {
68 static CFSpinLock_t lock
= CFSpinLockInit
;
70 if (memchr(bytes
, '\n', byteCount
)) return; //never record string allocation events for strings with newlines, because those confuse our parser and because they'll never go into the ROM
75 extern char **_NSGetProgname(void);
76 const char *name
= *_NSGetProgname();
77 if (! name
) name
= "UNKNOWN";
80 sprintf(path
, "/tmp/CFSharedStringInstrumentation_%s_%d.txt", name
, getpid());
81 fd
= open(path
, O_WRONLY
| O_APPEND
| O_CREAT
, 0666);
84 const char *errString
= strerror(error
);
85 fprintf(stderr
, "open() failed with error %d (%s)\n", error
, errString
);
90 char formatString
[256];
91 sprintf(formatString
, "%%-8d\t%%-16s\t%%.%lds\n", byteCount
);
92 int resultCount
= asprintf(&buffer
, formatString
, getpid(), encoding
, bytes
);
93 if (buffer
&& resultCount
> 0) write(fd
, buffer
, resultCount
);
94 else puts("Couldn't record allocation event");
97 __CFSpinUnlock(&lock
);
99 #endif //INSTRUMENT_SHARED_STRINGS
103 typedef Boolean (*UNI_CHAR_FUNC
)(UInt32 flags
, UInt8 ch
, UniChar
*unicodeChar
);
105 #if DEPLOYMENT_TARGET_MACOSX
106 extern size_t malloc_good_size(size_t size
);
108 extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes
, UniChar
*buffer
, CFIndex numChars
);
112 // We put this into C & Pascal strings if we can't convert
113 #define CONVERSIONFAILURESTR "CFString conversion failed"
115 // We set this to true when purging the constant string table, so CFStringDeallocate doesn't assert
116 static Boolean __CFConstantStringTableBeingFreed
= false;
122 // This section is for CFString compatibility and other behaviors...
124 static CFOptionFlags _CFStringCompatibilityMask
= 0;
128 void _CFStringSetCompatibility(CFOptionFlags mask
) {
129 _CFStringCompatibilityMask
|= mask
;
132 CF_INLINE Boolean
__CFStringGetCompatibility(CFOptionFlags mask
) {
133 return (_CFStringCompatibilityMask
& mask
) == mask
;
138 // Two constant strings used by CFString; these are initialized in CFStringInitialize
139 CONST_STRING_DECL(kCFEmptyString
, "")
141 // This is separate for C++
142 struct __notInlineMutable
{
145 CFIndex capacity
; // Capacity in bytes
146 unsigned int hasGap
:1; // Currently unused
147 unsigned int isFixedCapacity
:1;
148 unsigned int isExternalMutable
:1;
149 unsigned int capacityProvidedExternally
:1;
151 unsigned long desiredCapacity
:60;
153 unsigned long desiredCapacity
:28;
155 CFAllocatorRef contentsAllocator
; // Optional
156 }; // The only mutable variant for CFString
159 /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
163 union { // In many cases the allocated structs are smaller than these
166 } inline1
; // Bytes follow the length
167 struct __notInlineImmutable1
{
168 void *buffer
; // Note that the buffer is in the same place for all non-inline variants of CFString
170 CFAllocatorRef contentsDeallocator
; // Optional; just the dealloc func is used
171 } notInlineImmutable1
; // This is the usual not-inline immutable CFString
172 struct __notInlineImmutable2
{
174 CFAllocatorRef contentsDeallocator
; // Optional; just the dealloc func is used
175 } notInlineImmutable2
; // This is the not-inline immutable CFString when length is stored with the contents (first byte)
176 struct __notInlineMutable notInlineMutable
;
182 E = not inline contents
186 D = explicit deallocator for contents (for mutable objects, allocator)
187 C = length field is CFIndex (rather than UInt32); only meaningful for 64-bit, really
188 if needed this bit (valuable real-estate) can be given up for another bit elsewhere, since this info is needed just for 64-bit
190 Also need (only for mutable)
193 Cap, DesCap = capacity
195 B7 B6 B5 B4 B3 B2 B1 B0
200 0 1 E (freed with default allocator)
204 !!! Note: Constant CFStrings use the bit patterns:
205 C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable)
206 D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable)
207 The bit usages should not be modified in a way that would affect these bit patterns.
211 __kCFFreeContentsWhenDoneMask
= 0x020,
212 __kCFFreeContentsWhenDone
= 0x020,
213 __kCFContentsMask
= 0x060,
214 __kCFHasInlineContents
= 0x000,
215 __kCFNotInlineContentsNoFree
= 0x040, // Don't free
216 __kCFNotInlineContentsDefaultFree
= 0x020, // Use allocator's free function
217 __kCFNotInlineContentsCustomFree
= 0x060, // Use a specially provided free function
218 __kCFHasContentsAllocatorMask
= 0x060,
219 __kCFHasContentsAllocator
= 0x060, // (For mutable strings) use a specially provided allocator
220 __kCFHasContentsDeallocatorMask
= 0x060,
221 __kCFHasContentsDeallocator
= 0x060,
222 __kCFIsMutableMask
= 0x01,
223 __kCFIsMutable
= 0x01,
224 __kCFIsUnicodeMask
= 0x10,
225 __kCFIsUnicode
= 0x10,
226 __kCFHasNullByteMask
= 0x08,
227 __kCFHasNullByte
= 0x08,
228 __kCFHasLengthByteMask
= 0x04,
229 __kCFHasLengthByte
= 0x04,
230 // !!! Bit 0x02 has been freed up
235 // Mutable strings are not inline
236 // Compile-time constant strings are not inline
237 // Mutable strings always have explicit length (but they might also have length byte and null byte)
238 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
239 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
241 /* The following set of functions and macros need to be updated on change to the bit configuration
243 CF_INLINE Boolean
__CFStrIsMutable(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFIsMutableMask
) == __kCFIsMutable
;}
244 CF_INLINE Boolean
__CFStrIsInline(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFContentsMask
) == __kCFHasInlineContents
;}
245 CF_INLINE Boolean
__CFStrFreeContentsWhenDone(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFFreeContentsWhenDoneMask
) == __kCFFreeContentsWhenDone
;}
246 CF_INLINE Boolean
__CFStrHasContentsDeallocator(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFHasContentsDeallocatorMask
) == __kCFHasContentsDeallocator
;}
247 CF_INLINE Boolean
__CFStrIsUnicode(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFIsUnicodeMask
) == __kCFIsUnicode
;}
248 CF_INLINE Boolean
__CFStrIsEightBit(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFIsUnicodeMask
) != __kCFIsUnicode
;}
249 CF_INLINE Boolean
__CFStrHasNullByte(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFHasNullByteMask
) == __kCFHasNullByte
;}
250 CF_INLINE Boolean
__CFStrHasLengthByte(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFHasLengthByteMask
) == __kCFHasLengthByte
;}
251 CF_INLINE Boolean
__CFStrHasExplicitLength(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & (__kCFIsMutableMask
| __kCFHasLengthByteMask
)) != __kCFHasLengthByte
;} // Has explicit length if (1) mutable or (2) not mutable and no length byte
252 CF_INLINE Boolean
__CFStrIsConstant(CFStringRef str
) {
254 return str
->base
._rc
== 0;
256 return (str
->base
._cfinfo
[CF_RC_BITS
]) == 0;
260 CF_INLINE SInt32
__CFStrSkipAnyLengthByte(CFStringRef str
) {return ((str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFHasLengthByteMask
) == __kCFHasLengthByte
) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents
262 /* Returns ptr to the buffer (which might include the length byte)
264 CF_INLINE
const void *__CFStrContents(CFStringRef str
) {
265 if (__CFStrIsInline(str
)) {
266 return (const void *)(((uintptr_t)&(str
->variants
)) + (__CFStrHasExplicitLength(str
) ? sizeof(CFIndex
) : 0));
267 } else { // Not inline; pointer is always word 2
268 return str
->variants
.notInlineImmutable1
.buffer
;
272 static CFAllocatorRef
*__CFStrContentsDeallocatorPtr(CFStringRef str
) {
273 return __CFStrHasExplicitLength(str
) ? &(((CFMutableStringRef
)str
)->variants
.notInlineImmutable1
.contentsDeallocator
) : &(((CFMutableStringRef
)str
)->variants
.notInlineImmutable2
.contentsDeallocator
); }
275 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
276 CF_INLINE CFAllocatorRef
__CFStrContentsDeallocator(CFStringRef str
) {
277 return *__CFStrContentsDeallocatorPtr(str
);
280 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
281 CF_INLINE
void __CFStrSetContentsDeallocator(CFStringRef str
, CFAllocatorRef contentsAllocator
) {
282 *__CFStrContentsDeallocatorPtr(str
) = contentsAllocator
;
285 static CFAllocatorRef
*__CFStrContentsAllocatorPtr(CFStringRef str
) {
286 CFAssert(!__CFStrIsInline(str
), __kCFLogAssertion
, "Asking for contents allocator of inline string");
287 CFAssert(__CFStrIsMutable(str
), __kCFLogAssertion
, "Asking for contents allocator of an immutable string");
288 return (CFAllocatorRef
*)&(str
->variants
.notInlineMutable
.contentsAllocator
);
291 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
292 CF_INLINE CFAllocatorRef
__CFStrContentsAllocator(CFMutableStringRef str
) {
293 return *(__CFStrContentsAllocatorPtr(str
));
296 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
297 CF_INLINE
void __CFStrSetContentsAllocator(CFMutableStringRef str
, CFAllocatorRef alloc
) {
298 *(__CFStrContentsAllocatorPtr(str
)) = alloc
;
301 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
303 CF_INLINE CFIndex
__CFStrLength(CFStringRef str
) {
304 if (__CFStrHasExplicitLength(str
)) {
305 if (__CFStrIsInline(str
)) {
306 return str
->variants
.inline1
.length
;
308 return str
->variants
.notInlineImmutable1
.length
;
311 return (CFIndex
)(*((uint8_t *)__CFStrContents(str
)));
315 CF_INLINE CFIndex
__CFStrLength2(CFStringRef str
, const void *buffer
) {
316 if (__CFStrHasExplicitLength(str
)) {
317 if (__CFStrIsInline(str
)) {
318 return str
->variants
.inline1
.length
;
320 return str
->variants
.notInlineImmutable1
.length
;
323 return (CFIndex
)(*((uint8_t *)buffer
));
328 Boolean
__CFStringIsEightBit(CFStringRef str
) {
329 return __CFStrIsEightBit(str
);
332 /* Sets the content pointer for immutable or mutable strings.
334 CF_INLINE
void __CFStrSetContentPtr(CFStringRef str
, const void *p
) {
335 // XXX_PCB catch all writes for mutable string case.
336 CF_WRITE_BARRIER_BASE_ASSIGN(__CFGetAllocator(str
), str
, ((CFMutableStringRef
)str
)->variants
.notInlineImmutable1
.buffer
, (void *)p
);
338 CF_INLINE
void __CFStrSetInfoBits(CFStringRef str
, UInt32 v
) {__CFBitfieldSetValue(((CFMutableStringRef
)str
)->base
._cfinfo
[CF_INFO_BITS
], 6, 0, v
);}
340 CF_INLINE
void __CFStrSetExplicitLength(CFStringRef str
, CFIndex v
) {
341 if (__CFStrIsInline(str
)) {
342 ((CFMutableStringRef
)str
)->variants
.inline1
.length
= v
;
344 ((CFMutableStringRef
)str
)->variants
.notInlineImmutable1
.length
= v
;
348 CF_INLINE
void __CFStrSetUnicode(CFMutableStringRef str
) {str
->base
._cfinfo
[CF_INFO_BITS
] |= __kCFIsUnicode
;}
349 CF_INLINE
void __CFStrClearUnicode(CFMutableStringRef str
) {str
->base
._cfinfo
[CF_INFO_BITS
] &= ~__kCFIsUnicode
;}
350 CF_INLINE
void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str
) {str
->base
._cfinfo
[CF_INFO_BITS
] |= (__kCFHasLengthByte
| __kCFHasNullByte
);}
351 CF_INLINE
void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str
) {str
->base
._cfinfo
[CF_INFO_BITS
] &= ~(__kCFHasLengthByte
| __kCFHasNullByte
);}
354 // Assumption: The following set of inlines (using str->variants.notInlineMutable) are called with mutable strings only
355 CF_INLINE Boolean
__CFStrIsFixed(CFStringRef str
) {return str
->variants
.notInlineMutable
.isFixedCapacity
;}
356 CF_INLINE Boolean
__CFStrIsExternalMutable(CFStringRef str
) {return str
->variants
.notInlineMutable
.isExternalMutable
;}
357 CF_INLINE Boolean
__CFStrHasContentsAllocator(CFStringRef str
) {return (str
->base
._cfinfo
[CF_INFO_BITS
] & __kCFHasContentsAllocatorMask
) == __kCFHasContentsAllocator
;}
358 CF_INLINE
void __CFStrSetIsFixed(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.isFixedCapacity
= 1;}
359 CF_INLINE
void __CFStrSetIsExternalMutable(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.isExternalMutable
= 1;}
360 CF_INLINE
void __CFStrSetHasGap(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.hasGap
= 1;}
362 // If capacity is provided externally, we only change it when we need to grow beyond it
363 CF_INLINE Boolean
__CFStrCapacityProvidedExternally(CFStringRef str
) {return str
->variants
.notInlineMutable
.capacityProvidedExternally
;}
364 CF_INLINE
void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.capacityProvidedExternally
= 1;}
365 CF_INLINE
void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str
) {str
->variants
.notInlineMutable
.capacityProvidedExternally
= 0;}
367 // "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
368 CF_INLINE CFIndex
__CFStrCapacity(CFStringRef str
) {return str
->variants
.notInlineMutable
.capacity
;}
369 CF_INLINE
void __CFStrSetCapacity(CFMutableStringRef str
, CFIndex cap
) {str
->variants
.notInlineMutable
.capacity
= cap
;}
371 // "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
372 CF_INLINE CFIndex
__CFStrDesiredCapacity(CFStringRef str
) {return str
->variants
.notInlineMutable
.desiredCapacity
;}
373 CF_INLINE
void __CFStrSetDesiredCapacity(CFMutableStringRef str
, CFIndex size
) {str
->variants
.notInlineMutable
.desiredCapacity
= size
;}
376 static void *__CFStrAllocateMutableContents(CFMutableStringRef str
, CFIndex size
) {
378 CFAllocatorRef alloc
= (__CFStrHasContentsAllocator(str
)) ? __CFStrContentsAllocator(str
) : __CFGetAllocator(str
);
379 ptr
= CFAllocatorAllocate(alloc
, size
, 0);
380 if (__CFOASafe
) __CFSetLastAllocationEventName(ptr
, "CFString (store)");
384 static void __CFStrDeallocateMutableContents(CFMutableStringRef str
, void *buffer
) {
385 CFAllocatorRef alloc
= (__CFStrHasContentsAllocator(str
)) ? __CFStrContentsAllocator(str
) : __CFGetAllocator(str
);
386 if (CF_IS_COLLECTABLE_ALLOCATOR(alloc
)) {
387 // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle.
388 auto_zone_release(__CFCollectableZone
, buffer
);
390 CFAllocatorDeallocate(alloc
, buffer
);
397 /* CFString specific init flags
398 Note that you cannot count on the external buffer not being copied.
399 Also, if you specify an external buffer, you should not change it behind the CFString's back.
402 __kCFThinUnicodeIfPossible
= 0x1000000, /* See if the Unicode contents can be thinned down to 8-bit */
403 kCFStringPascal
= 0x10000, /* Indicating that the string data has a Pascal string structure (length byte at start) */
404 kCFStringNoCopyProvidedContents
= 0x20000, /* Don't copy the provided string contents if possible; free it when no longer needed */
405 kCFStringNoCopyNoFreeProvidedContents
= 0x30000 /* Don't copy the provided string contents if possible; don't free it when no longer needed */
410 static CFStringEncoding __CFDefaultSystemEncoding
= kCFStringEncodingInvalidId
;
411 static CFStringEncoding __CFDefaultFileSystemEncoding
= kCFStringEncodingInvalidId
;
412 CFStringEncoding __CFDefaultEightBitStringEncoding
= kCFStringEncodingInvalidId
;
414 CFStringEncoding
CFStringGetSystemEncoding(void) {
416 if (__CFDefaultSystemEncoding
== kCFStringEncodingInvalidId
) {
417 const CFStringEncodingConverter
*converter
= NULL
;
418 #if DEPLOYMENT_TARGET_MACOSX
419 __CFDefaultSystemEncoding
= kCFStringEncodingMacRoman
; // MacRoman is built-in so always available
420 #elif defined(__WIN32__)
421 __CFDefaultSystemEncoding
= kCFStringEncodingWindowsLatin1
; // WinLatin1 is built-in so always available
422 #elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
423 __CFDefaultSystemEncoding
= kCFStringEncodingISOLatin1
; // a reasonable default
424 #else // Solaris && HP-UX ?
425 __CFDefaultSystemEncoding
= kCFStringEncodingISOLatin1
; // a reasonable default
427 converter
= CFStringEncodingGetConverter(__CFDefaultSystemEncoding
);
429 __CFSetCharToUniCharFunc(converter
->encodingClass
== kCFStringEncodingConverterCheapEightBit
? (UNI_CHAR_FUNC
)converter
->toUnicode
: NULL
);
432 return __CFDefaultSystemEncoding
;
435 // Fast version for internal use
437 CF_INLINE CFStringEncoding
__CFStringGetSystemEncoding(void) {
438 if (__CFDefaultSystemEncoding
== kCFStringEncodingInvalidId
) (void)CFStringGetSystemEncoding();
439 return __CFDefaultSystemEncoding
;
442 CFStringEncoding
CFStringFileSystemEncoding(void) {
443 if (__CFDefaultFileSystemEncoding
== kCFStringEncodingInvalidId
) {
444 #if DEPLOYMENT_TARGET_MACOSX
445 __CFDefaultFileSystemEncoding
= kCFStringEncodingUTF8
;
447 __CFDefaultFileSystemEncoding
= CFStringGetSystemEncoding();
451 return __CFDefaultFileSystemEncoding
;
454 /* ??? Is returning length when no other answer is available the right thing?
456 CFIndex
CFStringGetMaximumSizeForEncoding(CFIndex length
, CFStringEncoding encoding
) {
457 if (encoding
== kCFStringEncodingUTF8
) {
458 return _CFExecutableLinkedOnOrAfter(CFSystemVersionPanther
) ? (length
* 3) : (length
* 6); // 1 Unichar could expand to 3 bytes; we return 6 for older apps for compatibility
459 } else if ((encoding
== kCFStringEncodingUTF32
) || (encoding
== kCFStringEncodingUTF32BE
) || (encoding
== kCFStringEncodingUTF32LE
)) { // UTF-32
460 return length
* sizeof(UTF32Char
);
462 encoding
&= 0xFFF; // Mask off non-base part
465 case kCFStringEncodingUnicode
:
466 return length
* sizeof(UniChar
);
468 case kCFStringEncodingNonLossyASCII
:
469 return length
* 6; // 1 Unichar could expand to 6 bytes
471 case kCFStringEncodingMacRoman
:
472 case kCFStringEncodingWindowsLatin1
:
473 case kCFStringEncodingISOLatin1
:
474 case kCFStringEncodingNextStepLatin
:
475 case kCFStringEncodingASCII
:
476 return length
/ sizeof(uint8_t);
479 return length
/ sizeof(uint8_t);
484 /* Returns whether the indicated encoding can be stored in 8-bit chars
486 CF_INLINE Boolean
__CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding
) {
487 switch (encoding
& 0xFFF) { // just use encoding base
488 case kCFStringEncodingInvalidId
:
489 case kCFStringEncodingUnicode
:
490 case kCFStringEncodingNonLossyASCII
:
493 case kCFStringEncodingMacRoman
:
494 case kCFStringEncodingWindowsLatin1
:
495 case kCFStringEncodingISOLatin1
:
496 case kCFStringEncodingNextStepLatin
:
497 case kCFStringEncodingASCII
:
500 default: return false;
504 /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
505 ??? Perhaps only ASCII fits the bill due to Unicode decomposition.
507 CFStringEncoding
__CFStringComputeEightBitStringEncoding(void) {
508 if (__CFDefaultEightBitStringEncoding
== kCFStringEncodingInvalidId
) {
509 CFStringEncoding systemEncoding
= CFStringGetSystemEncoding();
510 if (systemEncoding
== kCFStringEncodingInvalidId
) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
511 return kCFStringEncodingASCII
;
512 } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding
)) {
513 __CFDefaultEightBitStringEncoding
= systemEncoding
;
515 __CFDefaultEightBitStringEncoding
= kCFStringEncodingASCII
;
519 return __CFDefaultEightBitStringEncoding
;
522 /* Returns whether the provided bytes can be stored in ASCII
524 CF_INLINE Boolean
__CFBytesInASCII(const uint8_t *bytes
, CFIndex len
) {
525 while (len
--) if ((uint8_t)(*bytes
++) >= 128) return false;
529 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
531 CF_INLINE Boolean
__CFCanUseEightBitCFStringForBytes(const uint8_t *bytes
, CFIndex len
, CFStringEncoding encoding
) {
532 if (encoding
== __CFStringGetEightBitStringEncoding()) return true;
533 if (__CFStringEncodingIsSupersetOfASCII(encoding
) && __CFBytesInASCII(bytes
, len
)) return true;
538 /* Returns whether a length byte can be tacked on to a string of the indicated length.
540 CF_INLINE Boolean
__CFCanUseLengthByte(CFIndex len
) {
541 #define __kCFMaxPascalStrLen 255
542 return (len
<= __kCFMaxPascalStrLen
) ? true : false;
/* Various string assertions.
   Macro arguments are parenthesised wherever they appear inside a comparison
   or arithmetic expression, so compound expressions can be passed safely.
*/
#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && ((idx) + (count)) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsNotNegative(idx) CFAssert2((idx) >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
556 /* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity.
557 Additional complications are applied in the following order:
558 - desiredCapacity, which is the minimum (except initially things can be at zero)
559 - rounding up to factor of 8
560 - compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256
561 - we need to make sure GROWFACTOR computation doesn't suffer from overflow issues on 32-bit, hence the casting to unsigned. Normally for required capacity of C bytes, the allocated space is (3C+1)/2. If C > ULONG_MAX/3, we instead simply return LONG_MAX
563 #define SHRINKFACTOR(c) (c / 2)
566 #define GROWFACTOR(c) ((c * 3 + 1) / 2)
568 #define GROWFACTOR(c) (((c) >= (ULONG_MAX / 3UL)) ? __CFMax(LONG_MAX - 4095, (c)) : (((unsigned long)c * 3 + 1) / 2))
571 CF_INLINE CFIndex
__CFStrNewCapacity(CFMutableStringRef str
, CFIndex reqCapacity
, CFIndex capacity
, Boolean leaveExtraRoom
, CFIndex charSize
) {
572 if (capacity
!= 0 || reqCapacity
!= 0) { /* If initially zero, and space not needed, leave it at that... */
573 if ((capacity
< reqCapacity
) || /* We definitely need the room... */
574 (!__CFStrCapacityProvidedExternally(str
) && /* Assuming we control the capacity... */
575 ((reqCapacity
< SHRINKFACTOR(capacity
)) || /* ...we have too much room! */
576 (!leaveExtraRoom
&& (reqCapacity
< capacity
))))) { /* ...we need to eliminate the extra space... */
577 CFIndex newCapacity
= leaveExtraRoom
? GROWFACTOR(reqCapacity
) : reqCapacity
; /* Grow by 3/2 if extra room is desired */
578 CFIndex desiredCapacity
= __CFStrDesiredCapacity(str
) * charSize
;
579 if (newCapacity
< desiredCapacity
) { /* If less than desired, bump up to desired */
580 newCapacity
= desiredCapacity
;
581 } else if (__CFStrIsFixed(str
)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
582 newCapacity
= __CFMax(desiredCapacity
, reqCapacity
); /* !!! So, fixed is not really fixed, but "tight" */
584 if (__CFStrHasContentsAllocator(str
)) { /* Also apply any preferred size from the allocator; should we do something for */
585 newCapacity
= CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str
), newCapacity
, 0);
586 #if DEPLOYMENT_TARGET_MACOSX
588 newCapacity
= malloc_good_size(newCapacity
);
591 return newCapacity
; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
598 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
599 numBlocks is current total number of blocks within buffer.
600 blockSize is the size of each block in bytes
601 ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
602 insertLength is the final spacing between the remaining blocks
604 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO
605 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
606 if insertLength = 0, result = A B D G H
608 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
609 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
612 typedef struct _CFStringDeferredRange
{
616 } CFStringDeferredRange
;
618 typedef struct _CFStringStackInfo
{
619 CFIndex capacity
; // Capacity (if capacity == count, need to realloc to add another)
620 CFIndex count
; // Number of elements actually stored
621 CFStringDeferredRange
*stack
;
622 Boolean hasMalloced
; // Indicates "stack" is allocated and needs to be deallocated when done
626 CF_INLINE
void pop (CFStringStackInfo
*si
, CFStringDeferredRange
*topRange
) {
627 si
->count
= si
->count
- 1;
628 *topRange
= si
->stack
[si
->count
];
631 CF_INLINE
void push (CFStringStackInfo
*si
, const CFStringDeferredRange
*newRange
) {
632 if (si
->count
== si
->capacity
) {
633 // increase size of the stack
634 si
->capacity
= (si
->capacity
+ 4) * 2;
635 if (si
->hasMalloced
) {
636 si
->stack
= (CFStringDeferredRange
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, si
->stack
, si
->capacity
* sizeof(CFStringDeferredRange
), 0);
638 CFStringDeferredRange
*newStack
= (CFStringDeferredRange
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, si
->capacity
* sizeof(CFStringDeferredRange
), 0);
639 memmove(newStack
, si
->stack
, si
->count
* sizeof(CFStringDeferredRange
));
640 si
->stack
= newStack
;
641 si
->hasMalloced
= true;
644 si
->stack
[si
->count
] = *newRange
;
645 si
->count
= si
->count
+ 1;
648 static void rearrangeBlocks(
652 const CFRange
*ranges
,
654 CFIndex insertLength
) {
656 #define origStackSize 10
657 CFStringDeferredRange origStack
[origStackSize
];
658 CFStringStackInfo si
= {origStackSize
, 0, origStack
, false, {0, 0, 0}};
659 CFStringDeferredRange currentNonRange
= {0, 0, 0};
660 CFIndex currentRange
= 0;
661 CFIndex amountShifted
= 0;
663 // must have at least 1 range left.
665 while (currentRange
< numRanges
) {
666 currentNonRange
.beginning
= (ranges
[currentRange
].location
+ ranges
[currentRange
].length
) * blockSize
;
667 if ((numRanges
- currentRange
) == 1) {
669 currentNonRange
.length
= numBlocks
* blockSize
- currentNonRange
.beginning
;
670 if (currentNonRange
.length
== 0) break;
672 currentNonRange
.length
= (ranges
[currentRange
+ 1].location
* blockSize
) - currentNonRange
.beginning
;
674 currentNonRange
.shift
= amountShifted
+ (insertLength
* blockSize
) - (ranges
[currentRange
].length
* blockSize
);
675 amountShifted
= currentNonRange
.shift
;
676 if (amountShifted
<= 0) {
677 // process current item and rest of stack
678 if (currentNonRange
.shift
&& currentNonRange
.length
) memmove (&buffer
[currentNonRange
.beginning
+ currentNonRange
.shift
], &buffer
[currentNonRange
.beginning
], currentNonRange
.length
);
679 while (si
.count
> 0) {
680 pop (&si
, ¤tNonRange
); // currentNonRange now equals the top element of the stack.
681 if (currentNonRange
.shift
&& currentNonRange
.length
) memmove (&buffer
[currentNonRange
.beginning
+ currentNonRange
.shift
], &buffer
[currentNonRange
.beginning
], currentNonRange
.length
);
684 // add currentNonRange to stack.
685 push (&si
, ¤tNonRange
);
690 // no more ranges. if anything is on the stack, process.
692 while (si
.count
> 0) {
693 pop (&si
, ¤tNonRange
); // currentNonRange now equals the top element of the stack.
694 if (currentNonRange
.shift
&& currentNonRange
.length
) memmove (&buffer
[currentNonRange
.beginning
+ currentNonRange
.shift
], &buffer
[currentNonRange
.beginning
], currentNonRange
.length
);
696 if (si
.hasMalloced
) CFAllocatorDeallocate (kCFAllocatorSystemDefault
, si
.stack
);
699 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
701 static void copyBlocks(
702 const uint8_t *srcBuffer
,
705 Boolean srcIsUnicode
,
706 Boolean dstIsUnicode
,
707 const CFRange
*ranges
,
709 CFIndex insertLength
) {
711 CFIndex srcLocationInBytes
= 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
712 CFIndex dstLocationInBytes
= 0; // ditto
713 CFIndex srcBlockSize
= srcIsUnicode
? sizeof(UniChar
) : sizeof(uint8_t);
714 CFIndex insertLengthInBytes
= insertLength
* (dstIsUnicode
? sizeof(UniChar
) : sizeof(uint8_t));
715 CFIndex rangeIndex
= 0;
716 CFIndex srcToDstMultiplier
= (srcIsUnicode
== dstIsUnicode
) ? 1 : (sizeof(UniChar
) / sizeof(uint8_t));
718 // Loop over the ranges, copying the range to be preserved (right before each range)
719 while (rangeIndex
< numRanges
) {
720 CFIndex srcLengthInBytes
= ranges
[rangeIndex
].location
* srcBlockSize
- srcLocationInBytes
; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
721 if (srcLengthInBytes
> 0) {
722 if (srcIsUnicode
== dstIsUnicode
) {
723 memmove(dstBuffer
+ dstLocationInBytes
, srcBuffer
+ srcLocationInBytes
, srcLengthInBytes
);
725 __CFStrConvertBytesToUnicode(srcBuffer
+ srcLocationInBytes
, (UniChar
*)(dstBuffer
+ dstLocationInBytes
), srcLengthInBytes
);
728 srcLocationInBytes
+= srcLengthInBytes
+ ranges
[rangeIndex
].length
* srcBlockSize
; // Skip over the just-copied and to-be-deleted stuff
729 dstLocationInBytes
+= srcLengthInBytes
* srcToDstMultiplier
+ insertLengthInBytes
;
733 // Do last range (the one beyond last range)
734 if (srcLocationInBytes
< srcLength
* srcBlockSize
) {
735 if (srcIsUnicode
== dstIsUnicode
) {
736 memmove(dstBuffer
+ dstLocationInBytes
, srcBuffer
+ srcLocationInBytes
, srcLength
* srcBlockSize
- srcLocationInBytes
);
738 __CFStrConvertBytesToUnicode(srcBuffer
+ srcLocationInBytes
, (UniChar
*)(dstBuffer
+ dstLocationInBytes
), srcLength
* srcBlockSize
- srcLocationInBytes
);
743 /* Call the callback; if it doesn't exist or returns false, then log
745 static void __CFStringHandleOutOfMemory(CFTypeRef obj
) {
746 CFStringRef msg
= CFSTR("Out of memory. We suggest restarting the application. If you have an unsaved document, create a backup copy in Finder, then try to save.");
747 CFBadErrorCallBack cb
= _CFGetOutOfMemoryErrorCallBack();
748 if (NULL
== cb
|| !cb(obj
, CFSTR("NS/CFString"), msg
)) {
749 CFLog(kCFLogLevelCritical
, CFSTR("%@"), msg
);
753 /* Reallocates the backing store of the string to accomodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)
755 static void __CFStringChangeSizeMultiple(CFMutableStringRef str
, const CFRange
*deleteRanges
, CFIndex numDeleteRanges
, CFIndex insertLength
, Boolean makeUnicode
) {
756 const uint8_t *curContents
= (uint8_t *)__CFStrContents(str
);
757 CFIndex curLength
= curContents
? __CFStrLength2(str
, curContents
) : 0;
760 // Compute new length of the string
761 if (numDeleteRanges
== 1) {
762 newLength
= curLength
+ insertLength
- deleteRanges
[0].length
;
765 newLength
= curLength
+ insertLength
* numDeleteRanges
;
766 for (cnt
= 0; cnt
< numDeleteRanges
; cnt
++) newLength
-= deleteRanges
[cnt
].length
;
769 __CFAssertIfFixedLengthIsOK(str
, newLength
);
771 if (newLength
== 0) {
772 // An somewhat optimized code-path for this special case, with the following implicit values:
773 // newIsUnicode = false
774 // useLengthAndNullBytes = false
775 // newCharSize = sizeof(uint8_t)
776 // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
777 // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
778 CFIndex curCapacity
= __CFStrCapacity(str
);
779 CFIndex newCapacity
= __CFStrNewCapacity(str
, 0, curCapacity
, true, sizeof(uint8_t));
780 if (newCapacity
!= curCapacity
) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
781 if (curContents
) __CFStrDeallocateMutableContents(str
, (uint8_t *)curContents
);
782 __CFStrSetContentPtr(str
, NULL
);
783 __CFStrSetCapacity(str
, 0);
784 __CFStrClearCapacityProvidedExternally(str
);
785 __CFStrClearHasLengthAndNullBytes(str
);
786 if (!__CFStrIsExternalMutable(str
)) __CFStrClearUnicode(str
); // External mutable implies Unicode
788 if (!__CFStrIsExternalMutable(str
)) {
789 __CFStrClearUnicode(str
);
790 if (curCapacity
>= (int)(sizeof(uint8_t) * 2)) { // If there's room
791 __CFStrSetHasLengthAndNullBytes(str
);
792 ((uint8_t *)curContents
)[0] = ((uint8_t *)curContents
)[1] = 0;
794 __CFStrClearHasLengthAndNullBytes(str
);
798 __CFStrSetExplicitLength(str
, 0);
799 } else { /* This else-clause assumes newLength > 0 */
800 Boolean oldIsUnicode
= __CFStrIsUnicode(str
);
801 Boolean newIsUnicode
= makeUnicode
|| (oldIsUnicode
/* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str
);
802 CFIndex newCharSize
= newIsUnicode
? sizeof(UniChar
) : sizeof(uint8_t);
803 Boolean useLengthAndNullBytes
= !newIsUnicode
/* && (newLength > 0) - implicit */;
804 CFIndex numExtraBytes
= useLengthAndNullBytes
? 2 : 0; /* 2 extra bytes to keep the length byte & null... */
805 CFIndex curCapacity
= __CFStrCapacity(str
);
806 CFIndex newCapacity
= __CFStrNewCapacity(str
, newLength
* newCharSize
+ numExtraBytes
, curCapacity
, true, newCharSize
);
807 Boolean allocNewBuffer
= (newCapacity
!= curCapacity
) || (curLength
> 0 && !oldIsUnicode
&& newIsUnicode
); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
808 uint8_t *newContents
;
809 if (allocNewBuffer
) {
810 newContents
= (uint8_t *)__CFStrAllocateMutableContents(str
, newCapacity
);
811 if (!newContents
) { // Try allocating without extra room
812 newCapacity
= __CFStrNewCapacity(str
, newLength
* newCharSize
+ numExtraBytes
, curCapacity
, false, newCharSize
);
813 newContents
= (uint8_t *)__CFStrAllocateMutableContents(str
, newCapacity
);
815 __CFStringHandleOutOfMemory(str
);
816 // Ideally control doesn't come here at all since we expect the above call to raise an exception.
817 // If control comes here, there isn't much we can do.
821 newContents
= (uint8_t *)curContents
;
824 Boolean hasLengthAndNullBytes
= __CFStrHasLengthByte(str
);
826 CFAssert1(hasLengthAndNullBytes
== __CFStrHasNullByte(str
), __kCFLogAssertion
, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__
);
828 if (hasLengthAndNullBytes
) curContents
++;
829 if (useLengthAndNullBytes
) newContents
++;
832 if (oldIsUnicode
== newIsUnicode
) {
833 if (newContents
== curContents
) {
834 rearrangeBlocks(newContents
, curLength
, newCharSize
, deleteRanges
, numDeleteRanges
, insertLength
);
836 copyBlocks(curContents
, newContents
, curLength
, oldIsUnicode
, newIsUnicode
, deleteRanges
, numDeleteRanges
, insertLength
);
838 } else if (newIsUnicode
) { /* this implies we have a new buffer */
839 copyBlocks(curContents
, newContents
, curLength
, oldIsUnicode
, newIsUnicode
, deleteRanges
, numDeleteRanges
, insertLength
);
841 if (hasLengthAndNullBytes
) curContents
--; /* Undo the damage from above */
842 if (allocNewBuffer
&& __CFStrFreeContentsWhenDone(str
)) __CFStrDeallocateMutableContents(str
, (void *)curContents
);
846 if (useLengthAndNullBytes
) {
847 newContents
[newLength
] = 0; /* Always have null byte, if not unicode */
848 newContents
--; /* Undo the damage from above */
849 newContents
[0] = __CFCanUseLengthByte(newLength
) ? (uint8_t)newLength
: 0;
850 if (!hasLengthAndNullBytes
) __CFStrSetHasLengthAndNullBytes(str
);
852 if (hasLengthAndNullBytes
) __CFStrClearHasLengthAndNullBytes(str
);
854 if (oldIsUnicode
) __CFStrClearUnicode(str
);
855 } else { // New is unicode...
856 if (!oldIsUnicode
) __CFStrSetUnicode(str
);
857 if (hasLengthAndNullBytes
) __CFStrClearHasLengthAndNullBytes(str
);
859 __CFStrSetExplicitLength(str
, newLength
);
861 if (allocNewBuffer
) {
862 __CFStrSetCapacity(str
, newCapacity
);
863 __CFStrClearCapacityProvidedExternally(str
);
864 __CFStrSetContentPtr(str
, newContents
);
869 /* Same as above, but takes one range (very common case)
871 CF_INLINE
void __CFStringChangeSize(CFMutableStringRef str
, CFRange range
, CFIndex insertLength
, Boolean makeUnicode
) {
872 __CFStringChangeSizeMultiple(str
, &range
, 1, insertLength
, makeUnicode
);
877 static Boolean
__CFStrIsConstantString(CFStringRef str
);
880 static void __CFStringDeallocate(CFTypeRef cf
) {
881 CFStringRef str
= (CFStringRef
)cf
;
883 // If in DEBUG mode, check to see if the string a CFSTR, and complain.
884 CFAssert1(__CFConstantStringTableBeingFreed
|| !__CFStrIsConstantString((CFStringRef
)cf
), __kCFLogAssertion
, "Tried to deallocate CFSTR(\"%@\")", str
);
886 if (!__CFStrIsInline(str
)) {
888 Boolean isMutable
= __CFStrIsMutable(str
);
889 if (__CFStrFreeContentsWhenDone(str
) && (contents
= (uint8_t *)__CFStrContents(str
))) {
891 __CFStrDeallocateMutableContents((CFMutableStringRef
)str
, contents
);
893 if (__CFStrHasContentsDeallocator(str
)) {
894 CFAllocatorRef contentsDeallocator
= __CFStrContentsDeallocator(str
);
895 CFAllocatorDeallocate(contentsDeallocator
, contents
);
896 CFRelease(contentsDeallocator
);
898 CFAllocatorRef alloc
= __CFGetAllocator(str
);
899 CFAllocatorDeallocate(alloc
, contents
);
903 if (isMutable
&& __CFStrHasContentsAllocator(str
)) CFRelease(__CFStrContentsAllocator((CFMutableStringRef
)str
));
907 static Boolean
__CFStringEqual(CFTypeRef cf1
, CFTypeRef cf2
) {
908 CFStringRef str1
= (CFStringRef
)cf1
;
909 CFStringRef str2
= (CFStringRef
)cf2
;
910 const uint8_t *contents1
;
911 const uint8_t *contents2
;
914 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
915 /* !!! We do not need == test, as the CFBase runtime assures this */
917 contents1
= (uint8_t *)__CFStrContents(str1
);
918 contents2
= (uint8_t *)__CFStrContents(str2
);
919 len1
= __CFStrLength2(str1
, contents1
);
921 if (len1
!= __CFStrLength2(str2
, contents2
)) return false;
923 contents1
+= __CFStrSkipAnyLengthByte(str1
);
924 contents2
+= __CFStrSkipAnyLengthByte(str2
);
926 if (__CFStrIsEightBit(str1
) && __CFStrIsEightBit(str2
)) {
927 return memcmp((const char *)contents1
, (const char *)contents2
, len1
) ? false : true;
928 } else if (__CFStrIsEightBit(str1
)) { /* One string has Unicode contents */
929 CFStringInlineBuffer buf
;
932 CFStringInitInlineBuffer(str1
, &buf
, CFRangeMake(0, len1
));
933 for (buf_idx
= 0; buf_idx
< len1
; buf_idx
++) {
934 if (__CFStringGetCharacterFromInlineBufferQuick(&buf
, buf_idx
) != ((UniChar
*)contents2
)[buf_idx
]) return false;
936 } else if (__CFStrIsEightBit(str2
)) { /* One string has Unicode contents */
937 CFStringInlineBuffer buf
;
940 CFStringInitInlineBuffer(str2
, &buf
, CFRangeMake(0, len1
));
941 for (buf_idx
= 0; buf_idx
< len1
; buf_idx
++) {
942 if (__CFStringGetCharacterFromInlineBufferQuick(&buf
, buf_idx
) != ((UniChar
*)contents1
)[buf_idx
]) return false;
944 } else { /* Both strings have Unicode contents */
946 for (idx
= 0; idx
< len1
; idx
++) {
947 if (((UniChar
*)contents1
)[idx
] != ((UniChar
*)contents2
)[idx
]) return false;
/* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
If the length is less than or equal to 96, then the hash function is simply the
following (n is the nth UniChar character, starting from 0):

   hash(-1) = length
   hash(n) = hash(n-1) * 257 + unichar(n);
   Hash = hash(length-1) * ((length & 31) + 1)

If the length is greater than 96, then the above algorithm applies to
characters 0..31, (length/2)-16..(length/2)+15, and length-32..length-1, inclusive;
thus the first, middle, and last 32 characters.

Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401;  67503105 is 257^4 - 256^4
If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.
!!! We haven't updated for LP64 yet

NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.

Hash function was changed between Panther and Tiger, and Tiger and Leopard.

The two macros below update a variable named "result" in the enclosing scope and advance "pointer".
accessStart and accessEnd are token fragments placed around the element index, e.g. "p[" and "]".
*/
#define HashEverythingLimit 96

#define HashNextFourUniChars(accessStart, accessEnd, pointer) \
    {result = result * 67503105 + (accessStart 0 accessEnd) * 16974593 + (accessStart 1 accessEnd) * 66049 + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;}

#define HashNextUniChar(accessStart, accessEnd, pointer) \
    {result = result * 257 + (accessStart 0 accessEnd); pointer++;}
983 /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing.
985 CF_INLINE CFHashCode
__CFStrHashCharacters(const UniChar
*uContents
, CFIndex len
, CFIndex actualLen
) {
986 CFHashCode result
= actualLen
;
987 if (len
<= HashEverythingLimit
) {
988 const UniChar
*end4
= uContents
+ (len
& ~3);
989 const UniChar
*end
= uContents
+ len
;
990 while (uContents
< end4
) HashNextFourUniChars(uContents
[, ], uContents
); // First count in fours
991 while (uContents
< end
) HashNextUniChar(uContents
[, ], uContents
); // Then for the last <4 chars, count in ones...
993 const UniChar
*contents
, *end
;
994 contents
= uContents
;
996 while (contents
< end
) HashNextFourUniChars(contents
[, ], contents
);
997 contents
= uContents
+ (len
>> 1) - 16;
999 while (contents
< end
) HashNextFourUniChars(contents
[, ], contents
);
1000 end
= uContents
+ len
;
1001 contents
= end
- 32;
1002 while (contents
< end
) HashNextFourUniChars(contents
[, ], contents
);
1004 return result
+ (result
<< (actualLen
& 31));
1007 /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check.
1009 CF_INLINE CFHashCode
__CFStrHashEightBit(const uint8_t *cContents
, CFIndex len
) {
1011 if (!__CFCharToUniCharFunc
) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
1013 Boolean err
= false;
1014 if (len
<= HashEverythingLimit
) {
1015 for (cnt
= 0; cnt
< len
; cnt
++) if (cContents
[cnt
] >= 128) err
= true;
1017 for (cnt
= 0; cnt
< 32; cnt
++) if (cContents
[cnt
] >= 128) err
= true;
1018 for (cnt
= (len
>> 1) - 16; cnt
< (len
>> 1) + 16; cnt
++) if (cContents
[cnt
] >= 128) err
= true;
1019 for (cnt
= (len
- 32); cnt
< len
; cnt
++) if (cContents
[cnt
] >= 128) err
= true;
1022 // Can't do log here, as it might be too early
1023 fprintf(stderr
, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
1027 CFHashCode result
= len
;
1028 if (len
<= HashEverythingLimit
) {
1029 const uint8_t *end4
= cContents
+ (len
& ~3);
1030 const uint8_t *end
= cContents
+ len
;
1031 while (cContents
< end4
) HashNextFourUniChars(__CFCharToUniCharTable
[cContents
[, ]], cContents
); // First count in fours
1032 while (cContents
< end
) HashNextUniChar(__CFCharToUniCharTable
[cContents
[, ]], cContents
); // Then for the last <4 chars, count in ones...
1034 const uint8_t *contents
, *end
;
1035 contents
= cContents
;
1036 end
= contents
+ 32;
1037 while (contents
< end
) HashNextFourUniChars(__CFCharToUniCharTable
[contents
[, ]], contents
);
1038 contents
= cContents
+ (len
>> 1) - 16;
1039 end
= contents
+ 32;
1040 while (contents
< end
) HashNextFourUniChars(__CFCharToUniCharTable
[contents
[, ]], contents
);
1041 end
= cContents
+ len
;
1042 contents
= end
- 32;
1043 while (contents
< end
) HashNextFourUniChars(__CFCharToUniCharTable
[contents
[, ]], contents
);
1045 return result
+ (result
<< (len
& 31));
1048 CFHashCode
CFStringHashISOLatin1CString(const uint8_t *bytes
, CFIndex len
) {
1049 CFHashCode result
= len
;
1050 if (len
<= HashEverythingLimit
) {
1051 const uint8_t *end4
= bytes
+ (len
& ~3);
1052 const uint8_t *end
= bytes
+ len
;
1053 while (bytes
< end4
) HashNextFourUniChars(bytes
[, ], bytes
); // First count in fours
1054 while (bytes
< end
) HashNextUniChar(bytes
[, ], bytes
); // Then for the last <4 chars, count in ones...
1056 const uint8_t *contents
, *end
;
1058 end
= contents
+ 32;
1059 while (contents
< end
) HashNextFourUniChars(contents
[, ], contents
);
1060 contents
= bytes
+ (len
>> 1) - 16;
1061 end
= contents
+ 32;
1062 while (contents
< end
) HashNextFourUniChars(contents
[, ], contents
);
1064 contents
= end
- 32;
1065 while (contents
< end
) HashNextFourUniChars(contents
[, ], contents
);
1067 return result
+ (result
<< (len
& 31));
1070 CFHashCode
CFStringHashCString(const uint8_t *bytes
, CFIndex len
) {
1071 return __CFStrHashEightBit(bytes
, len
);
1074 CFHashCode
CFStringHashCharacters(const UniChar
*characters
, CFIndex len
) {
1075 return __CFStrHashCharacters(characters
, len
, len
);
1078 /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash.
1080 CFHashCode
CFStringHashNSString(CFStringRef str
) {
1081 UniChar buffer
[HashEverythingLimit
];
1082 CFIndex bufLen
; // Number of characters in the buffer for hashing
1083 CFIndex len
= 0; // Actual length of the string
1085 CF_OBJC_CALL0(CFIndex
, len
, str
, "length");
1086 if (len
<= HashEverythingLimit
) {
1087 CF_OBJC_VOIDCALL2(str
, "getCharacters:range:", buffer
, CFRangeMake(0, len
));
1090 CF_OBJC_VOIDCALL2(str
, "getCharacters:range:", buffer
, CFRangeMake(0, 32));
1091 CF_OBJC_VOIDCALL2(str
, "getCharacters:range:", buffer
+32, CFRangeMake((len
>> 1) - 16, 32));
1092 CF_OBJC_VOIDCALL2(str
, "getCharacters:range:", buffer
+64, CFRangeMake(len
- 32, 32));
1093 bufLen
= HashEverythingLimit
;
1095 return __CFStrHashCharacters(buffer
, bufLen
, len
);
1098 CFHashCode
__CFStringHash(CFTypeRef cf
) {
1099 /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
1100 CFStringRef str
= (CFStringRef
)cf
;
1101 const uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
1102 CFIndex len
= __CFStrLength2(str
, contents
);
1104 if (__CFStrIsEightBit(str
)) {
1105 contents
+= __CFStrSkipAnyLengthByte(str
);
1106 return __CFStrHashEightBit(contents
, len
);
1108 return __CFStrHashCharacters((const UniChar
*)contents
, len
, len
);
1113 static CFStringRef
__CFStringCopyDescription(CFTypeRef cf
) {
1114 return CFStringCreateWithFormat(kCFAllocatorSystemDefault
, NULL
, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf
, __CFGetAllocator(cf
), cf
);
1117 static CFStringRef
__CFStringCopyFormattingDescription(CFTypeRef cf
, CFDictionaryRef formatOptions
) {
1118 return (CFStringRef
)CFStringCreateCopy(__CFGetAllocator(cf
), (CFStringRef
)cf
);
1121 static CFTypeID __kCFStringTypeID
= _kCFRuntimeNotATypeID
;
1123 typedef CFTypeRef (*CF_STRING_CREATE_COPY
)(CFAllocatorRef alloc
, CFTypeRef theString
);
1125 static const CFRuntimeClass __CFStringClass
= {
1129 (CF_STRING_CREATE_COPY
)CFStringCreateCopy
,
1130 __CFStringDeallocate
,
1133 __CFStringCopyFormattingDescription
,
1134 __CFStringCopyDescription
1137 __private_extern__
void __CFStringInitialize(void) {
1138 __kCFStringTypeID
= _CFRuntimeRegisterClass(&__CFStringClass
);
1141 CFTypeID
CFStringGetTypeID(void) {
1142 return __kCFStringTypeID
;
1146 static Boolean
CFStrIsUnicode(CFStringRef str
) {
1147 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, Boolean
, str
, "_encodingCantBeStoredInEightBitCFString");
1148 return __CFStrIsUnicode(str
);
1153 #define ALLOCATORSFREEFUNC ((CFAllocatorRef)-1)
1155 /* contentsDeallocator indicates how to free the data if it's noCopy == true:
1156 kCFAllocatorNull: don't free
1157 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
1158 NULL: default allocator
1159 otherwise it's the allocator that should be used (it will be explicitly stored)
1160 if noCopy == false, then freeFunc should be ALLOCATORSFREEFUNC
1161 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
1162 possiblyExternalFormat indicates that the bytes might have BOM and be swapped
1163 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
1164 numBytes contains the actual number of bytes in "bytes", including Length byte,
1165 BUT not the NULL byte at the end
1166 bytes should not contain BOM characters
1167 !!! Various flags should be combined to reduce number of arguments, if possible
1169 __private_extern__ CFStringRef
__CFStringCreateImmutableFunnel3(
1170 CFAllocatorRef alloc
, const void *bytes
, CFIndex numBytes
, CFStringEncoding encoding
,
1171 Boolean possiblyExternalFormat
, Boolean tryToReduceUnicode
, Boolean hasLengthByte
, Boolean hasNullByte
, Boolean noCopy
,
1172 CFAllocatorRef contentsDeallocator
, UInt32 converterFlags
) {
1174 CFMutableStringRef str
;
1175 CFVarWidthCharBuffer vBuf
;
1177 Boolean useLengthByte
= false;
1178 Boolean useNullByte
= false;
1179 Boolean useInlineData
= false;
1181 #if INSTRUMENT_SHARED_STRINGS
1182 const char *recordedEncoding
;
1183 char encodingBuffer
[128];
1184 if (encoding
== kCFStringEncodingUnicode
) recordedEncoding
= "Unicode";
1185 else if (encoding
== kCFStringEncodingASCII
) recordedEncoding
= "ASCII";
1186 else if (encoding
== kCFStringEncodingUTF8
) recordedEncoding
= "UTF8";
1187 else if (encoding
== kCFStringEncodingMacRoman
) recordedEncoding
= "MacRoman";
1189 sprintf(encodingBuffer
, "0x%lX", (unsigned long)encoding
);
1190 recordedEncoding
= encodingBuffer
;
1194 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
1196 if (contentsDeallocator
== ALLOCATORSFREEFUNC
) {
1197 contentsDeallocator
= alloc
;
1198 } else if (contentsDeallocator
== NULL
) {
1199 contentsDeallocator
= __CFGetDefaultAllocator();
1202 if ((NULL
!= kCFEmptyString
) && (numBytes
== 0) && (alloc
== kCFAllocatorSystemDefault
)) { // If we are using the system default allocator, and the string is empty, then use the empty string!
1203 if (noCopy
&& (contentsDeallocator
!= kCFAllocatorNull
)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
1204 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1206 return (CFStringRef
)CFRetain(kCFEmptyString
); // Quick exit; won't catch all empty strings, but most
1209 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1211 vBuf
.shouldFreeChars
= false; // We use this to remember to free the buffer possibly allocated by decode
1213 // Record whether we're starting out with an ASCII-superset string, because we need to know this later for the string ROM; this may get changed later if we successfully convert down from Unicode. We only record this once because __CFCanUseEightBitCFStringForBytes() can be expensive.
1214 Boolean stringSupportsEightBitCFRepresentation
= encoding
!= kCFStringEncodingUnicode
&& __CFCanUseEightBitCFStringForBytes((const uint8_t *)bytes
, numBytes
, encoding
);
1216 // We may also change noCopy within this function if we have to decode the string into an external buffer. We do not want to avoid the use of the string ROM merely because we tried to be efficient and reuse the decoded buffer for the CFString's external storage. Therefore, we use this variable to track whether we actually can ignore the noCopy flag (which may or may not be set anyways).
1217 Boolean stringROMShouldIgnoreNoCopy
= false;
1219 // First check to see if the data needs to be converted...
1220 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
1222 if ((encoding
== kCFStringEncodingUnicode
&& possiblyExternalFormat
) || encoding
!= kCFStringEncodingUnicode
&& ! stringSupportsEightBitCFRepresentation
) {
1223 const void *realBytes
= (uint8_t *) bytes
+ (hasLengthByte
? 1 : 0);
1224 CFIndex realNumBytes
= numBytes
- (hasLengthByte
? 1 : 0);
1225 Boolean usingPassedInMemory
= false;
1227 vBuf
.allocator
= __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
1228 vBuf
.chars
.unicode
= NULL
; // This will cause the decode function to allocate memory if necessary
1230 if (!__CFStringDecodeByteStream3((const uint8_t *)realBytes
, realNumBytes
, encoding
, false, &vBuf
, &usingPassedInMemory
, converterFlags
)) {
1231 // Note that if the string can't be created, we don't free the buffer, even if there is a contents deallocator. This is on purpose.
1235 encoding
= vBuf
.isASCII
? kCFStringEncodingASCII
: kCFStringEncodingUnicode
;
1237 // Update our flag according to whether the decoded buffer is ASCII
1238 stringSupportsEightBitCFRepresentation
= vBuf
.isASCII
;
1240 if (!usingPassedInMemory
) {
1242 // Because __CFStringDecodeByteStream3() allocated our buffer, it's OK for us to free it if we can get the string from the ROM.
1243 stringROMShouldIgnoreNoCopy
= true;
1245 // Make the parameters fit the new situation
1246 numBytes
= vBuf
.isASCII
? vBuf
.numChars
: (vBuf
.numChars
* sizeof(UniChar
));
1247 hasLengthByte
= hasNullByte
= false;
1249 // Get rid of the original buffer if its not being used
1250 if (noCopy
&& (contentsDeallocator
!= kCFAllocatorNull
)) {
1251 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1253 contentsDeallocator
= alloc
; // At this point we are using the string's allocator, as the original buffer is gone...
1255 // See if we can reuse any storage the decode func might have allocated
1256 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1258 if (vBuf
.shouldFreeChars
&& (alloc
== vBuf
.allocator
) && encoding
== kCFStringEncodingUnicode
) {
1259 vBuf
.shouldFreeChars
= false; // Transferring ownership to the CFString
1260 bytes
= CFAllocatorReallocate(vBuf
.allocator
, (void *)vBuf
.chars
.unicode
, numBytes
, 0); // Tighten up the storage
1262 #if INSTRUMENT_SHARED_STRINGS
1263 if (encoding
== kCFStringEncodingASCII
) recordedEncoding
= "ForeignASCII-NoCopy";
1264 else recordedEncoding
= "ForeignUnicode-NoCopy";
1267 #if INSTRUMENT_SHARED_STRINGS
1268 if (encoding
== kCFStringEncodingASCII
) recordedEncoding
= "ForeignASCII-Copy";
1269 else recordedEncoding
= "ForeignUnicode-Copy";
1271 bytes
= vBuf
.chars
.unicode
;
1272 noCopy
= false; // Can't do noCopy anymore
1273 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1278 // At this point, all necessary input arguments have been changed to reflect the new state
1280 } else if (encoding
== kCFStringEncodingUnicode
&& tryToReduceUnicode
) { // Check to see if we can reduce Unicode to ASCII
1282 CFIndex len
= numBytes
/ sizeof(UniChar
);
1283 Boolean allASCII
= true;
1285 for (cnt
= 0; cnt
< len
; cnt
++) if (((const UniChar
*)bytes
)[cnt
] > 127) {
1290 if (allASCII
) { // Yes we can!
1292 Boolean newHasLengthByte
= __CFCanUseLengthByte(len
);
1293 numBytes
= (len
+ 1 + (newHasLengthByte
? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte
1294 // See if we can use that temporary local buffer in vBuf...
1295 if (numBytes
>= __kCFVarWidthLocalBufferSize
) {
1296 mem
= ptr
= (uint8_t *)CFAllocatorAllocate(alloc
, numBytes
, 0);
1297 if (__CFOASafe
) __CFSetLastAllocationEventName(mem
, "CFString (store)");
1299 mem
= ptr
= (uint8_t *)(vBuf
.localBuffer
);
1301 if (mem
) { // If we can't allocate memory for some reason, use what we had (that is, as if we didn't have all ASCII)
1302 // Copy the Unicode bytes into the new ASCII buffer
1303 hasLengthByte
= newHasLengthByte
;
1305 if (hasLengthByte
) *ptr
++ = (uint8_t)len
;
1306 for (cnt
= 0; cnt
< len
; cnt
++) ptr
[cnt
] = (uint8_t)(((const UniChar
*)bytes
)[cnt
]);
1308 if (noCopy
&& (contentsDeallocator
!= kCFAllocatorNull
)) {
1309 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1311 // Now make everything look like we had an ASCII buffer to start with
1313 encoding
= kCFStringEncodingASCII
;
1314 contentsDeallocator
= alloc
; // At this point we are using the string's allocator, as the original buffer is gone...
1315 noCopy
= (numBytes
>= __kCFVarWidthLocalBufferSize
); // If we had to allocate it, make sure it's kept around
1316 numBytes
--; // Should not contain the NULL byte at end...
1317 stringSupportsEightBitCFRepresentation
= true; // We're ASCII now!
1318 stringROMShouldIgnoreNoCopy
= true; // We allocated this buffer, so we should feel free to get rid of it if we can use the string ROM
1319 #if INSTRUMENT_SHARED_STRINGS
1320 recordedEncoding
= "U->A";
1325 // At this point, all necessary input arguments have been changed to reflect the new state
1328 // Now determine the necessary size
1330 Boolean stringSupportsROM
= stringSupportsEightBitCFRepresentation
;
1332 #if INSTRUMENT_SHARED_STRINGS
1333 if (stringSupportsROM
) {
1334 const void *realBytes
= (uint8_t *) bytes
+ (hasLengthByte
? 1 : 0);
1335 CFIndex realNumBytes
= numBytes
- !! hasLengthByte
;
1336 __CFRecordStringAllocationEvent(recordedEncoding
, realBytes
, realNumBytes
);
1340 CFStringRef romResult
= NULL
;
1344 if (stringSupportsROM
) {
1345 // Disable the string ROM if necessary
1346 static char sDisableStringROM
= -1;
1347 if (sDisableStringROM
== -1) sDisableStringROM
= !! getenv("CFStringDisableROM");
1349 if (sDisableStringROM
== 0) romResult
= _CFSearchStringROM(bytes
+ !! hasLengthByte
, numBytes
- !! hasLengthByte
);
1351 /* if we get a result from our ROM, and noCopy is set, then deallocate the buffer immediately */
1353 if (noCopy
&& (contentsDeallocator
!= kCFAllocatorNull
)) {
1354 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1357 /* these don't get used again, but clear them for consistency */
1361 /* set our result to the ROM result which is not really mutable, of course, but that's OK because we don't try to modify it. */
1362 str
= (CFMutableStringRef
)romResult
;
1367 // Now determine the necessary size
1371 size
= sizeof(void *); // Pointer to the buffer
1372 if (contentsDeallocator
!= alloc
&& contentsDeallocator
!= kCFAllocatorNull
) {
1373 size
+= sizeof(void *); // The contentsDeallocator
1375 if (!hasLengthByte
) size
+= sizeof(CFIndex
); // Explicit length
1376 useLengthByte
= hasLengthByte
;
1377 useNullByte
= hasNullByte
;
1379 } else { // Inline data; reserve space for it
1381 useInlineData
= true;
1384 if (hasLengthByte
|| (encoding
!= kCFStringEncodingUnicode
&& __CFCanUseLengthByte(numBytes
))) {
1385 useLengthByte
= true;
1386 if (!hasLengthByte
) size
+= 1;
1388 size
+= sizeof(CFIndex
); // Explicit length
1390 if (hasNullByte
|| encoding
!= kCFStringEncodingUnicode
) {
1396 #ifdef STRING_SIZE_STATS
1397 // Dump alloced CFString size info every so often
1399 static unsigned sizes
[256] = {0};
1400 int allocedSize
= size
+ sizeof(CFRuntimeBase
);
1401 if (allocedSize
< 255) sizes
[allocedSize
]++; else sizes
[255]++;
1402 if ((++cnt
% 1000) == 0) {
1403 printf ("\nTotal: %d\n", cnt
);
1404 int i
; for (i
= 0; i
< 256; i
++) printf("%03d: %5d%s", i
, sizes
[i
], ((i
% 8) == 7) ? "\n" : " ");
1408 // Finally, allocate!
1410 str
= (CFMutableStringRef
)_CFRuntimeCreateInstance(alloc
, __kCFStringTypeID
, size
, NULL
);
1412 if (__CFOASafe
) __CFSetLastAllocationEventName(str
, "CFString (immutable)");
1414 __CFStrSetInfoBits(str
,
1415 (useInlineData
? __kCFHasInlineContents
: (contentsDeallocator
== alloc
? __kCFNotInlineContentsDefaultFree
: (contentsDeallocator
== kCFAllocatorNull
? __kCFNotInlineContentsNoFree
: __kCFNotInlineContentsCustomFree
))) |
1416 ((encoding
== kCFStringEncodingUnicode
) ? __kCFIsUnicode
: 0) |
1417 (useNullByte
? __kCFHasNullByte
: 0) |
1418 (useLengthByte
? __kCFHasLengthByte
: 0));
1420 if (!useLengthByte
) {
1421 CFIndex length
= numBytes
- (hasLengthByte
? 1 : 0);
1422 if (encoding
== kCFStringEncodingUnicode
) length
/= sizeof(UniChar
);
1423 __CFStrSetExplicitLength(str
, length
);
1426 if (useInlineData
) {
1427 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
1428 if (useLengthByte
&& !hasLengthByte
) *contents
++ = (uint8_t)numBytes
;
1429 memmove(contents
, bytes
, numBytes
);
1430 if (useNullByte
) contents
[numBytes
] = 0;
1432 __CFStrSetContentPtr(str
, bytes
);
1433 if (contentsDeallocator
!= alloc
&& contentsDeallocator
!= kCFAllocatorNull
) __CFStrSetContentsDeallocator(str
, (CFAllocatorRef
)CFRetain(contentsDeallocator
));
1436 if (noCopy
&& (contentsDeallocator
!= kCFAllocatorNull
)) {
1437 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1441 if (vBuf
.shouldFreeChars
) CFAllocatorDeallocate(vBuf
.allocator
, (void *)bytes
);
1446 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1448 CFStringRef
__CFStringCreateImmutableFunnel2(
1449 CFAllocatorRef alloc
, const void *bytes
, CFIndex numBytes
, CFStringEncoding encoding
,
1450 Boolean possiblyExternalFormat
, Boolean tryToReduceUnicode
, Boolean hasLengthByte
, Boolean hasNullByte
, Boolean noCopy
,
1451 CFAllocatorRef contentsDeallocator
) {
1452 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, possiblyExternalFormat
, tryToReduceUnicode
, hasLengthByte
, hasNullByte
, noCopy
, contentsDeallocator
, 0);
1457 CFStringRef
CFStringCreateWithPascalString(CFAllocatorRef alloc
, ConstStringPtr pStr
, CFStringEncoding encoding
) {
1458 CFIndex len
= (CFIndex
)(*(uint8_t *)pStr
);
1459 return __CFStringCreateImmutableFunnel3(alloc
, pStr
, len
+1, encoding
, false, false, true, false, false, ALLOCATORSFREEFUNC
, 0);
1463 CFStringRef
CFStringCreateWithCString(CFAllocatorRef alloc
, const char *cStr
, CFStringEncoding encoding
) {
1464 CFIndex len
= strlen(cStr
);
1465 return __CFStringCreateImmutableFunnel3(alloc
, cStr
, len
, encoding
, false, false, false, true, false, ALLOCATORSFREEFUNC
, 0);
1468 CFStringRef
CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc
, ConstStringPtr pStr
, CFStringEncoding encoding
, CFAllocatorRef contentsDeallocator
) {
1469 CFIndex len
= (CFIndex
)(*(uint8_t *)pStr
);
1470 return __CFStringCreateImmutableFunnel3(alloc
, pStr
, len
+1, encoding
, false, false, true, false, true, contentsDeallocator
, 0);
1474 CFStringRef
CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc
, const char *cStr
, CFStringEncoding encoding
, CFAllocatorRef contentsDeallocator
) {
1475 CFIndex len
= strlen(cStr
);
1476 return __CFStringCreateImmutableFunnel3(alloc
, cStr
, len
, encoding
, false, false, false, true, true, contentsDeallocator
, 0);
1480 CFStringRef
CFStringCreateWithCharacters(CFAllocatorRef alloc
, const UniChar
*chars
, CFIndex numChars
) {
1481 return __CFStringCreateImmutableFunnel3(alloc
, chars
, numChars
* sizeof(UniChar
), kCFStringEncodingUnicode
, false, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1485 CFStringRef
CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc
, const UniChar
*chars
, CFIndex numChars
, CFAllocatorRef contentsDeallocator
) {
1486 return __CFStringCreateImmutableFunnel3(alloc
, chars
, numChars
* sizeof(UniChar
), kCFStringEncodingUnicode
, false, false, false, false, true, contentsDeallocator
, 0);
1490 CFStringRef
CFStringCreateWithBytes(CFAllocatorRef alloc
, const uint8_t *bytes
, CFIndex numBytes
, CFStringEncoding encoding
, Boolean externalFormat
) {
1491 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, externalFormat
, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1494 CFStringRef
_CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc
, const uint8_t *bytes
, CFIndex numBytes
, CFStringEncoding encoding
, Boolean externalFormat
, CFAllocatorRef contentsDeallocator
) {
1495 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, externalFormat
, true, false, false, true, contentsDeallocator
, 0);
1498 CFStringRef
CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc
, const uint8_t *bytes
, CFIndex numBytes
, CFStringEncoding encoding
, Boolean externalFormat
, CFAllocatorRef contentsDeallocator
) {
1499 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, externalFormat
, true, false, false, true, contentsDeallocator
, 0);
1502 CFStringRef
CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc
, CFDictionaryRef formatOptions
, CFStringRef format
, va_list arguments
) {
1503 return _CFStringCreateWithFormatAndArgumentsAux(alloc
, NULL
, formatOptions
, format
, arguments
);
1506 CFStringRef
_CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc
, CFStringRef (*copyDescFunc
)(void *, const void *), CFDictionaryRef formatOptions
, CFStringRef format
, va_list arguments
) {
1508 CFMutableStringRef outputString
= CFStringCreateMutable(__CFGetDefaultAllocator(), 0); //should use alloc if no copy/release
1509 __CFStrSetDesiredCapacity(outputString
, 120); // Given this will be tightened later, choosing a larger working string is fine
1510 _CFStringAppendFormatAndArgumentsAux(outputString
, copyDescFunc
, formatOptions
, format
, arguments
);
1511 // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1512 // (However, this does make the string inline, and cause the supplied allocator to be used...)
1513 str
= (CFStringRef
)CFStringCreateCopy(alloc
, outputString
);
1514 CFRelease(outputString
);
1518 CFStringRef
CFStringCreateWithFormat(CFAllocatorRef alloc
, CFDictionaryRef formatOptions
, CFStringRef format
, ...) {
1522 va_start(argList
, format
);
1523 result
= CFStringCreateWithFormatAndArguments(alloc
, formatOptions
, format
, argList
);
1529 CFStringRef
CFStringCreateWithSubstring(CFAllocatorRef alloc
, CFStringRef str
, CFRange range
) {
1530 // CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, CFStringRef , str, "_createSubstringWithRange:", CFRangeMake(range.location, range.length));
1532 __CFAssertIsString(str
);
1533 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
1535 if ((range
.location
== 0) && (range
.length
== __CFStrLength(str
))) { /* The substring is the whole string... */
1536 return (CFStringRef
)CFStringCreateCopy(alloc
, str
);
1537 } else if (__CFStrIsEightBit(str
)) {
1538 const uint8_t *contents
= (const uint8_t *)__CFStrContents(str
);
1539 return __CFStringCreateImmutableFunnel3(alloc
, contents
+ range
.location
+ __CFStrSkipAnyLengthByte(str
), range
.length
, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC
, 0);
1541 const UniChar
*contents
= (UniChar
*)__CFStrContents(str
);
1542 return __CFStringCreateImmutableFunnel3(alloc
, contents
+ range
.location
, range
.length
* sizeof(UniChar
), kCFStringEncodingUnicode
, false, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1546 CFStringRef
CFStringCreateCopy(CFAllocatorRef alloc
, CFStringRef str
) {
1547 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringRef, str, "copy");
1549 __CFAssertIsString(str
);
1550 if (!__CFStrIsMutable((CFStringRef
)str
) && // If the string is not mutable
1551 ((alloc
? alloc
: __CFGetDefaultAllocator()) == __CFGetAllocator(str
)) && // and it has the same allocator as the one we're using
1552 (__CFStrIsInline((CFStringRef
)str
) || __CFStrFreeContentsWhenDone((CFStringRef
)str
) || __CFStrIsConstant((CFStringRef
)str
))) { // and the characters are inline, or are owned by the string, or the string is constant
1553 CFRetain(str
); // Then just retain instead of making a true copy
1556 if (__CFStrIsEightBit((CFStringRef
)str
)) {
1557 const uint8_t *contents
= (const uint8_t *)__CFStrContents((CFStringRef
)str
);
1558 return __CFStringCreateImmutableFunnel3(alloc
, contents
+ __CFStrSkipAnyLengthByte((CFStringRef
)str
), __CFStrLength2((CFStringRef
)str
, contents
), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC
, 0);
1560 const UniChar
*contents
= (const UniChar
*)__CFStrContents((CFStringRef
)str
);
1561 return __CFStringCreateImmutableFunnel3(alloc
, contents
, __CFStrLength2((CFStringRef
)str
, contents
) * sizeof(UniChar
), kCFStringEncodingUnicode
, false, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
/*** Constant string stuff... ***/

/* Table which holds constant strings created with CFSTR, when -fconstant-cfstrings option is not used. These dynamically created constant strings are stored in constantStringTable. The keys are the 8-bit constant C-strings from the compiler; the values are the CFStrings created for them. _CFSTRLock protects this table.
*/
// Created lazily by __CFStringMakeConstantString(); NULL until the first constant string is requested.
static CFMutableDictionaryRef constantStringTable = NULL;
// Spin lock taken around lookups and insertions on constantStringTable.
static CFSpinLock_t _CFSTRLock = CFSpinLockInit;
1574 static CFStringRef
__cStrCopyDescription(const void *ptr
) {
1575 return CFStringCreateWithCStringNoCopy(kCFAllocatorSystemDefault
, (const char *)ptr
, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull
);
1578 static Boolean
__cStrEqual(const void *ptr1
, const void *ptr2
) {
1579 return (strcmp((const char *)ptr1
, (const char *)ptr2
) == 0);
1582 static CFHashCode
__cStrHash(const void *ptr
) {
1583 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1584 const char *cStr
= (const char *)ptr
;
1585 CFIndex len
= strlen(cStr
);
1586 CFHashCode result
= 0;
1587 if (len
<= 4) { // All chars
1589 while (cnt
--) result
+= (result
<< 8) + *cStr
++;
1590 } else { // First and last 2 chars
1591 result
+= (result
<< 8) + cStr
[0];
1592 result
+= (result
<< 8) + cStr
[1];
1593 result
+= (result
<< 8) + cStr
[len
-2];
1594 result
+= (result
<< 8) + cStr
[len
-1];
1596 result
+= (result
<< (len
& 31));
1601 CFStringRef
__CFStringMakeConstantString(const char *cStr
) {
1604 // StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging
1605 if ('\0' == *cStr
) return kCFEmptyString
;
1607 if (constantStringTable
== NULL
) {
1608 CFDictionaryKeyCallBacks constantStringCallBacks
= {0, NULL
, NULL
, __cStrCopyDescription
, __cStrEqual
, __cStrHash
};
1609 CFDictionaryValueCallBacks constantStringValueCallBacks
= kCFTypeDictionaryValueCallBacks
;
1610 constantStringValueCallBacks
.equal
= NULL
; // So that we only find strings that are ==
1611 CFMutableDictionaryRef table
= CFDictionaryCreateMutable(kCFAllocatorSystemDefault
, 0, &constantStringCallBacks
, &constantStringValueCallBacks
);
1612 _CFDictionarySetCapacity(table
, 2500); // avoid lots of rehashing
1613 __CFSpinLock(&_CFSTRLock
);
1614 if (constantStringTable
== NULL
) constantStringTable
= table
;
1615 __CFSpinUnlock(&_CFSTRLock
);
1616 if (constantStringTable
!= table
) CFRelease(table
);
1619 __CFSpinLock(&_CFSTRLock
);
1620 if ((result
= (CFStringRef
)CFDictionaryGetValue(constantStringTable
, cStr
))) {
1621 __CFSpinUnlock(&_CFSTRLock
);
1623 __CFSpinUnlock(&_CFSTRLock
);
1627 Boolean isASCII
= true;
1628 // Given this code path is rarer these days, OK to do this extra work to verify the strings
1629 const char *tmp
= cStr
;
1631 if (*(tmp
++) & 0x80) {
1637 CFMutableStringRef ms
= CFStringCreateMutable(kCFAllocatorSystemDefault
, 0);
1640 CFStringAppendFormat(ms
, NULL
, (*tmp
& 0x80) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp
);
1643 CFLog(kCFLogLevelWarning
, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms
);
1646 // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility
1647 result
= CFStringCreateWithCString(kCFAllocatorSystemDefault
, cStr
, kCFStringEncodingMacRoman
);
1648 if (result
== NULL
) {
1649 CFLog(__kCFLogAssertion
, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1652 if (__CFOASafe
) __CFSetLastAllocationEventName((void *)result
, "CFString (CFSTR)");
1653 if (__CFStrIsEightBit(result
)) {
1654 key
= (char *)__CFStrContents(result
) + __CFStrSkipAnyLengthByte(result
);
1655 } else { // For some reason the string is not 8-bit!
1656 key
= (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault
, strlen(cStr
) + 1, 0);
1657 if (__CFOASafe
) __CFSetLastAllocationEventName((void *)key
, "CFString (CFSTR key)");
1658 strlcpy(key
, cStr
, strlen(cStr
) + 1); // !!! We will leak this, if the string is removed from the table (or table is freed)
1662 CFStringRef resultToBeReleased
= result
;
1664 __CFSpinLock(&_CFSTRLock
);
1665 count
= CFDictionaryGetCount(constantStringTable
);
1666 CFDictionaryAddValue(constantStringTable
, key
, result
);
1667 if (CFDictionaryGetCount(constantStringTable
) == count
) { // add did nothing, someone already put it there
1668 result
= (CFStringRef
)CFDictionaryGetValue(constantStringTable
, key
);
1671 ((struct __CFString
*)result
)->base
._rc
= 0;
1673 ((struct __CFString
*)result
)->base
._cfinfo
[CF_RC_BITS
] = 0;
1676 __CFSpinUnlock(&_CFSTRLock
);
1677 // This either eliminates the extra retain on the freshly created string, or frees it, if it was actually not inserted into the table
1678 CFRelease(resultToBeReleased
);
1686 static Boolean
__CFStrIsConstantString(CFStringRef str
) {
1687 Boolean found
= false;
1688 if (constantStringTable
) {
1689 __CFSpinLock(&_CFSTRLock
);
1690 found
= CFDictionaryContainsValue(constantStringTable
, str
);
1691 __CFSpinUnlock(&_CFSTRLock
);
1699 void __CFStringCleanup (void) {
1700 /* in case library is unloaded, release store for the constant string table */
1701 if (constantStringTable
!= NULL
) {
1703 __CFConstantStringTableBeingFreed
= true;
1704 CFRelease(constantStringTable
);
1705 __CFConstantStringTableBeingFreed
= false;
1707 CFRelease(constantStringTable
);
// Can pass in NSString as replacement string
// Call with numRanges > 0, and incrementing ranges
//
// Replaces each of the numRanges ranges of str with the same replacement string. The string is
// first resized for all ranges at once; the replacement is then written into the first range and
// copied from there into each of the remaining ranges.

static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) {
    int cnt;
    CFStringRef copy = NULL;
    // Replacing a string with itself: work from a private copy, since str is about to be modified
    if (replacement == str) copy = replacement = CFStringCreateCopy(kCFAllocatorSystemDefault, replacement);   // Very special and hopefully rare case
    CFIndex replacementLength = CFStringGetLength(replacement);

    // Last argument is true only for a non-empty Unicode replacement
    // NOTE(review): presumably this makes room for the replacements and widens str to Unicode when asked -- confirm against __CFStringChangeSizeMultiple
    __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));

    if (__CFStrIsUnicode(str)) {
        UniChar *contents = (UniChar *)__CFStrContents(str);
        UniChar *firstReplacement = contents + ranges[0].location;
        // Extract the replacementString into the first location, then copy from there
        CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement);
        for (cnt = 1; cnt < numRanges; cnt++) {
            // The ranges are in terms of the original string; so offset by the change in length due to insertion
            contents += replacementLength - ranges[cnt - 1].length;
            memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar));
        }
    } else {
        uint8_t *contents = (uint8_t *)__CFStrContents(str);
        uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str);
        // Extract the replacementString into the first location, then copy from there
        CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL);
        contents += __CFStrSkipAnyLengthByte(str);      // Now contents will simply track the location to insert next string into
        for (cnt = 1; cnt < numRanges; cnt++) {
            // The ranges are in terms of the original string; so offset by the change in length due to insertion
            contents += replacementLength - ranges[cnt - 1].length;
            memmove(contents + ranges[cnt].location, firstReplacement, replacementLength);
        }
    }
    // Drop the private copy made for the self-replacement case
    if (copy) CFRelease(copy);
}
// Can pass in NSString as replacement string
//
// Replaces the characters of str in range with the replacement string: str is resized for the
// new length, then the replacement's characters (or 8-bit bytes) are written at range.location.

CF_INLINE void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
    CFStringRef copy = NULL;
    // Replacing a string with itself: work from a private copy, since str is about to be modified
    if (replacement == str) copy = replacement = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, replacement);   // Very special and hopefully rare case
    CFIndex replacementLength = CFStringGetLength(replacement);

    // Last argument is true only for a non-empty Unicode replacement
    // NOTE(review): presumably this makes room for the replacement and widens str to Unicode when asked -- confirm against __CFStringChangeSize
    __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));

    if (__CFStrIsUnicode(str)) {
        UniChar *contents = (UniChar *)__CFStrContents(str);
        CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location);
    } else {
        uint8_t *contents = (uint8_t *)__CFStrContents(str);
        // Destination skips any length byte at the front of the 8-bit storage
        CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL);
    }

    // Drop the private copy made for the self-replacement case
    if (copy) CFRelease(copy);
}
/* If client does not provide a minimum capacity
*/
#define DEFAULTMINCAPACITY 32

/* Common creation path for mutable strings.
   alloc: allocator for the string object; NULL selects the default allocator.
   maxLength: 0 means no fixed limit (desired capacity becomes DEFAULTMINCAPACITY); any other value marks the string fixed and is used as the desired capacity.
   additionalInfoBits: OR-ed with __kCFIsMutable into the string's info bits.
   Returns the new string with a NULL buffer and zero length and capacity, or whatever NULL-like value _CFRuntimeCreateInstance() produced on failure.
*/
CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) {
    CFMutableStringRef str;
    Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false;

    if (alloc == NULL) alloc = __CFGetDefaultAllocator();

    // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
    // Without one, the instance is requested sizeof(CFAllocatorRef) bytes smaller
    str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(struct __notInlineMutable) - (hasExternalContentsAllocator ? 0 : sizeof(CFAllocatorRef)), NULL);
    if (str) {
        if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)");

        __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits);
        str->variants.notInlineMutable.buffer = NULL;
        __CFStrSetExplicitLength(str, 0);
        // Clear all four notInlineMutable flag fields in one chained assignment
        str->variants.notInlineMutable.hasGap = str->variants.notInlineMutable.isFixedCapacity = str->variants.notInlineMutable.isExternalMutable = str->variants.notInlineMutable.capacityProvidedExternally = 0;
        if (maxLength != 0) __CFStrSetIsFixed(str);
        __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength);
        __CFStrSetCapacity(str, 0);
    }
    return str;
}
1796 CFMutableStringRef
CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc
, UniChar
*chars
, CFIndex numChars
, CFIndex capacity
, CFAllocatorRef externalCharactersAllocator
) {
1797 CFOptionFlags contentsAllocationBits
= externalCharactersAllocator
? ((externalCharactersAllocator
== kCFAllocatorNull
) ? __kCFNotInlineContentsNoFree
: __kCFHasContentsAllocator
) : __kCFNotInlineContentsDefaultFree
;
1798 CFMutableStringRef string
= __CFStringCreateMutableFunnel(alloc
, 0, contentsAllocationBits
| __kCFIsUnicode
);
1800 __CFStrSetIsExternalMutable(string
);
1801 if (contentsAllocationBits
== __kCFHasContentsAllocator
) __CFStrSetContentsAllocator(string
, (CFAllocatorRef
)CFRetain(externalCharactersAllocator
));
1802 CFStringSetExternalCharactersNoCopy(string
, chars
, numChars
, capacity
);
1807 CFMutableStringRef
CFStringCreateMutable(CFAllocatorRef alloc
, CFIndex maxLength
) {
1808 return __CFStringCreateMutableFunnel(alloc
, maxLength
, __kCFNotInlineContentsDefaultFree
);
1811 CFMutableStringRef
CFStringCreateMutableCopy(CFAllocatorRef alloc
, CFIndex maxLength
, CFStringRef string
) {
1812 CFMutableStringRef newString
;
1814 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFMutableStringRef, string, "mutableCopy");
1816 __CFAssertIsString(string
);
1818 newString
= CFStringCreateMutable(alloc
, maxLength
);
1819 __CFStringReplace(newString
, CFRangeMake(0, 0), string
);
1825 __private_extern__
void _CFStrSetDesiredCapacity(CFMutableStringRef str
, CFIndex len
) {
1826 __CFAssertIsStringAndMutable(str
);
1827 __CFStrSetDesiredCapacity(str
, len
);
1831 /* This one is for CF
1833 CFIndex
CFStringGetLength(CFStringRef str
) {
1834 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFIndex
, str
, "length");
1836 __CFAssertIsString(str
);
1837 return __CFStrLength(str
);
1840 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1842 CFIndex
_CFStringGetLength2(CFStringRef str
) {
1843 return __CFStrLength(str
);
1847 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere.
1849 CF_INLINE UniChar
__CFStringGetCharacterAtIndexGuts(CFStringRef str
, CFIndex idx
, const uint8_t *contents
) {
1850 if (__CFStrIsEightBit(str
)) {
1851 contents
+= __CFStrSkipAnyLengthByte(str
);
1853 if (!__CFCharToUniCharFunc
&& (contents
[idx
] >= 128)) {
1854 // Can't do log here, as it might be too early
1855 fprintf(stderr
, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
1858 return __CFCharToUniCharTable
[contents
[idx
]];
1861 return ((UniChar
*)contents
)[idx
];
1864 /* This one is for the CF API
1866 UniChar
CFStringGetCharacterAtIndex(CFStringRef str
, CFIndex idx
) {
1867 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, UniChar
, str
, "characterAtIndex:", idx
);
1869 __CFAssertIsString(str
);
1870 __CFAssertIndexIsInStringBounds(str
, idx
);
1871 return __CFStringGetCharacterAtIndexGuts(str
, idx
, (const uint8_t *)__CFStrContents(str
));
1874 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1876 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str
, CFIndex idx
, UniChar
*ch
) {
1877 const uint8_t *contents
= (const uint8_t *)__CFStrContents(str
);
1878 if (idx
>= __CFStrLength2(str
, contents
) && __CFStringNoteErrors()) return _CFStringErrBounds
;
1879 *ch
= __CFStringGetCharacterAtIndexGuts(str
, idx
, contents
);
1880 return _CFStringErrNone
;
1884 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
1886 CF_INLINE
void __CFStringGetCharactersGuts(CFStringRef str
, CFRange range
, UniChar
*buffer
, const uint8_t *contents
) {
1887 if (__CFStrIsEightBit(str
)) {
1888 __CFStrConvertBytesToUnicode(((uint8_t *)contents
) + (range
.location
+ __CFStrSkipAnyLengthByte(str
)), buffer
, range
.length
);
1890 const UniChar
*uContents
= ((UniChar
*)contents
) + range
.location
;
1891 memmove(buffer
, uContents
, range
.length
* sizeof(UniChar
));
1895 /* This one is for the CF API
1897 void CFStringGetCharacters(CFStringRef str
, CFRange range
, UniChar
*buffer
) {
1898 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "getCharacters:range:", buffer
, CFRangeMake(range
.location
, range
.length
));
1900 __CFAssertIsString(str
);
1901 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
1902 __CFStringGetCharactersGuts(str
, range
, buffer
, (const uint8_t *)__CFStrContents(str
));
1905 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1907 int _CFStringCheckAndGetCharacters(CFStringRef str
, CFRange range
, UniChar
*buffer
) {
1908 const uint8_t *contents
= (const uint8_t *)__CFStrContents(str
);
1909 if (range
.location
+ range
.length
> __CFStrLength2(str
, contents
) && __CFStringNoteErrors()) return _CFStringErrBounds
;
1910 __CFStringGetCharactersGuts(str
, range
, buffer
, contents
);
1911 return _CFStringErrNone
;
/* Encodes the characters of str in range into buffer using the requested encoding.
   Fast path: for a non-ObjC, 8-bit string whose stored encoding matches the request (or is ASCII
   while the request is an ASCII superset), the bytes are copied directly and the number of
   characters handled is returned. Everything else goes through __CFStringEncodeByteStream().
   buffer may be NULL; usedBufLen, when non-NULL, receives the byte count.
*/
CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) {

    /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */
    __CFAssertIsNotNegative(maxBufLen);

    if (!CF_IS_OBJC(__kCFStringTypeID, str)) {  // If we can grope the ivars, let's do it...
        __CFAssertIsString(str);
        __CFAssertRangeIsInStringBounds(str, range.location, range.length);

        if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) {        // Requested encoding is equal to the encoding in string
            const unsigned char *contents = (const unsigned char *)__CFStrContents(str);
            CFIndex cLength = range.length;

            // With a buffer, the count is clamped to maxBufLen; without one, the full range length is reported
            if (buffer) {
                if (cLength > maxBufLen) cLength = maxBufLen;
                memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength);
            }
            if (usedBufLen) *usedBufLen = cLength;

            return cLength;
        }
    }

    return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen);
}
1942 ConstStringPtr
CFStringGetPascalStringPtr (CFStringRef str
, CFStringEncoding encoding
) {
1944 if (!CF_IS_OBJC(__kCFStringTypeID
, str
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1945 __CFAssertIsString(str
);
1946 if (__CFStrHasLengthByte(str
) && __CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII
1947 const uint8_t *contents
= (const uint8_t *)__CFStrContents(str
);
1948 if (__CFStrHasExplicitLength(str
) && (__CFStrLength2(str
, contents
) != (SInt32
)(*contents
))) return NULL
; // Invalid length byte
1949 return (ConstStringPtr
)contents
;
1951 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1957 const char * CFStringGetCStringPtr(CFStringRef str
, CFStringEncoding encoding
) {
1959 if (encoding
!= __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII
!= __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding
))) return NULL
;
1960 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1962 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, const char *, str
, "_fastCStringContents:", true);
1964 __CFAssertIsString(str
);
1966 if (__CFStrHasNullByte(str
)) {
1967 // Note: this is called a lot, 27000 times to open a small xcode project with one file open.
1968 // Of these uses about 1500 are for cStrings/utf8strings.
1969 return (const char *)__CFStrContents(str
) + __CFStrSkipAnyLengthByte(str
);
1976 const UniChar
*CFStringGetCharactersPtr(CFStringRef str
) {
1978 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, const UniChar
*, str
, "_fastCharacterContents");
1980 __CFAssertIsString(str
);
1981 if (__CFStrIsUnicode(str
)) return (const UniChar
*)__CFStrContents(str
);
1986 Boolean
CFStringGetPascalString(CFStringRef str
, Str255 buffer
, CFIndex bufferSize
, CFStringEncoding encoding
) {
1990 __CFAssertIsNotNegative(bufferSize
);
1991 if (bufferSize
< 1) return false;
1993 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1994 length
= CFStringGetLength(str
);
1995 if (!__CFCanUseLengthByte(length
)) return false; // Can't fit into pstring
1997 const uint8_t *contents
;
1999 __CFAssertIsString(str
);
2001 contents
= (const uint8_t *)__CFStrContents(str
);
2002 length
= __CFStrLength2(str
, contents
);
2004 if (!__CFCanUseLengthByte(length
)) return false; // Can't fit into pstring
2006 if (__CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
2007 if (length
>= bufferSize
) return false;
2008 memmove((void*)(1 + (const char*)buffer
), (__CFStrSkipAnyLengthByte(str
) + contents
), length
);
2009 *buffer
= (unsigned char)length
;
2014 if (__CFStringEncodeByteStream(str
, 0, length
, false, encoding
, false, (UInt8
*)(1 + (uint8_t *)buffer
), bufferSize
- 1, &usedLen
) != length
) {
2017 if (bufferSize
> 0) {
2018 strlcpy((char *)buffer
+ 1, CONVERSIONFAILURESTR
, bufferSize
- 1);
2019 buffer
[0] = (unsigned char)((CFIndex
)sizeof(CONVERSIONFAILURESTR
) < (bufferSize
- 1) ? (CFIndex
)sizeof(CONVERSIONFAILURESTR
) : (bufferSize
- 1));
2022 if (bufferSize
> 0) buffer
[0] = 0;
2026 *buffer
= (unsigned char)usedLen
;
2030 Boolean
CFStringGetCString(CFStringRef str
, char *buffer
, CFIndex bufferSize
, CFStringEncoding encoding
) {
2031 const uint8_t *contents
;
2034 __CFAssertIsNotNegative(bufferSize
);
2035 if (bufferSize
< 1) return false;
2037 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID
, Boolean
, str
, "_getCString:maxLength:encoding:", buffer
, bufferSize
- 1, encoding
);
2039 __CFAssertIsString(str
);
2041 contents
= (const uint8_t *)__CFStrContents(str
);
2042 len
= __CFStrLength2(str
, contents
);
2044 if (__CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
2045 if (len
>= bufferSize
) return false;
2046 memmove(buffer
, contents
+ __CFStrSkipAnyLengthByte(str
), len
);
2052 if (__CFStringEncodeByteStream(str
, 0, len
, false, encoding
, false, (unsigned char*) buffer
, bufferSize
- 1, &usedLen
) == len
) {
2053 buffer
[usedLen
] = '\0';
2057 strlcpy(buffer
, CONVERSIONFAILURESTR
, bufferSize
);
2059 if (bufferSize
> 0) buffer
[0] = 0;
2066 static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale
) {
2067 CFStringRef collatorID
;
2068 const char *langID
= NULL
;
2069 static const void *lastLocale
= NULL
;
2070 static const char *lastLangID
= NULL
;
2071 static CFSpinLock_t lock
= CFSpinLockInit
;
2073 __CFSpinLock(&lock
);
2074 if ((NULL
!= lastLocale
) && (lastLocale
== locale
)) {
2075 __CFSpinUnlock(&lock
);
2078 __CFSpinUnlock(&lock
);
2080 collatorID
= CFLocaleGetValue(locale
, __kCFLocaleCollatorID
);
2082 // This is somewhat depending on CFLocale implementation always creating CFString for locale identifer ???
2083 if (__CFStrLength(collatorID
) > 1) {
2084 const void *contents
= __CFStrContents(collatorID
);
2088 if (__CFStrIsEightBit(collatorID
)) {
2089 string
= ((const char *)contents
) + __CFStrSkipAnyLengthByte(collatorID
);
2091 const UTF16Char
*characters
= (const UTF16Char
*)contents
;
2093 buffer
[0] = (char)*(characters
++);
2094 buffer
[1] = (char)*characters
;
2098 if (!strncmp(string
, "az", 2)) { // Azerbaijani
2100 } else if (!strncmp(string
, "lt", 2)) { // Lithuanian
2102 } else if (!strncmp(string
, "tr", 2)) { // Turkish
2107 __CFSpinLock(&lock
);
2108 lastLocale
= locale
;
2109 lastLangID
= langID
;
2110 __CFSpinUnlock(&lock
);
2115 static int8_t __CFCheckLocaleCFType
= -1;
// Decides whether `locale` may be treated as a CFLocale.
// __CFCheckLocaleCFType is a lazily computed tri-state: a negative value means "not yet
// computed"; it is then set to !_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther).
// When the flag is 0 the locale is accepted without a type check; otherwise the locale's
// CFTypeID must equal CFLocaleGetTypeID().
2117 CF_INLINE
bool _CFCanUseLocale(CFLocaleRef locale
) {
2119 if (__CFCheckLocaleCFType
< 0) __CFCheckLocaleCFType
= !_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther
);
2120 if (!__CFCheckLocaleCFType
|| (CFGetTypeID(locale
) == CFLocaleGetTypeID())) return true;
2125 #define MAX_CASE_MAPPING_BUF (8)
2126 #define ZERO_WIDTH_JOINER (0x200D)
2127 #define COMBINING_GRAPHEME_JOINER (0x034F)
2129 #define HANGUL_CHOSEONG_START (0x1100)
2130 #define HANGUL_CHOSEONG_END (0x115F)
2131 #define HANGUL_JUNGSEONG_START (0x1160)
2132 #define HANGUL_JUNGSEONG_END (0x11A2)
2133 #define HANGUL_JONGSEONG_START (0x11A8)
2134 #define HANGUL_JONGSEONG_END (0x11F9)
2136 #define HANGUL_SYLLABLE_START (0xAC00)
2137 #define HANGUL_SYLLABLE_END (0xD7AF)
2140 // Returns the number of characters filled into outCharacters. If no change, returns 0. maxBufferLength should be at least 8
// Folds the character cluster beginning with `character` (located at `index` in `buffer`)
// into UTF-32 code points stored in outCharacters, according to the comparison `flags`:
//   kCFCompareCaseInsensitive   - ASCII A-Z are lowered inline; other characters that appear in
//                                 the lowercase / case-folding bitmaps go through
//                                 CFUniCharMapCaseTo(..., kCFUniCharCaseFold, ...).
//   kCFCompareWidthInsensitive  - characters in 0xFF00..0xFFEF are passed to
//                                 CFUniCharCompatibilityDecompose.
//   kCFCompareDiacriticsInsensitiveCompatibilityMask / kCFCompareNonliteral
//                               - canonical decomposition, plus collection of the
//                                 grapheme-extend characters that follow in the buffer.
// langCode, when non-NULL, keeps capital 'I' out of the ASCII fast path and enables the
// "tr"/"az" special-casing.
// maxBufferLength is the capacity of outCharacters.
// Returns filledLength. When filledLength > 0 and consumedLength is non-NULL, *consumedLength
// receives (currentIndex - index).
2141 static CFIndex
__CFStringFoldCharacterClusterAtIndex(UTF32Char character
, CFStringInlineBuffer
*buffer
, CFIndex index
, CFOptionFlags flags
, const uint8_t *langCode
, UTF32Char
*outCharacters
, CFIndex maxBufferLength
, CFIndex
*consumedLength
) {
2142 CFIndex filledLength
= 0, currentIndex
= index
;
2144 if (0 != character
) {
2145 UTF16Char lowSurrogate
;
2146 CFIndex planeNo
= (character
>> 16);
2147 bool isTurkikCapitalI
= false;
2148 static const uint8_t *decompBMP
= NULL
;
2149 static const uint8_t *graphemeBMP
= NULL
;
// Plane-0 bitmaps are looked up once and kept in the statics above; bitmaps for other
// planes are fetched with CFUniCharGetBitmapPtrForPlane where they are needed.
2151 if (NULL
== decompBMP
) {
2152 decompBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, 0);
2153 graphemeBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, 0);
// ASCII fast path. Capital 'I' is excluded whenever a language code is supplied so that the
// language-specific handling further down gets to see it.
2158 if ((character
< 0x0080) && ((NULL
== langCode
) || (character
!= 'I'))) { // ASCII
2159 if ((flags
& kCFCompareCaseInsensitive
) && (character
>= 'A') && (character
<= 'Z')) {
2160 character
+= ('a' - 'A');
2161 *outCharacters
= character
;
2165 // do width-insensitive mapping
2166 if ((flags
& kCFCompareWidthInsensitive
) && (character
>= 0xFF00) && (character
<= 0xFFEF)) {
2167 (void)CFUniCharCompatibilityDecompose(&character
, 1, 1);
2168 *outCharacters
= character
;
// A high surrogate followed by a low surrogate in the buffer is combined into one
// supplementary-plane character, and planeNo is recomputed for it.
2173 if ((0 == planeNo
) && CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((lowSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
)))) {
2174 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, lowSurrogate
);
2176 planeNo
= (character
>> 16);
// Canonical decomposition, only for characters listed in the decomposable bitmap.
2180 if (flags
& (kCFCompareDiacriticsInsensitiveCompatibilityMask
|kCFCompareNonliteral
)) {
2181 if (CFUniCharIsMemberOfBitmap(character
, ((0 == planeNo
) ? decompBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, planeNo
)))) {
2182 UTF32Char original
= character
;
2184 filledLength
= CFUniCharDecomposeCharacter(character
, outCharacters
, maxBufferLength
);
2185 character
= *outCharacters
;
// Diacritic-insensitive matching keeps only the first decomposed character when it is
// below 0x0510; without kCFCompareNonliteral the original character is restored.
2187 if ((flags
& kCFCompareDiacriticsInsensitiveCompatibilityMask
) && (character
< 0x0510)) {
2188 filledLength
= 1; // reset if Roman, Greek, Cyrillic
2189 } else if (0 == (flags
& kCFCompareNonliteral
)) {
2190 character
= original
;
// Case folding.
2197 if (flags
& kCFCompareCaseInsensitive
) {
2198 const uint8_t *nonBaseBitmap
;
2199 bool filterNonBase
= (((flags
& kCFCompareDiacriticsInsensitiveCompatibilityMask
) && (character
< 0x0510)) ? true : false);
2200 static const uint8_t *lowerBMP
= NULL
;
2201 static const uint8_t *caseFoldBMP
= NULL
;
2203 if (NULL
== lowerBMP
) {
2204 lowerBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet
, 0);
2205 caseFoldBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet
, 0);
// For "tr"/"az", a capital I combined with U+0307 (either already present in the decomposed
// output or next in the buffer) is mapped to 'i'.
2208 if ((NULL
!= langCode
) && ('I' == character
) && ((0 == strcmp((const char *)langCode
, "tr")) || (0 == strcmp((const char *)langCode
, "az")))) { // do Turkik special-casing
2209 if (filledLength
> 1) {
2210 if (0x0307 == outCharacters
[1]) {
2211 if (--filledLength
> 1) memmove((outCharacters
+ 1), (outCharacters
+ 2), sizeof(UTF32Char
) * (filledLength
- 1));
2212 character
= *outCharacters
= 'i';
2213 isTurkikCapitalI
= true;
2215 } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
)) {
2216 character
= *outCharacters
= 'i';
2219 isTurkikCapitalI
= true;
// General case mapping: the result arrives as UTF-16 in caseFoldBuffer and is converted to
// UTF-32 while being written to outCharacters.
2222 if (!isTurkikCapitalI
&& (CFUniCharIsMemberOfBitmap(character
, ((0 == planeNo
) ? lowerBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet
, planeNo
))) || CFUniCharIsMemberOfBitmap(character
, ((0 == planeNo
) ? caseFoldBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet
, planeNo
))))) {
2223 UTF16Char caseFoldBuffer
[MAX_CASE_MAPPING_BUF
];
2224 const UTF16Char
*bufferP
= caseFoldBuffer
, *bufferLimit
;
2225 UTF32Char
*outCharactersP
= outCharacters
;
2226 uint32_t bufferLength
= CFUniCharMapCaseTo(character
, caseFoldBuffer
, MAX_CASE_MAPPING_BUF
, kCFUniCharCaseFold
, 0, langCode
);
2228 bufferLimit
= bufferP
+ bufferLength
;
2230 if (filledLength
> 0) --filledLength
; // decrement filledLength (will add back later)
2232 // make space for casefold characters
2233 if ((filledLength
> 0) && (bufferLength
> 1)) {
2234 CFIndex totalScalerLength
= 0;
// Count the mapping in scalar values (a surrogate pair counts once) to know how far the
// already-decomposed tail has to move.
2236 while (bufferP
< bufferLimit
) {
2237 if (CFUniCharIsSurrogateHighCharacter(*(bufferP
++)) && (bufferP
< bufferLimit
) && CFUniCharIsSurrogateLowCharacter(*bufferP
)) ++bufferP
;
2238 ++totalScalerLength
;
2240 memmove(outCharacters
+ totalScalerLength
, outCharacters
+ 1, filledLength
* sizeof(UTF32Char
));
2241 bufferP
= caseFoldBuffer
;
2245 while (bufferP
< bufferLimit
) {
2246 character
= *(bufferP
++);
2247 if (CFUniCharIsSurrogateHighCharacter(character
) && (bufferP
< bufferLimit
) && CFUniCharIsSurrogateLowCharacter(*bufferP
)) {
2248 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, *(bufferP
++));
2249 nonBaseBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (character
>> 16));
2251 nonBaseBitmap
= graphemeBMP
;
// With filterNonBase set, grapheme-extend characters produced by the mapping are not stored.
2254 if (!filterNonBase
|| !CFUniCharIsMemberOfBitmap(character
, nonBaseBitmap
)) {
2255 *(outCharactersP
++) = character
;
2263 // collect following combining marks
2264 if (flags
& (kCFCompareDiacriticsInsensitiveCompatibilityMask
|kCFCompareNonliteral
)) {
2265 const uint8_t *nonBaseBitmap
;
2266 const uint8_t *decompBitmap
;
2267 bool doFill
= (((flags
& kCFCompareDiacriticsInsensitiveCompatibilityMask
) && (character
< 0x0510)) ? false : true);
2269 if (0 == filledLength
) {
2270 *outCharacters
= character
; // filledLength will be updated below on demand
2272 if (doFill
) { // check if really needs to fill
2273 UTF32Char nonBaseCharacter
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
);
2275 if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter
) && CFUniCharIsSurrogateLowCharacter((lowSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
+ 1)))) {
2276 nonBaseCharacter
= CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter
, lowSurrogate
);
2277 nonBaseBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (nonBaseCharacter
>> 16));
2278 decompBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (nonBaseCharacter
>> 16));
2280 nonBaseBitmap
= graphemeBMP
;
2281 decompBitmap
= decompBMP
;
2284 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter
, nonBaseBitmap
)) {
2285 filledLength
= 1; // For the base character
2287 if ((0 == (flags
& kCFCompareDiacriticsInsensitiveCompatibilityMask
)) || (nonBaseCharacter
> 0x050F)) {
2288 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter
, decompBitmap
)) {
2289 filledLength
+= CFUniCharDecomposeCharacter(nonBaseCharacter
, &(outCharacters
[filledLength
]), maxBufferLength
- filledLength
);
2291 outCharacters
[filledLength
++] = nonBaseCharacter
;
// nonBaseBitmap == graphemeBMP is used as the marker for "one UTF-16 unit consumed";
// otherwise a surrogate pair (two units) was consumed.
2294 currentIndex
+= ((nonBaseBitmap
== graphemeBMP
) ? 1 : 2);
2301 while (filledLength
< maxBufferLength
) { // do the rest
2302 character
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
);
2304 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((lowSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, currentIndex
+ 1)))) {
2305 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, lowSurrogate
);
2306 nonBaseBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (character
>> 16));
2307 decompBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (character
>> 16));
2309 nonBaseBitmap
= graphemeBMP
;
2310 decompBitmap
= decompBMP
;
2312 if (isTurkikCapitalI
) {
2313 isTurkikCapitalI
= false;
2314 } else if (CFUniCharIsMemberOfBitmap(character
, nonBaseBitmap
)) {
2316 if (CFUniCharIsMemberOfBitmap(character
, decompBitmap
)) {
2317 CFIndex currentLength
= CFUniCharDecomposeCharacter(character
, &(outCharacters
[filledLength
]), maxBufferLength
- filledLength
);
2319 if (0 == currentLength
) break; // didn't fit
2321 filledLength
+= currentLength
;
2323 outCharacters
[filledLength
++] = character
;
2325 } else if (0 == filledLength
) {
2326 filledLength
= 1; // For the base character
2328 currentIndex
+= ((nonBaseBitmap
== graphemeBMP
) ? 1 : 2);
// Walk back over the trailing run of grapheme-extend characters and hand that run to
// CFUniCharPrioritySort.
2334 if (filledLength
> 1) {
2335 UTF32Char
*sortCharactersLimit
= outCharacters
+ filledLength
;
2336 UTF32Char
*sortCharacters
= sortCharactersLimit
- 1;
2338 while ((outCharacters
< sortCharacters
) && CFUniCharIsMemberOfBitmap(*sortCharacters
, ((*sortCharacters
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (*sortCharacters
>> 16))))) --sortCharacters
;
2340 if ((sortCharactersLimit
- sortCharacters
) > 1) CFUniCharPrioritySort(sortCharacters
, (sortCharactersLimit
- sortCharacters
)); // priority sort
// Report how many UTF-16 units of the source were consumed, but only when something was filled.
2345 if ((filledLength
> 0) && (NULL
!= consumedLength
)) *consumedLength
= (currentIndex
- index
);
2347 return filledLength
;
2350 #define kCFStringStackBufferLength (64)
// Compares the characters of `string` inside rangeToCompare with the whole of `string2`
// and returns a CFComparisonResult.
// compareOptions bits examined here: kCFCompareCaseInsensitive, kCFCompareNonliteral,
// kCFCompareDiacriticsInsensitiveCompatibilityMask, kCFCompareWidthInsensitive,
// kCFCompareNumerically, kCFCompareLocalized and kCFCompareForcedOrdering.
// When kCFCompareLocalized is set and `locale` is NULL, the current locale is obtained with
// CFLocaleCopyCurrent(). A non-NULL locale also supplies langCode, which is passed to the
// folding helper and keeps capital 'I' out of the ASCII lower-casing shortcuts.
// NOTE(review): freeLocale presumably marks a locale copied here so that it can be released
// before each return -- confirm every exit path pairs the copy with a release.
// NOTE(review): several fast paths store (rangeToCompare.length - str2Len), a CFIndex
// difference, in an `int cmpResult`; verify this cannot truncate where CFIndex is 64-bit.
2352 CFComparisonResult
CFStringCompareWithOptionsAndLocale(CFStringRef string
, CFStringRef string2
, CFRange rangeToCompare
, CFOptionFlags compareOptions
, CFLocaleRef locale
) {
2353 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2354 UTF32Char strBuf1
[kCFStringStackBufferLength
];
2355 UTF32Char strBuf2
[kCFStringStackBufferLength
];
2356 CFStringInlineBuffer inlineBuf1
, inlineBuf2
;
2357 UTF32Char str1Char
, str2Char
;
2358 CFIndex str1UsedLen
, str2UsedLen
;
2359 CFIndex str1Index
= 0, str2Index
= 0, strBuf1Index
= 0, strBuf2Index
= 0, strBuf1Len
= 0, strBuf2Len
= 0;
2360 CFIndex str2Len
= CFStringGetLength(string2
);
2361 bool caseInsensitive
= ((compareOptions
& kCFCompareCaseInsensitive
) ? true : false);
2362 bool diacriticsInsensitive
= ((compareOptions
& kCFCompareDiacriticsInsensitiveCompatibilityMask
) ? true : false);
2363 bool equalityOptions
= ((compareOptions
& (kCFCompareCaseInsensitive
|kCFCompareNonliteral
|kCFCompareDiacriticsInsensitiveCompatibilityMask
|kCFCompareWidthInsensitive
)) ? true : false);
2364 bool numerically
= ((compareOptions
& kCFCompareNumerically
) ? true : false);
2365 const uint8_t *graphemeBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, 0);
2366 const uint8_t *langCode
;
2367 CFComparisonResult compareResult
= kCFCompareEqualTo
;
2368 UTF16Char otherChar
;
2369 Boolean freeLocale
= false;
// NOTE(review): where this definition is active, every _CFCompareStringsWithLocale(...) use
// below evaluates to 0 -- confirm which build configurations select it.
2371 #define _CFCompareStringsWithLocale(A, B, C, D, E, F) (0)
2374 if ((compareOptions
& kCFCompareLocalized
) && (NULL
== locale
)) {
2375 locale
= CFLocaleCopyCurrent();
2379 langCode
= ((NULL
== locale
) ? NULL
: (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale
));
// Fast paths: with no locale and no numeric ordering, the raw storage of both strings is
// compared directly when both expose it (8-bit bytes first, then UTF-16 characters).
2381 if ((NULL
== locale
) && !numerically
) { // could do binary comp (be careful when adding new flags)
2382 CFStringEncoding eightBitEncoding
= __CFStringGetEightBitStringEncoding();
2383 const uint8_t *str1Bytes
= (const uint8_t *)CFStringGetCStringPtr(string
, eightBitEncoding
);
2384 const uint8_t *str2Bytes
= (const uint8_t *)CFStringGetCStringPtr(string2
, eightBitEncoding
);
2385 CFIndex factor
= sizeof(uint8_t);
2387 if ((NULL
!= str1Bytes
) && (NULL
!= str2Bytes
)) {
2388 compareOptions
&= ~kCFCompareNonliteral
; // remove non-literal
2390 if (kCFStringEncodingASCII
== eightBitEncoding
) {
2391 if (caseInsensitive
) {
2392 int cmpResult
= strncasecmp_l((const char *)str1Bytes
+ rangeToCompare
.location
, (const char *)str2Bytes
, __CFMin(rangeToCompare
.length
, str2Len
), NULL
);
// Equal over the common prefix: the length difference decides.
2394 if (0 == cmpResult
) cmpResult
= rangeToCompare
.length
- str2Len
;
2396 return ((0 == cmpResult
) ? kCFCompareEqualTo
: ((cmpResult
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
// Non-ASCII 8-bit encoding: byte-wise scan that lower-cases A-Z only when both bytes are
// below 0x80.
2398 } else if (caseInsensitive
|| diacriticsInsensitive
) {
2399 CFIndex limitLength
= __CFMin(rangeToCompare
.length
, str2Len
);
2401 str1Bytes
+= rangeToCompare
.location
;
2403 while (str1Index
< limitLength
) {
2404 str1Char
= str1Bytes
[str1Index
];
2405 str2Char
= str2Bytes
[str1Index
];
2407 if (str1Char
!= str2Char
) {
2408 if ((str1Char
< 0x80) && (str2Char
< 0x80)) {
2409 if ((str1Char
>= 'A') && (str1Char
<= 'Z')) str1Char
+= ('a' - 'A');
2410 if ((str2Char
>= 'A') && (str2Char
<= 'Z')) str2Char
+= ('a' - 'A');
2412 if (str1Char
!= str2Char
) return ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
2421 str2Index
= str1Index
;
2423 if (str1Index
== limitLength
) {
2424 int cmpResult
= rangeToCompare
.length
- str2Len
;
2426 return ((0 == cmpResult
) ? kCFCompareEqualTo
: ((cmpResult
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
// Neither string has 8-bit storage and no equality options are set: try the UTF-16 buffers.
2429 } else if (!equalityOptions
&& (NULL
== str1Bytes
) && (NULL
== str2Bytes
)) {
2430 str1Bytes
= (const uint8_t *)CFStringGetCharactersPtr(string
);
2431 str2Bytes
= (const uint8_t *)CFStringGetCharactersPtr(string2
);
2432 factor
= sizeof(UTF16Char
);
2433 #if __LITTLE_ENDIAN__
// On little-endian targets the UTF-16 units are compared numerically one by one instead of
// with memcmp.
2434 if ((NULL
!= str1Bytes
) && (NULL
!= str2Bytes
)) { // we cannot use memcmp
2435 const UTF16Char
*str1
= ((const UTF16Char
*)str1Bytes
) + rangeToCompare
.location
;
2436 const UTF16Char
*str1Limit
= str1
+ __CFMin(rangeToCompare
.length
, str2Len
);
2437 const UTF16Char
*str2
= (const UTF16Char
*)str2Bytes
;
2438 CFIndex cmpResult
= 0;
2440 while ((0 == cmpResult
) && (str1
< str1Limit
)) cmpResult
= (CFIndex
)*(str1
++) - (CFIndex
)*(str2
++);
2442 if (0 == cmpResult
) cmpResult
= rangeToCompare
.length
- str2Len
;
2444 return ((0 == cmpResult
) ? kCFCompareEqualTo
: ((cmpResult
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
2446 #endif /* __LITTLE_ENDIAN__ */
// memcmp over the common prefix; `factor` is the size of one storage unit.
2448 if ((NULL
!= str1Bytes
) && (NULL
!= str2Bytes
)) {
2449 int cmpResult
= memcmp(str1Bytes
+ (rangeToCompare
.location
* factor
), str2Bytes
, __CFMin(rangeToCompare
.length
, str2Len
) * factor
);
2451 if (0 == cmpResult
) cmpResult
= rangeToCompare
.length
- str2Len
;
2453 return ((0 == cmpResult
) ? kCFCompareEqualTo
: ((cmpResult
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
// General path: read both strings through inline buffers. strBuf1/strBuf2 hold a folded
// cluster while strBufNLen is non-zero; otherwise characters come straight from the string.
2457 CFStringInitInlineBuffer(string
, &inlineBuf1
, rangeToCompare
);
2458 CFStringInitInlineBuffer(string2
, &inlineBuf2
, CFRangeMake(0, str2Len
));
2460 while ((str1Index
< rangeToCompare
.length
) && (str2Index
< str2Len
)) {
2461 if (strBuf1Len
== 0) {
2462 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
2463 if (caseInsensitive
&& (str1Char
>= 'A') && (str1Char
<= 'Z') && ((NULL
== langCode
) || (str1Char
!= 'I'))) str1Char
+= ('a' - 'A');
2466 str1Char
= strBuf1
[strBuf1Index
++];
2468 if (strBuf2Len
== 0) {
2469 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
);
2470 if (caseInsensitive
&& (str2Char
>= 'A') && (str2Char
<= 'Z') && ((NULL
== langCode
) || (str2Char
!= 'I'))) str2Char
+= ('a' - 'A');
2473 str2Char
= strBuf2
[strBuf2Index
++];
// Numeric ordering: when both sides are at an ASCII digit, each run of digits is accumulated
// into a uint64_t and the two values are compared.
2476 if (numerically
&& ((0 == strBuf1Len
) && (str1Char
<= '9') && (str1Char
>= '0')) && ((0 == strBuf2Len
) && (str2Char
<= '9') && (str2Char
>= '0'))) { // If both are not ASCII digits, then don't do numerical comparison here
2477 uint64_t intValue1
= 0, intValue2
= 0; // !!! Doesn't work if numbers are > max uint64_t
2480 intValue1
= (intValue1
* 10) + (str1Char
- '0');
2481 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, ++str1Index
);
2482 } while ((str1Char
<= '9') && (str1Char
>= '0'));
2485 intValue2
= intValue2
* 10 + (str2Char
- '0');
2486 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, ++str2Index
);
2487 } while ((str2Char
<= '9') && (str2Char
>= '0'));
2489 if (intValue1
== intValue2
) {
2491 } else if (intValue1
< intValue2
) {
2492 if (freeLocale
&& locale
) {
2495 return kCFCompareLessThan
;
2497 if (freeLocale
&& locale
) {
2500 return kCFCompareGreaterThan
;
2504 if (str1Char
!= str2Char
) {
// Without any equality option a difference is final: raw ordering when there is no locale,
// otherwise the locale comparison result.
2505 if (!equalityOptions
) {
2506 CFComparisonResult res
= ((NULL
== locale
) ? ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
) : _CFCompareStringsWithLocale(&inlineBuf1
, CFRangeMake(strBuf1Index
, rangeToCompare
.length
- strBuf1Index
), &inlineBuf2
, CFRangeMake(strBuf2Index
, str2Len
- strBuf2Index
), compareOptions
, locale
));
2507 if (freeLocale
&& locale
) {
// Forced ordering: remember the ordering of the first raw difference; it is what the final
// return yields if the strings otherwise compare equal.
2513 if ((compareOptions
& kCFCompareForcedOrdering
) && (kCFCompareEqualTo
== compareResult
)) compareResult
= ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
2515 if ((str1Char
< 0x80) && (str2Char
< 0x80)) {
2516 if (NULL
!= locale
) {
2517 CFComparisonResult res
= _CFCompareStringsWithLocale(&inlineBuf1
, CFRangeMake(strBuf1Index
, rangeToCompare
.length
- strBuf1Index
), &inlineBuf2
, CFRangeMake(strBuf2Index
, str2Len
- strBuf2Index
), compareOptions
, locale
);
2518 if (freeLocale
&& locale
) {
2522 } else if (!caseInsensitive
) {
2523 if (freeLocale
&& locale
) {
2526 return ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
// Combine surrogate pairs so that the checks below see full code points.
2530 if (CFUniCharIsSurrogateHighCharacter(str1Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1)))) {
2531 str1Char
= CFUniCharGetLongCharacterForSurrogatePair(str1Char
, otherChar
);
2535 if (CFUniCharIsSurrogateHighCharacter(str2Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
+ 1)))) {
2536 str2Char
= CFUniCharGetLongCharacterForSurrogatePair(str2Char
, otherChar
);
// Diacritic-insensitive: a grapheme-extend character read directly from either string (not
// from a folded buffer) is treated as matching the other side's character.
2540 if (diacriticsInsensitive
&& (str1Index
> 0)) {
2541 bool str1Skip
= false;
2542 bool str2Skip
= false;
2544 if ((0 == strBuf1Len
) && CFUniCharIsMemberOfBitmap(str1Char
, ((str1Char
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str1Char
>> 16))))) {
2545 str1Char
= str2Char
;
2548 if ((0 == strBuf2Len
) && CFUniCharIsMemberOfBitmap(str2Char
, ((str2Char
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str2Char
>> 16))))) {
2549 str2Char
= str1Char
;
2553 if (str1Skip
!= str2Skip
) {
2554 if (str1Skip
) str2Index
-= str2UsedLen
;
2555 if (str2Skip
) str1Index
-= str1UsedLen
;
// Still different: fold the cluster on side 1, and on side 2 if needed, then compare the
// first folded characters.
2559 if (str1Char
!= str2Char
) {
2560 if (0 == strBuf1Len
) {
2561 strBuf1Len
= __CFStringFoldCharacterClusterAtIndex(str1Char
, &inlineBuf1
, str1Index
, compareOptions
, langCode
, strBuf1
, kCFStringStackBufferLength
, &str1UsedLen
);
2562 if (strBuf1Len
> 0) {
2563 str1Char
= *strBuf1
;
2568 if ((0 == strBuf1Len
) && (0 < strBuf2Len
)) {
2569 CFComparisonResult res
= ((NULL
== locale
) ? ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
) : _CFCompareStringsWithLocale(&inlineBuf1
, CFRangeMake(strBuf1Index
, rangeToCompare
.length
- strBuf1Index
), &inlineBuf2
, CFRangeMake(strBuf2Index
, str2Len
- strBuf2Index
), compareOptions
, locale
));
2570 if (freeLocale
&& locale
) {
2576 if ((0 == strBuf2Len
) && ((0 == strBuf1Len
) || (str1Char
!= str2Char
))) {
2577 strBuf2Len
= __CFStringFoldCharacterClusterAtIndex(str2Char
, &inlineBuf2
, str2Index
, compareOptions
, langCode
, strBuf2
, kCFStringStackBufferLength
, &str2UsedLen
);
2578 if (strBuf2Len
> 0) {
2579 str2Char
= *strBuf2
;
2582 if ((0 == strBuf2Len
) || (str1Char
!= str2Char
)) {
2583 CFComparisonResult res
= ((NULL
== locale
) ? ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
) : _CFCompareStringsWithLocale(&inlineBuf1
, CFRangeMake(strBuf1Index
, rangeToCompare
.length
- strBuf1Index
), &inlineBuf2
, CFRangeMake(strBuf2Index
, str2Len
- strBuf2Index
), compareOptions
, locale
));
2584 if (freeLocale
&& locale
) {
// Both sides have folded clusters: advance through them in lock step until one is exhausted
// or a mismatch is found.
2592 if ((strBuf1Len
> 0) && (strBuf2Len
> 0)) {
2593 while ((strBuf1Index
< strBuf1Len
) && (strBuf2Index
< strBuf2Len
)) {
2594 if (strBuf1
[strBuf1Index
] != strBuf2
[strBuf2Index
]) break;
2595 ++strBuf1Index
; ++strBuf2Index
;
2597 if ((strBuf1Index
< strBuf1Len
) && (strBuf2Index
< strBuf2Len
)) {
2598 CFComparisonResult res
= ((NULL
== locale
) ? ((str1Char
< str2Char
) ? kCFCompareLessThan
: kCFCompareGreaterThan
) : _CFCompareStringsWithLocale(&inlineBuf1
, CFRangeMake(strBuf1Index
, rangeToCompare
.length
- strBuf1Index
), &inlineBuf2
, CFRangeMake(strBuf2Index
, str2Len
- strBuf2Index
), compareOptions
, locale
));
2599 if (freeLocale
&& locale
) {
// A fully consumed folded buffer is retired, and the string index then advances by the
// number of source units recorded in strNUsedLen.
2607 if ((strBuf1Len
> 0) && (strBuf1Index
== strBuf1Len
)) strBuf1Len
= 0;
2608 if ((strBuf2Len
> 0) && (strBuf2Index
== strBuf2Len
)) strBuf2Len
= 0;
2610 if (strBuf1Len
== 0) str1Index
+= str1UsedLen
;
2611 if (strBuf2Len
== 0) str2Index
+= str2UsedLen
;
// Diacritic-insensitive: skip remaining non-ASCII grapheme-extend characters on each side so
// they do not count as leftover length.
2614 if (diacriticsInsensitive
) {
2615 while (str1Index
< rangeToCompare
.length
) {
2616 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
2617 if (str1Char
< 0x80) break; // found ASCII
2619 if (CFUniCharIsSurrogateHighCharacter(str1Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1)))) str1Char
= CFUniCharGetLongCharacterForSurrogatePair(str1Char
, otherChar
);
2621 if (!CFUniCharIsMemberOfBitmap(str1Char
, ((str1Char
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str1Char
>> 16))))) break;
2623 str1Index
+= ((str1Char
< 0x10000) ? 1 : 2);
2626 while (str2Index
< str2Len
) {
2627 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
);
2628 if (str2Char
< 0x80) break; // found ASCII
2630 if (CFUniCharIsSurrogateHighCharacter(str2Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
+ 1)))) str2Char
= CFUniCharGetLongCharacterForSurrogatePair(str2Char
, otherChar
);
2632 if (!CFUniCharIsMemberOfBitmap(str2Char
, ((str2Char
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str2Char
>> 16))))) break;
2634 str2Index
+= ((str2Char
< 0x10000) ? 1 : 2);
2638 if (freeLocale
&& locale
) {
// Leftover characters in `string` mean greater-than, leftover characters in `string2` mean
// less-than; otherwise the recorded compareResult is returned.
2642 return ((str1Index
< rangeToCompare
.length
) ? kCFCompareGreaterThan
: ((str2Index
< str2Len
) ? kCFCompareLessThan
: compareResult
));
2646 CFComparisonResult
CFStringCompareWithOptions(CFStringRef string
, CFStringRef string2
, CFRange rangeToCompare
, CFOptionFlags compareOptions
) { return CFStringCompareWithOptionsAndLocale(string
, string2
, rangeToCompare
, compareOptions
, NULL
); }
2648 CFComparisonResult
CFStringCompare(CFStringRef string
, CFStringRef str2
, CFOptionFlags options
) {
2649 return CFStringCompareWithOptions(string
, str2
, CFRangeMake(0, CFStringGetLength(string
)), options
);
2652 Boolean
CFStringFindWithOptionsAndLocale(CFStringRef string
, CFStringRef stringToFind
, CFRange rangeToSearch
, CFOptionFlags compareOptions
, CFLocaleRef locale
, CFRange
*result
) {
2653 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2654 CFIndex findStrLen
= CFStringGetLength(stringToFind
);
2655 Boolean didFind
= false;
2656 bool lengthVariants
= ((compareOptions
& (kCFCompareCaseInsensitive
|kCFCompareNonliteral
|kCFCompareDiacriticsInsensitiveCompatibilityMask
)) ? true : false);
2658 if ((findStrLen
> 0) && (rangeToSearch
.length
> 0) && ((findStrLen
<= rangeToSearch
.length
) || lengthVariants
)) {
2659 UTF32Char strBuf1
[kCFStringStackBufferLength
];
2660 UTF32Char strBuf2
[kCFStringStackBufferLength
];
2661 CFStringInlineBuffer inlineBuf1
, inlineBuf2
;
2662 UTF32Char str1Char
, str2Char
;
2663 CFStringEncoding eightBitEncoding
= __CFStringGetEightBitStringEncoding();
2664 const uint8_t *str1Bytes
= (const uint8_t *)CFStringGetCStringPtr(string
, eightBitEncoding
);
2665 const uint8_t *str2Bytes
= (const uint8_t *)CFStringGetCStringPtr(stringToFind
, eightBitEncoding
);
2666 const UTF32Char
*characters
, *charactersLimit
;
2667 const uint8_t *langCode
= NULL
;
2668 CFIndex fromLoc
, toLoc
;
2669 CFIndex str1Index
, str2Index
;
2670 CFIndex strBuf1Len
, strBuf2Len
;
2671 bool equalityOptions
= ((lengthVariants
|| (compareOptions
& kCFCompareWidthInsensitive
)) ? true : false);
2672 bool caseInsensitive
= ((compareOptions
& kCFCompareCaseInsensitive
) ? true : false);
2675 if (NULL
== locale
) {
2676 if (compareOptions
& kCFCompareLocalized
) {
2677 CFLocaleRef currentLocale
= CFLocaleCopyCurrent();
2678 langCode
= (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(currentLocale
);
2679 CFRelease(currentLocale
);
2682 langCode
= (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale
);
2685 CFStringInitInlineBuffer(string
, &inlineBuf1
, CFRangeMake(0, rangeToSearch
.location
+ rangeToSearch
.length
));
2686 CFStringInitInlineBuffer(stringToFind
, &inlineBuf2
, CFRangeMake(0, findStrLen
));
2688 if (compareOptions
& kCFCompareBackwards
) {
2689 fromLoc
= rangeToSearch
.location
+ rangeToSearch
.length
- (lengthVariants
? 1 : findStrLen
);
2690 toLoc
= (((compareOptions
& kCFCompareAnchored
) && !lengthVariants
) ? fromLoc
: rangeToSearch
.location
);
2692 fromLoc
= rangeToSearch
.location
;
2693 toLoc
= ((compareOptions
& kCFCompareAnchored
) ? fromLoc
: rangeToSearch
.location
+ rangeToSearch
.length
- (lengthVariants
? 1 : findStrLen
));
2696 delta
= ((fromLoc
<= toLoc
) ? 1 : -1);
2698 if ((NULL
!= str1Bytes
) && (NULL
!= str2Bytes
)) {
2699 CFIndex maxStr1Index
= (rangeToSearch
.location
+ rangeToSearch
.length
);
2700 uint8_t str1Byte
, str2Byte
;
2703 str1Index
= fromLoc
;
2706 while ((str1Index
< maxStr1Index
) && (str2Index
< findStrLen
)) {
2707 str1Byte
= str1Bytes
[str1Index
];
2708 str2Byte
= str2Bytes
[str2Index
];
2710 if (str1Byte
!= str2Byte
) {
2711 if (equalityOptions
) {
2712 if ((str1Byte
< 0x80) && ((NULL
== langCode
) || ('I' != str1Byte
))) {
2713 if (caseInsensitive
&& (str1Byte
>= 'A') && (str1Byte
<= 'Z')) str1Byte
+= ('a' - 'A');
2714 *strBuf1
= str1Byte
;
2717 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
2718 strBuf1Len
= __CFStringFoldCharacterClusterAtIndex(str1Char
, &inlineBuf1
, str1Index
, compareOptions
, langCode
, strBuf1
, kCFStringStackBufferLength
, NULL
);
2719 if (1 > strBuf1Len
) {
2720 *strBuf1
= str1Char
;
2724 if ((str2Byte
< 0x80) && ((NULL
== langCode
) || ('I' != str2Byte
))) {
2725 if (caseInsensitive
&& (str2Byte
>= 'A') && (str2Byte
<= 'Z')) str2Byte
+= ('a' - 'A');
2726 *strBuf2
= str2Byte
;
2729 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
);
2730 strBuf2Len
= __CFStringFoldCharacterClusterAtIndex(str2Char
, &inlineBuf2
, str2Index
, compareOptions
, langCode
, strBuf2
, kCFStringStackBufferLength
, NULL
);
2731 if (1 > strBuf2Len
) {
2732 *strBuf2
= str2Char
;
2737 if ((1 == strBuf1Len
) && (1 == strBuf2Len
)) { // normal case
2738 if (*strBuf1
!= *strBuf2
) break;
2742 if (!caseInsensitive
&& (strBuf1Len
!= strBuf2Len
)) break;
2743 if (memcmp(strBuf1
, strBuf2
, sizeof(UTF32Char
) * __CFMin(strBuf1Len
, strBuf2Len
))) break;
2745 if (strBuf1Len
< strBuf2Len
) {
2746 delta
= strBuf2Len
- strBuf1Len
;
2748 if ((str1Index
+ strBuf1Len
+ delta
) > (rangeToSearch
.location
+ rangeToSearch
.length
)) break;
2750 characters
= &(strBuf2
[strBuf1Len
]);
2751 charactersLimit
= characters
+ delta
;
2753 while (characters
< charactersLimit
) {
2754 strBuf1Len
= __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1), &inlineBuf1
, str1Index
+ 1, compareOptions
, langCode
, strBuf1
, kCFStringStackBufferLength
, NULL
);
2755 if ((strBuf1Len
> 0) || (*characters
!= *strBuf1
)) break;
2756 ++characters
; ++str1Index
;
2758 if (characters
< charactersLimit
) break;
2759 } else if (strBuf2Len
< strBuf1Len
) {
2760 delta
= strBuf1Len
- strBuf2Len
;
2762 if ((str2Index
+ strBuf2Len
+ delta
) > findStrLen
) break;
2764 characters
= &(strBuf1
[strBuf2Len
]);
2765 charactersLimit
= characters
+ delta
;
2767 while (characters
< charactersLimit
) {
2768 strBuf2Len
= __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str1Index
+ 1), &inlineBuf2
, str2Index
+ 1, compareOptions
, langCode
, strBuf2
, kCFStringStackBufferLength
, NULL
);
2769 if ((strBuf2Len
> 0) || (*characters
!= *strBuf2
)) break;
2770 ++characters
; ++str2Index
;
2772 if (characters
< charactersLimit
) break;
2779 ++str1Index
; ++str2Index
;
2782 if (str2Index
== findStrLen
) {
2783 if (((kCFCompareBackwards
|kCFCompareAnchored
) != (compareOptions
& (kCFCompareBackwards
|kCFCompareAnchored
))) || (str1Index
== (rangeToSearch
.location
+ rangeToSearch
.length
))) {
2785 if (NULL
!= result
) *result
= CFRangeMake(fromLoc
, str1Index
- fromLoc
);
2790 if (fromLoc
== toLoc
) break;
2793 } else if (equalityOptions
) {
2794 UTF16Char otherChar
;
2795 CFIndex str1UsedLen
, str2UsedLen
, strBuf1Index
= 0, strBuf2Index
= 0;
2796 bool diacriticsInsensitive
= ((compareOptions
& kCFCompareDiacriticsInsensitiveCompatibilityMask
) ? true : false);
2797 const uint8_t *graphemeBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, 0);
2798 const uint8_t *combClassBMP
= (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
2801 str1Index
= fromLoc
;
2804 strBuf1Len
= strBuf2Len
= 0;
2806 while (str2Index
< findStrLen
) {
2807 if (strBuf1Len
== 0) {
2808 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
2809 if (caseInsensitive
&& (str1Char
>= 'A') && (str1Char
<= 'Z') && ((NULL
== langCode
) || (str1Char
!= 'I'))) str1Char
+= ('a' - 'A');
2812 str1Char
= strBuf1
[strBuf1Index
++];
2814 if (strBuf2Len
== 0) {
2815 str2Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
);
2816 if (caseInsensitive
&& (str2Char
>= 'A') && (str2Char
<= 'Z') && ((NULL
== langCode
) || (str2Char
!= 'I'))) str2Char
+= ('a' - 'A');
2819 str2Char
= strBuf2
[strBuf2Index
++];
2822 if (str1Char
!= str2Char
) {
2823 if ((str1Char
< 0x80) && (str2Char
< 0x80) && ((NULL
== langCode
) || !caseInsensitive
)) break;
2825 if (CFUniCharIsSurrogateHighCharacter(str1Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1)))) {
2826 str1Char
= CFUniCharGetLongCharacterForSurrogatePair(str1Char
, otherChar
);
2830 if (CFUniCharIsSurrogateHighCharacter(str2Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
+ 1)))) {
2831 str2Char
= CFUniCharGetLongCharacterForSurrogatePair(str2Char
, otherChar
);
2835 if (diacriticsInsensitive
&& (str1Index
> fromLoc
)) {
2836 bool str1Skip
= false;
2837 bool str2Skip
= false;
2839 if ((0 == strBuf1Len
) && CFUniCharIsMemberOfBitmap(str1Char
, ((str1Char
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str1Char
>> 16))))) {
2840 str1Char
= str2Char
;
2843 if ((0 == strBuf2Len
) && CFUniCharIsMemberOfBitmap(str2Char
, ((str2Char
< 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str2Char
>> 16))))) {
2844 str2Char
= str1Char
;
2848 if (str1Skip
!= str2Skip
) {
2849 if (str1Skip
) str2Index
-= str2UsedLen
;
2850 if (str2Skip
) str1Index
-= str1UsedLen
;
2854 if (str1Char
!= str2Char
) {
2855 if (0 == strBuf1Len
) {
2856 strBuf1Len
= __CFStringFoldCharacterClusterAtIndex(str1Char
, &inlineBuf1
, str1Index
, compareOptions
, langCode
, strBuf1
, kCFStringStackBufferLength
, &str1UsedLen
);
2857 if (strBuf1Len
> 0) {
2858 str1Char
= *strBuf1
;
2863 if ((0 == strBuf1Len
) && (0 < strBuf2Len
)) break;
2865 if ((0 == strBuf2Len
) && ((0 == strBuf1Len
) || (str1Char
!= str2Char
))) {
2866 strBuf2Len
= __CFStringFoldCharacterClusterAtIndex(str2Char
, &inlineBuf2
, str2Index
, compareOptions
, langCode
, strBuf2
, kCFStringStackBufferLength
, &str2UsedLen
);
2867 if ((0 == strBuf2Len
) || (str1Char
!= *strBuf2
)) break;
2872 if ((strBuf1Len
> 0) && (strBuf2Len
> 0)) {
2873 while ((strBuf1Index
< strBuf1Len
) && (strBuf2Index
< strBuf2Len
)) {
2874 if (strBuf1
[strBuf1Index
] != strBuf2
[strBuf2Index
]) break;
2875 ++strBuf1Index
; ++strBuf2Index
;
2877 if ((strBuf1Index
< strBuf1Len
) && (strBuf2Index
< strBuf2Len
)) break;
2881 if ((strBuf1Len
> 0) && (strBuf1Index
== strBuf1Len
)) strBuf1Len
= 0;
2882 if ((strBuf2Len
> 0) && (strBuf2Index
== strBuf2Len
)) strBuf2Len
= 0;
2884 if (strBuf1Len
== 0) str1Index
+= str1UsedLen
;
2885 if (strBuf2Len
== 0) str2Index
+= str2UsedLen
;
2888 if (str2Index
== findStrLen
) {
2891 if (strBuf1Len
> 0) {
2894 if ((compareOptions
& kCFCompareDiacriticsInsensitiveCompatibilityMask
) && (strBuf1
[0] < 0x0510)) {
2895 while (strBuf1Index
< strBuf1Len
) {
2896 if (!CFUniCharIsMemberOfBitmap(strBuf1
[strBuf1Index
], ((strBuf1
[strBuf1Index
] < 0x10000) ? graphemeBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (strBuf1
[strBuf1Index
] >> 16))))) break;
2900 if (strBuf1Index
== strBuf1Len
) {
2901 str1Index
+= str1UsedLen
;
2907 if (match
&& (compareOptions
& (kCFCompareDiacriticsInsensitiveCompatibilityMask
|kCFCompareNonliteral
)) && (str1Index
< (rangeToSearch
.location
+ rangeToSearch
.length
))) {
2908 const uint8_t *nonBaseBitmap
;
2910 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
);
2912 if (CFUniCharIsSurrogateHighCharacter(str1Char
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
+ 1)))) {
2913 str1Char
= CFUniCharGetLongCharacterForSurrogatePair(str1Char
, otherChar
);
2914 nonBaseBitmap
= CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet
, (str1Char
>> 16));
2916 nonBaseBitmap
= graphemeBMP
;
2919 if (CFUniCharIsMemberOfBitmap(str1Char
, nonBaseBitmap
)) {
2920 if (diacriticsInsensitive
) {
2921 if (str1Char
< 0x10000) {
2922 CFIndex index
= str1Index
;
2925 str1Char
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, --index
);
2926 } while (CFUniCharIsMemberOfBitmap(str1Char
, graphemeBMP
), (rangeToSearch
.location
< index
));
2928 if (str1Char
< 0x0510) {
2929 CFIndex maxIndex
= (rangeToSearch
.location
+ rangeToSearch
.length
);
2931 while (++str1Index
< maxIndex
) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
), graphemeBMP
)) break;
2937 } else if (!diacriticsInsensitive
) {
2938 otherChar
= CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
- 1);
2940 // this is assuming viramas are only in BMP ???
2941 if ((str1Char
== COMBINING_GRAPHEME_JOINER
) || (otherChar
== COMBINING_GRAPHEME_JOINER
) || (otherChar
== ZERO_WIDTH_JOINER
) || ((otherChar
>= HANGUL_CHOSEONG_START
) && (otherChar
<= HANGUL_JONGSEONG_END
)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar
, combClassBMP
) == 9)) {
2942 CFRange clusterRange
= CFStringGetRangeOfCharacterClusterAtIndex(string
, str1Index
- 1, kCFStringGraphemeCluster
);
2944 if (str1Index
< (clusterRange
.location
+ clusterRange
.length
)) match
= false;
2950 if (((kCFCompareBackwards
|kCFCompareAnchored
) != (compareOptions
& (kCFCompareBackwards
|kCFCompareAnchored
))) || (str1Index
== (rangeToSearch
.location
+ rangeToSearch
.length
))) {
2952 if (NULL
!= result
) *result
= CFRangeMake(fromLoc
, str1Index
- fromLoc
);
2958 if (fromLoc
== toLoc
) break;
2963 str1Index
= fromLoc
;
2966 while (str2Index
< findStrLen
) {
2967 if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1
, str1Index
) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2
, str2Index
)) break;
2969 ++str1Index
; ++str2Index
;
2972 if (str2Index
== findStrLen
) {
2974 if (NULL
!= result
) *result
= CFRangeMake(fromLoc
, findStrLen
);
2978 if (fromLoc
== toLoc
) break;
2987 Boolean
CFStringFindWithOptions(CFStringRef string
, CFStringRef stringToFind
, CFRange rangeToSearch
, CFOptionFlags compareOptions
, CFRange
*result
) { return CFStringFindWithOptionsAndLocale(string
, stringToFind
, rangeToSearch
, compareOptions
, NULL
, result
); }
2989 // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
2991 static const void *__rangeRetain(CFAllocatorRef allocator
, const void *ptr
) {
2992 CFRetain(*(CFDataRef
*)((uint8_t *)ptr
+ sizeof(CFRange
)));
2996 static void __rangeRelease(CFAllocatorRef allocator
, const void *ptr
) {
2997 CFRelease(*(CFDataRef
*)((uint8_t *)ptr
+ sizeof(CFRange
)));
3000 static CFStringRef
__rangeCopyDescription(const void *ptr
) {
3001 CFRange range
= *(CFRange
*)ptr
;
3002 return CFStringCreateWithFormat(kCFAllocatorSystemDefault
, NULL
, CFSTR("{%d, %d}"), range
.location
, range
.length
);
3005 static Boolean
__rangeEqual(const void *ptr1
, const void *ptr2
) {
3006 CFRange range1
= *(CFRange
*)ptr1
;
3007 CFRange range2
= *(CFRange
*)ptr2
;
3008 return (range1
.location
== range2
.location
) && (range1
.length
== range2
.length
);
3012 CFArrayRef
CFStringCreateArrayWithFindResults(CFAllocatorRef alloc
, CFStringRef string
, CFStringRef stringToFind
, CFRange rangeToSearch
, CFOptionFlags compareOptions
) {
3014 Boolean backwards
= ((compareOptions
& kCFCompareBackwards
) != 0);
3015 UInt32 endIndex
= rangeToSearch
.location
+ rangeToSearch
.length
;
3016 CFMutableDataRef rangeStorage
= NULL
; // Basically an array of CFRange, CFDataRef (packed)
3017 uint8_t *rangeStorageBytes
= NULL
;
3018 CFIndex foundCount
= 0;
3019 CFIndex capacity
= 0; // Number of CFRange, CFDataRef element slots in rangeStorage
3021 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
3023 while ((rangeToSearch
.length
> 0) && CFStringFindWithOptions(string
, stringToFind
, rangeToSearch
, compareOptions
, &foundRange
)) {
3024 // Determine the next range
3026 rangeToSearch
.length
= foundRange
.location
- rangeToSearch
.location
;
3028 rangeToSearch
.location
= foundRange
.location
+ foundRange
.length
;
3029 rangeToSearch
.length
= endIndex
- rangeToSearch
.location
;
3032 // If necessary, grow the data and squirrel away the found range
3033 if (foundCount
>= capacity
) {
3034 if (rangeStorage
== NULL
) rangeStorage
= CFDataCreateMutable(alloc
, 0);
3035 capacity
= (capacity
+ 4) * 2;
3036 CFDataSetLength(rangeStorage
, capacity
* (sizeof(CFRange
) + sizeof(CFDataRef
)));
3037 rangeStorageBytes
= (uint8_t *)CFDataGetMutableBytePtr(rangeStorage
) + foundCount
* (sizeof(CFRange
) + sizeof(CFDataRef
));
3039 memmove(rangeStorageBytes
, &foundRange
, sizeof(CFRange
)); // The range
3040 memmove(rangeStorageBytes
+ sizeof(CFRange
), &rangeStorage
, sizeof(CFDataRef
)); // The data
3041 rangeStorageBytes
+= (sizeof(CFRange
) + sizeof(CFDataRef
));
3045 if (foundCount
> 0) {
3047 CFMutableArrayRef array
;
3048 const CFArrayCallBacks callbacks
= {0, __rangeRetain
, __rangeRelease
, __rangeCopyDescription
, __rangeEqual
};
3050 CFDataSetLength(rangeStorage
, foundCount
* (sizeof(CFRange
) + sizeof(CFDataRef
))); // Tighten storage up
3051 rangeStorageBytes
= (uint8_t *)CFDataGetMutableBytePtr(rangeStorage
);
3053 array
= CFArrayCreateMutable(alloc
, foundCount
* sizeof(CFRange
*), &callbacks
);
3054 for (cnt
= 0; cnt
< foundCount
; cnt
++) {
3055 // Each element points to the appropriate CFRange in the CFData
3056 CFArrayAppendValue(array
, rangeStorageBytes
+ cnt
* (sizeof(CFRange
) + sizeof(CFDataRef
)));
3058 CFRelease(rangeStorage
); // We want the data to go away when all CFRanges inside it are released...
3066 CFRange
CFStringFind(CFStringRef string
, CFStringRef stringToFind
, CFOptionFlags compareOptions
) {
3069 if (CFStringFindWithOptions(string
, stringToFind
, CFRangeMake(0, CFStringGetLength(string
)), compareOptions
, &foundRange
)) {
3072 return CFRangeMake(kCFNotFound
, 0);
3076 Boolean
CFStringHasPrefix(CFStringRef string
, CFStringRef prefix
) {
3077 return CFStringFindWithOptions(string
, prefix
, CFRangeMake(0, CFStringGetLength(string
)), kCFCompareAnchored
, NULL
);
3080 Boolean
CFStringHasSuffix(CFStringRef string
, CFStringRef suffix
) {
3081 return CFStringFindWithOptions(string
, suffix
, CFRangeMake(0, CFStringGetLength(string
)), kCFCompareAnchored
|kCFCompareBackwards
, NULL
);
3084 #define MAX_TRANSCODING_LENGTH 4
3086 #define HANGUL_JONGSEONG_COUNT (28)
3088 CF_INLINE
bool _CFStringIsHangulLVT(UTF32Char character
) {
3089 return (((character
- HANGUL_SYLLABLE_START
) % HANGUL_JONGSEONG_COUNT
) ? true : false);
3092 static uint8_t __CFTranscodingHintLength
[] = {
3093 2, 3, 4, 4, 4, 4, 4, 2, 2, 2, 2, 4, 0, 0, 0, 0
3097 kCFStringHangulStateL
,
3098 kCFStringHangulStateV
,
3099 kCFStringHangulStateT
,
3100 kCFStringHangulStateLV
,
3101 kCFStringHangulStateLVT
,
3102 kCFStringHangulStateBreak
3105 static CFRange
_CFStringInlineBufferGetComposedRange(CFStringInlineBuffer
*buffer
, CFIndex start
, CFStringCharacterClusterType type
, const uint8_t *bmpBitmap
, CFIndex csetType
) {
3106 CFIndex end
= start
+ 1;
3107 const uint8_t *bitmap
= bmpBitmap
;
3108 UTF32Char character
;
3109 UTF16Char otherSurrogate
;
3112 character
= CFStringGetCharacterFromInlineBuffer(buffer
, start
);
3114 // We don't combine characters in Armenian ~ Limbu range for backward deletion
3115 if ((type
!= kCFStringBackwardDeletionCluster
) || (character
< 0x0530) || (character
> 0x194F)) {
3116 // Check if the current is surrogate
3117 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, start
+ 1)))) {
3119 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
3120 bitmap
= CFUniCharGetBitmapPtrForPlane(csetType
, (character
>> 16));
3125 if ((type
== kCFStringBackwardDeletionCluster
) && (character
>= 0x0530) && (character
< 0x1950)) break;
3127 if (character
< 0x10000) { // the first round could be already be non-BMP
3128 if (CFUniCharIsSurrogateLowCharacter(character
) && CFUniCharIsSurrogateHighCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, start
- 1)))) {
3129 character
= CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate
, character
);
3130 bitmap
= CFUniCharGetBitmapPtrForPlane(csetType
, (character
>> 16));
3137 if (!CFUniCharIsMemberOfBitmap(character
, bitmap
) && (character
!= 0xFF9E) && (character
!= 0xFF9F) && ((character
& 0x1FFFF0) != 0xF870)) break;
3141 character
= CFStringGetCharacterFromInlineBuffer(buffer
, start
);
3146 if (((character
>= HANGUL_CHOSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) || ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
))) {
3148 uint8_t initialState
;
3150 if (character
< HANGUL_JUNGSEONG_START
) {
3151 state
= kCFStringHangulStateL
;
3152 } else if (character
< HANGUL_JONGSEONG_START
) {
3153 state
= kCFStringHangulStateV
;
3154 } else if (character
< HANGUL_SYLLABLE_START
) {
3155 state
= kCFStringHangulStateT
;
3157 state
= (_CFStringIsHangulLVT(character
) ? kCFStringHangulStateLVT
: kCFStringHangulStateLV
);
3159 initialState
= state
;
3162 while (((character
= CFStringGetCharacterFromInlineBuffer(buffer
, start
- 1)) >= HANGUL_CHOSEONG_START
) && (character
<= HANGUL_SYLLABLE_END
) && ((character
<= HANGUL_JONGSEONG_END
) || (character
>= HANGUL_SYLLABLE_START
))) {
3164 case kCFStringHangulStateV
:
3165 if (character
<= HANGUL_CHOSEONG_END
) {
3166 state
= kCFStringHangulStateL
;
3167 } else if ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
) && !_CFStringIsHangulLVT(character
)) {
3168 state
= kCFStringHangulStateLV
;
3169 } else if (character
> HANGUL_JUNGSEONG_END
) {
3170 state
= kCFStringHangulStateBreak
;
3174 case kCFStringHangulStateT
:
3175 if ((character
>= HANGUL_JUNGSEONG_START
) && (character
<= HANGUL_JUNGSEONG_END
)) {
3176 state
= kCFStringHangulStateV
;
3177 } else if ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
)) {
3178 state
= (_CFStringIsHangulLVT(character
) ? kCFStringHangulStateLVT
: kCFStringHangulStateLV
);
3179 } else if (character
< HANGUL_JUNGSEONG_START
) {
3180 state
= kCFStringHangulStateBreak
;
3185 state
= ((character
< HANGUL_JUNGSEONG_START
) ? kCFStringHangulStateL
: kCFStringHangulStateBreak
);
3189 if (state
== kCFStringHangulStateBreak
) break;
3194 state
= initialState
;
3195 while (((character
= CFStringGetCharacterFromInlineBuffer(buffer
, end
)) > 0) && (((character
>= HANGUL_CHOSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) || ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
)))) {
3197 case kCFStringHangulStateLV
:
3198 case kCFStringHangulStateV
:
3199 if ((character
>= HANGUL_JUNGSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) {
3200 state
= ((character
< HANGUL_JONGSEONG_START
) ? kCFStringHangulStateV
: kCFStringHangulStateT
);
3202 state
= kCFStringHangulStateBreak
;
3206 case kCFStringHangulStateLVT
:
3207 case kCFStringHangulStateT
:
3208 state
= (((character
>= HANGUL_JONGSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) ? kCFStringHangulStateT
: kCFStringHangulStateBreak
);
3212 if (character
< HANGUL_JUNGSEONG_START
) {
3213 state
= kCFStringHangulStateL
;
3214 } else if (character
< HANGUL_JONGSEONG_START
) {
3215 state
= kCFStringHangulStateV
;
3216 } else if (character
>= HANGUL_SYLLABLE_START
) {
3217 state
= (_CFStringIsHangulLVT(character
) ? kCFStringHangulStateLVT
: kCFStringHangulStateLV
);
3219 state
= kCFStringHangulStateBreak
;
3224 if (state
== kCFStringHangulStateBreak
) break;
3230 while ((character
= CFStringGetCharacterFromInlineBuffer(buffer
, end
)) > 0) {
3231 if ((type
== kCFStringBackwardDeletionCluster
) && (character
>= 0x0530) && (character
< 0x1950)) break;
3233 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, end
+ 1)))) {
3234 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
3235 bitmap
= CFUniCharGetBitmapPtrForPlane(csetType
, (character
>> 16));
3242 if (!CFUniCharIsMemberOfBitmap(character
, bitmap
) && (character
!= 0xFF9E) && (character
!= 0xFF9F) && ((character
& 0x1FFFF0) != 0xF870)) break;
3247 return CFRangeMake(start
, end
- start
);
3250 CF_INLINE
bool _CFStringIsVirama(UTF32Char character
, const uint8_t *combClassBMP
) {
3251 return ((character
== COMBINING_GRAPHEME_JOINER
) || (CFUniCharGetCombiningPropertyForCharacter(character
, (const uint8_t *)((character
< 0x10000) ? combClassBMP
: CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (character
>> 16)))) == 9) ? true : false);
3254 CFRange
CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string
, CFIndex charIndex
, CFStringCharacterClusterType type
) {
3256 CFIndex currentIndex
;
3257 CFIndex length
= CFStringGetLength(string
);
3258 CFIndex csetType
= ((kCFStringGraphemeCluster
== type
) ? kCFUniCharGraphemeExtendCharacterSet
: kCFUniCharNonBaseCharacterSet
);
3259 CFStringInlineBuffer stringBuffer
;
3260 const uint8_t *bmpBitmap
;
3261 const uint8_t *letterBMP
;
3262 const uint8_t *combClassBMP
;
3263 UTF32Char character
;
3264 UTF16Char otherSurrogate
;
3266 if (charIndex
>= length
) return CFRangeMake(kCFNotFound
, 0);
3268 /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
3270 if (!CF_IS_OBJC(__kCFStringTypeID
, string
) && __CFStrIsEightBit(string
)) return CFRangeMake(charIndex
, 1);
3272 bmpBitmap
= CFUniCharGetBitmapPtrForPlane(csetType
, 0);
3273 letterBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, 0);
3274 combClassBMP
= (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
3276 CFStringInitInlineBuffer(string
, &stringBuffer
, CFRangeMake(0, length
));
3278 // Get composed character sequence first
3279 range
= _CFStringInlineBufferGetComposedRange(&stringBuffer
, charIndex
, type
, bmpBitmap
, csetType
);
3281 // Do grapheme joiners
3282 if (type
< kCFStringCursorMovementCluster
) {
3283 const uint8_t *letter
= letterBMP
;
3285 // Check to see if we have a letter at the beginning of initial cluster
3286 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, range
.location
);
3288 if ((range
.length
> 1) && CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, range
.location
+ 1)))) {
3289 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
3290 letter
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, (character
>> 16));
3293 if ((character
== ZERO_WIDTH_JOINER
) || CFUniCharIsMemberOfBitmap(character
, letter
)) {
3296 // Check if preceded by grapheme joiners (U034F and viramas)
3297 otherRange
.location
= currentIndex
= range
.location
;
3299 while (currentIndex
> 1) {
3300 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, --currentIndex
);
3302 // ??? We're assuming viramas only in BMP
3303 if ((_CFStringIsVirama(character
, combClassBMP
) || ((character
== ZERO_WIDTH_JOINER
) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, --currentIndex
), combClassBMP
))) && (currentIndex
> 0)) {
3309 currentIndex
= _CFStringInlineBufferGetComposedRange(&stringBuffer
, currentIndex
, type
, bmpBitmap
, csetType
).location
;
3311 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
);
3313 if (CFUniCharIsSurrogateLowCharacter(character
) && CFUniCharIsSurrogateHighCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
- 1)))) {
3314 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
3315 letter
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, (character
>> 16));
3321 if (!CFUniCharIsMemberOfBitmap(character
, letter
)) break;
3322 range
.location
= currentIndex
;
3325 range
.length
+= otherRange
.location
- range
.location
;
3327 // Check if followed by grapheme joiners
3328 if ((range
.length
> 1) && ((range
.location
+ range
.length
) < length
)) {
3330 currentIndex
= otherRange
.location
+ otherRange
.length
;
3333 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
- 1);
3335 // ??? We're assuming viramas only in BMP
3336 if ((character
!= ZERO_WIDTH_JOINER
) && !_CFStringIsVirama(character
, combClassBMP
)) break;
3338 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
);
3340 if (character
== ZERO_WIDTH_JOINER
) character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, ++currentIndex
);
3342 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
+ 1)))) {
3343 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
3344 letter
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, (character
>> 16));
3349 // We only conjoin letters
3350 if (!CFUniCharIsMemberOfBitmap(character
, letter
)) break;
3351 otherRange
= _CFStringInlineBufferGetComposedRange(&stringBuffer
, currentIndex
, type
, bmpBitmap
, csetType
);
3352 currentIndex
= otherRange
.location
+ otherRange
.length
;
3353 } while ((otherRange
.location
+ otherRange
.length
) < length
);
3354 range
.length
= currentIndex
- range
.location
;
3359 // Check if we're part of prefix transcoding hints
3362 currentIndex
= (range
.location
+ range
.length
) - (MAX_TRANSCODING_LENGTH
+ 1);
3363 if (currentIndex
< 0) currentIndex
= 0;
3365 while (currentIndex
<= range
.location
) {
3366 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
);
3368 if ((character
& 0x1FFFF0) == 0xF860) { // transcoding hint
3369 otherIndex
= currentIndex
+ __CFTranscodingHintLength
[(character
- 0xF860)] + 1;
3370 if (otherIndex
>= (range
.location
+ range
.length
)) {
3371 if (otherIndex
<= length
) {
3372 range
.location
= currentIndex
;
3373 range
.length
= otherIndex
- currentIndex
;
3384 #if 1 /* Using the new implementation. Leaving the old implementation if'ed out for testing purposes for now */
3385 CFRange
CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString
, CFIndex theIndex
) {
3386 return CFStringGetRangeOfCharacterClusterAtIndex(theString
, theIndex
, kCFStringComposedCharacterCluster
);
/*!
	@function CFStringGetRangeOfComposedCharactersAtIndex
	Returns the range of the composed character sequence at the specified index.
	@param theString The CFString which is to be searched.  If this
		parameter is not a valid CFString, the behavior is
		undefined.
	@param theIndex The index of the character contained in the
		composed character sequence.  If the index is
		outside the index space of the string (0 to N-1 inclusive,
		where N is the length of the string), the behavior is
		undefined.
	@result The range of the composed character sequence.
*/
3402 #define ExtHighHalfZoneLow 0xD800
3403 #define ExtHighHalfZoneHigh 0xDBFF
3404 #define ExtLowHalfZoneLow 0xDC00
3405 #define ExtLowHalfZoneHigh 0xDFFF
3406 #define JunseongStart 0x1160
3407 #define JonseongEnd 0x11F9
3408 CF_INLINE Boolean
IsHighCode(UniChar X
) { return (X
>= ExtHighHalfZoneLow
&& X
<= ExtHighHalfZoneHigh
); }
3409 CF_INLINE Boolean
IsLowCode(UniChar X
) { return (X
>= ExtLowHalfZoneLow
&& X
<= ExtLowHalfZoneHigh
); }
3410 #define IsHangulConjoiningJamo(X) (X >= JunseongStart && X <= JonseongEnd)
3411 #define IsHalfwidthKanaVoicedMark(X) ((X == 0xFF9E) || (X == 0xFF9F))
3412 CF_INLINE Boolean
IsNonBaseChar(UniChar X
, CFCharacterSetRef nonBaseSet
) { return (CFCharacterSetIsCharacterMember(nonBaseSet
, X
) || IsHangulConjoiningJamo(X
) || IsHalfwidthKanaVoicedMark(X
) || (X
& 0x1FFFF0) == 0xF870); } // combining char, hangul jamo, or Apple corporate variant tag
3415 #define COMBINING_GRAPHEME_JOINER (0x034F)
3417 static CFCharacterSetRef nonBaseChars
= NULL
;
3418 static CFCharacterSetRef letterChars
= NULL
;
3419 static const void *__CFCombiningClassBMP
= NULL
;
3421 CF_INLINE
bool IsVirama(UTF32Char character
) {
3422 return ((character
== COMBINING_GRAPHEME_JOINER
) ? true : ((character
< 0x10000) && (CFUniCharGetCombiningPropertyForCharacter(character
, __CFCombiningClassBMP
) == 9) ? true : false));
/* Legacy implementation of CFStringGetRangeOfComposedCharactersAtIndex().
   The note on the preceding `#if 1` says the new implementation is the one in
   use and that this one is kept if'ed out for testing purposes.

   Visible behaviour: returns {kCFNotFound, 0} when theIndex >= len; fetches the
   predefined non-base and letter character sets and the BMP combining-class
   data; looks for a transcoding hint (0xF860..0xF86F) in a small window around
   theIndex; then moves `current` back to a base character to set `left`, and
   forward past non-base characters and virama/ZWJ-joined letters, returning
   {left, current - left}.

   NOTE(review): this excerpt does not show several lines of the block (braces,
   the targets of `goto round2Again`, loop increments, the assignment of
   `left`), so the exact control flow must be confirmed against the full file. */
3425 CFRange
CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString
, CFIndex theIndex
) {
3426 CFIndex left
, current
, save
;
3427 CFIndex len
= CFStringGetLength(theString
);
3428 CFStringInlineBuffer stringBuffer
;
3429 static volatile Boolean _isInited
= false;
3431 if (theIndex
>= len
) return CFRangeMake(kCFNotFound
, 0);
/* Lookup tables shared by IsNonBaseChar()/IsVirama(). NOTE(review): presumably guarded by _isInited on lines not shown here. */
3434 nonBaseChars
= CFCharacterSetGetPredefined(kCFCharacterSetNonBase
);
3435 letterChars
= CFCharacterSetGetPredefined(kCFCharacterSetLetter
);
3436 __CFCombiningClassBMP
= CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
3440 save
= current
= theIndex
;
3442 CFStringInitInlineBuffer(theString
, &stringBuffer
, CFRangeMake(0, len
));
3445 * First check for transcoding hints
3448 CFRange theRange
= (current
> MAX_TRANSCODING_LENGTH
? CFRangeMake(current
- MAX_TRANSCODING_LENGTH
, MAX_TRANSCODING_LENGTH
+ 1) : CFRangeMake(0, current
+ 1));
3450 // Should check the next loc ?
3451 if (current
+ 1 < len
) ++theRange
.length
;
3453 if (theRange
.length
> 1) {
3454 UniChar characterBuffer
[MAX_TRANSCODING_LENGTH
+ 2]; // Transcoding hint length + current loc + next loc
3456 if (stringBuffer
.directBuffer
) {
3457 memmove(characterBuffer
, stringBuffer
.directBuffer
+ theRange
.location
, theRange
.length
* sizeof(UniChar
));
3459 CFStringGetCharacters(theString
, theRange
, characterBuffer
);
3462 while (current
>= theRange
.location
) {
3463 if ((characterBuffer
[current
- theRange
.location
] & 0x1FFFF0) == 0xF860) {
3464 theRange
= CFRangeMake(current
, __CFTranscodingHintLength
[characterBuffer
[current
- theRange
.location
] - 0xF860] + 1);
3465 if ((theRange
.location
+ theRange
.length
) <= theIndex
) break;
3466 if ((theRange
.location
+ theRange
.length
) >= len
) theRange
.length
= len
- theRange
.location
;
3469 if (current
== 0) break;
3472 current
= theIndex
; // Reset current
3476 //#warning Aki 5/29/01 This does not support non-base chars in non-BMP planes (i.e. musical symbol combining stem in Unicode 3.1)
3478 * if we start NOT on a base, first move back to a base as appropriate.
3483 while ((current
> 0) && IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
), nonBaseChars
)) --current
;
3485 if (current
>= 1 && current
< len
&& CFCharacterSetIsCharacterMember(letterChars
, CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
)) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1))) {
3488 } else if ((current
>= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1) == ZWJ
) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 2))) {
3494 * Set the left position, then jump back to the saved original position.
3497 if (current
>= 1 && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
)) && IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1))) --current
;
3502 * Now, presume we are on a base; move forward & look for the next base.
3503 * Handle jumping over H/L codes.
3505 if (IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
)) && (current
+ 1) < len
&& IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
+ 1))) ++current
;
3510 if (current
< len
) {
3511 while (IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
), nonBaseChars
)) {
3513 if (current
>= len
) break;
3515 if ((current
< len
) && CFCharacterSetIsCharacterMember(letterChars
, CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
))) {
3516 if (IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1))) {
3517 ++current
; goto round2Again
;
3518 } else if ((current
>= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1) == ZWJ
) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 2))) {
3519 ++current
; goto round2Again
;
3524 * Now, "current" is a base, and "left" is a base.
3525 * The junk between had better contain "save"!
/* Sanity check only: logs a warning when `save` is not within [left, current]; the range below is returned either way as far as the visible lines show. */
3527 if ((! (left
<= save
)) || (! (save
<= current
))) {
3528 CFLog(kCFLogLevelWarning
, CFSTR("CFString: CFStringGetRangeOfComposedCharactersAtIndex:%d returned invalid\n"), save
);
3530 return CFRangeMake(left
, current
- left
);
/*!
	@function CFStringFindCharacterFromSet
	Query the range of characters contained in the specified character set.
	@param theString The CFString which is to be searched.  If this
		parameter is not a valid CFString, the behavior is
		undefined.
	@param theSet The CFCharacterSet against which the membership
		of characters is checked.  If this parameter is not a valid
		CFCharacterSet, the behavior is undefined.
	@param range The range of characters within the string to search.  If
		the range location or end point (defined by the location
		plus length minus 1) are outside the index space of the
		string (0 to N-1 inclusive, where N is the length of the
		string), the behavior is undefined.  If the range length is
		negative, the behavior is undefined.  The range may be empty
		(length 0), in which case no search is performed.
	@param searchOptions The bitwise-or'ed option flags to control
		the search behavior.  The supported options are
		kCFCompareBackwards and kCFCompareAnchored.
		If other option flags are specified, the behavior
		is undefined.
	@param result The pointer to a CFRange supplied by the caller in
		which the search result is stored.  If a pointer to an invalid
		memory is specified, the behavior is undefined.
	@result true, if at least a character which is a member of the character
		set is found and result is filled, otherwise, false.
*/
3561 #define SURROGATE_START 0xD800
3562 #define SURROGATE_END 0xDFFF
3564 CF_EXPORT Boolean
CFStringFindCharacterFromSet(CFStringRef theString
, CFCharacterSetRef theSet
, CFRange rangeToSearch
, CFOptionFlags searchOptions
, CFRange
*result
) {
3565 CFStringInlineBuffer stringBuffer
;
3566 CFCharacterSetInlineBuffer csetBuffer
;
3569 CFIndex fromLoc
, toLoc
, cnt
; // fromLoc and toLoc are inclusive
3570 Boolean found
= false;
3571 Boolean done
= false;
3573 //#warning FIX ME !! Should support kCFCompareNonliteral
3575 if ((rangeToSearch
.location
+ rangeToSearch
.length
> CFStringGetLength(theString
)) || (rangeToSearch
.length
== 0)) return false;
3577 if (searchOptions
& kCFCompareBackwards
) {
3578 fromLoc
= rangeToSearch
.location
+ rangeToSearch
.length
- 1;
3579 toLoc
= rangeToSearch
.location
;
3581 fromLoc
= rangeToSearch
.location
;
3582 toLoc
= rangeToSearch
.location
+ rangeToSearch
.length
- 1;
3584 if (searchOptions
& kCFCompareAnchored
) {
3588 step
= (fromLoc
<= toLoc
) ? 1 : -1;
3591 CFStringInitInlineBuffer(theString
, &stringBuffer
, rangeToSearch
);
3592 CFCharacterSetInitInlineBuffer(theSet
, &csetBuffer
);
3595 ch
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, cnt
- rangeToSearch
.location
);
3596 if ((ch
>= SURROGATE_START
) && (ch
<= SURROGATE_END
)) {
3597 int otherCharIndex
= cnt
+ step
;
3599 if (((step
< 0) && (otherCharIndex
< toLoc
)) || ((step
> 0) && (otherCharIndex
> toLoc
))) {
3603 UniChar lowChar
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, otherCharIndex
- rangeToSearch
.location
);
3605 if (cnt
< otherCharIndex
) {
3612 if (CFUniCharIsSurrogateHighCharacter(highChar
) && CFUniCharIsSurrogateLowCharacter(lowChar
) && CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer
, CFUniCharGetLongCharacterForSurrogatePair(highChar
, lowChar
))) {
3613 if (result
) *result
= CFRangeMake((cnt
< otherCharIndex
? cnt
: otherCharIndex
), 2);
3615 } else if (otherCharIndex
== toLoc
) {
3618 cnt
= otherCharIndex
+ step
;
3621 } else if (CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer
, ch
)) {
3622 done
= found
= true;
3623 } else if (cnt
== toLoc
) {
3630 if (found
&& result
) *result
= CFRangeMake(cnt
, 1);
3634 /* Line range code */
3636 #define CarriageReturn '\r' /* 0x0d */
3637 #define NewLine '\n' /* 0x0a */
3638 #define NextLine 0x0085
3639 #define LineSeparator 0x2028
3640 #define ParaSeparator 0x2029
3642 CF_INLINE Boolean
isALineSeparatorTypeCharacter(UniChar ch
, Boolean includeLineEndings
) {
3643 if (ch
> CarriageReturn
&& ch
< NextLine
) return false; /* Quick test to cover most chars */
3644 return (ch
== NewLine
|| ch
== CarriageReturn
|| ch
== ParaSeparator
|| (includeLineEndings
&& (ch
== NextLine
|| ch
== LineSeparator
))) ? true : false;
3647 static void __CFStringGetLineOrParagraphBounds(CFStringRef string
, CFRange range
, CFIndex
*lineBeginIndex
, CFIndex
*lineEndIndex
, CFIndex
*contentsEndIndex
, Boolean includeLineEndings
) {
3649 CFStringInlineBuffer buf
;
3652 __CFAssertIsString(string
);
3653 __CFAssertRangeIsInStringBounds(string
, range
.location
, range
.length
);
3655 len
= __CFStrLength(string
);
3657 if (lineBeginIndex
) {
3659 if (range
.location
== 0) {
3662 CFStringInitInlineBuffer(string
, &buf
, CFRangeMake(0, len
));
3663 CFIndex buf_idx
= range
.location
;
3665 /* Take care of the special case where start happens to fall right between \r and \n */
3666 ch
= CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
);
3668 if ((ch
== NewLine
) && (CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
) == CarriageReturn
)) {
3675 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
), includeLineEndings
)) {
3676 start
= buf_idx
+ 1;
3683 *lineBeginIndex
= start
;
3686 /* Now find the ending point */
3687 if (lineEndIndex
|| contentsEndIndex
) {
3688 CFIndex endOfContents
, lineSeparatorLength
= 1; /* 1 by default */
3689 CFStringInitInlineBuffer(string
, &buf
, CFRangeMake(0, len
));
3690 CFIndex buf_idx
= range
.location
+ range
.length
- (range
.length
? 1 : 0);
3691 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
3692 ch
= __CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
);
3693 if (ch
== NewLine
) {
3694 endOfContents
= buf_idx
;
3696 if (__CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
) == CarriageReturn
) {
3697 lineSeparatorLength
= 2;
3702 if (isALineSeparatorTypeCharacter(ch
, includeLineEndings
)) {
3703 endOfContents
= buf_idx
; /* This is actually end of contentsRange */
3704 buf_idx
++; /* OK for this to go past the end */
3705 if ((ch
== CarriageReturn
) && (__CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
) == NewLine
)) {
3706 lineSeparatorLength
= 2;
3709 } else if (buf_idx
>= len
) {
3710 endOfContents
= len
;
3711 lineSeparatorLength
= 0;
3715 ch
= __CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
);
3719 if (contentsEndIndex
) *contentsEndIndex
= endOfContents
;
3720 if (lineEndIndex
) *lineEndIndex
= endOfContents
+ lineSeparatorLength
;
3724 void CFStringGetLineBounds(CFStringRef string
, CFRange range
, CFIndex
*lineBeginIndex
, CFIndex
*lineEndIndex
, CFIndex
*contentsEndIndex
) {
3725 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID
, void, string
, "getLineStart:end:contentsEnd:forRange:", lineBeginIndex
, lineEndIndex
, contentsEndIndex
, CFRangeMake(range
.location
, range
.length
));
3726 __CFStringGetLineOrParagraphBounds(string
, range
, lineBeginIndex
, lineEndIndex
, contentsEndIndex
, true);
3729 void CFStringGetParagraphBounds(CFStringRef string
, CFRange range
, CFIndex
*parBeginIndex
, CFIndex
*parEndIndex
, CFIndex
*contentsEndIndex
) {
3730 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID
, void, string
, "getParagraphStart:end:contentsEnd:forRange:", parBeginIndex
, parEndIndex
, contentsEndIndex
, CFRangeMake(range
.location
, range
.length
));
3731 __CFStringGetLineOrParagraphBounds(string
, range
, parBeginIndex
, parEndIndex
, contentsEndIndex
, false);
3735 CFStringRef
CFStringCreateByCombiningStrings(CFAllocatorRef alloc
, CFArrayRef array
, CFStringRef separatorString
) {
3737 CFIndex separatorNumByte
;
3738 CFIndex stringCount
= CFArrayGetCount(array
);
3739 Boolean isSepCFString
= !CF_IS_OBJC(__kCFStringTypeID
, separatorString
);
3740 Boolean canBeEightbit
= isSepCFString
&& __CFStrIsEightBit(separatorString
);
3742 CFStringRef otherString
;
3745 const void *separatorContents
= NULL
;
3747 if (stringCount
== 0) {
3748 return CFStringCreateWithCharacters(alloc
, NULL
, 0);
3749 } else if (stringCount
== 1) {
3750 return (CFStringRef
)CFStringCreateCopy(alloc
, (CFStringRef
)CFArrayGetValueAtIndex(array
, 0));
3753 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
3755 numChars
= CFStringGetLength(separatorString
) * (stringCount
- 1);
3756 for (idx
= 0; idx
< stringCount
; idx
++) {
3757 otherString
= (CFStringRef
)CFArrayGetValueAtIndex(array
, idx
);
3758 numChars
+= CFStringGetLength(otherString
);
3759 // canBeEightbit is already false if the separator is an NSString...
3760 if (!CF_IS_OBJC(__kCFStringTypeID
, otherString
) && __CFStrIsUnicode(otherString
)) canBeEightbit
= false;
3763 buffer
= (uint8_t *)CFAllocatorAllocate(alloc
, canBeEightbit
? ((numChars
+ 1) * sizeof(uint8_t)) : (numChars
* sizeof(UniChar
)), 0);
3764 bufPtr
= (uint8_t *)buffer
;
3765 if (__CFOASafe
) __CFSetLastAllocationEventName(buffer
, "CFString (store)");
3766 separatorNumByte
= CFStringGetLength(separatorString
) * (canBeEightbit
? sizeof(uint8_t) : sizeof(UniChar
));
3768 for (idx
= 0; idx
< stringCount
; idx
++) {
3769 if (idx
) { // add separator here unless first string
3770 if (separatorContents
) {
3771 memmove(bufPtr
, separatorContents
, separatorNumByte
);
3773 if (!isSepCFString
) { // NSString
3774 CFStringGetCharacters(separatorString
, CFRangeMake(0, CFStringGetLength(separatorString
)), (UniChar
*)bufPtr
);
3775 } else if (canBeEightbit
|| __CFStrIsUnicode(separatorString
)) {
3776 memmove(bufPtr
, (const uint8_t *)__CFStrContents(separatorString
) + __CFStrSkipAnyLengthByte(separatorString
), separatorNumByte
);
3778 __CFStrConvertBytesToUnicode((uint8_t *)__CFStrContents(separatorString
) + __CFStrSkipAnyLengthByte(separatorString
), (UniChar
*)bufPtr
, __CFStrLength(separatorString
));
3780 separatorContents
= bufPtr
;
3782 bufPtr
+= separatorNumByte
;
3785 otherString
= (CFStringRef
)CFArrayGetValueAtIndex(array
, idx
);
3786 if (CF_IS_OBJC(__kCFStringTypeID
, otherString
)) {
3787 CFIndex otherLength
= CFStringGetLength(otherString
);
3788 CFStringGetCharacters(otherString
, CFRangeMake(0, otherLength
), (UniChar
*)bufPtr
);
3789 bufPtr
+= otherLength
* sizeof(UniChar
);
3791 const uint8_t * otherContents
= (const uint8_t *)__CFStrContents(otherString
);
3792 CFIndex otherNumByte
= __CFStrLength2(otherString
, otherContents
) * (canBeEightbit
? sizeof(uint8_t) : sizeof(UniChar
));
3794 if (canBeEightbit
|| __CFStrIsUnicode(otherString
)) {
3795 memmove(bufPtr
, otherContents
+ __CFStrSkipAnyLengthByte(otherString
), otherNumByte
);
3797 __CFStrConvertBytesToUnicode(otherContents
+ __CFStrSkipAnyLengthByte(otherString
), (UniChar
*)bufPtr
, __CFStrLength2(otherString
, otherContents
));
3799 bufPtr
+= otherNumByte
;
3802 if (canBeEightbit
) *bufPtr
= 0; // NULL byte;
3804 return canBeEightbit
?
3805 CFStringCreateWithCStringNoCopy(alloc
, (const char*)buffer
, __CFStringGetEightBitStringEncoding(), alloc
) :
3806 CFStringCreateWithCharactersNoCopy(alloc
, (UniChar
*)buffer
, numChars
, alloc
);
3810 CFArrayRef
CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc
, CFStringRef string
, CFStringRef separatorString
) {
3811 CFArrayRef separatorRanges
;
3812 CFIndex length
= CFStringGetLength(string
);
3813 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
3814 if (!(separatorRanges
= CFStringCreateArrayWithFindResults(alloc
, string
, separatorString
, CFRangeMake(0, length
), 0))) {
3815 return CFArrayCreate(alloc
, (const void **)&string
, 1, & kCFTypeArrayCallBacks
);
3818 CFIndex count
= CFArrayGetCount(separatorRanges
);
3819 CFIndex startIndex
= 0;
3821 CFMutableArrayRef array
= CFArrayCreateMutable(alloc
, count
+ 2, & kCFTypeArrayCallBacks
);
3822 const CFRange
*currentRange
;
3823 CFStringRef substring
;
3825 for (idx
= 0;idx
< count
;idx
++) {
3826 currentRange
= (const CFRange
*)CFArrayGetValueAtIndex(separatorRanges
, idx
);
3827 numChars
= currentRange
->location
- startIndex
;
3828 substring
= CFStringCreateWithSubstring(alloc
, string
, CFRangeMake(startIndex
, numChars
));
3829 CFArrayAppendValue(array
, substring
);
3830 CFRelease(substring
);
3831 startIndex
= currentRange
->location
+ currentRange
->length
;
3833 substring
= CFStringCreateWithSubstring(alloc
, string
, CFRangeMake(startIndex
, length
- startIndex
));
3834 CFArrayAppendValue(array
, substring
);
3835 CFRelease(substring
);
3837 CFRelease(separatorRanges
);
3843 CFStringRef
CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc
, CFDataRef data
, CFStringEncoding encoding
) {
3844 return CFStringCreateWithBytes(alloc
, CFDataGetBytePtr(data
), CFDataGetLength(data
), encoding
, true);
3848 CFDataRef
CFStringCreateExternalRepresentation(CFAllocatorRef alloc
, CFStringRef string
, CFStringEncoding encoding
, uint8_t lossByte
) {
3850 CFIndex guessedByteLength
;
3855 if (CF_IS_OBJC(__kCFStringTypeID
, string
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3856 length
= CFStringGetLength(string
);
3858 __CFAssertIsString(string
);
3859 length
= __CFStrLength(string
);
3860 if (__CFStrIsEightBit(string
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
3861 return CFDataCreate(alloc
, ((uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
)), __CFStrLength(string
));
3865 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
3867 if (((encoding
& 0x0FFF) == kCFStringEncodingUnicode
) && ((encoding
== kCFStringEncodingUnicode
) || ((encoding
> kCFStringEncodingUTF8
) && (encoding
<= kCFStringEncodingUTF32LE
)))) {
3868 guessedByteLength
= (length
+ 1) * ((((encoding
>> 26) & 2) == 0) ? sizeof(UTF16Char
) : sizeof(UTF32Char
)); // UTF32 format has the bit set
3869 } else if (((guessedByteLength
= CFStringGetMaximumSizeForEncoding(length
, encoding
)) > length
) && !CF_IS_OBJC(__kCFStringTypeID
, string
)) { // Multi byte encoding
3870 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
3871 if (__CFStrIsUnicode(string
)) {
3872 CFIndex aLength
= CFStringEncodingByteLengthForCharacters(encoding
, kCFStringEncodingPrependBOM
, __CFStrContents(string
), __CFStrLength(string
));
3873 if (aLength
> 0) guessedByteLength
= aLength
;
3876 result
= __CFStringEncodeByteStream(string
, 0, length
, true, encoding
, lossByte
, NULL
, LONG_MAX
, &guessedByteLength
);
3877 // if result == length, we always succeed
3878 // otherwise, if result == 0, we fail
3879 // otherwise, if there was a lossByte but still result != length, we fail
3880 if ((result
!= length
) && (!result
|| !lossByte
)) return NULL
;
3881 if (guessedByteLength
== length
&& __CFStrIsEightBit(string
) && __CFStringEncodingIsSupersetOfASCII(encoding
)) { // It's all ASCII !!
3882 return CFDataCreate(alloc
, ((uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
)), __CFStrLength(string
));
3884 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
3888 bytes
= (uint8_t *)CFAllocatorAllocate(alloc
, guessedByteLength
, 0);
3889 if (__CFOASafe
) __CFSetLastAllocationEventName(bytes
, "CFData (store)");
3891 result
= __CFStringEncodeByteStream(string
, 0, length
, true, encoding
, lossByte
, bytes
, guessedByteLength
, &usedLength
);
3893 if ((result
!= length
) && (!result
|| !lossByte
)) { // see comment above about what this means
3894 CFAllocatorDeallocate(alloc
, bytes
);
3898 return CFDataCreateWithBytesNoCopy(alloc
, (uint8_t *)bytes
, usedLength
, alloc
);
3902 CFStringEncoding
CFStringGetSmallestEncoding(CFStringRef str
) {
3904 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFStringEncoding
, str
, "_smallestEncodingInCFStringEncoding");
3905 __CFAssertIsString(str
);
3907 if (__CFStrIsEightBit(str
)) return __CFStringGetEightBitStringEncoding();
3908 len
= __CFStrLength(str
);
3909 if (__CFStringEncodeByteStream(str
, 0, len
, false, __CFStringGetEightBitStringEncoding(), 0, NULL
, LONG_MAX
, NULL
) == len
) return __CFStringGetEightBitStringEncoding();
3910 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str
, 0, len
, false, __CFStringGetSystemEncoding(), 0, NULL
, LONG_MAX
, NULL
) == len
)) return __CFStringGetSystemEncoding();
3911 return kCFStringEncodingUnicode
; /* ??? */
3915 CFStringEncoding
CFStringGetFastestEncoding(CFStringRef str
) {
3916 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFStringEncoding
, str
, "_fastestEncodingInCFStringEncoding");
3917 __CFAssertIsString(str
);
3918 return __CFStrIsEightBit(str
) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode
; /* ??? */
3922 SInt32
CFStringGetIntValue(CFStringRef str
) {
3926 CFStringInlineBuffer buf
;
3927 CFStringInitInlineBuffer(str
, &buf
, CFRangeMake(0, CFStringGetLength(str
)));
3928 success
= __CFStringScanInteger(&buf
, NULL
, &idx
, false, &result
);
3929 return success
? result
: 0;
3933 double CFStringGetDoubleValue(CFStringRef str
) {
3937 CFStringInlineBuffer buf
;
3938 CFStringInitInlineBuffer(str
, &buf
, CFRangeMake(0, CFStringGetLength(str
)));
3939 success
= __CFStringScanDouble(&buf
, NULL
, &idx
, &result
);
3940 return success
? result
: 0.0;
3944 /*** Mutable functions... ***/
3946 void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string
, UniChar
*chars
, CFIndex length
, CFIndex capacity
) {
3947 __CFAssertIsNotNegative(length
);
3948 __CFAssertIsStringAndExternalMutable(string
);
3949 CFAssert4((length
<= capacity
) && ((capacity
== 0) || ((capacity
> 0) && chars
)), __kCFLogAssertion
, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__
, chars
, length
, capacity
);
3950 __CFStrSetContentPtr(string
, chars
);
3951 __CFStrSetExplicitLength(string
, length
);
3952 __CFStrSetCapacity(string
, capacity
* sizeof(UniChar
));
3953 __CFStrSetCapacityProvidedExternally(string
);
3958 void CFStringInsert(CFMutableStringRef str
, CFIndex idx
, CFStringRef insertedStr
) {
3959 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "insertString:atIndex:", insertedStr
, idx
);
3960 __CFAssertIsStringAndMutable(str
);
3961 CFAssert3(idx
>= 0 && idx
<= __CFStrLength(str
), __kCFLogAssertion
, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__
, idx
, __CFStrLength(str
));
3962 __CFStringReplace(str
, CFRangeMake(idx
, 0), insertedStr
);
3966 void CFStringDelete(CFMutableStringRef str
, CFRange range
) {
3967 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, str
, "deleteCharactersInRange:", range
);
3968 __CFAssertIsStringAndMutable(str
);
3969 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
3970 __CFStringChangeSize(str
, range
, 0, false);
3974 void CFStringReplace(CFMutableStringRef str
, CFRange range
, CFStringRef replacement
) {
3975 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "replaceCharactersInRange:withString:", range
, replacement
);
3976 __CFAssertIsStringAndMutable(str
);
3977 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
3978 __CFStringReplace(str
, range
, replacement
);
3982 void CFStringReplaceAll(CFMutableStringRef str
, CFStringRef replacement
) {
3983 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, str
, "setString:", replacement
);
3984 __CFAssertIsStringAndMutable(str
);
3985 __CFStringReplace(str
, CFRangeMake(0, __CFStrLength(str
)), replacement
);
3989 void CFStringAppend(CFMutableStringRef str
, CFStringRef appended
) {
3990 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, str
, "appendString:", appended
);
3991 __CFAssertIsStringAndMutable(str
);
3992 __CFStringReplace(str
, CFRangeMake(__CFStrLength(str
), 0), appended
);
3996 void CFStringAppendCharacters(CFMutableStringRef str
, const UniChar
*chars
, CFIndex appendedLength
) {
3997 CFIndex strLength
, idx
;
3999 __CFAssertIsNotNegative(appendedLength
);
4001 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "appendCharacters:length:", chars
, appendedLength
);
4003 __CFAssertIsStringAndMutable(str
);
4005 strLength
= __CFStrLength(str
);
4006 if (__CFStringGetCompatibility(Bug2967272
) || __CFStrIsUnicode(str
)) {
4007 __CFStringChangeSize(str
, CFRangeMake(strLength
, 0), appendedLength
, true);
4008 memmove((UniChar
*)__CFStrContents(str
) + strLength
, chars
, appendedLength
* sizeof(UniChar
));
4011 bool isASCII
= true;
4012 for (idx
= 0; isASCII
&& idx
< appendedLength
; idx
++) isASCII
= (chars
[idx
] < 0x80);
4013 __CFStringChangeSize(str
, CFRangeMake(strLength
, 0), appendedLength
, !isASCII
);
4015 memmove((UniChar
*)__CFStrContents(str
) + strLength
, chars
, appendedLength
* sizeof(UniChar
));
4017 contents
= (uint8_t *)__CFStrContents(str
) + strLength
+ __CFStrSkipAnyLengthByte(str
);
4018 for (idx
= 0; idx
< appendedLength
; idx
++) contents
[idx
] = (uint8_t)chars
[idx
];
4024 static void __CFStringAppendBytes(CFMutableStringRef str
, const char *cStr
, CFIndex appendedLength
, CFStringEncoding encoding
) {
4025 Boolean appendedIsUnicode
= false;
4026 Boolean freeCStrWhenDone
= false;
4027 Boolean demoteAppendedUnicode
= false;
4028 CFVarWidthCharBuffer vBuf
;
4030 __CFAssertIsNotNegative(appendedLength
);
4032 if (encoding
== kCFStringEncodingASCII
|| encoding
== __CFStringGetEightBitStringEncoding()) {
4033 // appendedLength now denotes length in UniChars
4034 } else if (encoding
== kCFStringEncodingUnicode
) {
4035 UniChar
*chars
= (UniChar
*)cStr
;
4036 CFIndex idx
, length
= appendedLength
/ sizeof(UniChar
);
4037 bool isASCII
= true;
4038 for (idx
= 0; isASCII
&& idx
< length
; idx
++) isASCII
= (chars
[idx
] < 0x80);
4040 appendedIsUnicode
= true;
4042 demoteAppendedUnicode
= true;
4044 appendedLength
= length
;
4046 Boolean usingPassedInMemory
= false;
4048 vBuf
.allocator
= __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
4049 vBuf
.chars
.unicode
= NULL
; // This will cause the decode function to allocate memory if necessary
4051 if (!__CFStringDecodeByteStream3((const uint8_t *)cStr
, appendedLength
, encoding
, __CFStrIsUnicode(str
), &vBuf
, &usingPassedInMemory
, 0)) {
4052 CFAssert1(0, __kCFLogAssertion
, "Supplied bytes could not be converted specified encoding %d", encoding
);
4056 // If not ASCII, appendedLength now denotes length in UniChars
4057 appendedLength
= vBuf
.numChars
;
4058 appendedIsUnicode
= !vBuf
.isASCII
;
4059 cStr
= (const char *)vBuf
.chars
.ascii
;
4060 freeCStrWhenDone
= !usingPassedInMemory
&& vBuf
.shouldFreeChars
;
4063 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) {
4064 if (!appendedIsUnicode
&& !demoteAppendedUnicode
) {
4065 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "_cfAppendCString:length:", cStr
, appendedLength
);
4067 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "appendCharacters:length:", cStr
, appendedLength
);
4071 __CFAssertIsStringAndMutable(str
);
4072 strLength
= __CFStrLength(str
);
4074 __CFStringChangeSize(str
, CFRangeMake(strLength
, 0), appendedLength
, appendedIsUnicode
|| __CFStrIsUnicode(str
));
4076 if (__CFStrIsUnicode(str
)) {
4077 UniChar
*contents
= (UniChar
*)__CFStrContents(str
);
4078 if (appendedIsUnicode
) {
4079 memmove(contents
+ strLength
, cStr
, appendedLength
* sizeof(UniChar
));
4081 __CFStrConvertBytesToUnicode((const uint8_t *)cStr
, contents
+ strLength
, appendedLength
);
4084 if (demoteAppendedUnicode
) {
4085 UniChar
*chars
= (UniChar
*)cStr
;
4087 uint8_t *contents
= (uint8_t *)__CFStrContents(str
) + strLength
+ __CFStrSkipAnyLengthByte(str
);
4088 for (idx
= 0; idx
< appendedLength
; idx
++) contents
[idx
] = (uint8_t)chars
[idx
];
4090 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
4091 memmove(contents
+ strLength
+ __CFStrSkipAnyLengthByte(str
), cStr
, appendedLength
);
4096 if (freeCStrWhenDone
) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr
);
4099 void CFStringAppendPascalString(CFMutableStringRef str
, ConstStringPtr pStr
, CFStringEncoding encoding
) {
4100 __CFStringAppendBytes(str
, (const char *)(pStr
+ 1), (CFIndex
)*pStr
, encoding
);
4103 void CFStringAppendCString(CFMutableStringRef str
, const char *cStr
, CFStringEncoding encoding
) {
4104 __CFStringAppendBytes(str
, cStr
, strlen(cStr
), encoding
);
4108 void CFStringAppendFormat(CFMutableStringRef str
, CFDictionaryRef formatOptions
, CFStringRef format
, ...) {
4111 va_start(argList
, format
);
4112 CFStringAppendFormatAndArguments(str
, formatOptions
, format
, argList
);
4117 CFIndex
CFStringFindAndReplace(CFMutableStringRef string
, CFStringRef stringToFind
, CFStringRef replacementString
, CFRange rangeToSearch
, CFOptionFlags compareOptions
) {
4119 Boolean backwards
= ((compareOptions
& kCFCompareBackwards
) != 0);
4120 UInt32 endIndex
= rangeToSearch
.location
+ rangeToSearch
.length
;
4121 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
4122 CFRange rangeBuffer
[MAX_RANGES_ON_STACK
]; // Used to avoid allocating memory
4123 CFRange
*ranges
= rangeBuffer
;
4124 CFIndex foundCount
= 0;
4125 CFIndex capacity
= MAX_RANGES_ON_STACK
;
4127 __CFAssertIsStringAndMutable(string
);
4128 __CFAssertRangeIsInStringBounds(string
, rangeToSearch
.location
, rangeToSearch
.length
);
4130 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
4131 while ((rangeToSearch
.length
> 0) && CFStringFindWithOptions(string
, stringToFind
, rangeToSearch
, compareOptions
, &foundRange
)) {
4132 // Determine the next range
4134 rangeToSearch
.length
= foundRange
.location
- rangeToSearch
.location
;
4136 rangeToSearch
.location
= foundRange
.location
+ foundRange
.length
;
4137 rangeToSearch
.length
= endIndex
- rangeToSearch
.location
;
4140 // If necessary, grow the array
4141 if (foundCount
>= capacity
) {
4142 bool firstAlloc
= (ranges
== rangeBuffer
) ? true : false;
4143 capacity
= (capacity
+ 4) * 2;
4144 // Note that reallocate with NULL previous pointer is same as allocate
4145 ranges
= (CFRange
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, firstAlloc
? NULL
: ranges
, capacity
* sizeof(CFRange
), 0);
4146 if (firstAlloc
) memmove(ranges
, rangeBuffer
, MAX_RANGES_ON_STACK
* sizeof(CFRange
));
4148 ranges
[foundCount
] = foundRange
;
4152 if (foundCount
> 0) {
4153 if (backwards
) { // Reorder the ranges to be incrementing (better to do this here, then to check other places)
4155 int tail
= foundCount
- 1;
4156 while (head
< tail
) {
4157 CFRange temp
= ranges
[head
];
4158 ranges
[head
] = ranges
[tail
];
4159 ranges
[tail
] = temp
;
4164 __CFStringReplaceMultiple(string
, ranges
, foundCount
, replacementString
);
4165 if (ranges
!= rangeBuffer
) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, ranges
);
4172 // This function is here for NSString purposes
4173 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
4175 int __CFStringCheckAndReplace(CFMutableStringRef str
, CFRange range
, CFStringRef replacement
) {
4176 if (!__CFStrIsMutable(str
)) return _CFStringErrNotMutable
; // These three ifs are always here, for NSString usage
4177 if (!replacement
&& __CFStringNoteErrors()) return _CFStringErrNilArg
;
4178 // We use unsigneds as that is what NSRanges do; we use uint64_t do make sure the sum doesn't wrap (otherwise we'd need to do 3 separate checks). This allows catching bad ranges as described in 3375535. (-1,1)
4179 if (((uint64_t)((unsigned)range
.location
)) + ((uint64_t)((unsigned)range
.length
)) > (uint64_t)__CFStrLength(str
) && __CFStringNoteErrors()) return _CFStringErrBounds
;
4180 __CFAssertIsStringAndMutable(str
);
4181 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
4182 __CFStringReplace(str
, range
, replacement
);
4183 return _CFStringErrNone
;
4186 // This function determines whether errors which would cause string exceptions should
4187 // be ignored or not
4189 Boolean
__CFStringNoteErrors(void) {
4195 void CFStringPad(CFMutableStringRef string
, CFStringRef padString
, CFIndex length
, CFIndex indexIntoPad
) {
4196 CFIndex originalLength
;
4198 __CFAssertIsNotNegative(length
);
4199 __CFAssertIsNotNegative(indexIntoPad
);
4201 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID
, void, string
, "_cfPad:length:padIndex:", padString
, length
, indexIntoPad
);
4203 __CFAssertIsStringAndMutable(string
);
4205 originalLength
= __CFStrLength(string
);
4206 if (length
< originalLength
) {
4207 __CFStringChangeSize(string
, CFRangeMake(length
, originalLength
- length
), 0, false);
4208 } else if (originalLength
< length
) {
4212 CFIndex padStringLength
;
4214 CFIndex padRemaining
= length
- originalLength
;
4216 if (CF_IS_OBJC(__kCFStringTypeID
, padString
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
4217 padStringLength
= CFStringGetLength(padString
);
4218 isUnicode
= true; /* !!! Bad for now */
4220 __CFAssertIsString(padString
);
4221 padStringLength
= __CFStrLength(padString
);
4222 isUnicode
= __CFStrIsUnicode(string
) || __CFStrIsUnicode(padString
);
4225 charSize
= isUnicode
? sizeof(UniChar
) : sizeof(uint8_t);
4227 __CFStringChangeSize(string
, CFRangeMake(originalLength
, 0), padRemaining
, isUnicode
);
4229 contents
= (uint8_t *)__CFStrContents(string
) + charSize
* originalLength
+ __CFStrSkipAnyLengthByte(string
);
4230 padLength
= padStringLength
- indexIntoPad
;
4231 padLength
= padRemaining
< padLength
? padRemaining
: padLength
;
4233 while (padRemaining
> 0) {
4235 CFStringGetCharacters(padString
, CFRangeMake(indexIntoPad
, padLength
), (UniChar
*)contents
);
4237 CFStringGetBytes(padString
, CFRangeMake(indexIntoPad
, padLength
), __CFStringGetEightBitStringEncoding(), 0, false, contents
, padRemaining
* charSize
, NULL
);
4239 contents
+= padLength
* charSize
;
4240 padRemaining
-= padLength
;
4242 padLength
= padRemaining
< padLength
? padRemaining
: padStringLength
;
4247 void CFStringTrim(CFMutableStringRef string
, CFStringRef trimString
) {
4249 CFIndex newStartIndex
;
4252 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfTrim:", trimString
);
4254 __CFAssertIsStringAndMutable(string
);
4255 __CFAssertIsString(trimString
);
4258 length
= __CFStrLength(string
);
4260 while (CFStringFindWithOptions(string
, trimString
, CFRangeMake(newStartIndex
, length
- newStartIndex
), kCFCompareAnchored
, &range
)) {
4261 newStartIndex
= range
.location
+ range
.length
;
4264 if (newStartIndex
< length
) {
4265 CFIndex charSize
= __CFStrIsUnicode(string
) ? sizeof(UniChar
) : sizeof(uint8_t);
4266 uint8_t *contents
= (uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4268 length
-= newStartIndex
;
4269 if (__CFStrLength(trimString
) < length
) {
4270 while (CFStringFindWithOptions(string
, trimString
, CFRangeMake(newStartIndex
, length
), kCFCompareAnchored
|kCFCompareBackwards
, &range
)) {
4271 length
= range
.location
- newStartIndex
;
4274 memmove(contents
, contents
+ newStartIndex
* charSize
, length
* charSize
);
4275 __CFStringChangeSize(string
, CFRangeMake(length
, __CFStrLength(string
) - length
), 0, false);
4276 } else { // Only trimString in string, trim all
4277 __CFStringChangeSize(string
, CFRangeMake(0, length
), 0, false);
4281 void CFStringTrimWhitespace(CFMutableStringRef string
) {
4282 CFIndex newStartIndex
;
4284 CFStringInlineBuffer buffer
;
4286 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, void, string
, "_cfTrimWS");
4288 __CFAssertIsStringAndMutable(string
);
4291 length
= __CFStrLength(string
);
4293 CFStringInitInlineBuffer(string
, &buffer
, CFRangeMake(0, length
));
4294 CFIndex buffer_idx
= 0;
4296 while (buffer_idx
< length
&& CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer
, buffer_idx
), kCFUniCharWhitespaceAndNewlineCharacterSet
))
4298 newStartIndex
= buffer_idx
;
4300 if (newStartIndex
< length
) {
4301 uint8_t *contents
= (uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4302 CFIndex charSize
= (__CFStrIsUnicode(string
) ? sizeof(UniChar
) : sizeof(uint8_t));
4304 buffer_idx
= length
- 1;
4305 while (0 <= buffer_idx
&& CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer
, buffer_idx
), kCFUniCharWhitespaceAndNewlineCharacterSet
))
4307 length
= buffer_idx
- newStartIndex
+ 1;
4309 memmove(contents
, contents
+ newStartIndex
* charSize
, length
* charSize
);
4310 __CFStringChangeSize(string
, CFRangeMake(length
, __CFStrLength(string
) - length
), 0, false);
4311 } else { // Whitespace only string
4312 __CFStringChangeSize(string
, CFRangeMake(0, length
), 0, false);
4316 void CFStringLowercase(CFMutableStringRef string
, CFLocaleRef locale
) {
4317 CFIndex currentIndex
= 0;
4319 const uint8_t *langCode
;
4320 Boolean isEightBit
= __CFStrIsEightBit(string
);
4322 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfLowercase:", locale
);
4324 __CFAssertIsStringAndMutable(string
);
4326 length
= __CFStrLength(string
);
4328 langCode
= (const uint8_t *)(_CFCanUseLocale(locale
) ? _CFStrGetLanguageIdentifierForLocale(locale
) : NULL
);
4330 if (!langCode
&& isEightBit
) {
4331 uint8_t *contents
= (uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4332 for (;currentIndex
< length
;currentIndex
++) {
4333 if (contents
[currentIndex
] >= 'A' && contents
[currentIndex
] <= 'Z') {
4334 contents
[currentIndex
] += 'a' - 'A';
4335 } else if (contents
[currentIndex
] > 127) {
4341 if (currentIndex
< length
) {
4342 UTF16Char
*contents
;
4343 UniChar mappedCharacters
[MAX_CASE_MAPPING_BUF
];
4344 CFIndex mappedLength
;
4345 UTF32Char currentChar
;
4348 if (isEightBit
) __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true);
4350 contents
= (UniChar
*)__CFStrContents(string
);
4352 for (;currentIndex
< length
;currentIndex
++) {
4354 if (CFUniCharIsSurrogateHighCharacter(contents
[currentIndex
]) && (currentIndex
+ 1 < length
) && CFUniCharIsSurrogateLowCharacter(contents
[currentIndex
+ 1])) {
4355 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(contents
[currentIndex
], contents
[currentIndex
+ 1]);
4357 currentChar
= contents
[currentIndex
];
4359 flags
= ((langCode
|| (currentChar
== 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar
, contents
, currentIndex
, length
, kCFUniCharToLowercase
, langCode
, flags
) : 0);
4361 mappedLength
= CFUniCharMapCaseTo(currentChar
, mappedCharacters
, MAX_CASE_MAPPING_BUF
, kCFUniCharToLowercase
, flags
, langCode
);
4362 if (mappedLength
> 0) contents
[currentIndex
] = *mappedCharacters
;
4364 if (currentChar
> 0xFFFF) { // Non-BMP char
4365 switch (mappedLength
) {
4367 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 2), 0, true);
4368 contents
= (UniChar
*)__CFStrContents(string
);
4373 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 1), 0, true);
4374 contents
= (UniChar
*)__CFStrContents(string
);
4379 contents
[++currentIndex
] = mappedCharacters
[1];
4383 --mappedLength
; // Skip the current char
4384 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
- 1, true);
4385 contents
= (UniChar
*)__CFStrContents(string
);
4386 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4387 length
+= (mappedLength
- 1);
4388 currentIndex
+= mappedLength
;
4391 } else if (mappedLength
== 0) {
4392 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 1), 0, true);
4393 contents
= (UniChar
*)__CFStrContents(string
);
4395 } else if (mappedLength
> 1) {
4396 --mappedLength
; // Skip the current char
4397 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
, true);
4398 contents
= (UniChar
*)__CFStrContents(string
);
4399 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4400 length
+= mappedLength
;
4401 currentIndex
+= mappedLength
;
4407 void CFStringUppercase(CFMutableStringRef string
, CFLocaleRef locale
) {
4408 CFIndex currentIndex
= 0;
4410 const uint8_t *langCode
;
4411 Boolean isEightBit
= __CFStrIsEightBit(string
);
4413 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfUppercase:", locale
);
4415 __CFAssertIsStringAndMutable(string
);
4417 length
= __CFStrLength(string
);
4419 langCode
= (const uint8_t *)(_CFCanUseLocale(locale
) ? _CFStrGetLanguageIdentifierForLocale(locale
) : NULL
);
4421 if (!langCode
&& isEightBit
) {
4422 uint8_t *contents
= (uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4423 for (;currentIndex
< length
;currentIndex
++) {
4424 if (contents
[currentIndex
] >= 'a' && contents
[currentIndex
] <= 'z') {
4425 contents
[currentIndex
] -= 'a' - 'A';
4426 } else if (contents
[currentIndex
] > 127) {
4432 if (currentIndex
< length
) {
4434 UniChar mappedCharacters
[MAX_CASE_MAPPING_BUF
];
4435 CFIndex mappedLength
;
4436 UTF32Char currentChar
;
4439 if (isEightBit
) __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true);
4441 contents
= (UniChar
*)__CFStrContents(string
);
4443 for (;currentIndex
< length
;currentIndex
++) {
4444 if (CFUniCharIsSurrogateHighCharacter(contents
[currentIndex
]) && (currentIndex
+ 1 < length
) && CFUniCharIsSurrogateLowCharacter(contents
[currentIndex
+ 1])) {
4445 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(contents
[currentIndex
], contents
[currentIndex
+ 1]);
4447 currentChar
= contents
[currentIndex
];
4450 flags
= (langCode
? CFUniCharGetConditionalCaseMappingFlags(currentChar
, contents
, currentIndex
, length
, kCFUniCharToUppercase
, langCode
, flags
) : 0);
4452 mappedLength
= CFUniCharMapCaseTo(currentChar
, mappedCharacters
, MAX_CASE_MAPPING_BUF
, kCFUniCharToUppercase
, flags
, langCode
);
4453 if (mappedLength
> 0) contents
[currentIndex
] = *mappedCharacters
;
4455 if (currentChar
> 0xFFFF) { // Non-BMP char
4456 switch (mappedLength
) {
4458 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 2), 0, true);
4459 contents
= (UniChar
*)__CFStrContents(string
);
4464 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 1), 0, true);
4465 contents
= (UniChar
*)__CFStrContents(string
);
4470 contents
[++currentIndex
] = mappedCharacters
[1];
4474 --mappedLength
; // Skip the current char
4475 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
- 1, true);
4476 contents
= (UniChar
*)__CFStrContents(string
);
4477 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4478 length
+= (mappedLength
- 1);
4479 currentIndex
+= mappedLength
;
4482 } else if (mappedLength
== 0) {
4483 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 1), 0, true);
4484 contents
= (UniChar
*)__CFStrContents(string
);
4486 } else if (mappedLength
> 1) {
4487 --mappedLength
; // Skip the current char
4488 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
, true);
4489 contents
= (UniChar
*)__CFStrContents(string
);
4490 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4491 length
+= mappedLength
;
4492 currentIndex
+= mappedLength
;
4499 void CFStringCapitalize(CFMutableStringRef string
, CFLocaleRef locale
) {
4500 CFIndex currentIndex
= 0;
4502 const uint8_t *langCode
;
4503 Boolean isEightBit
= __CFStrIsEightBit(string
);
4504 Boolean isLastCased
= false;
4505 const uint8_t *caseIgnorableForBMP
;
4507 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfCapitalize:", locale
);
4509 __CFAssertIsStringAndMutable(string
);
4511 length
= __CFStrLength(string
);
4513 caseIgnorableForBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet
, 0);
4515 langCode
= (const uint8_t *)(_CFCanUseLocale(locale
) ? _CFStrGetLanguageIdentifierForLocale(locale
) : NULL
);
4517 if (!langCode
&& isEightBit
) {
4518 uint8_t *contents
= (uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4519 for (;currentIndex
< length
;currentIndex
++) {
4520 if (contents
[currentIndex
] > 127) {
4522 } else if (contents
[currentIndex
] >= 'A' && contents
[currentIndex
] <= 'Z') {
4523 contents
[currentIndex
] += (isLastCased
? 'a' - 'A' : 0);
4525 } else if (contents
[currentIndex
] >= 'a' && contents
[currentIndex
] <= 'z') {
4526 contents
[currentIndex
] -= (!isLastCased
? 'a' - 'A' : 0);
4528 } else if (!CFUniCharIsMemberOfBitmap(contents
[currentIndex
], caseIgnorableForBMP
)) {
4529 isLastCased
= false;
4534 if (currentIndex
< length
) {
4536 UniChar mappedCharacters
[MAX_CASE_MAPPING_BUF
];
4537 CFIndex mappedLength
;
4538 UTF32Char currentChar
;
4541 if (isEightBit
) __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true);
4543 contents
= (UniChar
*)__CFStrContents(string
);
4545 for (;currentIndex
< length
;currentIndex
++) {
4546 if (CFUniCharIsSurrogateHighCharacter(contents
[currentIndex
]) && (currentIndex
+ 1 < length
) && CFUniCharIsSurrogateLowCharacter(contents
[currentIndex
+ 1])) {
4547 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(contents
[currentIndex
], contents
[currentIndex
+ 1]);
4549 currentChar
= contents
[currentIndex
];
4551 flags
= ((langCode
|| ((currentChar
== 0x03A3) && isLastCased
)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar
, contents
, currentIndex
, length
, (isLastCased
? kCFUniCharToLowercase
: kCFUniCharToTitlecase
), langCode
, flags
) : 0);
4553 mappedLength
= CFUniCharMapCaseTo(currentChar
, mappedCharacters
, MAX_CASE_MAPPING_BUF
, (isLastCased
? kCFUniCharToLowercase
: kCFUniCharToTitlecase
), flags
, langCode
);
4554 if (mappedLength
> 0) contents
[currentIndex
] = *mappedCharacters
;
4556 if (currentChar
> 0xFFFF) { // Non-BMP char
4557 switch (mappedLength
) {
4559 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 2), 0, true);
4560 contents
= (UniChar
*)__CFStrContents(string
);
4565 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 1), 0, true);
4566 contents
= (UniChar
*)__CFStrContents(string
);
4571 contents
[++currentIndex
] = mappedCharacters
[1];
4575 --mappedLength
; // Skip the current char
4576 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
- 1, true);
4577 contents
= (UniChar
*)__CFStrContents(string
);
4578 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4579 length
+= (mappedLength
- 1);
4580 currentIndex
+= mappedLength
;
4583 } else if (mappedLength
== 0) {
4584 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 1), 0, true);
4585 contents
= (UniChar
*)__CFStrContents(string
);
4587 } else if (mappedLength
> 1) {
4588 --mappedLength
; // Skip the current char
4589 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
, true);
4590 contents
= (UniChar
*)__CFStrContents(string
);
4591 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
4592 length
+= mappedLength
;
4593 currentIndex
+= mappedLength
;
4596 if (!((currentChar
> 0xFFFF) ? CFUniCharIsMemberOf(currentChar
, kCFUniCharCaseIgnorableCharacterSet
) : CFUniCharIsMemberOfBitmap(currentChar
, caseIgnorableForBMP
))) { // We have non-caseignorable here
4597 isLastCased
= ((CFUniCharIsMemberOf(currentChar
, kCFUniCharUppercaseLetterCharacterSet
) || CFUniCharIsMemberOf(currentChar
, kCFUniCharLowercaseLetterCharacterSet
)) ? true : false);
/* Number of UTF32Char slots in CFStringNormalize's on-stack work buffer; also the step by which that buffer grows once it has to move to the heap. */
4604 #define MAX_DECOMP_BUF 64
/* Constants for arithmetic Hangul syllable composition; CFStringNormalize builds a syllable as
   (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE.
   The *BASE values are the first code points of the syllable / L / V / T ranges and the
   *COUNT values the sizes of those ranges (presumably the Unicode-standard values — TODO confirm). */
4606 #define HANGUL_SBASE 0xAC00
4607 #define HANGUL_LBASE 0x1100
4608 #define HANGUL_VBASE 0x1161
4609 #define HANGUL_TBASE 0x11A7
4610 #define HANGUL_SCOUNT 11172
4611 #define HANGUL_LCOUNT 19
4612 #define HANGUL_VCOUNT 21
4613 #define HANGUL_TCOUNT 28
4614 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
4616 CF_INLINE
uint32_t __CFGetUTF16Length(const UTF32Char
*characters
, uint32_t utf32Length
) {
4617 const UTF32Char
*limit
= characters
+ utf32Length
;
4618 uint32_t length
= 0;
4620 while (characters
< limit
) length
+= (*(characters
++) > 0xFFFF ? 2 : 1);
4625 CF_INLINE
void __CFFillInUTF16(const UTF32Char
*characters
, UTF16Char
*dst
, uint32_t utf32Length
) {
4626 const UTF32Char
*limit
= characters
+ utf32Length
;
4627 UTF32Char currentChar
;
4629 while (characters
< limit
) {
4630 currentChar
= *(characters
++);
4631 if (currentChar
> 0xFFFF) {
4632 currentChar
-= 0x10000;
4633 *(dst
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
4634 *(dst
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
4636 *(dst
++) = currentChar
;
/* CFStringNormalize: rewrites `string` in place according to `theForm`.
   theForm is tested as a bit mask below: the kCFStringNormalizationFormC bit requests
   re-composition and the kCFStringNormalizationFormKD bit requests compatibility decomposition.
   The work is done cluster by cluster: decompose into a UTF-32 scratch buffer, optionally
   reorder / compatibility-decompose / compose there, then write the result back as UTF-16. */
4641 void CFStringNormalize(CFMutableStringRef string
, CFStringNormalizationForm theForm
) {
4642 CFIndex currentIndex
= 0;
/* Cleared on the 8-bit path below; when false the combining-mark reordering (CFUniCharPrioritySort) is skipped. */
4644 bool needToReorder
= true;
/* ObjC dispatch macro naming "_cfNormalize:", followed by the mutable-CFString assertion. */
4646 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfNormalize:", theForm
);
4648 __CFAssertIsStringAndMutable(string
);
4650 length
= __CFStrLength(string
);
/* 8-bit storage: nothing to do for form C; otherwise scan for the first byte above 127. */
4652 if (__CFStrIsEightBit(string
)) {
4655 if (theForm
== kCFStringNormalizationFormC
) return; // 8bit form has no decomposition
4657 contents
= (uint8_t *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4659 for (;currentIndex
< length
;currentIndex
++) {
4660 if (contents
[currentIndex
] > 127) {
/* Zero-length change with the last argument true; the contents are read as UTF16Char afterwards (presumably this converts the storage to Unicode — TODO confirm). */
4661 __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true); // need to do harm way
4662 needToReorder
= false;
/* Main pass over the UTF-16 contents starting at currentIndex. */
4668 if (currentIndex
< length
) {
4669 UTF16Char
*limit
= (UTF16Char
*)__CFStrContents(string
) + length
;
4670 UTF16Char
*contents
= (UTF16Char
*)__CFStrContents(string
) + currentIndex
;
/* Scratch buffer starts on the stack; mappedCharacters is repointed to heap memory when it has to grow. */
4671 UTF32Char buffer
[MAX_DECOMP_BUF
];
4672 UTF32Char
*mappedCharacters
= buffer
;
4673 CFIndex allocatedLength
= MAX_DECOMP_BUF
;
4674 CFIndex mappedLength
;
4675 CFIndex currentLength
;
4676 UTF32Char currentChar
;
/* Plane-0 tables fetched once; other planes are looked up per character below. */
4677 const uint8_t *decompBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, 0);
4678 const uint8_t *nonBaseBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
4679 const uint8_t *combiningBMP
= (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
4681 while (contents
< limit
) {
/* Fetch the next character, joining a valid surrogate pair into one UTF-32 value. */
4682 if (CFUniCharIsSurrogateHighCharacter(*contents
) && (contents
+ 1 < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
4683 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*contents
, *(contents
+ 1));
4687 currentChar
= *(contents
++);
/* Decompose only characters that are canonically decomposable and have combining property 0. */
4693 if (CFUniCharIsMemberOfBitmap(currentChar
, ((currentChar
< 0x10000) ? decompBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (currentChar
>> 16)))) && (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar
, ((currentChar
< 0x10000) ? combiningBMP
: (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16)))))) {
/* NOTE(review): the upper bound is written as > (HANGUL_SBASE + HANGUL_SCOUNT), so the code point one past the last syllable is also treated as a syllable — confirm intended. */
4694 if ((theForm
& kCFStringNormalizationFormC
) == 0 || currentChar
< HANGUL_SBASE
|| currentChar
> (HANGUL_SBASE
+ HANGUL_SCOUNT
)) { // We don't have to decompose Hangul Syllables if we're precomposing again
4695 mappedLength
= CFUniCharDecomposeCharacter(currentChar
, mappedCharacters
, MAX_DECOMP_BUF
);
/* Gather the run of combining characters that follows, so it can be reordered and/or composed. */
4699 if ((needToReorder
|| (theForm
& kCFStringNormalizationFormC
)) && ((contents
< limit
) || (mappedLength
== 0))) {
4700 if (mappedLength
> 0) {
/* Something was decomposed: peek at the character that follows without consuming it. */
4701 if (CFUniCharIsSurrogateHighCharacter(*contents
) && (contents
+ 1 < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
4702 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*contents
, *(contents
+ 1));
4704 currentChar
= *contents
;
4708 if (0 != CFUniCharGetCombiningPropertyForCharacter(currentChar
, (const uint8_t *)((currentChar
< 0x10000) ? combiningBMP
: CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16))))) {
4709 uint32_t decompLength
;
/* Nothing decomposed and the current character is itself combining: step back over it and seed the buffer with the character that precedes it. */
4711 if (mappedLength
== 0) {
4712 contents
-= (currentChar
& 0xFFFF0000 ? 2 : 1);
4713 if (currentIndex
> 0) {
4714 if (CFUniCharIsSurrogateLowCharacter(*(contents
- 1)) && (currentIndex
> 1) && CFUniCharIsSurrogateHighCharacter(*(contents
- 2))) {
4715 *mappedCharacters
= CFUniCharGetLongCharacterForSurrogatePair(*(contents
- 2), *(contents
- 1));
4719 *mappedCharacters
= *(contents
- 1);
4726 currentLength
+= (currentChar
& 0xFFFF0000 ? 2 : 1);
4728 contents
+= (currentChar
& 0xFFFF0000 ? 2 : 1);
/* Append the combining character, decomposed first if it is itself canonically decomposable. */
4730 if (CFUniCharIsMemberOfBitmap(currentChar
, ((currentChar
< 0x10000) ? decompBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (currentChar
>> 16))))) { // Vietnamese accent, etc.
4731 decompLength
= CFUniCharDecomposeCharacter(currentChar
, mappedCharacters
+ mappedLength
, MAX_DECOMP_BUF
- mappedLength
);
4732 mappedLength
+= decompLength
;
4734 mappedCharacters
[mappedLength
++] = currentChar
;
/* Keep consuming characters until one with combining property 0 is reached. */
4737 while (contents
< limit
) {
4738 if (CFUniCharIsSurrogateHighCharacter(*contents
) && (contents
+ 1 < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
4739 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*contents
, *(contents
+ 1));
4741 currentChar
= *contents
;
4743 if (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar
, (const uint8_t *)((currentChar
< 0x10000) ? combiningBMP
: CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (currentChar
>> 16))))) break;
4744 if (currentChar
& 0xFFFF0000) {
/* Scratch buffer full: grow by MAX_DECOMP_BUF, moving from the stack buffer to the heap the first time. */
4751 if (mappedLength
== allocatedLength
) {
4752 allocatedLength
+= MAX_DECOMP_BUF
;
4753 if (mappedCharacters
== buffer
) {
4754 mappedCharacters
= (UTF32Char
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, allocatedLength
* sizeof(UTF32Char
), 0);
4755 memmove(mappedCharacters
, buffer
, MAX_DECOMP_BUF
* sizeof(UTF32Char
));
4757 mappedCharacters
= (UTF32Char
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, mappedCharacters
, allocatedLength
* sizeof(UTF32Char
), 0);
/* NOTE(review): the capacity passed to CFUniCharDecomposeCharacter is MAX_DECOMP_BUF - mappedLength even after the buffer has grown to allocatedLength, and the growth check above only runs when the buffer is exactly full — confirm the remaining room is always sufficient. */
4760 if (CFUniCharIsMemberOfBitmap(currentChar
, ((currentChar
< 0x10000) ? decompBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet
, (currentChar
>> 16))))) { // Vietnamese accent, etc.
4761 decompLength
= CFUniCharDecomposeCharacter(currentChar
, mappedCharacters
+ mappedLength
, MAX_DECOMP_BUF
- mappedLength
);
4762 mappedLength
+= decompLength
;
4764 mappedCharacters
[mappedLength
++] = currentChar
;
/* Reorder the collected cluster unless reordering was disabled on the 8-bit path. */
4768 if (needToReorder
&& mappedLength
> 1) CFUniCharPrioritySort(mappedCharacters
, mappedLength
);
/* Compatibility decomposition (KD bit set). */
4771 if (theForm
& kCFStringNormalizationFormKD
) {
4772 CFIndex newLength
= 0;
4774 if (mappedLength
== 0 && CFUniCharIsMemberOf(currentChar
, kCFUniCharCompatibilityDecomposableCharacterSet
)) {
4775 mappedCharacters
[mappedLength
++] = currentChar
;
/* A return of 0 from CFUniCharCompatibilityDecompose is handled by growing the buffer and trying again. */
4777 while (newLength
< mappedLength
) {
4778 newLength
= CFUniCharCompatibilityDecompose(mappedCharacters
, mappedLength
, allocatedLength
);
4779 if (newLength
== 0) {
4780 allocatedLength
+= MAX_DECOMP_BUF
;
4781 if (mappedCharacters
== buffer
) {
4782 mappedCharacters
= (UTF32Char
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, allocatedLength
* sizeof(UTF32Char
), 0);
4783 memmove(mappedCharacters
, buffer
, MAX_DECOMP_BUF
* sizeof(UTF32Char
));
4785 mappedCharacters
= (UTF32Char
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, mappedCharacters
, allocatedLength
* sizeof(UTF32Char
), 0);
4789 mappedLength
= newLength
;
/* Composition (C bit set). */
4792 if (theForm
& kCFStringNormalizationFormC
) {
/* Case 1: the buffer holds a multi-character cluster; compose it in place. Slots that get absorbed are marked 0xFFFD and squeezed out afterwards. */
4795 if (mappedLength
> 1) {
4796 CFIndex consumedLength
= 1;
4797 UTF32Char
*currentBase
= mappedCharacters
;
4798 uint8_t currentClass
, lastClass
= 0;
4799 bool didCombine
= false;
4801 currentChar
= *mappedCharacters
;
4803 while (consumedLength
< mappedLength
) {
4804 nextChar
= mappedCharacters
[consumedLength
];
4805 currentClass
= CFUniCharGetCombiningPropertyForCharacter(nextChar
, (const uint8_t *)((nextChar
< 0x10000) ? combiningBMP
: CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (nextChar
>> 16))));
/* With the KD bit also set, jamo sequences inside the buffer are composed arithmetically. */
4807 if (theForm
& kCFStringNormalizationFormKD
) {
4808 if ((currentChar
>= HANGUL_LBASE
) && (currentChar
< (HANGUL_LBASE
+ 0xFF))) {
4809 SInt8 lIndex
= currentChar
- HANGUL_LBASE
;
/* NOTE(review): lIndex, vIndex and tIndex are range-checked with <= / > against the *COUNT values, so an index equal to the count is accepted — confirm intended. */
4811 if ((0 <= lIndex
) && (lIndex
<= HANGUL_LCOUNT
)) {
4812 SInt16 vIndex
= nextChar
- HANGUL_VBASE
;
4814 if ((vIndex
>= 0) && (vIndex
<= HANGUL_VCOUNT
)) {
4816 CFIndex usedLength
= mappedLength
;
4818 mappedCharacters
[consumedLength
++] = 0xFFFD;
4820 if (consumedLength
< mappedLength
) {
4821 tIndex
= mappedCharacters
[consumedLength
] - HANGUL_TBASE
;
4822 if ((tIndex
< 0) || (tIndex
> HANGUL_TCOUNT
)) {
4825 mappedCharacters
[consumedLength
++] = 0xFFFD;
4828 *currentBase
= (lIndex
* HANGUL_VCOUNT
+ vIndex
) * HANGUL_TCOUNT
+ tIndex
+ HANGUL_SBASE
;
/* Walk backwards removing the 0xFFFD placeholders left by the absorbed jamo. */
4830 while (--usedLength
> 0) {
4831 if (mappedCharacters
[usedLength
] == 0xFFFD) {
4834 memmove(mappedCharacters
+ usedLength
, mappedCharacters
+ usedLength
+ 1, (mappedLength
- usedLength
) * sizeof(UTF32Char
));
4837 currentBase
= mappedCharacters
+ consumedLength
;
4838 currentChar
= *currentBase
;
/* A character outside the non-base set becomes the new base for the composition that follows. */
4845 if (!CFUniCharIsMemberOfBitmap(nextChar
, ((nextChar
< 0x10000) ? nonBaseBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (nextChar
>> 16))))) {
4846 *currentBase
= currentChar
;
4847 currentBase
= mappedCharacters
+ consumedLength
;
4848 currentChar
= nextChar
;
/* Composition with the base is attempted only when lastClass is 0 or this mark's class is higher than lastClass; a 0xFFFD result from CFUniCharPrecomposeCharacter means no composition. */
4854 if ((lastClass
== 0) || (currentClass
> lastClass
)) {
4855 nextChar
= CFUniCharPrecomposeCharacter(currentChar
, nextChar
);
4856 if (nextChar
== 0xFFFD) {
4857 lastClass
= currentClass
;
4859 mappedCharacters
[consumedLength
] = 0xFFFD;
4861 currentChar
= nextChar
;
4867 *currentBase
= currentChar
;
/* Squeeze out the slots that were marked 0xFFFD during composition. */
4869 consumedLength
= mappedLength
;
4870 while (--consumedLength
> 0) {
4871 if (mappedCharacters
[consumedLength
] == 0xFFFD) {
4873 memmove(mappedCharacters
+ consumedLength
, mappedCharacters
+ consumedLength
+ 1, (mappedLength
- consumedLength
) * sizeof(UTF32Char
));
/* Case 2: a lone jamo in the buffer; following V and T jamo are read straight from the string and composed arithmetically. */
4877 } else if ((currentChar
>= HANGUL_LBASE
) && (currentChar
< (HANGUL_LBASE
+ 0xFF))) { // Hangul Jamo
4878 SInt8 lIndex
= currentChar
- HANGUL_LBASE
;
4880 if ((contents
< limit
) && (0 <= lIndex
) && (lIndex
<= HANGUL_LCOUNT
)) {
4881 SInt16 vIndex
= *contents
- HANGUL_VBASE
;
4883 if ((vIndex
>= 0) && (vIndex
<= HANGUL_VCOUNT
)) {
4886 ++contents
; ++currentLength
;
4888 if (contents
< limit
) {
4889 tIndex
= *contents
- HANGUL_TBASE
;
4890 if ((tIndex
< 0) || (tIndex
> HANGUL_TCOUNT
)) {
4893 ++contents
; ++currentLength
;
4896 *mappedCharacters
= (lIndex
* HANGUL_VCOUNT
+ vIndex
) * HANGUL_TCOUNT
+ tIndex
+ HANGUL_SBASE
;
/* Case 3: try to compose the current character with following non-base characters whose combining property is 0. */
4900 } else { // collect class 0 non-base characters
4901 while (contents
< limit
) {
4902 nextChar
= *contents
;
4903 if (CFUniCharIsSurrogateHighCharacter(nextChar
) && ((contents
+ 1) < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
4904 nextChar
= CFUniCharGetLongCharacterForSurrogatePair(nextChar
, *(contents
+ 1));
4905 if (!CFUniCharIsMemberOfBitmap(nextChar
, (const uint8_t *)CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (nextChar
>> 16))) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar
, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (nextChar
>> 16))))) break;
4907 if (!CFUniCharIsMemberOfBitmap(nextChar
, nonBaseBMP
) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar
, combiningBMP
))) break;
4909 currentChar
= CFUniCharPrecomposeCharacter(currentChar
, nextChar
);
4910 if (0xFFFD == currentChar
) break;
4912 if (nextChar
< 0x10000) {
4913 ++contents
; ++currentLength
;
4919 *mappedCharacters
= currentChar
;
/* Write the result back: resize the consumed range when the UTF-16 length differs, then re-derive contents/limit because the storage may have changed. */
4925 if (mappedLength
> 0) {
4926 CFIndex utf16Length
= __CFGetUTF16Length(mappedCharacters
, mappedLength
);
4928 if (utf16Length
!= currentLength
) {
4929 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, currentLength
), utf16Length
, true);
4930 currentLength
= utf16Length
;
4932 contents
= (UTF16Char
*)__CFStrContents(string
);
4933 limit
= contents
+ __CFStrLength(string
);
4934 contents
+= currentIndex
;
4935 __CFFillInUTF16(mappedCharacters
, contents
, mappedLength
);
4936 contents
+= utf16Length
;
4938 currentIndex
+= currentLength
;
/* Free the scratch buffer only if it was moved to the heap. */
4941 if (mappedCharacters
!= buffer
) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, mappedCharacters
);
/* CFStringFold: folds `theString` in place according to `theFlags`.
   Only the case-, diacritic- and width-insensitive bits of theFlags are kept (see the mask below);
   kCFCompareLocalized is consulted beforehand to decide whether the current locale is used when
   `locale` is NULL.  Folding of each character cluster is delegated to
   __CFStringFoldCharacterClusterAtIndex. */
4945 void CFStringFold(CFMutableStringRef theString
, CFStringCompareFlags theFlags
, CFLocaleRef locale
) {
4946 CFStringInlineBuffer stringBuffer
;
4947 CFIndex length
= CFStringGetLength(theString
);
4948 CFIndex currentIndex
= 0;
4949 CFIndex bufferLength
= 0;
/* UTF-32 output of the most recent fold call. */
4950 UTF32Char buffer
[kCFStringStackBufferLength
];
4951 const uint8_t *cString
;
4952 const uint8_t *langCode
;
4953 CFStringEncoding eightBitEncoding
;
4954 bool caseInsensitive
= ((theFlags
& kCFCompareCaseInsensitive
) ? true : false);
4955 bool isObjc
= CF_IS_OBJC(__kCFStringTypeID
, theString
);
4956 CFLocaleRef theLocale
= locale
;
/* A locale copied here is owned by this function and released at the end (see the final check). */
4958 if ((theFlags
& kCFCompareLocalized
) && (NULL
== locale
)) {
4959 theLocale
= CFLocaleCopyCurrent();
4962 theFlags
&= (kCFCompareCaseInsensitive
|kCFCompareDiacriticInsensitive
|kCFCompareWidthInsensitive
);
4964 if ((0 == theFlags
) || (0 == length
)) goto bail
; // nothing to do
4966 langCode
= ((NULL
== theLocale
) ? NULL
: (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(theLocale
));
/* cString is non-NULL only when the contents are directly available in the 8-bit encoding. */
4968 eightBitEncoding
= __CFStringGetEightBitStringEncoding();
4969 cString
= (const uint8_t *)CFStringGetCStringPtr(theString
, eightBitEncoding
);
4971 if ((NULL
!= cString
) && !caseInsensitive
&& (kCFStringEncodingASCII
== eightBitEncoding
)) goto bail
; // All ASCII
4973 CFStringInitInlineBuffer(theString
, &stringBuffer
, CFRangeMake(0, length
));
/* Phase 1: 8-bit contents are folded byte by byte for as long as each result still fits in a single 7-bit byte. */
4975 if ((NULL
!= cString
) && (theFlags
& (kCFCompareCaseInsensitive
|kCFCompareDiacriticInsensitive
))) {
4976 const uint8_t *cStringPtr
= cString
;
4977 const uint8_t *cStringLimit
= cString
+ length
;
/* Writable view of the same bytes; NULL for ObjC strings, which cannot be edited in place here. */
4978 uint8_t *cStringContents
= (isObjc
? NULL
: (uint8_t *)__CFStrContents(theString
) + __CFStrSkipAnyLengthByte(theString
));
4980 while (cStringPtr
< cStringLimit
) {
4981 if ((*cStringPtr
< 0x80) && (NULL
== langCode
)) {
4982 if (caseInsensitive
&& (*cStringPtr
>= 'A') && (*cStringPtr
<= 'Z')) {
4983 if (NULL
== cStringContents
) {
4986 cStringContents
[cStringPtr
- cString
] += ('a' - 'A');
/* Non-ASCII byte or language-specific folding: fold the cluster; leave this phase when the result is not a single 7-bit character or the bytes are not writable. */
4990 if ((bufferLength
= __CFStringFoldCharacterClusterAtIndex((UTF32Char
)__CFCharToUniCharTable
[*cStringPtr
], &stringBuffer
, cStringPtr
- cString
, theFlags
, langCode
, buffer
, kCFStringStackBufferLength
, NULL
)) > 0) {
4991 if ((*buffer
> 0x7F) || (bufferLength
> 1) || (NULL
== cStringContents
)) break;
4992 cStringContents
[cStringPtr
- cString
] = *buffer
;
4998 currentIndex
= cStringPtr
- cString
;
/* Phase 2: whatever remains from currentIndex onwards. */
5001 if (currentIndex
< length
) {
5002 UTF16Char
*contents
;
/* The remainder is copied into a temporary mutable CFString, folded by a recursive call, and substituted back with CFStringReplace (presumably this is the ObjC-string branch — the selecting condition is not visible here; TODO confirm). */
5005 CFMutableStringRef cfString
;
5006 CFRange range
= CFRangeMake(currentIndex
, length
- currentIndex
);
5008 contents
= (UTF16Char
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * range
.length
, 0);
5010 CFStringGetCharacters(theString
, range
, contents
);
5012 cfString
= CFStringCreateMutableWithExternalCharactersNoCopy(kCFAllocatorSystemDefault
, contents
, range
.length
, range
.length
, NULL
);
5014 CFStringFold(cfString
, theFlags
, theLocale
);
5016 CFStringReplace(theString
, range
, cfString
);
5018 CFRelease(cfString
);
/* In-place path operating directly on the string's own storage. */
5020 const UTF32Char
*characters
;
5021 const UTF32Char
*charactersLimit
;
5022 UTF32Char character
;
5023 CFIndex consumedLength
;
/* A fold result is still pending from phase 1: make room for it (the change is made with the last argument true) and store it. */
5027 if (bufferLength
> 0) {
5028 __CFStringChangeSize(theString
, CFRangeMake(currentIndex
+ 1, 0), bufferLength
- 1, true);
5029 length
= __CFStrLength(theString
);
5030 CFStringInitInlineBuffer(theString
, &stringBuffer
, CFRangeMake(0, length
));
5032 contents
= (UTF16Char
*)__CFStrContents(theString
) + currentIndex
;
5033 characters
= buffer
;
5034 charactersLimit
= characters
+ bufferLength
;
/* NOTE(review): each UTF-32 value is narrowed to UTF16Char with a plain cast here — confirm values above 0xFFFF cannot occur on this path. */
5035 while (characters
< charactersLimit
) *(contents
++) = (UTF16Char
)*(characters
++);
5039 while (currentIndex
< length
) {
5040 character
= __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer
, currentIndex
);
/* ASCII shortcut when no language code applies and diacritic folding is not requested. */
5044 if ((NULL
== langCode
) && (character
< 0x80) && (0 == (theFlags
& kCFCompareDiacriticInsensitive
))) {
5045 if (caseInsensitive
&& (character
>= 'A') && (character
<= 'Z')) {
5048 *buffer
= character
+ ('a' - 'A');
/* General case: join a surrogate pair if present, then fold the whole cluster. */
5051 if (CFUniCharIsSurrogateHighCharacter(character
) && ((currentIndex
+ 1) < length
)) {
5052 UTF16Char lowSurrogate
= __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer
, currentIndex
+ 1);
5053 if (CFUniCharIsSurrogateLowCharacter(lowSurrogate
)) character
= CFUniCharGetLongCharacterForSurrogatePair(character
, lowSurrogate
);
5056 bufferLength
= __CFStringFoldCharacterClusterAtIndex(character
, &stringBuffer
, currentIndex
, theFlags
, langCode
, buffer
, kCFStringStackBufferLength
, &consumedLength
);
/* consumedLength > 0 means the cluster was changed: write the folded form back. */
5059 if (consumedLength
> 0) {
5060 CFIndex utf16Length
= bufferLength
;
5062 characters
= buffer
;
5063 charactersLimit
= characters
+ bufferLength
;
5065 while (characters
< charactersLimit
) if (*(characters
++) > 0xFFFF) ++utf16Length
; // Extend bufferLength to the UTF-16 length
/* Resize when the folded form has a different UTF-16 length, or when the storage is still 8-bit. */
5067 if ((utf16Length
!= consumedLength
) || __CFStrIsEightBit(theString
)) {
5069 CFIndex insertLength
;
5071 if (consumedLength
< utf16Length
) { // Need to expand
5072 range
= CFRangeMake(currentIndex
+ consumedLength
, 0);
5073 insertLength
= utf16Length
- consumedLength
;
5075 range
= CFRangeMake(currentIndex
+ utf16Length
, consumedLength
- utf16Length
);
/* The inline buffer is re-initialised after every size change because length and storage may have changed. */
5078 __CFStringChangeSize(theString
, range
, insertLength
, true);
5079 length
= __CFStrLength(theString
);
5080 CFStringInitInlineBuffer(theString
, &stringBuffer
, CFRangeMake(0, length
));
5083 (void)CFUniCharFromUTF32(buffer
, bufferLength
, (UTF16Char
*)__CFStrContents(theString
) + currentIndex
, true, __CF_BIG_ENDIAN__
);
5085 currentIndex
+= utf16Length
;
/* Release the locale only if it was copied above (caller passed NULL). */
5094 if (NULL
== locale
&& theLocale
) {
5095 CFRelease(theLocale
);
5100 kCFStringFormatZeroFlag
= (1 << 0), // if not, padding is space char
5101 kCFStringFormatMinusFlag
= (1 << 1), // if not, no flag implied
5102 kCFStringFormatPlusFlag
= (1 << 2), // if not, no flag implied, overrides space
5103 kCFStringFormatSpaceFlag
= (1 << 3) // if not, no flag implied
5126 #if LONG_DOUBLE_SUPPORT
5127 long double longDoubleValue
;
5134 CFFormatDefaultSize
= 0,
5141 CFFormatSizeLong
= CFFormatSize8
,
5142 CFFormatSizePointer
= CFFormatSize8
5144 CFFormatSizeLong
= CFFormatSize4
,
5145 CFFormatSizePointer
= CFFormatSize4
5152 CFFormatLiteralType
= 32,
5153 CFFormatLongType
= 33,
5154 CFFormatDoubleType
= 34,
5155 CFFormatPointerType
= 35,
5156 CFFormatObjectType
= 36, /* handled specially */ /* ??? not used anymore, can be removed? */
5157 CFFormatCFType
= 37, /* handled specially */
5158 CFFormatUnicharsType
= 38, /* handled specially */
5159 CFFormatCharsType
= 39, /* handled specially */
5160 CFFormatPascalCharsType
= 40, /* handled specially */
5161 CFFormatSingleUnicharType
= 41, /* handled specially */
5162 CFFormatDummyPointerType
= 42 /* special case for %n */
5165 CF_INLINE
void __CFParseFormatSpec(const UniChar
*uformat
, const uint8_t *cformat
, SInt32
*fmtIdx
, SInt32 fmtLen
, CFFormatSpec
*spec
) {
5166 Boolean seenDot
= false;
5169 if (fmtLen
<= *fmtIdx
) return; /* no type */
5170 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)++]; else ch
= uformat
[(*fmtIdx
)++];
5171 reswtch
:switch (ch
) {
5172 case '#': // ignored for now
5175 if (!(spec
->flags
& kCFStringFormatPlusFlag
)) spec
->flags
|= kCFStringFormatSpaceFlag
;
5178 spec
->flags
|= kCFStringFormatMinusFlag
;
5179 spec
->flags
&= ~kCFStringFormatZeroFlag
; // remove zero flag
5182 spec
->flags
|= kCFStringFormatPlusFlag
;
5183 spec
->flags
&= ~kCFStringFormatSpaceFlag
; // remove space flag
5186 if (!(spec
->flags
& kCFStringFormatMinusFlag
)) spec
->flags
|= kCFStringFormatZeroFlag
;
5189 spec
->size
= CFFormatSize2
;
5192 if (*fmtIdx
< fmtLen
) {
5193 // fetch next character, don't increment fmtIdx
5194 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)]; else ch
= uformat
[(*fmtIdx
)];
5195 if ('l' == ch
) { // 'll' for long long, like 'q'
5197 spec
->size
= CFFormatSize8
;
5201 spec
->size
= CFFormatSizeLong
; // 4 or 8 depending on LP64
5203 #if LONG_DOUBLE_SUPPORT
5205 spec
->size
= CFFormatSize16
;
5209 spec
->size
= CFFormatSize8
;
5212 spec
->size
= CFFormatSizeLong
; // 4 or 8 depending on LP64
5215 spec
->size
= CFFormatSize8
;
5218 spec
->type
= CFFormatLongType
;
5219 spec
->size
= CFFormatSize1
;
5221 case 'O': case 'o': case 'D': case 'd': case 'i': case 'U': case 'u': case 'x': case 'X':
5222 spec
->type
= CFFormatLongType
;
5223 // Seems like if spec->size == 0, we should spec->size = CFFormatSize4. However, 0 is handled correctly.
5225 case 'a': case 'A': case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
5226 spec
->type
= CFFormatDoubleType
;
5227 if (spec
->size
!= CFFormatSize16
) spec
->size
= CFFormatSize8
;
5229 case 'n': /* %n is not handled correctly; for Leopard or newer apps, we disable it further */
5230 spec
->type
= _CFExecutableLinkedOnOrAfter(CFSystemVersionLeopard
) ? CFFormatDummyPointerType
: CFFormatPointerType
;
5231 spec
->size
= CFFormatSizePointer
; // 4 or 8 depending on LP64
5234 spec
->type
= CFFormatPointerType
;
5235 spec
->size
= CFFormatSizePointer
; // 4 or 8 depending on LP64
5238 spec
->type
= CFFormatCharsType
;
5239 spec
->size
= CFFormatSizePointer
; // 4 or 8 depending on LP64
5242 spec
->type
= CFFormatUnicharsType
;
5243 spec
->size
= CFFormatSizePointer
; // 4 or 8 depending on LP64
5246 spec
->type
= CFFormatSingleUnicharType
;
5247 spec
->size
= CFFormatSize2
;
5250 spec
->type
= CFFormatPascalCharsType
;
5251 spec
->size
= CFFormatSizePointer
; // 4 or 8 depending on LP64
5254 spec
->type
= CFFormatCFType
;
5255 spec
->size
= CFFormatSizePointer
; // 4 or 8 depending on LP64
5257 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
5260 number
= 10 * number
+ (ch
- '0');
5261 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)++]; else ch
= uformat
[(*fmtIdx
)++];
5262 } while ((UInt32
)(ch
- '0') <= 9);
5264 if (-2 == spec
->precArgNum
) {
5265 spec
->precArgNum
= (int8_t)number
- 1; // Arg numbers start from 1
5266 } else if (-2 == spec
->widthArgNum
) {
5267 spec
->widthArgNum
= (int8_t)number
- 1; // Arg numbers start from 1
5269 spec
->mainArgNum
= (int8_t)number
- 1; // Arg numbers start from 1
5272 } else if (seenDot
) { /* else it's either precision or width */
5273 spec
->precArg
= (SInt32
)number
;
5275 spec
->widthArg
= (SInt32
)number
;
5280 spec
->widthArgNum
= -2;
5284 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)++]; else ch
= uformat
[(*fmtIdx
)++];
5286 spec
->precArgNum
= -2;
5291 spec
->type
= CFFormatLiteralType
;
5297 /* ??? It ignores the formatOptions argument.
5298 ??? %s depends on handling of encodings by __CFStringAppendBytes
5300 void CFStringAppendFormatAndArguments(CFMutableStringRef outputString
, CFDictionaryRef formatOptions
, CFStringRef formatString
, va_list args
) {
/* Thin wrapper: forwards every argument unchanged to _CFStringAppendFormatAndArgumentsAux, passing NULL for its second parameter (the copyDescFunc callback). */
5301 _CFStringAppendFormatAndArgumentsAux(outputString
, NULL
, formatOptions
, formatString
, args
);
/* SNPRINTF(TYPE, WHAT): casts WHAT to TYPE and formats it into `buffer` (size argument 255) using `formatBuffer`.
   `width` and/or `precision` are passed as extra arguments when the current spec's widthArgNum / precArgNum
   is not -1.  The expansion relies on `specs`, `curSpec`, `buffer`, `formatBuffer`, `width` and `precision`
   being in scope at the point of use.  The DEPLOYMENT_TARGET_MACOSX variant calls snprintf_l with a NULL
   locale argument; the other variant calls plain snprintf. */
5304 #if DEPLOYMENT_TARGET_MACOSX
5305 #define SNPRINTF(TYPE, WHAT) { \
5306 TYPE value = (TYPE) WHAT; \
5307 if (-1 != specs[curSpec].widthArgNum) { \
5308 if (-1 != specs[curSpec].precArgNum) { \
5309 snprintf_l(buffer, 255, NULL, formatBuffer, width, precision, value); \
5311 snprintf_l(buffer, 255, NULL, formatBuffer, width, value); \
5314 if (-1 != specs[curSpec].precArgNum) { \
5315 snprintf_l(buffer, 255, NULL, formatBuffer, precision, value); \
5317 snprintf_l(buffer, 255, NULL, formatBuffer, value); \
5321 #define SNPRINTF(TYPE, WHAT) { \
5322 TYPE value = (TYPE) WHAT; \
5323 if (-1 != specs[curSpec].widthArgNum) { \
5324 if (-1 != specs[curSpec].precArgNum) { \
5325 snprintf(buffer, 255, formatBuffer, width, precision, value); \
5327 snprintf(buffer, 255, formatBuffer, width, value); \
5330 if (-1 != specs[curSpec].precArgNum) { \
5331 snprintf(buffer, 255, formatBuffer, precision, value); \
5333 snprintf(buffer, 255, formatBuffer, value); \
5338 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString
, CFStringRef (*copyDescFunc
)(void *, const void *), CFDictionaryRef formatOptions
, CFStringRef formatString
, va_list args
) {
5339 SInt32 numSpecs
, sizeSpecs
, sizeArgNum
, formatIdx
, curSpec
, argNum
;
5341 #define FORMAT_BUFFER_LEN 400
5342 const uint8_t *cformat
= NULL
;
5343 const UniChar
*uformat
= NULL
;
5344 UniChar
*formatChars
= NULL
;
5345 UniChar localFormatBuffer
[FORMAT_BUFFER_LEN
];
5347 #define VPRINTF_BUFFER_LEN 61
5348 CFFormatSpec localSpecsBuffer
[VPRINTF_BUFFER_LEN
];
5349 CFFormatSpec
*specs
;
5350 CFPrintValue localValuesBuffer
[VPRINTF_BUFFER_LEN
];
5351 CFPrintValue
*values
;
5352 CFAllocatorRef tmpAlloc
= NULL
;
5354 intmax_t dummyLocation
; // A place for %n to do its thing in; should be the widest possible int value
5362 formatLen
= CFStringGetLength(formatString
);
5363 if (!CF_IS_OBJC(__kCFStringTypeID
, formatString
)) {
5364 __CFAssertIsString(formatString
);
5365 if (!__CFStrIsUnicode(formatString
)) {
5366 cformat
= (const uint8_t *)__CFStrContents(formatString
);
5367 if (cformat
) cformat
+= __CFStrSkipAnyLengthByte(formatString
);
5369 uformat
= (const UniChar
*)__CFStrContents(formatString
);
5372 if (!cformat
&& !uformat
) {
5373 formatChars
= (formatLen
> FORMAT_BUFFER_LEN
) ? (UniChar
*)CFAllocatorAllocate(tmpAlloc
= __CFGetDefaultAllocator(), formatLen
* sizeof(UniChar
), 0) : localFormatBuffer
;
5374 if (formatChars
!= localFormatBuffer
&& __CFOASafe
) __CFSetLastAllocationEventName(formatChars
, "CFString (temp)");
5375 CFStringGetCharacters(formatString
, CFRangeMake(0, formatLen
), formatChars
);
5376 uformat
= formatChars
;
5379 /* Compute an upper bound for the number of format specifications */
5381 for (formatIdx
= 0; formatIdx
< formatLen
; formatIdx
++) if ('%' == cformat
[formatIdx
]) sizeSpecs
++;
5383 for (formatIdx
= 0; formatIdx
< formatLen
; formatIdx
++) if ('%' == uformat
[formatIdx
]) sizeSpecs
++;
5385 tmpAlloc
= __CFGetDefaultAllocator();
5386 specs
= ((2 * sizeSpecs
+ 1) > VPRINTF_BUFFER_LEN
) ? (CFFormatSpec
*)CFAllocatorAllocate(tmpAlloc
, (2 * sizeSpecs
+ 1) * sizeof(CFFormatSpec
), 0) : localSpecsBuffer
;
5387 if (specs
!= localSpecsBuffer
&& __CFOASafe
) __CFSetLastAllocationEventName(specs
, "CFString (temp)");
5389 /* Collect format specification information from the format string */
5390 for (curSpec
= 0, formatIdx
= 0; formatIdx
< formatLen
; curSpec
++) {
5392 specs
[curSpec
].loc
= formatIdx
;
5393 specs
[curSpec
].len
= 0;
5394 specs
[curSpec
].size
= 0;
5395 specs
[curSpec
].type
= 0;
5396 specs
[curSpec
].flags
= 0;
5397 specs
[curSpec
].widthArg
= -1;
5398 specs
[curSpec
].precArg
= -1;
5399 specs
[curSpec
].mainArgNum
= -1;
5400 specs
[curSpec
].precArgNum
= -1;
5401 specs
[curSpec
].widthArgNum
= -1;
5403 for (newFmtIdx
= formatIdx
; newFmtIdx
< formatLen
&& '%' != cformat
[newFmtIdx
]; newFmtIdx
++);
5405 for (newFmtIdx
= formatIdx
; newFmtIdx
< formatLen
&& '%' != uformat
[newFmtIdx
]; newFmtIdx
++);
5407 if (newFmtIdx
!= formatIdx
) { /* Literal chunk */
5408 specs
[curSpec
].type
= CFFormatLiteralType
;
5409 specs
[curSpec
].len
= newFmtIdx
- formatIdx
;
5411 newFmtIdx
++; /* Skip % */
5412 __CFParseFormatSpec(uformat
, cformat
, &newFmtIdx
, formatLen
, &(specs
[curSpec
]));
5413 if (CFFormatLiteralType
== specs
[curSpec
].type
) {
5414 specs
[curSpec
].loc
= formatIdx
+ 1;
5415 specs
[curSpec
].len
= 1;
5417 specs
[curSpec
].len
= newFmtIdx
- formatIdx
;
5420 formatIdx
= newFmtIdx
;
5422 // fprintf(stderr, "specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
5426 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
5427 values
= ((3 * sizeSpecs
+ 1) > VPRINTF_BUFFER_LEN
) ? (CFPrintValue
*)CFAllocatorAllocate(tmpAlloc
, (3 * sizeSpecs
+ 1) * sizeof(CFPrintValue
), 0) : localValuesBuffer
;
5428 if (values
!= localValuesBuffer
&& __CFOASafe
) __CFSetLastAllocationEventName(values
, "CFString (temp)");
5429 memset(values
, 0, (3 * sizeSpecs
+ 1) * sizeof(CFPrintValue
));
5430 sizeArgNum
= (3 * sizeSpecs
+ 1);
5432 /* Compute values array */
5434 for (curSpec
= 0; curSpec
< numSpecs
; curSpec
++) {
5435 SInt32 newMaxArgNum
;
5436 if (0 == specs
[curSpec
].type
) continue;
5437 if (CFFormatLiteralType
== specs
[curSpec
].type
) continue;
5438 newMaxArgNum
= sizeArgNum
;
5439 if (newMaxArgNum
< specs
[curSpec
].mainArgNum
) {
5440 newMaxArgNum
= specs
[curSpec
].mainArgNum
;
5442 if (newMaxArgNum
< specs
[curSpec
].precArgNum
) {
5443 newMaxArgNum
= specs
[curSpec
].precArgNum
;
5445 if (newMaxArgNum
< specs
[curSpec
].widthArgNum
) {
5446 newMaxArgNum
= specs
[curSpec
].widthArgNum
;
5448 if (sizeArgNum
< newMaxArgNum
) {
5449 if (specs
!= localSpecsBuffer
) CFAllocatorDeallocate(tmpAlloc
, specs
);
5450 if (values
!= localValuesBuffer
) CFAllocatorDeallocate(tmpAlloc
, values
);
5451 if (formatChars
&& (formatChars
!= localFormatBuffer
)) CFAllocatorDeallocate(tmpAlloc
, formatChars
);
5452 return; // more args than we expected!
5454 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
5455 if (-2 == specs
[curSpec
].widthArgNum
) {
5456 specs
[curSpec
].widthArgNum
= argNum
++;
5458 if (-2 == specs
[curSpec
].precArgNum
) {
5459 specs
[curSpec
].precArgNum
= argNum
++;
5461 if (-1 == specs
[curSpec
].mainArgNum
) {
5462 specs
[curSpec
].mainArgNum
= argNum
++;
5464 values
[specs
[curSpec
].mainArgNum
].size
= specs
[curSpec
].size
;
5465 values
[specs
[curSpec
].mainArgNum
].type
= specs
[curSpec
].type
;
5466 if (-1 != specs
[curSpec
].widthArgNum
) {
5467 values
[specs
[curSpec
].widthArgNum
].size
= 0;
5468 values
[specs
[curSpec
].widthArgNum
].type
= CFFormatLongType
;
5470 if (-1 != specs
[curSpec
].precArgNum
) {
5471 values
[specs
[curSpec
].precArgNum
].size
= 0;
5472 values
[specs
[curSpec
].precArgNum
].type
= CFFormatLongType
;
5476 /* Collect the arguments in correct type from vararg list */
5477 for (argNum
= 0; argNum
< sizeArgNum
; argNum
++) {
5478 switch (values
[argNum
].type
) {
5480 case CFFormatLiteralType
:
5482 case CFFormatLongType
:
5483 case CFFormatSingleUnicharType
:
5484 if (CFFormatSize1
== values
[argNum
].size
) {
5485 values
[argNum
].value
.int64Value
= (int64_t)(int8_t)va_arg(args
, int);
5486 } else if (CFFormatSize2
== values
[argNum
].size
) {
5487 values
[argNum
].value
.int64Value
= (int64_t)(int16_t)va_arg(args
, int);
5488 } else if (CFFormatSize4
== values
[argNum
].size
) {
5489 values
[argNum
].value
.int64Value
= (int64_t)va_arg(args
, int32_t);
5490 } else if (CFFormatSize8
== values
[argNum
].size
) {
5491 values
[argNum
].value
.int64Value
= (int64_t)va_arg(args
, int64_t);
5493 values
[argNum
].value
.int64Value
= (int64_t)va_arg(args
, int);
5496 case CFFormatDoubleType
:
5497 #if LONG_DOUBLE_SUPPORT
5498 if (CFFormatSize16
== values
[argNum
].size
) {
5499 values
[argNum
].value
.longDoubleValue
= va_arg(args
, long double);
5503 values
[argNum
].value
.doubleValue
= va_arg(args
, double);
5506 case CFFormatPointerType
:
5507 case CFFormatObjectType
:
5508 case CFFormatCFType
:
5509 case CFFormatUnicharsType
:
5510 case CFFormatCharsType
:
5511 case CFFormatPascalCharsType
:
5512 values
[argNum
].value
.pointerValue
= va_arg(args
, void *);
5514 case CFFormatDummyPointerType
:
5515 (void)va_arg(args
, void *); // Skip the provided argument
5516 values
[argNum
].value
.pointerValue
= &dummyLocation
;
5522 /* Format the pieces together */
5523 for (curSpec
= 0; curSpec
< numSpecs
; curSpec
++) {
5524 SInt32 width
= 0, precision
= 0;
5526 Boolean hasWidth
= false, hasPrecision
= false;
5528 // widthArgNum and widthArg are never set at the same time; same for precArg*
5529 if (-1 != specs
[curSpec
].widthArgNum
) {
5530 width
= (SInt32
)values
[specs
[curSpec
].widthArgNum
].value
.int64Value
;
5533 if (-1 != specs
[curSpec
].precArgNum
) {
5534 precision
= (SInt32
)values
[specs
[curSpec
].precArgNum
].value
.int64Value
;
5535 hasPrecision
= true;
5537 if (-1 != specs
[curSpec
].widthArg
) {
5538 width
= specs
[curSpec
].widthArg
;
5541 if (-1 != specs
[curSpec
].precArg
) {
5542 precision
= specs
[curSpec
].precArg
;
5543 hasPrecision
= true;
5546 switch (specs
[curSpec
].type
) {
5547 case CFFormatLongType
:
5548 case CFFormatDoubleType
:
5549 case CFFormatPointerType
: {
5550 char formatBuffer
[128];
5551 #if defined(__GNUC__)
5552 char buffer
[256 + width
+ precision
];
5554 char stackBuffer
[512];
5555 char *dynamicBuffer
= NULL
;
5556 char *buffer
= stackBuffer
;
5557 if (256+width
+precision
> 512) {
5558 dynamicBuffer
= (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault
, 256+width
+precision
, 0);
5559 buffer
= dynamicBuffer
;
5562 SInt32 cidx
, idx
, loc
;
5563 Boolean appended
= false;
5564 loc
= specs
[curSpec
].loc
;
5565 // In preparation to call snprintf(), copy the format string out
5567 for (idx
= 0, cidx
= 0; cidx
< specs
[curSpec
].len
; idx
++, cidx
++) {
5568 if ('$' == cformat
[loc
+ cidx
]) {
5569 for (idx
--; '0' <= formatBuffer
[idx
] && formatBuffer
[idx
] <= '9'; idx
--);
5571 formatBuffer
[idx
] = cformat
[loc
+ cidx
];
5575 for (idx
= 0, cidx
= 0; cidx
< specs
[curSpec
].len
; idx
++, cidx
++) {
5576 if ('$' == uformat
[loc
+ cidx
]) {
5577 for (idx
--; '0' <= formatBuffer
[idx
] && formatBuffer
[idx
] <= '9'; idx
--);
5579 formatBuffer
[idx
] = (int8_t)uformat
[loc
+ cidx
];
5583 formatBuffer
[idx
] = '\0';
5584 // Should modify format buffer here if necessary; for example, to translate %qd to
5585 // the equivalent, on architectures which do not have %q.
5586 buffer
[sizeof(buffer
) - 1] = '\0';
5587 switch (specs
[curSpec
].type
) {
5588 case CFFormatLongType
:
5589 if (CFFormatSize8
== specs
[curSpec
].size
) {
5590 SNPRINTF(int64_t, values
[specs
[curSpec
].mainArgNum
].value
.int64Value
)
5592 SNPRINTF(SInt32
, values
[specs
[curSpec
].mainArgNum
].value
.int64Value
)
5595 case CFFormatPointerType
:
5596 case CFFormatDummyPointerType
:
5597 SNPRINTF(void *, values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
)
5600 case CFFormatDoubleType
:
5601 #if LONG_DOUBLE_SUPPORT
5602 if (CFFormatSize16
== specs
[curSpec
].size
) {
5603 SNPRINTF(long double, values
[specs
[curSpec
].mainArgNum
].value
.longDoubleValue
)
5607 SNPRINTF(double, values
[specs
[curSpec
].mainArgNum
].value
.doubleValue
)
5609 // See if we need to localize the decimal point
5610 if (formatOptions
) { // We have localization info
5611 CFStringRef decimalSeparator
= (CFGetTypeID(formatOptions
) == CFLocaleGetTypeID()) ? (CFStringRef
)CFLocaleGetValue((CFLocaleRef
)formatOptions
, kCFLocaleDecimalSeparator
) : (CFStringRef
)CFDictionaryGetValue(formatOptions
, CFSTR("NSDecimalSeparator"));
5612 if (decimalSeparator
!= NULL
) { // We have a decimal separator in there
5613 CFIndex decimalPointLoc
= 0;
5614 while (buffer
[decimalPointLoc
] != 0 && buffer
[decimalPointLoc
] != '.') decimalPointLoc
++;
5615 if (buffer
[decimalPointLoc
] == '.') { // And we have a decimal point in the formatted string
5616 buffer
[decimalPointLoc
] = 0;
5617 CFStringAppendCString(outputString
, (const char *)buffer
, __CFStringGetEightBitStringEncoding());
5618 CFStringAppend(outputString
, decimalSeparator
);
5619 CFStringAppendCString(outputString
, (const char *)(buffer
+ decimalPointLoc
+ 1), __CFStringGetEightBitStringEncoding());
5626 if (!appended
) CFStringAppendCString(outputString
, (const char *)buffer
, __CFStringGetEightBitStringEncoding());
5627 #if !defined(__GNUC__)
5628 if (dynamicBuffer
) {
5629 CFAllocatorDeallocate(kCFAllocatorSystemDefault
, dynamicBuffer
);
5634 case CFFormatLiteralType
:
5636 __CFStringAppendBytes(outputString
, (const char *)(cformat
+specs
[curSpec
].loc
), specs
[curSpec
].len
, __CFStringGetEightBitStringEncoding());
5638 CFStringAppendCharacters(outputString
, uformat
+specs
[curSpec
].loc
, specs
[curSpec
].len
);
5641 case CFFormatPascalCharsType
:
5642 case CFFormatCharsType
:
5643 if (values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
== NULL
) {
5644 CFStringAppendCString(outputString
, "(null)", kCFStringEncodingASCII
);
5647 const char *str
= (const char *)values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
;
5648 if (specs
[curSpec
].type
== CFFormatPascalCharsType
) { // Pascal string case
5649 len
= ((unsigned char *)str
)[0];
5651 if (hasPrecision
&& precision
< len
) len
= precision
;
5652 } else { // C-string case
5653 if (!hasPrecision
) { // No precision, so rely on the terminating null character
5655 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
5656 const char *terminatingNull
= (const char *)memchr(str
, 0, precision
); // Basically strlen() on only the first precision characters of str
5657 if (terminatingNull
) { // There was a null in the first precision characters
5658 len
= terminatingNull
- str
;
5664 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5665 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5666 // to ignore those flags (and, say, never pad with '0' instead of space).
5667 if (specs
[curSpec
].flags
& kCFStringFormatMinusFlag
) {
5668 __CFStringAppendBytes(outputString
, str
, len
, __CFStringGetSystemEncoding());
5669 if (hasWidth
&& width
> len
) {
5670 int w
= width
- len
; // We need this many spaces; do it ten at a time
5671 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5674 if (hasWidth
&& width
> len
) {
5675 int w
= width
- len
; // We need this many spaces; do it ten at a time
5676 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5678 __CFStringAppendBytes(outputString
, str
, len
, __CFStringGetSystemEncoding());
5682 case CFFormatSingleUnicharType
:
5683 ch
= (UniChar
)values
[specs
[curSpec
].mainArgNum
].value
.int64Value
;
5684 CFStringAppendCharacters(outputString
, &ch
, 1);
5686 case CFFormatUnicharsType
:
5687 //??? need to handle width, precision, and padding arguments
5688 up
= (UniChar
*)values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
;
5690 CFStringAppendCString(outputString
, "(null)", kCFStringEncodingASCII
);
5693 for (len
= 0; 0 != up
[len
]; len
++);
5694 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5695 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5696 // to ignore those flags (and, say, never pad with '0' instead of space).
5697 if (hasPrecision
&& precision
< len
) len
= precision
;
5698 if (specs
[curSpec
].flags
& kCFStringFormatMinusFlag
) {
5699 CFStringAppendCharacters(outputString
, up
, len
);
5700 if (hasWidth
&& width
> len
) {
5701 int w
= width
- len
; // We need this many spaces; do it ten at a time
5702 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5705 if (hasWidth
&& width
> len
) {
5706 int w
= width
- len
; // We need this many spaces; do it ten at a time
5707 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
5709 CFStringAppendCharacters(outputString
, up
, len
);
5713 case CFFormatCFType
:
5714 case CFFormatObjectType
:
5715 if (NULL
!= values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
) {
5716 CFStringRef str
= NULL
;
5718 str
= copyDescFunc(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
, formatOptions
);
5720 str
= __CFCopyFormattingDescription(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
, formatOptions
);
5722 str
= CFCopyDescription(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
);
5726 CFStringAppend(outputString
, str
);
5729 CFStringAppendCString(outputString
, "(null description)", kCFStringEncodingASCII
);
5732 CFStringAppendCString(outputString
, "(null)", kCFStringEncodingASCII
);
5738 if (specs
!= localSpecsBuffer
) CFAllocatorDeallocate(tmpAlloc
, specs
);
5739 if (values
!= localValuesBuffer
) CFAllocatorDeallocate(tmpAlloc
, values
);
5740 if (formatChars
&& (formatChars
!= localFormatBuffer
)) CFAllocatorDeallocate(tmpAlloc
, formatChars
);
5746 void CFShowStr(CFStringRef str
) {
5747 CFAllocatorRef alloc
;
5750 fprintf(stdout
, "(null)\n");
5754 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) {
5755 fprintf(stdout
, "This is an NSString, not CFString\n");
5759 alloc
= CFGetAllocator(str
);
5761 fprintf(stdout
, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str
), __CFStrIsEightBit(str
));
5762 fprintf(stdout
, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
5763 __CFStrHasLengthByte(str
), __CFStrHasNullByte(str
), __CFStrIsInline(str
));
5765 fprintf(stdout
, "Allocator ");
5766 if (alloc
!= kCFAllocatorSystemDefault
) {
5767 fprintf(stdout
, "%p\n", (void *)alloc
);
5769 fprintf(stdout
, "SystemDefault\n");
5771 fprintf(stdout
, "Mutable %d\n", __CFStrIsMutable(str
));
5772 if (!__CFStrIsMutable(str
) && __CFStrHasContentsDeallocator(str
)) {
5773 if (__CFStrContentsDeallocator(str
)) fprintf(stdout
, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str
));
5774 else fprintf(stdout
, "ContentsDeallocatorFunc None\n");
5775 } else if (__CFStrIsMutable(str
) && __CFStrHasContentsAllocator(str
)) {
5776 fprintf(stdout
, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef
)str
));
5779 if (__CFStrIsMutable(str
)) {
5780 fprintf(stdout
, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str
), __CFStrIsFixed(str
) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str
));
5782 fprintf(stdout
, "Contents %p\n", (void *)__CFStrContents(str
));
5791 #undef HANGUL_SCOUNT
5792 #undef HANGUL_LCOUNT
5793 #undef HANGUL_VCOUNT
5794 #undef HANGUL_TCOUNT
5795 #undef HANGUL_NCOUNT