2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
26 Copyright 1998-2002, Apple, Inc. All rights reserved.
27 Responsibility: Ali Ozer
30 #include <CoreFoundation/CFBase.h>
31 #include <CoreFoundation/CFString.h>
32 #include <CoreFoundation/CFDictionary.h>
33 #include "CFStringEncodingConverterExt.h"
34 #include "CFUniChar.h"
35 #include "CFUnicodeDecomposition.h"
36 #include "CFUnicodePrecomposition.h"
37 #include "CFUtilities.h"
38 #include "CFInternal.h"
43 #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
46 #if defined(__WIN32__)
48 #endif /* __WIN32__ */
50 extern size_t malloc_good_size(size_t size
);
51 extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes
, UniChar
*buffer
, CFIndex numChars
);
55 // Special allocator used by CFSTRs to catch deallocations
56 static CFAllocatorRef constantStringAllocatorForDebugging
= NULL
;
58 // We put this into C & Pascal strings if we can't convert
59 #define CONVERSIONFAILURESTR "CFString conversion failed"
61 // We set this to true when purging the constant string table, so CFStringDeallocate doesn't assert
62 static Boolean __CFConstantStringTableBeingFreed
= false;
67 // This section is for CFString compatibility and other behaviors...
69 static CFOptionFlags _CFStringCompatibilityMask
= 0;
73 void _CFStringSetCompatibility(CFOptionFlags mask
) {
74 _CFStringCompatibilityMask
|= mask
;
77 CF_INLINE Boolean
__CFStringGetCompatibility(CFOptionFlags mask
) {
78 return (_CFStringCompatibilityMask
& mask
) == mask
;
83 // Two constant strings used by CFString; these are initialized in CFStringInitialize
84 CONST_STRING_DECL(kCFEmptyString
, "")
85 CONST_STRING_DECL(kCFNSDecimalSeparatorKey
, "NSDecimalSeparator")
88 /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
92 union { // In many cases the allocated structs are smaller than these
100 CFAllocatorRef contentsDeallocator
; // Just the dealloc func is used
101 } externalImmutable1
;
104 CFAllocatorRef contentsDeallocator
; // Just the dealloc func is used
105 } externalImmutable2
;
109 UInt32 capacityFields
; // Currently only stores capacity
110 UInt32 gapEtc
; // Stores some bits, plus desired or fixed capacity
111 CFAllocatorRef contentsAllocator
; // Optional
118 E = not inline contents
122 D = explicit deallocator for contents (for mutable objects, allocator)
123 X = is external mutable
125 Also need (only for mutable)
128 Cap, DesCap = capacity
130 B7 B6 B5 B4 B3 B2 B1 B0
135 0 1 E (freed with default allocator)
141 __kCFFreeContentsWhenDoneMask
= 0x020,
142 __kCFFreeContentsWhenDone
= 0x020,
143 __kCFContentsMask
= 0x060,
144 __kCFHasInlineData
= 0x000,
145 __kCFHasExternalDataNoFree
= 0x040, // Don't free
146 __kCFHasExternalDataDefaultFree
= 0x020, // Use allocator's free function
147 __kCFHasExternalDataCustomFree
= 0x060, // Use a specially provided free function
148 __kCFHasContentsAllocatorMask
= 0x060,
149 __kCFHasContentsAllocator
= 0x060, // (For mutable strings) use a specially provided allocator
150 __kCFHasContentsDeallocatorMask
= 0x060,
151 __kCFHasContentsDeallocator
= 0x060,
152 __kCFIsMutableMask
= 0x01,
153 __kCFIsMutable
= 0x01,
154 __kCFIsUnicodeMask
= 0x10,
155 __kCFIsUnicode
= 0x10,
156 __kCFHasNullByteMask
= 0x08,
157 __kCFHasNullByte
= 0x08,
158 __kCFHasLengthByteMask
= 0x04,
159 __kCFHasLengthByte
= 0x04,
160 __kCFIsExternalMutableMask
= 0x02, // For now we use this bit; can switch to something else
161 __kCFIsExternalMutable
= 0x02,
162 // These are in variants.externalMutable.gapEtc
163 __kCFGapMask
= 0x00ffffff,
164 __kCFGapBitNumber
= 24,
165 __kCFDesiredCapacityMask
= 0x00ffffff, // Currently gap and fixed share same bits as gap not implemented
166 __kCFDesiredCapacityBitNumber
= 24,
167 __kCFIsFixedMask
= 0x80000000,
168 __kCFIsFixed
= 0x80000000,
169 __kCFHasGapMask
= 0x40000000,
170 __kCFHasGap
= 0x40000000,
171 __kCFCapacityProvidedExternallyMask
= 0x20000000, // Set if the external buffer is set explicitly by the developer
172 __kCFCapacityProvidedExternally
= 0x20000000
177 // Mutable strings are not inline
178 // Compile-time constant strings are not inline
179 // Mutable strings always have explicit length (but they might also have length byte and null byte)
180 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
181 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
183 /* The following set of functions and macros need to be updated on change to the bit configuration
185 CF_INLINE Boolean
__CFStrIsMutable(CFStringRef str
) {return (str
->base
._info
& __kCFIsMutableMask
) == __kCFIsMutable
;}
186 CF_INLINE Boolean
__CFStrIsExternalMutable(CFStringRef str
) {return (str
->base
._info
& __kCFIsExternalMutableMask
) == __kCFIsExternalMutable
;}
187 CF_INLINE Boolean
__CFStrIsInline(CFStringRef str
) {return (str
->base
._info
& __kCFContentsMask
) == __kCFHasInlineData
;}
188 CF_INLINE Boolean
__CFStrFreeContentsWhenDone(CFStringRef str
) {return (str
->base
._info
& __kCFFreeContentsWhenDoneMask
) == __kCFFreeContentsWhenDone
;}
189 CF_INLINE Boolean
__CFStrHasContentsDeallocator(CFStringRef str
) {return (str
->base
._info
& __kCFHasContentsDeallocatorMask
) == __kCFHasContentsDeallocator
;}
190 CF_INLINE Boolean
__CFStrIsUnicode(CFStringRef str
) {return (str
->base
._info
& __kCFIsUnicodeMask
) == __kCFIsUnicode
;}
191 CF_INLINE Boolean
__CFStrIsEightBit(CFStringRef str
) {return (str
->base
._info
& __kCFIsUnicodeMask
) != __kCFIsUnicode
;}
192 CF_INLINE Boolean
__CFStrHasNullByte(CFStringRef str
) {return (str
->base
._info
& __kCFHasNullByteMask
) == __kCFHasNullByte
;}
193 CF_INLINE Boolean
__CFStrHasLengthByte(CFStringRef str
) {return (str
->base
._info
& __kCFHasLengthByteMask
) == __kCFHasLengthByte
;}
194 CF_INLINE Boolean
__CFStrHasExplicitLength(CFStringRef str
) {return (str
->base
._info
& (__kCFIsMutableMask
| __kCFHasLengthByteMask
)) != __kCFHasLengthByte
;} // Has explicit length if (1) mutable or (2) not mutable and no length byte
196 CF_INLINE SInt32
__CFStrSkipAnyLengthByte(CFStringRef str
) {return ((str
->base
._info
& __kCFHasLengthByteMask
) == __kCFHasLengthByte
) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents
198 /* Returns ptr to the buffer (which might include the length byte)
200 CF_INLINE
const void *__CFStrContents(CFStringRef str
) {
201 if (__CFStrIsInline(str
)) {
202 return (const void *)(((UInt32
)&(str
->variants
)) + (__CFStrHasExplicitLength(str
) ? sizeof(UInt32
) : 0));
203 } else { // External; pointer is always word 2
204 return str
->variants
.externalImmutable1
.buffer
;
208 static CFAllocatorRef
*__CFStrContentsDeallocatorPtr(CFStringRef str
) {
209 return __CFStrHasExplicitLength(str
) ? &(((CFMutableStringRef
)str
)->variants
.externalImmutable1
.contentsDeallocator
) : &(((CFMutableStringRef
)str
)->variants
.externalImmutable2
.contentsDeallocator
); }
211 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
212 CF_INLINE CFAllocatorRef
__CFStrContentsDeallocator(CFStringRef str
) {
213 return *__CFStrContentsDeallocatorPtr(str
);
216 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
217 CF_INLINE
void __CFStrSetContentsDeallocator(CFStringRef str
, CFAllocatorRef contentsAllocator
) {
218 *__CFStrContentsDeallocatorPtr(str
) = contentsAllocator
;
221 static CFAllocatorRef
*__CFStrContentsAllocatorPtr(CFStringRef str
) {
222 CFAssert(!__CFStrIsInline(str
), __kCFLogAssertion
, "Asking for contents allocator of inline string");
223 CFAssert(__CFStrIsMutable(str
), __kCFLogAssertion
, "Asking for contents allocator of an immutable string");
224 return (CFAllocatorRef
*)&(str
->variants
.externalMutable
.contentsAllocator
);
227 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
228 CF_INLINE CFAllocatorRef
__CFStrContentsAllocator(CFMutableStringRef str
) {
229 return *(__CFStrContentsAllocatorPtr(str
));
232 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
233 CF_INLINE
void __CFStrSetContentsAllocator(CFMutableStringRef str
, CFAllocatorRef alloc
) {
234 *(__CFStrContentsAllocatorPtr(str
)) = alloc
;
237 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
239 CF_INLINE CFIndex
__CFStrLength(CFStringRef str
) {
240 if (__CFStrHasExplicitLength(str
)) {
241 if (__CFStrIsInline(str
)) {
242 return str
->variants
.inline1
.length
;
244 CFIndex len
= str
->variants
.externalImmutable1
.length
;
245 if (len
== 0x0ffffff) ((CFMutableStringRef
)str
)->variants
.externalImmutable1
.length
= (len
= strlen(__CFStrContents(str
))); /* For compile-time constant strings */
249 return (CFIndex
)(*((uint8_t *)__CFStrContents(str
)));
253 CF_INLINE CFIndex
__CFStrLength2(CFStringRef str
, const void *buffer
) {
254 if (__CFStrHasExplicitLength(str
)) {
255 if (__CFStrIsInline(str
)) {
256 return str
->variants
.inline1
.length
;
258 CFIndex len
= str
->variants
.externalImmutable1
.length
;
259 if (len
== 0x0ffffff) ((CFMutableStringRef
)str
)->variants
.externalImmutable1
.length
= (len
= strlen(buffer
)); /* For compile-time constant strings */
263 return (CFIndex
)(*((uint8_t *)buffer
));
267 Boolean
__CFStringIsMutable(CFStringRef str
) {
268 return __CFStrIsMutable(str
);
271 Boolean
__CFStringIsEightBit(CFStringRef str
) {
272 return __CFStrIsEightBit(str
);
275 /* Sets the external content pointer for immutable or mutable strings.
277 CF_INLINE
void __CFStrSetContentPtr(CFStringRef str
, const void *p
) {((CFMutableStringRef
)str
)->variants
.externalImmutable1
.buffer
= (void *)p
;}
278 CF_INLINE
void __CFStrSetInfoBits(CFStringRef str
, UInt32 v
) {__CFBitfieldSetValue(((CFMutableStringRef
)str
)->base
._info
, 6, 0, v
);}
280 CF_INLINE
void __CFStrSetExplicitLength(CFStringRef str
, CFIndex v
) {
281 if (__CFStrIsInline(str
)) {
282 ((CFMutableStringRef
)str
)->variants
.inline1
.length
= v
;
284 ((CFMutableStringRef
)str
)->variants
.externalImmutable1
.length
= v
;
288 // Assumption: Called with mutable strings only
289 CF_INLINE Boolean
__CFStrIsFixed(CFStringRef str
) {return (str
->variants
.externalMutable
.gapEtc
& __kCFIsFixedMask
) == __kCFIsFixed
;}
290 CF_INLINE Boolean
__CFStrHasContentsAllocator(CFStringRef str
) {return (str
->base
._info
& __kCFHasContentsAllocatorMask
) == __kCFHasContentsAllocator
;}
292 // If capacity is provided externally, we only change it when we need to grow beyond it
293 CF_INLINE Boolean
__CFStrCapacityProvidedExternally(CFStringRef str
) {return (str
->variants
.externalMutable
.gapEtc
& __kCFCapacityProvidedExternallyMask
) == __kCFCapacityProvidedExternally
;}
294 CF_INLINE
void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str
) {str
->variants
.externalMutable
.gapEtc
|= __kCFCapacityProvidedExternally
;}
295 CF_INLINE
void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str
) {str
->variants
.externalMutable
.gapEtc
&= ~__kCFCapacityProvidedExternally
;}
298 CF_INLINE
void __CFStrSetIsFixed(CFMutableStringRef str
) {str
->variants
.externalMutable
.gapEtc
|= __kCFIsFixed
;}
299 CF_INLINE
void __CFStrSetHasGap(CFMutableStringRef str
) {str
->variants
.externalMutable
.gapEtc
|= __kCFHasGap
;}
300 CF_INLINE
void __CFStrSetUnicode(CFMutableStringRef str
) {str
->base
._info
|= __kCFIsUnicode
;}
301 CF_INLINE
void __CFStrClearUnicode(CFMutableStringRef str
) {str
->base
._info
&= ~__kCFIsUnicode
;}
302 CF_INLINE
void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str
) {str
->base
._info
|= (__kCFHasLengthByte
| __kCFHasNullByte
);}
303 CF_INLINE
void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str
) {str
->base
._info
&= ~(__kCFHasLengthByte
| __kCFHasNullByte
);}
306 static void *__CFStrAllocateMutableContents(CFMutableStringRef str
, CFIndex size
) {
308 CFAllocatorRef alloc
= (__CFStrHasContentsAllocator(str
)) ? __CFStrContentsAllocator(str
) : __CFGetAllocator(str
);
309 ptr
= CFAllocatorAllocate(alloc
, size
, 0);
310 if (__CFOASafe
) __CFSetLastAllocationEventName(ptr
, "CFString (store)");
314 static void __CFStrDeallocateMutableContents(CFMutableStringRef str
, void *buffer
) {
315 CFAllocatorRef alloc
= (__CFStrHasContentsAllocator(str
)) ? __CFStrContentsAllocator(str
) : __CFGetAllocator(str
);
316 CFAllocatorDeallocate(alloc
, buffer
);
320 // The following set of functions should only be called on mutable strings
322 /* "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
323 "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
325 CF_INLINE CFIndex
__CFStrCapacity(CFStringRef str
) {return str
->variants
.externalMutable
.capacityFields
;}
326 CF_INLINE
void __CFStrSetCapacity(CFMutableStringRef str
, CFIndex cap
) {str
->variants
.externalMutable
.capacityFields
= cap
;}
327 CF_INLINE CFIndex
__CFStrDesiredCapacity(CFStringRef str
) {return __CFBitfieldGetValue(str
->variants
.externalMutable
.gapEtc
, __kCFDesiredCapacityBitNumber
, 0);}
328 CF_INLINE
void __CFStrSetDesiredCapacity(CFMutableStringRef str
, CFIndex size
) {__CFBitfieldSetValue(str
->variants
.externalMutable
.gapEtc
, __kCFDesiredCapacityBitNumber
, 0, size
);}
333 /* CFString specific init flags
334 Note that you cannot count on the external buffer not being copied.
335 Also, if you specify an external buffer, you should not change it behind the CFString's back.
338 __kCFThinUnicodeIfPossible
= 0x1000000, /* See if the Unicode contents can be thinned down to 8-bit */
339 kCFStringPascal
= 0x10000, /* Indicating that the string data has a Pascal string structure (length byte at start) */
340 kCFStringNoCopyProvidedContents
= 0x20000, /* Don't copy the provided string contents if possible; free it when no longer needed */
341 kCFStringNoCopyNoFreeProvidedContents
= 0x30000 /* Don't copy the provided string contents if possible; don't free it when no longer needed */
344 /* Size for temporary buffers
346 #define MAXTMPBUFFERLEN (2048)
347 #define MAXISDECOMPBUFFERLEN (32)
351 static CFStringEncoding __CFDefaultSystemEncoding
= kCFStringEncodingInvalidId
;
352 static CFStringEncoding __CFDefaultFileSystemEncoding
= kCFStringEncodingInvalidId
;
353 CFStringEncoding __CFDefaultEightBitStringEncoding
= kCFStringEncodingInvalidId
;
355 CFStringEncoding
CFStringGetSystemEncoding(void) {
357 if (__CFDefaultSystemEncoding
== kCFStringEncodingInvalidId
) {
358 const CFStringEncodingConverter
*converter
= NULL
;
359 #if defined(__MACOS8__) || defined(__MACH__)
360 __CFDefaultSystemEncoding
= kCFStringEncodingMacRoman
; // MacRoman is built-in so always available
361 #elif defined(__WIN32__)
362 __CFDefaultSystemEncoding
= kCFStringEncodingWindowsLatin1
; // WinLatin1 is built-in so always available
363 #elif defined(__LINUX__) || defined(__FREEBSD__)
364 __CFDefaultSystemEncoding
= kCFStringEncodingISOLatin1
; // a reasonable default
365 #else // Solaris && HP-UX ?
366 __CFDefaultSystemEncoding
= kCFStringEncodingISOLatin1
; // a reasonable default
368 converter
= CFStringEncodingGetConverter(__CFDefaultSystemEncoding
);
370 __CFSetCharToUniCharFunc(converter
->encodingClass
== kCFStringEncodingConverterCheapEightBit
? converter
->toUnicode
: NULL
);
373 return __CFDefaultSystemEncoding
;
376 // Fast version for internal use
378 CF_INLINE CFStringEncoding
__CFStringGetSystemEncoding(void) {
379 if (__CFDefaultSystemEncoding
== kCFStringEncodingInvalidId
) (void)CFStringGetSystemEncoding();
380 return __CFDefaultSystemEncoding
;
383 CFStringEncoding
CFStringFileSystemEncoding(void) {
384 if (__CFDefaultFileSystemEncoding
== kCFStringEncodingInvalidId
) {
385 #if defined(__MACH__)
386 __CFDefaultFileSystemEncoding
= kCFStringEncodingUTF8
;
388 __CFDefaultFileSystemEncoding
= CFStringGetSystemEncoding();
392 return __CFDefaultFileSystemEncoding
;
395 /* ??? Is returning length when no other answer is available the right thing?
397 CFIndex
CFStringGetMaximumSizeForEncoding(CFIndex length
, CFStringEncoding encoding
) {
398 if (encoding
== kCFStringEncodingUTF8
) {
399 return _CFExecutableLinkedOnOrAfter(CFSystemVersionPanther
) ? (length
* 3) : (length
* 6); // 1 Unichar could expand to 3 bytes; we return 6 for older apps for compatibility
401 encoding
&= 0xFFF; // Mask off non-base part
404 case kCFStringEncodingUnicode
:
405 return length
* sizeof(UniChar
);
407 case kCFStringEncodingNonLossyASCII
:
408 return length
* 6; // 1 Unichar could expand to 6 bytes
410 case kCFStringEncodingMacRoman
:
411 case kCFStringEncodingWindowsLatin1
:
412 case kCFStringEncodingISOLatin1
:
413 case kCFStringEncodingNextStepLatin
:
414 case kCFStringEncodingASCII
:
415 return length
/ sizeof(uint8_t);
418 return length
/ sizeof(uint8_t);
423 /* Returns whether the indicated encoding can be stored in 8-bit chars
425 CF_INLINE Boolean
__CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding
) {
427 case kCFStringEncodingInvalidId
:
428 case kCFStringEncodingUnicode
:
429 case kCFStringEncodingUTF8
:
430 case kCFStringEncodingNonLossyASCII
:
433 case kCFStringEncodingMacRoman
:
434 case kCFStringEncodingWindowsLatin1
:
435 case kCFStringEncodingISOLatin1
:
436 case kCFStringEncodingNextStepLatin
:
437 case kCFStringEncodingASCII
:
440 default: return false;
444 /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
445 ??? Perhaps only ASCII fits the bill due to Unicode decomposition.
447 CFStringEncoding
__CFStringComputeEightBitStringEncoding(void) {
448 if (__CFDefaultEightBitStringEncoding
== kCFStringEncodingInvalidId
) {
449 CFStringEncoding systemEncoding
= CFStringGetSystemEncoding();
450 if (systemEncoding
== kCFStringEncodingInvalidId
) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
451 return kCFStringEncodingASCII
;
452 } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding
)) {
453 __CFDefaultEightBitStringEncoding
= systemEncoding
;
455 __CFDefaultEightBitStringEncoding
= kCFStringEncodingASCII
;
459 return __CFDefaultEightBitStringEncoding
;
462 /* Returns whether the provided bytes can be stored in ASCII
464 CF_INLINE Boolean
__CFBytesInASCII(const uint8_t *bytes
, CFIndex len
) {
465 while (len
--) if ((uint8_t)(*bytes
++) >= 128) return false;
469 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
471 CF_INLINE Boolean
__CFCanUseEightBitCFStringForBytes(const uint8_t *bytes
, CFIndex len
, CFStringEncoding encoding
) {
472 if (encoding
== __CFStringGetEightBitStringEncoding()) return true;
473 if (__CFStringEncodingIsSupersetOfASCII(encoding
) && __CFBytesInASCII(bytes
, len
)) return true;
478 /* Returns whether a length byte can be tacked on to a string of the indicated length.
480 CF_INLINE Boolean
__CFCanUseLengthByte(CFIndex len
) {
481 #define __kCFMaxPascalStrLen 255
482 return (len
<= __kCFMaxPascalStrLen
) ? true : false;
485 /* Various string assertions
487 #define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
488 #define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
489 #define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && (idx + count) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
490 #define __CFAssertLengthIsOK(len) CFAssert2(len < __kCFMaxLength, __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, len)
491 #define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
492 #define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
493 #define __CFAssertIsNotNegative(idx) CFAssert2(idx >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
494 #define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || (reqLen <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
497 /* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity.
498 Additional complications are applied in the following order:
499 - desiredCapacity, which is the minimum (except initially things can be at zero)
500 - rounding up to factor of 8
501 - compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256
/* Argument is parenthesized so that compound expressions (e.g. a + b) are scaled as a whole. */
#define SHRINKFACTOR(c) ((c) / 2)
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
506 CF_INLINE CFIndex
__CFStrNewCapacity(CFMutableStringRef str
, CFIndex reqCapacity
, CFIndex capacity
, Boolean leaveExtraRoom
, CFIndex charSize
) {
507 if (capacity
!= 0 || reqCapacity
!= 0) { /* If initially zero, and space not needed, leave it at that... */
508 if ((capacity
< reqCapacity
) || /* We definitely need the room... */
509 (!__CFStrCapacityProvidedExternally(str
) && /* Assuming we control the capacity... */
510 ((reqCapacity
< SHRINKFACTOR(capacity
)) || /* ...we have too much room! */
511 (!leaveExtraRoom
&& (reqCapacity
< capacity
))))) { /* ...we need to eliminate the extra space... */
512 CFIndex newCapacity
= leaveExtraRoom
? GROWFACTOR(reqCapacity
) : reqCapacity
; /* Grow by 3/2 if extra room is desired */
513 CFIndex desiredCapacity
= __CFStrDesiredCapacity(str
) * charSize
;
514 if (newCapacity
< desiredCapacity
) { /* If less than desired, bump up to desired */
515 newCapacity
= desiredCapacity
;
516 } else if (__CFStrIsFixed(str
)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
517 newCapacity
= __CFMax(desiredCapacity
, reqCapacity
); /* !!! So, fixed is not really fixed, but "tight" */
519 if (__CFStrHasContentsAllocator(str
)) { /* Also apply any preferred size from the allocator; should we do something for */
520 newCapacity
= CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str
), newCapacity
, 0);
522 newCapacity
= malloc_good_size(newCapacity
);
524 return newCapacity
; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
531 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
532 numBlocks is current total number of blocks within buffer.
533 blockSize is the size of each block in bytes
534 ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
535 insertLength is the final spacing between the remaining blocks
537 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO
538 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
539 if insertLength = 0, result = A B D G H
541 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
542 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
545 typedef struct _CFStringDeferredRange
{
549 } CFStringDeferredRange
;
551 typedef struct _CFStringStackInfo
{
552 int capacity
; // Capacity (if capacity == count, need to realloc to add another)
553 int count
; // Number of elements actually stored
554 CFStringDeferredRange
*stack
;
555 Boolean hasMalloced
; // Indicates "stack" is allocated and needs to be deallocated when done
559 CF_INLINE
void pop (CFStringStackInfo
*si
, CFStringDeferredRange
*topRange
) {
560 si
->count
= si
->count
- 1;
561 *topRange
= si
->stack
[si
->count
];
564 CF_INLINE
void push (CFStringStackInfo
*si
, const CFStringDeferredRange
*newRange
) {
565 if (si
->count
== si
->capacity
) {
566 // increase size of the stack
567 si
->capacity
= (si
->capacity
+ 4) * 2;
568 if (si
->hasMalloced
) {
569 si
->stack
= CFAllocatorReallocate(NULL
, si
->stack
, si
->capacity
* sizeof(CFStringDeferredRange
), 0);
571 CFStringDeferredRange
*newStack
= (CFStringDeferredRange
*)CFAllocatorAllocate(NULL
, si
->capacity
* sizeof(CFStringDeferredRange
), 0);
572 memmove(newStack
, si
->stack
, si
->count
* sizeof(CFStringDeferredRange
));
573 si
->stack
= newStack
;
574 si
->hasMalloced
= true;
577 si
->stack
[si
->count
] = *newRange
;
578 si
->count
= si
->count
+ 1;
581 static void rearrangeBlocks(
585 const CFRange
*ranges
,
587 CFIndex insertLength
) {
589 #define origStackSize 10
590 CFStringDeferredRange origStack
[origStackSize
];
591 CFStringStackInfo si
= {origStackSize
, 0, origStack
, false, {0, 0, 0}};
592 CFStringDeferredRange currentNonRange
= {0, 0, 0};
593 int currentRange
= 0;
594 int amountShifted
= 0;
596 // must have at least 1 range left.
598 while (currentRange
< numRanges
) {
599 currentNonRange
.beginning
= (ranges
[currentRange
].location
+ ranges
[currentRange
].length
) * blockSize
;
600 if ((numRanges
- currentRange
) == 1) {
602 currentNonRange
.length
= numBlocks
* blockSize
- currentNonRange
.beginning
;
603 if (currentNonRange
.length
== 0) break;
605 currentNonRange
.length
= (ranges
[currentRange
+ 1].location
* blockSize
) - currentNonRange
.beginning
;
607 currentNonRange
.shift
= amountShifted
+ (insertLength
* blockSize
) - (ranges
[currentRange
].length
* blockSize
);
608 amountShifted
= currentNonRange
.shift
;
609 if (amountShifted
<= 0) {
610 // process current item and rest of stack
611 if (currentNonRange
.shift
&& currentNonRange
.length
) memmove (&buffer
[currentNonRange
.beginning
+ currentNonRange
.shift
], &buffer
[currentNonRange
.beginning
], currentNonRange
.length
);
612 while (si
.count
> 0) {
613 pop (&si
, ¤tNonRange
); // currentNonRange now equals the top element of the stack.
614 if (currentNonRange
.shift
&& currentNonRange
.length
) memmove (&buffer
[currentNonRange
.beginning
+ currentNonRange
.shift
], &buffer
[currentNonRange
.beginning
], currentNonRange
.length
);
617 // add currentNonRange to stack.
618 push (&si
, ¤tNonRange
);
623 // no more ranges. if anything is on the stack, process.
625 while (si
.count
> 0) {
626 pop (&si
, ¤tNonRange
); // currentNonRange now equals the top element of the stack.
627 if (currentNonRange
.shift
&& currentNonRange
.length
) memmove (&buffer
[currentNonRange
.beginning
+ currentNonRange
.shift
], &buffer
[currentNonRange
.beginning
], currentNonRange
.length
);
629 if (si
.hasMalloced
) CFAllocatorDeallocate (NULL
, si
.stack
);
632 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
634 static void copyBlocks(
635 const uint8_t *srcBuffer
,
638 Boolean srcIsUnicode
,
639 Boolean dstIsUnicode
,
640 const CFRange
*ranges
,
642 CFIndex insertLength
) {
644 CFIndex srcLocationInBytes
= 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
645 CFIndex dstLocationInBytes
= 0; // ditto
646 CFIndex srcBlockSize
= srcIsUnicode
? sizeof(UniChar
) : sizeof(uint8_t);
647 CFIndex insertLengthInBytes
= insertLength
* (dstIsUnicode
? sizeof(UniChar
) : sizeof(uint8_t));
648 CFIndex rangeIndex
= 0;
649 CFIndex srcToDstMultiplier
= (srcIsUnicode
== dstIsUnicode
) ? 1 : (sizeof(UniChar
) / sizeof(uint8_t));
651 // Loop over the ranges, copying the range to be preserved (right before each range)
652 while (rangeIndex
< numRanges
) {
653 CFIndex srcLengthInBytes
= ranges
[rangeIndex
].location
* srcBlockSize
- srcLocationInBytes
; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
654 if (srcLengthInBytes
> 0) {
655 if (srcIsUnicode
== dstIsUnicode
) {
656 memmove(dstBuffer
+ dstLocationInBytes
, srcBuffer
+ srcLocationInBytes
, srcLengthInBytes
);
658 __CFStrConvertBytesToUnicode(srcBuffer
+ srcLocationInBytes
, (UniChar
*)(dstBuffer
+ dstLocationInBytes
), srcLengthInBytes
);
661 srcLocationInBytes
+= srcLengthInBytes
+ ranges
[rangeIndex
].length
* srcBlockSize
; // Skip over the just-copied and to-be-deleted stuff
662 dstLocationInBytes
+= srcLengthInBytes
* srcToDstMultiplier
+ insertLengthInBytes
;
666 // Do last range (the one beyond last range)
667 if (srcLocationInBytes
< srcLength
* srcBlockSize
) {
668 if (srcIsUnicode
== dstIsUnicode
) {
669 memmove(dstBuffer
+ dstLocationInBytes
, srcBuffer
+ srcLocationInBytes
, srcLength
* srcBlockSize
- srcLocationInBytes
);
671 __CFStrConvertBytesToUnicode(srcBuffer
+ srcLocationInBytes
, (UniChar
*)(dstBuffer
+ dstLocationInBytes
), srcLength
* srcBlockSize
- srcLocationInBytes
);
677 /* Reallocates the backing store of the string to accommodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)
679 static void __CFStringChangeSizeMultiple(CFMutableStringRef str
, const CFRange
*deleteRanges
, CFIndex numDeleteRanges
, CFIndex insertLength
, Boolean makeUnicode
) {
680 const uint8_t *curContents
= __CFStrContents(str
);
681 CFIndex curLength
= curContents
? __CFStrLength2(str
, curContents
) : 0;
684 // Compute new length of the string
685 if (numDeleteRanges
== 1) {
686 newLength
= curLength
+ insertLength
- deleteRanges
[0].length
;
689 newLength
= curLength
+ insertLength
* numDeleteRanges
;
690 for (cnt
= 0; cnt
< numDeleteRanges
; cnt
++) newLength
-= deleteRanges
[cnt
].length
;
693 __CFAssertIfFixedLengthIsOK(str
, newLength
);
695 if (newLength
== 0) {
696 // A somewhat optimized code-path for this special case, with the following implicit values:
697 // newIsUnicode = false
698 // useLengthAndNullBytes = false
699 // newCharSize = sizeof(uint8_t)
700 // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
701 // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
702 CFIndex curCapacity
= __CFStrCapacity(str
);
703 CFIndex newCapacity
= __CFStrNewCapacity(str
, 0, curCapacity
, true, sizeof(uint8_t));
704 if (newCapacity
!= curCapacity
) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
705 if (curContents
) __CFStrDeallocateMutableContents(str
, (uint8_t *)curContents
);
706 __CFStrSetContentPtr(str
, NULL
);
707 __CFStrSetCapacity(str
, 0);
708 __CFStrClearCapacityProvidedExternally(str
);
709 __CFStrClearHasLengthAndNullBytes(str
);
710 if (!__CFStrIsExternalMutable(str
)) __CFStrClearUnicode(str
); // External mutable implies Unicode
712 if (!__CFStrIsExternalMutable(str
)) {
713 __CFStrClearUnicode(str
);
714 if (curCapacity
>= (int)(sizeof(uint8_t) * 2)) { // If there's room
715 __CFStrSetHasLengthAndNullBytes(str
);
716 ((uint8_t *)curContents
)[0] = ((uint8_t *)curContents
)[1] = 0;
718 __CFStrClearHasLengthAndNullBytes(str
);
722 __CFStrSetExplicitLength(str
, 0);
723 } else { /* This else-clause assumes newLength > 0 */
724 Boolean oldIsUnicode
= __CFStrIsUnicode(str
);
725 Boolean newIsUnicode
= makeUnicode
|| (oldIsUnicode
/* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str
);
726 CFIndex newCharSize
= newIsUnicode
? sizeof(UniChar
) : sizeof(uint8_t);
727 Boolean useLengthAndNullBytes
= !newIsUnicode
/* && (newLength > 0) - implicit */;
728 CFIndex numExtraBytes
= useLengthAndNullBytes
? 2 : 0; /* 2 extra bytes to keep the length byte & null... */
729 CFIndex curCapacity
= __CFStrCapacity(str
);
730 CFIndex newCapacity
= __CFStrNewCapacity(str
, newLength
* newCharSize
+ numExtraBytes
, curCapacity
, true, newCharSize
);
731 Boolean allocNewBuffer
= (newCapacity
!= curCapacity
) || (curLength
> 0 && !oldIsUnicode
&& newIsUnicode
); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
732 uint8_t *newContents
= allocNewBuffer
? __CFStrAllocateMutableContents(str
, newCapacity
) : (uint8_t *)curContents
;
733 Boolean hasLengthAndNullBytes
= __CFStrHasLengthByte(str
);
735 CFAssert1(hasLengthAndNullBytes
== __CFStrHasNullByte(str
), __kCFLogAssertion
, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__
);
737 if (hasLengthAndNullBytes
) curContents
++;
738 if (useLengthAndNullBytes
) newContents
++;
741 if (oldIsUnicode
== newIsUnicode
) {
742 if (newContents
== curContents
) {
743 rearrangeBlocks(newContents
, curLength
, newCharSize
, deleteRanges
, numDeleteRanges
, insertLength
);
745 copyBlocks(curContents
, newContents
, curLength
, oldIsUnicode
, newIsUnicode
, deleteRanges
, numDeleteRanges
, insertLength
);
747 } else if (newIsUnicode
) { /* this implies we have a new buffer */
748 copyBlocks(curContents
, newContents
, curLength
, oldIsUnicode
, newIsUnicode
, deleteRanges
, numDeleteRanges
, insertLength
);
750 if (hasLengthAndNullBytes
) curContents
--; /* Undo the damage from above */
751 if (allocNewBuffer
) __CFStrDeallocateMutableContents(str
, (void *)curContents
);
755 if (useLengthAndNullBytes
) {
756 newContents
[newLength
] = 0; /* Always have null byte, if not unicode */
757 newContents
--; /* Undo the damage from above */
758 newContents
[0] = __CFCanUseLengthByte(newLength
) ? (uint8_t)newLength
: 0;
759 if (!hasLengthAndNullBytes
) __CFStrSetHasLengthAndNullBytes(str
);
761 if (hasLengthAndNullBytes
) __CFStrClearHasLengthAndNullBytes(str
);
763 if (oldIsUnicode
) __CFStrClearUnicode(str
);
764 } else { // New is unicode...
765 if (!oldIsUnicode
) __CFStrSetUnicode(str
);
766 if (hasLengthAndNullBytes
) __CFStrClearHasLengthAndNullBytes(str
);
768 __CFStrSetExplicitLength(str
, newLength
);
770 if (allocNewBuffer
) {
771 __CFStrSetCapacity(str
, newCapacity
);
772 __CFStrClearCapacityProvidedExternally(str
);
773 __CFStrSetContentPtr(str
, newContents
);
778 /* Same as above, but takes one range (very common case)
780 CF_INLINE
void __CFStringChangeSize(CFMutableStringRef str
, CFRange range
, CFIndex insertLength
, Boolean makeUnicode
) {
781 __CFStringChangeSizeMultiple(str
, &range
, 1, insertLength
, makeUnicode
);
785 static void __CFStringDeallocate(CFTypeRef cf
) {
786 CFStringRef str
= cf
;
788 // constantStringAllocatorForDebugging is not around unless DEBUG is defined, but neither is CFAssert2()...
789 CFAssert1(__CFConstantStringTableBeingFreed
|| CFGetAllocator(str
) != constantStringAllocatorForDebugging
, __kCFLogAssertion
, "Tried to deallocate CFSTR(\"%@\")", str
);
791 if (!__CFStrIsInline(str
)) {
793 Boolean
mutable = __CFStrIsMutable(str
);
794 if (__CFStrFreeContentsWhenDone(str
) && (contents
= (uint8_t *)__CFStrContents(str
))) {
796 __CFStrDeallocateMutableContents((CFMutableStringRef
)str
, contents
);
798 if (__CFStrHasContentsDeallocator(str
)) {
799 CFAllocatorRef contentsDeallocator
= __CFStrContentsDeallocator(str
);
800 CFAllocatorDeallocate(contentsDeallocator
, contents
);
801 CFRelease(contentsDeallocator
);
803 CFAllocatorRef alloc
= __CFGetAllocator(str
);
804 CFAllocatorDeallocate(alloc
, contents
);
808 if (mutable && __CFStrHasContentsAllocator(str
)) CFRelease(__CFStrContentsAllocator((CFMutableStringRef
)str
));
812 static Boolean
__CFStringEqual(CFTypeRef cf1
, CFTypeRef cf2
) {
813 CFStringRef str1
= cf1
;
814 CFStringRef str2
= cf2
;
815 const uint8_t *contents1
;
816 const uint8_t *contents2
;
819 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
820 /* !!! We do not need == test, as the CFBase runtime assures this */
822 contents1
= __CFStrContents(str1
);
823 contents2
= __CFStrContents(str2
);
824 len1
= __CFStrLength2(str1
, contents1
);
826 if (len1
!= __CFStrLength2(str2
, contents2
)) return false;
828 contents1
+= __CFStrSkipAnyLengthByte(str1
);
829 contents2
+= __CFStrSkipAnyLengthByte(str2
);
831 if (__CFStrIsEightBit(str1
) && __CFStrIsEightBit(str2
)) {
832 return memcmp((const char *)contents1
, (const char *)contents2
, len1
) ? false : true;
833 } else if (__CFStrIsEightBit(str1
)) { /* One string has Unicode contents */
834 CFStringInlineBuffer buf
;
837 CFStringInitInlineBuffer(str1
, &buf
, CFRangeMake(0, len1
));
838 for (buf_idx
= 0; buf_idx
< len1
; buf_idx
++) {
839 if (__CFStringGetCharacterFromInlineBufferQuick(&buf
, buf_idx
) != ((UniChar
*)contents2
)[buf_idx
]) return false;
841 } else if (__CFStrIsEightBit(str2
)) { /* One string has Unicode contents */
842 CFStringInlineBuffer buf
;
845 CFStringInitInlineBuffer(str2
, &buf
, CFRangeMake(0, len1
));
846 for (buf_idx
= 0; buf_idx
< len1
; buf_idx
++) {
847 if (__CFStringGetCharacterFromInlineBufferQuick(&buf
, buf_idx
) != ((UniChar
*)contents1
)[buf_idx
]) return false;
849 } else { /* Both strings have Unicode contents */
851 for (idx
= 0; idx
< len1
; idx
++) {
852 if (((UniChar
*)contents1
)[idx
] != ((UniChar
*)contents2
)[idx
]) return false;
859 /* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
860 If the length is less than or equal to 16, then the hash function is simply the
861 following (n is the nth UniChar character, starting from 0):
864 hash(n) = hash(n-1) * 257 + unichar(n);
865 Hash = hash(length-1) * ((length & 31) + 1)
867 If the length is greater than 16, then the above algorithm applies to
868 characters 0..7 and length-8..length-1; thus the first and last 8 characters.
870 CFHashCode
__CFStringHash(CFTypeRef cf
) {
871 CFStringRef str
= cf
;
872 const uint8_t *contents
;
877 /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
879 contents
= __CFStrContents(str
);
880 len
= __CFStrLength2(str
, contents
);
882 if (__CFStrIsEightBit(str
)) {
883 contents
+= __CFStrSkipAnyLengthByte(str
);
885 for (cnt
= 0; cnt
< len
; cnt
++) result
= result
* 257 + __CFCharToUniCharTable
[contents
[cnt
]];
887 for (cnt
= 0; cnt
< 8; cnt
++) result
= result
* 257 + __CFCharToUniCharTable
[contents
[cnt
]];
888 for (cnt
= len
- 8; cnt
< len
; cnt
++) result
= result
* 257 + __CFCharToUniCharTable
[contents
[cnt
]];
891 if (!__CFCharToUniCharFunc
) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
894 for (cnt
= 0; cnt
< len
; cnt
++) if (contents
[cnt
] >= 128) err
= true;
896 for (cnt
= 0; cnt
< 8; cnt
++) if (contents
[cnt
] >= 128) err
= true;
897 for (cnt
= len
- 8; cnt
< len
; cnt
++) if (contents
[cnt
] >= 128) err
= true;
900 // Can't do log here, as it might be too early
901 printf("Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
906 const UniChar
*uContents
= (UniChar
*)contents
;
908 for (cnt
= 0; cnt
< len
; cnt
++) result
= result
* 257 + uContents
[cnt
];
910 for (cnt
= 0; cnt
< 8; cnt
++) result
= result
* 257 + uContents
[cnt
];
911 for (cnt
= len
- 8; cnt
< len
; cnt
++) result
= result
* 257 + uContents
[cnt
];
914 result
+= (result
<< (len
& 31));
919 static CFStringRef
__CFStringCopyDescription(CFTypeRef cf
) {
920 return CFStringCreateWithFormat(kCFAllocatorDefault
, NULL
, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf
, __CFGetAllocator(cf
), cf
);
923 static CFStringRef
__CFStringCopyFormattingDescription(CFTypeRef cf
, CFDictionaryRef formatOptions
) {
924 return CFStringCreateCopy(__CFGetAllocator(cf
), cf
);
927 static CFTypeID __kCFStringTypeID
= _kCFRuntimeNotATypeID
;
929 static const CFRuntimeClass __CFStringClass
= {
933 (void *)CFStringCreateCopy
,
934 __CFStringDeallocate
,
937 __CFStringCopyFormattingDescription
,
938 __CFStringCopyDescription
941 __private_extern__
void __CFStringInitialize(void) {
942 __kCFStringTypeID
= _CFRuntimeRegisterClass(&__CFStringClass
);
945 CFTypeID
CFStringGetTypeID(void) {
946 return __kCFStringTypeID
;
950 static Boolean
CFStrIsUnicode(CFStringRef str
) {
951 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, Boolean
, str
, "_encodingCantBeStoredInEightBitCFString");
952 return __CFStrIsUnicode(str
);
957 #define ALLOCATORSFREEFUNC ((void *)-1)
959 /* contentsDeallocator indicates how to free the data if it's noCopy == true:
960 kCFAllocatorNull: don't free
961 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
962 NULL: default allocator
963 otherwise it's the allocator that should be used (it will be explicitly stored)
964 if noCopy == false, then freeFunc should be ALLOCATORSFREEFUNC
965 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
966 possiblyExternalFormat indicates that the bytes might have BOM and be swapped
967 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
968 numBytes contains the actual number of bytes in "bytes", including Length byte,
969 BUT not the NULL byte at the end
970 bytes should not contain BOM characters
971 !!! Various flags should be combined to reduce number of arguments, if possible
973 __private_extern__ CFStringRef
__CFStringCreateImmutableFunnel3(
974 CFAllocatorRef alloc
, const void *bytes
, CFIndex numBytes
, CFStringEncoding encoding
,
975 Boolean possiblyExternalFormat
, Boolean tryToReduceUnicode
, Boolean hasLengthByte
, Boolean hasNullByte
, Boolean noCopy
,
976 CFAllocatorRef contentsDeallocator
, UInt32 converterFlags
) {
978 CFMutableStringRef str
;
979 CFVarWidthCharBuffer vBuf
;
981 Boolean useLengthByte
= false;
982 Boolean useNullByte
= false;
983 Boolean useInlineData
= false;
985 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
987 if (contentsDeallocator
== ALLOCATORSFREEFUNC
) {
988 contentsDeallocator
= alloc
;
989 } else if (contentsDeallocator
== NULL
) {
990 contentsDeallocator
= __CFGetDefaultAllocator();
993 if ((NULL
!= kCFEmptyString
) && (numBytes
== 0) && (alloc
== kCFAllocatorSystemDefault
)) { // If we are using the system default allocator, and the string is empty, then use the empty string!
994 if (noCopy
&& (contentsDeallocator
!= kCFAllocatorNull
)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
995 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
997 return CFRetain(kCFEmptyString
); // Quick exit; won't catch all empty strings, but most
1000 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1002 vBuf
.shouldFreeChars
= false; // We use this to remember to free the buffer possibly allocated by decode
1004 // First check to see if the data needs to be converted...
1005 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
1007 if ((encoding
== kCFStringEncodingUnicode
&& possiblyExternalFormat
) || (encoding
!= kCFStringEncodingUnicode
&& !__CFCanUseEightBitCFStringForBytes(bytes
, numBytes
, encoding
))) {
1008 const void *realBytes
= (uint8_t*) bytes
+ (hasLengthByte
? 1 : 0);
1009 CFIndex realNumBytes
= numBytes
- (hasLengthByte
? 1 : 0);
1010 Boolean usingPassedInMemory
= false;
1012 vBuf
.allocator
= __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
1013 vBuf
.chars
.unicode
= NULL
; // This will cause the decode function to allocate memory if necessary
1015 if (!__CFStringDecodeByteStream3(realBytes
, realNumBytes
, encoding
, false, &vBuf
, &usingPassedInMemory
, converterFlags
)) {
1016 return NULL
; // !!! Is this acceptable failure mode?
1019 encoding
= vBuf
.isASCII
? kCFStringEncodingASCII
: kCFStringEncodingUnicode
;
1021 if (!usingPassedInMemory
) {
1023 // Make the parameters fit the new situation
1024 numBytes
= vBuf
.isASCII
? vBuf
.numChars
: (vBuf
.numChars
* sizeof(UniChar
));
1025 hasLengthByte
= hasNullByte
= false;
1027 // Get rid of the original buffer if its not being used
1028 if (noCopy
&& contentsDeallocator
!= kCFAllocatorNull
) {
1029 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1031 contentsDeallocator
= alloc
; // At this point we are using the string's allocator, as the original buffer is gone...
1033 // See if we can reuse any storage the decode func might have allocated
1034 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1036 if (vBuf
.shouldFreeChars
&& (alloc
== vBuf
.allocator
) && encoding
== kCFStringEncodingUnicode
) {
1037 vBuf
.shouldFreeChars
= false; // Transferring ownership to the CFString
1038 bytes
= CFAllocatorReallocate(vBuf
.allocator
, (void *)vBuf
.chars
.unicode
, numBytes
, 0); // Tighten up the storage
1041 bytes
= vBuf
.chars
.unicode
;
1042 noCopy
= false; // Can't do noCopy anymore
1043 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1048 // At this point, all necessary input arguments have been changed to reflect the new state
1050 } else if (encoding
== kCFStringEncodingUnicode
&& tryToReduceUnicode
) { // Check to see if we can reduce Unicode to ASCII
1052 CFIndex len
= numBytes
/ sizeof(UniChar
);
1053 Boolean allASCII
= true;
1055 for (cnt
= 0; cnt
< len
; cnt
++) if (((const UniChar
*)bytes
)[cnt
] > 127) {
1060 if (allASCII
) { // Yes we can!
1062 hasLengthByte
= __CFCanUseLengthByte(len
);
1064 numBytes
= (len
+ 1 + (hasLengthByte
? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte
1065 // See if we can use that temporary local buffer in vBuf...
1066 mem
= ptr
= (uint8_t *)((numBytes
>= __kCFVarWidthLocalBufferSize
) ? CFAllocatorAllocate(alloc
, numBytes
, 0) : vBuf
.localBuffer
);
1067 if (mem
!= vBuf
.localBuffer
&& __CFOASafe
) __CFSetLastAllocationEventName(mem
, "CFString (store)");
1068 if (hasLengthByte
) *ptr
++ = len
;
1069 for (cnt
= 0; cnt
< len
; cnt
++) ptr
[cnt
] = ((const UniChar
*)bytes
)[cnt
];
1071 if (noCopy
&& contentsDeallocator
!= kCFAllocatorNull
) {
1072 CFAllocatorDeallocate(contentsDeallocator
, (void *)bytes
);
1075 encoding
= kCFStringEncodingASCII
;
1076 contentsDeallocator
= alloc
; // At this point we are using the string's allocator, as the original buffer is gone...
1077 noCopy
= (numBytes
>= __kCFVarWidthLocalBufferSize
); // If we had to allocate it, make sure it's kept around
1078 numBytes
--; // Should not contain the NULL byte at end...
1081 // At this point, all necessary input arguments have been changed to reflect the new state
1084 // Now determine the necessary size
1088 size
= sizeof(void *); // Pointer to the buffer
1089 if (contentsDeallocator
!= alloc
&& contentsDeallocator
!= kCFAllocatorNull
) {
1090 size
+= sizeof(void *); // The contentsDeallocator
1092 if (!hasLengthByte
) size
+= sizeof(SInt32
); // Explicit length
1093 useLengthByte
= hasLengthByte
;
1094 useNullByte
= hasNullByte
;
1096 } else { // Inline data; reserve space for it
1098 useInlineData
= true;
1101 if (hasLengthByte
|| (encoding
!= kCFStringEncodingUnicode
&& __CFCanUseLengthByte(numBytes
))) {
1102 useLengthByte
= true;
1103 if (!hasLengthByte
) size
+= 1;
1105 size
+= sizeof(SInt32
); // Explicit length
1107 if (hasNullByte
|| encoding
!= kCFStringEncodingUnicode
) {
1113 // Finally, allocate!
1115 str
= (CFMutableStringRef
)_CFRuntimeCreateInstance(alloc
, __kCFStringTypeID
, size
, NULL
);
1116 if (__CFOASafe
) __CFSetLastAllocationEventName(str
, "CFString (immutable)");
1118 __CFStrSetInfoBits(str
,
1119 (useInlineData
? __kCFHasInlineData
: (contentsDeallocator
== alloc
? __kCFHasExternalDataDefaultFree
: (contentsDeallocator
== kCFAllocatorNull
? __kCFHasExternalDataNoFree
: __kCFHasExternalDataCustomFree
))) |
1120 ((encoding
== kCFStringEncodingUnicode
) ? __kCFIsUnicode
: 0) |
1121 (useNullByte
? __kCFHasNullByte
: 0) |
1122 (useLengthByte
? __kCFHasLengthByte
: 0));
1124 if (!useLengthByte
) {
1125 CFIndex length
= numBytes
- (hasLengthByte
? 1 : 0);
1126 if (encoding
== kCFStringEncodingUnicode
) length
/= sizeof(UniChar
);
1127 __CFStrSetExplicitLength(str
, length
);
1130 if (useInlineData
) {
1131 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
1132 if (useLengthByte
&& !hasLengthByte
) *contents
++ = numBytes
;
1133 memmove(contents
, bytes
, numBytes
);
1134 if (useNullByte
) contents
[numBytes
] = 0;
1136 __CFStrSetContentPtr(str
, bytes
);
1137 if (contentsDeallocator
!= alloc
&& contentsDeallocator
!= kCFAllocatorNull
) __CFStrSetContentsDeallocator(str
, CFRetain(contentsDeallocator
));
1139 if (vBuf
.shouldFreeChars
) CFAllocatorDeallocate(vBuf
.allocator
, (void *)bytes
);
1144 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1146 CFStringRef
__CFStringCreateImmutableFunnel2(
1147 CFAllocatorRef alloc
, const void *bytes
, CFIndex numBytes
, CFStringEncoding encoding
,
1148 Boolean possiblyExternalFormat
, Boolean tryToReduceUnicode
, Boolean hasLengthByte
, Boolean hasNullByte
, Boolean noCopy
,
1149 CFAllocatorRef contentsDeallocator
) {
1150 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, possiblyExternalFormat
, tryToReduceUnicode
, hasLengthByte
, hasNullByte
, noCopy
, contentsDeallocator
, 0);
1155 CFStringRef
CFStringCreateWithPascalString(CFAllocatorRef alloc
, ConstStringPtr pStr
, CFStringEncoding encoding
) {
1156 CFIndex len
= (CFIndex
)(*(uint8_t *)pStr
);
1157 return __CFStringCreateImmutableFunnel3(alloc
, pStr
, len
+1, encoding
, false, false, true, false, false, ALLOCATORSFREEFUNC
, 0);
1161 CFStringRef
CFStringCreateWithCString(CFAllocatorRef alloc
, const char *cStr
, CFStringEncoding encoding
) {
1162 CFIndex len
= strlen(cStr
);
1163 return __CFStringCreateImmutableFunnel3(alloc
, cStr
, len
, encoding
, false, false, false, true, false, ALLOCATORSFREEFUNC
, 0);
1166 CFStringRef
CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc
, ConstStringPtr pStr
, CFStringEncoding encoding
, CFAllocatorRef contentsDeallocator
) {
1167 CFIndex len
= (CFIndex
)(*(uint8_t *)pStr
);
1168 return __CFStringCreateImmutableFunnel3(alloc
, pStr
, len
+1, encoding
, false, false, true, false, true, contentsDeallocator
, 0);
1172 CFStringRef
CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc
, const char *cStr
, CFStringEncoding encoding
, CFAllocatorRef contentsDeallocator
) {
1173 CFIndex len
= strlen(cStr
);
1174 return __CFStringCreateImmutableFunnel3(alloc
, cStr
, len
, encoding
, false, false, false, true, true, contentsDeallocator
, 0);
1178 CFStringRef
CFStringCreateWithCharacters(CFAllocatorRef alloc
, const UniChar
*chars
, CFIndex numChars
) {
1179 return __CFStringCreateImmutableFunnel3(alloc
, chars
, numChars
* sizeof(UniChar
), kCFStringEncodingUnicode
, false, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1183 CFStringRef
CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc
, const UniChar
*chars
, CFIndex numChars
, CFAllocatorRef contentsDeallocator
) {
1184 return __CFStringCreateImmutableFunnel3(alloc
, chars
, numChars
* sizeof(UniChar
), kCFStringEncodingUnicode
, false, false, false, false, true, contentsDeallocator
, 0);
1188 CFStringRef
CFStringCreateWithBytes(CFAllocatorRef alloc
, const uint8_t *bytes
, CFIndex numBytes
, CFStringEncoding encoding
, Boolean externalFormat
) {
1189 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, externalFormat
, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1192 CFStringRef
_CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc
, const uint8_t *bytes
, CFIndex numBytes
, CFStringEncoding encoding
, Boolean externalFormat
, CFAllocatorRef contentsDeallocator
) {
1193 return __CFStringCreateImmutableFunnel3(alloc
, bytes
, numBytes
, encoding
, externalFormat
, true, false, false, true, contentsDeallocator
, 0);
1196 CFStringRef
CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc
, CFDictionaryRef formatOptions
, CFStringRef format
, va_list arguments
) {
1197 return _CFStringCreateWithFormatAndArgumentsAux(alloc
, NULL
, formatOptions
, format
, arguments
);
1200 CFStringRef
_CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc
, CFStringRef (*copyDescFunc
)(void *, CFDictionaryRef
), CFDictionaryRef formatOptions
, CFStringRef format
, va_list arguments
) {
1202 CFMutableStringRef outputString
= CFStringCreateMutable(__CFGetDefaultAllocator(), 0); //should use alloc if no copy/release
1203 __CFStrSetDesiredCapacity(outputString
, 120); // Given this will be tightened later, choosing a larger working string is fine
1204 _CFStringAppendFormatAndArgumentsAux(outputString
, copyDescFunc
, formatOptions
, format
, arguments
);
1205 // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1206 // (However, this does make the string inline, and cause the supplied allocator to be used...)
1207 str
= CFStringCreateCopy(alloc
, outputString
);
1208 CFRelease(outputString
);
1212 CFStringRef
CFStringCreateWithFormat(CFAllocatorRef alloc
, CFDictionaryRef formatOptions
, CFStringRef format
, ...) {
1216 va_start(argList
, format
);
1217 result
= CFStringCreateWithFormatAndArguments(alloc
, formatOptions
, format
, argList
);
1224 CFStringRef
CFStringCreateWithSubstring(CFAllocatorRef alloc
, CFStringRef str
, CFRange range
) {
1225 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, CFStringRef
, str
, "_createSubstringWithRange:", CFRangeMake(range
.location
, range
.length
));
1227 __CFAssertIsString(str
);
1228 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
1230 if ((range
.location
== 0) && (range
.length
== __CFStrLength(str
))) { /* The substring is the whole string... */
1231 return CFStringCreateCopy(alloc
, str
);
1232 } else if (__CFStrIsEightBit(str
)) {
1233 const uint8_t *contents
= __CFStrContents(str
);
1234 return __CFStringCreateImmutableFunnel3(alloc
, contents
+ range
.location
+ __CFStrSkipAnyLengthByte(str
), range
.length
, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC
, 0);
1236 const UniChar
*contents
= __CFStrContents(str
);
1237 return __CFStringCreateImmutableFunnel3(alloc
, contents
+ range
.location
, range
.length
* sizeof(UniChar
), kCFStringEncodingUnicode
, false, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1241 CFStringRef
CFStringCreateCopy(CFAllocatorRef alloc
, CFStringRef str
) {
1242 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFStringRef
, str
, "copy");
1244 __CFAssertIsString(str
);
1245 if (!__CFStrIsMutable(str
) && // If the string is not mutable
1246 ((alloc
? alloc
: __CFGetDefaultAllocator()) == __CFGetAllocator(str
)) && // and it has the same allocator as the one we're using
1247 (__CFStrIsInline(str
) || __CFStrFreeContentsWhenDone(str
) || str
->base
._rc
== 0)) { // and the characters are inline, or are owned by the string, or the string is constant
1248 CFRetain(str
); // Then just retain instead of making a true copy
1251 if (__CFStrIsEightBit(str
)) {
1252 const uint8_t *contents
= __CFStrContents(str
);
1253 return __CFStringCreateImmutableFunnel3(alloc
, contents
+ __CFStrSkipAnyLengthByte(str
), __CFStrLength2(str
, contents
), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC
, 0);
1255 const UniChar
*contents
= __CFStrContents(str
);
1256 return __CFStringCreateImmutableFunnel3(alloc
, contents
, __CFStrLength2(str
, contents
) * sizeof(UniChar
), kCFStringEncodingUnicode
, false, true, false, false, false, ALLOCATORSFREEFUNC
, 0);
1262 /*** Constant string stuff... ***/
1264 static CFMutableDictionaryRef constantStringTable
= NULL
;
1266 /* For now we call a function to create a constant string and keep previously created constant strings in a dictionary. The keys are the 8-bit constant C-strings from the compiler; the values are the CFStrings created for them.
1269 static CFStringRef
__cStrCopyDescription(const void *ptr
) {
1270 return CFStringCreateWithCStringNoCopy(NULL
, (const char *)ptr
, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull
);
1273 static Boolean
__cStrEqual(const void *ptr1
, const void *ptr2
) {
1274 return (strcmp((const char *)ptr1
, (const char *)ptr2
) == 0);
1277 static CFHashCode
__cStrHash(const void *ptr
) {
1278 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1279 const unsigned char *cStr
= (const unsigned char *)ptr
;
1280 CFIndex len
= strlen(cStr
);
1281 CFHashCode result
= 0;
1282 if (len
<= 4) { // All chars
1284 while (cnt
--) result
+= (result
<< 8) + *cStr
++;
1285 } else { // First and last 2 chars
1286 result
+= (result
<< 8) + cStr
[0];
1287 result
+= (result
<< 8) + cStr
[1];
1288 result
+= (result
<< 8) + cStr
[len
-2];
1289 result
+= (result
<< 8) + cStr
[len
-1];
1291 result
+= (result
<< (len
& 31));
1296 /* We use a special allocator (which simply calls through to the default) for constant strings so that we can catch them being freed...
1298 static void *csRealloc(void *oPtr
, CFIndex size
, CFOptionFlags hint
, void *info
) {
1299 return CFAllocatorReallocate(NULL
, oPtr
, size
, hint
);
1302 static void *csAlloc(CFIndex size
, CFOptionFlags hint
, void *info
) {
1303 return CFAllocatorAllocate(NULL
, size
, hint
);
1306 static void csDealloc(void *ptr
, void *info
) {
1307 CFAllocatorDeallocate(NULL
, ptr
);
1310 static CFStringRef
csCopyDescription(const void *info
) {
1311 return CFRetain(CFSTR("Debug allocator for CFSTRs"));
1315 static CFSpinLock_t _CFSTRLock
= 0;
/* Returns the constant CFString for the C string cStr.
   What is visible here:
     - constantStringTable is created on first use, with C-string key callbacks
       (__cStrCopyDescription, __cStrEqual, __cStrHash), and a debugging allocator may be created.
     - The table is consulted under _CFSTRLock; on a hit the lock is simply released.
     - On a miss a CFString is created from cStr, the lock is taken again, and the string is added
       with CFDictionaryAddValue(); if the count did not change, another caller added it first
       and that existing value is used instead.
     - The dictionary key is the new string's own 8-bit contents when it is 8-bit, otherwise a
       separately allocated copy of cStr.
   NOTE(review): the preprocessor lines (#if / #else / #endif) and several declarations and
   braces of this function are missing from this copy of the file, so which of the two
   encoding strategies below is compiled cannot be told from here. The two "This #if ..." /
   "This #else ..." comments also look swapped relative to the code that follows each of
   them (the first body tries ASCII, then UTF-8, then MacRoman; the second uses MacRoman
   only) -- confirm against a pristine copy before relying on either.
*/
1317 CFStringRef
__CFStringMakeConstantString(const char *cStr
) {
1319 if (constantStringTable
== NULL
) {
1320 CFDictionaryKeyCallBacks constantStringCallBacks
= {0, NULL
, NULL
, __cStrCopyDescription
, __cStrEqual
, __cStrHash
};
1321 constantStringTable
= CFDictionaryCreateMutable(NULL
, 0, &constantStringCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1322 _CFDictionarySetCapacity(constantStringTable
, 2500); // avoid lots of rehashing
1325 CFAllocatorContext context
= {0, NULL
, NULL
, NULL
, csCopyDescription
, csAlloc
, csRealloc
, csDealloc
, NULL
};
1326 constantStringAllocatorForDebugging
= CFAllocatorCreate(NULL
, &context
);
1329 #define constantStringAllocatorForDebugging NULL
1333 __CFSpinLock(&_CFSTRLock
);
1334 if ((result
= (CFStringRef
)CFDictionaryGetValue(constantStringTable
, cStr
))) {
1335 __CFSpinUnlock(&_CFSTRLock
);
1337 __CFSpinUnlock(&_CFSTRLock
);
1341 // This #if treats non-7 bit chars in CFSTR() as MacOSRoman, for backward compatibility
1343 Boolean isASCII
= true;
1344 //#warning Ali: Code to verify CFSTRs active, should be disabled before ship
1345 const unsigned char *tmp
= cStr
;
1353 if (isASCII
) result
= CFStringCreateWithCString(constantStringAllocatorForDebugging
, cStr
, kCFStringEncodingASCII
);
1354 if (result
== NULL
) {
1356 result
= CFStringCreateWithCString(constantStringAllocatorForDebugging
, cStr
, kCFStringEncodingUTF8
);
1357 if (result
== NULL
) {
1358 result
= CFStringCreateWithCString(constantStringAllocatorForDebugging
, cStr
, kCFStringEncodingMacRoman
);
1359 if (result
== NULL
) {
1360 log
= "that are not UTF-8, crashing";
1362 log
= "that are not UTF-8; treating as Mac OS Roman for now. FIX THIS!";
1365 log
= "that seem to be UTF-8; please VERIFY";
1368 const unsigned char *tmp
= cStr
;
1369 CFMutableStringRef ms
= CFStringCreateMutable(NULL
, 0);
1371 CFStringAppendFormat(ms
, NULL
, (*tmp
> 127) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp
);
1374 CFLog(__kCFLogAssertion
, CFSTR("CFSTR(\"%@\") has non-7 bit chars %s"), ms
, log
);
1376 if (result
== NULL
) HALT
;
1380 // This #else treats non-7 bit chars in CFSTR() as UTF8 first, and if that doesn't work, as MacOSRoman, for compatibility
1382 Boolean isASCII
= true;
1383 //#warning Ali: Code to verify CFSTRs active, should be disabled before ship
1384 const unsigned char *tmp
= cStr
;
1392 CFMutableStringRef ms
= CFStringCreateMutable(NULL
, 0);
1395 CFStringAppendFormat(ms
, NULL
, (*tmp
> 127) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp
);
1398 CFLog(0, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms
);
1401 result
= CFStringCreateWithCString(constantStringAllocatorForDebugging
, cStr
, kCFStringEncodingMacRoman
);
1402 if (result
== NULL
) {
1403 CFLog(__kCFLogAssertion
, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1407 if (__CFOASafe
) __CFSetLastAllocationEventName((void *)result
, "CFString (CFSTR)");
1408 if (__CFStrIsEightBit(result
)) {
1409 key
= (char *)__CFStrContents(result
) + __CFStrSkipAnyLengthByte(result
);
1410 } else { // For some reason the string is not 8-bit!
1411 key
= CFAllocatorAllocate(NULL
, strlen(cStr
) + 1, 0);
1412 if (__CFOASafe
) __CFSetLastAllocationEventName((void *)key
, "CFString (CFSTR key)");
1413 strcpy(key
, cStr
); // !!! We will leak this, if the string is removed from the table (or table is freed)
1418 CFStringRef resultToBeReleased
= result
;
1421 __CFSpinLock(&_CFSTRLock
);
1422 count
= CFDictionaryGetCount(constantStringTable
);
1423 CFDictionaryAddValue(constantStringTable
, key
, result
);
1424 if (CFDictionaryGetCount(constantStringTable
) == count
) { // add did nothing, someone already put it there
1425 result
= (CFStringRef
)CFDictionaryGetValue(constantStringTable
, key
);
1427 __CFSpinUnlock(&_CFSTRLock
);
1429 // Can't release this in the DEBUG case; will get assertion failure
1430 CFRelease(resultToBeReleased
);
#if defined(__MACOS8__) || defined(__WIN32__)

/* Releases the constant string table when the library is unloaded.
   In DEBUG builds __CFConstantStringTableBeingFreed is raised around the release so that
   __CFStringDeallocate() does not assert on the constant strings being freed.
*/
void __CFStringCleanup (void) {
    /* in case library is unloaded, release store for the constant string table */
    if (constantStringTable != NULL) {
#if defined(DEBUG)
        __CFConstantStringTableBeingFreed = true;
        CFRelease(constantStringTable);
        __CFConstantStringTableBeingFreed = false;
#else
        CFRelease(constantStringTable);
#endif
        constantStringTable = NULL;	// Don't leave a dangling pointer; a later CFSTR() would otherwise use freed memory
    }
#if defined(DEBUG)
    // NOTE(review): this frees the allocator object through itself rather than with CFRelease(); confirm this is intended
    CFAllocatorDeallocate( constantStringAllocatorForDebugging, (void*) constantStringAllocatorForDebugging );
#endif
}

#endif
1459 // Can pass in NSString as replacement string
1460 // Call with numRanges > 0, and incrementing ranges
1462 static void __CFStringReplaceMultiple(CFMutableStringRef str
, CFRange
*ranges
, CFIndex numRanges
, CFStringRef replacement
) {
1464 CFIndex replacementLength
= CFStringGetLength(replacement
);
1466 __CFStringChangeSizeMultiple(str
, ranges
, numRanges
, replacementLength
, (replacementLength
> 0) && CFStrIsUnicode(replacement
));
1468 if (__CFStrIsUnicode(str
)) {
1469 UniChar
*contents
= (UniChar
*)__CFStrContents(str
);
1470 UniChar
*firstReplacement
= contents
+ ranges
[0].location
;
1471 // Extract the replacementString into the first location, then copy from there
1472 CFStringGetCharacters(replacement
, CFRangeMake(0, replacementLength
), firstReplacement
);
1473 for (cnt
= 1; cnt
< numRanges
; cnt
++) {
1474 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1475 contents
+= replacementLength
- ranges
[cnt
- 1].length
;
1476 memmove(contents
+ ranges
[cnt
].location
, firstReplacement
, replacementLength
* sizeof(UniChar
));
1479 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
1480 uint8_t *firstReplacement
= contents
+ ranges
[0].location
+ __CFStrSkipAnyLengthByte(str
);
1481 // Extract the replacementString into the first location, then copy from there
1482 CFStringGetBytes(replacement
, CFRangeMake(0, replacementLength
), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement
, replacementLength
, NULL
);
1483 contents
+= __CFStrSkipAnyLengthByte(str
); // Now contents will simply track the location to insert next string into
1484 for (cnt
= 1; cnt
< numRanges
; cnt
++) {
1485 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1486 contents
+= replacementLength
- ranges
[cnt
- 1].length
;
1487 memmove(contents
+ ranges
[cnt
].location
, firstReplacement
, replacementLength
);
1492 // Can pass in NSString as replacement string
1494 static void __CFStringReplace(CFMutableStringRef str
, CFRange range
, CFStringRef replacement
) {
1495 CFIndex replacementLength
= CFStringGetLength(replacement
);
1497 __CFStringChangeSize(str
, range
, replacementLength
, (replacementLength
> 0) && CFStrIsUnicode(replacement
));
1499 if (__CFStrIsUnicode(str
)) {
1500 UniChar
*contents
= (UniChar
*)__CFStrContents(str
);
1501 CFStringGetCharacters(replacement
, CFRangeMake(0, replacementLength
), contents
+ range
.location
);
1503 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
1504 CFStringGetBytes(replacement
, CFRangeMake(0, replacementLength
), __CFStringGetEightBitStringEncoding(), 0, false, contents
+ range
.location
+ __CFStrSkipAnyLengthByte(str
), replacementLength
, NULL
);
1508 /* If client does not provide a minimum capacity
1510 #define DEFAULTMINCAPACITY 32
1512 CF_INLINE CFMutableStringRef
__CFStringCreateMutableFunnel(CFAllocatorRef alloc
, CFIndex maxLength
, UInt32 additionalInfoBits
) {
1513 CFMutableStringRef str
;
1514 Boolean hasExternalContentsAllocator
= (additionalInfoBits
& __kCFHasContentsAllocator
) ? true : false;
1516 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
1518 // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
1519 str
= (CFMutableStringRef
)_CFRuntimeCreateInstance(alloc
, __kCFStringTypeID
, sizeof(void *) + sizeof(UInt32
) * 3 + (hasExternalContentsAllocator
? sizeof(CFAllocatorRef
) : 0), NULL
);
1521 if (__CFOASafe
) __CFSetLastAllocationEventName(str
, "CFString (mutable)");
1523 __CFStrSetInfoBits(str
, __kCFIsMutable
| additionalInfoBits
);
1524 str
->variants
.externalMutable
.buffer
= NULL
;
1525 __CFStrSetExplicitLength(str
, 0);
1526 str
->variants
.externalMutable
.gapEtc
= 0;
1527 if (maxLength
!= 0) __CFStrSetIsFixed(str
);
1528 __CFStrSetDesiredCapacity(str
, (maxLength
== 0) ? DEFAULTMINCAPACITY
: maxLength
);
1529 __CFStrSetCapacity(str
, 0);
1534 CFMutableStringRef
CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc
, UniChar
*chars
, CFIndex numChars
, CFIndex capacity
, CFAllocatorRef externalCharactersAllocator
) {
1535 CFOptionFlags contentsAllocationBits
= externalCharactersAllocator
? ((externalCharactersAllocator
== kCFAllocatorNull
) ? __kCFHasExternalDataNoFree
: __kCFHasContentsAllocator
) : __kCFHasExternalDataDefaultFree
;
1536 CFMutableStringRef string
= __CFStringCreateMutableFunnel(alloc
, 0, contentsAllocationBits
| __kCFIsExternalMutable
| __kCFIsUnicode
);
1538 if (contentsAllocationBits
== __kCFHasContentsAllocator
) __CFStrSetContentsAllocator(string
, CFRetain(externalCharactersAllocator
));
1539 CFStringSetExternalCharactersNoCopy(string
, chars
, numChars
, capacity
);
1544 CFMutableStringRef
CFStringCreateMutable(CFAllocatorRef alloc
, CFIndex maxLength
) {
1545 return __CFStringCreateMutableFunnel(alloc
, maxLength
, __kCFHasExternalDataDefaultFree
);
1548 CFMutableStringRef
CFStringCreateMutableCopy(CFAllocatorRef alloc
, CFIndex maxLength
, CFStringRef string
) {
1549 CFMutableStringRef newString
;
1551 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFMutableStringRef
, string
, "mutableCopy");
1553 __CFAssertIsString(string
);
1555 newString
= CFStringCreateMutable(alloc
, maxLength
);
1556 __CFStringReplace(newString
, CFRangeMake(0, 0), string
);
1562 __private_extern__
void _CFStrSetDesiredCapacity(CFMutableStringRef str
, CFIndex len
) {
1563 __CFAssertIsStringAndMutable(str
);
1564 __CFStrSetDesiredCapacity(str
, len
);
1568 /* This one is for CF
1570 CFIndex
CFStringGetLength(CFStringRef str
) {
1571 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFIndex
, str
, "length");
1573 __CFAssertIsString(str
);
1574 return __CFStrLength(str
);
1577 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1579 CFIndex
_CFStringGetLength2(CFStringRef str
) {
1580 return __CFStrLength(str
);
1584 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere. */
/* Returns the character at index idx, reading from the already-fetched
   `contents` pointer of str.  No bounds checking is performed here.
   Eight-bit contents: any length byte is skipped and the byte is mapped
   through __CFCharToUniCharTable.  Otherwise `contents` is indexed as UniChar.
   NOTE(review): short lines (closing braces and possibly a preprocessor guard
   around the diagnostic printf) are not visible in this copy -- confirm
   against the canonical source before editing the code. */
1586 CF_INLINE UniChar
__CFStringGetCharacterAtIndexGuts(CFStringRef str
, CFIndex idx
, const uint8_t *contents
) {
1587 if (__CFStrIsEightBit(str
)) {
1588 contents
+= __CFStrSkipAnyLengthByte(str
);
// Diagnostic: a high byte is being read while __CFCharToUniCharFunc is still unset
1590 if (!__CFCharToUniCharFunc
&& (contents
[idx
] >= 128)) {
1591 // Can't do log here, as it might be too early
1592 printf("Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
// Eight-bit result: table lookup on the byte value
1595 return __CFCharToUniCharTable
[contents
[idx
]];
// Unicode result: direct UniChar indexing
1598 return ((UniChar
*)contents
)[idx
];
1601 /* This one is for the CF API
1603 UniChar
CFStringGetCharacterAtIndex(CFStringRef str
, CFIndex idx
) {
1604 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, UniChar
, str
, "characterAtIndex:", idx
);
1606 __CFAssertIsString(str
);
1607 __CFAssertIndexIsInStringBounds(str
, idx
);
1608 return __CFStringGetCharacterAtIndexGuts(str
, idx
, __CFStrContents(str
));
1611 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1613 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str
, CFIndex idx
, UniChar
*ch
) {
1614 const uint8_t *contents
= __CFStrContents(str
);
1615 if (idx
>= __CFStrLength2(str
, contents
) && __CFStringNoteErrors()) return _CFStringErrBounds
;
1616 *ch
= __CFStringGetCharacterAtIndexGuts(str
, idx
, contents
);
1617 return _CFStringErrNone
;
1621 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
1623 CF_INLINE
void __CFStringGetCharactersGuts(CFStringRef str
, CFRange range
, UniChar
*buffer
, const uint8_t *contents
) {
1624 if (__CFStrIsEightBit(str
)) {
1625 __CFStrConvertBytesToUnicode(((uint8_t *)contents
) + (range
.location
+ __CFStrSkipAnyLengthByte(str
)), buffer
, range
.length
);
1627 const UniChar
*uContents
= ((UniChar
*)contents
) + range
.location
;
1628 memmove(buffer
, uContents
, range
.length
* sizeof(UniChar
));
1632 /* This one is for the CF API
1634 void CFStringGetCharacters(CFStringRef str
, CFRange range
, UniChar
*buffer
) {
1635 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "getCharacters:range:", buffer
, CFRangeMake(range
.location
, range
.length
));
1637 __CFAssertIsString(str
);
1638 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
1639 __CFStringGetCharactersGuts(str
, range
, buffer
, __CFStrContents(str
));
1642 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1644 int _CFStringCheckAndGetCharacters(CFStringRef str
, CFRange range
, UniChar
*buffer
) {
1645 const uint8_t *contents
= __CFStrContents(str
);
1646 if (range
.location
+ range
.length
> __CFStrLength2(str
, contents
) && __CFStringNoteErrors()) return _CFStringErrBounds
;
1647 __CFStringGetCharactersGuts(str
, range
, buffer
, contents
);
1648 return _CFStringErrNone
;
/* Converts the characters of str in `range` to bytes in `encoding`.
   Fast path (non-ObjC, eight-bit string whose storage encoding equals
   `encoding`, or is ASCII while `encoding` is an ASCII superset): the byte
   count is clamped to maxBufLen, the bytes are memmove'd into buffer, and the
   count is stored through usedBufLen when that pointer is non-NULL.
   Otherwise the result of __CFStringEncodeByteStream() is returned.
   NOTE(review): short lines are not visible in this copy -- in particular
   whatever guards the memmove and the fast path's return statement; confirm
   against the canonical source. */
1652 CFIndex
CFStringGetBytes(CFStringRef str
, CFRange range
, CFStringEncoding encoding
, uint8_t lossByte
, Boolean isExternalRepresentation
, uint8_t *buffer
, CFIndex maxBufLen
, CFIndex
*usedBufLen
) {
1654 /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */
1655 __CFAssertIsNotNegative(maxBufLen
);
1657 if (!CF_IS_OBJC(__kCFStringTypeID
, str
)) { // If we can grope the ivars, let's do it...
1658 __CFAssertIsString(str
);
1659 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
1661 if (__CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
1662 const unsigned char *contents
= __CFStrContents(str
);
1663 CFIndex cLength
= range
.length
;
// Never copy more than the destination can hold
1666 if (cLength
> maxBufLen
) cLength
= maxBufLen
;
1667 memmove(buffer
, contents
+ __CFStrSkipAnyLengthByte(str
) + range
.location
, cLength
);
1669 if (usedBufLen
) *usedBufLen
= cLength
;
// General path: let the encoder do the conversion
1675 return __CFStringEncodeByteStream(str
, range
.location
, range
.length
, isExternalRepresentation
, encoding
, lossByte
, buffer
, maxBufLen
, usedBufLen
);
1679 ConstStringPtr
CFStringGetPascalStringPtr (CFStringRef str
, CFStringEncoding encoding
) {
1681 if (!CF_IS_OBJC(__kCFStringTypeID
, str
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1682 __CFAssertIsString(str
);
1683 if (__CFStrHasLengthByte(str
) && __CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII
1684 const uint8_t *contents
= __CFStrContents(str
);
1685 if (__CFStrHasExplicitLength(str
) && (__CFStrLength2(str
, contents
) != (SInt32
)(*contents
))) return NULL
; // Invalid length byte
1686 return (ConstStringPtr
)contents
;
1688 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1694 const char * CFStringGetCStringPtr(CFStringRef str
, CFStringEncoding encoding
) {
1696 if (encoding
!= __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII
!= __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding
))) return NULL
;
1697 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1699 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, const char *, str
, "_fastCStringContents:", true);
1701 __CFAssertIsString(str
);
1703 if (__CFStrHasNullByte(str
)) {
1704 return (const char *)__CFStrContents(str
) + __CFStrSkipAnyLengthByte(str
);
1711 const UniChar
*CFStringGetCharactersPtr(CFStringRef str
) {
1713 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, const UniChar
*, str
, "_fastCharacterContents");
1715 __CFAssertIsString(str
);
1716 if (__CFStrIsUnicode(str
)) return (const UniChar
*)__CFStrContents(str
);
/* Copies str into buffer as a Pascal string (length byte followed by the
   bytes) in `encoding`.  Returns false when bufferSize is below 1 or when the
   length cannot be expressed in a length byte (__CFCanUseLengthByte).
   Eight-bit strings whose storage encoding is compatible with `encoding` are
   copied with memmove; everything else goes through
   __CFStringEncodeByteStream().
   NOTE(review): short lines are not visible in this copy (local declarations
   of `length`/`usedLen`, else branches, the conditional structure selecting
   between the two failure-path variants, and the final length-byte store and
   returns) -- confirm against the canonical source. */
1721 Boolean
CFStringGetPascalString(CFStringRef str
, Str255 buffer
, CFIndex bufferSize
, CFStringEncoding encoding
) {
1725 __CFAssertIsNotNegative(bufferSize
);
1726 if (bufferSize
< 1) return false;
// ObjC-backed string: only the length is obtained directly here
1728 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1729 length
= CFStringGetLength(str
);
1730 if (!__CFCanUseLengthByte(length
)) return false; // Can't fit into pstring
1732 const uint8_t *contents
;
1734 __CFAssertIsString(str
);
1736 contents
= __CFStrContents(str
);
1737 length
= __CFStrLength2(str
, contents
);
1739 if (!__CFCanUseLengthByte(length
)) return false; // Can't fit into pstring
1741 if (__CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
// One byte of the buffer is reserved for the length byte, hence >=
1742 if (length
>= bufferSize
) return false;
1743 memmove((void*)(1 + (const char*)buffer
), (__CFStrSkipAnyLengthByte(str
) + contents
), length
);
// Conversion path: success means every character was converted
1749 if (__CFStringEncodeByteStream(str
, 0, length
, false, encoding
, false, (void*)(1 + (uint8_t*)buffer
), bufferSize
- 1, &usedLen
) != length
) {
1751 if (bufferSize
> 0) {
1752 strncpy((char *)buffer
+ 1, CONVERSIONFAILURESTR
, bufferSize
- 1);
// NOTE(review): sizeof() of the string literal counts its terminating NUL,
// so the stored length byte is one larger than the visible text.
1753 buffer
[0] = (CFIndex
)sizeof(CONVERSIONFAILURESTR
) < (bufferSize
- 1) ? (CFIndex
)sizeof(CONVERSIONFAILURESTR
) : (bufferSize
- 1);
// Alternative failure handling: hand back an empty Pascal string
1756 if (bufferSize
> 0) buffer
[0] = 0;
/* Copies str into buffer as a C string in `encoding`.  Returns false when
   bufferSize is below 1.  ObjC-backed strings are dispatched to
   "_getCString:maxLength:encoding:" with bufferSize - 1 as the maximum
   length.  Eight-bit strings whose storage encoding is compatible with
   `encoding` are copied with memmove when they fit (len < bufferSize);
   everything else goes through __CFStringEncodeByteStream().
   NOTE(review): short lines are not visible in this copy (declarations of
   `len`/`usedLen`, the terminator store on the memmove path, else branches,
   the conditional structure selecting between the two failure-path variants,
   and the returns) -- confirm against the canonical source. */
1764 Boolean
CFStringGetCString(CFStringRef str
, char *buffer
, CFIndex bufferSize
, CFStringEncoding encoding
) {
1765 const uint8_t *contents
;
1768 __CFAssertIsNotNegative(bufferSize
);
1769 if (bufferSize
< 1) return false;
1771 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID
, Boolean
, str
, "_getCString:maxLength:encoding:", buffer
, bufferSize
- 1, encoding
);
1773 __CFAssertIsString(str
);
1775 contents
= __CFStrContents(str
);
1776 len
= __CFStrLength2(str
, contents
);
1778 if (__CFStrIsEightBit(str
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
// Room is needed for the terminating NUL as well, hence >=
1779 if (len
>= bufferSize
) return false;
1780 memmove(buffer
, contents
+ __CFStrSkipAnyLengthByte(str
), len
);
// Conversion path: success means every character was converted
1786 if (__CFStringEncodeByteStream(str
, 0, len
, false, encoding
, false, (unsigned char*) buffer
, bufferSize
- 1, &usedLen
) == len
) {
1787 buffer
[usedLen
] = '\0';
// NOTE(review): strncpy does not NUL-terminate when bufferSize is not larger
// than the length of the failure text.
1791 strncpy(buffer
, CONVERSIONFAILURESTR
, bufferSize
);
// Alternative failure handling: hand back an empty C string
1793 if (bufferSize
> 0) buffer
[0] = 0;
// Size, in UniChars, of the scratch buffers passed to CFUniCharMapCaseTo() by the compare/find code below
1800 #define MAX_CASE_MAPPING_BUF (8)
1802 /* Special casing for Uk sorting */
// Compile-time switch for the punctuation-skipping code in CFStringCompareWithOptions()
1803 #define DO_IGNORE_PUNCTUATION 1
1804 #if DO_IGNORE_PUNCTUATION
// Language code compared against the user's first language to enable punctuation skipping
1805 #define UKRAINIAN_LANG_CODE (45)
// Set to true once the user's language list has been examined
1806 static bool __CFLocaleChecked
= false;
// Plane-0 punctuation bitmap; only assigned when the language check matched, otherwise stays NULL
1807 static const uint8_t *__CFPunctSetBMP
= NULL
;
1808 #endif /* DO_IGNORE_PUNCTUATION */
1810 /* ??? We need to implement some additional flags here
1811 ??? Also, pay attention to flag 2, which is the NS flag (which CF has as flag 16, w/opposite meaning). */
/* Compares the characters of `string` inside rangeToCompare with all of
   string2 and returns kCFCompareLessThan, kCFCompareGreaterThan or
   kCFCompareEqualTo.
   Options read from compareOptions:
     kCFCompareCaseInsensitive - characters are case-mapped before comparing
     kCFCompareNonliteral      - a decomposable character on one side is
                                 compared against its decomposition
     kCFCompareNumerically     - runs of ASCII digits are compared by value
     kCFCompareLocalized       - stored in `localized`
   When one side runs out first, the side with characters left is greater.
   NOTE(review): short lines are not visible in this copy (declarations of
   ch1/ch2/langCode/idx, `do {` openers, index increments/decrements, else
   branches, the guards around the punctuation block, closing braces).  The
   comments below describe only what the visible lines do -- confirm the
   control flow against the canonical source before changing code. */
1813 CFComparisonResult
CFStringCompareWithOptions(CFStringRef string
, CFStringRef string2
, CFRange rangeToCompare
, CFOptionFlags compareOptions
) {
1814 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
1815 CFStringInlineBuffer strBuf1
, strBuf2
;
1817 const uint8_t *punctBMP
= NULL
;
1818 Boolean caseInsensitive
= (compareOptions
& kCFCompareCaseInsensitive
? true : false);
1819 Boolean decompose
= (compareOptions
& kCFCompareNonliteral
? true : false);
1820 Boolean numerically
= (compareOptions
& kCFCompareNumerically
? true : false);
1821 Boolean localized
= (compareOptions
& kCFCompareLocalized
? true : false);
1823 #if DO_IGNORE_PUNCTUATION
// One-time check of the user's first language; result cached in __CFPunctSetBMP
1825 if (!__CFLocaleChecked
) {
1826 CFArrayRef locales
= _CFBundleCopyUserLanguages(false);
1828 if (locales
&& (CFArrayGetCount(locales
) > 0)) {
1831 if (CFBundleGetLocalizationInfoForLocalization((CFStringRef
)CFArrayGetValueAtIndex(locales
, 0), &langCode
, NULL
, NULL
, NULL
) && (langCode
== UKRAINIAN_LANG_CODE
)) {
1832 __CFPunctSetBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, 0);
1837 __CFLocaleChecked
= true;
1840 punctBMP
= __CFPunctSetBMP
;
1842 #endif /* DO_IGNORE_PUNCTUATION */
// Set up inline buffers: strBuf1 covers rangeToCompare of string, strBuf2 covers all of string2
1844 CFStringInitInlineBuffer(string
, &strBuf1
, CFRangeMake(rangeToCompare
.location
, rangeToCompare
.length
));
1845 CFIndex strBuf1_idx
= 0;
1846 CFIndex string2_len
= CFStringGetLength(string2
);
1847 CFStringInitInlineBuffer(string2
, &strBuf2
, CFRangeMake(0, string2_len
));
1848 CFIndex strBuf2_idx
= 0;
// Main loop: runs while both sides still have characters
1850 while (strBuf1_idx
< rangeToCompare
.length
&& strBuf2_idx
< string2_len
) {
1851 ch1
= CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
);
1852 ch2
= CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
);
// Numeric option: accumulate each side's run of ASCII digits and compare the values
1854 if (numerically
&& (ch1
<= '9' && ch1
>= '0') && (ch2
<= '9' && ch2
>= '0')) { // If both are not digits, then don't do numerical comparison
1855 unsigned long long n1
= 0; // !!! Doesn't work if numbers are > max unsigned long long
1856 unsigned long long n2
= 0;
1858 n1
= n1
* 10 + (ch1
- '0');
1860 if (rangeToCompare
.length
<= strBuf1_idx
) break;
1861 ch1
= CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
);
1862 } while (ch1
<= '9' && ch1
>= '0');
1864 n2
= n2
* 10 + (ch2
- '0');
1866 if (string2_len
<= strBuf2_idx
) break;
1867 ch2
= CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
);
1868 } while (ch2
<= '9' && ch2
>= '0');
1869 if (n1
< n2
) return kCFCompareLessThan
; else if (n1
> n2
) return kCFCompareGreaterThan
;
1870 continue; // If numbers were equal, go back to top without incrementing the buffer pointers
// Combine a high/low surrogate pair on side 1 into a single UTF-32 value
1873 if (CFUniCharIsSurrogateHighCharacter(ch1
)) {
1875 if (strBuf1_idx
< rangeToCompare
.length
&& CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
))) {
1876 ch1
= CFUniCharGetLongCharacterForSurrogatePair(ch1
, CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
));
// Same surrogate handling for side 2
1881 if (CFUniCharIsSurrogateHighCharacter(ch2
)) {
1883 if (strBuf2_idx
< string2_len
&& CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
))) {
1884 ch2
= CFUniCharGetLongCharacterForSurrogatePair(ch2
, CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
));
1891 #if DO_IGNORE_PUNCTUATION
// Punctuation on either side is skipped; BMP characters use punctBMP, others the bitmap of their plane
1893 if (CFUniCharIsMemberOfBitmap(ch1
, (ch1
< 0x10000 ? punctBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, (ch1
>> 16))))) {
1894 ++strBuf1_idx
; continue;
1896 if (CFUniCharIsMemberOfBitmap(ch2
, (ch2
< 0x10000 ? punctBMP
: CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, (ch2
>> 16))))) {
1897 ++strBuf2_idx
; continue;
1900 #endif /* DO_IGNORE_PUNCTUATION */
1901 // We standardize to lowercase here since currently, as of Unicode 3.1.1, it's one-to-one mapping.
1902 // Note we map to uppercase for both SMALL LETTER SIGMA and SMALL LETTER FINAL SIGMA
1903 if (caseInsensitive
) {
// 'A' - 'a' is negative, so subtracting it lowercases ASCII capitals
1905 ch1
-= ((ch1
>= 'A' && ch1
<= 'Z') ? 'A' - 'a' : 0);
1906 } else if (ch1
== 0x03C2 || ch1
== 0x03C3 || ch1
== 0x03A3) { // SMALL SIGMA
1909 UniChar buffer
[MAX_CASE_MAPPING_BUF
];
1911 if (CFUniCharMapCaseTo(ch1
, buffer
, MAX_CASE_MAPPING_BUF
, kCFUniCharToLowercase
, 0, NULL
) > 1) { // It's supposed to be surrogates
1912 ch1
= CFUniCharGetLongCharacterForSurrogatePair(buffer
[0], buffer
[1]);
// Same case mapping for side 2
1918 ch2
-= ((ch2
>= 'A' && ch2
<= 'Z') ? 'A' - 'a' : 0);
1919 } else if (ch2
== 0x03C2 || ch2
== 0x03C3 || ch2
== 0x03A3) { // SMALL SIGMA
1922 UniChar buffer
[MAX_CASE_MAPPING_BUF
];
1924 if (CFUniCharMapCaseTo(ch2
, buffer
, MAX_CASE_MAPPING_BUF
, kCFUniCharToLowercase
, 0, NULL
) > 1) { // It's supposed to be surrogates
1925 ch2
= CFUniCharGetLongCharacterForSurrogatePair(buffer
[0], buffer
[1]);
1932 if (ch1
!= ch2
) { // still different
1933 if (decompose
) { // ??? This is not exactly the canonical comparison (We need to do priority sort)
1934 Boolean isCh1Decomposable
= (ch1
> 0x7F && CFUniCharIsMemberOf(ch1
, kCFUniCharDecomposableCharacterSet
));
1935 Boolean isCh2Decomposable
= (ch2
> 0x7F && CFUniCharIsMemberOf(ch2
, kCFUniCharDecomposableCharacterSet
));
// Only when exactly one side is decomposable is that side expanded and matched against the other side's characters
1937 if (isCh1Decomposable
!= isCh2Decomposable
) {
1938 UTF32Char decomposedCharater
[MAX_DECOMPOSED_LENGTH
];
1939 UInt32 decomposedCharacterLength
;
1942 if (isCh1Decomposable
) {
1943 decomposedCharacterLength
= CFUniCharDecomposeCharacter(ch1
, decomposedCharater
, MAX_DECOMPOSED_LENGTH
);
1944 for (idx
= 0; idx
< decomposedCharacterLength
&& strBuf2_idx
< string2_len
; idx
++) {
1945 ch1
= decomposedCharater
[idx
];
1946 if (ch1
< ch2
) return kCFCompareLessThan
; else if (ch1
> ch2
) return kCFCompareGreaterThan
;
// Advance side 2; 0xffff stands in for "no more characters"
1947 strBuf2_idx
++; ch2
= (strBuf2_idx
< string2_len
? CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
) : 0xffff);
1948 if (CFUniCharIsSurrogateHighCharacter(ch2
)) {
1950 if (strBuf2_idx
< string2_len
&& CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
))) {
1951 ch2
= CFUniCharGetLongCharacterForSurrogatePair(ch2
, CFStringGetCharacterFromInlineBuffer(&strBuf2
, strBuf2_idx
));
1957 strBuf1_idx
++; continue;
1958 } else { // ch2 is decomposable, then
1959 decomposedCharacterLength
= CFUniCharDecomposeCharacter(ch2
, decomposedCharater
, MAX_DECOMPOSED_LENGTH
);
1960 for (idx
= 0; idx
< decomposedCharacterLength
&& strBuf1_idx
< rangeToCompare
.length
; idx
++) {
1961 ch2
= decomposedCharater
[idx
];
1962 if (ch1
< ch2
) return kCFCompareLessThan
; else if (ch1
> ch2
) return kCFCompareGreaterThan
;
// Advance side 1; 0xffff stands in for "no more characters"
1963 strBuf1_idx
++; ch1
= (strBuf1_idx
< rangeToCompare
.length
? CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
) : 0xffff);
1964 if (CFUniCharIsSurrogateHighCharacter(ch1
)) {
1966 if (strBuf1_idx
< rangeToCompare
.length
&& CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
))) {
1967 ch1
= CFUniCharGetLongCharacterForSurrogatePair(ch1
, CFStringGetCharacterFromInlineBuffer(&strBuf1
, strBuf1_idx
));
1973 strBuf2_idx
++; continue;
// Plain ordering of the two (possibly case-mapped) characters
1977 if (ch1
< ch2
) return kCFCompareLessThan
; else if (ch1
> ch2
) return kCFCompareGreaterThan
;
1980 strBuf1_idx
++; strBuf2_idx
++;
// Loop finished without a difference: whichever side still has characters is the greater one
1982 if (strBuf1_idx
< rangeToCompare
.length
) {
1983 return kCFCompareGreaterThan
;
1984 } else if (strBuf2_idx
< string2_len
) {
1985 return kCFCompareLessThan
;
1987 return kCFCompareEqualTo
;
1992 CFComparisonResult
CFStringCompare(CFStringRef string
, CFStringRef str2
, CFOptionFlags options
) {
1993 return CFStringCompareWithOptions(string
, str2
, CFRangeMake(0, CFStringGetLength(string
)), options
);
1996 /* ??? Need to implement localized find */
/* Searches `string` inside rangeToSearch for stringToFind.
   Returns false immediately when the search string is empty, when the range
   is empty, or (for literal searches) when the search string is longer than
   the range.  On a match the visible code stores the match location and
   length through `result`.
   Options read from compareOptions:
     kCFCompareCaseInsensitive - both sides are case-mapped before comparing
     kCFCompareNonliteral      - decomposable characters are compared against
                                 their decompositions
     kCFCompareBackwards       - candidate positions are visited from the end
     kCFCompareAnchored        - only the first candidate position is tried
   The search string is copied into findBuf (stack buffer tmpBuf, or a heap
   block when longer than MAXTMPBUFFERLEN) which is released before returning.
   NOTE(review): short lines are not visible in this copy (declarations of
   step/findBuf/ch1/ch2/chCnt/idx, the outer loop opener, index
   increments/decrements, else branches, several break/continue statements,
   whatever guards the stores through `result`, the final return).  Callers in
   this file pass result == NULL, so a NULL guard presumably exists on a
   missing line -- confirm against the canonical source. */
1998 Boolean
CFStringFindWithOptions(CFStringRef string
, CFStringRef stringToFind
, CFRange rangeToSearch
, CFOptionFlags compareOptions
, CFRange
*result
) {
1999 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2001 SInt32 fromLoc
, toLoc
; // fromLoc and toLoc are inclusive
2002 SInt32 cnt
, findStrLen
= CFStringGetLength(stringToFind
);
2003 Boolean done
= false;
2004 Boolean caseInsensitive
= (compareOptions
& kCFCompareCaseInsensitive
) ? true : false;
2005 Boolean decompose
= (compareOptions
& kCFCompareNonliteral
) ? true : false;
2006 UniChar tmpBuf
[MAXTMPBUFFERLEN
];
2009 Boolean isDecompBuf
[MAXISDECOMPBUFFERLEN
];
2010 CFStringInlineBuffer buf
;
2011 CFAllocatorRef tmpAlloc
= NULL
;
2013 if (findStrLen
== 0) return false; // This is the way it is, by definition (even find("", "") -> false)
2014 if (!decompose
&& findStrLen
> rangeToSearch
.length
) return false;
2015 if (rangeToSearch
.length
== 0) return false; // This protects against crashes further below (see 2908472); if we ever implement ignorable characters, this shouldn't be here
// Fetch the search string into findBuf: stack buffer when it fits, heap otherwise
2017 findBuf
= (findStrLen
> MAXTMPBUFFERLEN
) ? CFAllocatorAllocate(tmpAlloc
= __CFGetDefaultAllocator(), findStrLen
* sizeof(UniChar
), 0) : tmpBuf
;
2018 if (findBuf
!= tmpBuf
&& __CFOASafe
) __CFSetLastAllocationEventName(findBuf
, "CFString (temp)");
2019 CFStringGetCharacters(stringToFind
, CFRangeMake(0, findStrLen
), findBuf
);
// Pre-compute "is decomposable" for the first MAXISDECOMPBUFFERLEN search characters
2022 SInt32 max
= __CFMin(MAXISDECOMPBUFFERLEN
, findStrLen
);
2024 for (cnt
= 0;cnt
< max
;cnt
++) {
2025 if (CFUniCharIsSurrogateHighCharacter(findBuf
[cnt
]) && (cnt
+ 1 < max
) && CFUniCharIsSurrogateLowCharacter(findBuf
[cnt
+ 1])) {
2026 isDecompBuf
[cnt
] = isDecompBuf
[cnt
+ 1] = CFUniCharIsMemberOf(CFUniCharGetLongCharacterForSurrogatePair(findBuf
[cnt
], findBuf
[cnt
+ 1]), kCFUniCharDecomposableCharacterSet
);
// NOTE(review): this line uses kCFCharacterSetDecomposable while every other
// membership test here uses kCFUniCharDecomposableCharacterSet -- confirm the two are interchangeable.
2029 isDecompBuf
[cnt
] = (findBuf
[cnt
] > 0x7F && CFUniCharIsMemberOf(findBuf
[cnt
], kCFCharacterSetDecomposable
));
2034 if (caseInsensitive
) { /* Lower case the search string */
2035 for (cnt
= 0; cnt
< findStrLen
; cnt
++) {
2038 if (ch1
>= 'A' && ch1
<= 'Z') findBuf
[cnt
] = (ch1
- 'A' + 'a'); /* Lower case the cheap way */
2039 } else if (ch1
== 0x03C2 || ch1
== 0x03C3 || ch1
== 0x03A3) { // SMALL SIGMA
// All three sigma forms are folded to the same code point
2040 findBuf
[cnt
] = 0x03A3;
2042 UniChar buffer
[MAX_CASE_MAPPING_BUF
];
2044 if (CFUniCharIsSurrogateHighCharacter(ch1
) && (cnt
+ 1 < findStrLen
) && CFUniCharIsSurrogateLowCharacter(findBuf
[cnt
+ 1])) {
2045 ch1
= CFUniCharGetLongCharacterForSurrogatePair(ch1
, findBuf
[cnt
+ 1]);
2047 if (CFUniCharMapCaseTo(ch1
, buffer
, MAX_CASE_MAPPING_BUF
, kCFUniCharToLowercase
, 0, NULL
) > 1) { // It's supposed to be surrogates
2048 findBuf
[cnt
] = buffer
[0];
2049 findBuf
[++cnt
] = buffer
[1];
2051 findBuf
[cnt
] = *buffer
;
// Work out the first (fromLoc) and last (toLoc) candidate start positions
2057 if (compareOptions
& kCFCompareBackwards
) {
2058 fromLoc
= rangeToSearch
.location
+ rangeToSearch
.length
- (decompose
? 1 : findStrLen
);
2059 toLoc
= ((compareOptions
& kCFCompareAnchored
) && !decompose
? fromLoc
: rangeToSearch
.location
);
2061 fromLoc
= rangeToSearch
.location
;
2062 toLoc
= ((compareOptions
& kCFCompareAnchored
) ? fromLoc
: rangeToSearch
.location
+ rangeToSearch
.length
- (decompose
? 1 : findStrLen
));
// Direction in which candidate positions are visited
2065 step
= (fromLoc
<= toLoc
) ? 1 : -1;
2067 CFStringInitInlineBuffer(string
, &buf
, CFRangeMake(0, rangeToSearch
.location
+ rangeToSearch
.length
));
2068 CFIndex buf_idx
= fromLoc
, buf_idx_end
= rangeToSearch
.location
+ rangeToSearch
.length
;
// Try to match the whole search string at the current position
2072 for (chCnt
= 0; chCnt
< findStrLen
; chCnt
++) {
// 0xffff stands in for "past the end of the searched range"
2073 ch2
= buf_idx
< buf_idx_end
? CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
) : 0xffff;
2074 if (decompose
&& ch2
== 0xffff) break;
2076 if (caseInsensitive
) {
2077 if (CFUniCharIsSurrogateHighCharacter(ch2
)) {
2079 if (buf_idx
< buf_idx_end
&& CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
))) {
2080 ch2
= CFUniCharGetLongCharacterForSurrogatePair(ch2
, CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
));
2087 if (ch2
>= 'A' && ch2
<= 'Z') ch2
= (ch2
- 'A' + 'a'); /* Lower case the cheap way */
2088 } else if (ch2
== 0x03C2 || ch2
== 0x03C3 || ch2
== 0x03A3) { // SMALL SIGMA
2091 UniChar buffer
[MAX_CASE_MAPPING_BUF
];
2093 if (CFUniCharMapCaseTo(ch2
, buffer
, MAX_CASE_MAPPING_BUF
, kCFUniCharToLowercase
, 0, NULL
) > 1) { // It's supposed to be surrogates
2094 ch2
= CFUniCharGetLongCharacterForSurrogatePair(buffer
[0], buffer
[1]);
// Surrogate handling for the searched string's current character
2101 if (CFUniCharIsSurrogateHighCharacter(ch2
)) {
2103 if (buf_idx
< buf_idx_end
&& CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
))) {
2104 ch2
= CFUniCharGetLongCharacterForSurrogatePair(ch2
, CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
));
// Fetch the search-string character, combining a surrogate pair when present
2110 if (CFUniCharIsSurrogateHighCharacter(findBuf
[chCnt
]) && (chCnt
+ 1 < findStrLen
) && CFUniCharIsSurrogateLowCharacter(findBuf
[chCnt
+ 1])) {
2111 ch1
= CFUniCharGetLongCharacterForSurrogatePair(findBuf
[chCnt
], findBuf
[chCnt
+ 1]);
2114 ch1
= findBuf
[chCnt
];
2117 if (ch1
!= ch2
) { // ??? This is not exactly the canonical comparison. Needs to be addressed by Cheetah.
// Use the precomputed flag when chCnt is inside isDecompBuf, otherwise test directly
2118 Boolean isCh1Decomposable
= (chCnt
< MAXISDECOMPBUFFERLEN
? isDecompBuf
[chCnt
] : (ch1
> 0x7F && CFUniCharIsMemberOf(ch1
, kCFUniCharDecomposableCharacterSet
)));
2119 Boolean isCh2Decomposable
= (ch2
> 0x7F && CFUniCharIsMemberOf(ch2
, kCFUniCharDecomposableCharacterSet
));
2121 if (isCh1Decomposable
!= isCh2Decomposable
) {
2122 UTF32Char decomposedCharater
[MAX_DECOMPOSED_LENGTH
];
2123 UInt32 decomposedCharacterLength
;
// Search character is the decomposable one: its expansion must match consecutive searched characters
2126 if (isCh1Decomposable
) {
2127 decomposedCharacterLength
= CFUniCharDecomposeCharacter(ch1
, decomposedCharater
, MAX_DECOMPOSED_LENGTH
);
2128 for (idx
= 0; idx
< decomposedCharacterLength
&& buf_idx
< buf_idx_end
; idx
++) {
2129 if (decomposedCharater
[idx
] != ch2
) break;
2131 buf_idx
++; ch2
= buf_idx
< buf_idx_end
? CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
) : 0xffff;
2132 if (CFUniCharIsSurrogateHighCharacter(ch2
)) {
2134 if (buf_idx
< buf_idx_end
&& CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
))) {
2135 ch2
= CFUniCharGetLongCharacterForSurrogatePair(ch2
, CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
));
// Expansion not fully matched: this candidate position fails
2141 if (idx
< decomposedCharacterLength
) break;
2143 } else { // ch2 is decomposable, then
2144 int32_t foundLen
= chCnt
;
// Searched character is the decomposable one: its expansion must match consecutive search-string characters
2146 decomposedCharacterLength
= CFUniCharDecomposeCharacter(ch2
, decomposedCharater
, MAX_DECOMPOSED_LENGTH
);
2147 for (idx
= 0;idx
< decomposedCharacterLength
&& foundLen
< findStrLen
;idx
++) {
2148 if (CFUniCharIsSurrogateHighCharacter(findBuf
[foundLen
]) && ((foundLen
+ 1) < findStrLen
) && CFUniCharIsSurrogateLowCharacter(findBuf
[foundLen
+ 1])) {
2149 if (CFUniCharGetLongCharacterForSurrogatePair(findBuf
[foundLen
], findBuf
[foundLen
+ 1]) != decomposedCharater
[idx
]) break;
2152 if (findBuf
[foundLen
] != decomposedCharater
[idx
]) break;
2156 if (idx
< decomposedCharacterLength
) break;
2157 chCnt
= foundLen
- 1; // Decrement so we can back up
2158 buf_idx
++; continue;
// Literal comparison
2164 if (ch2
> 0xFFFF) { // Non-BMP
2165 if (CFUniCharIsSurrogateHighCharacter(findBuf
[chCnt
]) && (chCnt
+ 1 < findStrLen
) && CFUniCharIsSurrogateLowCharacter(findBuf
[chCnt
+ 1])) {
2166 if (ch2
!= CFUniCharGetLongCharacterForSurrogatePair(findBuf
[chCnt
], findBuf
[chCnt
+ 1])) break;
2172 if (findBuf
[chCnt
] != ch2
) break;
// Every search character was consumed: candidate match
2177 if (chCnt
== findStrLen
) {
2178 if (decompose
&& (buf_idx
< buf_idx_end
)) {
2179 if ((compareOptions
& kCFCompareAnchored
) && (compareOptions
& kCFCompareBackwards
)) break;
2181 ch2
= CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
);
2183 if (CFUniCharIsSurrogateHighCharacter(ch2
)) {
2184 if ((buf_idx
+ 1) < buf_idx_end
&& CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
+ 1))) {
2185 ch2
= CFUniCharGetLongCharacterForSurrogatePair(ch2
, CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
+ 1));
2188 if (ch2
> 0x7F && CFUniCharIsMemberOf(ch2
, kCFUniCharNonBaseCharacterSet
)) continue; // Next char is non-base
// Report the match: non-literal matches may span a different number of characters than findStrLen
2192 result
->location
= cnt
;
2193 result
->length
= (decompose
? buf_idx
- cnt
: findStrLen
);
2195 } else if (cnt
== toLoc
) {
// Release the heap copy of the search string, if one was made
2203 if (findBuf
!= tmpBuf
) CFAllocatorDeallocate(tmpAlloc
, findBuf
);
2209 // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
2211 static const void *__rangeRetain(CFAllocatorRef allocator
, const void *ptr
) {
2212 CFRetain(*(CFDataRef
*)((uint8_t *)ptr
+ sizeof(CFRange
)));
2216 static void __rangeRelease(CFAllocatorRef allocator
, const void *ptr
) {
2217 CFRelease(*(CFDataRef
*)((uint8_t *)ptr
+ sizeof(CFRange
)));
2220 static CFStringRef
__rangeCopyDescription(const void *ptr
) {
2221 CFRange range
= *(CFRange
*)ptr
;
2222 return CFStringCreateWithFormat(NULL
/* ??? allocator */, NULL
, CFSTR("{%d, %d}"), range
.location
, range
.length
);
2225 static Boolean
__rangeEqual(const void *ptr1
, const void *ptr2
) {
2226 CFRange range1
= *(CFRange
*)ptr1
;
2227 CFRange range2
= *(CFRange
*)ptr2
;
2228 return (range1
.location
== range2
.location
) && (range1
.length
== range2
.length
);
2232 CFArrayRef
CFStringCreateArrayWithFindResults(CFAllocatorRef alloc
, CFStringRef string
, CFStringRef stringToFind
, CFRange rangeToSearch
, CFOptionFlags compareOptions
) {
2234 Boolean backwards
= compareOptions
& kCFCompareBackwards
;
2235 UInt32 endIndex
= rangeToSearch
.location
+ rangeToSearch
.length
;
2236 CFMutableDataRef rangeStorage
= NULL
; // Basically an array of CFRange, CFDataRef (packed)
2237 uint8_t *rangeStorageBytes
= NULL
;
2238 CFIndex foundCount
= 0;
2239 CFIndex capacity
= 0; // Number of CFRange, CFDataRef element slots in rangeStorage
2241 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
2243 while ((rangeToSearch
.length
> 0) && CFStringFindWithOptions(string
, stringToFind
, rangeToSearch
, compareOptions
, &foundRange
)) {
2244 // Determine the next range
2246 rangeToSearch
.length
= foundRange
.location
- rangeToSearch
.location
;
2248 rangeToSearch
.location
= foundRange
.location
+ foundRange
.length
;
2249 rangeToSearch
.length
= endIndex
- rangeToSearch
.location
;
2252 // If necessary, grow the data and squirrel away the found range
2253 if (foundCount
>= capacity
) {
2254 if (rangeStorage
== NULL
) rangeStorage
= CFDataCreateMutable(alloc
, 0);
2255 capacity
= (capacity
+ 4) * 2;
2256 CFDataSetLength(rangeStorage
, capacity
* (sizeof(CFRange
) + sizeof(CFDataRef
)));
2257 rangeStorageBytes
= (uint8_t *)CFDataGetMutableBytePtr(rangeStorage
) + foundCount
* (sizeof(CFRange
) + sizeof(CFDataRef
));
2259 memmove(rangeStorageBytes
, &foundRange
, sizeof(CFRange
)); // The range
2260 memmove(rangeStorageBytes
+ sizeof(CFRange
), &rangeStorage
, sizeof(CFDataRef
)); // The data
2261 rangeStorageBytes
+= (sizeof(CFRange
) + sizeof(CFDataRef
));
2265 if (foundCount
> 0) {
2267 CFMutableArrayRef array
;
2268 const CFArrayCallBacks callbacks
= {0, __rangeRetain
, __rangeRelease
, __rangeCopyDescription
, __rangeEqual
};
2270 CFDataSetLength(rangeStorage
, foundCount
* (sizeof(CFRange
) + sizeof(CFDataRef
))); // Tighten storage up
2271 rangeStorageBytes
= (uint8_t *)CFDataGetMutableBytePtr(rangeStorage
);
2273 array
= CFArrayCreateMutable(alloc
, foundCount
* sizeof(CFRange
*), &callbacks
);
2274 for (cnt
= 0; cnt
< foundCount
; cnt
++) {
2275 // Each element points to the appropriate CFRange in the CFData
2276 CFArrayAppendValue(array
, rangeStorageBytes
+ cnt
* (sizeof(CFRange
) + sizeof(CFDataRef
)));
2278 CFRelease(rangeStorage
); // We want the data to go away when all CFRanges inside it are released...
2286 CFRange
CFStringFind(CFStringRef string
, CFStringRef stringToFind
, CFOptionFlags compareOptions
) {
2289 if (CFStringFindWithOptions(string
, stringToFind
, CFRangeMake(0, CFStringGetLength(string
)), compareOptions
, &foundRange
)) {
2292 return CFRangeMake(kCFNotFound
, 0);
2296 Boolean
CFStringHasPrefix(CFStringRef string
, CFStringRef prefix
) {
2297 return CFStringFindWithOptions(string
, prefix
, CFRangeMake(0, CFStringGetLength(string
)), kCFCompareAnchored
, NULL
);
2300 Boolean
CFStringHasSuffix(CFStringRef string
, CFStringRef suffix
) {
2301 return CFStringFindWithOptions(string
, suffix
, CFRangeMake(0, CFStringGetLength(string
)), kCFCompareAnchored
|kCFCompareBackwards
, NULL
);
// Code points of the two joiner characters named by the macros
2304 #define ZERO_WIDTH_JOINER (0x200D)
2305 #define COMBINING_GRAPHEME_JOINER (0x034F)
// NOTE(review): presumably an upper bound on the hint lengths listed in __CFTranscodingHintLength -- confirm at the use site
2306 #define MAX_TRANSCODING_LENGTH 4
// Inclusive code point ranges of the Hangul jamo blocks (leading consonant, vowel, trailing consonant)
2309 #define HANGUL_CHOSEONG_START (0x1100)
2310 #define HANGUL_CHOSEONG_END (0x115F)
2311 #define HANGUL_JUNGSEONG_START (0x1160)
2312 #define HANGUL_JUNGSEONG_END (0x11A2)
2313 #define HANGUL_JONGSEONG_START (0x11A8)
2314 #define HANGUL_JONGSEONG_END (0x11F9)
// Inclusive code point range of the precomposed Hangul syllables
2316 #define HANGUL_SYLLABLE_START (0xAC00)
2317 #define HANGUL_SYLLABLE_END (0xD7AF)
// Modulus applied to a syllable's offset from HANGUL_SYLLABLE_START by _CFStringIsHangulLVT()
2319 #define HANGUL_JONGSEONG_COUNT (28)
2321 CF_INLINE
bool _CFStringIsHangulLVT(UTF32Char character
) {
2322 return (((character
- HANGUL_SYLLABLE_START
) % HANGUL_JONGSEONG_COUNT
) ? true : false);
// Lookup table for transcoding-hint characters. The code in this file indexes it with
// (character - 0xF860) after testing (character & 0x1FFFF0) == 0xF860, and adds the entry
// plus one to the hint's own index to find where the hinted run ends.
2325 static uint8_t __CFTranscodingHintLength
[] = {
2326 2, 3, 4, 4, 4, 4, 4, 2, 2, 2, 2, 4, 0, 0, 0, 0
// States of the Hangul scan in _CFStringInlineBufferGetComposedRange():
// L / V / T for conjoining jamo classes, LV / LVT for precomposed syllables,
// and Break to stop extending the range.
2330 kCFStringHangulStateL
,
2331 kCFStringHangulStateV
,
2332 kCFStringHangulStateT
,
2333 kCFStringHangulStateLV
,
2334 kCFStringHangulStateLVT
,
2335 kCFStringHangulStateBreak
// Computes the composed-character range around index `start` of `buffer`.
//   buffer     - inline buffer the characters are read from
//   start      - index the range is built around; the range initially covers [start, start + 1)
//   type       - cluster type; kCFStringBackwardDeletionCluster disables combining for
//                characters in 0x0530..0x194F
//   nonBaseBMP - plane-0 non-base character bitmap, used for BMP characters
// Returns CFRangeMake(start, end - start) using the final values of `start` and `end`.
// NOTE(review): the statements that actually move `start` and `end`, and some loop and
// switch headers, are not visible in this excerpt. The comments below describe only the
// visible tests.
2338 static CFRange
_CFStringInlineBufferGetComposedRange(CFStringInlineBuffer
*buffer
, CFIndex start
, CFStringCharacterClusterType type
, const uint8_t *nonBaseBMP
) {
2339 CFIndex end
= start
+ 1;
2340 const uint8_t *nonBase
= nonBaseBMP
;
2341 UTF32Char character
;
2342 UTF16Char otherSurrogate
;
2345 character
= CFStringGetCharacterFromInlineBuffer(buffer
, start
);
2348 // We don't combine characters in Armenian ~ Limbu range for backward deletion
2349 if ((type
!= kCFStringBackwardDeletionCluster
) || (character
< 0x0530) || (character
> 0x194F)) {
2350 // Check if the current is surrogate
2351 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, start
+ 1)))) {
2353 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
2354 nonBase
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (character
>> 16));
// Backward scan: stop at the first character that is neither in the non-base bitmap
// nor one of the special-cased code points (0xFF9E, 0xFF9F, 0xF870..0xF87F).
2359 if ((type
== kCFStringBackwardDeletionCluster
) && (character
>= 0x0530) && (character
< 0x1950)) break;
2361 if (character
< 0x10000) { // the first round could be already be non-BMP
2362 if (CFUniCharIsSurrogateLowCharacter(character
) && CFUniCharIsSurrogateHighCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, start
- 1)))) {
2363 character
= CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate
, character
);
2364 nonBase
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (character
>> 16));
2367 nonBase
= nonBaseBMP
;
2371 if (!CFUniCharIsMemberOfBitmap(character
, nonBase
) && (character
!= 0xFF9E) && (character
!= 0xFF9F) && ((character
& 0x1FFFF0) != 0xF870)) break;
2375 character
= CFStringGetCharacterFromInlineBuffer(buffer
, start
);
// Hangul handling: entered only when the character at `start` is a conjoining jamo
// or a precomposed syllable.
2380 if (((character
>= HANGUL_CHOSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) || ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
))) {
2382 uint8_t initialState
;
// Classify the starting character into the initial state.
2384 if (character
< HANGUL_JUNGSEONG_START
) {
2385 state
= kCFStringHangulStateL
;
2386 } else if (character
< HANGUL_JONGSEONG_START
) {
2387 state
= kCFStringHangulStateV
;
2388 } else if (character
< HANGUL_SYLLABLE_START
) {
2389 state
= kCFStringHangulStateT
;
2391 state
= (_CFStringIsHangulLVT(character
) ? kCFStringHangulStateLVT
: kCFStringHangulStateLV
);
2393 initialState
= state
;
// Look at the character before `start` for as long as it is still in a Hangul range.
2396 while (((character
= CFStringGetCharacterFromInlineBuffer(buffer
, start
- 1)) >= HANGUL_CHOSEONG_START
) && (character
<= HANGUL_SYLLABLE_END
) && ((character
<= HANGUL_JONGSEONG_END
) || (character
>= HANGUL_SYLLABLE_START
))) {
2398 case kCFStringHangulStateV
:
2399 if (character
<= HANGUL_CHOSEONG_END
) {
2400 state
= kCFStringHangulStateL
;
2401 } else if ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
) && !_CFStringIsHangulLVT(character
)) {
2402 state
= kCFStringHangulStateLV
;
2403 } else if (character
> HANGUL_JUNGSEONG_END
) {
2404 state
= kCFStringHangulStateBreak
;
2408 case kCFStringHangulStateT
:
2409 if ((character
>= HANGUL_JUNGSEONG_START
) && (character
<= HANGUL_JUNGSEONG_END
)) {
2410 state
= kCFStringHangulStateV
;
2411 } else if ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
)) {
2412 state
= (_CFStringIsHangulLVT(character
) ? kCFStringHangulStateLVT
: kCFStringHangulStateLV
);
2413 } else if (character
< HANGUL_JUNGSEONG_START
) {
2414 state
= kCFStringHangulStateBreak
;
2419 state
= ((character
< HANGUL_JUNGSEONG_START
) ? kCFStringHangulStateL
: kCFStringHangulStateBreak
);
2423 if (state
== kCFStringHangulStateBreak
) break;
// Restart from the state of the original character and examine the character at `end`.
2428 state
= initialState
;
2429 while (((character
= CFStringGetCharacterFromInlineBuffer(buffer
, end
)) > 0) && (((character
>= HANGUL_CHOSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) || ((character
>= HANGUL_SYLLABLE_START
) && (character
<= HANGUL_SYLLABLE_END
)))) {
2431 case kCFStringHangulStateLV
:
2432 case kCFStringHangulStateV
:
2433 if ((character
>= HANGUL_JUNGSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) {
2434 state
= ((character
< HANGUL_JONGSEONG_START
) ? kCFStringHangulStateV
: kCFStringHangulStateT
);
2436 state
= kCFStringHangulStateBreak
;
2440 case kCFStringHangulStateLVT
:
2441 case kCFStringHangulStateT
:
2442 state
= (((character
>= HANGUL_JONGSEONG_START
) && (character
<= HANGUL_JONGSEONG_END
)) ? kCFStringHangulStateT
: kCFStringHangulStateBreak
);
2446 if (character
< HANGUL_JUNGSEONG_START
) {
2447 state
= kCFStringHangulStateL
;
2448 } else if (character
< HANGUL_JONGSEONG_START
) {
2449 state
= kCFStringHangulStateV
;
2450 } else if (character
>= HANGUL_SYLLABLE_START
) {
2451 state
= (_CFStringIsHangulLVT(character
) ? kCFStringHangulStateLVT
: kCFStringHangulStateLV
);
2453 state
= kCFStringHangulStateBreak
;
2458 if (state
== kCFStringHangulStateBreak
) break;
// Forward scan from `end`: a zero character ends the loop, as does the first character
// that is neither non-base nor one of the special-cased code points.
2464 while ((character
= CFStringGetCharacterFromInlineBuffer(buffer
, end
)) > 0) {
2465 if ((type
== kCFStringBackwardDeletionCluster
) && (character
>= 0x0530) && (character
< 0x1950)) break;
2467 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(buffer
, end
+ 1)))) {
2468 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
2469 nonBase
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, (character
>> 16));
2472 nonBase
= nonBaseBMP
;
2476 if (!CFUniCharIsMemberOfBitmap(character
, nonBase
) && (character
!= 0xFF9E) && (character
!= 0xFF9F) && ((character
& 0x1FFFF0) != 0xF870)) break;
2481 return CFRangeMake(start
, end
- start
);
2484 CF_INLINE
bool _CFStringIsVirama(UTF32Char character
, const uint8_t *combClassBMP
) {
2485 return ((character
== COMBINING_GRAPHEME_JOINER
) || (CFUniCharGetCombiningPropertyForCharacter(character
, ((character
< 0x10000) ? combClassBMP
: CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, (character
>> 16)))) == 9) ? true : false);
// Returns the range of the character cluster of kind `type` that contains `charIndex`.
// Visible steps: (1) return {kCFNotFound, 0} when charIndex >= length; (2) return
// {charIndex, 1} for non-ObjC eight-bit strings; (3) take the composed range from
// _CFStringInlineBufferGetComposedRange(); (4) for types below
// kCFStringCursorMovementCluster, widen it across grapheme joiners (viramas, U+034F, ZWJ)
// when letters are involved; (5) widen it to cover a preceding transcoding hint.
// NOTE(review): several declarations, loop headers and the final return are not visible
// in this excerpt.
2488 CFRange
CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string
, CFIndex charIndex
, CFStringCharacterClusterType type
) {
2490 CFIndex currentIndex
;
2491 CFIndex length
= CFStringGetLength(string
);
2492 CFStringInlineBuffer stringBuffer
;
2493 UTF32Char character
;
2494 UTF16Char otherSurrogate
;
// Plane-0 tables, looked up once and cached in function-level statics.
// NOTE(review): the NULL check that guards the initialization below uses no visible locking.
2495 static const uint8_t *nonBaseBMP
= NULL
;
2496 static const uint8_t *letterBMP
= NULL
;
2497 static const uint8_t *combClassBMP
= NULL
;
// NOTE(review): only the upper bound is tested here; a negative charIndex is not rejected by this check.
2499 if (charIndex
>= length
) return CFRangeMake(kCFNotFound
, 0);
2501 /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
2503 if (!CF_IS_OBJC(__kCFStringTypeID
, string
) && __CFStrIsEightBit(string
)) return CFRangeMake(charIndex
, 1);
2505 if (NULL
== nonBaseBMP
) {
2506 nonBaseBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
2507 letterBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, 0);
2508 combClassBMP
= CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
2511 CFStringInitInlineBuffer(string
, &stringBuffer
, CFRangeMake(0, length
));
2513 // Get composed character sequence first
2514 range
= _CFStringInlineBufferGetComposedRange(&stringBuffer
, charIndex
, type
, nonBaseBMP
);
2516 // Do grapheme joiners
2517 if (type
< kCFStringCursorMovementCluster
) {
2518 const uint8_t *letter
= letterBMP
;
2520 // Check to see if we have a letter at the beginning of initial cluster
2521 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, range
.location
);
2523 if ((range
.length
> 1) && CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, range
.location
+ 1)))) {
2524 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
2525 letter
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, (character
>> 16));
2528 if ((character
== ZERO_WIDTH_JOINER
) || CFUniCharIsMemberOfBitmap(character
, letter
)) {
2531 // Check if preceded by grapheme joiners (U034F and viramas)
2532 otherRange
.location
= currentIndex
= range
.location
;
2534 while (currentIndex
> 1) {
2535 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, --currentIndex
);
2537 // ??? We're assuming viramas only in BMP
2538 if ((_CFStringIsVirama(character
, combClassBMP
) || ((character
== ZERO_WIDTH_JOINER
) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, --currentIndex
), combClassBMP
))) && (currentIndex
> 0)) {
2544 currentIndex
= _CFStringInlineBufferGetComposedRange(&stringBuffer
, currentIndex
, type
, nonBaseBMP
).location
;
2546 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
);
2548 if (CFUniCharIsSurrogateLowCharacter(character
) && CFUniCharIsSurrogateHighCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
- 1)))) {
2549 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
2550 letter
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, (character
>> 16));
2556 if (!CFUniCharIsMemberOfBitmap(character
, letter
)) break;
2557 range
.location
= currentIndex
;
// Grow the length by however far the start of the range moved backward.
2560 range
.length
+= otherRange
.location
- range
.location
;
2562 // Check if followed by grapheme joiners
2563 if ((range
.length
> 1) && ((range
.location
+ range
.length
) < length
)) {
2567 currentIndex
= otherRange
.location
+ otherRange
.length
;
2568 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
- 1);
2570 // ??? We're assuming viramas only in BMP
2571 if ((character
!= ZERO_WIDTH_JOINER
) && !_CFStringIsVirama(character
, combClassBMP
)) break;
2573 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
);
2575 if (character
== ZERO_WIDTH_JOINER
) character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, ++currentIndex
);
2577 if (CFUniCharIsSurrogateHighCharacter(character
) && CFUniCharIsSurrogateLowCharacter((otherSurrogate
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
+ 1)))) {
2578 character
= CFUniCharGetLongCharacterForSurrogatePair(character
, otherSurrogate
);
2579 letter
= CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet
, (character
>> 16));
2584 // We only conjoin letters
2585 if (!CFUniCharIsMemberOfBitmap(character
, letter
)) break;
2586 otherRange
= _CFStringInlineBufferGetComposedRange(&stringBuffer
, currentIndex
, type
, nonBaseBMP
);
2587 } while ((otherRange
.location
+ otherRange
.length
) < length
);
2588 range
.length
= currentIndex
- range
.location
;
2593 // Check if we're part of prefix transcoding hints
2594 if (range
.location
> 0) {
// Start the scan at most MAX_TRANSCODING_LENGTH + 1 positions before the end of the range, clamped to 0.
2597 currentIndex
= (range
.location
+ range
.length
) - (MAX_TRANSCODING_LENGTH
+ 1);
2598 if (currentIndex
< 0) currentIndex
= 0;
2600 while (currentIndex
<= range
.location
) {
2601 character
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, currentIndex
);
2603 if ((character
& 0x1FFFF0) == 0xF860) { // transcoding hint
2604 otherIndex
= currentIndex
+ __CFTranscodingHintLength
[(character
- 0xF860)] + 1;
// If the hinted run reaches the end of the current range, the range is reset to start at the hint.
2605 if (otherIndex
>= (range
.location
+ range
.length
)) {
2606 range
.location
= currentIndex
;
2607 range
.length
= otherIndex
- currentIndex
;
2618 #if 1 /* Using the new implementation. Leaving the old implementation if'ed out for testing purposes for now */
2619 CFRange
CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString
, CFIndex theIndex
) {
2620 return CFStringGetRangeOfCharacterClusterAtIndex(theString
, theIndex
, kCFStringComposedCharacterCluster
);
2624 @function CFStringGetRangeOfComposedCharactersAtIndex
2625 Returns the range of the composed character sequence at the specified index.
2626 @param theString The CFString which is to be searched. If this
2627 parameter is not a valid CFString, the behavior is
2629 @param theIndex The index of the character contained in the
2630 composed character sequence. If the index is
2631 outside the index space of the string (0 to N-1 inclusive,
2632 where N is the length of the string), the behavior is
2634 @result The range of the composed character sequence.
2636 #define ExtHighHalfZoneLow 0xD800
2637 #define ExtHighHalfZoneHigh 0xDBFF
2638 #define ExtLowHalfZoneLow 0xDC00
2639 #define ExtLowHalfZoneHigh 0xDFFF
2640 #define JunseongStart 0x1160
2641 #define JonseongEnd 0x11F9
2642 CF_INLINE Boolean
IsHighCode(UniChar X
) { return (X
>= ExtHighHalfZoneLow
&& X
<= ExtHighHalfZoneHigh
); }
2643 CF_INLINE Boolean
IsLowCode(UniChar X
) { return (X
>= ExtLowHalfZoneLow
&& X
<= ExtLowHalfZoneHigh
); }
2644 #define IsHangulConjoiningJamo(X) (X >= JunseongStart && X <= JonseongEnd)
2645 #define IsHalfwidthKanaVoicedMark(X) ((X == 0xFF9E) || (X == 0xFF9F))
2646 CF_INLINE Boolean
IsNonBaseChar(UniChar X
, CFCharacterSetRef nonBaseSet
) { return (CFCharacterSetIsCharacterMember(nonBaseSet
, X
) || IsHangulConjoiningJamo(X
) || IsHalfwidthKanaVoicedMark(X
) || (X
& 0x1FFFF0) == 0xF870); } // combining char, hangul jamo, or Apple corporate variant tag
2649 #define COMBINING_GRAPHEME_JOINER (0x034F)
2651 static CFCharacterSetRef nonBaseChars
= NULL
;
2652 static CFCharacterSetRef letterChars
= NULL
;
2653 static const void *__CFCombiningClassBMP
= NULL
;
2655 CF_INLINE
bool IsVirama(UTF32Char character
) {
2656 return ((character
== COMBINING_GRAPHEME_JOINER
) ? true : ((character
< 0x10000) && (CFUniCharGetCombiningPropertyForCharacter(character
, __CFCombiningClassBMP
) == 9) ? true : false));
// Older implementation of CFStringGetRangeOfComposedCharactersAtIndex.
// NOTE(review): the "#if 1" comment earlier in this file says the old implementation is
// left if'ed out for testing; the matching #else/#endif are not visible here, so confirm.
// Visible steps: return {kCFNotFound, 0} when theIndex >= len; look for a transcoding
// hint in the few characters before theIndex; move `current` back over non-base
// characters; then move forward over non-base characters and virama/ZWJ-joined letters.
// Returns CFRangeMake(left, current - left).
// NOTE(review): labels, several assignments (including those to `left`) and some block
// boundaries are not visible in this excerpt.
2659 CFRange
CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString
, CFIndex theIndex
) {
2660 CFIndex left
, current
, save
;
2661 CFIndex len
= CFStringGetLength(theString
);
2662 CFStringInlineBuffer stringBuffer
;
2663 static volatile Boolean _isInited
= false;
2665 if (theIndex
>= len
) return CFRangeMake(kCFNotFound
, 0);
// File-level character sets and combining-class data used by IsNonBaseChar()/IsVirama().
2668 nonBaseChars
= CFCharacterSetGetPredefined(kCFCharacterSetNonBase
);
2669 letterChars
= CFCharacterSetGetPredefined(kCFCharacterSetLetter
);
2670 __CFCombiningClassBMP
= CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
2674 save
= current
= theIndex
;
2676 CFStringInitInlineBuffer(theString
, &stringBuffer
, CFRangeMake(0, len
));
2679 * First check for transcoding hints
// Window of up to MAX_TRANSCODING_LENGTH characters before `current`, plus `current` itself.
2682 CFRange theRange
= (current
> MAX_TRANSCODING_LENGTH
? CFRangeMake(current
- MAX_TRANSCODING_LENGTH
, MAX_TRANSCODING_LENGTH
+ 1) : CFRangeMake(0, current
+ 1));
2684 // Should check the next loc ?
2685 if (current
+ 1 < len
) ++theRange
.length
;
2687 if (theRange
.length
> 1) {
2688 UniChar characterBuffer
[MAX_TRANSCODING_LENGTH
+ 2]; // Transcoding hint length + current loc + next loc
// Copy the window straight from the direct buffer when there is one, otherwise fetch it through the CFString API.
2690 if (stringBuffer
.directBuffer
) {
2691 memmove(characterBuffer
, stringBuffer
.directBuffer
+ theRange
.location
, theRange
.length
* sizeof(UniChar
));
2693 CFStringGetCharacters(theString
, theRange
, characterBuffer
);
2696 while (current
>= theRange
.location
) {
2697 if ((characterBuffer
[current
- theRange
.location
] & 0x1FFFF0) == 0xF860) {
2698 theRange
= CFRangeMake(current
, __CFTranscodingHintLength
[characterBuffer
[current
- theRange
.location
] - 0xF860] + 1);
// A hinted run that ends at or before theIndex does not cover it, so stop looking.
2699 if ((theRange
.location
+ theRange
.length
) <= theIndex
) break;
// Clamp the hinted run to the end of the string.
2700 if ((theRange
.location
+ theRange
.length
) >= len
) theRange
.length
= len
- theRange
.location
;
2703 if (current
== 0) break;
2706 current
= theIndex
; // Reset current
2710 //#warning Aki 5/29/01 This does not support non-base chars in non-BMP planes (i.e. musical symbol combining stem in Unicode 3.1)
2712 * if we start NOT on a base, first move back to a base as appropriate.
2717 while ((current
> 0) && IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
), nonBaseChars
)) --current
;
// A letter preceded by a virama, or by ZWJ + virama, is treated as joined to what precedes it.
2719 if (current
>= 1 && current
< len
&& CFCharacterSetIsCharacterMember(letterChars
, CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
)) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1))) {
2722 } else if ((current
>= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1) == ZWJ
) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 2))) {
2728 * Set the left position, then jump back to the saved original position.
// If `current` sits on the low half of a surrogate pair, step back onto the high half.
2731 if (current
>= 1 && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
)) && IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1))) --current
;
2736 * Now, presume we are on a base; move forward & look for the next base.
2737 * Handle jumping over H/L codes.
2739 if (IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
)) && (current
+ 1) < len
&& IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
+ 1))) ++current
;
2744 if (current
< len
) {
2745 while (IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
), nonBaseChars
)) {
2747 if (current
>= len
) break;
2749 if ((current
< len
) && CFCharacterSetIsCharacterMember(letterChars
, CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
))) {
2750 if (IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1))) {
2751 ++current
; goto round2Again
;
2752 } else if ((current
>= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 1) == ZWJ
) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer
, current
- 2))) {
2753 ++current
; goto round2Again
;
2758 * Now, "current" is a base, and "left" is a base.
2759 * The junk between had better contain "save"!
// Sanity check: log when the original index falls outside [left, current].
2761 if ((! (left
<= save
)) || (! (save
<= current
))) {
2762 CFLog(0, CFSTR("CFString: CFStringGetRangeOfComposedCharactersAtIndex:%d returned invalid\n"), save
);
2764 return CFRangeMake(left
, current
- left
);
2769 @function CFStringFindCharacterFromSet
2770 Query the range of characters contained in the specified character set.
2771 @param theString The CFString which is to be searched. If this
2772 parameter is not a valid CFString, the behavior is
2774 @param theSet The CFCharacterSet against which the membership
2775 of characters is checked. If this parameter is not a valid
2776 CFCharacterSet, the behavior is undefined.
2777 @param range The range of characters within the string to search. If
2778 the range location or end point (defined by the location
2779 plus length minus 1) are outside the index space of the
2780 string (0 to N-1 inclusive, where N is the length of the
2781 string), the behavior is undefined. If the range length is
2782 negative, the behavior is undefined. The range may be empty
2783 (length 0), in which case no search is performed.
2784 @param searchOptions The bitwise-or'ed option flags to control
2785 the search behavior. The supported options are
2786 kCFCompareBackwards and kCFCompareAnchored.
2787 If other option flags are specified, the behavior
2789 @param result The pointer to a CFRange supplied by the caller in
2790 which the search result is stored. If a pointer to an invalid
2791 memory is specified, the behavior is undefined.
2792 @result true, if at least a character which is a member of the character
2793 set is found and result is filled, otherwise, false.
2795 #define SURROGATE_START 0xD800
2796 #define SURROGATE_END 0xDFFF
2798 CF_EXPORT Boolean
CFStringFindCharacterFromSet(CFStringRef theString
, CFCharacterSetRef theSet
, CFRange rangeToSearch
, CFOptionFlags searchOptions
, CFRange
*result
) {
2799 CFStringInlineBuffer stringBuffer
;
2802 CFIndex fromLoc
, toLoc
, cnt
; // fromLoc and toLoc are inclusive
2803 Boolean found
= false;
2804 Boolean done
= false;
2806 //#warning FIX ME !! Should support kCFCompareNonliteral
2808 if ((rangeToSearch
.location
+ rangeToSearch
.length
> CFStringGetLength(theString
)) || (rangeToSearch
.length
== 0)) return false;
2810 if (searchOptions
& kCFCompareBackwards
) {
2811 fromLoc
= rangeToSearch
.location
+ rangeToSearch
.length
- 1;
2812 toLoc
= rangeToSearch
.location
;
2814 fromLoc
= rangeToSearch
.location
;
2815 toLoc
= rangeToSearch
.location
+ rangeToSearch
.length
- 1;
2817 if (searchOptions
& kCFCompareAnchored
) {
2821 step
= (fromLoc
<= toLoc
) ? 1 : -1;
2824 CFStringInitInlineBuffer(theString
, &stringBuffer
, rangeToSearch
);
2827 ch
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, cnt
- rangeToSearch
.location
);
2828 if ((ch
>= SURROGATE_START
) && (ch
<= SURROGATE_END
)) {
2829 int otherCharIndex
= cnt
+ step
;
2831 if (((step
< 0) && (otherCharIndex
< toLoc
)) || ((step
> 0) && (otherCharIndex
> toLoc
))) {
2835 UniChar lowChar
= CFStringGetCharacterFromInlineBuffer(&stringBuffer
, otherCharIndex
- rangeToSearch
.location
);
2837 if (cnt
< otherCharIndex
) {
2844 if (CFUniCharIsSurrogateHighCharacter(highChar
) && CFUniCharIsSurrogateLowCharacter(lowChar
) && CFCharacterSetIsLongCharacterMember(theSet
, CFUniCharGetLongCharacterForSurrogatePair(highChar
, lowChar
))) {
2845 if (result
) *result
= CFRangeMake((cnt
< otherCharIndex
? cnt
: otherCharIndex
), 2);
2847 } else if (otherCharIndex
== toLoc
) {
2850 cnt
= otherCharIndex
+ step
;
2853 } else if (CFCharacterSetIsCharacterMember(theSet
, ch
)) {
2854 done
= found
= true;
2855 } else if (cnt
== toLoc
) {
2862 if (found
&& result
) *result
= CFRangeMake(cnt
, 1);
2866 /* Line range code */
// Characters that isALineSeparatorTypeCharacter() accepts as line separators.
2868 #define CarriageReturn '\r' /* 0x0d */
2869 #define NewLine '\n' /* 0x0a */
2870 #define NextLine 0x0085
2871 #define LineSeparator 0x2028
2872 #define ParaSeparator 0x2029
2874 CF_INLINE Boolean
isALineSeparatorTypeCharacter(UniChar ch
) {
2875 if (ch
> CarriageReturn
&& ch
< NextLine
) return false; /* Quick test to cover most chars */
2876 return (ch
== NewLine
|| ch
== CarriageReturn
|| ch
== NextLine
|| ch
== LineSeparator
|| ch
== ParaSeparator
) ? true : false;
// Computes line boundaries around `range` in `string`. Each out-pointer is optional and
// is written only when non-NULL:
//   lineBeginIndex   - receives `start`
//   lineEndIndex     - receives endOfContents + lineSeparatorLength
//   contentsEndIndex - receives endOfContents
// ObjC-backed strings are dispatched to "getLineStart:end:contentsEnd:forRange:".
// NOTE(review): the backward and forward scan loops and several index adjustments are not
// visible in this excerpt. The comments below describe only the visible statements.
2879 void CFStringGetLineBounds(CFStringRef string
, CFRange range
, CFIndex
*lineBeginIndex
, CFIndex
*lineEndIndex
, CFIndex
*contentsEndIndex
) {
2881 CFStringInlineBuffer buf
;
2884 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID
, void, string
, "getLineStart:end:contentsEnd:forRange:", lineBeginIndex
, lineEndIndex
, contentsEndIndex
, CFRangeMake(range
.location
, range
.length
));
2886 __CFAssertIsString(string
);
2887 __CFAssertRangeIsInStringBounds(string
, range
.location
, range
.length
);
2889 len
= __CFStrLength(string
);
// Start-of-line computation, skipped entirely when the caller does not ask for it.
2891 if (lineBeginIndex
) {
2893 if (range
.location
== 0) {
2896 CFStringInitInlineBuffer(string
, &buf
, CFRangeMake(0, len
));
2897 CFIndex buf_idx
= range
.location
;
2899 /* Take care of the special case where start happens to fall right between \r and \n */
2900 ch
= CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
);
2902 if ((ch
== NewLine
) && (CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
) == CarriageReturn
)) {
// The line starts just after the separator found at buf_idx.
2909 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf
, buf_idx
))) {
2910 start
= buf_idx
+ 1;
2917 *lineBeginIndex
= start
;
2920 /* Now find the ending point */
2921 if (lineEndIndex
|| contentsEndIndex
) {
2922 CFIndex endOfContents
, lineSeparatorLength
= 1; /* 1 by default */
2923 CFStringInitInlineBuffer(string
, &buf
, CFRangeMake(0, len
));
2924 CFIndex buf_idx
= range
.location
+ range
.length
- (range
.length
? 1 : 0);
2925 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
2926 ch
= __CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
);
2927 if (ch
== NewLine
) {
2928 endOfContents
= buf_idx
;
// A CarriageReturn paired with this NewLine makes the separator two characters long.
2930 if (__CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
) == CarriageReturn
) {
2931 lineSeparatorLength
= 2;
2936 if (isALineSeparatorTypeCharacter(ch
)) {
2937 endOfContents
= buf_idx
; /* This is actually end of contentsRange */
2938 buf_idx
++; /* OK for this to go past the end */
2939 if ((ch
== CarriageReturn
) && (__CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
) == NewLine
)) {
2940 lineSeparatorLength
= 2;
// Reached the end of the string without meeting a separator: contents end at len and there is no separator.
2943 } else if (buf_idx
>= len
) {
2944 endOfContents
= len
;
2945 lineSeparatorLength
= 0;
2949 ch
= __CFStringGetCharacterFromInlineBufferAux(&buf
, buf_idx
);
2953 if (contentsEndIndex
) *contentsEndIndex
= endOfContents
;
2954 if (lineEndIndex
) *lineEndIndex
= endOfContents
+ lineSeparatorLength
;
2959 CFStringRef
CFStringCreateByCombiningStrings(CFAllocatorRef alloc
, CFArrayRef array
, CFStringRef separatorString
) {
2961 CFIndex separatorNumByte
;
2962 CFIndex stringCount
= CFArrayGetCount(array
);
2963 Boolean isSepCFString
= !CF_IS_OBJC(__kCFStringTypeID
, separatorString
);
2964 Boolean canBeEightbit
= isSepCFString
&& __CFStrIsEightBit(separatorString
);
2966 CFStringRef otherString
;
2969 const void *separatorContents
= NULL
;
2971 if (stringCount
== 0) {
2972 return CFStringCreateWithCharacters(alloc
, NULL
, 0);
2973 } else if (stringCount
== 1) {
2974 return CFStringCreateCopy(alloc
, CFArrayGetValueAtIndex(array
, 0));
2977 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
2979 numChars
= CFStringGetLength(separatorString
) * (stringCount
- 1);
2980 for (idx
= 0; idx
< stringCount
; idx
++) {
2981 otherString
= (CFStringRef
)CFArrayGetValueAtIndex(array
, idx
);
2982 numChars
+= CFStringGetLength(otherString
);
2983 // canBeEightbit is already false if the separator is an NSString...
2984 if (!CF_IS_OBJC(__kCFStringTypeID
, otherString
) && __CFStrIsUnicode(otherString
)) canBeEightbit
= false;
2987 bufPtr
= buffer
= CFAllocatorAllocate(alloc
, canBeEightbit
? ((numChars
+ 1) * sizeof(uint8_t)) : (numChars
* sizeof(UniChar
)), 0);
2988 if (__CFOASafe
) __CFSetLastAllocationEventName(buffer
, "CFString (store)");
2989 separatorNumByte
= CFStringGetLength(separatorString
) * (canBeEightbit
? sizeof(uint8_t) : sizeof(UniChar
));
2991 for (idx
= 0; idx
< stringCount
; idx
++) {
2992 if (idx
) { // add separator here unless first string
2993 if (separatorContents
) {
2994 memmove(bufPtr
, separatorContents
, separatorNumByte
);
2996 if (!isSepCFString
) { // NSString
2997 CFStringGetCharacters(separatorString
, CFRangeMake(0, CFStringGetLength(separatorString
)), (UniChar
*)bufPtr
);
2998 } else if (canBeEightbit
|| __CFStrIsUnicode(separatorString
)) {
2999 memmove(bufPtr
, (const uint8_t *)__CFStrContents(separatorString
) + __CFStrSkipAnyLengthByte(separatorString
), separatorNumByte
);
3001 __CFStrConvertBytesToUnicode((uint8_t*)__CFStrContents(separatorString
) + __CFStrSkipAnyLengthByte(separatorString
), (UniChar
*)bufPtr
, __CFStrLength(separatorString
));
3003 separatorContents
= bufPtr
;
3005 bufPtr
+= separatorNumByte
;
3008 otherString
= (CFStringRef
)CFArrayGetValueAtIndex(array
, idx
);
3009 if (CF_IS_OBJC(__kCFStringTypeID
, otherString
)) {
3010 CFIndex otherLength
= CFStringGetLength(otherString
);
3011 CFStringGetCharacters(otherString
, CFRangeMake(0, otherLength
), (UniChar
*)bufPtr
);
3012 bufPtr
+= otherLength
* sizeof(UniChar
);
3014 const uint8_t* otherContents
= __CFStrContents(otherString
);
3015 CFIndex otherNumByte
= __CFStrLength2(otherString
, otherContents
) * (canBeEightbit
? sizeof(uint8_t) : sizeof(UniChar
));
3017 if (canBeEightbit
|| __CFStrIsUnicode(otherString
)) {
3018 memmove(bufPtr
, otherContents
+ __CFStrSkipAnyLengthByte(otherString
), otherNumByte
);
3020 __CFStrConvertBytesToUnicode(otherContents
+ __CFStrSkipAnyLengthByte(otherString
), (UniChar
*)bufPtr
, __CFStrLength2(otherString
, otherContents
));
3022 bufPtr
+= otherNumByte
;
3025 if (canBeEightbit
) *bufPtr
= 0; // NULL byte;
3027 return canBeEightbit
?
3028 CFStringCreateWithCStringNoCopy(alloc
, buffer
, __CFStringGetEightBitStringEncoding(), alloc
) :
3029 CFStringCreateWithCharactersNoCopy(alloc
, buffer
, numChars
, alloc
);
3033 CFArrayRef
CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc
, CFStringRef string
, CFStringRef separatorString
) {
3034 CFArrayRef separatorRanges
;
3035 CFIndex length
= CFStringGetLength(string
);
3036 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
3037 if (!(separatorRanges
= CFStringCreateArrayWithFindResults(alloc
, string
, separatorString
, CFRangeMake(0, length
), 0))) {
3038 return CFArrayCreate(alloc
, (const void**)&string
, 1, & kCFTypeArrayCallBacks
);
3041 CFIndex count
= CFArrayGetCount(separatorRanges
);
3042 CFIndex startIndex
= 0;
3044 CFMutableArrayRef array
= CFArrayCreateMutable(alloc
, count
+ 2, & kCFTypeArrayCallBacks
);
3045 const CFRange
*currentRange
;
3046 CFStringRef substring
;
3048 for (idx
= 0;idx
< count
;idx
++) {
3049 currentRange
= CFArrayGetValueAtIndex(separatorRanges
, idx
);
3050 numChars
= currentRange
->location
- startIndex
;
3051 substring
= CFStringCreateWithSubstring(alloc
, string
, CFRangeMake(startIndex
, numChars
));
3052 CFArrayAppendValue(array
, substring
);
3053 CFRelease(substring
);
3054 startIndex
= currentRange
->location
+ currentRange
->length
;
3056 substring
= CFStringCreateWithSubstring(alloc
, string
, CFRangeMake(startIndex
, length
- startIndex
));
3057 CFArrayAppendValue(array
, substring
);
3058 CFRelease(substring
);
3060 CFRelease(separatorRanges
);
3066 CFStringRef
CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc
, CFDataRef data
, CFStringEncoding encoding
) {
3067 return CFStringCreateWithBytes(alloc
, CFDataGetBytePtr(data
), CFDataGetLength(data
), encoding
, true);
3071 CFDataRef
CFStringCreateExternalRepresentation(CFAllocatorRef alloc
, CFStringRef string
, CFStringEncoding encoding
, uint8_t lossByte
) {
3073 CFIndex guessedByteLength
;
3078 if (CF_IS_OBJC(__kCFStringTypeID
, string
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3079 length
= CFStringGetLength(string
);
3081 __CFAssertIsString(string
);
3082 length
= __CFStrLength(string
);
3083 if (__CFStrIsEightBit(string
) && ((__CFStringGetEightBitStringEncoding() == encoding
) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII
&& __CFStringEncodingIsSupersetOfASCII(encoding
)))) { // Requested encoding is equal to the encoding in string
3084 return CFDataCreate(alloc
, ((char *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
)), __CFStrLength(string
));
3088 if (alloc
== NULL
) alloc
= __CFGetDefaultAllocator();
3090 if (encoding
== kCFStringEncodingUnicode
) {
3091 guessedByteLength
= (length
+ 1) * sizeof(UniChar
);
3092 } else if (((guessedByteLength
= CFStringGetMaximumSizeForEncoding(length
, encoding
)) > length
) && !CF_IS_OBJC(__kCFStringTypeID
, string
)) { // Multi byte encoding
3093 #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
3094 if (__CFStrIsUnicode(string
)) {
3095 guessedByteLength
= CFStringEncodingByteLengthForCharacters(encoding
, kCFStringEncodingPrependBOM
, __CFStrContents(string
), __CFStrLength(string
));
3098 result
= __CFStringEncodeByteStream(string
, 0, length
, true, encoding
, lossByte
, NULL
, 0x7FFFFFFF, &guessedByteLength
);
3099 // if result == length, we always succeed
3100 // otherwise, if result == 0, we fail
3101 // otherwise, if there was a lossByte but still result != length, we fail
3102 if ((result
!= length
) && (!result
|| !lossByte
)) return NULL
;
3103 if (guessedByteLength
== length
&& __CFStrIsEightBit(string
) && __CFStringEncodingIsSupersetOfASCII(encoding
)) { // It's all ASCII !!
3104 return CFDataCreate(alloc
, ((char *)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
)), __CFStrLength(string
));
3106 #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
3110 bytes
= CFAllocatorAllocate(alloc
, guessedByteLength
, 0);
3111 if (__CFOASafe
) __CFSetLastAllocationEventName(bytes
, "CFData (store)");
3113 result
= __CFStringEncodeByteStream(string
, 0, length
, true, encoding
, lossByte
, bytes
, guessedByteLength
, &usedLength
);
3115 if ((result
!= length
) && (!result
|| !lossByte
)) { // see comment above about what this means
3116 CFAllocatorDeallocate(alloc
, bytes
);
3120 return CFDataCreateWithBytesNoCopy(alloc
, (char const *)bytes
, usedLength
, alloc
);
3124 CFStringEncoding
CFStringGetSmallestEncoding(CFStringRef str
) {
3126 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFStringEncoding
, str
, "_smallestEncodingInCFStringEncoding");
3127 __CFAssertIsString(str
);
3129 if (__CFStrIsEightBit(str
)) return __CFStringGetEightBitStringEncoding();
3130 len
= __CFStrLength(str
);
3131 if (__CFStringEncodeByteStream(str
, 0, len
, false, __CFStringGetEightBitStringEncoding(), 0, NULL
, 0x7fffffff, NULL
) == len
) return __CFStringGetEightBitStringEncoding();
3132 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str
, 0, len
, false, __CFStringGetSystemEncoding(), 0, NULL
, 0x7fffffff, NULL
) == len
)) return __CFStringGetSystemEncoding();
3133 return kCFStringEncodingUnicode
; /* ??? */
3137 CFStringEncoding
CFStringGetFastestEncoding(CFStringRef str
) {
3138 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, CFStringEncoding
, str
, "_fastestEncodingInCFStringEncoding");
3139 __CFAssertIsString(str
);
3140 return __CFStrIsEightBit(str
) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode
; /* ??? */
3144 SInt32
CFStringGetIntValue(CFStringRef str
) {
3148 CFStringInlineBuffer buf
;
3149 CFStringInitInlineBuffer(str
, &buf
, CFRangeMake(0, CFStringGetLength(str
)));
3150 success
= __CFStringScanInteger(&buf
, NULL
, &idx
, false, &result
);
3151 return success
? result
: 0;
3155 double CFStringGetDoubleValue(CFStringRef str
) {
3159 CFStringInlineBuffer buf
;
3160 CFStringInitInlineBuffer(str
, &buf
, CFRangeMake(0, CFStringGetLength(str
)));
3161 success
= __CFStringScanDouble(&buf
, NULL
, &idx
, &result
);
3162 return success
? result
: 0.0;
3166 /*** Mutable functions... ***/
3168 void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string
, UniChar
*chars
, CFIndex length
, CFIndex capacity
) {
3169 __CFAssertIsNotNegative(length
);
3170 __CFAssertIsStringAndExternalMutable(string
);
3171 CFAssert4((length
<= capacity
) && ((capacity
== 0) || ((capacity
> 0) && chars
)), __kCFLogAssertion
, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__
, chars
, length
, capacity
);
3172 __CFStrSetContentPtr(string
, chars
);
3173 __CFStrSetExplicitLength(string
, length
);
3174 __CFStrSetCapacity(string
, capacity
* sizeof(UniChar
));
3175 __CFStrSetCapacityProvidedExternally(string
);
3180 void CFStringInsert(CFMutableStringRef str
, CFIndex idx
, CFStringRef insertedStr
) {
3181 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "insertString:atIndex:", insertedStr
, idx
);
3182 __CFAssertIsStringAndMutable(str
);
3183 CFAssert3(idx
>= 0 && idx
<= __CFStrLength(str
), __kCFLogAssertion
, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__
, idx
, __CFStrLength(str
));
3184 __CFStringReplace(str
, CFRangeMake(idx
, 0), insertedStr
);
3188 void CFStringDelete(CFMutableStringRef str
, CFRange range
) {
3189 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, str
, "deleteCharactersInRange:", range
);
3190 __CFAssertIsStringAndMutable(str
);
3191 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
3192 __CFStringChangeSize(str
, range
, 0, false);
3196 void CFStringReplace(CFMutableStringRef str
, CFRange range
, CFStringRef replacement
) {
3197 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "replaceCharactersInRange:withString:", range
, replacement
);
3198 __CFAssertIsStringAndMutable(str
);
3199 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
3200 __CFStringReplace(str
, range
, replacement
);
3204 void CFStringReplaceAll(CFMutableStringRef str
, CFStringRef replacement
) {
3205 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, str
, "setString:", replacement
);
3206 __CFAssertIsStringAndMutable(str
);
3207 __CFStringReplace(str
, CFRangeMake(0, __CFStrLength(str
)), replacement
);
3211 void CFStringAppend(CFMutableStringRef str
, CFStringRef appended
) {
3212 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, str
, "appendString:", appended
);
3213 __CFAssertIsStringAndMutable(str
);
3214 __CFStringReplace(str
, CFRangeMake(__CFStrLength(str
), 0), appended
);
3218 void CFStringAppendCharacters(CFMutableStringRef str
, const UniChar
*chars
, CFIndex appendedLength
) {
3219 CFIndex strLength
, idx
;
3221 __CFAssertIsNotNegative(appendedLength
);
3223 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "appendCharacters:length:", chars
, appendedLength
);
3225 __CFAssertIsStringAndMutable(str
);
3227 strLength
= __CFStrLength(str
);
3228 if (__CFStringGetCompatibility(Bug2967272
) || __CFStrIsUnicode(str
)) {
3229 __CFStringChangeSize(str
, CFRangeMake(strLength
, 0), appendedLength
, true);
3230 memmove((UniChar
*)__CFStrContents(str
) + strLength
, chars
, appendedLength
* sizeof(UniChar
));
3233 bool isASCII
= true;
3234 for (idx
= 0; isASCII
&& idx
< appendedLength
; idx
++) isASCII
= (chars
[idx
] < 0x80);
3235 __CFStringChangeSize(str
, CFRangeMake(strLength
, 0), appendedLength
, !isASCII
);
3237 memmove((UniChar
*)__CFStrContents(str
) + strLength
, chars
, appendedLength
* sizeof(UniChar
));
3239 contents
= (uint8_t *)__CFStrContents(str
) + strLength
+ __CFStrSkipAnyLengthByte(str
);
3240 for (idx
= 0; idx
< appendedLength
; idx
++) contents
[idx
] = (uint8_t)chars
[idx
];
/* Appends appendedLength bytes at cStr, interpreted in the given encoding, to str.
   Visible flow:
     - ASCII or the eight-bit string encoding: the bytes are used as they are.
     - kCFStringEncodingUnicode: the bytes are treated as UniChars (appendedLength / sizeof(UniChar)
       of them) and scanned to see whether all of them are below 0x80.
     - any other encoding: decoded through __CFStringDecodeByteStream3() into vBuf, after which
       cStr / appendedLength describe the decoded data.
   The data is then either dispatched to the ObjC object or copied into the CF storage after
   the string has been grown with __CFStringChangeSize().
   NOTE(review): the reviewed text of this function is missing a number of short lines (braces,
   else keywords, short declarations/returns), so the exact branch boundaries are inferred -
   confirm against the full source before relying on the comments below.
*/
3246 static void __CFStringAppendBytes(CFMutableStringRef str
, const char *cStr
, CFIndex appendedLength
, CFStringEncoding encoding
) {
3247 Boolean appendedIsUnicode
= false;
3248 Boolean freeCStrWhenDone
= false;
3249 Boolean demoteAppendedUnicode
= false;
3250 CFVarWidthCharBuffer vBuf
;
3252 __CFAssertIsNotNegative(appendedLength
);
3254 if (encoding
== kCFStringEncodingASCII
|| encoding
== __CFStringGetEightBitStringEncoding()) {
3255 // appendedLength now denotes length in UniChars
3256 } else if (encoding
== kCFStringEncodingUnicode
) {
3257 UniChar
*chars
= (UniChar
*)cStr
;
3258 CFIndex idx
, length
= appendedLength
/ sizeof(UniChar
);
3259 bool isASCII
= true;
// Stops at the first UniChar that is not below 0x80
3260 for (idx
= 0; isASCII
&& idx
< length
; idx
++) isASCII
= (chars
[idx
] < 0x80);
3262 appendedIsUnicode
= true;
3264 demoteAppendedUnicode
= true;
// From here on appendedLength counts UniChars, not bytes
3266 appendedLength
= length
;
3268 Boolean usingPassedInMemory
= false;
3270 vBuf
.allocator
= __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
3271 vBuf
.chars
.unicode
= NULL
; // This will cause the decode function to allocate memory if necessary
3273 if (!__CFStringDecodeByteStream3(cStr
, appendedLength
, encoding
, __CFStrIsUnicode(str
), &vBuf
, &usingPassedInMemory
, 0)) {
3274 CFAssert1(0, __kCFLogAssertion
, "Supplied bytes could not be converted specified encoding %d", encoding
);
3278 // If not ASCII, appendedLength now denotes length in UniChars
3279 appendedLength
= vBuf
.numChars
;
3280 appendedIsUnicode
= !vBuf
.isASCII
;
3281 cStr
= vBuf
.chars
.ascii
;
// Only a buffer the decoder allocated itself is freed at the end of the function
3282 freeCStrWhenDone
= !usingPassedInMemory
&& vBuf
.shouldFreeChars
;
3285 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) {
3286 if (!appendedIsUnicode
&& !demoteAppendedUnicode
) {
3287 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "_cfAppendCString:length:", cStr
, appendedLength
);
3289 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID
, void, str
, "appendCharacters:length:", cStr
, appendedLength
);
3293 __CFAssertIsStringAndMutable(str
);
3294 strLength
= __CFStrLength(str
);
// Grow by appendedLength characters at the end; the result is Unicode if either side is
3296 __CFStringChangeSize(str
, CFRangeMake(strLength
, 0), appendedLength
, appendedIsUnicode
|| __CFStrIsUnicode(str
));
3298 if (__CFStrIsUnicode(str
)) {
3299 UniChar
*contents
= (UniChar
*)__CFStrContents(str
);
3300 if (appendedIsUnicode
) {
3301 memmove(contents
+ strLength
, cStr
, appendedLength
* sizeof(UniChar
));
3303 __CFStrConvertBytesToUnicode(cStr
, contents
+ strLength
, appendedLength
);
// demoteAppendedUnicode: the source holds UniChars that were all found to be below 0x80,
// so each one is narrowed to a single byte
3306 if (demoteAppendedUnicode
) {
3307 UniChar
*chars
= (UniChar
*)cStr
;
3309 uint8_t *contents
= (uint8_t *)__CFStrContents(str
) + strLength
+ __CFStrSkipAnyLengthByte(str
);
3310 for (idx
= 0; idx
< appendedLength
; idx
++) contents
[idx
] = (uint8_t)chars
[idx
];
3312 uint8_t *contents
= (uint8_t *)__CFStrContents(str
);
3313 memmove(contents
+ strLength
+ __CFStrSkipAnyLengthByte(str
), cStr
, appendedLength
);
// The temporary decode buffer came from the default allocator (see vBuf.allocator above)
3318 if (freeCStrWhenDone
) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr
);
3321 void CFStringAppendPascalString(CFMutableStringRef str
, ConstStringPtr pStr
, CFStringEncoding encoding
) {
3322 __CFStringAppendBytes(str
, pStr
+ 1, (CFIndex
)*pStr
, encoding
);
3325 void CFStringAppendCString(CFMutableStringRef str
, const char *cStr
, CFStringEncoding encoding
) {
3326 __CFStringAppendBytes(str
, cStr
, strlen(cStr
), encoding
);
3330 void CFStringAppendFormat(CFMutableStringRef str
, CFDictionaryRef formatOptions
, CFStringRef format
, ...) {
3333 va_start(argList
, format
);
3334 CFStringAppendFormatAndArguments(str
, formatOptions
, format
, argList
);
3339 CFIndex
CFStringFindAndReplace(CFMutableStringRef string
, CFStringRef stringToFind
, CFStringRef replacementString
, CFRange rangeToSearch
, CFOptionFlags compareOptions
) {
3341 Boolean backwards
= compareOptions
& kCFCompareBackwards
;
3342 UInt32 endIndex
= rangeToSearch
.location
+ rangeToSearch
.length
;
3343 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
3344 CFRange rangeBuffer
[MAX_RANGES_ON_STACK
]; // Used to avoid allocating memory
3345 CFRange
*ranges
= rangeBuffer
;
3346 CFIndex foundCount
= 0;
3347 CFIndex capacity
= MAX_RANGES_ON_STACK
;
3349 __CFAssertIsStringAndMutable(string
);
3350 __CFAssertRangeIsInStringBounds(string
, rangeToSearch
.location
, rangeToSearch
.length
);
3352 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
3353 while ((rangeToSearch
.length
> 0) && CFStringFindWithOptions(string
, stringToFind
, rangeToSearch
, compareOptions
, &foundRange
)) {
3354 // Determine the next range
3356 rangeToSearch
.length
= foundRange
.location
- rangeToSearch
.location
;
3358 rangeToSearch
.location
= foundRange
.location
+ foundRange
.length
;
3359 rangeToSearch
.length
= endIndex
- rangeToSearch
.location
;
3362 // If necessary, grow the array
3363 if (foundCount
>= capacity
) {
3364 bool firstAlloc
= (ranges
== rangeBuffer
) ? true : false;
3365 capacity
= (capacity
+ 4) * 2;
3366 // Note that reallocate with NULL previous pointer is same as allocate
3367 ranges
= CFAllocatorReallocate(NULL
, firstAlloc
? NULL
: ranges
, capacity
* sizeof(CFRange
), 0);
3368 if (firstAlloc
) memmove(ranges
, rangeBuffer
, MAX_RANGES_ON_STACK
* sizeof(CFRange
));
3370 ranges
[foundCount
] = foundRange
;
3374 if (foundCount
> 0) {
3375 if (backwards
) { // Reorder the ranges to be incrementing (better to do this here, then to check other places)
3377 int tail
= foundCount
- 1;
3378 while (head
< tail
) {
3379 CFRange temp
= ranges
[head
];
3380 ranges
[head
] = ranges
[tail
];
3381 ranges
[tail
] = temp
;
3386 __CFStringReplaceMultiple(string
, ranges
, foundCount
, replacementString
);
3387 if (ranges
!= rangeBuffer
) CFAllocatorDeallocate(NULL
, ranges
);
3394 // This function is here for NSString purposes
3395 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
3397 int __CFStringCheckAndReplace(CFMutableStringRef str
, CFRange range
, CFStringRef replacement
) {
3398 if (!__CFStrIsMutable(str
)) return _CFStringErrNotMutable
; // These three ifs are always here, for NSString usage
3399 if (!replacement
&& __CFStringNoteErrors()) return _CFStringErrNilArg
;
3400 // We use unsigneds as that is what NSRanges do
3401 if ((unsigned)range
.location
+ (unsigned)range
.length
> (unsigned)__CFStrLength(str
) && __CFStringNoteErrors()) return _CFStringErrBounds
;
3402 __CFAssertIsStringAndMutable(str
);
3403 __CFAssertRangeIsInStringBounds(str
, range
.location
, range
.length
);
3404 __CFStringReplace(str
, range
, replacement
);
3405 return _CFStringErrNone
;
3408 // This function determines whether errors which would cause string exceptions should
3409 // be ignored or not
3411 Boolean
__CFStringNoteErrors(void) {
3412 return _CFExecutableLinkedOnOrAfter(CFSystemVersionJaguar
) ? true : false;
3417 void CFStringPad(CFMutableStringRef string
, CFStringRef padString
, CFIndex length
, CFIndex indexIntoPad
) {
3418 CFIndex originalLength
;
3420 __CFAssertIsNotNegative(length
);
3421 __CFAssertIsNotNegative(indexIntoPad
);
3423 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID
, void, string
, "_cfPad:length:padIndex:", padString
, length
, indexIntoPad
);
3425 __CFAssertIsStringAndMutable(string
);
3427 originalLength
= __CFStrLength(string
);
3428 if (length
< originalLength
) {
3429 __CFStringChangeSize(string
, CFRangeMake(length
, originalLength
- length
), 0, false);
3430 } else if (originalLength
< length
) {
3434 CFIndex padStringLength
;
3436 CFIndex padRemaining
= length
- originalLength
;
3438 if (CF_IS_OBJC(__kCFStringTypeID
, padString
)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3439 padStringLength
= CFStringGetLength(padString
);
3440 isUnicode
= true; /* !!! Bad for now */
3442 __CFAssertIsString(padString
);
3443 padStringLength
= __CFStrLength(padString
);
3444 isUnicode
= __CFStrIsUnicode(string
) || __CFStrIsUnicode(padString
);
3447 charSize
= isUnicode
? sizeof(UniChar
) : sizeof(uint8_t);
3449 __CFStringChangeSize(string
, CFRangeMake(originalLength
, 0), padRemaining
, isUnicode
);
3451 contents
= (uint8_t*)__CFStrContents(string
) + charSize
* originalLength
+ __CFStrSkipAnyLengthByte(string
);
3452 padLength
= padStringLength
- indexIntoPad
;
3453 padLength
= padRemaining
< padLength
? padRemaining
: padLength
;
3455 while (padRemaining
> 0) {
3457 CFStringGetCharacters(padString
, CFRangeMake(indexIntoPad
, padLength
), (UniChar
*)contents
);
3459 CFStringGetBytes(padString
, CFRangeMake(indexIntoPad
, padLength
), __CFStringGetEightBitStringEncoding(), 0, false, contents
, padRemaining
* charSize
, NULL
);
3461 contents
+= padLength
* charSize
;
3462 padRemaining
-= padLength
;
3464 padLength
= padRemaining
< padLength
? padRemaining
: padStringLength
;
3469 void CFStringTrim(CFMutableStringRef string
, CFStringRef trimString
) {
3471 CFIndex newStartIndex
;
3474 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfTrim:", trimString
);
3476 __CFAssertIsStringAndMutable(string
);
3477 __CFAssertIsString(trimString
);
3480 length
= __CFStrLength(string
);
3482 while (CFStringFindWithOptions(string
, trimString
, CFRangeMake(newStartIndex
, length
- newStartIndex
), kCFCompareAnchored
, &range
)) {
3483 newStartIndex
= range
.location
+ range
.length
;
3486 if (newStartIndex
< length
) {
3487 CFIndex charSize
= __CFStrIsUnicode(string
) ? sizeof(UniChar
) : sizeof(uint8_t);
3488 uint8_t *contents
= (uint8_t*)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
3490 length
-= newStartIndex
;
3491 if (__CFStrLength(trimString
) < length
) {
3492 while (CFStringFindWithOptions(string
, trimString
, CFRangeMake(newStartIndex
, length
), kCFCompareAnchored
|kCFCompareBackwards
, &range
)) {
3493 length
= range
.location
- newStartIndex
;
3496 memmove(contents
, contents
+ newStartIndex
* charSize
, length
* charSize
);
3497 __CFStringChangeSize(string
, CFRangeMake(length
, __CFStrLength(string
) - length
), 0, false);
3498 } else { // Only trimString in string, trim all
3499 __CFStringChangeSize(string
, CFRangeMake(0, length
), 0, false);
3503 void CFStringTrimWhitespace(CFMutableStringRef string
) {
3504 CFIndex newStartIndex
;
3506 CFStringInlineBuffer buffer
;
3508 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID
, void, string
, "_cfTrimWS");
3510 __CFAssertIsStringAndMutable(string
);
3513 length
= __CFStrLength(string
);
3515 CFStringInitInlineBuffer(string
, &buffer
, CFRangeMake(0, length
));
3516 CFIndex buffer_idx
= 0;
3518 while (buffer_idx
< length
&& CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer
, buffer_idx
), kCFUniCharWhitespaceAndNewlineCharacterSet
))
3520 newStartIndex
= buffer_idx
;
3522 if (newStartIndex
< length
) {
3523 uint8_t *contents
= (uint8_t*)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
3524 CFIndex charSize
= (__CFStrIsUnicode(string
) ? sizeof(UniChar
) : sizeof(uint8_t));
3526 buffer_idx
= length
- 1;
3527 while (0 <= buffer_idx
&& CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer
, buffer_idx
), kCFUniCharWhitespaceAndNewlineCharacterSet
))
3529 length
= buffer_idx
- newStartIndex
+ 1;
3531 memmove(contents
, contents
+ newStartIndex
* charSize
, length
* charSize
);
3532 __CFStringChangeSize(string
, CFRangeMake(length
, __CFStrLength(string
) - length
), 0, false);
3533 } else { // Whitespace only string
3534 __CFStringChangeSize(string
, CFRangeMake(0, length
), 0, false);
/* Lowercases the contents of a mutable string in place.
   Visible flow:
     1. When no language code applies and the storage is eight-bit, ASCII 'A'..'Z' are
        converted bytewise. A byte above 127 is tested for; the statement it triggers is not
        visible here (presumably it ends this fast pass - confirm).
     2. Any characters left (currentIndex < length) are handled as UniChars: each character,
        or well-formed surrogate pair, is mapped with CFUniCharMapCaseTo(kCFUniCharToLowercase)
        and the string is resized whenever the mapping yields a different number of UniChars.
   NOTE(review): the reviewed text is missing short lines (braces, case labels, break
   statements, the assignment of langCode, length adjustments); the comments below describe
   only what is visible.
   NOTE(review): isEightBit is read from string before CF_OBJC_FUNCDISPATCH1 runs - confirm
   that this is harmless when string is an ObjC object.
*/
3538 void CFStringLowercase(CFMutableStringRef string
, CFLocaleRef locale
) {
3539 CFIndex currentIndex
= 0;
3541 const char *langCode
;
3542 Boolean isEightBit
= __CFStrIsEightBit(string
);
3544 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfLowercase:", locale
);
3546 __CFAssertIsStringAndMutable(string
);
3548 length
= __CFStrLength(string
);
// Fast pass: eight-bit storage with no language code, ASCII letters only
3552 if (!langCode
&& isEightBit
) {
3553 uint8_t *contents
= (uint8_t*)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
3554 for (;currentIndex
< length
;currentIndex
++) {
3555 if (contents
[currentIndex
] >= 'A' && contents
[currentIndex
] <= 'Z') {
3556 contents
[currentIndex
] += 'a' - 'A';
3557 } else if (contents
[currentIndex
] > 127) {
// General pass over whatever the fast pass did not cover
3563 if (currentIndex
< length
) {
3565 UniChar mappedCharacters
[MAX_CASE_MAPPING_BUF
];
3566 CFIndex mappedLength
;
3567 UTF32Char currentChar
;
// Zero-length change with the last argument true; presumably this widens eight-bit
// storage to UniChars before contents is used as a UniChar pointer - confirm
3570 if (isEightBit
) __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true);
3572 contents
= (UniChar
*)__CFStrContents(string
);
3574 for (;currentIndex
< length
;currentIndex
++) {
// A high surrogate followed by a low surrogate is combined into one UTF-32 value
3576 if (CFUniCharIsSurrogateHighCharacter(contents
[currentIndex
]) && (currentIndex
+ 1 < length
) && CFUniCharIsSurrogateLowCharacter(contents
[currentIndex
+ 1])) {
3577 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(contents
[currentIndex
], contents
[currentIndex
+ 1]);
3579 currentChar
= contents
[currentIndex
];
// Conditional-mapping flags are computed only with a language code or for U+03A3
// (Greek capital sigma); otherwise flags is 0
3581 flags
= ((langCode
|| (currentChar
== 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar
, contents
, currentIndex
, length
, kCFUniCharToLowercase
, langCode
, flags
) : 0);
3583 mappedLength
= CFUniCharMapCaseTo(currentChar
, mappedCharacters
, MAX_CASE_MAPPING_BUF
, kCFUniCharToLowercase
, flags
, langCode
);
// The first mapped UniChar always overwrites the current position
3584 if (mappedLength
> 0) contents
[currentIndex
] = *mappedCharacters
;
// A non-BMP source character occupies two UniChars, so the adjustment depends on
// how many UniChars the mapping produced; contents is re-fetched after every resize
3586 if (currentChar
> 0xFFFF) { // Non-BMP char
3587 switch (mappedLength
) {
3589 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 2), 0, true);
3590 contents
= (UniChar
*)__CFStrContents(string
);
3595 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 1), 0, true);
3596 contents
= (UniChar
*)__CFStrContents(string
);
3601 contents
[++currentIndex
] = mappedCharacters
[1];
3605 --mappedLength
; // Skip the current char
3606 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
- 1, true);
3607 contents
= (UniChar
*)__CFStrContents(string
);
3608 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
3609 length
+= (mappedLength
- 1);
3610 currentIndex
+= mappedLength
;
// BMP character that mapped to nothing: remove it
3613 } else if (mappedLength
== 0) {
3614 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 1), 0, true);
3615 contents
= (UniChar
*)__CFStrContents(string
);
// BMP character that expanded: open a gap after it and copy the extra mapped UniChars
3617 } else if (mappedLength
> 1) {
3618 --mappedLength
; // Skip the current char
3619 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
, true);
3620 contents
= (UniChar
*)__CFStrContents(string
);
3621 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
3622 length
+= mappedLength
;
3623 currentIndex
+= mappedLength
;
/* Uppercases the contents of a mutable string in place.
   Visible flow:
     1. When no language code applies and the storage is eight-bit, ASCII 'a'..'z' are
        converted bytewise. A byte above 127 is tested for; the statement it triggers is not
        visible here (presumably it ends this fast pass - confirm).
     2. Any characters left (currentIndex < length) are handled as UniChars: each character,
        or well-formed surrogate pair, is mapped with CFUniCharMapCaseTo(kCFUniCharToUppercase)
        and the string is resized whenever the mapping yields a different number of UniChars.
   NOTE(review): the reviewed text is missing short lines (braces, case labels, break
   statements, the assignment of langCode, length adjustments); the comments below describe
   only what is visible.
   NOTE(review): isEightBit is read from string before CF_OBJC_FUNCDISPATCH1 runs - confirm
   that this is harmless when string is an ObjC object.
*/
3629 void CFStringUppercase(CFMutableStringRef string
, CFLocaleRef locale
) {
3630 CFIndex currentIndex
= 0;
3632 const char *langCode
;
3633 Boolean isEightBit
= __CFStrIsEightBit(string
);
3635 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfUppercase:", locale
);
3637 __CFAssertIsStringAndMutable(string
);
3639 length
= __CFStrLength(string
);
// Fast pass: eight-bit storage with no language code, ASCII letters only
3643 if (!langCode
&& isEightBit
) {
3644 uint8_t *contents
= (uint8_t*)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
3645 for (;currentIndex
< length
;currentIndex
++) {
3646 if (contents
[currentIndex
] >= 'a' && contents
[currentIndex
] <= 'z') {
3647 contents
[currentIndex
] -= 'a' - 'A';
3648 } else if (contents
[currentIndex
] > 127) {
// General pass over whatever the fast pass did not cover
3654 if (currentIndex
< length
) {
3656 UniChar mappedCharacters
[MAX_CASE_MAPPING_BUF
];
3657 CFIndex mappedLength
;
3658 UTF32Char currentChar
;
// Zero-length change with the last argument true; presumably this widens eight-bit
// storage to UniChars before contents is used as a UniChar pointer - confirm
3661 if (isEightBit
) __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true);
3663 contents
= (UniChar
*)__CFStrContents(string
);
3665 for (;currentIndex
< length
;currentIndex
++) {
// A high surrogate followed by a low surrogate is combined into one UTF-32 value
3666 if (CFUniCharIsSurrogateHighCharacter(contents
[currentIndex
]) && (currentIndex
+ 1 < length
) && CFUniCharIsSurrogateLowCharacter(contents
[currentIndex
+ 1])) {
3667 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(contents
[currentIndex
], contents
[currentIndex
+ 1]);
3669 currentChar
= contents
[currentIndex
];
// Conditional-mapping flags are computed only when a language code is set; otherwise 0
3672 flags
= (langCode
? CFUniCharGetConditionalCaseMappingFlags(currentChar
, contents
, currentIndex
, length
, kCFUniCharToUppercase
, langCode
, flags
) : 0);
3674 mappedLength
= CFUniCharMapCaseTo(currentChar
, mappedCharacters
, MAX_CASE_MAPPING_BUF
, kCFUniCharToUppercase
, flags
, langCode
);
// The first mapped UniChar always overwrites the current position
3675 if (mappedLength
> 0) contents
[currentIndex
] = *mappedCharacters
;
// A non-BMP source character occupies two UniChars, so the adjustment depends on
// how many UniChars the mapping produced; contents is re-fetched after every resize
3677 if (currentChar
> 0xFFFF) { // Non-BMP char
3678 switch (mappedLength
) {
3680 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 2), 0, true);
3681 contents
= (UniChar
*)__CFStrContents(string
);
3686 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 1), 0, true);
3687 contents
= (UniChar
*)__CFStrContents(string
);
3692 contents
[++currentIndex
] = mappedCharacters
[1];
3696 --mappedLength
; // Skip the current char
3697 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
- 1, true);
3698 contents
= (UniChar
*)__CFStrContents(string
);
3699 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
3700 length
+= (mappedLength
- 1);
3701 currentIndex
+= mappedLength
;
// BMP character that mapped to nothing: remove it
3704 } else if (mappedLength
== 0) {
3705 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 1), 0, true);
3706 contents
= (UniChar
*)__CFStrContents(string
);
// BMP character that expanded: open a gap after it and copy the extra mapped UniChars
3708 } else if (mappedLength
> 1) {
3709 --mappedLength
; // Skip the current char
3710 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
, true);
3711 contents
= (UniChar
*)__CFStrContents(string
);
3712 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
3713 length
+= mappedLength
;
3714 currentIndex
+= mappedLength
;
/* Capitalizes the contents of a mutable string in place.
   isLastCased selects the mapping for each character: kCFUniCharToLowercase when it is set,
   kCFUniCharToTitlecase when it is not. In the UniChar pass it is recomputed after every
   character that is not case-ignorable: true when that character is in the uppercase or
   lowercase letter set, false otherwise.
   Visible flow:
     1. When no language code applies and the storage is eight-bit, ASCII letters are
        converted bytewise according to isLastCased. A byte above 127 is tested for; the
        statement it triggers is not visible here (presumably it ends this fast pass - confirm).
     2. Any characters left (currentIndex < length) are handled as UniChars, with the string
        resized whenever a mapping yields a different number of UniChars.
   The bitmap for the BMP part of kCFUniCharCaseIgnorableCharacterSet is fetched once and
   cached in a function-level static.
   NOTE(review): the reviewed text is missing short lines (braces, case labels, break
   statements, the assignment of langCode, length adjustments, and presumably the statements
   that set isLastCased in the eight-bit pass); the comments below describe only what is visible.
   NOTE(review): isEightBit is read from string before CF_OBJC_FUNCDISPATCH1 runs - confirm
   that this is harmless when string is an ObjC object.
*/
3721 void CFStringCapitalize(CFMutableStringRef string
, CFLocaleRef locale
) {
3722 CFIndex currentIndex
= 0;
3724 const char *langCode
;
3725 Boolean isEightBit
= __CFStrIsEightBit(string
);
3726 Boolean isLastCased
= false;
3727 static const uint8_t *caseIgnorableForBMP
= NULL
;
3729 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfCapitalize:", locale
);
3731 __CFAssertIsStringAndMutable(string
);
3733 length
= __CFStrLength(string
);
// Lazily fetch the plane 0 bitmap of the case-ignorable set
3735 if (NULL
== caseIgnorableForBMP
) caseIgnorableForBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet
, 0);
// Fast pass: eight-bit storage with no language code, ASCII letters only
3739 if (!langCode
&& isEightBit
) {
3740 uint8_t *contents
= (uint8_t*)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
3741 for (;currentIndex
< length
;currentIndex
++) {
3742 if (contents
[currentIndex
] > 127) {
// Uppercase letter: lowered only when the previous relevant character was cased
3744 } else if (contents
[currentIndex
] >= 'A' && contents
[currentIndex
] <= 'Z') {
3745 contents
[currentIndex
] += (isLastCased
? 'a' - 'A' : 0);
// Lowercase letter: raised only when the previous relevant character was not cased
3747 } else if (contents
[currentIndex
] >= 'a' && contents
[currentIndex
] <= 'z') {
3748 contents
[currentIndex
] -= (!isLastCased
? 'a' - 'A' : 0);
// Any other character that is not case-ignorable ends the current cased run
3750 } else if (!CFUniCharIsMemberOfBitmap(contents
[currentIndex
], caseIgnorableForBMP
)) {
3751 isLastCased
= false;
// General pass over whatever the fast pass did not cover
3756 if (currentIndex
< length
) {
3758 UniChar mappedCharacters
[MAX_CASE_MAPPING_BUF
];
3759 CFIndex mappedLength
;
3760 UTF32Char currentChar
;
// Zero-length change with the last argument true; presumably this widens eight-bit
// storage to UniChars before contents is used as a UniChar pointer - confirm
3763 if (isEightBit
) __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true);
3765 contents
= (UniChar
*)__CFStrContents(string
);
3767 for (;currentIndex
< length
;currentIndex
++) {
// A high surrogate followed by a low surrogate is combined into one UTF-32 value
3768 if (CFUniCharIsSurrogateHighCharacter(contents
[currentIndex
]) && (currentIndex
+ 1 < length
) && CFUniCharIsSurrogateLowCharacter(contents
[currentIndex
+ 1])) {
3769 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(contents
[currentIndex
], contents
[currentIndex
+ 1]);
3771 currentChar
= contents
[currentIndex
];
// Conditional-mapping flags are computed only with a language code, or for U+03A3
// (Greek capital sigma) while inside a cased run; otherwise flags is 0
3773 flags
= ((langCode
|| ((currentChar
== 0x03A3) && isLastCased
)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar
, contents
, currentIndex
, length
, (isLastCased
? kCFUniCharToLowercase
: kCFUniCharToTitlecase
), langCode
, flags
) : 0);
3775 mappedLength
= CFUniCharMapCaseTo(currentChar
, mappedCharacters
, MAX_CASE_MAPPING_BUF
, (isLastCased
? kCFUniCharToLowercase
: kCFUniCharToTitlecase
), flags
, langCode
);
// The first mapped UniChar always overwrites the current position
3776 if (mappedLength
> 0) contents
[currentIndex
] = *mappedCharacters
;
// A non-BMP source character occupies two UniChars, so the adjustment depends on
// how many UniChars the mapping produced; contents is re-fetched after every resize
3778 if (currentChar
> 0xFFFF) { // Non-BMP char
3779 switch (mappedLength
) {
3781 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 2), 0, true);
3782 contents
= (UniChar
*)__CFStrContents(string
);
3787 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 1), 0, true);
3788 contents
= (UniChar
*)__CFStrContents(string
);
3793 contents
[++currentIndex
] = mappedCharacters
[1];
3797 --mappedLength
; // Skip the current char
3798 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
- 1, true);
3799 contents
= (UniChar
*)__CFStrContents(string
);
3800 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
3801 length
+= (mappedLength
- 1);
3802 currentIndex
+= mappedLength
;
// BMP character that mapped to nothing: remove it
3805 } else if (mappedLength
== 0) {
3806 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, 1), 0, true);
3807 contents
= (UniChar
*)__CFStrContents(string
);
// BMP character that expanded: open a gap after it and copy the extra mapped UniChars
3809 } else if (mappedLength
> 1) {
3810 --mappedLength
; // Skip the current char
3811 __CFStringChangeSize(string
, CFRangeMake(currentIndex
+ 1, 0), mappedLength
, true);
3812 contents
= (UniChar
*)__CFStrContents(string
);
3813 memmove(contents
+ currentIndex
+ 1, mappedCharacters
+ 1, mappedLength
* sizeof(UniChar
));
3814 length
+= mappedLength
;
3815 currentIndex
+= mappedLength
;
// Case-ignorable characters leave isLastCased untouched; anything else sets it according
// to whether the (pre-mapping) character is an uppercase or lowercase letter
3818 if (!((currentChar
> 0xFFFF) ? CFUniCharIsMemberOf(currentChar
, kCFUniCharCaseIgnorableCharacterSet
) : CFUniCharIsMemberOfBitmap(currentChar
, caseIgnorableForBMP
))) { // We have non-caseignorable here
3819 isLastCased
= ((CFUniCharIsMemberOf(currentChar
, kCFUniCharUppercaseLetterCharacterSet
) || CFUniCharIsMemberOf(currentChar
, kCFUniCharLowercaseLetterCharacterSet
)) ? true : false);
// Number of UTF32Char slots in the on-stack mapping buffer that CFStringNormalize() starts with
3825 #define MAX_DECOMP_BUF 64
// Hangul syllable constants. The values match those of the Unicode Standard's conjoining
// jamo (de)composition algorithm: SBASE is the first precomposed syllable, LBASE / VBASE /
// TBASE are the bases of the leading consonant, vowel and trailing consonant ranges, and
// the *COUNT values are the sizes of those ranges.
3827 #define HANGUL_SBASE 0xAC00
3828 #define HANGUL_LBASE 0x1100
3829 #define HANGUL_VBASE 0x1161
3830 #define HANGUL_TBASE 0x11A7
3831 #define HANGUL_SCOUNT 11172
3832 #define HANGUL_LCOUNT 19
3833 #define HANGUL_VCOUNT 21
3834 #define HANGUL_TCOUNT 28
// Syllables per leading consonant: every vowel combined with every trailing-consonant slot
3835 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
3837 CF_INLINE
uint32_t __CFGetUTF16Length(const UTF32Char
*characters
, uint32_t utf32Length
) {
3838 const UTF32Char
*limit
= characters
+ utf32Length
;
3839 uint32_t length
= 0;
3841 while (characters
< limit
) length
+= (*(characters
++) > 0xFFFF ? 2 : 1);
3846 CF_INLINE
void __CFFillInUTF16(const UTF32Char
*characters
, UTF16Char
*dst
, uint32_t utf32Length
) {
3847 const UTF32Char
*limit
= characters
+ utf32Length
;
3848 UTF32Char currentChar
;
3850 while (characters
< limit
) {
3851 currentChar
= *(characters
++);
3852 if (currentChar
> 0xFFFF) {
3853 currentChar
-= 0x10000;
3854 *(dst
++) = (UTF16Char
)((currentChar
>> 10) + 0xD800UL
);
3855 *(dst
++) = (UTF16Char
)((currentChar
& 0x3FF) + 0xDC00UL
);
3857 *(dst
++) = currentChar
;
// CFStringNormalize
// Rewrites the mutable string `string` in place according to `theForm`.
//  - The call is first offered to CF_OBJC_FUNCDISPATCH1 with the selector "_cfNormalize:".
//  - 8-bit strings: form C returns immediately; otherwise the bytes are scanned for the first value > 127.
//  - 16-bit contents are walked one code point at a time (valid surrogate pairs are joined). A base
//    character and the non-base characters following it are gathered into `mappedCharacters`,
//    optionally sorted, compatibility-decomposed when the kCFStringNormalizationFormKD bit is set,
//    recomposed when the kCFStringNormalizationFormC bit is set, and written back as UTF-16.
// NOTE(review): this listing omits some source lines (bare declarations, `else` arms, closing braces);
// the comments below describe only the statements that are visible.
3862 void CFStringNormalize(CFMutableStringRef string
, CFStringNormalizationForm theForm
) {
3863 CFIndex currentIndex
= 0;
3865 bool needToReorder
= true;
3867 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID
, void, string
, "_cfNormalize:", theForm
);
3869 __CFAssertIsStringAndMutable(string
);
3871 length
= __CFStrLength(string
);
3873 if (__CFStrIsEightBit(string
)) {
3876 if (theForm
== kCFStringNormalizationFormC
) return; // 8bit form has no decomposition
3878 contents
= (uint8_t*)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
// Look for the first byte above 127; currentIndex is left pointing at it.
3880 for (;currentIndex
< length
;currentIndex
++) {
3881 if (contents
[currentIndex
] > 127) {
// Zero-length size change with the last argument true; presumably this switches the
// string to Unicode storage so the loop below can run — TODO confirm in __CFStringChangeSize.
3882 __CFStringChangeSize(string
, CFRangeMake(0, 0), 0, true); // need to do harm way
3883 needToReorder
= false;
// Anything from currentIndex onward still needs processing.
3889 if (currentIndex
< length
) {
3890 UTF16Char
*limit
= (UTF16Char
*)__CFStrContents(string
) + length
;
3891 UTF16Char
*contents
= (UTF16Char
*)__CFStrContents(string
) + currentIndex
;
// Scratch space for one combining sequence; starts on the stack and is moved to the heap when full.
3892 UTF32Char buffer
[MAX_DECOMP_BUF
];
3893 UTF32Char
*mappedCharacters
= buffer
;
3894 CFIndex allocatedLength
= MAX_DECOMP_BUF
;
3895 CFIndex mappedLength
;
3896 CFIndex currentLength
;
3897 UTF32Char currentChar
;
3899 while (contents
< limit
) {
// Read the next code point, joining a high/low surrogate pair when both halves are present.
3900 if (CFUniCharIsSurrogateHighCharacter(*contents
) && (contents
+ 1 < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
3901 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*contents
, *(contents
+ 1));
3905 currentChar
= *(contents
++);
// Canonically decomposable characters that are not themselves non-base characters get decomposed here.
3911 if (CFUniCharIsMemberOf(currentChar
, kCFUniCharCanonicalDecomposableCharacterSet
) && !CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) {
// NOTE(review): the upper test uses `>`, so HANGUL_SBASE + HANGUL_SCOUNT itself is treated as a
// syllable although the last syllable is SBASE + SCOUNT - 1 — confirm whether `>=` was intended.
3912 if ((theForm
& kCFStringNormalizationFormC
) == 0 || currentChar
< HANGUL_SBASE
|| currentChar
> (HANGUL_SBASE
+ HANGUL_SCOUNT
)) { // We don't have to decompose Hangul Syllables if we're precomposing again
3913 mappedLength
= CFUniCharDecomposeCharacter(currentChar
, mappedCharacters
, MAX_DECOMP_BUF
);
// Look at what follows the base character: trailing non-base characters are pulled into the same run.
3917 if ((needToReorder
|| (theForm
& kCFStringNormalizationFormC
)) && ((contents
< limit
) || (mappedLength
== 0))) {
3918 if (mappedLength
> 0) {
3919 if (CFUniCharIsSurrogateHighCharacter(*contents
) && (contents
+ 1 < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
3920 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*contents
, *(contents
+ 1));
3922 currentChar
= *contents
;
3926 if (CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) {
3927 uint32_t decompLength
;
// Nothing was decomposed yet: step back over the current character and seed the run with the
// character that precedes it in the string (joining a surrogate pair if there is one).
3929 if (mappedLength
== 0) {
3930 contents
-= (currentChar
& 0xFFFF0000 ? 2 : 1);
3931 if (currentIndex
> 0) {
3932 if (CFUniCharIsSurrogateLowCharacter(*(contents
- 1)) && (currentIndex
> 1) && CFUniCharIsSurrogateHighCharacter(*(contents
- 2))) {
3933 *mappedCharacters
= CFUniCharGetLongCharacterForSurrogatePair(*(contents
- 2), *(contents
- 1));
3937 *mappedCharacters
= *(contents
- 1);
// Account for the non-base character in UTF-16 units (2 when it lies above the BMP).
3944 currentLength
+= (currentChar
& 0xFFFF0000 ? 2 : 1);
3946 contents
+= (currentChar
& 0xFFFF0000 ? 2 : 1);
3948 if (CFUniCharIsMemberOf(currentChar
, kCFUniCharDecomposableCharacterSet
)) { // Vietnamese accent, etc.
3949 decompLength
= CFUniCharDecomposeCharacter(currentChar
, mappedCharacters
+ mappedLength
, MAX_DECOMP_BUF
- mappedLength
);
3950 mappedLength
+= decompLength
;
3952 mappedCharacters
[mappedLength
++] = currentChar
;
// Keep collecting until a character that is not a non-base character is reached.
3955 while (contents
< limit
) {
3956 if (CFUniCharIsSurrogateHighCharacter(*contents
) && (contents
+ 1 < limit
) && CFUniCharIsSurrogateLowCharacter(*(contents
+ 1))) {
3957 currentChar
= CFUniCharGetLongCharacterForSurrogatePair(*contents
, *(contents
+ 1));
3959 currentChar
= *contents
;
3961 if (!CFUniCharIsMemberOf(currentChar
, kCFUniCharNonBaseCharacterSet
)) break;
3962 if (currentChar
& 0xFFFF0000) {
// Scratch buffer is full: grow it by MAX_DECOMP_BUF entries. The first growth copies the stack
// buffer into a fresh heap block; later growths reallocate.
// NOTE(review): the allocation results are not checked in the visible code.
3969 if (mappedLength
== allocatedLength
) {
3970 allocatedLength
+= MAX_DECOMP_BUF
;
3971 if (mappedCharacters
== buffer
) {
3972 mappedCharacters
= (UTF32Char
*)CFAllocatorAllocate(NULL
, allocatedLength
* sizeof(UTF32Char
), 0);
3973 memmove(mappedCharacters
, buffer
, MAX_DECOMP_BUF
* sizeof(UTF32Char
));
3975 mappedCharacters
= (UTF32Char
*)CFAllocatorReallocate(NULL
, mappedCharacters
, allocatedLength
* sizeof(UTF32Char
), 0);
// NOTE(review): the capacity passed below is MAX_DECOMP_BUF - mappedLength even after the buffer has
// been grown to allocatedLength; once mappedLength reaches MAX_DECOMP_BUF this is zero or negative — confirm.
3978 if (CFUniCharIsMemberOf(currentChar
, kCFUniCharDecomposableCharacterSet
)) { // Vietnamese accent, etc.
3979 decompLength
= CFUniCharDecomposeCharacter(currentChar
, mappedCharacters
+ mappedLength
, MAX_DECOMP_BUF
- mappedLength
);
3980 mappedLength
+= decompLength
;
3982 mappedCharacters
[mappedLength
++] = currentChar
;
// Sort the collected run (presumably by combining class — see CFUniCharPrioritySort).
3986 if (needToReorder
&& mappedLength
> 1) CFUniCharPrioritySort(mappedCharacters
, mappedLength
);
// Compatibility decomposition, applied when the FormKD bit is set.
3989 if (theForm
& kCFStringNormalizationFormKD
) {
3990 CFIndex newLength
= 0;
3992 if (mappedLength
== 0 && CFUniCharIsMemberOf(currentChar
, kCFUniCharCompatibilityDecomposableCharacterSet
)) {
3993 mappedCharacters
[mappedLength
++] = currentChar
;
// A zero result from CFUniCharCompatibilityDecompose is handled by growing the buffer;
// it appears to mean "did not fit" — TODO confirm against that function.
3995 while (newLength
< mappedLength
) {
3996 newLength
= CFUniCharCompatibilityDecompose(mappedCharacters
, mappedLength
, allocatedLength
);
3997 if (newLength
== 0) {
3998 allocatedLength
+= MAX_DECOMP_BUF
;
3999 if (mappedCharacters
== buffer
) {
4000 mappedCharacters
= (UTF32Char
*)CFAllocatorAllocate(NULL
, allocatedLength
* sizeof(UTF32Char
), 0);
4001 memmove(mappedCharacters
, buffer
, MAX_DECOMP_BUF
* sizeof(UTF32Char
));
4003 mappedCharacters
= (UTF32Char
*)CFAllocatorReallocate(NULL
, mappedCharacters
, allocatedLength
* sizeof(UTF32Char
), 0);
4007 mappedLength
= newLength
;
// Recomposition, applied when the FormC bit is set.
4010 if (theForm
& kCFStringNormalizationFormC
) {
4011 if (mappedLength
> 1) {
4012 CFIndex consumedLength
= 1;
4014 UTF32Char
*currentBase
= mappedCharacters
;
4015 uint8_t currentClass
, lastClass
= 0;
4016 const uint8_t *bmpClassTable
= CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty
, 0);
4017 bool didCombine
= false;
4019 currentChar
= *mappedCharacters
;
// Walk the run, trying to combine each following character into the current base.
4021 while (consumedLength
< mappedLength
) {
4022 nextChar
= mappedCharacters
[consumedLength
];
4023 currentClass
= (nextChar
& 0xFFFF0000 ? CFUniCharGetUnicodeProperty(nextChar
, kCFUniCharCombiningProperty
) : CFUniCharGetCombiningPropertyForCharacter(nextChar
, bmpClassTable
));
// With the KD bit set, conjoining jamo inside the run are composed arithmetically into a syllable.
// NOTE(review): lIndex is an SInt8 holding a difference that can be as large as 0xFE, and the range
// tests below use `<=` against HANGUL_LCOUNT / HANGUL_VCOUNT and `>` against HANGUL_TCOUNT. The
// Unicode Hangul algorithm uses indices strictly below the counts — confirm these bounds.
4025 if (theForm
& kCFStringNormalizationFormKD
) {
4026 if ((currentChar
>= HANGUL_LBASE
) && (currentChar
< (HANGUL_LBASE
+ 0xFF))) {
4027 SInt8 lIndex
= currentChar
- HANGUL_LBASE
;
4029 if ((0 <= lIndex
) && (lIndex
<= HANGUL_LCOUNT
)) {
4030 SInt16 vIndex
= nextChar
- HANGUL_VBASE
;
4032 if ((vIndex
>= 0) && (vIndex
<= HANGUL_VCOUNT
)) {
4034 CFIndex usedLength
= mappedLength
;
// Consumed jamo are overwritten with 0xFFFD and squeezed out of the run further down.
4036 mappedCharacters
[consumedLength
++] = 0xFFFD;
4038 if (consumedLength
< mappedLength
) {
4039 tIndex
= mappedCharacters
[consumedLength
] - HANGUL_TBASE
;
4040 if ((tIndex
< 0) || (tIndex
> HANGUL_TCOUNT
)) {
4043 mappedCharacters
[consumedLength
++] = 0xFFFD;
4046 *currentBase
= (lIndex
* HANGUL_VCOUNT
+ vIndex
) * HANGUL_TCOUNT
+ tIndex
+ HANGUL_SBASE
;
// Scan backwards and close up the slots holding the 0xFFFD placeholder.
4048 while (--usedLength
> 0) {
4049 if (mappedCharacters
[usedLength
] == 0xFFFD) {
4052 memmove(mappedCharacters
+ usedLength
, mappedCharacters
+ usedLength
+ 1, (mappedLength
- usedLength
) * sizeof(UTF32Char
));
4055 currentBase
= mappedCharacters
+ consumedLength
;
4056 currentChar
= *currentBase
;
// A character that is not a non-base character starts a new base: store the finished one and move on.
4063 if (!CFUniCharIsMemberOf(nextChar
, kCFUniCharNonBaseCharacterSet
)) {
4064 *currentBase
= currentChar
;
4065 currentBase
= mappedCharacters
+ consumedLength
;
4066 currentChar
= nextChar
;
// Precomposition is attempted only when lastClass is 0 or differs from this character's class.
// A 0xFFFD result from CFUniCharPrecomposeCharacter is handled as "no composition".
4071 if ((lastClass
== 0) || (currentClass
!= lastClass
)) {
4072 nextChar
= CFUniCharPrecomposeCharacter(currentChar
, nextChar
);
4073 if (nextChar
== 0xFFFD) {
4074 lastClass
= currentClass
;
4076 mappedCharacters
[consumedLength
] = 0xFFFD;
4078 currentChar
= nextChar
;
4085 *currentBase
= currentChar
;
// Squeeze out the 0xFFFD placeholders left by successful compositions.
4087 consumedLength
= mappedLength
;
4088 while (--consumedLength
> 0) {
4089 if (mappedCharacters
[consumedLength
] == 0xFFFD) {
4091 memmove(mappedCharacters
+ consumedLength
, mappedCharacters
+ consumedLength
+ 1, (mappedLength
- consumedLength
) * sizeof(UTF32Char
));
// Run of at most one character: compose Hangul jamo directly from the string contents.
// NOTE(review): same `<=` / `>` bounds and SInt8 narrowing as in the jamo branch above — confirm.
4095 } else if ((currentChar
>= HANGUL_LBASE
) && (currentChar
< (HANGUL_LBASE
+ 0xFF))) { // Hangul Jamo
4096 SInt8 lIndex
= currentChar
- HANGUL_LBASE
;
4098 if ((contents
< limit
) && (0 <= lIndex
) && (lIndex
<= HANGUL_LCOUNT
)) {
4099 SInt16 vIndex
= *contents
- HANGUL_VBASE
;
4101 if ((vIndex
>= 0) && (vIndex
<= HANGUL_VCOUNT
)) {
4104 ++contents
; ++currentLength
;
4106 if (contents
< limit
) {
4107 tIndex
= *contents
- HANGUL_TBASE
;
4108 if ((tIndex
< 0) || (tIndex
> HANGUL_TCOUNT
)) {
4111 ++contents
; ++currentLength
;
4114 *mappedCharacters
= (lIndex
* HANGUL_VCOUNT
+ vIndex
) * HANGUL_TCOUNT
+ tIndex
+ HANGUL_SBASE
;
// Write the mapped run back: resize the affected range if its UTF-16 length changed, then
// re-read contents/limit because the size change may have moved the storage.
4121 if (mappedLength
> 0) {
4122 CFIndex utf16Length
= __CFGetUTF16Length(mappedCharacters
, mappedLength
);
4124 if (utf16Length
!= currentLength
) {
4125 __CFStringChangeSize(string
, CFRangeMake(currentIndex
, currentLength
), utf16Length
, true);
4126 currentLength
= utf16Length
;
4128 contents
= (UTF16Char
*)__CFStrContents(string
);
4129 limit
= contents
+ __CFStrLength(string
);
4130 contents
+= currentIndex
;
4131 __CFFillInUTF16(mappedCharacters
, contents
, mappedLength
);
4132 contents
+= utf16Length
;
4134 currentIndex
+= currentLength
;
// Release the scratch buffer if it was moved to the heap.
4137 if (mappedCharacters
!= buffer
) CFAllocatorDeallocate(NULL
, mappedCharacters
);
4141 #define POSIX_SEPARATOR "/"
4143 CF_INLINE
void __CFStringReplacePathSeparator(CFMutableStringRef string
, const char from
, const char to
) {
4144 uint8_t *contents
= (uint8_t*)__CFStrContents(string
) + __CFStrSkipAnyLengthByte(string
);
4145 CFIndex length
= __CFStrLength2(string
, contents
);
4146 bool isUnicode
= __CFStrIsUnicode(string
);
4149 for (idx
= 0;idx
< length
;idx
++) {
4150 if ((isUnicode
? ((UniChar
*)contents
)[idx
] : ((uint8_t*)contents
)[idx
]) == from
) {
4152 ((UniChar
*)contents
)[idx
] = to
;
4154 ((uint8_t*)contents
)[idx
] = to
;
// Bit flags recorded in a format spec's `flags` field by __CFParseFormatSpec.
4161 kCFStringFormatZeroFlag
= (1 << 0), // if not, padding is space char
4162 kCFStringFormatMinusFlag
= (1 << 1), // if not, no flag implied
4163 kCFStringFormatPlusFlag
= (1 << 2), // if not, no flag implied, overrides space
4164 kCFStringFormatSpaceFlag
= (1 << 3) // if not, no flag implied
// Integer member of the per-argument value storage (accessed as `.value.longlongValue`).
4185 int64_t longlongValue
;
// Size codes stored in a format spec's `size` field; 0 means no explicit size was parsed.
4192 CFFormatDefaultSize
= 0,
4197 CFFormatSize16
= 5, /* unused */
// Type codes stored in a format spec's `type` field.
4201 CFFormatLiteralType
= 32,
4202 CFFormatLongType
= 33,
4203 CFFormatDoubleType
= 34,
4204 CFFormatPointerType
= 35,
4205 CFFormatObjectType
= 36, /* handled specially */ /* ??? not used anymore, can be removed? */
4206 CFFormatCFType
= 37, /* handled specially */
4207 CFFormatUnicharsType
= 38, /* handled specially */
4208 CFFormatCharsType
= 39, /* handled specially */
4209 CFFormatPascalCharsType
= 40, /* handled specially */
4210 CFFormatSingleUnicharType
= 41 /* handled specially */
// __CFParseFormatSpec
// Parses one format specification and records the result in `spec`.
//   uformat / cformat - the format text as UniChars or as bytes; cformat is used when it is non-NULL
//   fmtIdx            - in/out index of the next character to read; advanced as characters are consumed
//   fmtLen            - number of characters in the format text
//   spec              - receives flags, size, type, widthArg/precArg and the *ArgNum fields
// A widthArgNum or precArgNum of -2 marks an argument-supplied width or precision whose argument
// number has not been assigned yet (see the digit handling below).
// NOTE(review): most `case` labels of the switch are not present in this listing.
4213 CF_INLINE
void __CFParseFormatSpec(const UniChar
*uformat
, const uint8_t *cformat
, SInt32
*fmtIdx
, SInt32 fmtLen
, CFFormatSpec
*spec
) {
4214 Boolean seenDot
= false;
4217 if (fmtLen
<= *fmtIdx
) return; /* no type */
4218 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)++]; else ch
= uformat
[(*fmtIdx
)++];
4219 reswtch
:switch (ch
) {
4220 case '#': // ignored for now
// Space flag is only recorded when '+' has not been seen.
4223 if (!(spec
->flags
& kCFStringFormatPlusFlag
)) spec
->flags
|= kCFStringFormatSpaceFlag
;
// Minus flag cancels zero padding.
4226 spec
->flags
|= kCFStringFormatMinusFlag
;
4227 spec
->flags
&= ~kCFStringFormatZeroFlag
; // remove zero flag
// Plus flag overrides the space flag.
4230 spec
->flags
|= kCFStringFormatPlusFlag
;
4231 spec
->flags
&= ~kCFStringFormatSpaceFlag
; // remove space flag
// Zero flag is only recorded when '-' has not been seen.
4234 if (!(spec
->flags
& kCFStringFormatMinusFlag
)) spec
->flags
|= kCFStringFormatZeroFlag
;
4237 spec
->size
= CFFormatSize2
;
// Size modifier handling: peek at the following character to detect a doubled 'l'.
4240 if (*fmtIdx
< fmtLen
) {
4241 // fetch next character, don't increment fmtIdx
4242 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)]; else ch
= uformat
[(*fmtIdx
)];
4243 if ('l' == ch
) { // 'll' for long long, like 'q'
4245 spec
->size
= CFFormatSize8
;
4249 spec
->size
= CFFormatSize4
;
4252 spec
->size
= CFFormatSize8
;
4255 spec
->type
= CFFormatLongType
;
4256 spec
->size
= CFFormatSize1
;
// Integer conversions.
4258 case 'O': case 'o': case 'D': case 'd': case 'i': case 'U': case 'u': case 'x': case 'X':
4259 spec
->type
= CFFormatLongType
;
// Floating-point conversions are always collected as 8-byte doubles.
4261 case 'e': case 'E': case 'f': case 'g': case 'G':
4262 spec
->type
= CFFormatDoubleType
;
4263 spec
->size
= CFFormatSize8
;
4265 case 'n': case 'p': /* %n is not handled correctly currently */
4266 spec
->type
= CFFormatPointerType
;
4267 spec
->size
= CFFormatSize4
;
4270 spec
->type
= CFFormatCharsType
;
4271 spec
->size
= CFFormatSize4
;
4274 spec
->type
= CFFormatUnicharsType
;
4275 spec
->size
= CFFormatSize4
;
4278 spec
->type
= CFFormatSingleUnicharType
;
4279 spec
->size
= CFFormatSize2
;
4282 spec
->type
= CFFormatPascalCharsType
;
4283 spec
->size
= CFFormatSize4
;
4286 spec
->type
= CFFormatCFType
;
4287 spec
->size
= CFFormatSize4
;
// Decimal number: an explicit argument number, a precision, or a width, depending on what follows
// and on whether a '.' has been seen.
// NOTE(review): the loop below reads the next character without comparing *fmtIdx against fmtLen;
// a format that ends in digits appears to read one element past the end — confirm.
4289 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
4292 number
= 10 * number
+ (ch
- '0');
4293 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)++]; else ch
= uformat
[(*fmtIdx
)++];
4294 } while ((UInt32
)(ch
- '0') <= 9);
// The number fills the first pending slot: precision argument, then width argument, then main argument.
4296 if (-2 == spec
->precArgNum
) {
4297 spec
->precArgNum
= number
- 1; // Arg numbers start from 1
4298 } else if (-2 == spec
->widthArgNum
) {
4299 spec
->widthArgNum
= number
- 1; // Arg numbers start from 1
4301 spec
->mainArgNum
= number
- 1; // Arg numbers start from 1
4304 } else if (seenDot
) { /* else it's either precision or width */
4305 spec
->precArg
= (SInt32
)number
;
4307 spec
->widthArg
= (SInt32
)number
;
// Width taken from an argument; -2 means "argument number still to be assigned".
4312 spec
->widthArgNum
= -2;
4316 if (cformat
) ch
= (UniChar
)cformat
[(*fmtIdx
)++]; else ch
= uformat
[(*fmtIdx
)++];
// Precision taken from an argument; -2 means "argument number still to be assigned".
4318 spec
->precArgNum
= -2;
// Anything else is reported back to the caller as a literal.
4323 spec
->type
= CFFormatLiteralType
;
// Local snprintf() definitions for the __MACOS8__ and __WIN32__ builds.
// NOTE(review): the __MACOS8__ variant forwards to vsprintf() and never uses `n`, so the output
// is not bounded by the caller's buffer size.
4329 #if defined(__MACOS8__)
4330 static int snprintf (char *b
, size_t n
, const char * f
, ...) {
4334 retval
= vsprintf(b
, f
, args
);
// NOTE(review): Microsoft's _vsnprintf does not write a terminating NUL when the output fills
// the buffer; verify that callers terminate the buffer themselves.
4338 #elif defined(__WIN32__)
4339 static int snprintf (char *b
, size_t n
, const char * f
, ...) {
4343 retval
= _vsnprintf(b
, n
, f
, args
);
4349 /* ??? It ignores the formatOptions argument.
4350 ??? %s depends on handling of encodings by __CFStringAppendBytes
*/
// CFStringAppendFormatAndArguments
// Appends formatted text to `outputString` by forwarding to _CFStringAppendFormatAndArgumentsAux
// with a NULL description callback; `formatOptions`, `formatString` and `args` are passed through unchanged.
4352 void CFStringAppendFormatAndArguments(CFMutableStringRef outputString
, CFDictionaryRef formatOptions
, CFStringRef formatString
, va_list args
) {
4353 _CFStringAppendFormatAndArgumentsAux(outputString
, NULL
, formatOptions
, formatString
, args
);
// SNPRINTF(TYPE, WHAT): casts WHAT to TYPE and formats it with snprintf() into `buffer` using
// `formatBuffer`. When widthArgNum and/or precArgNum of the current spec is not -1, `width` and/or
// `precision` are passed to snprintf ahead of the value. The macro relies on specs, curSpec, buffer,
// formatBuffer, width and precision being in scope where it is expanded.
// NOTE(review): the size passed is always 255, although `buffer` at the expansion site is declared
// with 256 + width + precision bytes — confirm this truncation is intended.
4356 #define SNPRINTF(TYPE, WHAT) { \
4357 TYPE value = (TYPE) WHAT; \
4358 if (-1 != specs[curSpec].widthArgNum) { \
4359 if (-1 != specs[curSpec].precArgNum) { \
4360 snprintf(buffer, 255, formatBuffer, width, precision, value); \
4362 snprintf(buffer, 255, formatBuffer, width, value); \
4365 if (-1 != specs[curSpec].precArgNum) { \
4366 snprintf(buffer, 255, formatBuffer, precision, value); \
4368 snprintf(buffer, 255, formatBuffer, value); \
// _CFStringAppendFormatAndArgumentsAux
// Appends to `outputString` the result of formatting `formatString` with the arguments in `args`.
//   copyDescFunc  - description callback used for object arguments (see the CFFormatCFType case);
//                   the public wrapper passes NULL
//   formatOptions - dictionary consulted for kCFNSDecimalSeparatorKey and handed to the description functions
// Phases: obtain the format characters, count '%' for an upper bound on specs, parse the specs,
// build the per-argument `values` table, collect the arguments with va_arg, emit each piece, free temporaries.
// NOTE(review): this listing omits some source lines (bare declarations, `else` arms, closing braces);
// the comments below describe only the statements that are visible.
4372 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString
, CFStringRef (*copyDescFunc
)(void *, CFDictionaryRef
), CFDictionaryRef formatOptions
, CFStringRef formatString
, va_list args
) {
4373 SInt32 numSpecs
, sizeSpecs
, sizeArgNum
, formatIdx
, curSpec
, argNum
;
4375 #define FORMAT_BUFFER_LEN 400
4376 const uint8_t *cformat
= NULL
;
4377 const UniChar
*uformat
= NULL
;
4378 UniChar
*formatChars
= NULL
;
4379 UniChar localFormatBuffer
[FORMAT_BUFFER_LEN
];
4381 #define VPRINTF_BUFFER_LEN 61
4382 CFFormatSpec localSpecsBuffer
[VPRINTF_BUFFER_LEN
];
4383 CFFormatSpec
*specs
;
4384 CFPrintValue localValuesBuffer
[VPRINTF_BUFFER_LEN
];
4385 CFPrintValue
*values
;
4386 CFAllocatorRef tmpAlloc
= NULL
;
// Use the format string's own storage when it is a CF string: bytes for non-Unicode strings, UniChars otherwise.
4394 formatLen
= CFStringGetLength(formatString
);
4395 if (!CF_IS_OBJC(__kCFStringTypeID
, formatString
)) {
4396 __CFAssertIsString(formatString
);
4397 if (!__CFStrIsUnicode(formatString
)) {
4398 cformat
= __CFStrContents(formatString
);
4399 if (cformat
) cformat
+= __CFStrSkipAnyLengthByte(formatString
);
4401 uformat
= __CFStrContents(formatString
);
// No direct access: copy the characters into the local buffer, or into a heap buffer when the
// format is longer than FORMAT_BUFFER_LEN.
4404 if (!cformat
&& !uformat
) {
4405 formatChars
= (formatLen
> FORMAT_BUFFER_LEN
) ? CFAllocatorAllocate(tmpAlloc
= __CFGetDefaultAllocator(), formatLen
* sizeof(UniChar
), 0) : localFormatBuffer
;
4406 if (formatChars
!= localFormatBuffer
&& __CFOASafe
) __CFSetLastAllocationEventName(formatChars
, "CFString (temp)");
4407 CFStringGetCharacters(formatString
, CFRangeMake(0, formatLen
), formatChars
);
4408 uformat
= formatChars
;
4411 /* Compute an upper bound for the number of format specifications */
4413 for (formatIdx
= 0; formatIdx
< formatLen
; formatIdx
++) if ('%' == cformat
[formatIdx
]) sizeSpecs
++;
4415 for (formatIdx
= 0; formatIdx
< formatLen
; formatIdx
++) if ('%' == uformat
[formatIdx
]) sizeSpecs
++;
// Room for 2 * sizeSpecs + 1 entries; the stack array is used when that fits.
4417 tmpAlloc
= __CFGetDefaultAllocator();
4418 specs
= ((2 * sizeSpecs
+ 1) > VPRINTF_BUFFER_LEN
) ? CFAllocatorAllocate(tmpAlloc
, (2 * sizeSpecs
+ 1) * sizeof(CFFormatSpec
), 0) : localSpecsBuffer
;
4419 if (specs
!= localSpecsBuffer
&& __CFOASafe
) __CFSetLastAllocationEventName(specs
, "CFString (temp)");
4421 /* Collect format specification information from the format string */
4422 for (curSpec
= 0, formatIdx
= 0; formatIdx
< formatLen
; curSpec
++) {
// Reset the entry; -1 in the *Arg / *ArgNum fields means "not specified".
4424 specs
[curSpec
].loc
= formatIdx
;
4425 specs
[curSpec
].len
= 0;
4426 specs
[curSpec
].size
= 0;
4427 specs
[curSpec
].type
= 0;
4428 specs
[curSpec
].flags
= 0;
4429 specs
[curSpec
].widthArg
= -1;
4430 specs
[curSpec
].precArg
= -1;
4431 specs
[curSpec
].mainArgNum
= -1;
4432 specs
[curSpec
].precArgNum
= -1;
4433 specs
[curSpec
].widthArgNum
= -1;
// Advance to the next '%' (or the end of the format).
4435 for (newFmtIdx
= formatIdx
; newFmtIdx
< formatLen
&& '%' != cformat
[newFmtIdx
]; newFmtIdx
++);
4437 for (newFmtIdx
= formatIdx
; newFmtIdx
< formatLen
&& '%' != uformat
[newFmtIdx
]; newFmtIdx
++);
4439 if (newFmtIdx
!= formatIdx
) { /* Literal chunk */
4440 specs
[curSpec
].type
= CFFormatLiteralType
;
4441 specs
[curSpec
].len
= newFmtIdx
- formatIdx
;
4443 newFmtIdx
++; /* Skip % */
4444 __CFParseFormatSpec(uformat
, cformat
, &newFmtIdx
, formatLen
, &(specs
[curSpec
]));
// A spec that parsed as a literal is emitted as the single character after the '%'.
4445 if (CFFormatLiteralType
== specs
[curSpec
].type
) {
4446 specs
[curSpec
].loc
= formatIdx
+ 1;
4447 specs
[curSpec
].len
= 1;
4449 specs
[curSpec
].len
= newFmtIdx
- formatIdx
;
4452 formatIdx
= newFmtIdx
;
4454 // printf("specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
4458 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
4459 values
= ((3 * sizeSpecs
+ 1) > VPRINTF_BUFFER_LEN
) ? CFAllocatorAllocate(tmpAlloc
, (3 * sizeSpecs
+ 1) * sizeof(CFPrintValue
), 0) : localValuesBuffer
;
4460 if (values
!= localValuesBuffer
&& __CFOASafe
) __CFSetLastAllocationEventName(values
, "CFString (temp)");
4461 memset(values
, 0, (3 * sizeSpecs
+ 1) * sizeof(CFPrintValue
));
4462 sizeArgNum
= (3 * sizeSpecs
+ 1);
4464 /* Compute values array */
4466 for (curSpec
= 0; curSpec
< numSpecs
; curSpec
++) {
4467 SInt32 newMaxArgNum
;
4468 if (0 == specs
[curSpec
].type
) continue;
4469 if (CFFormatLiteralType
== specs
[curSpec
].type
) continue;
// Bail out when an explicit argument number exceeds the table size.
// NOTE(review): only `sizeArgNum < newMaxArgNum` bails, so an argument number equal to sizeArgNum
// passes and is later used as an index into `values`, which was sized and cleared for sizeArgNum
// entries — confirm whether `<=` was intended.
4470 newMaxArgNum
= sizeArgNum
;
4471 if (newMaxArgNum
< specs
[curSpec
].mainArgNum
) {
4472 newMaxArgNum
= specs
[curSpec
].mainArgNum
;
4474 if (newMaxArgNum
< specs
[curSpec
].precArgNum
) {
4475 newMaxArgNum
= specs
[curSpec
].precArgNum
;
4477 if (newMaxArgNum
< specs
[curSpec
].widthArgNum
) {
4478 newMaxArgNum
= specs
[curSpec
].widthArgNum
;
4480 if (sizeArgNum
< newMaxArgNum
) {
4481 if (specs
!= localSpecsBuffer
) CFAllocatorDeallocate(tmpAlloc
, specs
);
4482 if (values
!= localValuesBuffer
) CFAllocatorDeallocate(tmpAlloc
, values
);
4483 if (formatChars
&& (formatChars
!= localFormatBuffer
)) CFAllocatorDeallocate(tmpAlloc
, formatChars
);
4484 return; // more args than we expected!
4486 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
// Unnumbered arguments take the next sequential slot: width first, then precision, then the value.
4487 if (-2 == specs
[curSpec
].widthArgNum
) {
4488 specs
[curSpec
].widthArgNum
= argNum
++;
4490 if (-2 == specs
[curSpec
].precArgNum
) {
4491 specs
[curSpec
].precArgNum
= argNum
++;
4493 if (-1 == specs
[curSpec
].mainArgNum
) {
4494 specs
[curSpec
].mainArgNum
= argNum
++;
// Record how each argument slot has to be fetched; width and precision slots are default-size integers.
4496 values
[specs
[curSpec
].mainArgNum
].size
= specs
[curSpec
].size
;
4497 values
[specs
[curSpec
].mainArgNum
].type
= specs
[curSpec
].type
;
4498 if (-1 != specs
[curSpec
].widthArgNum
) {
4499 values
[specs
[curSpec
].widthArgNum
].size
= 0;
4500 values
[specs
[curSpec
].widthArgNum
].type
= CFFormatLongType
;
4502 if (-1 != specs
[curSpec
].precArgNum
) {
4503 values
[specs
[curSpec
].precArgNum
].size
= 0;
4504 values
[specs
[curSpec
].precArgNum
].type
= CFFormatLongType
;
4508 /* Collect the arguments in correct type from vararg list */
4509 for (argNum
= 0; argNum
< sizeArgNum
; argNum
++) {
4510 switch (values
[argNum
].type
) {
4512 case CFFormatLiteralType
:
// Integer-like arguments are fetched according to their size code and widened to int64_t.
4514 case CFFormatLongType
:
4515 case CFFormatSingleUnicharType
:
4516 if (CFFormatSize1
== values
[argNum
].size
) {
4517 values
[argNum
].value
.longlongValue
= (int64_t)(char)va_arg(args
, int);
4518 } else if (CFFormatSize2
== values
[argNum
].size
) {
4519 values
[argNum
].value
.longlongValue
= (int64_t)(short)va_arg(args
, int);
4520 } else if (CFFormatSize4
== values
[argNum
].size
) {
4521 values
[argNum
].value
.longlongValue
= (int64_t)va_arg(args
, long);
4522 } else if (CFFormatSize8
== values
[argNum
].size
) {
4523 values
[argNum
].value
.longlongValue
= (int64_t)va_arg(args
, int64_t);
4525 values
[argNum
].value
.longlongValue
= (int64_t)va_arg(args
, int);
4528 case CFFormatDoubleType
:
4529 values
[argNum
].value
.doubleValue
= va_arg(args
, double);
// Everything pointer-shaped is fetched as void *.
4531 case CFFormatPointerType
:
4532 case CFFormatObjectType
:
4533 case CFFormatCFType
:
4534 case CFFormatUnicharsType
:
4535 case CFFormatCharsType
:
4536 case CFFormatPascalCharsType
:
4537 values
[argNum
].value
.pointerValue
= va_arg(args
, void *);
4543 /* Format the pieces together */
4544 for (curSpec
= 0; curSpec
< numSpecs
; curSpec
++) {
4545 SInt32 width
= 0, precision
= 0;
4547 Boolean hasWidth
= false, hasPrecision
= false;
4549 // widthArgNum and widthArg are never set at the same time; same for precArg*
4550 if (-1 != specs
[curSpec
].widthArgNum
) {
4551 width
= (SInt32
)values
[specs
[curSpec
].widthArgNum
].value
.longlongValue
;
4554 if (-1 != specs
[curSpec
].precArgNum
) {
4555 precision
= (SInt32
)values
[specs
[curSpec
].precArgNum
].value
.longlongValue
;
4556 hasPrecision
= true;
4558 if (-1 != specs
[curSpec
].widthArg
) {
4559 width
= specs
[curSpec
].widthArg
;
4562 if (-1 != specs
[curSpec
].precArg
) {
4563 precision
= specs
[curSpec
].precArg
;
4564 hasPrecision
= true;
4567 switch (specs
[curSpec
].type
) {
// Numeric and pointer conversions are delegated to snprintf() with a copy of the spec text.
// NOTE(review): formatBuffer holds 128 bytes but the copy loops below run for specs[curSpec].len
// characters with no visible bound; `buffer` is a stack array sized from width and precision, which
// come from the format string or the caller's arguments and are not range-checked here — confirm.
4568 case CFFormatLongType
:
4569 case CFFormatDoubleType
:
4570 case CFFormatPointerType
: {
4571 int8_t formatBuffer
[128];
4572 int8_t buffer
[256 + width
+ precision
];
4573 SInt32 cidx
, idx
, loc
;
4574 Boolean appended
= false;
4575 loc
= specs
[curSpec
].loc
;
4576 // In preparation to call snprintf(), copy the format string out
// On '$' the digits already copied are backed over, removing the argument number from the copy.
4578 for (idx
= 0, cidx
= 0; cidx
< specs
[curSpec
].len
; idx
++, cidx
++) {
4579 if ('$' == cformat
[loc
+ cidx
]) {
4580 for (idx
--; '0' <= formatBuffer
[idx
] && formatBuffer
[idx
] <= '9'; idx
--);
4582 formatBuffer
[idx
] = cformat
[loc
+ cidx
];
4586 for (idx
= 0, cidx
= 0; cidx
< specs
[curSpec
].len
; idx
++, cidx
++) {
4587 if ('$' == uformat
[loc
+ cidx
]) {
4588 for (idx
--; '0' <= formatBuffer
[idx
] && formatBuffer
[idx
] <= '9'; idx
--);
4590 formatBuffer
[idx
] = (int8_t)uformat
[loc
+ cidx
];
4594 formatBuffer
[idx
] = '\0';
4595 // Should modify format buffer here if necessary; for example, to translate %qd to
4596 // the equivalent, on architectures which do not have %q.
4597 buffer
[sizeof(buffer
) - 1] = '\0';
4598 switch (specs
[curSpec
].type
) {
4599 case CFFormatLongType
:
4600 if (CFFormatSize8
== specs
[curSpec
].size
) {
4601 SNPRINTF(int64_t, values
[specs
[curSpec
].mainArgNum
].value
.longlongValue
)
4603 SNPRINTF(SInt32
, values
[specs
[curSpec
].mainArgNum
].value
.longlongValue
)
4606 case CFFormatPointerType
:
4607 SNPRINTF(void *, values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
)
4610 case CFFormatDoubleType
:
4611 SNPRINTF(double, values
[specs
[curSpec
].mainArgNum
].value
.doubleValue
)
4612 // See if we need to localize the decimal point
4613 if (formatOptions
) { // We have a localization dictionary
4614 CFStringRef decimalSeparator
= CFDictionaryGetValue(formatOptions
, kCFNSDecimalSeparatorKey
);
4615 if (decimalSeparator
!= NULL
) { // We have a decimal separator in there
4616 CFIndex decimalPointLoc
= 0;
4617 while (buffer
[decimalPointLoc
] != 0 && buffer
[decimalPointLoc
] != '.') decimalPointLoc
++;
// Split the formatted text at the first '.' and append: integer part, separator, fraction part.
4618 if (buffer
[decimalPointLoc
] == '.') { // And we have a decimal point in the formatted string
4619 buffer
[decimalPointLoc
] = 0;
4620 CFStringAppendCString(outputString
, buffer
, __CFStringGetEightBitStringEncoding());
4621 CFStringAppend(outputString
, decimalSeparator
);
4622 CFStringAppendCString(outputString
, buffer
+ decimalPointLoc
+ 1, __CFStringGetEightBitStringEncoding());
4629 if (!appended
) CFStringAppendCString(outputString
, buffer
, __CFStringGetEightBitStringEncoding());
// Literal text is copied straight from the format, as bytes or as UniChars.
4632 case CFFormatLiteralType
:
4634 __CFStringAppendBytes(outputString
, cformat
+specs
[curSpec
].loc
, specs
[curSpec
].len
, __CFStringGetEightBitStringEncoding());
4636 CFStringAppendCharacters(outputString
, uformat
+specs
[curSpec
].loc
, specs
[curSpec
].len
);
// C strings and Pascal strings; a NULL pointer prints "(null)".
4639 case CFFormatPascalCharsType
:
4640 case CFFormatCharsType
:
4641 if (values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
== NULL
) {
4642 CFStringAppendCString(outputString
, "(null)", kCFStringEncodingASCII
);
4645 const char *str
= values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
;
4646 if (specs
[curSpec
].type
== CFFormatPascalCharsType
) { // Pascal string case
4647 len
= ((unsigned char *)str
)[0];
4649 if (hasPrecision
&& precision
< len
) len
= precision
;
4650 } else { // C-string case
4651 if (!hasPrecision
) { // No precision, so rely on the terminating null character
4653 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
4654 const char *terminatingNull
= memchr(str
, 0, precision
); // Basically strlen() on only the first precision characters of str
4655 if (terminatingNull
) { // There was a null in the first precision characters
4656 len
= terminatingNull
- str
;
4662 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
4663 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
4664 // to ignore those flags (and, say, never pad with '0' instead of space).
// Minus flag: text first, then space padding; otherwise padding first, then text.
4665 if (specs
[curSpec
].flags
& kCFStringFormatMinusFlag
) {
4666 __CFStringAppendBytes(outputString
, str
, len
, __CFStringGetSystemEncoding());
4667 if (hasWidth
&& width
> len
) {
4668 int w
= width
- len
; // We need this many spaces; do it ten at a time
4669 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
4672 if (hasWidth
&& width
> len
) {
4673 int w
= width
- len
; // We need this many spaces; do it ten at a time
4674 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
4676 __CFStringAppendBytes(outputString
, str
, len
, __CFStringGetSystemEncoding());
// Single UniChar argument.
4680 case CFFormatSingleUnicharType
:
4681 ch
= values
[specs
[curSpec
].mainArgNum
].value
.longlongValue
;
4682 CFStringAppendCharacters(outputString
, &ch
, 1);
// Zero-terminated UniChar string.
4684 case CFFormatUnicharsType
:
4685 //??? need to handle width, precision, and padding arguments
4686 up
= values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
;
4688 CFStringAppendCString(outputString
, "(null)", kCFStringEncodingASCII
);
// Length is found by scanning for the terminating zero, then clipped to the precision.
4691 for (len
= 0; 0 != up
[len
]; len
++);
4692 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
4693 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
4694 // to ignore those flags (and, say, never pad with '0' instead of space).
4695 if (hasPrecision
&& precision
< len
) len
= precision
;
4696 if (specs
[curSpec
].flags
& kCFStringFormatMinusFlag
) {
4697 CFStringAppendCharacters(outputString
, up
, len
);
4698 if (hasWidth
&& width
> len
) {
4699 int w
= width
- len
; // We need this many spaces; do it ten at a time
4700 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
4703 if (hasWidth
&& width
> len
) {
4704 int w
= width
- len
; // We need this many spaces; do it ten at a time
4705 do {__CFStringAppendBytes(outputString
, " ", (w
> 10 ? 10 : w
), kCFStringEncodingASCII
);} while ((w
-= 10) > 0);
4707 CFStringAppendCharacters(outputString
, up
, len
);
// Object arguments: a description string is obtained from copyDescFunc, __CFCopyFormattingDescription
// or CFCopyDescription (the conditions choosing between them are not visible in this listing).
4711 case CFFormatCFType
:
4712 case CFFormatObjectType
:
4713 if (NULL
!= values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
) {
4714 CFStringRef str
= NULL
;
4716 str
= copyDescFunc(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
, formatOptions
);
4718 str
= __CFCopyFormattingDescription(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
, formatOptions
);
4720 str
= CFCopyDescription(values
[specs
[curSpec
].mainArgNum
].value
.pointerValue
);
4724 CFStringAppend(outputString
, str
);
4727 CFStringAppendCString(outputString
, "(null description)", kCFStringEncodingASCII
);
4730 CFStringAppendCString(outputString
, "(null)", kCFStringEncodingASCII
);
// Free whichever temporaries were heap-allocated.
4736 if (specs
!= localSpecsBuffer
) CFAllocatorDeallocate(tmpAlloc
, specs
);
4737 if (values
!= localValuesBuffer
) CFAllocatorDeallocate(tmpAlloc
, values
);
4738 if (formatChars
&& (formatChars
!= localFormatBuffer
)) CFAllocatorDeallocate(tmpAlloc
, formatChars
);
// CFShowStr
// Debugging aid: prints the internal attributes of `str` to stdout with printf (length, 8-bit flag,
// length/null byte flags, inline flag, allocator, mutability, deallocator or external allocator,
// capacities for mutable strings, and the contents pointer).
4744 void CFShowStr(CFStringRef str
) {
4745 CFAllocatorRef alloc
;
4748 printf ("(null)\n");
// Objective-C strings have no CFString internals to show.
4752 if (CF_IS_OBJC(__kCFStringTypeID
, str
)) {
4753 printf ("This is an NSString, not CFString\n");
4757 alloc
= CFGetAllocator(str
);
4759 printf ("\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str
), __CFStrIsEightBit(str
));
4760 printf ("HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
4761 __CFStrHasLengthByte(str
), __CFStrHasNullByte(str
), __CFStrIsInline(str
));
// The allocator is shown by address unless it is kCFAllocatorSystemDefault.
4763 printf ("Allocator ");
4764 if (alloc
!= kCFAllocatorSystemDefault
) {
4765 printf ("%p\n", (void *)alloc
);
4767 printf ("SystemDefault\n");
4769 printf ("Mutable %d\n", __CFStrIsMutable(str
));
// Immutable strings may carry a contents deallocator; mutable strings may carry an external contents allocator.
4770 if (!__CFStrIsMutable(str
) && __CFStrHasContentsDeallocator(str
)) {
4771 if (__CFStrContentsDeallocator(str
)) printf ("ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str
));
4772 else printf ("ContentsDeallocatorFunc None\n");
4773 } else if (__CFStrIsMutable(str
) && __CFStrHasContentsAllocator(str
)) {
4774 printf ("ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef
)str
));
4777 if (__CFStrIsMutable(str
)) {
4778 printf ("CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str
), __CFStrIsFixed(str
) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str
));
4780 printf ("Contents %p\n", (void *)__CFStrContents(str
));