String.subproj/CFString.c

   1 /*
   2  * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23 /*      CFString.c
  24         Copyright 1998-2002, Apple, Inc. All rights reserved.
  25         Responsibility: Ali Ozer
  26
  27 !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined.
  28 */
  29
  30 #include <CoreFoundation/CFBase.h>
  31 #include <CoreFoundation/CFString.h>
  32 #include <CoreFoundation/CFDictionary.h>
  33 #include "CFStringEncodingConverterExt.h"
  34 #include "CFUniChar.h"
  35 #include "CFUnicodeDecomposition.h"
  36 #include "CFUnicodePrecomposition.h"
  37 #include "CFUtilitiesPriv.h"
  38 #include "CFInternal.h"
  39 #include <stdarg.h>
  40 #include <stdio.h>
  41 #include <string.h>
  42 #if defined (__MACOS8__)
  43     #include <Script.h> // For GetScriptManagerVariable
  44     #include <Processes.h> // For logging
  45     #include <stdlib.h>
  46 #include <UnicodeConverter.h>
  47 #include <TextEncodingConverter.h>
  48 #elif defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
  49 #include <unistd.h>
  50 #endif
  51 #if defined(__WIN32__)
  52 #include <windows.h>
  53 #endif /* __WIN32__ */
  54
  55 #if defined(__MACH__)
  56 extern size_t malloc_good_size(size_t size);
  57 #endif
  58 extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars);
  59
  60 #if defined(DEBUG)
  61
  62 // Special allocator used by CFSTRs to catch deallocations
  63 static CFAllocatorRef constantStringAllocatorForDebugging = NULL;
  64
  65 // We put this into C & Pascal strings if we can't convert
  66 #define CONVERSIONFAILURESTR "CFString conversion failed"
  67
  68 // We set this to true when purging the constant string table, so CFStringDeallocate doesn't assert
  69 static Boolean __CFConstantStringTableBeingFreed = false;
  70
  71 #endif
  72
  73
  74
  75 // This section is for CFString compatibility and other behaviors...
  76
  77 static CFOptionFlags _CFStringCompatibilityMask = 0;
  78
  79 #define Bug2967272 1
  80
  81 void _CFStringSetCompatibility(CFOptionFlags mask) {
  82     _CFStringCompatibilityMask |= mask;
  83 }
  84
  85 CF_INLINE Boolean __CFStringGetCompatibility(CFOptionFlags mask) {
  86     return (_CFStringCompatibilityMask & mask) == mask;
  87 }
  88
  89
  90
  91 // Two constant strings used by CFString; these are initialized in CFStringInitialize
  92 CONST_STRING_DECL(kCFEmptyString, "")
  93 CONST_STRING_DECL(kCFNSDecimalSeparatorKey, "NSDecimalSeparator")
  94
  95
  96 /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
  97 */
/* In-memory layout of a CFString. The union members are alternative views of the words that
   follow the runtime base; which view applies is decided by the flag bits kept in base._info
   (inline vs. not inline, mutable vs. immutable, explicit length vs. length byte).
*/
struct __CFString {
    CFRuntimeBase base;
    union {     // In many cases the allocated structs are smaller than these
        /* Inline contents. When the string has an explicit length it is stored here and the
           contents start sizeof(UInt32) bytes past the start of the union (see __CFStrContents);
           otherwise the contents start at the beginning of the union. */
        struct {
            SInt32 length;
        } inline1;

        /* Immutable, not inline, with an explicit length. */
        struct {
            void *buffer;
            UInt32 length;
            CFAllocatorRef contentsDeallocator;         // Just the dealloc func is used
        } notInlineImmutable1;
        /* Immutable, not inline, without an explicit length; the deallocator slot therefore
           sits directly after the buffer pointer (see __CFStrContentsDeallocatorPtr). */
        struct {
            void *buffer;
            CFAllocatorRef contentsDeallocator;         // Just the dealloc func is used
        } notInlineImmutable2;
        /* Mutable strings. buffer and length are at the same positions as in notInlineImmutable1. */
        struct {
            void *buffer;
            UInt32 length;
            UInt32 capacityFields;      // Currently only stores capacity
            UInt32 gapEtc;              // Stores some bits, plus desired or fixed capacity
            CFAllocatorRef contentsAllocator;   // Optional
        } notInlineMutable;
    } variants;
};
 123
 124 /*
 125 I = is immutable
 126 E = not inline contents
 127 U = is Unicode
 128 N = has NULL byte
 129 L = has length byte
 130 D = explicit deallocator for contents (for mutable objects, allocator)
 131 X = UNUSED
 132
 133 Also need (only for mutable)
 134 F = is fixed
 135 G = has gap
 136 Cap, DesCap = capacity
 137
 138 B7 B6 B5 B4 B3 B2 B1 B0
 139          U  N  L  X  I
 140
 141 B6 B5
 142  0  0   inline contents
 143  0  1   E (freed with default allocator)
 144  1  0   E (not freed)
 145  1  1   E D
 146
 147 !!! Note: Constant CFStrings use the bit patterns:
 148 C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable)
 149 D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable)
The bit usages should not be modified in a way that would affect these bit patterns.
 151 */
 152
/* Flag-bit assignments for CFString.
   The constants from __kCFFreeContentsWhenDoneMask through __kCFHasLengthByte are tested against
   base._info by the __CFStr* predicates; the constants from __kCFGapMask onward apply to
   variants.notInlineMutable.gapEtc. Each "...Mask" constant is followed by the indented value(s)
   that can be found under that mask.
*/
enum {
    __kCFFreeContentsWhenDoneMask = 0x020,
        __kCFFreeContentsWhenDone = 0x020,
    __kCFContentsMask = 0x060,
        __kCFHasInlineContents = 0x000,
        __kCFNotInlineContentsNoFree = 0x040,           // Don't free
        __kCFNotInlineContentsDefaultFree = 0x020,      // Use allocator's free function
        __kCFNotInlineContentsCustomFree = 0x060,               // Use a specially provided free function
    // The same 0x060 pattern reads as "has contents allocator" (mutable) or "has contents deallocator" (immutable)
    __kCFHasContentsAllocatorMask = 0x060,
        __kCFHasContentsAllocator = 0x060,              // (For mutable strings) use a specially provided allocator
    __kCFHasContentsDeallocatorMask = 0x060,
        __kCFHasContentsDeallocator = 0x060,
    __kCFIsMutableMask = 0x01,
        __kCFIsMutable = 0x01,
    __kCFIsUnicodeMask = 0x10,
        __kCFIsUnicode = 0x10,
    __kCFHasNullByteMask = 0x08,
        __kCFHasNullByte = 0x08,
    __kCFHasLengthByteMask = 0x04,
        __kCFHasLengthByte = 0x04,
    // !!! Bit 0x02 has been freed up
    // These are in variants.notInlineMutable.gapEtc
    __kCFGapMask = 0x00ffffff,
    __kCFGapBitNumber = 24,
    __kCFDesiredCapacityMask = 0x00ffffff,      // Currently gap and fixed share same bits as gap not implemented
    // NOTE(review): __CFStrDesiredCapacity passes (__kCFDesiredCapacityBitNumber, 0) to __CFBitfieldGetValue;
    // if that bit range is inclusive it covers 25 bits, one more than the 24-bit mask above -- confirm.
    __kCFDesiredCapacityBitNumber = 24,
    __kCFIsFixedMask = 0x80000000,
        __kCFIsFixed = 0x80000000,
    __kCFHasGapMask = 0x40000000,
        __kCFHasGap = 0x40000000,
    __kCFCapacityProvidedExternallyMask = 0x20000000,   // Set if the external buffer capacity is set explicitly by the developer
        __kCFCapacityProvidedExternally = 0x20000000,
    __kCFIsExternalMutableMask = 0x10000000,            // Determines whether the buffer is controlled by the developer
        __kCFIsExternalMutable = 0x10000000
    // 0x0f000000: 4 additional bits available for use in mutable strings
};
 189
 190
 191 // !!! Assumptions:
 192 // Mutable strings are not inline
 193 // Compile-time constant strings are not inline
 194 // Mutable strings always have explicit length (but they might also have length byte and null byte)
 195 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
 196 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
 197
 198 /* The following set of functions and macros need to be updated on change to the bit configuration
 199 */
 200 CF_INLINE Boolean __CFStrIsMutable(CFStringRef str)             {return (str->base._info & __kCFIsMutableMask) == __kCFIsMutable;}
 201 CF_INLINE Boolean __CFStrIsInline(CFStringRef str)              {return (str->base._info & __kCFContentsMask) == __kCFHasInlineContents;}
 202 CF_INLINE Boolean __CFStrFreeContentsWhenDone(CFStringRef str)  {return (str->base._info & __kCFFreeContentsWhenDoneMask) == __kCFFreeContentsWhenDone;}
 203 CF_INLINE Boolean __CFStrHasContentsDeallocator(CFStringRef str)        {return (str->base._info & __kCFHasContentsDeallocatorMask) == __kCFHasContentsDeallocator;}
 204 CF_INLINE Boolean __CFStrIsUnicode(CFStringRef str)             {return (str->base._info & __kCFIsUnicodeMask) == __kCFIsUnicode;}
 205 CF_INLINE Boolean __CFStrIsEightBit(CFStringRef str)            {return (str->base._info & __kCFIsUnicodeMask) != __kCFIsUnicode;}
 206 CF_INLINE Boolean __CFStrHasNullByte(CFStringRef str)           {return (str->base._info & __kCFHasNullByteMask) == __kCFHasNullByte;}
 207 CF_INLINE Boolean __CFStrHasLengthByte(CFStringRef str)         {return (str->base._info & __kCFHasLengthByteMask) == __kCFHasLengthByte;}
 208 CF_INLINE Boolean __CFStrHasExplicitLength(CFStringRef str)     {return (str->base._info & (__kCFIsMutableMask | __kCFHasLengthByteMask)) != __kCFHasLengthByte;}       // Has explicit length if (1) mutable or (2) not mutable and no length byte
 209 CF_INLINE Boolean __CFStrIsConstant(CFStringRef str)            {return (str->base._rc) == 0;}
 210
 211 CF_INLINE SInt32 __CFStrSkipAnyLengthByte(CFStringRef str)      {return ((str->base._info & __kCFHasLengthByteMask) == __kCFHasLengthByte) ? 1 : 0;}    // Number of bytes to skip over the length byte in the contents
 212
 213 /* Returns ptr to the buffer (which might include the length byte)
 214 */
 215 CF_INLINE const void *__CFStrContents(CFStringRef str) {
 216     if (__CFStrIsInline(str)) {
 217         return (const void *)(((UInt32)&(str->variants)) + (__CFStrHasExplicitLength(str) ? sizeof(UInt32) : 0));
 218     } else {    // Not inline; pointer is always word 2
 219         return str->variants.notInlineImmutable1.buffer;
 220     }
 221 }
 222
 223 static CFAllocatorRef *__CFStrContentsDeallocatorPtr(CFStringRef str) {
 224     return __CFStrHasExplicitLength(str) ? &(((CFMutableStringRef)str)->variants.notInlineImmutable1.contentsDeallocator) : &(((CFMutableStringRef)str)->variants.notInlineImmutable2.contentsDeallocator); }
 225
 226 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
 227 CF_INLINE CFAllocatorRef __CFStrContentsDeallocator(CFStringRef str) {
 228     return *__CFStrContentsDeallocatorPtr(str);
 229 }
 230
 231 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
 232 CF_INLINE void __CFStrSetContentsDeallocator(CFStringRef str, CFAllocatorRef contentsAllocator) {
 233     *__CFStrContentsDeallocatorPtr(str) = contentsAllocator;
 234 }
 235
 236 static CFAllocatorRef *__CFStrContentsAllocatorPtr(CFStringRef str) {
 237     CFAssert(!__CFStrIsInline(str), __kCFLogAssertion, "Asking for contents allocator of inline string");
 238     CFAssert(__CFStrIsMutable(str), __kCFLogAssertion, "Asking for contents allocator of an immutable string");
 239     return (CFAllocatorRef *)&(str->variants.notInlineMutable.contentsAllocator);
 240 }
 241
 242 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
 243 CF_INLINE CFAllocatorRef __CFStrContentsAllocator(CFMutableStringRef str) {
 244     return *(__CFStrContentsAllocatorPtr(str));
 245 }
 246
 247 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
 248 CF_INLINE void __CFStrSetContentsAllocator(CFMutableStringRef str, CFAllocatorRef alloc) {
 249     *(__CFStrContentsAllocatorPtr(str)) = alloc;
 250 }
 251
 252 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
 253 */
 254 CF_INLINE CFIndex __CFStrLength(CFStringRef str) {
 255     if (__CFStrHasExplicitLength(str)) {
 256         if (__CFStrIsInline(str)) {
 257             return str->variants.inline1.length;
 258         } else {
 259             return str->variants.notInlineImmutable1.length;
 260         }
 261     } else {
 262         return (CFIndex)(*((uint8_t *)__CFStrContents(str)));
 263     }
 264 }
 265
 266 CF_INLINE CFIndex __CFStrLength2(CFStringRef str, const void *buffer) {
 267     if (__CFStrHasExplicitLength(str)) {
 268         if (__CFStrIsInline(str)) {
 269             return str->variants.inline1.length;
 270         } else {
 271             return str->variants.notInlineImmutable1.length;
 272         }
 273     } else {
 274         return (CFIndex)(*((uint8_t *)buffer));
 275     }
 276 }
 277
 278
 279 Boolean __CFStringIsEightBit(CFStringRef str) {
 280     return __CFStrIsEightBit(str);
 281 }
 282
 283 /* Sets the content pointer for immutable or mutable strings.
 284 */
 285 CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p)
 286 {
 287     // XXX_PCB catch all writes for mutable string case.
 288     CF_WRITE_BARRIER_BASE_ASSIGN(__CFGetAllocator(str), str, ((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p);
 289 }
 290 CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v)            {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._info, 6, 0, v);}
 291
 292 CF_INLINE void __CFStrSetExplicitLength(CFStringRef str, CFIndex v) {
 293     if (__CFStrIsInline(str)) {
 294         ((CFMutableStringRef)str)->variants.inline1.length = v;
 295     } else {
 296         ((CFMutableStringRef)str)->variants.notInlineImmutable1.length = v;
 297     }
 298 }
 299
 300 // Assumption: Called with mutable strings only
 301 CF_INLINE Boolean __CFStrIsFixed(CFStringRef str)               {return (str->variants.notInlineMutable.gapEtc & __kCFIsFixedMask) == __kCFIsFixed;}
 302 CF_INLINE Boolean __CFStrHasContentsAllocator(CFStringRef str)  {return (str->base._info & __kCFHasContentsAllocatorMask) == __kCFHasContentsAllocator;}
 303 CF_INLINE Boolean __CFStrIsExternalMutable(CFStringRef str)     {return (str->variants.notInlineMutable.gapEtc & __kCFIsExternalMutableMask) == __kCFIsExternalMutable;}
 304
 305 // If capacity is provided externally, we only change it when we need to grow beyond it
 306 CF_INLINE Boolean __CFStrCapacityProvidedExternally(CFStringRef str)            {return (str->variants.notInlineMutable.gapEtc & __kCFCapacityProvidedExternallyMask) == __kCFCapacityProvidedExternally;}
 307 CF_INLINE void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str)     {str->variants.notInlineMutable.gapEtc |= __kCFCapacityProvidedExternally;}
 308 CF_INLINE void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str)   {str->variants.notInlineMutable.gapEtc &= ~__kCFCapacityProvidedExternally;}
 309
 310
 311 CF_INLINE void __CFStrSetIsFixed(CFMutableStringRef str)                {str->variants.notInlineMutable.gapEtc |= __kCFIsFixed;}
 312 CF_INLINE void __CFStrSetIsExternalMutable(CFMutableStringRef str)      {str->variants.notInlineMutable.gapEtc |= __kCFIsExternalMutable;}
 313 CF_INLINE void __CFStrSetHasGap(CFMutableStringRef str)                 {str->variants.notInlineMutable.gapEtc |= __kCFHasGap;}
 314 CF_INLINE void __CFStrSetUnicode(CFMutableStringRef str)                {str->base._info |= __kCFIsUnicode;}
 315 CF_INLINE void __CFStrClearUnicode(CFMutableStringRef str)              {str->base._info &= ~__kCFIsUnicode;}
 316 CF_INLINE void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str)  {str->base._info |= (__kCFHasLengthByte | __kCFHasNullByte);}
 317 CF_INLINE void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str)        {str->base._info &= ~(__kCFHasLengthByte | __kCFHasNullByte);}
 318
 319
 320 static void *__CFStrAllocateMutableContents(CFMutableStringRef str, CFIndex size) {
 321     void *ptr;
 322     CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
 323     ptr = CFAllocatorAllocate(alloc, size, 0);
 324     if (__CFOASafe) __CFSetLastAllocationEventName(ptr, "CFString (store)");
 325     return ptr;
 326 }
 327
 328 static void __CFStrDeallocateMutableContents(CFMutableStringRef str, void *buffer) {
 329     CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
 330     if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
 331         // GC:  for finalization safety, let collector reclaim the buffer in the next GC cycle.
 332         auto_zone_release(__CFCollectableZone, buffer);
 333     } else {
 334         CFAllocatorDeallocate(alloc, buffer);
 335     }
 336 }
 337
 338
 339 // The following set of functions should only be called on mutable strings
 340
 341 /* "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
 342    "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
 343 */
 344 CF_INLINE CFIndex __CFStrCapacity(CFStringRef str)                      {return str->variants.notInlineMutable.capacityFields;}
 345 CF_INLINE void __CFStrSetCapacity(CFMutableStringRef str, CFIndex cap)  {str->variants.notInlineMutable.capacityFields = cap;}
 346 CF_INLINE CFIndex __CFStrDesiredCapacity(CFStringRef str)               {return __CFBitfieldGetValue(str->variants.notInlineMutable.gapEtc, __kCFDesiredCapacityBitNumber, 0);}
 347 CF_INLINE void __CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex size)  {__CFBitfieldSetValue(str->variants.notInlineMutable.gapEtc, __kCFDesiredCapacityBitNumber, 0, size);}
 348
 349
 350
 351
 352 /* CFString specific init flags
 353    Note that you cannot count on the external buffer not being copied.
 354    Also, if you specify an external buffer, you should not change it behind the CFString's back.
 355 */
// NOTE(review): kCFStringNoCopyNoFreeProvidedContents (0x30000) is numerically
// kCFStringPascal | kCFStringNoCopyProvidedContents, so a plain bit test for either of those
// also matches it -- confirm how callers distinguish these values.
enum {
    __kCFThinUnicodeIfPossible = 0x1000000,             /* See if the Unicode contents can be thinned down to 8-bit */
    kCFStringPascal = 0x10000,                          /* Indicating that the string data has a Pascal string structure (length byte at start) */
    kCFStringNoCopyProvidedContents = 0x20000,          /* Don't copy the provided string contents if possible; free it when no longer needed */
    kCFStringNoCopyNoFreeProvidedContents = 0x30000     /* Don't copy the provided string contents if possible; don't free it when no longer needed */
};
 362
 363 /* System Encoding.
 364 */
// Cached encodings. Each starts as kCFStringEncodingInvalidId and is filled in on first use.
// Set by CFStringGetSystemEncoding()
static CFStringEncoding __CFDefaultSystemEncoding = kCFStringEncodingInvalidId;
// Set by CFStringFileSystemEncoding()
static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId;
// Set by __CFStringComputeEightBitStringEncoding(); not static, so visible outside this file
CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId;
 368
 369 CFStringEncoding CFStringGetSystemEncoding(void) {
 370
 371     if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) {
 372         const CFStringEncodingConverter *converter = NULL;
 373 #if defined(__MACOS8__) || defined(__MACH__)
 374             __CFDefaultSystemEncoding = kCFStringEncodingMacRoman; // MacRoman is built-in so always available
 375 #elif defined(__WIN32__)
 376             __CFDefaultSystemEncoding = kCFStringEncodingWindowsLatin1; // WinLatin1 is built-in so always available
 377 #elif defined(__LINUX__) || defined(__FREEBSD__)
 378             __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default
 379 #else // Solaris && HP-UX ?
 380             __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default
 381 #endif
 382             converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding);
 383
 384         __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? converter->toUnicode : NULL);
 385     }
 386
 387     return __CFDefaultSystemEncoding;
 388 }
 389
 390 // Fast version for internal use
 391
 392 CF_INLINE CFStringEncoding __CFStringGetSystemEncoding(void) {
 393     if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) (void)CFStringGetSystemEncoding();
 394     return __CFDefaultSystemEncoding;
 395 }
 396
 397 CFStringEncoding CFStringFileSystemEncoding(void) {
 398     if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) {
 399 #if defined(__MACH__)
 400         __CFDefaultFileSystemEncoding = kCFStringEncodingUTF8;
 401 #else
 402         __CFDefaultFileSystemEncoding = CFStringGetSystemEncoding();
 403 #endif
 404     }
 405
 406     return __CFDefaultFileSystemEncoding;
 407 }
 408
 409 /* ??? Is returning length when no other answer is available the right thing?
 410 */
 411 CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) {
 412     if (encoding == kCFStringEncodingUTF8) {
 413         return _CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? (length * 3) : (length * 6); // 1 Unichar could expand to 3 bytes; we return 6 for older apps for compatibility
 414     } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32
 415         return length * sizeof(UTF32Char);
 416     } else {
 417         encoding &= 0xFFF; // Mask off non-base part
 418     }
 419     switch (encoding) {
 420         case kCFStringEncodingUnicode:
 421             return length * sizeof(UniChar);
 422
 423         case kCFStringEncodingNonLossyASCII:
 424             return length * 6; // 1 Unichar could expand to 6 bytes
 425
 426         case kCFStringEncodingMacRoman:
 427         case kCFStringEncodingWindowsLatin1:
 428         case kCFStringEncodingISOLatin1:
 429         case kCFStringEncodingNextStepLatin:
 430         case kCFStringEncodingASCII:
 431             return length / sizeof(uint8_t);
 432
 433         default:
 434             return length / sizeof(uint8_t);
 435     }
 436 }
 437
 438
 439 /* Returns whether the indicated encoding can be stored in 8-bit chars
 440 */
 441 CF_INLINE Boolean __CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding) {
 442     switch (encoding & 0xFFF) { // just use encoding base
 443         case kCFStringEncodingInvalidId:
 444         case kCFStringEncodingUnicode:
 445         case kCFStringEncodingNonLossyASCII:
 446             return false;
 447
 448         case kCFStringEncodingMacRoman:
 449         case kCFStringEncodingWindowsLatin1:
 450         case kCFStringEncodingISOLatin1:
 451         case kCFStringEncodingNextStepLatin:
 452         case kCFStringEncodingASCII:
 453             return true;
 454
 455         default: return false;
 456     }
 457 }
 458
 459 /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
 460    ??? Perhaps only ASCII fits the bill due to Unicode decomposition.
 461 */
 462 CFStringEncoding __CFStringComputeEightBitStringEncoding(void) {
 463     if (__CFDefaultEightBitStringEncoding == kCFStringEncodingInvalidId) {
 464         CFStringEncoding systemEncoding = CFStringGetSystemEncoding();
 465         if (systemEncoding == kCFStringEncodingInvalidId) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
 466             return kCFStringEncodingASCII;
 467         } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding)) {
 468             __CFDefaultEightBitStringEncoding = systemEncoding;
 469         } else {
 470             __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII;
 471         }
 472     }
 473
 474     return __CFDefaultEightBitStringEncoding;
 475 }
 476
 477 /* Returns whether the provided bytes can be stored in ASCII
 478 */
 479 CF_INLINE Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
 480     while (len--) if ((uint8_t)(*bytes++) >= 128) return false;
 481     return true;
 482 }
 483
 484 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
 485 */
 486 CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) {
 487     if (encoding == __CFStringGetEightBitStringEncoding()) return true;
 488     if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true;
 489     return false;
 490 }
 491
 492
 493 /* Returns whether a length byte can be tacked on to a string of the indicated length.
 494 */
 495 CF_INLINE Boolean __CFCanUseLengthByte(CFIndex len) {
 496 #define __kCFMaxPascalStrLen 255
 497     return (len <= __kCFMaxPascalStrLen) ? true : false;
 498 }
 499
 500 /* Various string assertions
 501 */
 502 #define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
 503 #define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
 504 #define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && (idx + count) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
 505 #define __CFAssertLengthIsOK(len) CFAssert2(len < __kCFMaxLength, __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, len)
 506 #define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
 507 #define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
 508 #define __CFAssertIsNotNegative(idx) CFAssert2(idx >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
 509 #define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || (reqLen <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
 510
 511
 512 /* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity.
 513 Additional complications are applied in the following order:
 514 - desiredCapacity, which is the minimum (except initially things can be at zero)
- rounding up to a multiple of 8
- compressing (to fit the number in 16 bits), which effectively rounds up to a multiple of 256
 517 */
 518 #define SHRINKFACTOR(c) (c / 2)
 519 #define GROWFACTOR(c) ((c * 3 + 1) / 2)
 520
 521 CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, CFIndex reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) {
 522     if (capacity != 0 || reqCapacity != 0) {    /* If initially zero, and space not needed, leave it at that... */
 523         if ((capacity < reqCapacity) ||         /* We definitely need the room... */
 524             (!__CFStrCapacityProvidedExternally(str) &&         /* Assuming we control the capacity... */
 525                 ((reqCapacity < SHRINKFACTOR(capacity)) ||              /* ...we have too much room! */
 526                  (!leaveExtraRoom && (reqCapacity < capacity))))) {     /* ...we need to eliminate the extra space... */
 527             CFIndex newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity;       /* Grow by 3/2 if extra room is desired */
 528             CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize;
 529             if (newCapacity < desiredCapacity) {        /* If less than desired, bump up to desired */
 530                 newCapacity = desiredCapacity;
 531             } else if (__CFStrIsFixed(str)) {           /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
 532                 newCapacity = __CFMax(desiredCapacity, reqCapacity);    /* !!! So, fixed is not really fixed, but "tight" */
 533             }
 534             if (__CFStrHasContentsAllocator(str)) {     /* Also apply any preferred size from the allocator; should we do something for  */
 535                 newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0);
 536 #if defined(__MACH__)
 537             } else {
 538                 newCapacity = malloc_good_size(newCapacity);
 539 #endif
 540             }
 541             return newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
 542         }
 543     }
 544     return capacity;
 545 }
 546
 547
 548 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
 549   numBlocks is current total number of blocks within buffer.
 550   blockSize is the size of each block in bytes
 551   ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
 552   insertLength is the final spacing between the remaining blocks
 553
 554 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) }  (so we want to "delete" C and E F), fromEnd = NO
 555 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
 556 if insertLength = 0, result = A B D G H
 557
 558 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
 559 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
 560
 561 */
/* A stretch of the buffer that is kept and has to be moved. rearrangeBlocks() fills these in
   using values already multiplied by blockSize, i.e. in bytes. */
typedef struct _CFStringDeferredRange {
    int beginning;              // Offset of the stretch within the buffer before it is moved
    int length;                 // Number of bytes in the stretch
    int shift;                  // Signed distance to move the stretch; added to beginning to get the destination
} CFStringDeferredRange;

/* Growable stack of deferred ranges. It starts out on a caller-supplied array and push() moves it
   to allocated memory when that array fills up. */
typedef struct _CFStringStackInfo {
    int capacity;               // Capacity (if capacity == count, need to realloc to add another)
    int count;                  // Number of elements actually stored
    CFStringDeferredRange *stack;
    Boolean hasMalloced;        // Indicates "stack" is allocated and needs to be deallocated when done
    char _padding[3];
} CFStringStackInfo;
 575
 576 CF_INLINE void pop (CFStringStackInfo *si, CFStringDeferredRange *topRange) {
 577     si->count = si->count - 1;
 578     *topRange = si->stack[si->count];
 579 }
 580
 581 CF_INLINE void push (CFStringStackInfo *si, const CFStringDeferredRange *newRange) {
 582     if (si->count == si->capacity) {
 583         // increase size of the stack
 584         si->capacity = (si->capacity + 4) * 2;
 585         if (si->hasMalloced) {
 586             si->stack = CFAllocatorReallocate(NULL, si->stack, si->capacity * sizeof(CFStringDeferredRange), 0);
 587         } else {
 588             CFStringDeferredRange *newStack = (CFStringDeferredRange *)CFAllocatorAllocate(NULL, si->capacity * sizeof(CFStringDeferredRange), 0);
 589             memmove(newStack, si->stack, si->count * sizeof(CFStringDeferredRange));
 590             si->stack = newStack;
 591             si->hasMalloced = true;
 592         }
 593     }
 594     si->stack[si->count] = *newRange;
 595     si->count = si->count + 1;
 596 }
 597
 598 static void rearrangeBlocks(
 599         uint8_t *buffer,
 600         CFIndex numBlocks,
 601         CFIndex blockSize,
 602         const CFRange *ranges,
 603         CFIndex numRanges,
 604         CFIndex insertLength) {
 605
 606 #define origStackSize 10
 607     CFStringDeferredRange origStack[origStackSize];
 608     CFStringStackInfo si = {origStackSize, 0, origStack, false, {0, 0, 0}};
 609     CFStringDeferredRange currentNonRange = {0, 0, 0};
 610     int currentRange = 0;
 611     int amountShifted = 0;
 612
 613     // must have at least 1 range left.
 614
 615     while (currentRange < numRanges) {
 616         currentNonRange.beginning = (ranges[currentRange].location + ranges[currentRange].length) * blockSize;
 617         if ((numRanges - currentRange) == 1) {
 618             // at the end.
 619             currentNonRange.length = numBlocks * blockSize - currentNonRange.beginning;
 620             if (currentNonRange.length == 0) break;
 621         } else {
 622             currentNonRange.length = (ranges[currentRange + 1].location * blockSize) - currentNonRange.beginning;
 623         }
 624         currentNonRange.shift = amountShifted + (insertLength * blockSize) - (ranges[currentRange].length * blockSize);
 625         amountShifted = currentNonRange.shift;
 626         if (amountShifted <= 0) {
 627             // process current item and rest of stack
 628             if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
 629             while (si.count > 0) {
 630                 pop (&si, &currentNonRange);  // currentNonRange now equals the top element of the stack.
 631                 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
 632             }
 633         } else {
 634             // add currentNonRange to stack.
 635             push (&si, &currentNonRange);
 636         }
 637         currentRange++;
 638     }
 639
 640     // no more ranges.  if anything is on the stack, process.
 641
 642     while (si.count > 0) {
 643         pop (&si, &currentNonRange);  // currentNonRange now equals the top element of the stack.
 644         if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
 645     }
 646     if (si.hasMalloced) CFAllocatorDeallocate (NULL, si.stack);
 647 }
 648
 649 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
 650 */
 651 static void copyBlocks(
 652         const uint8_t *srcBuffer,
 653         uint8_t *dstBuffer,
 654         CFIndex srcLength,
 655         Boolean srcIsUnicode,
 656         Boolean dstIsUnicode,
 657         const CFRange *ranges,
 658         CFIndex numRanges,
 659         CFIndex insertLength) {
 660
 661     CFIndex srcLocationInBytes = 0;     // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
 662     CFIndex dstLocationInBytes = 0;     // ditto
 663     CFIndex srcBlockSize = srcIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
 664     CFIndex insertLengthInBytes = insertLength * (dstIsUnicode ? sizeof(UniChar) : sizeof(uint8_t));
 665     CFIndex rangeIndex = 0;
 666     CFIndex srcToDstMultiplier = (srcIsUnicode == dstIsUnicode) ? 1 : (sizeof(UniChar) / sizeof(uint8_t));
 667
 668     // Loop over the ranges, copying the range to be preserved (right before each range)
 669     while (rangeIndex < numRanges) {
 670         CFIndex srcLengthInBytes = ranges[rangeIndex].location * srcBlockSize - srcLocationInBytes;     // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
 671         if (srcLengthInBytes > 0) {
 672             if (srcIsUnicode == dstIsUnicode) {
 673                 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLengthInBytes);
 674             } else {
 675                 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLengthInBytes);
 676             }
 677         }
 678         srcLocationInBytes += srcLengthInBytes + ranges[rangeIndex].length * srcBlockSize;      // Skip over the just-copied and to-be-deleted stuff
 679         dstLocationInBytes += srcLengthInBytes * srcToDstMultiplier + insertLengthInBytes;
 680         rangeIndex++;
 681     }
 682
 683     // Do last range (the one beyond last range)
 684     if (srcLocationInBytes < srcLength * srcBlockSize) {
 685         if (srcIsUnicode == dstIsUnicode) {
 686             memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLength * srcBlockSize - srcLocationInBytes);
 687         } else {
 688             __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLength * srcBlockSize - srcLocationInBytes);
 689         }
 690     }
 691 }
 692
 693
/* Reallocates the backing store of the string to accommodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)

   str: the mutable string whose storage is adjusted.
   deleteRanges, numDeleteRanges: character ranges to remove; each one is replaced by a gap of insertLength characters.
        NOTE(review): rearrangeBlocks() and copyBlocks() appear to assume the ranges are ascending and non-overlapping - confirm with callers.
   insertLength: number of characters of space reserved in place of every deleted range. The reserved space is not filled in here.
   makeUnicode: if true, the resulting storage is Unicode (UniChar) even if the string is currently 8-bit.
*/
static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) {
    const uint8_t *curContents = __CFStrContents(str);
    CFIndex curLength = curContents ? __CFStrLength2(str, curContents) : 0;
    CFIndex newLength;

    // Compute new length of the string
    if (numDeleteRanges == 1) {
        newLength = curLength + insertLength - deleteRanges[0].length;
    } else {
        int cnt;
        // One gap of insertLength is added per range, and every range's own length is removed
        newLength = curLength + insertLength * numDeleteRanges;
        for (cnt = 0; cnt < numDeleteRanges; cnt++) newLength -= deleteRanges[cnt].length;
    }

    __CFAssertIfFixedLengthIsOK(str, newLength);

    if (newLength == 0) {
        // A somewhat optimized code-path for this special case, with the following implicit values:
        // newIsUnicode = false
        // useLengthAndNullBytes = false
        // newCharSize = sizeof(uint8_t)
        // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
        // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
        CFIndex curCapacity = __CFStrCapacity(str);
        CFIndex newCapacity = __CFStrNewCapacity(str, 0, curCapacity, true, sizeof(uint8_t));
        if (newCapacity != curCapacity) {       // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
            if (curContents) __CFStrDeallocateMutableContents(str, (uint8_t *)curContents);
            __CFStrSetContentPtr(str, NULL);
            __CFStrSetCapacity(str, 0);
            __CFStrClearCapacityProvidedExternally(str);
            __CFStrClearHasLengthAndNullBytes(str);
            if (!__CFStrIsExternalMutable(str)) __CFStrClearUnicode(str);       // External mutable implies Unicode
        } else {
            // Buffer is kept as is; an externally-mutable string is left untouched (it stays Unicode)
            if (!__CFStrIsExternalMutable(str)) {
                __CFStrClearUnicode(str);
                if (curCapacity >= (int)(sizeof(uint8_t) * 2)) {        // If there's room
                    // Empty 8-bit string with both a zero length byte and a null byte
                    __CFStrSetHasLengthAndNullBytes(str);
                    ((uint8_t *)curContents)[0] = ((uint8_t *)curContents)[1] = 0;
                } else {
                    __CFStrClearHasLengthAndNullBytes(str);
                }
            }
        }
        __CFStrSetExplicitLength(str, 0);
    } else {    /* This else-clause assumes newLength > 0 */
        Boolean oldIsUnicode = __CFStrIsUnicode(str);
        Boolean newIsUnicode = makeUnicode || (oldIsUnicode /* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str);
        CFIndex newCharSize = newIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
        Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */;
        CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0;  /* 2 extra bytes to keep the length byte & null... */
        CFIndex curCapacity = __CFStrCapacity(str);
        CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize);
        Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode);      /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
        uint8_t *newContents =  allocNewBuffer ? __CFStrAllocateMutableContents(str, newCapacity) : (uint8_t *)curContents;
        Boolean hasLengthAndNullBytes = __CFStrHasLengthByte(str);

        CFAssert1(hasLengthAndNullBytes == __CFStrHasNullByte(str), __kCFLogAssertion, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__);

        // Temporarily step both pointers past their length byte so that they address the first character
        if (hasLengthAndNullBytes) curContents++;
        if (useLengthAndNullBytes) newContents++;

        if (curContents) {
            if (oldIsUnicode == newIsUnicode) {
                if (newContents == curContents) {
                    // Same buffer, same character width: shuffle the contents in place
                    rearrangeBlocks(newContents, curLength, newCharSize, deleteRanges, numDeleteRanges, insertLength);
                } else {
                    copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
                }
            } else if (newIsUnicode) {  /* this implies we have a new buffer */
                copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
            }
            if (hasLengthAndNullBytes) curContents--;   /* Undo the damage from above */
            if (allocNewBuffer) __CFStrDeallocateMutableContents(str, (void *)curContents);
        }

        // Bring the string's flags (and, for 8-bit, the length and null bytes) in line with the new storage
        if (!newIsUnicode) {
            if (useLengthAndNullBytes) {
                newContents[newLength] = 0;     /* Always have null byte, if not unicode */
                newContents--;  /* Undo the damage from above */
                newContents[0] = __CFCanUseLengthByte(newLength) ? (uint8_t)newLength : 0;
                if (!hasLengthAndNullBytes) __CFStrSetHasLengthAndNullBytes(str);
            } else {
                if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
            }
            if (oldIsUnicode) __CFStrClearUnicode(str);
        } else {        // New is unicode...
            if (!oldIsUnicode) __CFStrSetUnicode(str);
            if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
        }
        __CFStrSetExplicitLength(str, newLength);

        // Only a freshly allocated buffer needs to be installed; otherwise pointer and capacity are unchanged
        if (allocNewBuffer) {
            __CFStrSetCapacity(str, newCapacity);
            __CFStrClearCapacityProvidedExternally(str);
            __CFStrSetContentPtr(str, newContents);
        }
    }
}
 794
/* Same as __CFStringChangeSizeMultiple(), but takes one range (very common case): the characters in "range" are
   replaced by insertLength characters of reserved space. "range" is passed by value; the address of this local copy
   is handed on as a one-element range array.
*/
CF_INLINE void __CFStringChangeSize(CFMutableStringRef str, CFRange range, CFIndex insertLength, Boolean makeUnicode) {
    __CFStringChangeSizeMultiple(str, &range, 1, insertLength, makeUnicode);
}
 800
 801
 802 static void __CFStringDeallocate(CFTypeRef cf) {
 803     CFStringRef str = cf;
 804
 805     // constantStringAllocatorForDebugging is not around unless DEBUG is defined, but neither is CFAssert2()...
 806     CFAssert1(__CFConstantStringTableBeingFreed || CFGetAllocator(str) != constantStringAllocatorForDebugging, __kCFLogAssertion, "Tried to deallocate CFSTR(\"%@\")", str);
 807
 808     if (!__CFStrIsInline(str)) {
 809         uint8_t *contents;
 810         Boolean mutable = __CFStrIsMutable(str);
 811         if (__CFStrFreeContentsWhenDone(str) && (contents = (uint8_t *)__CFStrContents(str))) {
 812             if (mutable) {
 813                 __CFStrDeallocateMutableContents((CFMutableStringRef)str, contents);
 814             } else {
 815                 if (__CFStrHasContentsDeallocator(str)) {
 816                     CFAllocatorRef contentsDeallocator = __CFStrContentsDeallocator(str);
 817                     CFAllocatorDeallocate(contentsDeallocator, contents);
 818                     CFRelease(contentsDeallocator);
 819                 } else {
 820                     CFAllocatorRef alloc = __CFGetAllocator(str);
 821                     CFAllocatorDeallocate(alloc, contents);
 822                 }
 823             }
 824         }
 825         if (mutable && __CFStrHasContentsAllocator(str)) CFRelease(__CFStrContentsAllocator((CFMutableStringRef)str));
 826     }
 827 }
 828
 829 static Boolean __CFStringEqual(CFTypeRef cf1, CFTypeRef cf2) {
 830     CFStringRef str1 = cf1;
 831     CFStringRef str2 = cf2;
 832     const uint8_t *contents1;
 833     const uint8_t *contents2;
 834     CFIndex len1;
 835
 836     /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
 837     /* !!! We do not need == test, as the CFBase runtime assures this */
 838
 839     contents1 = __CFStrContents(str1);
 840     contents2 = __CFStrContents(str2);
 841     len1 = __CFStrLength2(str1, contents1);
 842
 843     if (len1 != __CFStrLength2(str2, contents2)) return false;
 844
 845     contents1 += __CFStrSkipAnyLengthByte(str1);
 846     contents2 += __CFStrSkipAnyLengthByte(str2);
 847
 848     if (__CFStrIsEightBit(str1) && __CFStrIsEightBit(str2)) {
 849         return memcmp((const char *)contents1, (const char *)contents2, len1) ? false : true;
 850     } else if (__CFStrIsEightBit(str1)) {       /* One string has Unicode contents */
 851         CFStringInlineBuffer buf;
 852         CFIndex buf_idx = 0;
 853
 854         CFStringInitInlineBuffer(str1, &buf, CFRangeMake(0, len1));
 855         for (buf_idx = 0; buf_idx < len1; buf_idx++) {
 856             if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents2)[buf_idx]) return false;
 857         }
 858     } else if (__CFStrIsEightBit(str2)) {       /* One string has Unicode contents */
 859         CFStringInlineBuffer buf;
 860         CFIndex buf_idx = 0;
 861
 862         CFStringInitInlineBuffer(str2, &buf, CFRangeMake(0, len1));
 863         for (buf_idx = 0; buf_idx < len1; buf_idx++) {
 864             if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents1)[buf_idx]) return false;
 865         }
 866     } else {                                    /* Both strings have Unicode contents */
 867         CFIndex idx;
 868         for (idx = 0; idx < len1; idx++) {
 869             if (((UniChar *)contents1)[idx] != ((UniChar *)contents2)[idx]) return false;
 870         }
 871     }
 872     return true;
 873 }
 874
 875
 876 /* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
 877 If the length is less than or equal to 24, then the hash function is simply the
 878 following (n is the nth UniChar character, starting from 0):
 879
 880   hash(-1) = length
 881   hash(n) = hash(n-1) * 257 + unichar(n);
  Hash = hash(length-1) + (hash(length-1) << (length & 31))
 883
 884 If the length is greater than 24, then the above algorithm applies to
 885 characters 0..7 and length-16..length-1; thus the first 8 and last 16 characters.
 886
 887 Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401;  67503105 is 257^4 - 256^4
 888 If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.
 889
 890 NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.
 891 */
 892
 893 /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing.
 894 */
 895 CF_INLINE CFHashCode __CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) {
 896     CFHashCode result = actualLen;
 897     if (len < 24) {
 898         const UniChar *end4 = uContents + (len & ~3);
 899         const UniChar *end = uContents + len;
 900         while (uContents < end4) {      // First count in fours
 901             result = result * 67503105 + uContents[0] * 16974593  + uContents[1] * 66049  + uContents[2] * 257 + uContents[3];
 902             uContents += 4;
 903         }
 904         while (uContents < end) {       // Then for the last <4 chars, count in ones...
 905             result = result * 257 + *uContents++;
 906         }
 907     } else {
 908         result = result * 67503105 + uContents[0] * 16974593 + uContents[1] * 66049 + uContents[2] * 257 + uContents[3];
 909         result = result * 67503105 + uContents[4] * 16974593 + uContents[5] * 66049 + uContents[6] * 257 + uContents[7];
 910         uContents += (len - 16);
 911         result = result * 67503105 + uContents[0] * 16974593 + uContents[1] * 66049 + uContents[2] * 257 + uContents[3];
 912         result = result * 67503105 + uContents[4] * 16974593 + uContents[5] * 66049 + uContents[6] * 257 + uContents[7];
 913         result = result * 67503105 + uContents[8] * 16974593 + uContents[9] * 66049 + uContents[10] * 257 + uContents[11];
 914         result = result * 67503105 + uContents[12] * 16974593 + uContents[13] * 66049 + uContents[14] * 257 + uContents[15];
 915     }
 916     return result + (result << (actualLen & 31));
 917 }
 918
 919 /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check.
 920 */
 921 CF_INLINE CFHashCode __CFStrHashEightBit(const uint8_t *contents, CFIndex len) {
 922 #if defined(DEBUG)
 923     const uint8_t *origContents = contents;
 924 #endif
 925     CFHashCode result = len;
 926     if (len < 24) {
 927         const uint8_t *end4 = contents + (len & ~3);
 928         const uint8_t *end = contents + len;
 929         while (contents < end4) {       // First count in fours
 930             result = result * 67503105 + __CFCharToUniCharTable[contents[0]] * 16974593  + __CFCharToUniCharTable[contents[1]] * 66049  + __CFCharToUniCharTable[contents[2]] * 257 + __CFCharToUniCharTable[contents[3]];
 931             contents += 4;
 932         }
 933         while (contents < end) {        // Then for the last <4 chars, count single chars
 934             result = result * 257 + __CFCharToUniCharTable[*contents++];
 935         }
 936     } else {
 937         result = result * 67503105 + __CFCharToUniCharTable[contents[0]] * 16974593  + __CFCharToUniCharTable[contents[1]] * 66049  + __CFCharToUniCharTable[contents[2]] * 257 + __CFCharToUniCharTable[contents[3]];
 938         result = result * 67503105 + __CFCharToUniCharTable[contents[4]] * 16974593  + __CFCharToUniCharTable[contents[5]] * 66049  + __CFCharToUniCharTable[contents[6]] * 257 + __CFCharToUniCharTable[contents[7]];
 939         contents += (len - 16);
 940         result = result * 67503105 + __CFCharToUniCharTable[contents[0]] * 16974593  + __CFCharToUniCharTable[contents[1]] * 66049  + __CFCharToUniCharTable[contents[2]] * 257 + __CFCharToUniCharTable[contents[3]];
 941         result = result * 67503105 + __CFCharToUniCharTable[contents[4]] * 16974593  + __CFCharToUniCharTable[contents[5]] * 66049  + __CFCharToUniCharTable[contents[6]] * 257 + __CFCharToUniCharTable[contents[7]];
 942         result = result * 67503105 + __CFCharToUniCharTable[contents[8]] * 16974593  + __CFCharToUniCharTable[contents[9]] * 66049  + __CFCharToUniCharTable[contents[10]] * 257 + __CFCharToUniCharTable[contents[11]];
 943         result = result * 67503105 + __CFCharToUniCharTable[contents[12]] * 16974593  + __CFCharToUniCharTable[contents[13]] * 66049  + __CFCharToUniCharTable[contents[14]] * 257 + __CFCharToUniCharTable[contents[15]];
 944     }
 945 #if defined(DEBUG)
 946     if (!__CFCharToUniCharFunc) {       // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
 947         CFIndex cnt;
 948         Boolean err = false;
 949         contents = origContents;
 950         if (len <= 24) {
 951             for (cnt = 0; cnt < len; cnt++) if (contents[cnt] >= 128) err = true;
 952         } else {
 953             for (cnt = 0; cnt < 8; cnt++) if (contents[cnt] >= 128) err = true;
 954             for (cnt = len - 16; cnt < len; cnt++) if (contents[cnt] >= 128) err = true;
 955         }
 956         if (err) {
 957             // Can't do log here, as it might be too early
 958             fprintf(stderr, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
 959         }
 960     }
 961 #endif
 962     return result + (result << (len & 31));
 963 }
 964
 965 CFHashCode CFStringHashISOLatin1CString(const uint8_t *bytes, CFIndex len) {
 966     CFHashCode result = len;
 967     if (len < 24) {
 968         const uint8_t *end4 = bytes + (len & ~3);
 969         const uint8_t *end = bytes + len;
 970         while (bytes < end4) {  // First count in fours
 971             result = result * 67503105 + bytes[0] * 16974593  + bytes[1] * 66049  + bytes[2] * 257 + bytes[3];
 972             bytes += 4;
 973         }
 974         while (bytes < end) {   // Then for the last <4 chars, count in ones...
 975             result = result * 257 + *bytes++;
 976         }
 977     } else {
 978         result = result * 67503105 + bytes[0] * 16974593 + bytes[1] * 66049 + bytes[2] * 257 + bytes[3];
 979         result = result * 67503105 + bytes[4] * 16974593 + bytes[5] * 66049 + bytes[6] * 257 + bytes[7];
 980         bytes += (len - 16);
 981         result = result * 67503105 + bytes[0] * 16974593 + bytes[1] * 66049 + bytes[2] * 257 + bytes[3];
 982         result = result * 67503105 + bytes[4] * 16974593 + bytes[5] * 66049 + bytes[6] * 257 + bytes[7];
 983         result = result * 67503105 + bytes[8] * 16974593 + bytes[9] * 66049 + bytes[10] * 257 + bytes[11];
 984         result = result * 67503105 + bytes[12] * 16974593 + bytes[13] * 66049 + bytes[14] * 257 + bytes[15];
 985     }
 986     return result + (result << (len & 31));
 987 }
 988
/* Exported wrapper around __CFStrHashEightBit(): hashes len bytes of a string in the eight bit string encoding.
*/
CFHashCode CFStringHashCString(const uint8_t *bytes, CFIndex len) {
    return __CFStrHashEightBit(bytes, len);
}
 992
/* Exported wrapper around __CFStrHashCharacters(): hashes a buffer of len UniChars. len is passed both as the
   number of characters in the buffer and as the string's actual length.
*/
CFHashCode CFStringHashCharacters(const UniChar *characters, CFIndex len) {
    return __CFStrHashCharacters(characters, len, len);
}
 996
 997 /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash.
 998 */
 999 CFHashCode CFStringHashNSString(CFStringRef str) {
1000     UniChar buffer[24];
1001     CFIndex bufLen;             // Number of characters in the buffer for hashing
1002     CFIndex len;                // Actual length of the string
1003
1004     CF_OBJC_CALL0(CFIndex, len, str, "length");
1005    if (len <= 24) {
1006         CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, len));
1007         bufLen = len;
1008     } else {
1009         CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, 8));
1010         CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer+8, CFRangeMake(len-16, 16));
1011         bufLen = 24;
1012     }
1013     return __CFStrHashCharacters(buffer, bufLen, len);
1014 }
1015
1016 CFHashCode __CFStringHash(CFTypeRef cf) {
1017     /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
1018     CFStringRef str = cf;
1019     const uint8_t *contents = __CFStrContents(str);
1020     CFIndex len = __CFStrLength2(str, contents);
1021
1022     if (__CFStrIsEightBit(str)) {
1023         contents += __CFStrSkipAnyLengthByte(str);
1024         return __CFStrHashEightBit(contents, len);
1025     } else {
1026         return __CFStrHashCharacters((const UniChar *)contents, len, len);
1027     }
1028 }
1029
1030
/* Description callback installed in __CFStringClass. Returns a newly created string of the form
   <CFString ADDRESS [ALLOCATOR]>{contents = "..."}, built from the string's address, its allocator and its contents.
*/
static CFStringRef __CFStringCopyDescription(CFTypeRef cf) {
    return CFStringCreateWithFormat(kCFAllocatorDefault, NULL, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf, __CFGetAllocator(cf), cf);
}
1034
/* Formatting-description callback installed in __CFStringClass. The result is simply CFStringCreateCopy() of the
   string, made with the string's own allocator; formatOptions is ignored.
*/
static CFStringRef __CFStringCopyFormattingDescription(CFTypeRef cf, CFDictionaryRef formatOptions) {
    return CFStringCreateCopy(__CFGetAllocator(cf), cf);
}
1038
// Type ID of CFString; holds _kCFRuntimeNotATypeID until __CFStringInitialize() registers the class.
static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID;

/* Runtime class record handed to _CFRuntimeRegisterClass() by __CFStringInitialize().
   NOTE(review): the slot labels below follow the CFRuntimeClass layout as declared in CFRuntime.h - confirm there.
*/
static const CFRuntimeClass __CFStringClass = {
    0,                                          // version
    "CFString",                                 // class name
    NULL,      // init
    (void *)CFStringCreateCopy,                 // copy
    __CFStringDeallocate,                       // finalize
    __CFStringEqual,                            // equal
    __CFStringHash,                             // hash
    __CFStringCopyFormattingDescription,        // formatting description
    __CFStringCopyDescription                   // debug description
};
1052
/* Registers __CFStringClass with the CF runtime and remembers the resulting type ID in __kCFStringTypeID.
*/
__private_extern__ void __CFStringInitialize(void) {
    __kCFStringTypeID = _CFRuntimeRegisterClass(&__CFStringClass);
}
1056
/* Returns the type ID stored in __kCFStringTypeID. That variable is set by __CFStringInitialize() and is
   _kCFRuntimeNotATypeID before then.
*/
CFTypeID CFStringGetTypeID(void) {
    return __kCFStringTypeID;
}
1060
1061
/* Reports whether str has Unicode (as opposed to 8-bit) contents, per __CFStrIsUnicode().
   NOTE(review): CF_OBJC_FUNCDISPATCH0 presumably returns early with the result of the ObjC method
   "_encodingCantBeStoredInEightBitCFString" when str is not a native CFString - confirm against the macro definition.
*/
static Boolean CFStrIsUnicode(CFStringRef str) {
    CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, Boolean, str, "_encodingCantBeStoredInEightBitCFString");
    return __CFStrIsUnicode(str);
}
1066
1067
1068
// Sentinel for the contentsDeallocator argument of __CFStringCreateImmutableFunnel3(). It is not a real allocator:
// the funnel replaces it with the allocator the string itself is created with ("alloc").
#define ALLOCATORSFREEFUNC ((void *)-1)
1070
1071 /* contentsDeallocator indicates how to free the data if it's noCopy == true:
1072         kCFAllocatorNull: don't free
1073         ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
1074         NULL: default allocator
1075         otherwise it's the allocator that should be used (it will be explicitly stored)
   if noCopy == false, then contentsDeallocator should be ALLOCATORSFREEFUNC
1077    hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
1078    possiblyExternalFormat indicates that the bytes might have BOM and be swapped
1079    tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
1080    numBytes contains the actual number of bytes in "bytes", including Length byte,
1081         BUT not the NULL byte at the end
1082    bytes should not contain BOM characters
1083    !!! Various flags should be combined to reduce number of arguments, if possible
1084 */
1085 __private_extern__ CFStringRef __CFStringCreateImmutableFunnel3(
1086                         CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1087                         Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1088                         CFAllocatorRef contentsDeallocator, UInt32 converterFlags) {
1089
1090     CFMutableStringRef str;
1091     CFVarWidthCharBuffer vBuf;
1092     CFIndex size;
1093     Boolean useLengthByte = false;
1094     Boolean useNullByte = false;
1095     Boolean useInlineData = false;
1096
1097     if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1098
1099     if (contentsDeallocator == ALLOCATORSFREEFUNC) {
1100         contentsDeallocator = alloc;
1101     } else if (contentsDeallocator == NULL) {
1102         contentsDeallocator = __CFGetDefaultAllocator();
1103     }
1104
1105     if ((NULL != kCFEmptyString) && (numBytes == 0) && (alloc == kCFAllocatorSystemDefault)) {  // If we are using the system default allocator, and the string is empty, then use the empty string!
1106         if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {      // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
1107             CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1108         }
1109         return CFRetain(kCFEmptyString);        // Quick exit; won't catch all empty strings, but most
1110     }
1111
1112     // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1113
1114     vBuf.shouldFreeChars = false;       // We use this to remember to free the buffer possibly allocated by decode
1115
1116     // First check to see if the data needs to be converted...
1117     // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
1118
1119     if ((encoding == kCFStringEncodingUnicode && possiblyExternalFormat) || (encoding != kCFStringEncodingUnicode && !__CFCanUseEightBitCFStringForBytes(bytes, numBytes, encoding))) {
1120         const void *realBytes = (uint8_t*) bytes + (hasLengthByte ? 1 : 0);
1121         CFIndex realNumBytes = numBytes - (hasLengthByte ? 1 : 0);
1122         Boolean usingPassedInMemory = false;
1123
1124         vBuf.allocator = __CFGetDefaultAllocator();     // We don't want to use client's allocator for temp stuff
1125         vBuf.chars.unicode = NULL;      // This will cause the decode function to allocate memory if necessary
1126
1127         if (!__CFStringDecodeByteStream3(realBytes, realNumBytes, encoding, false, &vBuf, &usingPassedInMemory, converterFlags)) {
1128             return NULL;                // !!! Is this acceptable failure mode?
1129         }
1130
1131         encoding = vBuf.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUnicode;
1132
1133         if (!usingPassedInMemory) {
1134
1135             // Make the parameters fit the new situation
1136             numBytes = vBuf.isASCII ? vBuf.numChars : (vBuf.numChars * sizeof(UniChar));
1137             hasLengthByte = hasNullByte = false;
1138
1139             // Get rid of the original buffer if its not being used
1140             if (noCopy && contentsDeallocator != kCFAllocatorNull) {
1141                 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1142             }
1143             contentsDeallocator = alloc;        // At this point we are using the string's allocator, as the original buffer is gone...
1144
1145             // See if we can reuse any storage the decode func might have allocated
1146             // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1147
1148             if (vBuf.shouldFreeChars && (alloc == vBuf.allocator) && encoding == kCFStringEncodingUnicode) {
1149                 vBuf.shouldFreeChars = false;   // Transferring ownership to the CFString
1150                 bytes = CFAllocatorReallocate(vBuf.allocator, (void *)vBuf.chars.unicode, numBytes, 0); // Tighten up the storage
1151                 noCopy = true;
1152             } else {
1153                 bytes = vBuf.chars.unicode;
1154                 noCopy = false;                 // Can't do noCopy anymore
1155                 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1156             }
1157
1158         }
1159
1160         // At this point, all necessary input arguments have been changed to reflect the new state
1161
1162     } else if (encoding == kCFStringEncodingUnicode && tryToReduceUnicode) {    // Check to see if we can reduce Unicode to ASCII
1163         CFIndex cnt;
1164         CFIndex len = numBytes / sizeof(UniChar);
1165         Boolean allASCII = true;
1166
1167         for (cnt = 0; cnt < len; cnt++) if (((const UniChar *)bytes)[cnt] > 127) {
1168             allASCII = false;
1169             break;
1170         }
1171
1172         if (allASCII) { // Yes we can!
1173             uint8_t *ptr, *mem;
1174             hasLengthByte = __CFCanUseLengthByte(len);
1175             hasNullByte = true;
1176             numBytes = (len + 1 + (hasLengthByte ? 1 : 0)) * sizeof(uint8_t);   // NULL and possible length byte
1177             // See if we can use that temporary local buffer in vBuf...
1178             if (numBytes >= __kCFVarWidthLocalBufferSize) {
1179                 mem = ptr = (uint8_t *)CFAllocatorAllocate(alloc, numBytes, 0);
1180                 if (__CFOASafe) __CFSetLastAllocationEventName(mem, "CFString (store)");
1181             } else {
1182                 mem = ptr = (uint8_t *)(vBuf.localBuffer);
1183             }
1184             // Copy the Unicode bytes into the new ASCII buffer
1185             if (hasLengthByte) *ptr++ = len;
1186             for (cnt = 0; cnt < len; cnt++) ptr[cnt] = ((const UniChar *)bytes)[cnt];
1187             ptr[len] = 0;
1188             if (noCopy && contentsDeallocator != kCFAllocatorNull) {
1189                 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1190             }
1191             // Now make everything look like we had an ASCII buffer to start with
1192             bytes = mem;
1193             encoding = kCFStringEncodingASCII;
1194             contentsDeallocator = alloc;        // At this point we are using the string's allocator, as the original buffer is gone...
1195             noCopy = (numBytes >= __kCFVarWidthLocalBufferSize);        // If we had to allocate it, make sure it's kept around
1196             numBytes--;         // Should not contain the NULL byte at end...
1197         }
1198
1199         // At this point, all necessary input arguments have been changed to reflect the new state
1200     }
1201
1202     // Now determine the necessary size
1203
1204     if (noCopy) {
1205
1206         size = sizeof(void *);                          // Pointer to the buffer
1207         if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) {
1208             size += sizeof(void *);     // The contentsDeallocator
1209         }
1210         if (!hasLengthByte) size += sizeof(SInt32);     // Explicit length
1211         useLengthByte = hasLengthByte;
1212         useNullByte = hasNullByte;
1213
1214     } else {    // Inline data; reserve space for it
1215
1216         useInlineData = true;
1217         size = numBytes;
1218
1219         if (hasLengthByte || (encoding != kCFStringEncodingUnicode && __CFCanUseLengthByte(numBytes))) {
1220             useLengthByte = true;
1221             if (!hasLengthByte) size += 1;
1222         } else {
1223             size += sizeof(SInt32);     // Explicit length
1224         }
1225         if (hasNullByte || encoding != kCFStringEncodingUnicode) {
1226             useNullByte = true;
1227             size += 1;
1228         }
1229     }
1230
1231 #ifdef STRING_SIZE_STATS
1232     // Dump alloced CFString size info every so often
1233     static int cnt = 0;
1234     static unsigned sizes[256] = {0};
1235     int allocedSize = size + sizeof(CFRuntimeBase);
1236     if (allocedSize < 255) sizes[allocedSize]++; else sizes[255]++;
1237     if ((++cnt % 1000) == 0) {
1238         printf ("\nTotal: %d\n", cnt);
1239         int i; for (i = 0; i < 256; i++) printf("%03d: %5d%s", i, sizes[i], ((i % 8) == 7) ? "\n" : " ");
1240     }
1241 #endif
1242
1243     // Finally, allocate!
1244
1245     str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, size, NULL);
1246     if (str) {
1247         if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (immutable)");
1248
1249         __CFStrSetInfoBits(str,
1250                             (useInlineData ? __kCFHasInlineContents : (contentsDeallocator == alloc ? __kCFNotInlineContentsDefaultFree : (contentsDeallocator == kCFAllocatorNull ? __kCFNotInlineContentsNoFree : __kCFNotInlineContentsCustomFree))) |
1251                             ((encoding == kCFStringEncodingUnicode) ? __kCFIsUnicode : 0) |
1252                             (useNullByte ? __kCFHasNullByte : 0) |
1253                             (useLengthByte ? __kCFHasLengthByte : 0));
1254
1255         if (!useLengthByte) {
1256             CFIndex length = numBytes - (hasLengthByte ? 1 : 0);
1257             if (encoding == kCFStringEncodingUnicode) length /= sizeof(UniChar);
1258             __CFStrSetExplicitLength(str, length);
1259         }
1260
1261         if (useInlineData) {
1262             uint8_t *contents = (uint8_t *)__CFStrContents(str);
1263             if (useLengthByte && !hasLengthByte) *contents++ = numBytes;
1264             memmove(contents, bytes, numBytes);
1265             if (useNullByte) contents[numBytes] = 0;
1266         } else {
1267             __CFStrSetContentPtr(str, bytes);
1268             if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) __CFStrSetContentsDeallocator(str, CFRetain(contentsDeallocator));
1269         }
1270     } else {
1271         if (contentsDeallocator != kCFAllocatorNull) CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1272     }
1273     if (vBuf.shouldFreeChars) CFAllocatorDeallocate(vBuf.allocator, (void *)bytes);
1274
1275     return str;
1276 }
1277
1278 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1279 */
1280 CFStringRef __CFStringCreateImmutableFunnel2(
1281                                              CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1282                                              Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1283                                              CFAllocatorRef contentsDeallocator) {
1284     return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, possiblyExternalFormat, tryToReduceUnicode, hasLengthByte, hasNullByte, noCopy, contentsDeallocator, 0);
1285 }
1286
1287
1288
1289 CFStringRef  CFStringCreateWithPascalString(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding) {
1290     CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1291     return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, false, ALLOCATORSFREEFUNC, 0);
1292 }
1293
1294
1295 CFStringRef  CFStringCreateWithCString(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding) {
1296     CFIndex len = strlen(cStr);
1297     return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, false, ALLOCATORSFREEFUNC, 0);
1298 }
1299
1300 CFStringRef  CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1301     CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1302     return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, true, contentsDeallocator, 0);
1303 }
1304
1305
1306 CFStringRef  CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1307     CFIndex len = strlen(cStr);
1308     return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, true, contentsDeallocator, 0);
1309 }
1310
1311
1312 CFStringRef  CFStringCreateWithCharacters(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars) {
1313     return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1314 }
1315
1316
1317 CFStringRef  CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars, CFAllocatorRef contentsDeallocator) {
1318     return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, false, false, false, true, contentsDeallocator, 0);
1319 }
1320
1321
1322 CFStringRef  CFStringCreateWithBytes(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat) {
1323     return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1324 }
1325
1326 CFStringRef  _CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1327     return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1328 }
1329
1330 CFStringRef  CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1331     return _CFStringCreateWithBytesNoCopy(alloc, bytes, numBytes, encoding, externalFormat, contentsDeallocator);
1332 }
1333
1334 CFStringRef  CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1335     return _CFStringCreateWithFormatAndArgumentsAux(alloc, NULL, formatOptions, format, arguments);
1336 }
1337
1338 CFStringRef  _CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, CFDictionaryRef), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1339     CFStringRef str;
1340     CFMutableStringRef outputString = CFStringCreateMutable(__CFGetDefaultAllocator(), 0); //should use alloc if no copy/release
1341     __CFStrSetDesiredCapacity(outputString, 120);       // Given this will be tightened later, choosing a larger working string is fine
1342     _CFStringAppendFormatAndArgumentsAux(outputString, copyDescFunc, formatOptions, format, arguments);
1343     // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1344     // (However, this does make the string inline, and cause the supplied allocator to be used...)
1345     str = CFStringCreateCopy(alloc, outputString);
1346     CFRelease(outputString);
1347     return str;
1348 }
1349
1350 CFStringRef  CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) {
1351     CFStringRef result;
1352     va_list argList;
1353
1354     va_start(argList, format);
1355     result = CFStringCreateWithFormatAndArguments(alloc, formatOptions, format, argList);
1356     va_end(argList);
1357
1358     return result;
1359 }
1360
1361 CFStringRef CFStringCreateWithSubstring(CFAllocatorRef alloc, CFStringRef str, CFRange range) {
1362     if (CF_IS_OBJC(__kCFStringTypeID, str)) {
1363         static SEL s = NULL;
1364         CFStringRef (*func)(void *, SEL, ...) = (void *)__CFSendObjCMsg;
1365         if (!s) s = sel_registerName("_createSubstringWithRange:");
1366         CFStringRef result = func((void *)str, s, CFRangeMake(range.location, range.length));
1367         if (result && CF_USING_COLLECTABLE_MEMORY) CFRetain(result);                                 // needs hard retain.
1368         return result;
1369     }
1370 //      CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, CFStringRef , str, "_createSubstringWithRange:", CFRangeMake(range.location, range.length));
1371
1372     __CFAssertIsString(str);
1373     __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1374
1375     if ((range.location == 0) && (range.length == __CFStrLength(str))) {        /* The substring is the whole string... */
1376         return CFStringCreateCopy(alloc, str);
1377     } else if (__CFStrIsEightBit(str)) {
1378         const uint8_t *contents = __CFStrContents(str);
1379         return __CFStringCreateImmutableFunnel3(alloc, contents + range.location + __CFStrSkipAnyLengthByte(str), range.length, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1380     } else {
1381         const UniChar *contents = __CFStrContents(str);
1382         return __CFStringCreateImmutableFunnel3(alloc, contents + range.location, range.length * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1383     }
1384 }
1385
1386 CFStringRef  CFStringCreateCopy(CFAllocatorRef alloc, CFStringRef str) {
1387     if (CF_IS_OBJC(__kCFStringTypeID, str)) {
1388         static SEL s = NULL;
1389         CFStringRef (*func)(void *, SEL, ...) = (void *)__CFSendObjCMsg;
1390         if (!s) s = sel_registerName("copy");
1391         CFStringRef result = func((void *)str, s);
1392         if (result && CF_USING_COLLECTABLE_MEMORY) CFRetain(result);                                                 // needs hard retain.
1393         return result;
1394     }
1395 //  CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringRef, str, "copy");
1396
1397     __CFAssertIsString(str);
1398     if (!__CFStrIsMutable(str) &&                                                               // If the string is not mutable
1399         ((alloc ? alloc : __CFGetDefaultAllocator()) == __CFGetAllocator(str)) &&               //  and it has the same allocator as the one we're using
1400         (__CFStrIsInline(str) || __CFStrFreeContentsWhenDone(str) || __CFStrIsConstant(str))) { //  and the characters are inline, or are owned by the string, or the string is constant
1401         CFRetain(str);                                                                          // Then just retain instead of making a true copy
1402         return str;
1403     }
1404     if (__CFStrIsEightBit(str)) {
1405         const uint8_t *contents = __CFStrContents(str);
1406         return __CFStringCreateImmutableFunnel3(alloc, contents + __CFStrSkipAnyLengthByte(str), __CFStrLength2(str, contents), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1407     } else {
1408         const UniChar *contents = __CFStrContents(str);
1409         return __CFStringCreateImmutableFunnel3(alloc, contents, __CFStrLength2(str, contents) * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1410     }
1411 }
1412
1413
1414
1415 /*** Constant string stuff... ***/
1416
1417 static CFMutableDictionaryRef constantStringTable = NULL;
1418
1419 /* For now we call a function to create a constant string and keep previously created constant strings in a dictionary. The keys are the 8-bit constant C-strings from the compiler; the values are the CFStrings created for them.
1420 */
1421
1422 static CFStringRef __cStrCopyDescription(const void *ptr) {
1423     return CFStringCreateWithCStringNoCopy(NULL, (const char *)ptr, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull);
1424 }
1425
1426 static Boolean __cStrEqual(const void *ptr1, const void *ptr2) {
1427     return (strcmp((const char *)ptr1, (const char *)ptr2) == 0);
1428 }
1429
1430 static CFHashCode __cStrHash(const void *ptr) {
1431     // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1432     const unsigned char *cStr = (const unsigned char *)ptr;
1433     CFIndex len = strlen(cStr);
1434     CFHashCode result = 0;
1435     if (len <= 4) {     // All chars
1436         unsigned cnt = len;
1437         while (cnt--) result += (result << 8) + *cStr++;
1438     } else {            // First and last 2 chars
1439         result += (result << 8) + cStr[0];
1440         result += (result << 8) + cStr[1];
1441         result += (result << 8) + cStr[len-2];
1442         result += (result << 8) + cStr[len-1];
1443     }
1444     result += (result << (len & 31));
1445     return result;
1446 }
1447
#if defined(DEBUG)
/* We use a special allocator (which simply calls through to the default) for constant strings so that we can catch them being freed...
   Each callback below ignores its info argument and forwards to the corresponding CFAllocator call with a NULL allocator.
*/
static void *csAlloc(CFIndex size, CFOptionFlags hint, void *info) {
    void *block = CFAllocatorAllocate(NULL, size, hint);
    return block;
}

static void *csRealloc(void *oPtr, CFIndex size, CFOptionFlags hint, void *info) {
    void *block = CFAllocatorReallocate(NULL, oPtr, size, hint);
    return block;
}

static void csDealloc(void *ptr, void *info) {
    CFAllocatorDeallocate(NULL, ptr);
}

/* Description callback; returns a retained constant string naming this allocator. */
static CFStringRef csCopyDescription(const void *info) {
    return CFRetain(CFSTR("Debug allocator for CFSTRs"));
}
#endif
1467
1468 static CFSpinLock_t _CFSTRLock = 0;
1469
1470 CFStringRef __CFStringMakeConstantString(const char *cStr) {
1471     CFStringRef result;
1472 #if defined(DEBUG)
1473     //StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging
1474     if ('\0' == *cStr) return kCFEmptyString;
1475 #endif
1476     if (constantStringTable == NULL) {
1477         CFDictionaryKeyCallBacks constantStringCallBacks = {0, NULL, NULL, __cStrCopyDescription, __cStrEqual, __cStrHash};
1478         CFMutableDictionaryRef table = CFDictionaryCreateMutable(NULL, 0, &constantStringCallBacks, &kCFTypeDictionaryValueCallBacks);
1479         _CFDictionarySetCapacity(table, 2500);  // avoid lots of rehashing
1480         __CFSpinLock(&_CFSTRLock);
1481         if (constantStringTable == NULL) constantStringTable = table;
1482         __CFSpinUnlock(&_CFSTRLock);
1483         if (constantStringTable != table) CFRelease(table);
1484 #if defined(DEBUG)
1485         {
1486             CFAllocatorContext context = {0, NULL, NULL, NULL, csCopyDescription, csAlloc, csRealloc, csDealloc, NULL};
1487             constantStringAllocatorForDebugging = _CFAllocatorCreateGC(NULL, &context);
1488         }
1489 #else
1490 #define constantStringAllocatorForDebugging NULL
1491 #endif
1492     }
1493
1494     __CFSpinLock(&_CFSTRLock);
1495     if ((result = (CFStringRef)CFDictionaryGetValue(constantStringTable, cStr))) {
1496         __CFSpinUnlock(&_CFSTRLock);
1497     } else {
1498         __CFSpinUnlock(&_CFSTRLock);
1499
1500         {
1501         char *key;
1502         Boolean isASCII = true;
1503         // Given this code path is rarer these days, OK to do this extra work to verify the strings
1504         const unsigned char *tmp = cStr;
1505         while (*tmp) {
1506             if (*tmp++ > 127) {
1507                 isASCII = false;
1508                 break;
1509             }
1510         }
1511         if (!isASCII) {
1512             CFMutableStringRef ms = CFStringCreateMutable(NULL, 0);
1513             tmp = cStr;
1514             while (*tmp) {
1515                 CFStringAppendFormat(ms, NULL, (*tmp > 127) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp);
1516                 tmp++;
1517             }
1518             CFLog(0, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms);
1519             CFRelease(ms);
1520         }
1521         // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility
1522         result = CFStringCreateWithCString(constantStringAllocatorForDebugging, cStr, kCFStringEncodingMacRoman);
1523         if (result == NULL) {
1524             CFLog(__kCFLogAssertion, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1525             HALT;
1526         }
1527         if (__CFOASafe) __CFSetLastAllocationEventName((void *)result, "CFString (CFSTR)");
1528         if (__CFStrIsEightBit(result)) {
1529             key = (char *)__CFStrContents(result) + __CFStrSkipAnyLengthByte(result);
1530         } else {        // For some reason the string is not 8-bit!
1531             key = CFAllocatorAllocate(NULL, strlen(cStr) + 1, 0);
1532             if (__CFOASafe) __CFSetLastAllocationEventName((void *)key, "CFString (CFSTR key)");
1533             strcpy(key, cStr);  // !!! We will leak this, if the string is removed from the table (or table is freed)
1534         }
1535
1536         {
1537 #if !defined(DEBUG)
1538             CFStringRef resultToBeReleased = result;
1539 #endif
1540             CFIndex count;
1541             __CFSpinLock(&_CFSTRLock);
1542             count = CFDictionaryGetCount(constantStringTable);
1543             CFDictionaryAddValue(constantStringTable, key, result);
1544             if (CFDictionaryGetCount(constantStringTable) == count) { // add did nothing, someone already put it there
1545                 result = (CFStringRef)CFDictionaryGetValue(constantStringTable, key);
1546             }
1547             __CFSpinUnlock(&_CFSTRLock);
1548 #if !defined(DEBUG)
1549             // Can't release this in the DEBUG case; will get assertion failure
1550             CFRelease(resultToBeReleased);
1551 #endif
1552         }
1553         }
1554     }
1555     return result;
1556 }
1557
1558 #if defined(__MACOS8__) || defined(__WIN32__)
1559
1560 void __CFStringCleanup (void) {
1561     /* in case library is unloaded, release store for the constant string table */
1562     if (constantStringTable != NULL) {
1563 #if defined(DEBUG)
1564         __CFConstantStringTableBeingFreed = true;
1565         CFRelease(constantStringTable);
1566         __CFConstantStringTableBeingFreed = false;
1567 #else
1568         CFRelease(constantStringTable);
1569 #endif
1570     }
1571 #if defined(DEBUG)
1572     CFAllocatorDeallocate( constantStringAllocatorForDebugging, (void*) constantStringAllocatorForDebugging );
1573 #endif
1574 }
1575
1576 #endif
1577
1578
1579 // Can pass in NSString as replacement string
1580 // Call with numRanges > 0, and incrementing ranges
1581
1582 static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) {
1583     int cnt;
1584     CFStringRef copy = NULL;
1585     if (replacement == str) copy = replacement = CFStringCreateCopy(NULL, replacement);   // Very special and hopefully rare case
1586     CFIndex replacementLength = CFStringGetLength(replacement);
1587
1588     __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1589
1590     if (__CFStrIsUnicode(str)) {
1591         UniChar *contents = (UniChar *)__CFStrContents(str);
1592         UniChar *firstReplacement = contents + ranges[0].location;
1593         // Extract the replacementString into the first location, then copy from there
1594         CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement);
1595         for (cnt = 1; cnt < numRanges; cnt++) {
1596             // The ranges are in terms of the original string; so offset by the change in length due to insertion
1597             contents += replacementLength - ranges[cnt - 1].length;
1598             memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar));
1599         }
1600     } else {
1601         uint8_t *contents = (uint8_t *)__CFStrContents(str);
1602         uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str);
1603         // Extract the replacementString into the first location, then copy from there
1604         CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL);
1605         contents += __CFStrSkipAnyLengthByte(str);      // Now contents will simply track the location to insert next string into
1606         for (cnt = 1; cnt < numRanges; cnt++) {
1607             // The ranges are in terms of the original string; so offset by the change in length due to insertion
1608             contents += replacementLength - ranges[cnt - 1].length;
1609             memmove(contents + ranges[cnt].location, firstReplacement, replacementLength);
1610         }
1611     }
1612     if (copy) CFRelease(copy);
1613 }
1614
1615 // Can pass in NSString as replacement string
1616
1617 CF_INLINE void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
1618     CFStringRef copy = NULL;
1619     if (replacement == str) copy = replacement = CFStringCreateCopy(NULL, replacement);   // Very special and hopefully rare case
1620     CFIndex replacementLength = CFStringGetLength(replacement);
1621
1622     __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1623
1624     if (__CFStrIsUnicode(str)) {
1625         UniChar *contents = (UniChar *)__CFStrContents(str);
1626         CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location);
1627     } else {
1628         uint8_t *contents = (uint8_t *)__CFStrContents(str);
1629         CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL);
1630     }
1631
1632     if (copy) CFRelease(copy);
1633 }
1634
1635 /* If client does not provide a minimum capacity
1636 */
1637 #define DEFAULTMINCAPACITY 32
1638
1639 CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) {
1640     CFMutableStringRef str;
1641     Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false;
1642
1643     if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1644
1645     // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
1646     str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(void *) + sizeof(UInt32) * 3 + (hasExternalContentsAllocator ? sizeof(CFAllocatorRef) : 0), NULL);
1647     if (str) {
1648         if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)");
1649
1650         __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits);
1651         str->variants.notInlineMutable.buffer = NULL;
1652         __CFStrSetExplicitLength(str, 0);
1653         str->variants.notInlineMutable.gapEtc = 0;
1654         if (maxLength != 0) __CFStrSetIsFixed(str);
1655         __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength);
1656         __CFStrSetCapacity(str, 0);
1657     }
1658     return str;
1659 }
1660
1661 CFMutableStringRef CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc, UniChar *chars, CFIndex numChars, CFIndex capacity, CFAllocatorRef externalCharactersAllocator) {
1662     CFOptionFlags contentsAllocationBits = externalCharactersAllocator ? ((externalCharactersAllocator == kCFAllocatorNull) ? __kCFNotInlineContentsNoFree : __kCFHasContentsAllocator) : __kCFNotInlineContentsDefaultFree;
1663     CFMutableStringRef string = __CFStringCreateMutableFunnel(alloc, 0, contentsAllocationBits | __kCFIsUnicode);
1664     if (string) {
1665         __CFStrSetIsExternalMutable(string);
1666         if (contentsAllocationBits == __kCFHasContentsAllocator) __CFStrSetContentsAllocator(string, CFRetain(externalCharactersAllocator));
1667         CFStringSetExternalCharactersNoCopy(string, chars, numChars, capacity);
1668     }
1669     return string;
1670 }
1671
1672 CFMutableStringRef CFStringCreateMutable(CFAllocatorRef alloc, CFIndex maxLength) {
1673     return __CFStringCreateMutableFunnel(alloc, maxLength, __kCFNotInlineContentsDefaultFree);
1674 }
1675
1676 CFMutableStringRef  CFStringCreateMutableCopy(CFAllocatorRef alloc, CFIndex maxLength, CFStringRef string) {
1677     CFMutableStringRef newString;
1678
1679     if (CF_IS_OBJC(__kCFStringTypeID, string)) {
1680          static SEL s = NULL;
1681          CFMutableStringRef (*func)(void *, SEL, ...) = (void *)__CFSendObjCMsg;
1682          if (!s) s = sel_registerName("mutableCopy");
1683          newString = func((void *)string, s);
1684          if (CF_USING_COLLECTABLE_MEMORY) auto_zone_retain(__CFCollectableZone, newString); // needs hard retain IF using GC
1685          return newString;
1686     }
1687     //  CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFMutableStringRef, string, "mutableCopy");
1688
1689     __CFAssertIsString(string);
1690
1691     newString = CFStringCreateMutable(alloc, maxLength);
1692     __CFStringReplace(newString, CFRangeMake(0, 0), string);
1693
1694     return newString;
1695 }
1696
1697
1698 __private_extern__ void _CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex len) {
1699     __CFAssertIsStringAndMutable(str);
1700     __CFStrSetDesiredCapacity(str, len);
1701 }
1702
1703
1704 /* This one is for CF
1705 */
1706 CFIndex CFStringGetLength(CFStringRef str) {
1707     CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFIndex, str, "length");
1708
1709     __CFAssertIsString(str);
1710     return __CFStrLength(str);
1711 }
1712
1713 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1714 */
1715 CFIndex _CFStringGetLength2(CFStringRef str) {
1716     return __CFStrLength(str);
1717 }
1718
1719
1720 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere.
1721 */
1722 CF_INLINE UniChar __CFStringGetCharacterAtIndexGuts(CFStringRef str, CFIndex idx, const uint8_t *contents) {
1723     if (__CFStrIsEightBit(str)) {
1724         contents += __CFStrSkipAnyLengthByte(str);
1725 #if defined(DEBUG)
1726         if (!__CFCharToUniCharFunc && (contents[idx] >= 128)) {
1727             // Can't do log here, as it might be too early
1728             fprintf(stderr, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
1729         }
1730 #endif
1731         return __CFCharToUniCharTable[contents[idx]];
1732     }
1733
1734     return ((UniChar *)contents)[idx];
1735 }
1736
1737 /* This one is for the CF API
1738 */
1739 UniChar CFStringGetCharacterAtIndex(CFStringRef str, CFIndex idx) {
1740     CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, UniChar, str, "characterAtIndex:", idx);
1741
1742     __CFAssertIsString(str);
1743     __CFAssertIndexIsInStringBounds(str, idx);
1744     return __CFStringGetCharacterAtIndexGuts(str, idx, __CFStrContents(str));
1745 }
1746
1747 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1748 */
1749 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str, CFIndex idx, UniChar *ch) {
1750     const uint8_t *contents = __CFStrContents(str);
1751     if (idx >= __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1752     *ch = __CFStringGetCharacterAtIndexGuts(str, idx, contents);
1753     return _CFStringErrNone;
1754 }
1755
1756
1757 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
1758 */
1759 CF_INLINE void __CFStringGetCharactersGuts(CFStringRef str, CFRange range, UniChar *buffer, const uint8_t *contents) {
1760     if (__CFStrIsEightBit(str)) {
1761         __CFStrConvertBytesToUnicode(((uint8_t *)contents) + (range.location + __CFStrSkipAnyLengthByte(str)), buffer, range.length);
1762     } else {
1763         const UniChar *uContents = ((UniChar *)contents) + range.location;
1764         memmove(buffer, uContents, range.length * sizeof(UniChar));
1765     }
1766 }
1767
1768 /* This one is for the CF API
1769 */
1770 void CFStringGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1771     CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "getCharacters:range:", buffer, CFRangeMake(range.location, range.length));
1772
1773     __CFAssertIsString(str);
1774     __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1775     __CFStringGetCharactersGuts(str, range, buffer, __CFStrContents(str));
1776 }
1777
1778 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1779 */
1780 int _CFStringCheckAndGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1781      const uint8_t *contents = __CFStrContents(str);
1782      if (range.location + range.length > __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1783      __CFStringGetCharactersGuts(str, range, buffer, contents);
1784      return _CFStringErrNone;
1785 }
1786
1787
1788 CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) {
1789
1790     /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */
1791     __CFAssertIsNotNegative(maxBufLen);
1792
1793     if (!CF_IS_OBJC(__kCFStringTypeID, str)) {  // If we can grope the ivars, let's do it...
1794         __CFAssertIsString(str);
1795         __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1796
1797         if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) {    // Requested encoding is equal to the encoding in string
1798             const unsigned char *contents = __CFStrContents(str);
1799             CFIndex cLength = range.length;
1800
1801             if (buffer) {
1802                 if (cLength > maxBufLen) cLength = maxBufLen;
1803                 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength);
1804             }
1805             if (usedBufLen) *usedBufLen = cLength;
1806
1807             return cLength;
1808         }
1809     }
1810
1811     return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen);
1812 }
1813
1814
1815 ConstStringPtr CFStringGetPascalStringPtr (CFStringRef str, CFStringEncoding encoding) {
1816
1817     if (!CF_IS_OBJC(__kCFStringTypeID, str)) {  /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1818         __CFAssertIsString(str);
1819         if (__CFStrHasLengthByte(str) && __CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) {       // Requested encoding is equal to the encoding in string || the contents is in ASCII
1820             const uint8_t *contents = __CFStrContents(str);
1821             if (__CFStrHasExplicitLength(str) && (__CFStrLength2(str, contents) != (SInt32)(*contents))) return NULL;   // Invalid length byte
1822             return (ConstStringPtr)contents;
1823         }
1824         // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1825     }
1826     return NULL;
1827 }
1828
1829
1830 const char * CFStringGetCStringPtr(CFStringRef str, CFStringEncoding encoding) {
1831
1832     if (encoding != __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII != __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding))) return NULL;
1833     // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1834
1835     CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, const char *, str, "_fastCStringContents:", true);
1836
1837     __CFAssertIsString(str);
1838
1839     if (__CFStrHasNullByte(str)) {
1840         return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str);
1841     } else {
1842         return NULL;
1843     }
1844 }
1845
1846
1847 const UniChar *CFStringGetCharactersPtr(CFStringRef str) {
1848
1849     CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, const UniChar *, str, "_fastCharacterContents");
1850
1851     __CFAssertIsString(str);
1852     if (__CFStrIsUnicode(str)) return (const UniChar *)__CFStrContents(str);
1853     return NULL;
1854 }
1855
1856
1857 Boolean CFStringGetPascalString(CFStringRef str, Str255 buffer, CFIndex bufferSize, CFStringEncoding encoding) {
1858     CFIndex length;
1859     CFIndex usedLen;
1860
1861     __CFAssertIsNotNegative(bufferSize);
1862     if (bufferSize < 1) return false;
1863
1864     if (CF_IS_OBJC(__kCFStringTypeID, str)) {   /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1865         length = CFStringGetLength(str);
1866         if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
1867     } else {
1868         const uint8_t *contents;
1869
1870         __CFAssertIsString(str);
1871
1872         contents = __CFStrContents(str);
1873         length = __CFStrLength2(str, contents);
1874
1875         if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
1876
1877         if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) {    // Requested encoding is equal to the encoding in string
1878             if (length >= bufferSize) return false;
1879             memmove((void*)(1 + (const char*)buffer), (__CFStrSkipAnyLengthByte(str) + contents), length);
1880             *buffer = length;
1881             return true;
1882         }
1883     }
1884
1885     if (__CFStringEncodeByteStream(str, 0, length, false, encoding, false, (void*)(1 + (uint8_t*)buffer), bufferSize - 1, &usedLen) != length) {
1886 #if defined(DEBUG)
1887         if (bufferSize > 0) {
1888             strncpy((char *)buffer + 1, CONVERSIONFAILURESTR, bufferSize - 1);
1889             buffer[0] = (CFIndex)sizeof(CONVERSIONFAILURESTR) < (bufferSize - 1) ? (CFIndex)sizeof(CONVERSIONFAILURESTR) : (bufferSize - 1);
1890         }
1891 #else
1892         if (bufferSize > 0) buffer[0] = 0;
1893 #endif
1894         return false;
1895     }
1896     *buffer = usedLen;
1897     return true;
1898 }
1899
1900 Boolean CFStringGetCString(CFStringRef str, char *buffer, CFIndex bufferSize, CFStringEncoding encoding) {
1901     const uint8_t *contents;
1902     CFIndex len;
1903
1904     __CFAssertIsNotNegative(bufferSize);
1905     if (bufferSize < 1) return false;
1906
1907     CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, Boolean, str, "_getCString:maxLength:encoding:", buffer, bufferSize - 1, encoding);
1908
1909     __CFAssertIsString(str);
1910
1911     contents = __CFStrContents(str);
1912     len = __CFStrLength2(str, contents);
1913
1914     if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) {        // Requested encoding is equal to the encoding in string
1915         if (len >= bufferSize) return false;
1916         memmove(buffer, contents + __CFStrSkipAnyLengthByte(str), len);
1917         buffer[len] = 0;
1918         return true;
1919     } else {
1920         CFIndex usedLen;
1921
1922         if (__CFStringEncodeByteStream(str, 0, len, false, encoding, false, (unsigned char*) buffer, bufferSize - 1, &usedLen) == len) {
1923             buffer[usedLen] = '\0';
1924             return true;
1925         } else {
1926 #if defined(DEBUG)
1927             strncpy(buffer, CONVERSIONFAILURESTR, bufferSize);
1928 #else
1929             if (bufferSize > 0) buffer[0] = 0;
1930 #endif
1931             return false;
1932         }
1933     }
1934 }
1935
1936
/* Tells callers whether `locale` may be used.
   This implementation does not look at its argument and always answers false.
*/
1937 CF_INLINE bool _CFCanUseLocale(CFLocaleRef locale) {
1938     return false;
1939 }
1940
/* Returns the language identifier string for `locale`.
   This implementation does not look at its argument and always returns NULL.
*/
1941 static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale) {
1942     return NULL;
1943 }
1944
/* Element count of the scratch arrays this file passes to CFUniCharMapCaseTo(). */
1945 #define MAX_CASE_MAPPING_BUF (8)
/* Code point values U+200D and U+034F. */
1946 #define ZERO_WIDTH_JOINER (0x200D)
1947 #define COMBINING_GRAPHEME_JOINER (0x034F)
1948 // Hangul ranges
/* First/last code point values for each named range; presumably inclusive bounds -- confirm at the use sites. */
1949 #define HANGUL_CHOSEONG_START (0x1100)
1950 #define HANGUL_CHOSEONG_END (0x115F)
1951 #define HANGUL_JUNGSEONG_START (0x1160)
1952 #define HANGUL_JUNGSEONG_END (0x11A2)
1953 #define HANGUL_JONGSEONG_START (0x11A8)
1954 #define HANGUL_JONGSEONG_END (0x11F9)
1955
1956 #define HANGUL_SYLLABLE_START (0xAC00)
1957 #define HANGUL_SYLLABLE_END (0xD7AF)
1958
1959
1960 // Returns the length of characters filled into outCharacters. If no change, returns 0. maxBufLen shoule be at least 8
1961 static inline CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength) {
1962     CFIndex filledLength = 0, currentIndex = index;
1963
1964     if (0 != character) {
1965         UTF16Char lowSurrogate;
1966         CFIndex planeNo = (character >> 16);
1967         bool isTurkikCapitalI = false;
1968         static const uint8_t *decompBMP = NULL;
1969         static const uint8_t *nonBaseBMP = NULL;
1970
1971         if (NULL == decompBMP) {
1972             decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
1973             nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
1974         }
1975
1976         ++currentIndex;
1977
1978         if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII
1979             if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) {
1980                 character += ('a' - 'A');
1981                 *outCharacters = character;
1982                 filledLength = 1;
1983             }
1984         } else {
1985             // do width-insensitive mapping
1986             if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) {
1987                 (void)CFUniCharCompatibilityDecompose(&character, 1, 1);
1988                 *outCharacters = character;
1989                 filledLength = 1;
1990             }
1991
1992             // map surrogates
1993             if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) {
1994                 character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
1995                 ++currentIndex;
1996                 planeNo = (character >> 16);
1997             }
1998
1999             // decompose
2000             if (flags & (kCFCompareDiacriticsInsensitive|kCFCompareNonliteral)) {
2001                 if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) {
2002                     filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength);
2003                     character = *outCharacters;
2004                     if ((flags & kCFCompareDiacriticsInsensitive) && (character < 0x0510)) filledLength = 1; // reset if Roman, Greek, Cyrillic
2005                 }
2006             }
2007
2008             // fold case
2009             if (flags & kCFCompareCaseInsensitive) {
2010                 const uint8_t *nonBaseBitmap;
2011                 bool filterNonBase = (((flags & kCFCompareDiacriticsInsensitive) && (character < 0x0510)) ? true : false);
2012                 static const uint8_t *lowerBMP = NULL;
2013                 static const uint8_t *caseFoldBMP = NULL;
2014
2015                 if (NULL == lowerBMP) {
2016                     lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0);
2017                     caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0);
2018                 }
2019
2020                 if ((NULL != langCode) && ('I' == character) && ((0 == strcmp(langCode, "tr")) || (0 == strcmp(langCode, "az")))) { // do Turkik special-casing
2021                     if (filledLength > 1) {
2022                         if (0x0307 == outCharacters[1]) {
2023                             memmove(&(outCharacters[index]), &(outCharacters[index + 1]), sizeof(UTF32Char) * (--filledLength));
2024                             character = *outCharacters = 'i';
2025                             isTurkikCapitalI = true;
2026                         }
2027                     } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) {
2028                         character = *outCharacters = 'i';
2029                         filledLength = 1;
2030                         ++currentIndex;
2031                         isTurkikCapitalI = true;
2032                     }
2033                 }
2034                 if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) {
2035                     UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF];
2036                     const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit;
2037                     UTF32Char *outCharactersP = outCharacters;
2038                     uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode);
2039
2040                     bufferLimit = bufferP + bufferLength;
2041
2042                     if (filledLength > 0) --filledLength; // decrement filledLength (will add back later)
2043
2044                     // make space for casefold characters
2045                     if ((filledLength > 0) && (bufferLength > 1)) {
2046                         CFIndex totalScalerLength = 0;
2047
2048                         while (bufferP < bufferLimit) {
2049                             if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP;
2050                             ++totalScalerLength;
2051                         }
2052                         memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char));
2053                         bufferP = caseFoldBuffer;
2054                     }
2055
2056                     // fill
2057                     while (bufferP < bufferLimit) {
2058                         character = *(bufferP++);
2059                         if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) {
2060                             character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++));
2061                             nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
2062                         } else {
2063                             nonBaseBitmap = nonBaseBMP;
2064                         }
2065
2066                         if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2067                             *(outCharactersP++) = character;
2068                             ++filledLength;
2069                         }
2070                     }
2071                 }
2072             }
2073         }
2074
2075         // collect following combining marks
2076         if (flags & (kCFCompareDiacriticsInsensitive|kCFCompareNonliteral)) {
2077             const uint8_t *nonBaseBitmap;
2078             const uint8_t *decompBitmap;
2079             bool doFill = (((flags & kCFCompareDiacriticsInsensitive) && (character < 0x0510)) ? false : true);
2080
2081             if (doFill && (0 == filledLength)) { // check if really needs to fill
2082                 UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2083
2084                 if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2085                     nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate);
2086                     nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nonBaseCharacter >> 16));
2087                     decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16));
2088                 } else {
2089                     nonBaseBitmap = nonBaseBMP;
2090                     decompBitmap = decompBMP;
2091                 }
2092
2093                 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) {
2094                     outCharacters[filledLength++] = character;
2095
2096                     if ((0 == (flags & kCFCompareDiacriticsInsensitive)) || (nonBaseCharacter > 0x050F)) {
2097                         if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) {
2098                             filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2099                         } else {
2100                             outCharacters[filledLength++] = nonBaseCharacter;
2101                         }
2102                     }
2103                     currentIndex += ((nonBaseBitmap == nonBaseBMP) ? 1 : 2);
2104                 } else {
2105                     doFill = false;
2106                 }
2107             }
2108
2109             while (filledLength < maxBufferLength) { // do the rest
2110                 character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2111
2112                 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2113                     character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
2114                     nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
2115                     decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16));
2116                 } else {
2117                     nonBaseBitmap = nonBaseBMP;
2118                     decompBitmap = decompBMP;
2119                 }
2120                 if (isTurkikCapitalI) {
2121                     isTurkikCapitalI = false;
2122                 } else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2123                     if (doFill && ((0 == (flags & kCFCompareDiacriticsInsensitive)) || (character > 0x050F))) {
2124                         if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) {
2125                             CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2126
2127                             if (0 == currentLength) break; // didn't fit
2128
2129                             filledLength += currentLength;
2130                         } else {
2131                             outCharacters[filledLength++] = character;
2132                         }
2133                     }
2134                     currentIndex += ((nonBaseBitmap == nonBaseBMP) ? 1 : 2);
2135                 } else {
2136                     break;
2137                 }
2138             }
2139
2140             if (filledLength > 1) CFUniCharPrioritySort(outCharacters, filledLength); // priority sort
2141         }
2142     }
2143
2144     if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index);
2145
2146     return filledLength;
2147 }
2148
2149 /* Special casing for Uk (Ukrainian) sorting: state consulted by CFStringCompareWithOptions() when kCFCompareLocalized is set */
2150 #define DO_IGNORE_PUNCTUATION 1
2151 #if DO_IGNORE_PUNCTUATION
/* Language code value that CFStringCompareWithOptions() compares against the user's first language. */
2152 #define UKRAINIAN_LANG_CODE (45)
/* Set to true once the user-language lookup has been performed, so it is not repeated. */
2153 static bool __CFLocaleChecked = false;
/* BMP punctuation bitmap; stays NULL unless the lookup found UKRAINIAN_LANG_CODE. */
2154 static const uint8_t *__CFPunctSetBMP = NULL;
2155 #endif /* DO_IGNORE_PUNCTUATION */
2156
2157 /* ??? We need to implement some additional flags here
2158    ??? Also, pay attention to flag 2, which is the NS flag (which CF has as flag 16, w/opposite meaning).
2159 */
2160 CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFOptionFlags compareOptions) {
2161 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2162     CFStringInlineBuffer strBuf1, strBuf2;
2163     UTF32Char ch1, ch2;
2164     const uint8_t *punctBMP = NULL;
2165     Boolean caseInsensitive = (compareOptions & kCFCompareCaseInsensitive ? true : false);
2166     Boolean decompose = (compareOptions & kCFCompareNonliteral ? true : false);
2167     Boolean numerically = (compareOptions & kCFCompareNumerically ? true : false);
2168     Boolean localized = (compareOptions & kCFCompareLocalized ? true : false);
2169
2170 #if DO_IGNORE_PUNCTUATION
2171     if (localized) {
2172         if (!__CFLocaleChecked) {
2173             CFArrayRef locales = _CFBundleCopyUserLanguages(false);
2174
2175             if (locales && (CFArrayGetCount(locales) > 0)) {
2176                 SInt32 langCode;
2177
2178                 if (CFBundleGetLocalizationInfoForLocalization((CFStringRef)CFArrayGetValueAtIndex(locales, 0), &langCode, NULL, NULL, NULL) && (langCode == UKRAINIAN_LANG_CODE)) {
2179                     __CFPunctSetBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, 0);
2180                 }
2181
2182                 CFRelease(locales);
2183             }
2184             __CFLocaleChecked = true;
2185         }
2186
2187         punctBMP = __CFPunctSetBMP;
2188     }
2189 #endif /* DO_IGNORE_PUNCTUATION */
2190
2191     CFStringInitInlineBuffer(string, &strBuf1, CFRangeMake(rangeToCompare.location, rangeToCompare.length));
2192     CFIndex strBuf1_idx = 0;
2193     CFIndex string2_len = CFStringGetLength(string2);
2194     CFStringInitInlineBuffer(string2, &strBuf2, CFRangeMake(0, string2_len));
2195     CFIndex strBuf2_idx = 0;
2196
2197     while (strBuf1_idx < rangeToCompare.length && strBuf2_idx < string2_len) {
2198         ch1 = CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx);
2199         ch2 = CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx);
2200
2201         if (numerically && (ch1 <= '9' && ch1 >= '0') && (ch2 <= '9' && ch2 >= '0')) {  // If both are not digits, then don't do numerical comparison
2202             uint64_t n1 = 0;    // !!! Doesn't work if numbers are > max uint64_t
2203             uint64_t n2 = 0;
2204             do {
2205                 n1 = n1 * 10 + (ch1 - '0');
2206                 strBuf1_idx++;
2207                 if (rangeToCompare.length <= strBuf1_idx) break;
2208                 ch1 = CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx);
2209             } while (ch1 <= '9' && ch1 >= '0');
2210             do {
2211                 n2 = n2 * 10 + (ch2 - '0');
2212                 strBuf2_idx++;
2213                 if (string2_len <= strBuf2_idx) break;
2214                 ch2 = CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx);
2215             } while (ch2 <= '9' && ch2 >= '0');
2216             if (n1 < n2) return kCFCompareLessThan; else if (n1 > n2) return kCFCompareGreaterThan;
2217             continue;   // If numbers were equal, go back to top without incrementing the buffer pointers
2218         }
2219
2220         if (CFUniCharIsSurrogateHighCharacter(ch1)) {
2221             strBuf1_idx++;
2222             if (strBuf1_idx < rangeToCompare.length && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx))) {
2223                 ch1 = CFUniCharGetLongCharacterForSurrogatePair(ch1, CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx));
2224             } else {
2225                 strBuf1_idx--;
2226             }
2227         }
2228         if (CFUniCharIsSurrogateHighCharacter(ch2)) {
2229             strBuf2_idx++;
2230             if (strBuf2_idx < string2_len && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx))) {
2231                 ch2 = CFUniCharGetLongCharacterForSurrogatePair(ch2, CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx));
2232             } else {
2233                 strBuf2_idx--;
2234             }
2235         }
2236
2237         if (ch1 != ch2) {
2238 #if DO_IGNORE_PUNCTUATION
2239             if (punctBMP) {
2240                 if (CFUniCharIsMemberOfBitmap(ch1, (ch1 < 0x10000 ? punctBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, (ch1 >> 16))))) {
2241                     ++strBuf1_idx; continue;
2242                 }
2243                 if (CFUniCharIsMemberOfBitmap(ch2, (ch2 < 0x10000 ? punctBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, (ch2 >> 16))))) {
2244                     ++strBuf2_idx; continue;
2245                 }
2246             }
2247 #endif  /* DO_IGNORE_PUNCTUATION */
2248             // We standardize to lowercase here since currently, as of Unicode 3.1.1, it's one-to-one mapping.
2249             // Note we map to uppercase for both SMALL LETTER SIGMA and SMALL LETTER FINAL SIGMA
2250             if (caseInsensitive) {
2251                 if (ch1 < 128) {
2252                     ch1 -= ((ch1 >= 'A' && ch1 <= 'Z') ? 'A' - 'a' : 0);
2253                 } else if (ch1 == 0x03C2 || ch1 == 0x03C3 || ch1 == 0x03A3) { // SMALL SIGMA
2254                     ch1 = 0x03A3;
2255                 } else {
2256                     UniChar buffer[MAX_CASE_MAPPING_BUF];
2257
2258                     if (CFUniCharMapCaseTo(ch1, buffer, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, 0, NULL) > 1) { // It's supposed to be surrogates
2259                         ch1 = CFUniCharGetLongCharacterForSurrogatePair(buffer[0], buffer[1]);
2260                     } else {
2261                         ch1 = *buffer;
2262                     }
2263                 }
2264                 if (ch2 < 128) {
2265                     ch2 -= ((ch2 >= 'A' && ch2 <= 'Z') ? 'A' - 'a' : 0);
2266                 } else if (ch2 == 0x03C2 || ch2 == 0x03C3 || ch2 == 0x03A3) { // SMALL SIGMA
2267                     ch2 = 0x03A3;
2268                 } else {
2269                     UniChar buffer[MAX_CASE_MAPPING_BUF];
2270
2271                     if (CFUniCharMapCaseTo(ch2, buffer, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, 0, NULL) > 1) { // It's supposed to be surrogates
2272                         ch2 = CFUniCharGetLongCharacterForSurrogatePair(buffer[0], buffer[1]);
2273                     } else {
2274                         ch2 = *buffer;
2275                     }
2276                 }
2277             }
2278
2279             if (ch1 != ch2) { // still different
2280                 if (decompose) { // ??? This is not exactly the canonical comparison (We need to do priority sort)
2281                     Boolean isCh1Decomposable = (ch1 > 0x7F && CFUniCharIsMemberOf(ch1, kCFUniCharDecomposableCharacterSet));
2282                     Boolean isCh2Decomposable = (ch2 > 0x7F && CFUniCharIsMemberOf(ch2, kCFUniCharDecomposableCharacterSet));
2283
2284                     if (isCh1Decomposable != isCh2Decomposable) {
2285                         UTF32Char decomposedCharater[MAX_DECOMPOSED_LENGTH];
2286                         UInt32 decomposedCharacterLength;
2287                         UInt32 idx;
2288
2289                         if (isCh1Decomposable) {
2290                             decomposedCharacterLength = CFUniCharDecomposeCharacter(ch1, decomposedCharater, MAX_DECOMPOSED_LENGTH);
2291                             if ((string2_len - strBuf2_idx) < decomposedCharacterLength) { // the remaining other length is shorter
2292                                 if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan;
2293                             }
2294                             for (idx = 0; idx < decomposedCharacterLength; idx++) {
2295                                 ch1 = decomposedCharater[idx];
2296                                 if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan;
2297                                 strBuf2_idx++; ch2 = (strBuf2_idx < string2_len ? CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx) : 0xffff);
2298                                 if (CFUniCharIsSurrogateHighCharacter(ch2)) {
2299                                     strBuf2_idx++;
2300                                     if (strBuf2_idx < string2_len && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx))) {
2301                                         ch2 = CFUniCharGetLongCharacterForSurrogatePair(ch2, CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx));
2302                                     } else {
2303                                         strBuf2_idx--;
2304                                     }
2305                                 }
2306                             }
2307                             strBuf1_idx++; continue;
2308                         } else { // ch2 is decomposable, then
2309                             decomposedCharacterLength = CFUniCharDecomposeCharacter(ch2, decomposedCharater, MAX_DECOMPOSED_LENGTH);
2310                             if ((rangeToCompare.length - strBuf1_idx) < decomposedCharacterLength) { // the remaining other length is shorter
2311                                 if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan;
2312                             }
2313                             for (idx = 0; idx < decomposedCharacterLength && strBuf1_idx < rangeToCompare.length; idx++) {
2314                                 ch2 = decomposedCharater[idx];
2315                                 if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan;
2316                                 strBuf1_idx++; ch1 = (strBuf1_idx < rangeToCompare.length ? CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx) : 0xffff);
2317                                 if (CFUniCharIsSurrogateHighCharacter(ch1)) {
2318                                     strBuf1_idx++;
2319                                     if (strBuf1_idx < rangeToCompare.length && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx))) {
2320                                         ch1 = CFUniCharGetLongCharacterForSurrogatePair(ch1, CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx));
2321                                     } else {
2322                                         strBuf1_idx--;
2323                                     }
2324                                 }
2325                             }
2326                             strBuf2_idx++; continue;
2327                         }
2328                     }
2329                 }
2330                 if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan;
2331             }
2332         }
2333         strBuf1_idx++; strBuf2_idx++;
2334     }
2335     if (strBuf1_idx < rangeToCompare.length) {
2336         return kCFCompareGreaterThan;
2337     } else if (strBuf2_idx < string2_len) {
2338         return kCFCompareLessThan;
2339     } else {
2340         return kCFCompareEqualTo;
2341     }
2342 }
2343
2344
2345 CFComparisonResult CFStringCompare(CFStringRef string, CFStringRef str2, CFOptionFlags options) {
2346     return CFStringCompareWithOptions(string, str2, CFRangeMake(0, CFStringGetLength(string)), options);
2347 }
2348
2349 #define kCFStringStackBufferLength (64)
2350
2351 Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions, CFRange *result) {
2352     /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2353     CFIndex findStrLen = CFStringGetLength(stringToFind);
2354     Boolean didFind = false;
2355     bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticsInsensitive)) ? true : false);
2356
2357     if ((findStrLen > 0) && (rangeToSearch.length > 0) && ((findStrLen <= rangeToSearch.length) || lengthVariants)) {
2358         UTF32Char strBuf1[kCFStringStackBufferLength];
2359         UTF32Char strBuf2[kCFStringStackBufferLength];
2360         CFStringInlineBuffer inlineBuf1, inlineBuf2;
2361         UTF32Char str1Char, str2Char;
2362         CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2363         const uint8_t *str1Bytes = CFStringGetCStringPtr(string, eightBitEncoding);
2364         const uint8_t *str2Bytes = CFStringGetCStringPtr(stringToFind, eightBitEncoding);
2365         const UTF32Char *characters, *charactersLimit;
2366         const uint8_t *langCode = NULL;
2367         CFIndex fromLoc, toLoc;
2368         CFIndex str1Index, str2Index;
2369         CFIndex strBuf1Len, strBuf2Len;
2370         bool equalityOptions = ((lengthVariants || (compareOptions & kCFCompareWidthInsensitive)) ? true : false);
2371         bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2372         int8_t delta;
2373
2374
2375         CFStringInitInlineBuffer(string, &inlineBuf1, CFRangeMake(0, rangeToSearch.location + rangeToSearch.length));
2376         CFStringInitInlineBuffer(stringToFind, &inlineBuf2, CFRangeMake(0, findStrLen));
2377
2378         if (compareOptions & kCFCompareBackwards) {
2379             fromLoc = rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen);
2380             toLoc = (((compareOptions & kCFCompareAnchored) && !lengthVariants) ? fromLoc : rangeToSearch.location);
2381         } else {
2382             fromLoc = rangeToSearch.location;
2383             toLoc = ((compareOptions & kCFCompareAnchored) ? fromLoc : rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen));
2384         }
2385
2386         delta = ((fromLoc <= toLoc) ? 1 : -1);
2387
2388         if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2389             CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length);
2390             uint8_t str1Byte, str2Byte;
2391
2392             while (1) {
2393                 str1Index = fromLoc;
2394                 str2Index = 0;
2395
2396                 while ((str1Index < maxStr1Index) && (str2Index < findStrLen)) {
2397                     str1Byte = str1Bytes[str1Index];
2398                     str2Byte = str2Bytes[str2Index];
2399
2400                     if (str1Byte != str2Byte) {
2401                         if (equalityOptions) {
2402                             if ((str1Byte < 0x80) && ((NULL == langCode) || ('I' != str1Byte))) {
2403                                 if (caseInsensitive && (str1Byte >= 'A') && (str1Byte <= 'Z')) str1Byte += ('a' - 'A');
2404                                 *strBuf1 = str1Byte;
2405                                 strBuf1Len = 1;
2406                             } else {
2407                                 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2408                                 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2409                                 if (1 > strBuf1Len) {
2410                                     *strBuf1 = str1Char;
2411                                     strBuf1Len = 1;
2412                                 }
2413                             }
2414                             if ((str2Byte < 0x80) && ((NULL == langCode) || ('I' != str2Byte))) {
2415                                 if (caseInsensitive && (str2Byte >= 'A') && (str2Byte <= 'Z')) str2Byte += ('a' - 'A');
2416                                 *strBuf2 = str2Byte;
2417                                 strBuf2Len = 1;
2418                             } else {
2419                                 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2420                                 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2421                                 if (1 > strBuf2Len) {
2422                                     *strBuf2 = str2Char;
2423                                     strBuf2Len = 1;
2424                                 }
2425                             }
2426
2427                             if ((1 == strBuf1Len) && (1 == strBuf2Len)) { // normal case
2428                                 if (*strBuf1 != *strBuf2) break;
2429                             } else {
2430                                 CFIndex delta;
2431
2432                                 if (!caseInsensitive && (strBuf1Len != strBuf2Len)) break;
2433                                 if (memcmp(strBuf1, strBuf2, sizeof(UTF32Char) * __CFMin(strBuf1Len, strBuf2Len))) break;
2434
2435                                 if (strBuf1Len < strBuf2Len) {
2436                                     delta = strBuf2Len - strBuf1Len;
2437
2438                                     if ((str1Index + strBuf1Len + delta) > (rangeToSearch.location + rangeToSearch.length)) break;
2439
2440                                     characters = &(strBuf2[strBuf1Len]);
2441                                     charactersLimit = characters + delta;
2442
2443                                     while (characters < charactersLimit) {
2444                                         strBuf1Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1), &inlineBuf1, str1Index + 1, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2445                                         if ((strBuf1Len > 0) || (*characters != *strBuf1)) break;
2446                                         ++characters; ++str1Index;
2447                                     }
2448                                     if (characters < charactersLimit) break;
2449                                 } else if (strBuf2Len < strBuf1Len) {
2450                                     delta = strBuf1Len - strBuf2Len;
2451
2452                                     if ((str2Index + strBuf2Len + delta) > findStrLen) break;
2453
2454                                     characters = &(strBuf1[strBuf2Len]);
2455                                     charactersLimit = characters + delta;
2456
2457                                     while (characters < charactersLimit) {
2458                                         strBuf2Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str1Index + 1), &inlineBuf2, str2Index + 1, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2459                                         if ((strBuf2Len > 0) || (*characters != *strBuf2)) break;
2460                                         ++characters; ++str2Index;
2461                                     }
2462                                     if (characters < charactersLimit) break;
2463                                 }
2464                             }
2465                         } else {
2466                             break;
2467                         }
2468                     }
2469                     ++str1Index; ++str2Index;
2470                 }
2471
2472                 if (str2Index == findStrLen) {
2473                     if (((kCFCompareBackwards|kCFCompareAnchored) != (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) || (str1Index == (rangeToSearch.location + rangeToSearch.length))) {
2474                         didFind = true;
2475                         if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
2476                     }
2477                     break;
2478                 }
2479
2480                 if (fromLoc == toLoc) break;
2481                 fromLoc += delta;
2482             }
2483         } else if (equalityOptions) {
2484             UTF16Char otherChar;
2485             CFIndex str1UsedLen, str2UsedLen, strBuf1Index = 0, strBuf2Index = 0;
2486             bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticsInsensitive) ? true : false);
2487             static const uint8_t *nonBaseBMP = NULL;
2488             static const uint8_t *combClassBMP = NULL;
2489
2490             if (NULL == nonBaseBMP) {
2491                 nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
2492                 combClassBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
2493             }
2494
2495             while (1) {
2496                 str1Index = fromLoc;
2497                 str2Index = 0;
2498
2499                 strBuf1Len = strBuf2Len = 0;
2500
2501                 while (str2Index < findStrLen) {
2502                     if (strBuf1Len == 0) {
2503                         str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2504                         if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A');
2505                         str1UsedLen = 1;
2506                     } else {
2507                         str1Char = strBuf1[strBuf1Index++];
2508                     }
2509                     if (strBuf2Len == 0) {
2510                         str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2511                         if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A');
2512                         str2UsedLen = 1;
2513                     } else {
2514                         str2Char = strBuf2[strBuf2Index++];
2515                     }
2516
2517                     if (str1Char != str2Char) {
2518                         if ((str1Char < 0x80) && (str2Char < 0x80) && ((NULL == langCode) || !caseInsensitive)) break;
2519
2520                         if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2521                             str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2522                             str1UsedLen = 2;
2523                         }
2524
2525                         if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
2526                             str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2527                             str2UsedLen = 2;
2528                         }
2529
2530                         if (diacriticsInsensitive && (str1Index > fromLoc)) {
2531                             if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (str1Char >> 16))))) str1Char = str2Char;
2532                             if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (str2Char >> 16))))) str2Char = str1Char;
2533                         }
2534
2535                         if (str1Char != str2Char) {
2536                             if (0 == strBuf1Len) {
2537                                 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
2538                                 if (strBuf1Len > 0) {
2539                                     str1Char = *strBuf1;
2540                                     strBuf1Index = 1;
2541                                 }
2542                             }
2543
2544                             if ((0 == strBuf1Len) && (0 < strBuf2Len)) break;
2545
2546                             if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
2547                                 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
2548                                 if ((0 == strBuf2Len) || (str1Char != *strBuf2)) break;
2549                                 strBuf2Index = 1;
2550                             }
2551                         }
2552
2553                         if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
2554                             while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2555                                 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
2556                                 ++strBuf1Index; ++strBuf2Index;
2557                             }
2558                             if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) break;
2559                         }
2560                     }
2561
2562                     if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2563                     if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2564
2565                     if (strBuf1Len == 0) str1Index += str1UsedLen;
2566                     if (strBuf2Len == 0) str2Index += str2UsedLen;
2567                 }
2568
2569                 if (str2Index == findStrLen) {
2570                     bool match = true;
2571
2572                     if (strBuf1Len > 0) {
2573                         match = false;
2574
2575                         if ((compareOptions & kCFCompareDiacriticsInsensitive) && (strBuf1[0] < 0x0510)) {
2576                             while (strBuf1Index < strBuf1Len) {
2577                                 if (!CFUniCharIsMemberOfBitmap(strBuf1[strBuf1Index], ((strBuf1[strBuf1Index] < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (strBuf1[strBuf1Index] >> 16))))) break;
2578                                 ++strBuf1Index;
2579                             }
2580
2581                             if (strBuf1Index == strBuf1Len) {
2582                                 str1Index += str1UsedLen;
2583                                 match = true;
2584                             }
2585                         }
2586                     }
2587
2588                     if (match && (compareOptions & (kCFCompareDiacriticsInsensitive|kCFCompareNonliteral)) && (str1Index < (rangeToSearch.location + rangeToSearch.length))) {
2589                         const uint8_t *nonBaseBitmap;
2590
2591                         str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2592
2593                         if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2594                             str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2595                             nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (str1Char >> 16));
2596                         } else {
2597                             nonBaseBitmap = nonBaseBMP;
2598                         }
2599
2600                         if (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBitmap)) {
2601                             if (diacriticsInsensitive) {
2602                                 if (str1Char < 0x10000) {
2603                                     CFIndex index = str1Index;
2604
2605                                     do {
2606                                         str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, --index);
2607                                     } while (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBMP), (rangeToSearch.location < index));
2608
2609                                     if (str1Char < 0x0510) {
2610                                         CFIndex maxIndex = (rangeToSearch.location + rangeToSearch.length);
2611
2612                                         while (++str1Index < maxIndex) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), nonBaseBMP)) break;
2613                                     }
2614                                 }
2615                             } else {
2616                                 match = false;
2617                             }
2618                         } else if (!diacriticsInsensitive) {
2619                             otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index - 1);
2620
2621                             // this is assuming viramas are only in BMP ???
2622                             if ((str1Char == COMBINING_GRAPHEME_JOINER) || (otherChar == COMBINING_GRAPHEME_JOINER) || (otherChar == ZERO_WIDTH_JOINER) || ((otherChar >= HANGUL_CHOSEONG_START) && (otherChar <= HANGUL_JONGSEONG_END)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar, combClassBMP) == 9)) {
2623                                 CFRange clusterRange = CFStringGetRangeOfCharacterClusterAtIndex(string, str1Index - 1, kCFStringGramphemeCluster);
2624
2625                                 if (str1Index < (clusterRange.location + clusterRange.length)) match = false;
2626                             }
2627                         }
2628                     }
2629
2630                     if (match) {
2631                         if (((kCFCompareBackwards|kCFCompareAnchored) != (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) || (str1Index == (rangeToSearch.location + rangeToSearch.length))) {
2632                             didFind = true;
2633                             if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
2634                         }
2635                         break;
2636                     }
2637                 }
2638
2639                 if (fromLoc == toLoc) break;
2640                 fromLoc += delta;
2641             }
2642         } else {
2643             while (1) {
2644                 str1Index = fromLoc;
2645                 str2Index = 0;
2646
2647                 while (str2Index < findStrLen) {
2648                     if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index)) break;
2649
2650                     ++str1Index; ++str2Index;
2651                 }
2652
2653                 if (str2Index == findStrLen) {
2654                     didFind = true;
2655                     if (NULL != result) *result = CFRangeMake(fromLoc, findStrLen);
2656                     break;
2657                 }
2658
2659                 if (fromLoc == toLoc) break;
2660                 fromLoc += delta;
2661             }
2662         }
2663     }
2664
2665     return didFind;
2666 }
2667
2668 // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
2669
2670 static const void *__rangeRetain(CFAllocatorRef allocator, const void *ptr) {
2671     CFRetain(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
2672     return ptr;
2673 }
2674
2675 static void __rangeRelease(CFAllocatorRef allocator, const void *ptr) {
2676     CFRelease(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
2677 }
2678
2679 static CFStringRef __rangeCopyDescription(const void *ptr) {
2680     CFRange range = *(CFRange *)ptr;
2681     return CFStringCreateWithFormat(NULL /* ??? allocator */, NULL, CFSTR("{%d, %d}"), range.location, range.length);
2682 }
2683
2684 static Boolean  __rangeEqual(const void *ptr1, const void *ptr2) {
2685     CFRange range1 = *(CFRange *)ptr1;
2686     CFRange range2 = *(CFRange *)ptr2;
2687     return (range1.location == range2.location) && (range1.length == range2.length);
2688 }
2689
2690
2691 CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions) {
2692     CFRange foundRange;
2693     Boolean backwards = compareOptions & kCFCompareBackwards;
2694     UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
2695     CFMutableDataRef rangeStorage = NULL;       // Basically an array of CFRange, CFDataRef (packed)
2696     uint8_t *rangeStorageBytes = NULL;
2697     CFIndex foundCount = 0;
2698     CFIndex capacity = 0;               // Number of CFRange, CFDataRef element slots in rangeStorage
2699
2700     if (alloc == NULL) alloc = __CFGetDefaultAllocator();
2701
2702     while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
2703         // Determine the next range
2704         if (backwards) {
2705             rangeToSearch.length = foundRange.location - rangeToSearch.location;
2706         } else {
2707             rangeToSearch.location = foundRange.location + foundRange.length;
2708             rangeToSearch.length = endIndex - rangeToSearch.location;
2709         }
2710
2711         // If necessary, grow the data and squirrel away the found range
2712         if (foundCount >= capacity) {
2713             if (rangeStorage == NULL) rangeStorage = CFDataCreateMutable(alloc, 0);
2714             capacity = (capacity + 4) * 2;
2715             CFDataSetLength(rangeStorage, capacity * (sizeof(CFRange) + sizeof(CFDataRef)));
2716             rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage) + foundCount * (sizeof(CFRange) + sizeof(CFDataRef));
2717         }
2718         memmove(rangeStorageBytes, &foundRange, sizeof(CFRange));       // The range
2719         memmove(rangeStorageBytes + sizeof(CFRange), &rangeStorage, sizeof(CFDataRef)); // The data
2720         rangeStorageBytes += (sizeof(CFRange) + sizeof(CFDataRef));
2721         foundCount++;
2722     }
2723
2724     if (foundCount > 0) {
2725         CFIndex cnt;
2726         CFMutableArrayRef array;
2727         const CFArrayCallBacks callbacks = {0, __rangeRetain, __rangeRelease, __rangeCopyDescription, __rangeEqual};
2728
2729         CFDataSetLength(rangeStorage, foundCount * (sizeof(CFRange) + sizeof(CFDataRef)));      // Tighten storage up
2730         rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage);
2731
2732         array = CFArrayCreateMutable(alloc, foundCount * sizeof(CFRange *), &callbacks);
2733         for (cnt = 0; cnt < foundCount; cnt++) {
2734             // Each element points to the appropriate CFRange in the CFData
2735             CFArrayAppendValue(array, rangeStorageBytes + cnt * (sizeof(CFRange) + sizeof(CFDataRef)));
2736         }
2737         CFRelease(rangeStorage);                // We want the data to go away when all CFRanges inside it are released...
2738         return array;
2739     } else {
2740         return NULL;
2741     }
2742 }
2743
2744
2745 CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFOptionFlags compareOptions) {
2746     CFRange foundRange;
2747
2748     if (CFStringFindWithOptions(string, stringToFind, CFRangeMake(0, CFStringGetLength(string)), compareOptions, &foundRange)) {
2749         return foundRange;
2750     } else {
2751         return CFRangeMake(kCFNotFound, 0);
2752     }
2753 }
2754
2755 Boolean CFStringHasPrefix(CFStringRef string, CFStringRef prefix) {
2756     return CFStringFindWithOptions(string, prefix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored, NULL);
2757 }
2758
2759 Boolean CFStringHasSuffix(CFStringRef string, CFStringRef suffix) {
2760     return CFStringFindWithOptions(string, suffix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored|kCFCompareBackwards, NULL);
2761 }
2762
2763 #define MAX_TRANSCODING_LENGTH 4
2764
2765 #define HANGUL_JONGSEONG_COUNT (28)
2766
2767 CF_INLINE bool _CFStringIsHangulLVT(UTF32Char character) {
2768     return (((character - HANGUL_SYLLABLE_START) % HANGUL_JONGSEONG_COUNT) ? true : false);
2769 }
2770
// Table of 16 lengths.  NOTE(review): presumably indexed by a 4-bit transcoding hint,
// with no entry exceeding MAX_TRANSCODING_LENGTH -- confirm against the users of this table.
static uint8_t __CFTranscodingHintLength[] = {
    2, 3, 4, 4,
    4, 4, 4, 2,
    2, 2, 2, 4,
    0, 0, 0, 0
};
2774
// States of the Hangul syllable scanner in _CFStringInlineBufferGetComposedRange()
enum {
    kCFStringHangulStateL = 0,      // conjoining jamo below HANGUL_JUNGSEONG_START (leading consonant)
    kCFStringHangulStateV = 1,      // conjoining jamo below HANGUL_JONGSEONG_START (vowel)
    kCFStringHangulStateT = 2,      // conjoining jamo below HANGUL_SYLLABLE_START (trailing consonant)
    kCFStringHangulStateLV = 3,     // precomposed syllable for which _CFStringIsHangulLVT() is false
    kCFStringHangulStateLVT = 4,    // precomposed syllable for which _CFStringIsHangulLVT() is true
    kCFStringHangulStateBreak = 5   // the sequence cannot be extended any further
};
2783
2784 static CFRange _CFStringInlineBufferGetComposedRange(CFStringInlineBuffer *buffer, CFIndex start, CFStringCharacterClusterType type, const uint8_t *nonBaseBMP) {
2785     CFIndex end = start + 1;
2786     const uint8_t *nonBase = nonBaseBMP;
2787     UTF32Char character;
2788     UTF16Char otherSurrogate;
2789     uint8_t step;
2790
2791     character = CFStringGetCharacterFromInlineBuffer(buffer, start);
2792
2793
2794     // We don't combine characters in Armenian ~ Limbu range for backward deletion
2795     if ((type != kCFStringBackwardDeletionCluster) || (character < 0x0530) || (character > 0x194F)) {
2796         // Check if the current is surrogate
2797         if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start + 1)))) {
2798             ++end;
2799             character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
2800             nonBase = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
2801         }
2802
2803         // Extend backward
2804         while (start > 0) {
2805             if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
2806
2807             if (character < 0x10000) { // the first round could be already be non-BMP
2808                 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)))) {
2809                     character = CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate, character);
2810                     nonBase = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
2811                     --start;
2812                 } else {
2813                     nonBase = nonBaseBMP;
2814                 }
2815             }
2816
2817             if (!CFUniCharIsMemberOfBitmap(character, nonBase) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
2818
2819             --start;
2820
2821             character = CFStringGetCharacterFromInlineBuffer(buffer, start);
2822         }
2823     }
2824
2825     // Hangul
2826     if (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END))) {
2827         uint8_t state;
2828         uint8_t initialState;
2829
2830         if (character < HANGUL_JUNGSEONG_START) {
2831             state = kCFStringHangulStateL;
2832         } else if (character < HANGUL_JONGSEONG_START) {
2833             state = kCFStringHangulStateV;
2834         } else if (character < HANGUL_SYLLABLE_START) {
2835             state = kCFStringHangulStateT;
2836         } else {
2837             state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
2838         }
2839         initialState = state;
2840
2841         // Extend backward
2842         while (((character = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)) >= HANGUL_CHOSEONG_START) && (character <= HANGUL_SYLLABLE_END) && ((character <= HANGUL_JONGSEONG_END) || (character >= HANGUL_SYLLABLE_START))) {
2843             switch (state) {
2844             case kCFStringHangulStateV:
2845                 if (character <= HANGUL_CHOSEONG_END) {
2846                     state = kCFStringHangulStateL;
2847                 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END) && !_CFStringIsHangulLVT(character)) {
2848                     state = kCFStringHangulStateLV;
2849                 } else if (character > HANGUL_JUNGSEONG_END) {
2850                     state = kCFStringHangulStateBreak;
2851                 }
2852                 break;
2853
2854             case kCFStringHangulStateT:
2855                 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JUNGSEONG_END)) {
2856                     state = kCFStringHangulStateV;
2857                 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)) {
2858                     state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
2859                 } else if (character < HANGUL_JUNGSEONG_START) {
2860                     state = kCFStringHangulStateBreak;
2861                 }
2862                 break;
2863
2864             default:
2865                 state = ((character < HANGUL_JUNGSEONG_START) ? kCFStringHangulStateL : kCFStringHangulStateBreak);
2866                 break;
2867             }
2868
2869             if (state == kCFStringHangulStateBreak) break;
2870             --start;
2871         }
2872
2873         // Extend forward
2874         state = initialState;
2875         while (((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) && (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)))) {
2876             switch (state) {
2877             case kCFStringHangulStateLV:
2878             case kCFStringHangulStateV:
2879                 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) {
2880                     state = ((character < HANGUL_JONGSEONG_START) ? kCFStringHangulStateV : kCFStringHangulStateT);
2881                 } else {
2882                     state = kCFStringHangulStateBreak;
2883                 }
2884                 break;
2885
2886             case kCFStringHangulStateLVT:
2887             case kCFStringHangulStateT:
2888                 state = (((character >= HANGUL_JONGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) ? kCFStringHangulStateT : kCFStringHangulStateBreak);
2889                 break;
2890
2891             default:
2892                 if (character < HANGUL_JUNGSEONG_START) {
2893                     state = kCFStringHangulStateL;
2894                 } else if (character < HANGUL_JONGSEONG_START) {
2895                     state = kCFStringHangulStateV;
2896                 } else if (character >= HANGUL_SYLLABLE_START) {
2897                     state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
2898                 } else {
2899                     state = kCFStringHangulStateBreak;
2900                 }
2901                 break;
2902             }
2903
2904             if (state == kCFStringHangulStateBreak) break;
2905             ++end;
2906         }
2907     }
2908
2909     // Extend forward
2910     while ((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) {
2911         if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
2912
2913         if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, end + 1)))) {
2914             character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
2915             nonBase = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
2916             step = 2;
2917         } else {
2918             nonBase = nonBaseBMP;
2919             step  = 1;
2920         }
2921
2922         if (!CFUniCharIsMemberOfBitmap(character, nonBase) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
2923
2924         end += step;
2925     }
2926
2927     return CFRangeMake(start, end - start);
2928 }
2929
2930 CF_INLINE bool _CFStringIsVirama(UTF32Char character, const uint8_t *combClassBMP) {
2931     return ((character == COMBINING_GRAPHEME_JOINER) || (CFUniCharGetCombiningPropertyForCharacter(character, ((character < 0x10000) ? combClassBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (character >> 16)))) == 9) ? true : false);
2932 }
2933
2934 CFRange CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string, CFIndex charIndex, CFStringCharacterClusterType type) {
2935     CFRange range;
2936     CFIndex currentIndex;
2937     CFIndex length = CFStringGetLength(string);
2938     CFStringInlineBuffer stringBuffer;
2939     UTF32Char character;
2940     UTF16Char otherSurrogate;
2941     static const uint8_t *nonBaseBMP = NULL;
2942     static const uint8_t *letterBMP = NULL;
2943     static const uint8_t *combClassBMP = NULL;
2944
2945     if (charIndex >= length) return CFRangeMake(kCFNotFound, 0);
2946
2947     /* Fast case.  If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII.  Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
2948     */
2949     if (!CF_IS_OBJC(__kCFStringTypeID, string) && __CFStrIsEightBit(string)) return CFRangeMake(charIndex, 1);
2950
2951     if (NULL == nonBaseBMP) {
2952         nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
2953         letterBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, 0);
2954         combClassBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
2955     }
2956
2957     CFStringInitInlineBuffer(string, &stringBuffer, CFRangeMake(0, length));
2958
2959     // Get composed character sequence first
2960     range = _CFStringInlineBufferGetComposedRange(&stringBuffer, charIndex, type, nonBaseBMP);
2961
2962     // Do grapheme joiners
2963     if (type < kCFStringCursorMovementCluster) {
2964         const uint8_t *letter = letterBMP;
2965
2966         // Check to see if we have a letter at the beginning of initial cluster
2967         character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location);
2968
2969         if ((range.length > 1) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location + 1)))) {
2970             character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
2971             letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
2972         }
2973
2974         if ((character == ZERO_WIDTH_JOINER) || CFUniCharIsMemberOfBitmap(character, letter)) {
2975             CFRange otherRange;
2976
2977             // Check if preceded by grapheme joiners (U034F and viramas)
2978             otherRange.location = currentIndex = range.location;
2979
2980             while (currentIndex > 1) {
2981                 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex);
2982
2983                 // ??? We're assuming viramas only in BMP
2984                 if ((_CFStringIsVirama(character, combClassBMP) || ((character == ZERO_WIDTH_JOINER) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex), combClassBMP))) && (currentIndex > 0)) {
2985                     --currentIndex;
2986                 } else {
2987                     break;
2988                 }
2989
2990                 currentIndex = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, nonBaseBMP).location;
2991
2992                 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
2993
2994                 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1)))) {
2995                     character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
2996                     letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
2997                     --currentIndex;
2998                 } else {
2999                     letter = letterBMP;
3000                 }
3001
3002                 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3003                 range.location = currentIndex;
3004             }
3005
3006             range.length += otherRange.location - range.location;
3007
3008             // Check if followed by grapheme joiners
3009             if ((range.length > 1) && ((range.location + range.length) < length)) {
3010                 otherRange = range;
3011
3012                 do {
3013                     currentIndex = otherRange.location + otherRange.length;
3014                     character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1);
3015
3016                     // ??? We're assuming viramas only in BMP
3017                     if ((character != ZERO_WIDTH_JOINER) && !_CFStringIsVirama(character, combClassBMP)) break;
3018
3019                     character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3020
3021                     if (character == ZERO_WIDTH_JOINER) character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, ++currentIndex);
3022
3023                     if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex + 1)))) {
3024                         character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3025                         letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3026                     } else {
3027                         letter = letterBMP;
3028                     }
3029
3030                     // We only conjoin letters
3031                     if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3032                     otherRange = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, nonBaseBMP);
3033                 } while ((otherRange.location + otherRange.length) < length);
3034                 range.length = currentIndex - range.location;
3035             }
3036         }
3037     }
3038
3039     // Check if we're part of prefix transcoding hints
3040     CFIndex otherIndex;
3041
3042     currentIndex = (range.location + range.length) - (MAX_TRANSCODING_LENGTH + 1);
3043     if (currentIndex < 0) currentIndex = 0;
3044
3045     while (currentIndex <= range.location) {
3046         character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3047
3048         if ((character & 0x1FFFF0) == 0xF860) { // transcoding hint
3049             otherIndex = currentIndex + __CFTranscodingHintLength[(character - 0xF860)] + 1;
3050             if (otherIndex >= (range.location + range.length)) {
3051                 if (otherIndex <= length) {
3052                     range.location = currentIndex;
3053                     range.length = otherIndex - currentIndex;
3054                 }
3055                 break;
3056             }
3057         }
3058         ++currentIndex;
3059     }
3060
3061     return range;
3062 }
3063
3064 #if 1 /* Using the new implementation. Leaving the old implementation if'ed out for testing purposes for now */
3065 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
3066     return CFStringGetRangeOfCharacterClusterAtIndex(theString, theIndex, kCFStringComposedCharacterCluster);
3067 }
3068 #else
3069 /*!
3070         @function CFStringGetRangeOfComposedCharactersAtIndex
3071         Returns the range of the composed character sequence at the specified index.
3072         @param theString The CFString which is to be searched.  If this
3073                                 parameter is not a valid CFString, the behavior is
3074                         undefined.
3075         @param theIndex The index of the character contained in the
3076                         composed character sequence.  If the index is
3077                         outside the index space of the string (0 to N-1 inclusive,
3078                         where N is the length of the string), the behavior is
3079                         undefined.
3080         @result The range of the composed character sequence.
3081 */
3082 #define ExtHighHalfZoneLow 0xD800
3083 #define ExtHighHalfZoneHigh 0xDBFF
3084 #define ExtLowHalfZoneLow 0xDC00
3085 #define ExtLowHalfZoneHigh 0xDFFF
3086 #define JunseongStart 0x1160
3087 #define JonseongEnd 0x11F9
3088 CF_INLINE Boolean IsHighCode(UniChar X) { return (X >= ExtHighHalfZoneLow && X <= ExtHighHalfZoneHigh); }
3089 CF_INLINE Boolean IsLowCode(UniChar X) { return (X >= ExtLowHalfZoneLow && X <= ExtLowHalfZoneHigh); }
3090 #define IsHangulConjoiningJamo(X) (X >= JunseongStart && X <= JonseongEnd)
3091 #define IsHalfwidthKanaVoicedMark(X) ((X == 0xFF9E) || (X == 0xFF9F))
3092 CF_INLINE Boolean IsNonBaseChar(UniChar X, CFCharacterSetRef nonBaseSet) { return (CFCharacterSetIsCharacterMember(nonBaseSet, X) || IsHangulConjoiningJamo(X) || IsHalfwidthKanaVoicedMark(X) || (X & 0x1FFFF0) == 0xF870); } // combining char, hangul jamo, or Apple corporate variant tag
3093 #define ZWJ     0x200D
3094 #define ZWNJ    0x200C
3095 #define COMBINING_GRAPHEME_JOINER (0x034F)
3096
3097 static CFCharacterSetRef nonBaseChars = NULL;
3098 static CFCharacterSetRef letterChars = NULL;
3099 static const void *__CFCombiningClassBMP = NULL;
3100
3101 CF_INLINE bool IsVirama(UTF32Char character) {
3102     return ((character == COMBINING_GRAPHEME_JOINER) ? true : ((character < 0x10000) && (CFUniCharGetCombiningPropertyForCharacter(character, __CFCombiningClassBMP) == 9) ? true : false));
3103 }
3104
3105 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
3106     CFIndex left, current, save;
3107     CFIndex len = CFStringGetLength(theString);
3108     CFStringInlineBuffer stringBuffer;
3109     static volatile Boolean _isInited = false;
3110
3111     if (theIndex >= len) return CFRangeMake(kCFNotFound, 0);
3112
3113     if (!_isInited) {
3114         nonBaseChars = CFCharacterSetGetPredefined(kCFCharacterSetNonBase);
3115         letterChars = CFCharacterSetGetPredefined(kCFCharacterSetLetter);
3116         __CFCombiningClassBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3117         _isInited = true;
3118     }
3119
3120     save = current = theIndex;
3121
3122     CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, len));
3123
3124     /*
3125      * First check for transcoding hints
3126      */
3127     {
3128         CFRange theRange = (current > MAX_TRANSCODING_LENGTH  ? CFRangeMake(current - MAX_TRANSCODING_LENGTH, MAX_TRANSCODING_LENGTH + 1) : CFRangeMake(0, current + 1));
3129
3130         // Should check the next loc ?
3131         if (current + 1 < len) ++theRange.length;
3132
3133         if (theRange.length > 1) {
3134             UniChar characterBuffer[MAX_TRANSCODING_LENGTH + 2]; // Transcoding hint length + current loc + next loc
3135
3136             if (stringBuffer.directBuffer) {
3137                 memmove(characterBuffer, stringBuffer.directBuffer + theRange.location, theRange.length * sizeof(UniChar));
3138             } else {
3139                 CFStringGetCharacters(theString, theRange, characterBuffer);
3140             }
3141
3142             while (current >= theRange.location) {
3143                 if ((characterBuffer[current - theRange.location] & 0x1FFFF0) == 0xF860) {
3144                     theRange = CFRangeMake(current, __CFTranscodingHintLength[characterBuffer[current - theRange.location] - 0xF860] + 1);
3145                     if ((theRange.location + theRange.length) <= theIndex) break;
3146                     if ((theRange.location + theRange.length) >= len) theRange.length = len - theRange.location;
3147                     return theRange;
3148                 }
3149                 if (current == 0) break;
3150                 --current;
3151             }
3152             current = theIndex; // Reset current
3153         }
3154     }
3155
3156 //#warning Aki 5/29/01 This does not support non-base chars in non-BMP planes (i.e. musical symbol combining stem in Unicode 3.1)
3157     /*
3158      * if we start NOT on a base, first move back to a base as appropriate.
3159      */
3160
3161   roundAgain:
3162
3163     while ((current > 0) && IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current), nonBaseChars)) --current;
3164
3165     if (current >= 1 && current < len && CFCharacterSetIsCharacterMember(letterChars, CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) {
3166         --current;
3167         goto roundAgain;
3168     } else if ((current >= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1) == ZWJ) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 2))) {
3169         current -= 2;
3170         goto roundAgain;
3171     }
3172
3173     /*
3174      * Set the left position, then jump back to the saved original position.
3175      */
3176
3177     if (current >= 1 && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) --current;
3178     left = current;
3179     current = save;
3180
3181     /*
3182      * Now, presume we are on a base; move forward & look for the next base.
3183      * Handle jumping over H/L codes.
3184      */
3185     if (IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && (current + 1) < len && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current + 1))) ++current;
3186     ++current;
3187
3188   round2Again:
3189
3190     if (current < len)  {
3191         while (IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current), nonBaseChars)) {
3192             ++current;
3193             if (current >= len) break;
3194         }
3195         if ((current < len) && CFCharacterSetIsCharacterMember(letterChars, CFStringGetCharacterFromInlineBuffer(&stringBuffer, current))) {
3196             if (IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) {
3197                 ++current; goto round2Again;
3198             } else if ((current >= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1) == ZWJ) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 2))) {
3199                 ++current; goto round2Again;
3200             }
3201         }
3202     }
3203     /*
3204      * Now, "current" is a base, and "left" is a base.
3205      * The junk between had better contain "save"!
3206      */
3207     if ((! (left <= save)) || (! (save <= current))) {
3208         CFLog(0, CFSTR("CFString: CFStringGetRangeOfComposedCharactersAtIndex:%d returned invalid\n"), save);
3209     }
3210     return CFRangeMake(left, current - left);
3211 }
3212 #endif
3213
3214 /*!
3215         @function CFStringFindCharacterFromSet
3216         Query the range of characters contained in the specified character set.
3217         @param theString The CFString which is to be searched.  If this
3218                                 parameter is not a valid CFString, the behavior is
3219                         undefined.
3220         @param theSet The CFCharacterSet against which the membership
3221                         of characters is checked.  If this parameter is not a valid
3222                         CFCharacterSet, the behavior is undefined.
3223         @param range The range of characters within the string to search. If
3224                         the range location or end point (defined by the location
3225                         plus length minus 1) are outside the index space of the
3226                         string (0 to N-1 inclusive, where N is the length of the
3227                         string), the behavior is undefined. If the range length is
3228                         negative, the behavior is undefined. The range may be empty
3229                         (length 0), in which case no search is performed.
3230         @param searchOptions The bitwise-or'ed option flags to control
3231                         the search behavior.  The supported options are
                        kCFCompareBackwards and kCFCompareAnchored.
3233                         If other option flags are specified, the behavior
3234                         is undefined.
3235         @param result The pointer to a CFRange supplied by the caller in
3236                         which the search result is stored.  If a pointer to an invalid
3237                         memory is specified, the behavior is undefined.
3238         @result true, if at least a character which is a member of the character
3239                         set is found and result is filled, otherwise, false.
3240 */
3241 #define SURROGATE_START 0xD800
3242 #define SURROGATE_END 0xDFFF
3243
3244 CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFOptionFlags searchOptions, CFRange *result) {
3245     CFStringInlineBuffer stringBuffer;
3246     UniChar ch;
3247     CFIndex step;
3248     CFIndex fromLoc, toLoc, cnt;        // fromLoc and toLoc are inclusive
3249     Boolean found = false;
3250     Boolean done = false;
3251
3252 //#warning FIX ME !! Should support kCFCompareNonliteral
3253
3254     if ((rangeToSearch.location + rangeToSearch.length > CFStringGetLength(theString)) || (rangeToSearch.length == 0)) return false;
3255
3256     if (searchOptions & kCFCompareBackwards) {
3257         fromLoc = rangeToSearch.location + rangeToSearch.length - 1;
3258         toLoc = rangeToSearch.location;
3259     } else {
3260         fromLoc = rangeToSearch.location;
3261         toLoc = rangeToSearch.location + rangeToSearch.length - 1;
3262     }
3263     if (searchOptions & kCFCompareAnchored) {
3264         toLoc = fromLoc;
3265     }
3266
3267     step = (fromLoc <= toLoc) ? 1 : -1;
3268     cnt = fromLoc;
3269
3270     CFStringInitInlineBuffer(theString, &stringBuffer, rangeToSearch);
3271
3272     do {
3273         ch = CFStringGetCharacterFromInlineBuffer(&stringBuffer, cnt - rangeToSearch.location);
3274         if ((ch >= SURROGATE_START) && (ch <= SURROGATE_END)) {
3275             int otherCharIndex = cnt + step;
3276
3277             if (((step < 0) && (otherCharIndex < toLoc)) || ((step > 0) && (otherCharIndex > toLoc))) {
3278                 done = true;
3279             } else {
3280                 UniChar highChar;
3281                 UniChar lowChar = CFStringGetCharacterFromInlineBuffer(&stringBuffer, otherCharIndex - rangeToSearch.location);
3282
3283                 if (cnt < otherCharIndex) {
3284                     highChar = ch;
3285                 } else {
3286                     highChar = lowChar;
3287                     lowChar = ch;
3288                 }
3289
3290                 if (CFUniCharIsSurrogateHighCharacter(highChar) && CFUniCharIsSurrogateLowCharacter(lowChar) && CFCharacterSetIsLongCharacterMember(theSet, CFUniCharGetLongCharacterForSurrogatePair(highChar, lowChar))) {
3291                     if (result) *result = CFRangeMake((cnt < otherCharIndex ? cnt : otherCharIndex), 2);
3292                     return true;
3293                 } else if (otherCharIndex == toLoc) {
3294                     done = true;
3295                 } else {
3296                     cnt = otherCharIndex + step;
3297                 }
3298             }
3299         } else if (CFCharacterSetIsCharacterMember(theSet, ch)) {
3300             done = found = true;
3301         } else if (cnt == toLoc) {
3302             done = true;
3303         } else {
3304             cnt += step;
3305         }
3306     } while (!done);
3307
3308     if (found && result) *result = CFRangeMake(cnt, 1);
3309     return found;
3310 }
3311
3312 /* Line range code */
3313
3314 #define CarriageReturn '\r'     /* 0x0d */
3315 #define NewLine '\n'            /* 0x0a */
3316 #define NextLine 0x0085
3317 #define LineSeparator 0x2028
3318 #define ParaSeparator 0x2029
3319
3320 CF_INLINE Boolean isALineSeparatorTypeCharacter(UniChar ch) {
3321     if (ch > CarriageReturn && ch < NextLine) return false;     /* Quick test to cover most chars */
3322     return (ch == NewLine || ch == CarriageReturn || ch == NextLine || ch == LineSeparator || ch == ParaSeparator) ? true : false;
3323 }
3324
3325 void CFStringGetLineBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex) {
3326     CFIndex len;
3327     CFStringInlineBuffer buf;
3328     UniChar ch;
3329
3330     CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID, void, string, "getLineStart:end:contentsEnd:forRange:", lineBeginIndex, lineEndIndex, contentsEndIndex, CFRangeMake(range.location, range.length));
3331
3332     __CFAssertIsString(string);
3333     __CFAssertRangeIsInStringBounds(string, range.location, range.length);
3334
3335     len = __CFStrLength(string);
3336
3337     if (lineBeginIndex) {
3338         CFIndex start;
3339         if (range.location == 0) {
3340             start = 0;
3341         } else {
3342             CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3343             CFIndex buf_idx = range.location;
3344
3345             /* Take care of the special case where start happens to fall right between \r and \n */
3346             ch = CFStringGetCharacterFromInlineBuffer(&buf, buf_idx);
3347             buf_idx--;
3348             if ((ch == NewLine) && (CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) == CarriageReturn)) {
3349                 buf_idx--;
3350             }
3351             while (1) {
3352                 if (buf_idx < 0) {
3353                     start = 0;
3354                     break;
3355                 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx))) {
3356                     start = buf_idx + 1;
3357                     break;
3358                 } else {
3359                     buf_idx--;
3360                 }
3361             }
3362         }
3363         *lineBeginIndex = start;
3364     }
3365
3366     /* Now find the ending point */
3367     if (lineEndIndex || contentsEndIndex) {
3368         CFIndex endOfContents, lineSeparatorLength = 1; /* 1 by default */
3369         CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3370         CFIndex buf_idx = range.location + range.length - (range.length ? 1 : 0);
3371         /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
3372         ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3373         if (ch == NewLine) {
3374             endOfContents = buf_idx;
3375             buf_idx--;
3376             if (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == CarriageReturn) {
3377                 lineSeparatorLength = 2;
3378                 endOfContents--;
3379             }
3380         } else {
3381             while (1) {
3382                 if (isALineSeparatorTypeCharacter(ch)) {
3383                     endOfContents = buf_idx;    /* This is actually end of contentsRange */
3384                     buf_idx++;  /* OK for this to go past the end */
3385                     if ((ch == CarriageReturn) && (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == NewLine)) {
3386                         lineSeparatorLength = 2;
3387                     }
3388                     break;
3389                 } else if (buf_idx >= len) {
3390                     endOfContents = len;
3391                     lineSeparatorLength = 0;
3392                     break;
3393                 } else {
3394                     buf_idx++;
3395                     ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3396                 }
3397             }
3398         }
3399         if (contentsEndIndex) *contentsEndIndex = endOfContents;
3400         if (lineEndIndex) *lineEndIndex = endOfContents + lineSeparatorLength;
3401     }
3402 }
3403
3404
3405 CFStringRef CFStringCreateByCombiningStrings(CFAllocatorRef alloc, CFArrayRef array, CFStringRef separatorString) {
3406     CFIndex numChars;
3407     CFIndex separatorNumByte;
3408     CFIndex stringCount = CFArrayGetCount(array);
3409     Boolean isSepCFString = !CF_IS_OBJC(__kCFStringTypeID, separatorString);
3410     Boolean canBeEightbit = isSepCFString && __CFStrIsEightBit(separatorString);
3411     CFIndex idx;
3412     CFStringRef otherString;
3413     void *buffer;
3414     uint8_t *bufPtr;
3415     const void *separatorContents = NULL;
3416
3417     if (stringCount == 0) {
3418         return CFStringCreateWithCharacters(alloc, NULL, 0);
3419     } else if (stringCount == 1) {
3420         return CFStringCreateCopy(alloc, CFArrayGetValueAtIndex(array, 0));
3421     }
3422
3423     if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3424
3425     numChars = CFStringGetLength(separatorString) * (stringCount - 1);
3426     for (idx = 0; idx < stringCount; idx++) {
3427         otherString = (CFStringRef)CFArrayGetValueAtIndex(array, idx);
3428         numChars += CFStringGetLength(otherString);
3429         // canBeEightbit is already false if the separator is an NSString...
3430         if (!CF_IS_OBJC(__kCFStringTypeID, otherString) && __CFStrIsUnicode(otherString)) canBeEightbit = false;
3431     }
3432
3433     bufPtr = buffer = CFAllocatorAllocate(alloc, canBeEightbit ? ((numChars + 1) * sizeof(uint8_t)) : (numChars * sizeof(UniChar)), 0);
3434     if (__CFOASafe) __CFSetLastAllocationEventName(buffer, "CFString (store)");
3435     separatorNumByte = CFStringGetLength(separatorString) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3436
3437     for (idx = 0; idx < stringCount; idx++) {
3438         if (idx) { // add separator here unless first string
3439             if (separatorContents) {
3440                 memmove(bufPtr, separatorContents, separatorNumByte);
3441             } else {
3442                 if (!isSepCFString) { // NSString
3443                     CFStringGetCharacters(separatorString, CFRangeMake(0, CFStringGetLength(separatorString)), (UniChar*)bufPtr);
3444                 } else if (canBeEightbit || __CFStrIsUnicode(separatorString)) {
3445                     memmove(bufPtr, (const uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), separatorNumByte);
3446                 } else {
3447                     __CFStrConvertBytesToUnicode((uint8_t*)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), (UniChar*)bufPtr, __CFStrLength(separatorString));
3448                 }
3449                 separatorContents = bufPtr;
3450             }
3451             bufPtr += separatorNumByte;
3452         }
3453
3454         otherString = (CFStringRef )CFArrayGetValueAtIndex(array, idx);
3455         if (CF_IS_OBJC(__kCFStringTypeID, otherString)) {
3456             CFIndex otherLength = CFStringGetLength(otherString);
3457             CFStringGetCharacters(otherString, CFRangeMake(0, otherLength), (UniChar*)bufPtr);
3458             bufPtr += otherLength * sizeof(UniChar);
3459         } else {
3460             const uint8_t* otherContents = __CFStrContents(otherString);
3461             CFIndex otherNumByte = __CFStrLength2(otherString, otherContents) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3462
3463             if (canBeEightbit || __CFStrIsUnicode(otherString)) {
3464                 memmove(bufPtr, otherContents + __CFStrSkipAnyLengthByte(otherString), otherNumByte);
3465             } else {
3466                 __CFStrConvertBytesToUnicode(otherContents + __CFStrSkipAnyLengthByte(otherString), (UniChar*)bufPtr, __CFStrLength2(otherString, otherContents));
3467             }
3468             bufPtr += otherNumByte;
3469         }
3470     }
3471     if (canBeEightbit) *bufPtr = 0; // NULL byte;
3472
3473     return canBeEightbit ?
3474                 CFStringCreateWithCStringNoCopy(alloc, buffer, __CFStringGetEightBitStringEncoding(), alloc) :
3475                 CFStringCreateWithCharactersNoCopy(alloc, buffer, numChars, alloc);
3476 }
3477
3478
3479 CFArrayRef CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc, CFStringRef string, CFStringRef separatorString) {
3480     CFArrayRef separatorRanges;
3481     CFIndex length = CFStringGetLength(string);
3482     /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
3483     if (!(separatorRanges = CFStringCreateArrayWithFindResults(alloc, string, separatorString, CFRangeMake(0, length), 0))) {
3484         return CFArrayCreate(alloc, (const void**)&string, 1, & kCFTypeArrayCallBacks);
3485     } else {
3486         CFIndex idx;
3487         CFIndex count = CFArrayGetCount(separatorRanges);
3488         CFIndex startIndex = 0;
3489         CFIndex numChars;
3490         CFMutableArrayRef array = CFArrayCreateMutable(alloc, count + 2, & kCFTypeArrayCallBacks);
3491         const CFRange *currentRange;
3492         CFStringRef substring;
3493
3494         for (idx = 0;idx < count;idx++) {
3495             currentRange = CFArrayGetValueAtIndex(separatorRanges, idx);
3496             numChars = currentRange->location - startIndex;
3497             substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, numChars));
3498             CFArrayAppendValue(array, substring);
3499             CFRelease(substring);
3500             startIndex = currentRange->location + currentRange->length;
3501         }
3502         substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, length - startIndex));
3503         CFArrayAppendValue(array, substring);
3504         CFRelease(substring);
3505
3506         CFRelease(separatorRanges);
3507
3508         return array;
3509     }
3510 }
3511
3512 CFStringRef CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc, CFDataRef data, CFStringEncoding encoding) {
3513     return CFStringCreateWithBytes(alloc, CFDataGetBytePtr(data), CFDataGetLength(data), encoding, true);
3514 }
3515
3516
3517 CFDataRef CFStringCreateExternalRepresentation(CFAllocatorRef alloc, CFStringRef string, CFStringEncoding encoding, uint8_t lossByte) {
3518     CFIndex length;
3519     CFIndex guessedByteLength;
3520     uint8_t *bytes;
3521     CFIndex usedLength;
3522     SInt32 result;
3523
3524     if (CF_IS_OBJC(__kCFStringTypeID, string)) {        /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3525         length = CFStringGetLength(string);
3526     } else {
3527         __CFAssertIsString(string);
3528         length = __CFStrLength(string);
3529         if (__CFStrIsEightBit(string) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
3530             return CFDataCreate(alloc, ((char *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
3531         }
3532     }
3533
3534     if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3535
3536     if (encoding == kCFStringEncodingUnicode) {
3537         guessedByteLength = (length + 1) * sizeof(UniChar);
3538     } else if (((guessedByteLength = CFStringGetMaximumSizeForEncoding(length, encoding)) > length) && !CF_IS_OBJC(__kCFStringTypeID, string)) { // Multi byte encoding
3539 #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
3540         if (__CFStrIsUnicode(string)) {
3541             guessedByteLength = CFStringEncodingByteLengthForCharacters(encoding, kCFStringEncodingPrependBOM, __CFStrContents(string), __CFStrLength(string));
3542         } else {
3543 #endif
3544         result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, NULL, 0x7FFFFFFF, &guessedByteLength);
3545         // if result == length, we always succeed
3546         //   otherwise, if result == 0, we fail
3547         //   otherwise, if there was a lossByte but still result != length, we fail
3548         if ((result != length) && (!result || !lossByte)) return NULL;
3549         if (guessedByteLength == length && __CFStrIsEightBit(string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // It's all ASCII !!
3550             return CFDataCreate(alloc, ((char *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
3551         }
3552 #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
3553         }
3554 #endif
3555     }
3556     bytes = CFAllocatorAllocate(alloc, guessedByteLength, 0);
3557     if (__CFOASafe) __CFSetLastAllocationEventName(bytes, "CFData (store)");
3558
3559     result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, bytes, guessedByteLength, &usedLength);
3560
3561     if ((result != length) && (!result || !lossByte)) {         // see comment above about what this means
3562         CFAllocatorDeallocate(alloc, bytes);
3563         return NULL;
3564     }
3565
3566     return CFDataCreateWithBytesNoCopy(alloc, (char const *)bytes, usedLength, alloc);
3567 }
3568
3569
3570 CFStringEncoding CFStringGetSmallestEncoding(CFStringRef str) {
3571     CFIndex len;
3572     CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_smallestEncodingInCFStringEncoding");
3573     __CFAssertIsString(str);
3574
3575     if (__CFStrIsEightBit(str)) return __CFStringGetEightBitStringEncoding();
3576     len = __CFStrLength(str);
3577     if (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetEightBitStringEncoding(), 0, NULL, 0x7fffffff, NULL) == len) return __CFStringGetEightBitStringEncoding();
3578     if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetSystemEncoding(), 0, NULL, 0x7fffffff, NULL) == len)) return __CFStringGetSystemEncoding();
3579     return kCFStringEncodingUnicode;    /* ??? */
3580 }
3581
3582
3583 CFStringEncoding CFStringGetFastestEncoding(CFStringRef str) {
3584     CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_fastestEncodingInCFStringEncoding");
3585     __CFAssertIsString(str);
3586     return __CFStrIsEightBit(str) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode;   /* ??? */
3587 }
3588
3589
3590 SInt32 CFStringGetIntValue(CFStringRef str) {
3591     Boolean success;
3592     SInt32 result;
3593     SInt32 idx = 0;
3594     CFStringInlineBuffer buf;
3595     CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
3596     success = __CFStringScanInteger(&buf, NULL, &idx, false, &result);
3597     return success ? result : 0;
3598 }
3599
3600
3601 double CFStringGetDoubleValue(CFStringRef str) {
3602     Boolean success;
3603     double result;
3604     SInt32 idx = 0;
3605     CFStringInlineBuffer buf;
3606     CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
3607     success = __CFStringScanDouble(&buf, NULL, &idx, &result);
3608     return success ? result : 0.0;
3609 }
3610
3611
3612 /*** Mutable functions... ***/
3613
3614 void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string, UniChar *chars, CFIndex length, CFIndex capacity) {
3615     __CFAssertIsNotNegative(length);
3616     __CFAssertIsStringAndExternalMutable(string);
3617     CFAssert4((length <= capacity) && ((capacity == 0) || ((capacity > 0) && chars)), __kCFLogAssertion, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__, chars, length, capacity);
3618     __CFStrSetContentPtr(string, chars);
3619     __CFStrSetExplicitLength(string, length);
3620     __CFStrSetCapacity(string, capacity * sizeof(UniChar));
3621     __CFStrSetCapacityProvidedExternally(string);
3622 }
3623
3624
3625
3626 void CFStringInsert(CFMutableStringRef str, CFIndex idx, CFStringRef insertedStr) {
3627     CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "insertString:atIndex:", insertedStr, idx);
3628     __CFAssertIsStringAndMutable(str);
3629     CFAssert3(idx >= 0 && idx <= __CFStrLength(str), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(str));
3630     __CFStringReplace(str, CFRangeMake(idx, 0), insertedStr);
3631 }
3632
3633
3634 void CFStringDelete(CFMutableStringRef str, CFRange range) {
3635     CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "deleteCharactersInRange:", range);
3636     __CFAssertIsStringAndMutable(str);
3637     __CFAssertRangeIsInStringBounds(str, range.location, range.length);
3638     __CFStringChangeSize(str, range, 0, false);
3639 }
3640
3641
3642 void CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
3643     CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "replaceCharactersInRange:withString:", range, replacement);
3644     __CFAssertIsStringAndMutable(str);
3645     __CFAssertRangeIsInStringBounds(str, range.location, range.length);
3646     __CFStringReplace(str, range, replacement);
3647 }
3648
3649
3650 void CFStringReplaceAll(CFMutableStringRef str, CFStringRef replacement) {
3651     CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "setString:", replacement);
3652     __CFAssertIsStringAndMutable(str);
3653     __CFStringReplace(str, CFRangeMake(0, __CFStrLength(str)), replacement);
3654 }
3655
3656
3657 void CFStringAppend(CFMutableStringRef str, CFStringRef appended) {
3658     CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "appendString:", appended);
3659     __CFAssertIsStringAndMutable(str);
3660     __CFStringReplace(str, CFRangeMake(__CFStrLength(str), 0), appended);
3661 }
3662
3663
3664 void CFStringAppendCharacters(CFMutableStringRef str, const UniChar *chars, CFIndex appendedLength) {
3665     CFIndex strLength, idx;
3666
3667     __CFAssertIsNotNegative(appendedLength);
3668
3669     CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", chars, appendedLength);
3670
3671     __CFAssertIsStringAndMutable(str);
3672
3673     strLength = __CFStrLength(str);
3674     if (__CFStringGetCompatibility(Bug2967272) || __CFStrIsUnicode(str)) {
3675         __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, true);
3676         memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
3677     } else {
3678         uint8_t *contents;
3679         bool isASCII = true;
3680         for (idx = 0; isASCII && idx < appendedLength; idx++) isASCII = (chars[idx] < 0x80);
3681         __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, !isASCII);
3682         if (!isASCII) {
3683             memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
3684         } else {
3685             contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
3686             for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
3687         }
3688     }
3689 }
3690
3691
3692 static void __CFStringAppendBytes(CFMutableStringRef str, const char *cStr, CFIndex appendedLength, CFStringEncoding encoding) {
3693     Boolean appendedIsUnicode = false;
3694     Boolean freeCStrWhenDone = false;
3695     Boolean demoteAppendedUnicode = false;
3696     CFVarWidthCharBuffer vBuf;
3697
3698     __CFAssertIsNotNegative(appendedLength);
3699
3700     if (encoding == kCFStringEncodingASCII || encoding == __CFStringGetEightBitStringEncoding()) {
3701         // appendedLength now denotes length in UniChars
3702     } else if (encoding == kCFStringEncodingUnicode) {
3703         UniChar *chars = (UniChar *)cStr;
3704         CFIndex idx, length = appendedLength / sizeof(UniChar);
3705         bool isASCII = true;
3706         for (idx = 0; isASCII && idx < length; idx++) isASCII = (chars[idx] < 0x80);
3707         if (!isASCII) {
3708             appendedIsUnicode = true;
3709         } else {
3710             demoteAppendedUnicode = true;
3711         }
3712         appendedLength = length;
3713     } else {
3714         Boolean usingPassedInMemory = false;
3715
3716         vBuf.allocator = __CFGetDefaultAllocator();     // We don't want to use client's allocator for temp stuff
3717         vBuf.chars.unicode = NULL;      // This will cause the decode function to allocate memory if necessary
3718
3719         if (!__CFStringDecodeByteStream3(cStr, appendedLength, encoding, __CFStrIsUnicode(str), &vBuf, &usingPassedInMemory, 0)) {
3720             CFAssert1(0, __kCFLogAssertion, "Supplied bytes could not be converted specified encoding %d", encoding);
3721             return;
3722         }
3723
3724         // If not ASCII, appendedLength now denotes length in UniChars
3725         appendedLength = vBuf.numChars;
3726         appendedIsUnicode = !vBuf.isASCII;
3727         cStr = vBuf.chars.ascii;
3728         freeCStrWhenDone = !usingPassedInMemory && vBuf.shouldFreeChars;
3729     }
3730
3731     if (CF_IS_OBJC(__kCFStringTypeID, str)) {
3732         if (!appendedIsUnicode && !demoteAppendedUnicode) {
3733             CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "_cfAppendCString:length:", cStr, appendedLength);
3734         } else {
3735             CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", cStr, appendedLength);
3736         }
3737     } else {
3738         CFIndex strLength;
3739         __CFAssertIsStringAndMutable(str);
3740         strLength = __CFStrLength(str);
3741
3742         __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, appendedIsUnicode || __CFStrIsUnicode(str));
3743
3744         if (__CFStrIsUnicode(str)) {
3745             UniChar *contents = (UniChar *)__CFStrContents(str);
3746             if (appendedIsUnicode) {
3747                 memmove(contents + strLength, cStr, appendedLength * sizeof(UniChar));
3748             } else {
3749                 __CFStrConvertBytesToUnicode(cStr, contents + strLength, appendedLength);
3750             }
3751         } else {
3752             if (demoteAppendedUnicode) {
3753                 UniChar *chars = (UniChar *)cStr;
3754                 CFIndex idx;
3755                 uint8_t *contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
3756                 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
3757             } else {
3758                 uint8_t *contents = (uint8_t *)__CFStrContents(str);
3759                 memmove(contents + strLength + __CFStrSkipAnyLengthByte(str), cStr, appendedLength);
3760             }
3761         }
3762     }
3763
3764     if (freeCStrWhenDone) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr);
3765 }
3766
3767 void CFStringAppendPascalString(CFMutableStringRef str, ConstStringPtr pStr, CFStringEncoding encoding) {
3768     __CFStringAppendBytes(str, pStr + 1, (CFIndex)*pStr, encoding);
3769 }
3770
3771 void CFStringAppendCString(CFMutableStringRef str, const char *cStr, CFStringEncoding encoding) {
3772     __CFStringAppendBytes(str, cStr, strlen(cStr), encoding);
3773 }
3774
3775
3776 void CFStringAppendFormat(CFMutableStringRef str, CFDictionaryRef formatOptions, CFStringRef format, ...) {
3777     va_list argList;
3778
3779     va_start(argList, format);
3780     CFStringAppendFormatAndArguments(str, formatOptions, format, argList);
3781     va_end(argList);
3782 }
3783
3784
3785 CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFOptionFlags compareOptions) {
3786     CFRange foundRange;
3787     Boolean backwards = compareOptions & kCFCompareBackwards;
3788     UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
3789 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
3790     CFRange rangeBuffer[MAX_RANGES_ON_STACK];   // Used to avoid allocating memory
3791     CFRange *ranges = rangeBuffer;
3792     CFIndex foundCount = 0;
3793     CFIndex capacity = MAX_RANGES_ON_STACK;
3794
3795     __CFAssertIsStringAndMutable(string);
3796     __CFAssertRangeIsInStringBounds(string, rangeToSearch.location, rangeToSearch.length);
3797
3798     // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
3799     while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
3800         // Determine the next range
3801         if (backwards) {
3802             rangeToSearch.length = foundRange.location - rangeToSearch.location;
3803         } else {
3804             rangeToSearch.location = foundRange.location + foundRange.length;
3805             rangeToSearch.length = endIndex - rangeToSearch.location;
3806         }
3807
3808         // If necessary, grow the array
3809         if (foundCount >= capacity) {
3810             bool firstAlloc = (ranges == rangeBuffer) ? true : false;
3811             capacity = (capacity + 4) * 2;
3812             // Note that reallocate with NULL previous pointer is same as allocate
3813             ranges = CFAllocatorReallocate(NULL, firstAlloc ? NULL : ranges, capacity * sizeof(CFRange), 0);
3814             if (firstAlloc) memmove(ranges, rangeBuffer, MAX_RANGES_ON_STACK * sizeof(CFRange));
3815         }
3816         ranges[foundCount] = foundRange;
3817         foundCount++;
3818     }
3819
3820     if (foundCount > 0) {
3821         if (backwards) {        // Reorder the ranges to be incrementing (better to do this here, then to check other places)
3822             int head = 0;
3823             int tail = foundCount - 1;
3824             while (head < tail) {
3825                 CFRange temp = ranges[head];
3826                 ranges[head] = ranges[tail];
3827                 ranges[tail] = temp;
3828                 head++;
3829                 tail--;
3830             }
3831         }
3832         __CFStringReplaceMultiple(string, ranges, foundCount, replacementString);
3833         if (ranges != rangeBuffer) CFAllocatorDeallocate(NULL, ranges);
3834     }
3835
3836     return foundCount;
3837 }
3838
3839
3840 // This function is here for NSString purposes
3841 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
3842
3843 int __CFStringCheckAndReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
3844     if (!__CFStrIsMutable(str)) return _CFStringErrNotMutable;  // These three ifs are always here, for NSString usage
3845     if (!replacement && __CFStringNoteErrors()) return _CFStringErrNilArg;
3846     // We use unsigneds as that is what NSRanges do; we use uint64_t do make sure the sum doesn't wrap (otherwise we'd need to do 3 separate checks). This allows catching bad ranges as described in 3375535. (-1,1)
3847     if (((uint64_t)((unsigned)range.location)) + ((uint64_t)((unsigned)range.length)) > (uint64_t)__CFStrLength(str) && __CFStringNoteErrors()) return _CFStringErrBounds;
3848     __CFAssertIsStringAndMutable(str);
3849     __CFAssertRangeIsInStringBounds(str, range.location, range.length);
3850     __CFStringReplace(str, range, replacement);
3851     return _CFStringErrNone;
3852 }
3853
3854 // This function determines whether errors which would cause string exceptions should
3855 // be ignored or not
3856
3857 Boolean __CFStringNoteErrors(void) {
3858     return _CFExecutableLinkedOnOrAfter(CFSystemVersionJaguar) ? true : false;
3859 }
3860
3861
3862
3863 void CFStringPad(CFMutableStringRef string, CFStringRef padString, CFIndex length, CFIndex indexIntoPad) {
3864     CFIndex originalLength;
3865
3866     __CFAssertIsNotNegative(length);
3867     __CFAssertIsNotNegative(indexIntoPad);
3868
3869     CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, void, string, "_cfPad:length:padIndex:", padString, length, indexIntoPad);
3870
3871     __CFAssertIsStringAndMutable(string);
3872
3873     originalLength = __CFStrLength(string);
3874     if (length < originalLength) {
3875         __CFStringChangeSize(string, CFRangeMake(length, originalLength - length), 0, false);
3876     } else if (originalLength < length) {
3877         uint8_t *contents;
3878         Boolean isUnicode;
3879         CFIndex charSize;
3880         CFIndex padStringLength;
3881         CFIndex padLength;
3882         CFIndex padRemaining = length - originalLength;
3883
3884         if (CF_IS_OBJC(__kCFStringTypeID, padString)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3885             padStringLength = CFStringGetLength(padString);
3886             isUnicode = true;   /* !!! Bad for now */
3887         } else {
3888             __CFAssertIsString(padString);
3889             padStringLength = __CFStrLength(padString);
3890             isUnicode = __CFStrIsUnicode(string) || __CFStrIsUnicode(padString);
3891         }
3892
3893         charSize = isUnicode ? sizeof(UniChar) : sizeof(uint8_t);
3894
3895         __CFStringChangeSize(string, CFRangeMake(originalLength, 0), padRemaining, isUnicode);
3896
3897         contents = (uint8_t*)__CFStrContents(string) + charSize * originalLength + __CFStrSkipAnyLengthByte(string);
3898         padLength = padStringLength - indexIntoPad;
3899         padLength = padRemaining < padLength ? padRemaining : padLength;
3900
3901         while (padRemaining > 0) {
3902             if (isUnicode) {
3903                 CFStringGetCharacters(padString, CFRangeMake(indexIntoPad, padLength), (UniChar*)contents);
3904             } else {
3905                 CFStringGetBytes(padString, CFRangeMake(indexIntoPad, padLength), __CFStringGetEightBitStringEncoding(), 0, false, contents, padRemaining * charSize, NULL);
3906             }
3907             contents += padLength * charSize;
3908             padRemaining -= padLength;
3909             indexIntoPad = 0;
3910             padLength = padRemaining < padLength ? padRemaining : padStringLength;
3911         }
3912     }
3913 }
3914
3915 void CFStringTrim(CFMutableStringRef string, CFStringRef trimString) {
3916     CFRange range;
3917     CFIndex newStartIndex;
3918     CFIndex length;
3919
3920     CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfTrim:", trimString);
3921
3922     __CFAssertIsStringAndMutable(string);
3923     __CFAssertIsString(trimString);
3924
3925     newStartIndex = 0;
3926     length = __CFStrLength(string);
3927
3928     while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length - newStartIndex), kCFCompareAnchored, &range)) {
3929         newStartIndex = range.location + range.length;
3930     }
3931
3932     if (newStartIndex < length) {
3933         CFIndex charSize = __CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t);
3934         uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
3935
3936         length -= newStartIndex;
3937         if (__CFStrLength(trimString) < length) {
3938             while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length), kCFCompareAnchored|kCFCompareBackwards, &range)) {
3939                 length = range.location - newStartIndex;
3940             }
3941         }
3942         memmove(contents, contents + newStartIndex * charSize, length * charSize);
3943         __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
3944     } else { // Only trimString in string, trim all
3945         __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
3946     }
3947 }
3948
3949 void CFStringTrimWhitespace(CFMutableStringRef string) {
3950     CFIndex newStartIndex;
3951     CFIndex length;
3952     CFStringInlineBuffer buffer;
3953
3954     CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, void, string, "_cfTrimWS");
3955
3956     __CFAssertIsStringAndMutable(string);
3957
3958     newStartIndex = 0;
3959     length = __CFStrLength(string);
3960
3961     CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
3962     CFIndex buffer_idx = 0;
3963
3964     while (buffer_idx < length && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
3965         buffer_idx++;
3966     newStartIndex = buffer_idx;
3967
3968     if (newStartIndex < length) {
3969         uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
3970         CFIndex charSize = (__CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t));
3971
3972         buffer_idx = length - 1;
3973         while (0 <= buffer_idx && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
3974             buffer_idx--;
3975         length = buffer_idx - newStartIndex + 1;
3976
3977         memmove(contents, contents + newStartIndex * charSize, length * charSize);
3978         __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
3979     } else { // Whitespace only string
3980         __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
3981     }
3982 }
3983
3984 void CFStringLowercase(CFMutableStringRef string, CFLocaleRef locale) {
3985     CFIndex currentIndex = 0;
3986     CFIndex length;
3987     const char *langCode;
3988     Boolean isEightBit = __CFStrIsEightBit(string);
3989
3990     CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfLowercase:", locale);
3991
3992     __CFAssertIsStringAndMutable(string);
3993
3994     length = __CFStrLength(string);
3995
3996     langCode = (_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
3997
3998     if (!langCode && isEightBit) {
3999         uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4000         for (;currentIndex < length;currentIndex++) {
4001             if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4002                 contents[currentIndex] += 'a' - 'A';
4003             } else if (contents[currentIndex] > 127) {
4004                 break;
4005             }
4006         }
4007     }
4008
4009     if (currentIndex < length) {
4010         UniChar *contents;
4011         UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4012         CFIndex mappedLength;
4013         UTF32Char currentChar;
4014         UInt32 flags = 0;
4015
4016         if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4017
4018         contents = (UniChar*)__CFStrContents(string);
4019
4020         for (;currentIndex < length;currentIndex++) {
4021
4022             if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4023                 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4024             } else {
4025                 currentChar = contents[currentIndex];
4026             }
4027             flags = ((langCode || (currentChar == 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToLowercase, langCode, flags) : 0);
4028
4029             mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, flags, langCode);
4030             if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4031
4032             if (currentChar > 0xFFFF) { // Non-BMP char
4033                 switch (mappedLength) {
4034                     case 0:
4035                     __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4036                     contents = (UniChar*)__CFStrContents(string);
4037                     length -= 2;
4038                     break;
4039
4040                     case 1:
4041                     __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4042                     contents = (UniChar*)__CFStrContents(string);
4043                     --length;
4044                     break;
4045
4046                     case 2:
4047                     contents[++currentIndex] = mappedCharacters[1];
4048                     break;
4049
4050                     default:
4051                     --mappedLength; // Skip the current char
4052                     __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4053                     contents = (UniChar*)__CFStrContents(string);
4054                     memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4055                     length += (mappedLength - 1);
4056                     currentIndex += mappedLength;
4057                     break;
4058                 }
4059             } else if (mappedLength == 0) {
4060                 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4061                 contents = (UniChar*)__CFStrContents(string);
4062                 --length;
4063             } else if (mappedLength > 1) {
4064                 --mappedLength; // Skip the current char
4065                 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4066                 contents = (UniChar*)__CFStrContents(string);
4067                 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4068                 length += mappedLength;
4069                 currentIndex += mappedLength;
4070             }
4071         }
4072     }
4073 }
4074
4075 void CFStringUppercase(CFMutableStringRef string, CFLocaleRef locale) {
4076     CFIndex currentIndex = 0;
4077     CFIndex length;
4078     const char *langCode;
4079     Boolean isEightBit = __CFStrIsEightBit(string);
4080
4081     CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfUppercase:", locale);
4082
4083     __CFAssertIsStringAndMutable(string);
4084
4085     length = __CFStrLength(string);
4086
4087     langCode = (_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4088
4089     if (!langCode && isEightBit) {
4090         uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4091         for (;currentIndex < length;currentIndex++) {
4092             if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4093                 contents[currentIndex] -= 'a' - 'A';
4094             } else if (contents[currentIndex] > 127) {
4095                 break;
4096             }
4097         }
4098     }
4099
4100     if (currentIndex < length) {
4101         UniChar *contents;
4102         UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4103         CFIndex mappedLength;
4104         UTF32Char currentChar;
4105         UInt32 flags = 0;
4106
4107         if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4108
4109         contents = (UniChar*)__CFStrContents(string);
4110
4111         for (;currentIndex < length;currentIndex++) {
4112             if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4113                 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4114             } else {
4115                 currentChar = contents[currentIndex];
4116             }
4117
4118             flags = (langCode ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToUppercase, langCode, flags) : 0);
4119
4120             mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToUppercase, flags, langCode);
4121             if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4122
4123             if (currentChar > 0xFFFF) { // Non-BMP char
4124                 switch (mappedLength) {
4125                     case 0:
4126                     __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4127                     contents = (UniChar*)__CFStrContents(string);
4128                     length -= 2;
4129                     break;
4130
4131                     case 1:
4132                     __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4133                     contents = (UniChar*)__CFStrContents(string);
4134                     --length;
4135                     break;
4136
4137                     case 2:
4138                     contents[++currentIndex] = mappedCharacters[1];
4139                     break;
4140
4141                     default:
4142                     --mappedLength; // Skip the current char
4143                     __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4144                     contents = (UniChar*)__CFStrContents(string);
4145                     memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4146                     length += (mappedLength - 1);
4147                     currentIndex += mappedLength;
4148                     break;
4149                 }
4150             } else if (mappedLength == 0) {
4151                 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4152                 contents = (UniChar*)__CFStrContents(string);
4153                 --length;
4154             } else if (mappedLength > 1) {
4155                 --mappedLength; // Skip the current char
4156                 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4157                 contents = (UniChar*)__CFStrContents(string);
4158                 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4159                 length += mappedLength;
4160                 currentIndex += mappedLength;
4161             }
4162         }
4163     }
4164 }
4165
4166
4167 void CFStringCapitalize(CFMutableStringRef string, CFLocaleRef locale) {
         /* Capitalizes `string` in place.
            A cased character is mapped with kCFUniCharToTitlecase when the previous significant character
            was not cased (or there was none), and with kCFUniCharToLowercase when it was. Case-ignorable
            characters do not change that "previous character was cased" state (isLastCased).
            locale: when _CFCanUseLocale() accepts it, its language identifier is handed to the case-mapping
            routines so they can apply language-specific rules; otherwise langCode is NULL.
            A mapping may change the number of UTF-16 units; the string is resized as needed and `length`
            and `contents` are refreshed after every resize.
         */
4168     CFIndex currentIndex = 0;
4169     CFIndex length;
4170     const char *langCode;
4171     Boolean isEightBit = __CFStrIsEightBit(string);
4172     Boolean isLastCased = false;
4173     static const uint8_t *caseIgnorableForBMP = NULL; // Looked up once, on first use
4174
         // NOTE(review): presumably hands bridged Objective-C strings to "_cfCapitalize:" and returns; macro is defined elsewhere
4175     CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfCapitalize:", locale);
4176
4177     __CFAssertIsStringAndMutable(string);
4178
4179     length = __CFStrLength(string);
4180
4181     if (NULL == caseIgnorableForBMP) caseIgnorableForBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet, 0);
4182
4183     langCode = (_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4184
         // Fast path: no language-specific rules and 8-bit storage. Handles ASCII bytes directly and stops
         // at the first byte above 127; the general loop below then continues from that index.
4185     if (!langCode && isEightBit) {
4186         uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4187         for (;currentIndex < length;currentIndex++) {
4188             if (contents[currentIndex] > 127) {
4189                 break;
4190             } else if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4191                 contents[currentIndex] += (isLastCased ? 'a' - 'A' : 0); // Lowercase only inside a word
4192                 isLastCased = true;
4193             } else if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4194                 contents[currentIndex] -= (!isLastCased ? 'a' - 'A' : 0); // Uppercase only at the start of a word
4195                 isLastCased = true;
4196             } else if (!CFUniCharIsMemberOfBitmap(contents[currentIndex], caseIgnorableForBMP)) {
4197                 isLastCased = false;
4198             }
4199         }
4200     }
4201
         // General path: everything from currentIndex on is processed as UTF-16.
4202     if (currentIndex < length) {
4203         UniChar *contents;
4204         UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4205         CFIndex mappedLength;
4206         UTF32Char currentChar;
4207         UInt32 flags = 0;
4208
             // Zero-size change with the last argument true; the code below then reads the contents as UniChar
4209         if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4210
4211         contents = (UniChar*)__CFStrContents(string);
4212
4213         for (;currentIndex < length;currentIndex++) {
                 // Combine a valid surrogate pair into one code point
4214             if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4215                 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4216             } else {
4217                 currentChar = contents[currentIndex];
4218             }
                 // Conditional-mapping flags are only computed when a language is set, or for U+03A3 following a cased character
4219             flags = ((langCode || ((currentChar == 0x03A3) && isLastCased)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), langCode, flags) : 0);
4220
4221             mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), flags, langCode);
4222             if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4223
4224             if (currentChar > 0xFFFF) { // Non-BMP char
4225                 switch (mappedLength) {
4226                     case 0:
4227                     __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4228                     contents = (UniChar*)__CFStrContents(string);
4229                     length -= 2;
                         --currentIndex; // The next character now sits at this index; step back so the loop increment revisits it
4230                     break;
4231
4232                     case 1:
4233                     __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4234                     contents = (UniChar*)__CFStrContents(string);
4235                     --length;
4236                     break;
4237
4238                     case 2:
4239                     contents[++currentIndex] = mappedCharacters[1];
4240                     break;
4241
4242                     default:
4243                     --mappedLength; // Skip the current char
4244                     __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4245                     contents = (UniChar*)__CFStrContents(string);
4246                     memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4247                     length += (mappedLength - 1);
4248                     currentIndex += mappedLength;
4249                     break;
4250                 }
4251             } else if (mappedLength == 0) {
4252                 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4253                 contents = (UniChar*)__CFStrContents(string);
4254                 --length;
                     --currentIndex; // The next character now sits at this index; step back so the loop increment revisits it
4255             } else if (mappedLength > 1) {
4256                 --mappedLength; // Skip the current char
4257                 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4258                 contents = (UniChar*)__CFStrContents(string);
4259                 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4260                 length += mappedLength;
4261                 currentIndex += mappedLength;
4262             }
4263
                 // The state is derived from the original (unmapped) character
4264             if (!((currentChar > 0xFFFF) ? CFUniCharIsMemberOf(currentChar, kCFUniCharCaseIgnorableCharacterSet) : CFUniCharIsMemberOfBitmap(currentChar, caseIgnorableForBMP))) { // We have non-caseignorable here
4265                 isLastCased = ((CFUniCharIsMemberOf(currentChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(currentChar, kCFUniCharLowercaseLetterCharacterSet)) ? true : false);
4266             }
4267         }
4268     }
4269 }
4270
4271
4272 #define MAX_DECOMP_BUF 64 /* Element count of CFStringNormalize's stack scratch buffer; also the step by which that buffer grows on the heap */
4273
     /* Constants for the Hangul arithmetic in CFStringNormalize, which composes jamo as
        (L index * HANGUL_VCOUNT + V index) * HANGUL_TCOUNT + T index + HANGUL_SBASE.
        Names and values correspond to SBase, LBase, VBase, TBase, SCount, LCount, VCount, TCount and
        NCount of the Unicode Standard's Hangul syllable composition algorithm. */
4274 #define HANGUL_SBASE 0xAC00 /* First precomposed syllable */
4275 #define HANGUL_LBASE 0x1100 /* First leading consonant */
4276 #define HANGUL_VBASE 0x1161 /* First vowel */
4277 #define HANGUL_TBASE 0x11A7 /* One below the first trailing consonant; T index 0 stands for "no trailing consonant" */
4278 #define HANGUL_SCOUNT 11172 /* Number of precomposed syllables */
4279 #define HANGUL_LCOUNT 19 /* Number of leading consonants */
4280 #define HANGUL_VCOUNT 21 /* Number of vowels */
4281 #define HANGUL_TCOUNT 28 /* Number of T index values, including index 0 */
4282 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT) /* Syllables per leading consonant */
4283
4284 CF_INLINE uint32_t __CFGetUTF16Length(const UTF32Char *characters, uint32_t utf32Length) {
         /* Returns the number of UTF-16 code units needed for the first utf32Length entries of
            characters: two for each value above 0xFFFF, one for each other value. */
4285     uint32_t utf16Units = 0;
4286     uint32_t idx;
4287
4288     for (idx = 0; idx < utf32Length; idx++) utf16Units += ((characters[idx] > 0xFFFF) ? 2 : 1);
4289
4290     return utf16Units;
4291 }
4292
4293 CF_INLINE void __CFFillInUTF16(const UTF32Char *characters, UTF16Char *dst, uint32_t utf32Length) {
         /* Writes the first utf32Length entries of characters to dst as UTF-16. Values up to 0xFFFF are
            stored as one unit; larger values are stored as a high/low surrogate pair. dst is not bounds
            checked here, so the caller must provide enough room (see __CFGetUTF16Length). */
4294     uint32_t srcIdx;
4295
4296     for (srcIdx = 0; srcIdx < utf32Length; srcIdx++) {
4297         UTF32Char scalar = characters[srcIdx];
4298
4299         if (scalar <= 0xFFFF) {
4300             *(dst++) = scalar;
4301         } else {
4302             UTF32Char offset = scalar - 0x10000; // 20-bit value split across the two surrogates
4303             *(dst++) = (UTF16Char)((offset >> 10) + 0xD800UL);
4304             *(dst++) = (UTF16Char)((offset & 0x3FF) + 0xDC00UL);
4305         }
4306     }
4307 }
4308
4309 void CFStringNormalize(CFMutableStringRef string, CFStringNormalizationForm theForm) {
         /* Normalizes `string` in place to the Unicode normalization form selected by theForm.
            theForm is tested as a bit mask: the kCFStringNormalizationFormKD bit turns on compatibility
            decomposition and the kCFStringNormalizationFormC bit turns on recomposition.
            NOTE(review): this relies on the KC constant having both bits set - confirm against CFString.h.

            Outline of the main loop, one base character (plus trailing non-base characters) at a time:
              1. read a code point, combining surrogate pairs;
              2. canonically decompose it into mappedCharacters (UTF-32) when applicable;
              3. append following non-base characters and sort the run with CFUniCharPrioritySort;
              4. optionally apply compatibility decomposition;
              5. optionally recompose (including Hangul jamo -> syllable);
              6. if anything was produced, resize the affected range and write it back as UTF-16.
            mappedCharacters starts out as a stack buffer and moves to the heap when it has to grow;
            allocatedLength always holds its current capacity in elements.
         */
4310     CFIndex currentIndex = 0;
4311     CFIndex length;
4312     bool needToReorder = true;
4313
         // NOTE(review): presumably hands bridged Objective-C strings to "_cfNormalize:" and returns; macro is defined elsewhere
4314     CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfNormalize:", theForm);
4315
4316     __CFAssertIsStringAndMutable(string);
4317
4318     length = __CFStrLength(string);
4319
4320     if (__CFStrIsEightBit(string)) {
4321         uint8_t *contents;
4322
4323         if (theForm == kCFStringNormalizationFormC) return; // 8bit form has no decomposition
4324
4325         contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4326
             // Bytes up to 127 need no work. If no larger byte is found, currentIndex reaches length and
             // the main loop below is skipped entirely.
4327         for (;currentIndex < length;currentIndex++) {
4328             if (contents[currentIndex] > 127) {
4329                 __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); // need to do harm way
4330                 needToReorder = false;
4331                 break;
4332             }
4333         }
4334     }
4335
4336     if (currentIndex < length) {
4337         UTF16Char *limit = (UTF16Char *)__CFStrContents(string) + length;
4338         UTF16Char *contents = (UTF16Char *)__CFStrContents(string) + currentIndex;
4339         UTF32Char buffer[MAX_DECOMP_BUF];
4340         UTF32Char *mappedCharacters = buffer;
4341         CFIndex allocatedLength = MAX_DECOMP_BUF;
4342         CFIndex mappedLength;
4343         CFIndex currentLength; // UTF-16 units of the original text covered by the current run, starting at currentIndex
4344         UTF32Char currentChar;
4345
4346         while (contents < limit) {
4347             if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4348                 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4349                 currentLength = 2;
4350                 contents += 2;
4351             } else {
4352                 currentChar = *(contents++);
4353                 currentLength = 1;
4354             }
4355
4356             mappedLength = 0;
4357
4358             if (CFUniCharIsMemberOf(currentChar, kCFUniCharCanonicalDecomposableCharacterSet) && !CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) {
                     // The syllable block is [HANGUL_SBASE, HANGUL_SBASE + HANGUL_SCOUNT), hence >= on the upper bound
4359                 if ((theForm & kCFStringNormalizationFormC) == 0 || currentChar < HANGUL_SBASE || currentChar >= (HANGUL_SBASE + HANGUL_SCOUNT)) { // We don't have to decompose Hangul Syllables if we're precomposing again
4360                     mappedLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters, MAX_DECOMP_BUF);
4361                 }
4362             }
4363
4364             if ((needToReorder || (theForm & kCFStringNormalizationFormC)) && ((contents < limit) || (mappedLength == 0))) {
4365                 if (mappedLength > 0) {
                         // The base was decomposed; peek at the character that follows it
4366                     if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4367                         currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4368                     } else {
4369                         currentChar = *contents;
4370                     }
4371                 }
4372
4373                 if (CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) {
4374                     uint32_t decompLength;
4375
4376                     if (mappedLength == 0) {
                             // currentChar itself is a non-base character: pull the preceding character
                             // (if any) into the run so that it can be reordered/composed with it
4377                         contents -= (currentChar & 0xFFFF0000 ? 2 : 1);
4378                         if (currentIndex > 0) {
4379                             if (CFUniCharIsSurrogateLowCharacter(*(contents - 1)) && (currentIndex > 1) && CFUniCharIsSurrogateHighCharacter(*(contents - 2))) {
4380                                 *mappedCharacters = CFUniCharGetLongCharacterForSurrogatePair(*(contents - 2), *(contents - 1));
4381                                 currentIndex -= 2;
4382                                 currentLength += 2;
4383                             } else {
4384                                 *mappedCharacters = *(contents - 1);
4385                                 --currentIndex;
4386                                 ++currentLength;
4387                             }
4388                             mappedLength = 1;
4389                         }
4390                     } else {
4391                         currentLength += (currentChar & 0xFFFF0000 ? 2 : 1);
4392                     }
4393                     contents += (currentChar & 0xFFFF0000 ? 2 : 1);
4394
4395                     if (CFUniCharIsMemberOf(currentChar, kCFUniCharDecomposableCharacterSet)) { // Vietnamese accent, etc.
                             // Remaining room is measured against the buffer's real capacity, which may have grown in an earlier iteration
4396                         decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, allocatedLength - mappedLength);
4397                         mappedLength += decompLength;
4398                     } else {
4399                         mappedCharacters[mappedLength++] = currentChar;
4400                     }
4401
                         // Collect every further non-base character that follows
4402                     while (contents < limit) {
4403                         if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4404                             currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4405                         } else {
4406                             currentChar = *contents;
4407                         }
4408                         if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
4409                         if (currentChar & 0xFFFF0000) {
4410                             contents += 2;
4411                             currentLength += 2;
4412                         } else {
4413                             ++contents;
4414                             ++currentLength;
4415                         }
4416                         if (mappedLength == allocatedLength) {
4417                             allocatedLength += MAX_DECOMP_BUF;
4418                             if (mappedCharacters == buffer) {
4419                                 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(NULL, allocatedLength * sizeof(UTF32Char), 0);
4420                                 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4421                             } else {
4422                                 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(NULL, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4423                             }
4424                         }
4425                         if (CFUniCharIsMemberOf(currentChar, kCFUniCharDecomposableCharacterSet)) { // Vietnamese accent, etc.
                                 // Must use allocatedLength here: once the buffer has grown, mappedLength can exceed
                                 // MAX_DECOMP_BUF and "MAX_DECOMP_BUF - mappedLength" would no longer be a valid capacity
4426                             decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, allocatedLength - mappedLength);
4427                             mappedLength += decompLength;
4428                         } else {
4429                             mappedCharacters[mappedLength++] = currentChar;
4430                         }
4431                     }
4432                 }
4433                 if (needToReorder && mappedLength > 1) CFUniCharPrioritySort(mappedCharacters, mappedLength);
4434             }
4435
4436             if (theForm & kCFStringNormalizationFormKD) {
4437                 CFIndex newLength = 0;
4438
4439                 if (mappedLength == 0 && CFUniCharIsMemberOf(currentChar, kCFUniCharCompatibilityDecomposableCharacterSet)) {
4440                     mappedCharacters[mappedLength++] = currentChar;
4441                 }
                     // A zero result is treated as "buffer too small": grow and try again
4442                 while (newLength < mappedLength) {
4443                     newLength = CFUniCharCompatibilityDecompose(mappedCharacters, mappedLength, allocatedLength);
4444                     if (newLength == 0) {
4445                         allocatedLength += MAX_DECOMP_BUF;
4446                         if (mappedCharacters == buffer) {
4447                             mappedCharacters = (UTF32Char *)CFAllocatorAllocate(NULL, allocatedLength * sizeof(UTF32Char), 0);
4448                             memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4449                         } else {
4450                             mappedCharacters = (UTF32Char *)CFAllocatorReallocate(NULL, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4451                         }
4452                     }
4453                 }
4454                 mappedLength = newLength;
4455             }
4456
4457             if (theForm & kCFStringNormalizationFormC) {
4458                 if (mappedLength > 1) {
                         // Recompose inside mappedCharacters. Consumed entries are overwritten with 0xFFFD
                         // and squeezed out afterwards.
4459                     CFIndex consumedLength = 1;
4460                     UTF32Char nextChar;
4461                     UTF32Char *currentBase = mappedCharacters;
4462                     uint8_t currentClass, lastClass = 0;
4463                     const uint8_t *bmpClassTable = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
4464                     bool didCombine = false;
4465
4466                     currentChar = *mappedCharacters;
4467
4468                     while (consumedLength < mappedLength) {
4469                         nextChar = mappedCharacters[consumedLength];
4470                         currentClass = (nextChar & 0xFFFF0000 ? CFUniCharGetUnicodeProperty(nextChar, kCFUniCharCombiningProperty) : CFUniCharGetCombiningPropertyForCharacter(nextChar, bmpClassTable));
4471
4472                         if (theForm & kCFStringNormalizationFormKD) {
4473                             if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) {
4474                                 SInt8 lIndex = currentChar - HANGUL_LBASE;
4475
                                     // Valid indices are 0 .. COUNT-1, so the upper bounds are exclusive
4476                                 if ((0 <= lIndex) && (lIndex < HANGUL_LCOUNT)) {
                                         // 32-bit index: a 16-bit one would let supplementary code points wrap into the valid range
4477                                     SInt32 vIndex = (SInt32)nextChar - HANGUL_VBASE;
4478
4479                                     if ((vIndex >= 0) && (vIndex < HANGUL_VCOUNT)) {
4480                                         SInt32 tIndex = 0;
4481                                         CFIndex usedLength = mappedLength;
4482
4483                                         mappedCharacters[consumedLength++] = 0xFFFD;
4484
4485                                         if (consumedLength < mappedLength) {
4486                                             tIndex = (SInt32)mappedCharacters[consumedLength] - HANGUL_TBASE;
                                                 // Index 0 (U+11A7) is not a trailing consonant and must not be consumed
4487                                             if ((tIndex <= 0) || (tIndex >= HANGUL_TCOUNT)) {
4488                                                 tIndex = 0;
4489                                             } else {
4490                                                 mappedCharacters[consumedLength++] = 0xFFFD;
4491                                             }
4492                                         }
4493                                         *currentBase = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4494
4495                                         while (--usedLength > 0) {
4496                                             if (mappedCharacters[usedLength] == 0xFFFD) {
4497                                                 --mappedLength;
4498                                                 --consumedLength;
4499                                                 memmove(mappedCharacters + usedLength, mappedCharacters + usedLength + 1, (mappedLength - usedLength) * sizeof(UTF32Char));
4500                                             }
4501                                         }
4502                                         currentBase = mappedCharacters + consumedLength;
4503                                         currentChar = *currentBase;
4504                                         ++consumedLength;
4505
4506                                         continue;
4507                                     }
4508                                 }
4509                             }
4510                             if (!CFUniCharIsMemberOf(nextChar, kCFUniCharNonBaseCharacterSet)) {
                                     // A new base character starts here: store the finished one and move on
4511                                 *currentBase = currentChar;
4512                                 currentBase = mappedCharacters + consumedLength;
4513                                 currentChar = nextChar;
4514                                 ++consumedLength;
4515                                 continue;
4516                             }
4517                         }
                             // Only try to combine when not blocked by a preceding mark of the same combining class
4518                         if ((lastClass == 0) || (currentClass != lastClass)) {
4519                             nextChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
4520                             if (nextChar == 0xFFFD) {
4521                                 lastClass = currentClass;
4522                             } else {
4523                                 mappedCharacters[consumedLength] = 0xFFFD;
4524                                 didCombine = true;
4525                                 currentChar = nextChar;
4526                                 lastClass = 0;
4527                             }
4528                         }
4529                         ++consumedLength;
4530                     }
4531
4532                     *currentBase = currentChar;
4533                     if (didCombine) {
4534                         consumedLength = mappedLength;
4535                         while (--consumedLength > 0) {
4536                             if (mappedCharacters[consumedLength] == 0xFFFD) {
4537                                 --mappedLength;
4538                                 memmove(mappedCharacters + consumedLength, mappedCharacters + consumedLength + 1, (mappedLength - consumedLength) * sizeof(UTF32Char));
4539                             }
4540                         }
4541                     }
4542                 } else if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo
                         // Compose L + V (+ T) read directly from the string contents
4543                     SInt8 lIndex = currentChar - HANGUL_LBASE;
4544
4545                     if ((contents < limit) && (0 <= lIndex) && (lIndex < HANGUL_LCOUNT)) {
4546                         SInt16 vIndex = *contents - HANGUL_VBASE;
4547
4548                         if ((vIndex >= 0) && (vIndex < HANGUL_VCOUNT)) {
4549                             SInt16 tIndex = 0;
4550
4551                             ++contents; ++currentLength;
4552
4553                             if (contents < limit) {
4554                                 tIndex = *contents - HANGUL_TBASE;
                                     // Index 0 (U+11A7) is not a trailing consonant and must stay in the string
4555                                 if ((tIndex <= 0) || (tIndex >= HANGUL_TCOUNT)) {
4556                                     tIndex = 0;
4557                                 } else {
4558                                     ++contents; ++currentLength;
4559                                 }
4560                             }
4561                             *mappedCharacters = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4562                             mappedLength = 1;
4563                         }
4564                     }
4565                 }
4566             }
4567
4568             if (mappedLength > 0) {
                     // Replace the currentLength original units at currentIndex with the UTF-16 form of mappedCharacters
4569                 CFIndex utf16Length = __CFGetUTF16Length(mappedCharacters, mappedLength);
4570
4571                 if (utf16Length != currentLength) {
4572                     __CFStringChangeSize(string, CFRangeMake(currentIndex, currentLength), utf16Length, true);
4573                     currentLength = utf16Length;
4574                 }
                     // The storage may have moved: recompute both pointers from the string
4575                 contents = (UTF16Char *)__CFStrContents(string);
4576                 limit = contents + __CFStrLength(string);
4577                 contents += currentIndex;
4578                 __CFFillInUTF16(mappedCharacters, contents, mappedLength);
4579                 contents += utf16Length;
4580             }
4581             currentIndex += currentLength;
4582         }
4583
4584         if (mappedCharacters != buffer) CFAllocatorDeallocate(NULL, mappedCharacters);
4585     }
4586 }
4587
4588
4589 enum {
             /* Bits stored in CFFormatSpec.flags; __CFParseFormatSpec sets them from the flag characters of a format specification. */
4590         kCFStringFormatZeroFlag = (1 << 0),     // '0' flag; if not, padding is space char. Not set after '-' and cleared by '-'
4591         kCFStringFormatMinusFlag = (1 << 1),    // '-' flag; if not, no flag implied
4592         kCFStringFormatPlusFlag = (1 << 2),     // '+' flag; if not, no flag implied, overrides space
4593         kCFStringFormatSpaceFlag = (1 << 3)     // ' ' flag; if not, no flag implied
4594 };
4595
4596 typedef struct {
         /* Description of one format specification collected from a format string.
            The collecting loop initializes the fields; __CFParseFormatSpec then fills in what it finds. */
4597     int16_t size;          /* One of the CFFormatSize* values */
4598     int16_t type;          /* One of the CFFormat*Type values; left at its initial value when the string ends before a conversion character */
4599     SInt32 loc;            /* Index into the format string; the collecting loop stores the position where this spec starts */
4600     SInt32 len;            /* Initialized to 0 by the collecting loop; presumably the spec's length in the format string - TODO confirm */
4601     SInt32 widthArg;       /* Width given as a number in the format; the collecting loop initializes it to -1 */
4602     SInt32 precArg;        /* Precision given as a number after '.'; the collecting loop initializes it to -1 */
4603     uint32_t flags;        /* Combination of the kCFStringFormat*Flag bits */
4604     int8_t mainArgNum;     /* Zero-based argument position from "n$"; the collecting loop initializes it to -1 */
4605     int8_t precArgNum;     /* Set to -2 by ".*" until a following "n$" supplies the zero-based position */
4606     int8_t widthArgNum;    /* Set to -2 by "*" until a following "n$" supplies the zero-based position */
4607     int8_t unused1;        /* Not referenced in the code shown here */
4608 } CFFormatSpec;
4609
4610 typedef struct {
         /* Holder for one argument value. NOTE(review): presumably filled from the va_list by
            _CFStringAppendFormatAndArgumentsAux; that code is outside this excerpt. */
4611     int16_t type;          /* Presumably one of the CFFormat*Type values - TODO confirm */
4612     int16_t size;          /* Presumably one of the CFFormatSize* values - TODO confirm */
4613     union {
4614         int64_t int64Value;
4615         double doubleValue;
4616         void *pointerValue;
4617     } value;               /* Only one member is meaningful at a time */
4618 } CFPrintValue;
4619
4620 enum {
         /* Values for CFFormatSpec.size. These are codes, not byte counts (CFFormatSize4 is 3, CFFormatSize8 is 4). */
4621     CFFormatDefaultSize = 0,    /* No size modifier seen */
4622     CFFormatSize1 = 1,          /* Set for 'c' */
4623     CFFormatSize2 = 2,          /* Set for the 'h' modifier and for 'C' */
4624     CFFormatSize4 = 3,          /* Set for a single 'l' and for 'n', 'p', 's', 'S', 'P', '@' */
4625     CFFormatSize8 = 4,          /* Set for 'll', 'q' and the floating-point conversions */
4626     CFFormatSize16 = 5,         /* unused */
4627 };
4628
4629 enum {
         /* Values for CFFormatSpec.type, assigned by __CFParseFormatSpec from the conversion character. */
4630     CFFormatLiteralType = 32,           /* Unrecognized conversion character */
4631     CFFormatLongType = 33,              /* 'c' and the integer conversions (O o D d i U u x X) */
4632     CFFormatDoubleType = 34,            /* e E f g G */
4633     CFFormatPointerType = 35,           /* 'n' and 'p' */
4634     CFFormatObjectType = 36,            /* handled specially */ /* ??? not used anymore, can be removed? */
4635     CFFormatCFType = 37,                /* '@' */ /* handled specially */
4636     CFFormatUnicharsType = 38,          /* 'S' */ /* handled specially */
4637     CFFormatCharsType = 39,             /* 's' */ /* handled specially */
4638     CFFormatPascalCharsType = 40,       /* 'P' */ /* handled specially */
4639     CFFormatSingleUnicharType = 41      /* 'C' */ /* handled specially */
4640 };
4641
4642 CF_INLINE void __CFParseFormatSpec(const UniChar *uformat, const uint8_t *cformat, SInt32 *fmtIdx, SInt32 fmtLen, CFFormatSpec *spec) {
         /* Parses one format specification, starting at the character after '%'.
            uformat / cformat: the format characters; cformat is used when it is non-NULL, otherwise uformat.
            fmtIdx: in/out index into the format; on return it is just past the last character consumed.
            fmtLen: number of characters in the format; nothing at or beyond this index is read.
            spec: receives flags, size, type, width/precision and argument positions. The caller must have
                  initialized it, because fields that the specification does not mention are left untouched.
            Returns when a conversion character has been handled (spec->type set), when an unrecognized
            character is met (spec->type = CFFormatLiteralType), or when the format runs out, in which
            case spec->type keeps its initial value ("no type").
         */
4643     Boolean seenDot = false;
4644     for (;;) {
4645         UniChar ch;
4646         if (fmtLen <= *fmtIdx) return;  /* no type */
4647         if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
             /* A "break" inside the switch goes on to the next character; reswtch re-dispatches a character already read into ch */
4648 reswtch:switch (ch) {
4649         case '#':       // ignored for now
4650             break;
4651         case 0x20:
4652             if (!(spec->flags & kCFStringFormatPlusFlag)) spec->flags |= kCFStringFormatSpaceFlag;
4653             break;
4654         case '-':
4655             spec->flags |= kCFStringFormatMinusFlag;
4656             spec->flags &= ~kCFStringFormatZeroFlag;    // remove zero flag
4657             break;
4658         case '+':
4659             spec->flags |= kCFStringFormatPlusFlag;
4660             spec->flags &= ~kCFStringFormatSpaceFlag;   // remove space flag
4661             break;
4662         case '0':
4663             if (!(spec->flags & kCFStringFormatMinusFlag)) spec->flags |= kCFStringFormatZeroFlag;
4664             break;
4665         case 'h':
4666             spec->size = CFFormatSize2;
4667             break;
4668         case 'l':
4669             if (*fmtIdx < fmtLen) {
4670                 // fetch next character, don't increment fmtIdx
4671                 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
4672                 if ('l' == ch) {        // 'll' for long long, like 'q'
4673                     (*fmtIdx)++;
4674                     spec->size = CFFormatSize8;
4675                     break;
4676                 }
4677             }
4678             spec->size = CFFormatSize4;
4679             break;
4680         case 'q':
4681             spec->size = CFFormatSize8;
4682             break;
4683         case 'c':
4684             spec->type = CFFormatLongType;
4685             spec->size = CFFormatSize1;
4686             return;
4687         case 'O': case 'o': case 'D': case 'd': case 'i': case 'U': case 'u': case 'x': case 'X':
4688             spec->type = CFFormatLongType;
4689             return;
4690         case 'e': case 'E': case 'f': case 'g': case 'G':
4691             spec->type = CFFormatDoubleType;
4692             spec->size = CFFormatSize8;
4693             return;
4694         case 'n': case 'p':             /* %n is not handled correctly currently */
4695             spec->type = CFFormatPointerType;
4696             spec->size = CFFormatSize4;
4697             return;
4698         case 's':
4699             spec->type = CFFormatCharsType;
4700             spec->size = CFFormatSize4;
4701             return;
4702         case 'S':
4703             spec->type = CFFormatUnicharsType;
4704             spec->size = CFFormatSize4;
4705             return;
4706         case 'C':
4707             spec->type = CFFormatSingleUnicharType;
4708             spec->size = CFFormatSize2;
4709             return;
4710         case 'P':
4711             spec->type = CFFormatPascalCharsType;
4712             spec->size = CFFormatSize4;
4713             return;
4714         case '@':
4715             spec->type = CFFormatCFType;
4716             spec->size = CFFormatSize4;
4717             return;
4718         case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
4719             int64_t number = 0;
                 Boolean ranOut = false;     // Set when the format ends in the middle of the number
4720             do {
4721                 number = 10 * number + (ch - '0');
                     if (fmtLen <= *fmtIdx) {    // Never read past the end of the format
                         ranOut = true;
                         break;
                     }
4722                 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
4723             } while ((UInt32)(ch - '0') <= 9);
                 if (ranOut) {
                     // The number cannot be followed by '$', so it is a plain precision or width
                     if (seenDot) spec->precArg = (SInt32)number; else spec->widthArg = (SInt32)number;
                     return;     /* no type */
                 }
4724             if ('$' == ch) {
                     // "n$": the number names an argument. A pending '*' (marked -2) claims it first, precision before width
4725                 if (-2 == spec->precArgNum) {
4726                     spec->precArgNum = number - 1;      // Arg numbers start from 1
4727                 } else if (-2 == spec->widthArgNum) {
4728                     spec->widthArgNum = number - 1;     // Arg numbers start from 1
4729                 } else {
4730                     spec->mainArgNum = number - 1;      // Arg numbers start from 1
4731                 }
4732                 break;
4733             } else if (seenDot) {       /* else it's either precision or width */
4734                 spec->precArg = (SInt32)number;
4735             } else {
4736                 spec->widthArg = (SInt32)number;
4737             }
4738             goto reswtch;
4739         }
4740         case '*':
4741             spec->widthArgNum = -2;
4742             break;
4743         case '.':
4744             seenDot = true;
                 if (fmtLen <= *fmtIdx) return;  /* no type; never read past the end of the format */
4745             if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
4746             if ('*' == ch) {
4747                 spec->precArgNum = -2;
4748                 break;
4749             }
4750             goto reswtch;
4751         default:
4752             spec->type = CFFormatLiteralType;
4753             return;
4754         }
4755     }
4756 }
4757
4758 #if defined(__WIN32__)
     /* Stand-in for snprintf on Windows, built on _vsnprintf.
        b: destination buffer; n: its size in bytes; f and the variadic arguments: printf-style format.
        Returns whatever _vsnprintf returns, so a negative value means the output was truncated.
        Unlike a bare _vsnprintf call, the buffer is always NUL-terminated when n > 0. */
4759 static int snprintf(char *b, size_t n, const char * f, ...) {
4760     int retval;
4761     va_list args;
4762     va_start (args, f);
4763     retval = _vsnprintf(b, n, f, args);
4764     va_end(args);
         // _vsnprintf writes no terminator when the output is truncated or exactly fills the buffer
         if (b != NULL && n > 0 && (retval < 0 || (size_t)retval >= n)) b[n - 1] = '\0';
4765     return retval;
4766 }
4767 #endif
4768
4769 /* ??? It ignores the formatOptions argument.
4770    ??? %s depends on handling of encodings by __CFStringAppendBytes
        Appends formatted text to outputString by forwarding every argument unchanged to
        _CFStringAppendFormatAndArgumentsAux, with NULL passed for its copyDescFunc parameter.
        NOTE(review): formatOptions is forwarded here; whether it is used further down cannot be
        seen from this wrapper.
4771 */
4772 void CFStringAppendFormatAndArguments(CFMutableStringRef outputString, CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
4773     _CFStringAppendFormatAndArgumentsAux(outputString, NULL, formatOptions, formatString, args);
4774 }
4775
4776 #define SNPRINTF(TYPE, WHAT) {                          \
4777     TYPE value = (TYPE) WHAT;                           \
4778     if (-1 != specs[curSpec].widthArgNum) {             \
4779         if (-1 != specs[curSpec].precArgNum) {          \
4780             snprintf_l(buffer, 255, NULL, formatBuffer, width, precision, value); \
4781         } else {                                        \
4782             snprintf_l(buffer, 255, NULL, formatBuffer, width, value); \
4783         }                                               \
4784     } else {                                            \
4785         if (-1 != specs[curSpec].precArgNum) {          \
4786             snprintf_l(buffer, 255, NULL, formatBuffer, precision, value); \
4787         } else {                                        \
4788             snprintf_l(buffer, 255, NULL, formatBuffer, value); \
4789         }                                               \
4790     }}
     /* SNPRINTF(TYPE, WHAT), defined above: casts WHAT to TYPE and formats it into `buffer` (size
        argument 255) using `formatBuffer` as the format. `width` and/or `precision` are passed ahead
        of the value when the current spec's widthArgNum / precArgNum is not -1.
        snprintf_l receives NULL as its third argument.
        The macro expects specs, curSpec, buffer, formatBuffer, width and precision to be in scope
        where it is expanded. */
4791
4792 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, CFDictionaryRef), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
4793     SInt32 numSpecs, sizeSpecs, sizeArgNum, formatIdx, curSpec, argNum;
4794     CFIndex formatLen;
4795 #define FORMAT_BUFFER_LEN 400
4796     const uint8_t *cformat = NULL;
4797     const UniChar *uformat = NULL;
4798     UniChar *formatChars = NULL;
4799     UniChar localFormatBuffer[FORMAT_BUFFER_LEN];
4800
4801     #define VPRINTF_BUFFER_LEN 61
4802     CFFormatSpec localSpecsBuffer[VPRINTF_BUFFER_LEN];
4803     CFFormatSpec *specs;
4804     CFPrintValue localValuesBuffer[VPRINTF_BUFFER_LEN];
4805     CFPrintValue *values;
4806     CFAllocatorRef tmpAlloc = NULL;
4807
4808     numSpecs = 0;
4809     sizeSpecs = 0;
4810     sizeArgNum = 0;
4811     specs = NULL;
4812     values = NULL;
4813
4814     formatLen = CFStringGetLength(formatString);
4815     if (!CF_IS_OBJC(__kCFStringTypeID, formatString)) {
4816         __CFAssertIsString(formatString);
4817         if (!__CFStrIsUnicode(formatString)) {
4818             cformat = __CFStrContents(formatString);
4819             if (cformat) cformat += __CFStrSkipAnyLengthByte(formatString);
4820         } else {
4821             uformat = __CFStrContents(formatString);
4822         }
4823     }
4824     if (!cformat && !uformat) {
4825         formatChars = (formatLen > FORMAT_BUFFER_LEN) ? CFAllocatorAllocate(tmpAlloc = __CFGetDefaultAllocator(), formatLen * sizeof(UniChar), 0) : localFormatBuffer;
4826         if (formatChars != localFormatBuffer && __CFOASafe) __CFSetLastAllocationEventName(formatChars, "CFString (temp)");
4827         CFStringGetCharacters(formatString, CFRangeMake(0, formatLen), formatChars);
4828         uformat = formatChars;
4829     }
4830
4831     /* Compute an upper bound for the number of format specifications */
4832     if (cformat) {
4833         for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == cformat[formatIdx]) sizeSpecs++;
4834     } else {
4835         for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == uformat[formatIdx]) sizeSpecs++;
4836     }
4837     tmpAlloc = __CFGetDefaultAllocator();
4838     specs = ((2 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? CFAllocatorAllocate(tmpAlloc, (2 * sizeSpecs + 1) * sizeof(CFFormatSpec), 0) : localSpecsBuffer;
4839     if (specs != localSpecsBuffer && __CFOASafe) __CFSetLastAllocationEventName(specs, "CFString (temp)");
4840
4841     /* Collect format specification information from the format string */
4842     for (curSpec = 0, formatIdx = 0; formatIdx < formatLen; curSpec++) {
4843         SInt32 newFmtIdx;
4844         specs[curSpec].loc = formatIdx;
4845         specs[curSpec].len = 0;
4846         specs[curSpec].size = 0;
4847         specs[curSpec].type = 0;
4848         specs[curSpec].flags = 0;
4849         specs[curSpec].widthArg = -1;
4850         specs[curSpec].precArg = -1;
4851         specs[curSpec].mainArgNum = -1;
4852         specs[curSpec].precArgNum = -1;
4853         specs[curSpec].widthArgNum = -1;
4854         if (cformat) {
4855             for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != cformat[newFmtIdx]; newFmtIdx++);
4856         } else {
4857             for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != uformat[newFmtIdx]; newFmtIdx++);
4858         }
4859         if (newFmtIdx != formatIdx) {   /* Literal chunk */
4860             specs[curSpec].type = CFFormatLiteralType;
4861             specs[curSpec].len = newFmtIdx - formatIdx;
4862         } else {
4863             newFmtIdx++;        /* Skip % */
4864             __CFParseFormatSpec(uformat, cformat, &newFmtIdx, formatLen, &(specs[curSpec]));
4865             if (CFFormatLiteralType == specs[curSpec].type) {
4866                 specs[curSpec].loc = formatIdx + 1;
4867                 specs[curSpec].len = 1;
4868             } else {
4869                 specs[curSpec].len = newFmtIdx - formatIdx;
4870             }
4871         }
4872         formatIdx = newFmtIdx;
4873
4874 // fprintf(stderr, "specs[%d] = {\n  size = %d,\n  type = %d,\n  loc = %d,\n  len = %d,\n  mainArgNum = %d,\n  precArgNum = %d,\n  widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
4875
4876     }
4877     numSpecs = curSpec;
4878     // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
4879     values = ((3 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? CFAllocatorAllocate(tmpAlloc, (3 * sizeSpecs + 1) * sizeof(CFPrintValue), 0) : localValuesBuffer;
4880     if (values != localValuesBuffer && __CFOASafe) __CFSetLastAllocationEventName(values, "CFString (temp)");
4881     memset(values, 0, (3 * sizeSpecs + 1) * sizeof(CFPrintValue));
4882     sizeArgNum = (3 * sizeSpecs + 1);
4883
4884     /* Compute values array */
4885     argNum = 0;
4886     for (curSpec = 0; curSpec < numSpecs; curSpec++) {
4887         SInt32 newMaxArgNum;
4888         if (0 == specs[curSpec].type) continue;
4889         if (CFFormatLiteralType == specs[curSpec].type) continue;
4890         newMaxArgNum = sizeArgNum;
4891         if (newMaxArgNum < specs[curSpec].mainArgNum) {
4892             newMaxArgNum = specs[curSpec].mainArgNum;
4893         }
4894         if (newMaxArgNum < specs[curSpec].precArgNum) {
4895             newMaxArgNum = specs[curSpec].precArgNum;
4896         }
4897         if (newMaxArgNum < specs[curSpec].widthArgNum) {
4898             newMaxArgNum = specs[curSpec].widthArgNum;
4899         }
4900         if (sizeArgNum < newMaxArgNum) {
4901             if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
4902             if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
4903             if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
4904             return;  // more args than we expected!
4905         }
4906         /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
4907         if (-2 == specs[curSpec].widthArgNum) {
4908             specs[curSpec].widthArgNum = argNum++;
4909         }
4910         if (-2 == specs[curSpec].precArgNum) {
4911             specs[curSpec].precArgNum = argNum++;
4912         }
4913         if (-1 == specs[curSpec].mainArgNum) {
4914             specs[curSpec].mainArgNum = argNum++;
4915         }
4916         values[specs[curSpec].mainArgNum].size = specs[curSpec].size;
4917         values[specs[curSpec].mainArgNum].type = specs[curSpec].type;
4918         if (-1 != specs[curSpec].widthArgNum) {
4919             values[specs[curSpec].widthArgNum].size = 0;
4920             values[specs[curSpec].widthArgNum].type = CFFormatLongType;
4921         }
4922         if (-1 != specs[curSpec].precArgNum) {
4923             values[specs[curSpec].precArgNum].size = 0;
4924             values[specs[curSpec].precArgNum].type = CFFormatLongType;
4925         }
4926     }
4927
4928     /* Collect the arguments in correct type from vararg list */
4929     for (argNum = 0; argNum < sizeArgNum; argNum++) {
4930         switch (values[argNum].type) {
4931         case 0:
4932         case CFFormatLiteralType:
4933             break;
4934         case CFFormatLongType:
4935         case CFFormatSingleUnicharType:
4936             if (CFFormatSize1 == values[argNum].size) {
4937                 values[argNum].value.int64Value = (int64_t)(int8_t)va_arg(args, int);
4938             } else if (CFFormatSize2 == values[argNum].size) {
4939                 values[argNum].value.int64Value = (int64_t)(int16_t)va_arg(args, int);
4940             } else if (CFFormatSize4 == values[argNum].size) {
4941                 values[argNum].value.int64Value = (int64_t)va_arg(args, int32_t);
4942             } else if (CFFormatSize8 == values[argNum].size) {
4943                 values[argNum].value.int64Value = (int64_t)va_arg(args, int64_t);
4944             } else {
4945                 values[argNum].value.int64Value = (int64_t)va_arg(args, int);
4946             }
4947             break;
4948         case CFFormatDoubleType:
4949             values[argNum].value.doubleValue = va_arg(args, double);
4950             break;
4951         case CFFormatPointerType:
4952         case CFFormatObjectType:
4953         case CFFormatCFType:
4954         case CFFormatUnicharsType:
4955         case CFFormatCharsType:
4956         case CFFormatPascalCharsType:
4957             values[argNum].value.pointerValue = va_arg(args, void *);
4958             break;
4959         }
4960     }
4961     va_end(args);
4962
4963     /* Format the pieces together */
4964     for (curSpec = 0; curSpec < numSpecs; curSpec++) {
4965         SInt32 width = 0, precision = 0;
4966         UniChar *up, ch;
4967         Boolean hasWidth = false, hasPrecision = false;
4968
4969         // widthArgNum and widthArg are never set at the same time; same for precArg*
4970         if (-1 != specs[curSpec].widthArgNum) {
4971             width = (SInt32)values[specs[curSpec].widthArgNum].value.int64Value;
4972             hasWidth = true;
4973         }
4974         if (-1 != specs[curSpec].precArgNum) {
4975             precision = (SInt32)values[specs[curSpec].precArgNum].value.int64Value;
4976             hasPrecision = true;
4977         }
4978         if (-1 != specs[curSpec].widthArg) {
4979             width = specs[curSpec].widthArg;
4980             hasWidth = true;
4981         }
4982         if (-1 != specs[curSpec].precArg) {
4983             precision = specs[curSpec].precArg;
4984             hasPrecision = true;
4985         }
4986
4987         switch (specs[curSpec].type) {
4988         case CFFormatLongType:
4989         case CFFormatDoubleType:
4990         case CFFormatPointerType: {
4991                 int8_t formatBuffer[128];
4992 #if defined(__GNUC__)
4993                 int8_t buffer[256 + width + precision];
4994 #else
4995                 int8_t stackBuffer[512];
4996                 int8_t *dynamicBuffer = NULL;
4997                 int8_t *buffer = stackBuffer;
4998                 if (256+width+precision > 512) {
4999                     dynamicBuffer = CFAllocatorAllocate(NULL, 256+width+precision, 0);
5000                     buffer = dynamicBuffer;
5001                 }
5002 #endif
5003                 SInt32 cidx, idx, loc;
5004                 Boolean appended = false;
5005                 loc = specs[curSpec].loc;
5006                 // In preparation to call snprintf(), copy the format string out
5007                 if (cformat) {
5008                     for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
5009                         if ('$' == cformat[loc + cidx]) {
5010                             for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
5011                         } else {
5012                             formatBuffer[idx] = cformat[loc + cidx];
5013                         }
5014                     }
5015                 } else {
5016                     for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
5017                         if ('$' == uformat[loc + cidx]) {
5018                             for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
5019                         } else {
5020                             formatBuffer[idx] = (int8_t)uformat[loc + cidx];
5021                         }
5022                     }
5023                 }
5024                 formatBuffer[idx] = '\0';
5025                 // Should modify format buffer here if necessary; for example, to translate %qd to
5026                 // the equivalent, on architectures which do not have %q.
5027                 buffer[sizeof(buffer) - 1] = '\0';
5028                 switch (specs[curSpec].type) {
5029                     case CFFormatLongType:
5030                         if (CFFormatSize8 == specs[curSpec].size) {
5031                             SNPRINTF(int64_t, values[specs[curSpec].mainArgNum].value.int64Value)
5032                         } else {
5033                             SNPRINTF(SInt32, values[specs[curSpec].mainArgNum].value.int64Value)
5034                         }
5035                         break;
5036                     case CFFormatPointerType:
5037                         SNPRINTF(void *, values[specs[curSpec].mainArgNum].value.pointerValue)
5038                         break;
5039
5040                     case CFFormatDoubleType:
5041                         SNPRINTF(double, values[specs[curSpec].mainArgNum].value.doubleValue)
5042                         // See if we need to localize the decimal point
5043                         if (formatOptions) {    // We have a localization dictionary
5044                             CFStringRef decimalSeparator = CFDictionaryGetValue(formatOptions, kCFNSDecimalSeparatorKey);
5045                             if (decimalSeparator != NULL) {     // We have a decimal separator in there
5046                                 CFIndex decimalPointLoc = 0;
5047                                 while (buffer[decimalPointLoc] != 0 && buffer[decimalPointLoc] != '.') decimalPointLoc++;
5048                                 if (buffer[decimalPointLoc] == '.') {   // And we have a decimal point in the formatted string
5049                                     buffer[decimalPointLoc] = 0;
5050                                     CFStringAppendCString(outputString, buffer, __CFStringGetEightBitStringEncoding());
5051                                     CFStringAppend(outputString, decimalSeparator);
5052                                     CFStringAppendCString(outputString, buffer + decimalPointLoc + 1, __CFStringGetEightBitStringEncoding());
5053                                     appended = true;
5054                                 }
5055                             }
5056                         }
5057                         break;
5058                 }
5059                 if (!appended) CFStringAppendCString(outputString, buffer, __CFStringGetEightBitStringEncoding());
5060             }
5061 #if !defined(__GNUC__)
5062             if (dynamicBuffer) {
5063                 CFAllocatorDeallocate(NULL, dynamicBuffer);
5064             }
5065 #endif
5066                 break;
5067         case CFFormatLiteralType:
5068             if (cformat) {
5069                 __CFStringAppendBytes(outputString, cformat+specs[curSpec].loc, specs[curSpec].len, __CFStringGetEightBitStringEncoding());
5070             } else {
5071                 CFStringAppendCharacters(outputString, uformat+specs[curSpec].loc, specs[curSpec].len);
5072             }
5073             break;
5074         case CFFormatPascalCharsType:
5075         case CFFormatCharsType:
5076             if (values[specs[curSpec].mainArgNum].value.pointerValue == NULL) {
5077                 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5078             } else {
5079                 int len;
5080                 const char *str = values[specs[curSpec].mainArgNum].value.pointerValue;
5081                 if (specs[curSpec].type == CFFormatPascalCharsType) {   // Pascal string case
5082                     len = ((unsigned char *)str)[0];
5083                     str++;
5084                     if (hasPrecision && precision < len) len = precision;
5085                 } else {        // C-string case
5086                     if (!hasPrecision) {        // No precision, so rely on the terminating null character
5087                         len = strlen(str);
5088                     } else {    // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
5089                         const char *terminatingNull = memchr(str, 0, precision);        // Basically strlen() on only the first precision characters of str
5090                         if (terminatingNull) {  // There was a null in the first precision characters
5091                             len = terminatingNull - str;
5092                         } else {
5093                             len = precision;
5094                         }
5095                     }
5096                 }
5097                 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5098                 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5099                 // to ignore those flags (and, say, never pad with '0' instead of space).
5100                 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
5101                     __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
5102                     if (hasWidth && width > len) {
5103                         int w = width - len;    // We need this many spaces; do it ten at a time
5104                         do {__CFStringAppendBytes(outputString, "          ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5105                     }
5106                 } else {
5107                     if (hasWidth && width > len) {
5108                         int w = width - len;    // We need this many spaces; do it ten at a time
5109                         do {__CFStringAppendBytes(outputString, "          ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5110                     }
5111                     __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
5112                 }
5113             }
5114             break;
5115         case CFFormatSingleUnicharType:
5116             ch = values[specs[curSpec].mainArgNum].value.int64Value;
5117             CFStringAppendCharacters(outputString, &ch, 1);
5118             break;
5119         case CFFormatUnicharsType:
5120             //??? need to handle width, precision, and padding arguments
5121             up = values[specs[curSpec].mainArgNum].value.pointerValue;
5122             if (NULL == up) {
5123                 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5124             } else {
5125                 int len;
5126                 for (len = 0; 0 != up[len]; len++);
5127                 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5128                 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5129                 // to ignore those flags (and, say, never pad with '0' instead of space).
5130                 if (hasPrecision && precision < len) len = precision;
5131                 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
5132                     CFStringAppendCharacters(outputString, up, len);
5133                     if (hasWidth && width > len) {
5134                         int w = width - len;    // We need this many spaces; do it ten at a time
5135                         do {__CFStringAppendBytes(outputString, "          ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5136                     }
5137                 } else {
5138                     if (hasWidth && width > len) {
5139                         int w = width - len;    // We need this many spaces; do it ten at a time
5140                         do {__CFStringAppendBytes(outputString, "          ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5141                     }
5142                     CFStringAppendCharacters(outputString, up, len);
5143                 }
5144             }
5145             break;
5146         case CFFormatCFType:
5147         case CFFormatObjectType:
5148             if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
5149                 CFStringRef str = NULL;
5150                 if (copyDescFunc) {
5151                     str = copyDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
5152                 } else {
5153                     str = __CFCopyFormattingDescription(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
5154                     if (NULL == str) {
5155                         str = CFCopyDescription(values[specs[curSpec].mainArgNum].value.pointerValue);
5156                     }
5157                 }
5158                 if (str) {
5159                     CFStringAppend(outputString, str);
5160                     CFRelease(str);
5161                 } else {
5162                     CFStringAppendCString(outputString, "(null description)", kCFStringEncodingASCII);
5163                 }
5164             } else {
5165                 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5166             }
5167             break;
5168         }
5169     }
5170
5171     if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
5172     if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
5173     if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
5174
5175 }
5176
5177 #undef SNPRINTF
5178
5179 void CFShowStr(CFStringRef str) {
5180     CFAllocatorRef alloc;
5181
5182     if (!str) {
5183         fprintf(stdout, "(null)\n");
5184         return;
5185     }
5186
5187     if (CF_IS_OBJC(__kCFStringTypeID, str)) {
5188         fprintf(stdout, "This is an NSString, not CFString\n");
5189         return;
5190     }
5191
5192     alloc = CFGetAllocator(str);
5193
5194     fprintf(stdout, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str), __CFStrIsEightBit(str));
5195     fprintf(stdout, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
5196             __CFStrHasLengthByte(str), __CFStrHasNullByte(str), __CFStrIsInline(str));
5197
5198     fprintf(stdout, "Allocator ");
5199     if (alloc != kCFAllocatorSystemDefault) {
5200         fprintf(stdout, "%p\n", (void *)alloc);
5201     } else {
5202         fprintf(stdout, "SystemDefault\n");
5203     }
5204     fprintf(stdout, "Mutable %d\n", __CFStrIsMutable(str));
5205     if (!__CFStrIsMutable(str) && __CFStrHasContentsDeallocator(str)) {
5206         if (__CFStrContentsDeallocator(str)) fprintf(stdout, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str));
5207         else fprintf(stdout, "ContentsDeallocatorFunc None\n");
5208     } else if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str)) {
5209         fprintf(stdout, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef)str));
5210     }
5211
5212     if (__CFStrIsMutable(str)) {
5213         fprintf(stdout, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str), __CFStrIsFixed(str) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str));
5214     }
5215     fprintf(stdout, "Contents %p\n", (void *)__CFStrContents(str));
5216 }
5217
5218
5219