]> git.saurik.com Git - apple/cf.git/blob - CFString.c
800918530dcaadeb2d63e017ed6a208e07332ce0
[apple/cf.git] / CFString.c
1 /*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFString.c
25 Copyright (c) 1998-2013, Apple Inc. All rights reserved.
26 Responsibility: Ali Ozer
27
28 !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined.
29 */
30
31 #include <CoreFoundation/CFBase.h>
32 #include <CoreFoundation/CFString.h>
33 #include <CoreFoundation/CFDictionary.h>
34 #include <CoreFoundation/CFStringEncodingConverterExt.h>
35 #include <CoreFoundation/CFUniChar.h>
36 #include <CoreFoundation/CFUnicodeDecomposition.h>
37 #include <CoreFoundation/CFUnicodePrecomposition.h>
38 #include <CoreFoundation/CFPriv.h>
39 #include <CoreFoundation/CFNumber.h>
40 #include <CoreFoundation/CFNumberFormatter.h>
41 #include "CFInternal.h"
42 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
43 #include "CFLocaleInternal.h"
44 #endif
45 #include <stdarg.h>
46 #include <stdio.h>
47 #include <string.h>
48 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
49 #include <unistd.h>
50 #endif
51
52 #if defined(__GNUC__)
53 #define LONG_DOUBLE_SUPPORT 1
54 #else
55 #define LONG_DOUBLE_SUPPORT 0
56 #endif
57
58
59
60 #define USE_STRING_ROM 0
61
62
63 #ifndef INSTRUMENT_SHARED_STRINGS
64 #define INSTRUMENT_SHARED_STRINGS 0
65 #endif
66
67 CF_PRIVATE const CFStringRef __kCFLocaleCollatorID;
68
69 #if INSTRUMENT_SHARED_STRINGS
70 #include <sys/stat.h> /* for umask() */
71
72 static void __CFRecordStringAllocationEvent(const char *encoding, const char *bytes, CFIndex byteCount) {
73 static CFSpinLock_t lock = CFSpinLockInit;
74
75 if (memchr(bytes, '\n', byteCount)) return; //never record string allocation events for strings with newlines, because those confuse our parser and because they'll never go into the ROM
76
77 __CFSpinLock(&lock);
78 static int fd;
79 if (! fd) {
80 extern char **_NSGetProgname(void);
81 const char *name = *_NSGetProgname();
82 if (! name) name = "UNKNOWN";
83 umask(0);
84 char path[1024];
85 snprintf(path, sizeof(path), "/tmp/CFSharedStringInstrumentation_%s_%d.txt", name, getpid());
86 fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0666);
87 if (fd <= 0) {
88 int error = errno;
89 const char *errString = strerror(error);
90 fprintf(stderr, "open() failed with error %d (%s)\n", error, errString);
91 }
92 }
93 if (fd > 0) {
94 char *buffer = NULL;
95 char formatString[256];
96 snprintf(formatString, sizeof(formatString), "%%-8d\t%%-16s\t%%.%lds\n", byteCount);
97 int resultCount = asprintf(&buffer, formatString, getpid(), encoding, bytes);
98 if (buffer && resultCount > 0) write(fd, buffer, resultCount);
99 else puts("Couldn't record allocation event");
100 free(buffer);
101 }
102 __CFSpinUnlock(&lock);
103 }
104 #endif //INSTRUMENT_SHARED_STRINGS
105
106
107
108 typedef Boolean (*UNI_CHAR_FUNC)(UInt32 flags, UInt8 ch, UniChar *unicodeChar);
109
110 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
111 extern size_t malloc_good_size(size_t size);
112 #endif
113 extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars);
114
115 static void __CFStringAppendFormatCore(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFDictionaryRef stringsDictConfig, CFStringRef formatString, CFIndex initialArgPosition, const void *origValues, CFIndex originalValuesSize, va_list args);
116
117 #if defined(DEBUG)
118
119 // We put this into C & Pascal strings if we can't convert
120 #define CONVERSIONFAILURESTR "CFString conversion failed"
121
122 // We set this to true when purging the constant string table, so CFStringDeallocate doesn't assert
123 static Boolean __CFConstantStringTableBeingFreed = false;
124
125 #endif
126
127
128
129 // This section is for CFString compatibility and other behaviors...
130
131 static CFOptionFlags _CFStringCompatibilityMask = 0;
132
133 void _CFStringSetCompatibility(CFOptionFlags mask) {
134 _CFStringCompatibilityMask |= mask;
135 }
136
137 CF_INLINE Boolean __CFStringGetCompatibility(CFOptionFlags mask) {
138 return (_CFStringCompatibilityMask & mask) == mask;
139 }
140
141
142
143 // Two constant strings used by CFString; these are initialized in CFStringInitialize
144 CONST_STRING_DECL(kCFEmptyString, "")
145
// This is separate for C++
// Storage layout used by mutable CFStrings (the contents are never inline for these).
// buffer sits first, matching the other non-inline variants of struct __CFString.
struct __notInlineMutable {
    void *buffer;                               // Contents buffer
    CFIndex length;                             // Explicit length of the string
    CFIndex capacity;                           // Capacity in bytes
    unsigned int hasGap:1;                      // Currently unused
    unsigned int isFixedCapacity:1;             // Set by __CFStrSetIsFixed(); read by __CFStrIsFixed()
    unsigned int isExternalMutable:1;           // Set by __CFStrSetIsExternalMutable()
    unsigned int capacityProvidedExternally:1;  // If set, capacity is only changed when the string must grow beyond it
    // Client-requested capacity, in characters; sized so that it shares one word with the four flag bits above
#if __LP64__
    unsigned long desiredCapacity:60;
#else
    unsigned long desiredCapacity:28;
#endif
    CFAllocatorRef contentsAllocator;           // Optional
};                              // The only mutable variant for CFString
162
163
/* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
   The info bits in base select which member of the union describes a given string;
   the accessor inlines below (__CFStrContents, __CFStrLength, ...) encode those rules.
*/
struct __CFString {
    CFRuntimeBase base;
    union { // In many cases the allocated structs are smaller than these
        struct __inline1 {
            CFIndex length;
        } inline1;                                      // Bytes follow the length
        struct __notInlineImmutable1 {
            void *buffer;                               // Note that the buffer is in the same place for all non-inline variants of CFString
            CFIndex length;
            CFAllocatorRef contentsDeallocator;         // Optional; just the dealloc func is used
        } notInlineImmutable1;                          // This is the usual not-inline immutable CFString
        struct __notInlineImmutable2 {
            void *buffer;
            CFAllocatorRef contentsDeallocator;         // Optional; just the dealloc func is used
        } notInlineImmutable2;                          // This is the not-inline immutable CFString when length is stored with the contents (first byte)
        struct __notInlineMutable notInlineMutable;     // Mutable strings; see struct __notInlineMutable
    } variants;
};
184
185 /*
186 I = is immutable
187 E = not inline contents
188 U = is Unicode
189 N = has NULL byte
190 L = has length byte
191 D = explicit deallocator for contents (for mutable objects, allocator)
192 C = length field is CFIndex (rather than UInt32); only meaningful for 64-bit, really
193 if needed this bit (valuable real-estate) can be given up for another bit elsewhere, since this info is needed just for 64-bit
194
195 Also need (only for mutable)
196 F = is fixed
197 G = has gap
198 Cap, DesCap = capacity
199
200 B7 B6 B5 B4 B3 B2 B1 B0
201 U N L C I
202
203 B6 B5
204 0 0 inline contents
205 0 1 E (freed with default allocator)
206 1 0 E (not freed)
207 1 1 E D
208
209 !!! Note: Constant CFStrings use the bit patterns:
210 C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable)
211 D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable)
212 The bit usages should not be modified in a way that would affect these bit patterns.
213 */
214
/* Masks and values for the CFString info byte (str->base._cfinfo[CF_INFO_BITS]).
   Bits 5 and 6 (0x060) together describe where the contents live and how they are released;
   several names below deliberately alias the same mask or value for readability at the call site.
*/
enum {
    __kCFFreeContentsWhenDoneMask = 0x020,      // Bit 5: set for both the default-free and custom-free patterns
    __kCFFreeContentsWhenDone = 0x020,
    __kCFContentsMask = 0x060,                  // Bits 5-6: the two-bit contents field
    __kCFHasInlineContents = 0x000,
    __kCFNotInlineContentsNoFree = 0x040, // Don't free
    __kCFNotInlineContentsDefaultFree = 0x020, // Use allocator's free function
    __kCFNotInlineContentsCustomFree = 0x060, // Use a specially provided free function
    __kCFHasContentsAllocatorMask = 0x060,
    __kCFHasContentsAllocator = 0x060, // (For mutable strings) use a specially provided allocator
    __kCFHasContentsDeallocatorMask = 0x060,
    __kCFHasContentsDeallocator = 0x060,        // Same bit pattern as the custom-free value above
    __kCFIsMutableMask = 0x01,
    __kCFIsMutable = 0x01,
    __kCFIsUnicodeMask = 0x10,
    __kCFIsUnicode = 0x10,                      // Clear means the contents are 8-bit
    __kCFHasNullByteMask = 0x08,
    __kCFHasNullByte = 0x08,
    __kCFHasLengthByteMask = 0x04,
    __kCFHasLengthByte = 0x04,
    // !!! Bit 0x02 has been freed up
};
237
238
239 // !!! Assumptions:
240 // Mutable strings are not inline
241 // Compile-time constant strings are not inline
242 // Mutable strings always have explicit length (but they might also have length byte and null byte)
243 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
244 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
245
246 /* The following set of functions and macros need to be updated on change to the bit configuration
247 */
248 CF_INLINE Boolean __CFStrIsMutable(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsMutableMask) == __kCFIsMutable;}
249 CF_INLINE Boolean __CFStrIsInline(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFContentsMask) == __kCFHasInlineContents;}
250 CF_INLINE Boolean __CFStrFreeContentsWhenDone(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFFreeContentsWhenDoneMask) == __kCFFreeContentsWhenDone;}
251 CF_INLINE Boolean __CFStrHasContentsDeallocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsDeallocatorMask) == __kCFHasContentsDeallocator;}
252 CF_INLINE Boolean __CFStrIsUnicode(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) == __kCFIsUnicode;}
253 CF_INLINE Boolean __CFStrIsEightBit(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) != __kCFIsUnicode;}
254 CF_INLINE Boolean __CFStrHasNullByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasNullByteMask) == __kCFHasNullByte;}
255 CF_INLINE Boolean __CFStrHasLengthByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte;}
256 CF_INLINE Boolean __CFStrHasExplicitLength(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & (__kCFIsMutableMask | __kCFHasLengthByteMask)) != __kCFHasLengthByte;} // Has explicit length if (1) mutable or (2) not mutable and no length byte
257 CF_INLINE Boolean __CFStrIsConstant(CFStringRef str) {
258 #if __LP64__
259 return str->base._rc == 0;
260 #else
261 return (str->base._cfinfo[CF_RC_BITS]) == 0;
262 #endif
263 }
264
265 CF_INLINE SInt32 __CFStrSkipAnyLengthByte(CFStringRef str) {return ((str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents
266
267 /* Returns ptr to the buffer (which might include the length byte)
268 */
269 CF_INLINE const void *__CFStrContents(CFStringRef str) {
270 if (__CFStrIsInline(str)) {
271 return (const void *)(((uintptr_t)&(str->variants)) + (__CFStrHasExplicitLength(str) ? sizeof(CFIndex) : 0));
272 } else { // Not inline; pointer is always word 2
273 return str->variants.notInlineImmutable1.buffer;
274 }
275 }
276
277 static CFAllocatorRef *__CFStrContentsDeallocatorPtr(CFStringRef str) {
278 return __CFStrHasExplicitLength(str) ? &(((CFMutableStringRef)str)->variants.notInlineImmutable1.contentsDeallocator) : &(((CFMutableStringRef)str)->variants.notInlineImmutable2.contentsDeallocator); }
279
280 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
281 CF_INLINE CFAllocatorRef __CFStrContentsDeallocator(CFStringRef str) {
282 return *__CFStrContentsDeallocatorPtr(str);
283 }
284
285 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
286 CF_INLINE void __CFStrSetContentsDeallocator(CFStringRef str, CFAllocatorRef allocator) {
287 if (!(0 || 0)) CFRetain(allocator);
288 *__CFStrContentsDeallocatorPtr(str) = allocator;
289 }
290
291 static CFAllocatorRef *__CFStrContentsAllocatorPtr(CFStringRef str) {
292 CFAssert(!__CFStrIsInline(str), __kCFLogAssertion, "Asking for contents allocator of inline string");
293 CFAssert(__CFStrIsMutable(str), __kCFLogAssertion, "Asking for contents allocator of an immutable string");
294 return (CFAllocatorRef *)&(str->variants.notInlineMutable.contentsAllocator);
295 }
296
297 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
298 CF_INLINE CFAllocatorRef __CFStrContentsAllocator(CFMutableStringRef str) {
299 return *(__CFStrContentsAllocatorPtr(str));
300 }
301
302 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
303 CF_INLINE void __CFStrSetContentsAllocator(CFMutableStringRef str, CFAllocatorRef allocator) {
304 if (!(0 || 0)) CFRetain(allocator);
305 *(__CFStrContentsAllocatorPtr(str)) = allocator;
306 }
307
308 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
309 */
310 CF_INLINE CFIndex __CFStrLength(CFStringRef str) {
311 if (__CFStrHasExplicitLength(str)) {
312 if (__CFStrIsInline(str)) {
313 return str->variants.inline1.length;
314 } else {
315 return str->variants.notInlineImmutable1.length;
316 }
317 } else {
318 return (CFIndex)(*((uint8_t *)__CFStrContents(str)));
319 }
320 }
321
322 CF_INLINE CFIndex __CFStrLength2(CFStringRef str, const void *buffer) {
323 if (__CFStrHasExplicitLength(str)) {
324 if (__CFStrIsInline(str)) {
325 return str->variants.inline1.length;
326 } else {
327 return str->variants.notInlineImmutable1.length;
328 }
329 } else {
330 return (CFIndex)(*((uint8_t *)buffer));
331 }
332 }
333
334
335 Boolean __CFStringIsEightBit(CFStringRef str) {
336 return __CFStrIsEightBit(str);
337 }
338
339 /* Sets the content pointer for immutable or mutable strings.
340 */
341 CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p) {
342 // XXX_PCB catch all writes for mutable string case.
343 __CFAssignWithWriteBarrier((void **)&((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p);
344 }
345 CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v) {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._cfinfo[CF_INFO_BITS], 6, 0, v);}
346
347 CF_INLINE void __CFStrSetExplicitLength(CFStringRef str, CFIndex v) {
348 if (__CFStrIsInline(str)) {
349 ((CFMutableStringRef)str)->variants.inline1.length = v;
350 } else {
351 ((CFMutableStringRef)str)->variants.notInlineImmutable1.length = v;
352 }
353 }
354
355 CF_INLINE void __CFStrSetUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= __kCFIsUnicode;}
356 CF_INLINE void __CFStrClearUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~__kCFIsUnicode;}
357 CF_INLINE void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= (__kCFHasLengthByte | __kCFHasNullByte);}
358 CF_INLINE void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~(__kCFHasLengthByte | __kCFHasNullByte);}
359
360
361 // Assumption: The following set of inlines (using str->variants.notInlineMutable) are called with mutable strings only
362 CF_INLINE Boolean __CFStrIsFixed(CFStringRef str) {return str->variants.notInlineMutable.isFixedCapacity;}
363 CF_INLINE Boolean __CFStrIsExternalMutable(CFStringRef str) {return str->variants.notInlineMutable.isExternalMutable;}
364 CF_INLINE Boolean __CFStrHasContentsAllocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsAllocatorMask) == __kCFHasContentsAllocator;}
365 CF_INLINE void __CFStrSetIsFixed(CFMutableStringRef str) {str->variants.notInlineMutable.isFixedCapacity = 1;}
366 CF_INLINE void __CFStrSetIsExternalMutable(CFMutableStringRef str) {str->variants.notInlineMutable.isExternalMutable = 1;}
367 CF_INLINE void __CFStrSetHasGap(CFMutableStringRef str) {str->variants.notInlineMutable.hasGap = 1;}
368
369 // If capacity is provided externally, we only change it when we need to grow beyond it
370 CF_INLINE Boolean __CFStrCapacityProvidedExternally(CFStringRef str) {return str->variants.notInlineMutable.capacityProvidedExternally;}
371 CF_INLINE void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 1;}
372 CF_INLINE void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 0;}
373
374 // "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
375 CF_INLINE CFIndex __CFStrCapacity(CFStringRef str) {return str->variants.notInlineMutable.capacity;}
376 CF_INLINE void __CFStrSetCapacity(CFMutableStringRef str, CFIndex cap) {str->variants.notInlineMutable.capacity = cap;}
377
378 // "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
379 CF_INLINE CFIndex __CFStrDesiredCapacity(CFStringRef str) {return str->variants.notInlineMutable.desiredCapacity;}
380 CF_INLINE void __CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex size) {str->variants.notInlineMutable.desiredCapacity = size;}
381
382
383 static void *__CFStrAllocateMutableContents(CFMutableStringRef str, CFIndex size) {
384 void *ptr;
385 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
386 ptr = CFAllocatorAllocate(alloc, size, 0);
387 if (__CFOASafe) __CFSetLastAllocationEventName(ptr, "CFString (store)");
388 return ptr;
389 }
390
391 static void __CFStrDeallocateMutableContents(CFMutableStringRef str, void *buffer) {
392 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
393 if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str) && (0)) {
394 // do nothing
395 } else if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
396 // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle.
397 auto_zone_release(objc_collectableZone(), buffer);
398 } else {
399 CFAllocatorDeallocate(alloc, buffer);
400 }
401 }
402
403
404
405
/* CFString specific init flags
   Note that you cannot count on the external buffer not being copied.
   Also, if you specify an external buffer, you should not change it behind the CFString's back.
   Note that kCFStringNoCopyNoFreeProvidedContents (0x30000) contains both the kCFStringPascal (0x10000)
   and kCFStringNoCopyProvidedContents (0x20000) bit positions, so these values cannot be tested as independent bits.
*/
enum {
    __kCFThinUnicodeIfPossible = 0x1000000, /* See if the Unicode contents can be thinned down to 8-bit */
    kCFStringPascal = 0x10000, /* Indicating that the string data has a Pascal string structure (length byte at start) */
    kCFStringNoCopyProvidedContents = 0x20000, /* Don't copy the provided string contents if possible; free it when no longer needed */
    kCFStringNoCopyNoFreeProvidedContents = 0x30000 /* Don't copy the provided string contents if possible; don't free it when no longer needed */
};
416
417 /* System Encoding.
418 */
419 static CFStringEncoding __CFDefaultSystemEncoding = kCFStringEncodingInvalidId;
420 static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId;
421 CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId;
422
423
424 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
425 #define __defaultEncoding kCFStringEncodingMacRoman
426 #elif DEPLOYMENT_TARGET_WINDOWS
427 #define __defaultEncoding kCFStringEncodingWindowsLatin1
428 #else
429 #warning This value must match __CFGetConverter condition in CFStringEncodingConverter.c
430 #define __defaultEncoding kCFStringEncodingISOLatin1
431 #endif
432
433 CFStringEncoding CFStringGetSystemEncoding(void) {
434 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) {
435 __CFDefaultSystemEncoding = __defaultEncoding;
436 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding);
437 __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? (UNI_CHAR_FUNC)converter->toUnicode : NULL);
438 }
439 return __CFDefaultSystemEncoding;
440 }
441
442 // Fast version for internal use
443
444 CF_INLINE CFStringEncoding __CFStringGetSystemEncoding(void) {
445 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) (void)CFStringGetSystemEncoding();
446 return __CFDefaultSystemEncoding;
447 }
448
449 CFStringEncoding CFStringFileSystemEncoding(void) {
450 if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) {
451 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS
452 __CFDefaultFileSystemEncoding = kCFStringEncodingUTF8;
453 #else
454 __CFDefaultFileSystemEncoding = CFStringGetSystemEncoding();
455 #endif
456 }
457
458 return __CFDefaultFileSystemEncoding;
459 }
460
461 /* ??? Is returning length when no other answer is available the right thing?
462 !!! All of the (length > (LONG_MAX / N)) type checks are to avoid wrap-around and eventual malloc overflow in the client
463 */
464 CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) {
465 if (encoding == kCFStringEncodingUTF8) {
466 return (length > (LONG_MAX / 3)) ? kCFNotFound : (length * 3);
467 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32
468 return (length > (LONG_MAX / sizeof(UTF32Char))) ? kCFNotFound : (length * sizeof(UTF32Char));
469 } else {
470 encoding &= 0xFFF; // Mask off non-base part
471 }
472 switch (encoding) {
473 case kCFStringEncodingUnicode:
474 return (length > (LONG_MAX / sizeof(UniChar))) ? kCFNotFound : (length * sizeof(UniChar));
475
476 case kCFStringEncodingNonLossyASCII:
477 return (length > (LONG_MAX / 6)) ? kCFNotFound : (length * 6); // 1 Unichar can expand to 6 bytes
478
479 case kCFStringEncodingMacRoman:
480 case kCFStringEncodingWindowsLatin1:
481 case kCFStringEncodingISOLatin1:
482 case kCFStringEncodingNextStepLatin:
483 case kCFStringEncodingASCII:
484 return length / sizeof(uint8_t);
485
486 default:
487 return length / sizeof(uint8_t);
488 }
489 }
490
491
492 /* Returns whether the indicated encoding can be stored in 8-bit chars
493 */
494 CF_INLINE Boolean __CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding) {
495 switch (encoding & 0xFFF) { // just use encoding base
496 case kCFStringEncodingInvalidId:
497 case kCFStringEncodingUnicode:
498 case kCFStringEncodingNonLossyASCII:
499 return false;
500
501 case kCFStringEncodingMacRoman:
502 case kCFStringEncodingWindowsLatin1:
503 case kCFStringEncodingISOLatin1:
504 case kCFStringEncodingNextStepLatin:
505 case kCFStringEncodingASCII:
506 return true;
507
508 default: return false;
509 }
510 }
511
512 /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
513 For 10.9-linked apps, we've set this encoding to ASCII for all cases; see <rdar://problem/3597233>
514 */
515 CFStringEncoding __CFStringComputeEightBitStringEncoding(void) {
516 // This flag prevents recursive entry into __CFStringComputeEightBitStringEncoding
517 static Boolean __CFStringIsBeingInitialized2 = false;
518 if (__CFStringIsBeingInitialized2) return kCFStringEncodingASCII;
519 __CFStringIsBeingInitialized2 = true;
520
521 Boolean useAscii = true;
522 __CFStringIsBeingInitialized2 = false;
523 if (useAscii) {
524 __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII;
525 } else {
526 if (__CFDefaultEightBitStringEncoding == kCFStringEncodingInvalidId) {
527 CFStringEncoding systemEncoding = CFStringGetSystemEncoding();
528 if (systemEncoding == kCFStringEncodingInvalidId) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
529 return kCFStringEncodingASCII;
530 } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding)) {
531 __CFDefaultEightBitStringEncoding = systemEncoding;
532 } else {
533 __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII;
534 }
535 }
536 }
537 return __CFDefaultEightBitStringEncoding;
538 }
539
540 /* Returns whether the provided bytes can be stored in ASCII
541 */
542 CF_INLINE Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
543 #if __LP64__
544 /* A bit of unrolling; go by 32s, 16s, and 8s first */
545 while (len >= 32) {
546 uint64_t val = *(const uint64_t *)bytes;
547 uint64_t hiBits = (val & 0x8080808080808080ULL); // More efficient to collect this rather than do a conditional at every step
548 bytes += 8;
549 val = *(const uint64_t *)bytes;
550 hiBits |= (val & 0x8080808080808080ULL);
551 bytes += 8;
552 val = *(const uint64_t *)bytes;
553 hiBits |= (val & 0x8080808080808080ULL);
554 bytes += 8;
555 val = *(const uint64_t *)bytes;
556 if (hiBits | (val & 0x8080808080808080ULL)) return false;
557 bytes += 8;
558 len -= 32;
559 }
560
561 while (len >= 16) {
562 uint64_t val = *(const uint64_t *)bytes;
563 uint64_t hiBits = (val & 0x8080808080808080ULL);
564 bytes += 8;
565 val = *(const uint64_t *)bytes;
566 if (hiBits | (val & 0x8080808080808080ULL)) return false;
567 bytes += 8;
568 len -= 16;
569 }
570
571 while (len >= 8) {
572 uint64_t val = *(const uint64_t *)bytes;
573 if (val & 0x8080808080808080ULL) return false;
574 bytes += 8;
575 len -= 8;
576 }
577 #endif
578 /* Go by 4s */
579 while (len >= 4) {
580 uint32_t val = *(const uint32_t *)bytes;
581 if (val & 0x80808080U) return false;
582 bytes += 4;
583 len -= 4;
584 }
585 /* Handle the rest one byte at a time */
586 while (len--) {
587 if (*bytes++ & 0x80) return false;
588 }
589
590 return true;
591 }
592
593 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
594 */
595 CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) {
596 // If the encoding is the same as the 8-bit CFString encoding, we can just use the bytes as-is.
597 // One exception is ASCII, which unfortunately needs to mean ISOLatin1 for compatibility reasons <rdar://problem/5458321>.
598 if (encoding == __CFStringGetEightBitStringEncoding() && encoding != kCFStringEncodingASCII) return true;
599 if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true;
600 return false;
601 }
602
603
604 /* Returns whether a length byte can be tacked on to a string of the indicated length.
605 */
606 CF_INLINE Boolean __CFCanUseLengthByte(CFIndex len) {
607 #define __kCFMaxPascalStrLen 255
608 return (len <= __kCFMaxPascalStrLen) ? true : false;
609 }
610
611 /* Various string assertions
612 */
613 #define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
614 #define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
615 #define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && (idx + count) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
616 #define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
617 #define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
618 #define __CFAssertIsNotNegative(idx) CFAssert2(idx >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
619 #define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || (reqLen <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
620
621
622 /* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity. This function will return -1 if the new capacity is just too big (> LONG_MAX).
623 Additional complications are applied in the following order:
624 - desiredCapacity, which is the minimum (except initially things can be at zero)
625 - rounding up to factor of 8
626 - compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256
627 - we need to make sure GROWFACTOR computation doesn't suffer from overflow issues on 32-bit, hence the casting to unsigned. Normally for required capacity of C bytes, the allocated space is (3C+1)/2. If C > ULONG_MAX/3, we instead simply return LONG_MAX
628 */
/* SHRINKFACTOR(c): half of c; a buffer is shrunk when the required capacity drops below this.
   GROWFACTOR(c):   (3c + 1) / 2, the capacity to allocate when extra room is wanted.
   The non-LP64 variant does the multiplication in unsigned long and, for c at or above
   ULONG_MAX / 3, returns max(LONG_MAX - 4095, c) instead, so the multiplication cannot wrap.
   The argument is parenthesized so that an expression argument is scaled as a whole.
*/
#define SHRINKFACTOR(c) ((c) / 2)

#if __LP64__
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
#else
#define GROWFACTOR(c) (((c) >= (ULONG_MAX / 3UL)) ? __CFMax(LONG_MAX - 4095, (c)) : (((unsigned long)(c) * 3 + 1) / 2))
#endif
636
637 CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, unsigned long reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) {
638 if (capacity != 0 || reqCapacity != 0) { /* If initially zero, and space not needed, leave it at that... */
639 if ((capacity < reqCapacity) || /* We definitely need the room... */
640 (!__CFStrCapacityProvidedExternally(str) && /* Assuming we control the capacity... */
641 ((reqCapacity < SHRINKFACTOR(capacity)) || /* ...we have too much room! */
642 (!leaveExtraRoom && (reqCapacity < capacity))))) { /* ...we need to eliminate the extra space... */
643 if (reqCapacity > LONG_MAX) return -1; /* Too big any way you cut it */
644 unsigned long newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity; /* Grow by 3/2 if extra room is desired */
645 CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize;
646 if (newCapacity < desiredCapacity) { /* If less than desired, bump up to desired */
647 newCapacity = desiredCapacity;
648 } else if (__CFStrIsFixed(str)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
649 newCapacity = __CFMax(desiredCapacity, reqCapacity); /* !!! So, fixed is not really fixed, but "tight" */
650 }
651 if (__CFStrHasContentsAllocator(str)) { /* Also apply any preferred size from the allocator */
652 newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0);
653 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
654 } else {
655 newCapacity = malloc_good_size(newCapacity);
656 #endif
657 }
658 return (newCapacity > LONG_MAX) ? -1 : (CFIndex)newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
659 }
660 }
661 return capacity;
662 }
663
664
665 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
666 numBlocks is current total number of blocks within buffer.
667 blockSize is the size of each block in bytes
668 ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
669 insertLength is the final spacing between the remaining blocks
670
671 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO
672 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
673 if insertLength = 0, result = A B D G H
674
675 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
676 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
677
678 */
679 typedef struct _CFStringDeferredRange {
680 CFIndex beginning;
681 CFIndex length;
682 CFIndex shift;
683 } CFStringDeferredRange;
684
685 typedef struct _CFStringStackInfo {
686 CFIndex capacity; // Capacity (if capacity == count, need to realloc to add another)
687 CFIndex count; // Number of elements actually stored
688 CFStringDeferredRange *stack;
689 Boolean hasMalloced; // Indicates "stack" is allocated and needs to be deallocated when done
690 char _padding[3];
691 } CFStringStackInfo;
692
693 CF_INLINE void pop (CFStringStackInfo *si, CFStringDeferredRange *topRange) {
694 si->count = si->count - 1;
695 *topRange = si->stack[si->count];
696 }
697
698 CF_INLINE void push (CFStringStackInfo *si, const CFStringDeferredRange *newRange) {
699 if (si->count == si->capacity) {
700 // increase size of the stack
701 si->capacity = (si->capacity + 4) * 2;
702 if (si->hasMalloced) {
703 si->stack = (CFStringDeferredRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, si->stack, si->capacity * sizeof(CFStringDeferredRange), 0);
704 } else {
705 CFStringDeferredRange *newStack = (CFStringDeferredRange *)CFAllocatorAllocate(kCFAllocatorSystemDefault, si->capacity * sizeof(CFStringDeferredRange), 0);
706 memmove(newStack, si->stack, si->count * sizeof(CFStringDeferredRange));
707 si->stack = newStack;
708 si->hasMalloced = true;
709 }
710 }
711 si->stack[si->count] = *newRange;
712 si->count = si->count + 1;
713 }
714
715 static void rearrangeBlocks(
716 uint8_t *buffer,
717 CFIndex numBlocks,
718 CFIndex blockSize,
719 const CFRange *ranges,
720 CFIndex numRanges,
721 CFIndex insertLength) {
722
723 #define origStackSize 10
724 CFStringDeferredRange origStack[origStackSize];
725 CFStringStackInfo si = {origStackSize, 0, origStack, false, {0, 0, 0}};
726 CFStringDeferredRange currentNonRange = {0, 0, 0};
727 CFIndex currentRange = 0;
728 CFIndex amountShifted = 0;
729
730 // must have at least 1 range left.
731
732 while (currentRange < numRanges) {
733 currentNonRange.beginning = (ranges[currentRange].location + ranges[currentRange].length) * blockSize;
734 if ((numRanges - currentRange) == 1) {
735 // at the end.
736 currentNonRange.length = numBlocks * blockSize - currentNonRange.beginning;
737 if (currentNonRange.length == 0) break;
738 } else {
739 currentNonRange.length = (ranges[currentRange + 1].location * blockSize) - currentNonRange.beginning;
740 }
741 currentNonRange.shift = amountShifted + (insertLength * blockSize) - (ranges[currentRange].length * blockSize);
742 amountShifted = currentNonRange.shift;
743 if (amountShifted <= 0) {
744 // process current item and rest of stack
745 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
746 while (si.count > 0) {
747 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
748 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
749 }
750 } else {
751 // add currentNonRange to stack.
752 push (&si, &currentNonRange);
753 }
754 currentRange++;
755 }
756
757 // no more ranges. if anything is on the stack, process.
758
759 while (si.count > 0) {
760 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
761 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
762 }
763 if (si.hasMalloced) CFAllocatorDeallocate (kCFAllocatorSystemDefault, si.stack);
764 }
765
766 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
767 */
768 static void copyBlocks(
769 const uint8_t *srcBuffer,
770 uint8_t *dstBuffer,
771 CFIndex srcLength,
772 Boolean srcIsUnicode,
773 Boolean dstIsUnicode,
774 const CFRange *ranges,
775 CFIndex numRanges,
776 CFIndex insertLength) {
777
778 CFIndex srcLocationInBytes = 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
779 CFIndex dstLocationInBytes = 0; // ditto
780 CFIndex srcBlockSize = srcIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
781 CFIndex insertLengthInBytes = insertLength * (dstIsUnicode ? sizeof(UniChar) : sizeof(uint8_t));
782 CFIndex rangeIndex = 0;
783 CFIndex srcToDstMultiplier = (srcIsUnicode == dstIsUnicode) ? 1 : (sizeof(UniChar) / sizeof(uint8_t));
784
785 // Loop over the ranges, copying the range to be preserved (right before each range)
786 while (rangeIndex < numRanges) {
787 CFIndex srcLengthInBytes = ranges[rangeIndex].location * srcBlockSize - srcLocationInBytes; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
788 if (srcLengthInBytes > 0) {
789 if (srcIsUnicode == dstIsUnicode) {
790 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLengthInBytes);
791 } else {
792 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLengthInBytes);
793 }
794 }
795 srcLocationInBytes += srcLengthInBytes + ranges[rangeIndex].length * srcBlockSize; // Skip over the just-copied and to-be-deleted stuff
796 dstLocationInBytes += srcLengthInBytes * srcToDstMultiplier + insertLengthInBytes;
797 rangeIndex++;
798 }
799
800 // Do last range (the one beyond last range)
801 if (srcLocationInBytes < srcLength * srcBlockSize) {
802 if (srcIsUnicode == dstIsUnicode) {
803 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLength * srcBlockSize - srcLocationInBytes);
804 } else {
805 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLength * srcBlockSize - srcLocationInBytes);
806 }
807 }
808 }
809
810 /* Call the callback; if it doesn't exist or returns false, then log
811 */
812 static void __CFStringHandleOutOfMemory(CFTypeRef obj) {
813 CFStringRef msg = CFSTR("Out of memory. We suggest restarting the application. If you have an unsaved document, create a backup copy in Finder, then try to save.");
814 {
815 CFLog(kCFLogLevelCritical, CFSTR("%@"), msg);
816 }
817 }
818
819 /* Reallocates the backing store of the string to accomodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)
820 */
821 static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) {
822 const uint8_t *curContents = (uint8_t *)__CFStrContents(str);
823 CFIndex curLength = curContents ? __CFStrLength2(str, curContents) : 0;
824 unsigned long newLength; // We use unsigned to better keep track of overflow
825
826 // Compute new length of the string
827 if (numDeleteRanges == 1) {
828 newLength = curLength + insertLength - deleteRanges[0].length;
829 } else {
830 CFIndex cnt;
831 newLength = curLength + insertLength * numDeleteRanges;
832 for (cnt = 0; cnt < numDeleteRanges; cnt++) newLength -= deleteRanges[cnt].length;
833 }
834
835 __CFAssertIfFixedLengthIsOK(str, newLength);
836
837 if (newLength == 0) {
838 // An somewhat optimized code-path for this special case, with the following implicit values:
839 // newIsUnicode = false
840 // useLengthAndNullBytes = false
841 // newCharSize = sizeof(uint8_t)
842 // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
843 // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
844 CFIndex curCapacity = __CFStrCapacity(str);
845 CFIndex newCapacity = __CFStrNewCapacity(str, 0, curCapacity, true, sizeof(uint8_t));
846 if (newCapacity != curCapacity) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
847 if (curContents) __CFStrDeallocateMutableContents(str, (uint8_t *)curContents);
848 __CFStrSetContentPtr(str, NULL);
849 __CFStrSetCapacity(str, 0);
850 __CFStrClearCapacityProvidedExternally(str);
851 __CFStrClearHasLengthAndNullBytes(str);
852 if (!__CFStrIsExternalMutable(str)) __CFStrClearUnicode(str); // External mutable implies Unicode
853 } else {
854 if (!__CFStrIsExternalMutable(str)) {
855 __CFStrClearUnicode(str);
856 if (curCapacity >= (int)(sizeof(uint8_t) * 2)) { // If there's room
857 __CFStrSetHasLengthAndNullBytes(str);
858 ((uint8_t *)curContents)[0] = ((uint8_t *)curContents)[1] = 0;
859 } else {
860 __CFStrClearHasLengthAndNullBytes(str);
861 }
862 }
863 }
864 __CFStrSetExplicitLength(str, 0);
865 } else { /* This else-clause assumes newLength > 0 */
866 Boolean oldIsUnicode = __CFStrIsUnicode(str);
867 Boolean newIsUnicode = makeUnicode || (oldIsUnicode /* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str);
868 CFIndex newCharSize = newIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
869 Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */;
870 CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0; /* 2 extra bytes to keep the length byte & null... */
871 CFIndex curCapacity = __CFStrCapacity(str);
872 if (newLength > (LONG_MAX - numExtraBytes) / newCharSize) __CFStringHandleOutOfMemory(str); // Does not return
873 CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize);
874 if (newCapacity == -1) __CFStringHandleOutOfMemory(str); // Does not return
875 Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
876 uint8_t *newContents;
877 if (allocNewBuffer) {
878 newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
879 if (!newContents) { // Try allocating without extra room
880 newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, false, newCharSize);
881 // Since we checked for this above, it shouldn't be the case here, but just in case
882 if (newCapacity == -1) __CFStringHandleOutOfMemory(str); // Does not return
883 newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
884 if (!newContents) __CFStringHandleOutOfMemory(str); // Does not return
885 }
886 } else {
887 newContents = (uint8_t *)curContents;
888 }
889
890 Boolean hasLengthAndNullBytes = __CFStrHasLengthByte(str);
891
892 CFAssert1(hasLengthAndNullBytes == __CFStrHasNullByte(str), __kCFLogAssertion, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__);
893
894 // Calculate pointers to the actual string content (skipping over the length byte, if present). Note that keeping a reference to the base is needed for newContents under GC, since the copy may take a long time.
895 const uint8_t *curContentsBody = hasLengthAndNullBytes ? (curContents+1) : curContents;
896 uint8_t *newContentsBody = useLengthAndNullBytes ? (newContents+1) : newContents;
897
898 if (curContents) {
899 if (oldIsUnicode == newIsUnicode) {
900 if (newContentsBody == curContentsBody) {
901 rearrangeBlocks(newContentsBody, curLength, newCharSize, deleteRanges, numDeleteRanges, insertLength);
902 } else {
903 copyBlocks(curContentsBody, newContentsBody, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
904 }
905 } else if (newIsUnicode) { /* this implies we have a new buffer */
906 copyBlocks(curContentsBody, newContentsBody, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
907 }
908 if (allocNewBuffer && __CFStrFreeContentsWhenDone(str)) __CFStrDeallocateMutableContents(str, (void *)curContents);
909 }
910
911 if (!newIsUnicode) {
912 if (useLengthAndNullBytes) {
913 newContentsBody[newLength] = 0; /* Always have null byte, if not unicode */
914 newContents[0] = __CFCanUseLengthByte(newLength) ? (uint8_t)newLength : 0;
915 if (!hasLengthAndNullBytes) __CFStrSetHasLengthAndNullBytes(str);
916 } else {
917 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
918 }
919 if (oldIsUnicode) __CFStrClearUnicode(str);
920 } else { // New is unicode...
921 if (!oldIsUnicode) __CFStrSetUnicode(str);
922 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
923 }
924 __CFStrSetExplicitLength(str, newLength);
925
926 if (allocNewBuffer) {
927 __CFStrSetCapacity(str, newCapacity);
928 __CFStrClearCapacityProvidedExternally(str);
929 __CFStrSetContentPtr(str, newContents);
930 }
931 }
932 }
933
934 /* Same as above, but takes one range (very common case)
935 */
936 CF_INLINE void __CFStringChangeSize(CFMutableStringRef str, CFRange range, CFIndex insertLength, Boolean makeUnicode) {
937 __CFStringChangeSizeMultiple(str, &range, 1, insertLength, makeUnicode);
938 }
939
940
/* Forward declaration, needed in DEBUG builds only: the assertion in __CFStringDeallocate() refers to this function before its definition. */
#ifdef DEBUG
static Boolean __CFStrIsConstantString(CFStringRef str);
#endif
944
945 static void __CFStringDeallocate(CFTypeRef cf) {
946 CFStringRef str = (CFStringRef)cf;
947
948 // If in DEBUG mode, check to see if the string a CFSTR, and complain.
949 CFAssert1(__CFConstantStringTableBeingFreed || !__CFStrIsConstantString((CFStringRef)cf), __kCFLogAssertion, "Tried to deallocate CFSTR(\"%@\")", str);
950
951 if (!__CFStrIsInline(str)) {
952 uint8_t *contents;
953 Boolean isMutable = __CFStrIsMutable(str);
954 if (__CFStrFreeContentsWhenDone(str) && (contents = (uint8_t *)__CFStrContents(str))) {
955 if (isMutable) {
956 __CFStrDeallocateMutableContents((CFMutableStringRef)str, contents);
957 } else {
958 if (__CFStrHasContentsDeallocator(str)) {
959 CFAllocatorRef allocator = __CFStrContentsDeallocator(str);
960 CFAllocatorDeallocate(allocator, contents);
961 if (!(0 || 0 )) CFRelease(allocator);
962 } else {
963 CFAllocatorRef alloc = __CFGetAllocator(str);
964 CFAllocatorDeallocate(alloc, contents);
965 }
966 }
967 }
968 if (isMutable && __CFStrHasContentsAllocator(str)) {
969 CFAllocatorRef allocator = __CFStrContentsAllocator((CFMutableStringRef)str);
970 if (!(0 || 0)) CFRelease(allocator);
971 }
972 }
973 }
974
975 static Boolean __CFStringEqual(CFTypeRef cf1, CFTypeRef cf2) {
976 CFStringRef str1 = (CFStringRef)cf1;
977 CFStringRef str2 = (CFStringRef)cf2;
978 const uint8_t *contents1;
979 const uint8_t *contents2;
980 CFIndex len1;
981
982 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
983 /* !!! We do not need == test, as the CFBase runtime assures this */
984
985 contents1 = (uint8_t *)__CFStrContents(str1);
986 contents2 = (uint8_t *)__CFStrContents(str2);
987 len1 = __CFStrLength2(str1, contents1);
988
989 if (len1 != __CFStrLength2(str2, contents2)) return false;
990
991 contents1 += __CFStrSkipAnyLengthByte(str1);
992 contents2 += __CFStrSkipAnyLengthByte(str2);
993
994 if (__CFStrIsEightBit(str1) && __CFStrIsEightBit(str2)) {
995 return memcmp((const char *)contents1, (const char *)contents2, len1) ? false : true;
996 } else if (__CFStrIsEightBit(str1)) { /* One string has Unicode contents */
997 CFStringInlineBuffer buf;
998 CFIndex buf_idx = 0;
999
1000 CFStringInitInlineBuffer(str1, &buf, CFRangeMake(0, len1));
1001 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
1002 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents2)[buf_idx]) return false;
1003 }
1004 } else if (__CFStrIsEightBit(str2)) { /* One string has Unicode contents */
1005 CFStringInlineBuffer buf;
1006 CFIndex buf_idx = 0;
1007
1008 CFStringInitInlineBuffer(str2, &buf, CFRangeMake(0, len1));
1009 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
1010 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents1)[buf_idx]) return false;
1011 }
1012 } else { /* Both strings have Unicode contents */
1013 CFIndex idx;
1014 for (idx = 0; idx < len1; idx++) {
1015 if (((UniChar *)contents1)[idx] != ((UniChar *)contents2)[idx]) return false;
1016 }
1017 }
1018 return true;
1019 }
1020
1021
1022 /* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
1023 If the length is less than or equal to 96, then the hash function is simply the
1024 following (n is the nth UniChar character, starting from 0):
1025
1026 hash(-1) = length
1027 hash(n) = hash(n-1) * 257 + unichar(n);
1028 Hash = hash(length-1) + (hash(length-1) << (length & 31))
1029
1030 If the length is greater than 96, then the above algorithm applies to
1031 characters 0..31, (length/2)-16..(length/2)+15, and length-32..length-1, inclusive;
1032 thus the first, middle, and last 32 characters.
1033
1034 Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401; 67503105 is 257^4 - 256^4
1035 If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.
1036 !!! We haven't updated for LP64 yet
1037
1038 NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.
1039
1040 Hash function was changed between Panther and Tiger, and Tiger and Leopard.
1041 */
/* Strings of up to this many characters are hashed in full; longer ones are sampled (first, middle and last 32 characters). */
#define HashEverythingLimit 96

/* Folds four consecutive characters into the local variable "result" in one step and advances "pointer" by four.
   accessStart and accessEnd are token fragments placed around the index (0..3) to form the expression that reads each character, e.g. "p[" and "]".
   The constants are 257^3, 257^2 and 257; 67503105 is 257^4 - 256^4.
*/
#define HashNextFourUniChars(accessStart, accessEnd, pointer) \
    { \
        result = result * 67503105 \
               + (accessStart 0 accessEnd) * 16974593 \
               + (accessStart 1 accessEnd) * 66049 \
               + (accessStart 2 accessEnd) * 257 \
               + (accessStart 3 accessEnd); \
        pointer += 4; \
    }

/* Single-character version of the above: result = result * 257 + character, then "pointer" advances by one. */
#define HashNextUniChar(accessStart, accessEnd, pointer) \
    { \
        result = result * 257 + (accessStart 0 accessEnd); \
        pointer++; \
    }
1049
1050
1051 /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing.
1052 */
1053 CF_INLINE CFHashCode __CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) {
1054 CFHashCode result = actualLen;
1055 if (len <= HashEverythingLimit) {
1056 const UniChar *end4 = uContents + (len & ~3);
1057 const UniChar *end = uContents + len;
1058 while (uContents < end4) HashNextFourUniChars(uContents[, ], uContents); // First count in fours
1059 while (uContents < end) HashNextUniChar(uContents[, ], uContents); // Then for the last <4 chars, count in ones...
1060 } else {
1061 const UniChar *contents, *end;
1062 contents = uContents;
1063 end = contents + 32;
1064 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1065 contents = uContents + (len >> 1) - 16;
1066 end = contents + 32;
1067 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1068 end = uContents + len;
1069 contents = end - 32;
1070 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1071 }
1072 return result + (result << (actualLen & 31));
1073 }
1074
1075 /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check.
1076 */
1077 CF_INLINE CFHashCode __CFStrHashEightBit(const uint8_t *cContents, CFIndex len) {
1078 #if defined(DEBUG)
1079 if (!__CFCharToUniCharFunc) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
1080 CFIndex cnt;
1081 Boolean err = false;
1082 if (len <= HashEverythingLimit) {
1083 for (cnt = 0; cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1084 } else {
1085 for (cnt = 0; cnt < 32; cnt++) if (cContents[cnt] >= 128) err = true;
1086 for (cnt = (len >> 1) - 16; cnt < (len >> 1) + 16; cnt++) if (cContents[cnt] >= 128) err = true;
1087 for (cnt = (len - 32); cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1088 }
1089 if (err) {
1090 // Can't do log here, as it might be too early
1091 fprintf(stderr, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
1092 }
1093 }
1094 #endif
1095 CFHashCode result = len;
1096 if (len <= HashEverythingLimit) {
1097 const uint8_t *end4 = cContents + (len & ~3);
1098 const uint8_t *end = cContents + len;
1099 while (cContents < end4) HashNextFourUniChars(__CFCharToUniCharTable[cContents[, ]], cContents); // First count in fours
1100 while (cContents < end) HashNextUniChar(__CFCharToUniCharTable[cContents[, ]], cContents); // Then for the last <4 chars, count in ones...
1101 } else {
1102 const uint8_t *contents, *end;
1103 contents = cContents;
1104 end = contents + 32;
1105 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1106 contents = cContents + (len >> 1) - 16;
1107 end = contents + 32;
1108 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1109 end = cContents + len;
1110 contents = end - 32;
1111 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1112 }
1113 return result + (result << (len & 31));
1114 }
1115
1116 CFHashCode CFStringHashISOLatin1CString(const uint8_t *bytes, CFIndex len) {
1117 CFHashCode result = len;
1118 if (len <= HashEverythingLimit) {
1119 const uint8_t *end4 = bytes + (len & ~3);
1120 const uint8_t *end = bytes + len;
1121 while (bytes < end4) HashNextFourUniChars(bytes[, ], bytes); // First count in fours
1122 while (bytes < end) HashNextUniChar(bytes[, ], bytes); // Then for the last <4 chars, count in ones...
1123 } else {
1124 const uint8_t *contents, *end;
1125 contents = bytes;
1126 end = contents + 32;
1127 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1128 contents = bytes + (len >> 1) - 16;
1129 end = contents + 32;
1130 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1131 end = bytes + len;
1132 contents = end - 32;
1133 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1134 }
1135 return result + (result << (len & 31));
1136 }
1137
1138 CFHashCode CFStringHashCString(const uint8_t *bytes, CFIndex len) {
1139 return __CFStrHashEightBit(bytes, len);
1140 }
1141
1142 CFHashCode CFStringHashCharacters(const UniChar *characters, CFIndex len) {
1143 return __CFStrHashCharacters(characters, len, len);
1144 }
1145
1146 /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash.
1147 */
1148 CFHashCode CFStringHashNSString(CFStringRef str) {
1149 UniChar buffer[HashEverythingLimit];
1150 CFIndex bufLen; // Number of characters in the buffer for hashing
1151 CFIndex len = 0; // Actual length of the string
1152
1153 len = CF_OBJC_CALLV((NSString *)str, length);
1154 if (len <= HashEverythingLimit) {
1155 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer range:NSMakeRange(0, len));
1156 bufLen = len;
1157 } else {
1158 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer range:NSMakeRange(0, 32));
1159 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer+32 range:NSMakeRange((len >> 1) - 16, 32));
1160 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer+64 range:NSMakeRange(len - 32, 32));
1161 bufLen = HashEverythingLimit;
1162 }
1163 return __CFStrHashCharacters(buffer, bufLen, len);
1164 }
1165
1166 CFHashCode __CFStringHash(CFTypeRef cf) {
1167 /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
1168 CFStringRef str = (CFStringRef)cf;
1169 const uint8_t *contents = (uint8_t *)__CFStrContents(str);
1170 CFIndex len = __CFStrLength2(str, contents);
1171
1172 if (__CFStrIsEightBit(str)) {
1173 contents += __CFStrSkipAnyLengthByte(str);
1174 return __CFStrHashEightBit(contents, len);
1175 } else {
1176 return __CFStrHashCharacters((const UniChar *)contents, len, len);
1177 }
1178 }
1179
1180
1181 static CFStringRef __CFStringCopyDescription(CFTypeRef cf) {
1182 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf, __CFGetAllocator(cf), cf);
1183 }
1184
1185 static CFStringRef __CFStringCopyFormattingDescription(CFTypeRef cf, CFDictionaryRef formatOptions) {
1186 return (CFStringRef)CFStringCreateCopy(__CFGetAllocator(cf), (CFStringRef)cf);
1187 }
1188
1189 static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID;
1190
1191 typedef CFTypeRef (*CF_STRING_CREATE_COPY)(CFAllocatorRef alloc, CFTypeRef theString);
1192
1193 static const CFRuntimeClass __CFStringClass = {
1194 _kCFRuntimeScannedObject,
1195 "CFString",
1196 NULL, // init
1197 (CF_STRING_CREATE_COPY)CFStringCreateCopy,
1198 __CFStringDeallocate,
1199 __CFStringEqual,
1200 __CFStringHash,
1201 __CFStringCopyFormattingDescription,
1202 __CFStringCopyDescription
1203 };
1204
1205 CF_PRIVATE void __CFStringInitialize(void) {
1206 __kCFStringTypeID = _CFRuntimeRegisterClass(&__CFStringClass);
1207 }
1208
1209 CFTypeID CFStringGetTypeID(void) {
1210 return __kCFStringTypeID;
1211 }
1212
1213
1214 static Boolean CFStrIsUnicode(CFStringRef str) {
1215 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, Boolean, (NSString *)str, _encodingCantBeStoredInEightBitCFString);
1216 return __CFStrIsUnicode(str);
1217 }
1218
1219
1220
/* Sentinel (not a real allocator) passed as contentsDeallocator to mean "free with the string's own allocator". */
#define ALLOCATORSFREEFUNC ((CFAllocatorRef)(-1))
1222
1223 /* contentsDeallocator indicates how to free the data if it's noCopy == true:
1224 kCFAllocatorNull: don't free
1225 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
1226 NULL: default allocator
1227 otherwise it's the allocator that should be used (it will be explicitly stored)
1228 if noCopy == false, then contentsDeallocator should be ALLOCATORSFREEFUNC
1229 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
1230 possiblyExternalFormat indicates that the bytes might have BOM and be swapped
1231 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
1232 numBytes contains the actual number of bytes in "bytes", including Length byte,
1233 BUT not the NULL byte at the end
1234 bytes should not contain BOM characters
1235 !!! Various flags should be combined to reduce number of arguments, if possible
1236 */
1237 CF_PRIVATE CFStringRef __CFStringCreateImmutableFunnel3(
1238 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1239 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1240 CFAllocatorRef contentsDeallocator, UInt32 converterFlags) {
1241
1242 CFMutableStringRef str;
1243 CFVarWidthCharBuffer vBuf;
1244 CFIndex size;
1245 Boolean useLengthByte = false;
1246 Boolean useNullByte = false;
1247 Boolean useInlineData = false;
1248
1249 #if INSTRUMENT_SHARED_STRINGS
1250 const char *recordedEncoding;
1251 char encodingBuffer[128];
1252 if (encoding == kCFStringEncodingUnicode) recordedEncoding = "Unicode";
1253 else if (encoding == kCFStringEncodingASCII) recordedEncoding = "ASCII";
1254 else if (encoding == kCFStringEncodingUTF8) recordedEncoding = "UTF8";
1255 else if (encoding == kCFStringEncodingMacRoman) recordedEncoding = "MacRoman";
1256 else {
1257 snprintf(encodingBuffer, sizeof(encodingBuffer), "0x%lX", (unsigned long)encoding);
1258 recordedEncoding = encodingBuffer;
1259 }
1260 #endif
1261
1262 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1263
1264 if (contentsDeallocator == ALLOCATORSFREEFUNC) {
1265 contentsDeallocator = alloc;
1266 } else if (contentsDeallocator == NULL) {
1267 contentsDeallocator = __CFGetDefaultAllocator();
1268 }
1269
1270 if ((NULL != kCFEmptyString) && (numBytes == 0) && _CFAllocatorIsSystemDefault(alloc)) { // If we are using the system default allocator, and the string is empty, then use the empty string!
1271 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
1272 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1273 }
1274 return (CFStringRef)CFRetain(kCFEmptyString); // Quick exit; won't catch all empty strings, but most
1275 }
1276
1277 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1278
1279 vBuf.shouldFreeChars = false; // We use this to remember to free the buffer possibly allocated by decode
1280
1281 // Record whether we're starting out with an ASCII-superset string, because we need to know this later for the string ROM; this may get changed later if we successfully convert down from Unicode. We only record this once because __CFCanUseEightBitCFStringForBytes() can be expensive.
1282 Boolean stringSupportsEightBitCFRepresentation = encoding != kCFStringEncodingUnicode && __CFCanUseEightBitCFStringForBytes((const uint8_t *)bytes, numBytes, encoding);
1283
1284 // We may also change noCopy within this function if we have to decode the string into an external buffer. We do not want to avoid the use of the string ROM merely because we tried to be efficient and reuse the decoded buffer for the CFString's external storage. Therefore, we use this variable to track whether we actually can ignore the noCopy flag (which may or may not be set anyways).
1285 Boolean stringROMShouldIgnoreNoCopy = false;
1286
1287 // First check to see if the data needs to be converted...
1288 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
1289
1290 if ((encoding == kCFStringEncodingUnicode && possiblyExternalFormat) || (encoding != kCFStringEncodingUnicode && ! stringSupportsEightBitCFRepresentation)) {
1291 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0);
1292 CFIndex realNumBytes = numBytes - (hasLengthByte ? 1 : 0);
1293 Boolean usingPassedInMemory = false;
1294
1295 vBuf.allocator = kCFAllocatorSystemDefault; // We don't want to use client's allocator for temp stuff
1296 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
1297
1298 if (!__CFStringDecodeByteStream3((const uint8_t *)realBytes, realNumBytes, encoding, false, &vBuf, &usingPassedInMemory, converterFlags)) {
1299 // Note that if the string can't be created, we don't free the buffer, even if there is a contents deallocator. This is on purpose.
1300 return NULL;
1301 }
1302
1303 encoding = vBuf.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUnicode;
1304
1305 // Update our flag according to whether the decoded buffer is ASCII
1306 stringSupportsEightBitCFRepresentation = vBuf.isASCII;
1307
1308 if (!usingPassedInMemory) {
1309
1310 // Because __CFStringDecodeByteStream3() allocated our buffer, it's OK for us to free it if we can get the string from the ROM.
1311 stringROMShouldIgnoreNoCopy = true;
1312
1313 // Make the parameters fit the new situation
1314 numBytes = vBuf.isASCII ? vBuf.numChars : (vBuf.numChars * sizeof(UniChar));
1315 hasLengthByte = hasNullByte = false;
1316
1317 // Get rid of the original buffer if its not being used
1318 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1319 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1320 }
1321 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1322
1323 // See if we can reuse any storage the decode func might have allocated
1324 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1325
1326 if (vBuf.shouldFreeChars && (alloc == vBuf.allocator) && encoding == kCFStringEncodingUnicode) {
1327 vBuf.shouldFreeChars = false; // Transferring ownership to the CFString
1328 bytes = CFAllocatorReallocate(vBuf.allocator, (void *)vBuf.chars.unicode, numBytes, 0); // Tighten up the storage
1329 noCopy = true;
1330 #if INSTRUMENT_SHARED_STRINGS
1331 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-NoCopy";
1332 else recordedEncoding = "ForeignUnicode-NoCopy";
1333 #endif
1334 } else {
1335 #if INSTRUMENT_SHARED_STRINGS
1336 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-Copy";
1337 else recordedEncoding = "ForeignUnicode-Copy";
1338 #endif
1339 bytes = vBuf.chars.unicode;
1340 noCopy = false; // Can't do noCopy anymore
1341 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1342 }
1343
1344 }
1345
1346 // At this point, all necessary input arguments have been changed to reflect the new state
1347
1348 } else if (encoding == kCFStringEncodingUnicode && tryToReduceUnicode) { // Check to see if we can reduce Unicode to ASCII
1349 CFIndex cnt;
1350 CFIndex len = numBytes / sizeof(UniChar);
1351 Boolean allASCII = true;
1352
1353 for (cnt = 0; cnt < len; cnt++) if (((const UniChar *)bytes)[cnt] > 127) {
1354 allASCII = false;
1355 break;
1356 }
1357
1358 if (allASCII) { // Yes we can!
1359 uint8_t *ptr, *mem;
1360 Boolean newHasLengthByte = __CFCanUseLengthByte(len);
1361 numBytes = (len + 1 + (newHasLengthByte ? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte
1362 // See if we can use that temporary local buffer in vBuf...
1363 if (numBytes >= __kCFVarWidthLocalBufferSize) {
1364 mem = ptr = (uint8_t *)CFAllocatorAllocate(alloc, numBytes, 0);
1365 if (__CFOASafe) __CFSetLastAllocationEventName(mem, "CFString (store)");
1366 } else {
1367 mem = ptr = (uint8_t *)(vBuf.localBuffer);
1368 }
1369 if (mem) { // If we can't allocate memory for some reason, use what we had (that is, as if we didn't have all ASCII)
1370 // Copy the Unicode bytes into the new ASCII buffer
1371 hasLengthByte = newHasLengthByte;
1372 hasNullByte = true;
1373 if (hasLengthByte) *ptr++ = (uint8_t)len;
1374 for (cnt = 0; cnt < len; cnt++) ptr[cnt] = (uint8_t)(((const UniChar *)bytes)[cnt]);
1375 ptr[len] = 0;
1376 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1377 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1378 }
1379 // Now make everything look like we had an ASCII buffer to start with
1380 bytes = mem;
1381 encoding = kCFStringEncodingASCII;
1382 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1383 noCopy = (numBytes >= __kCFVarWidthLocalBufferSize); // If we had to allocate it, make sure it's kept around
1384 numBytes--; // Should not contain the NULL byte at end...
1385 stringSupportsEightBitCFRepresentation = true; // We're ASCII now!
1386 stringROMShouldIgnoreNoCopy = true; // We allocated this buffer, so we should feel free to get rid of it if we can use the string ROM
1387 #if INSTRUMENT_SHARED_STRINGS
1388 recordedEncoding = "U->A";
1389 #endif
1390 }
1391 }
1392
1393 // At this point, all necessary input arguments have been changed to reflect the new state
1394 }
1395
1396 // Now determine the necessary size
1397 #if INSTRUMENT_SHARED_STRINGS || USE_STRING_ROM
1398 Boolean stringSupportsROM = stringSupportsEightBitCFRepresentation;
1399 #endif
1400
1401 #if INSTRUMENT_SHARED_STRINGS
1402 if (stringSupportsROM) {
1403 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0);
1404 CFIndex realNumBytes = numBytes - !! hasLengthByte;
1405 __CFRecordStringAllocationEvent(recordedEncoding, realBytes, realNumBytes);
1406 }
1407 #endif
1408
1409 #if USE_STRING_ROM
1410 CFStringRef romResult = NULL;
1411
1412
1413 if (stringSupportsROM) {
1414 // Disable the string ROM if necessary
1415 static char sDisableStringROM = -1;
1416 if (sDisableStringROM == -1) sDisableStringROM = !! __CFgetenv("CFStringDisableROM");
1417
1418 if (sDisableStringROM == 0) romResult = _CFSearchStringROM(bytes + !! hasLengthByte, numBytes - !! hasLengthByte);
1419 }
1420 /* if we get a result from our ROM, and noCopy is set, then deallocate the buffer immediately */
1421 if (romResult) {
1422 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1423 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1424 }
1425
1426 /* these don't get used again, but clear them for consistency */
1427 noCopy = false;
1428 bytes = NULL;
1429
1430 /* set our result to the ROM result which is not really mutable, of course, but that's OK because we don't try to modify it. */
1431 str = (CFMutableStringRef)romResult;
1432 }
1433
1434 if (! romResult) {
1435 #else
1436 if (1) {
1437 #endif
1438 // Now determine the necessary size
1439
1440 if (noCopy) {
1441
1442 size = sizeof(void *); // Pointer to the buffer
1443 if ((0) || (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull)) {
1444 size += sizeof(void *); // The contentsDeallocator
1445 }
1446 if (!hasLengthByte) size += sizeof(CFIndex); // Explicit length
1447 useLengthByte = hasLengthByte;
1448 useNullByte = hasNullByte;
1449
1450 } else { // Inline data; reserve space for it
1451
1452 useInlineData = true;
1453 size = numBytes;
1454
1455 if (hasLengthByte || (encoding != kCFStringEncodingUnicode && __CFCanUseLengthByte(numBytes))) {
1456 useLengthByte = true;
1457 if (!hasLengthByte) size += 1;
1458 } else {
1459 size += sizeof(CFIndex); // Explicit length
1460 }
1461 if (hasNullByte || encoding != kCFStringEncodingUnicode) {
1462 useNullByte = true;
1463 size += 1;
1464 }
1465 }
1466
1467 #ifdef STRING_SIZE_STATS
1468 // Dump alloced CFString size info every so often
1469 static int cnt = 0;
1470 static unsigned sizes[256] = {0};
1471 int allocedSize = size + sizeof(CFRuntimeBase);
1472 if (allocedSize < 255) sizes[allocedSize]++; else sizes[255]++;
1473 if ((++cnt % 1000) == 0) {
1474 printf ("\nTotal: %d\n", cnt);
1475 int i; for (i = 0; i < 256; i++) printf("%03d: %5d%s", i, sizes[i], ((i % 8) == 7) ? "\n" : " ");
1476 }
1477 #endif
1478
1479 // Finally, allocate!
1480
1481 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, size, NULL);
1482 if (str) {
1483 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (immutable)");
1484
1485 CFOptionFlags allocBits = (0) ? __kCFHasContentsDeallocator : (contentsDeallocator == alloc ? __kCFNotInlineContentsDefaultFree : (contentsDeallocator == kCFAllocatorNull ? __kCFNotInlineContentsNoFree : __kCFNotInlineContentsCustomFree));
1486 __CFStrSetInfoBits(str,
1487 (useInlineData ? __kCFHasInlineContents : allocBits) |
1488 ((encoding == kCFStringEncodingUnicode) ? __kCFIsUnicode : 0) |
1489 (useNullByte ? __kCFHasNullByte : 0) |
1490 (useLengthByte ? __kCFHasLengthByte : 0));
1491
1492 if (!useLengthByte) {
1493 CFIndex length = numBytes - (hasLengthByte ? 1 : 0);
1494 if (encoding == kCFStringEncodingUnicode) length /= sizeof(UniChar);
1495 __CFStrSetExplicitLength(str, length);
1496 }
1497
1498 if (useInlineData) {
1499 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1500 if (useLengthByte && !hasLengthByte) *contents++ = (uint8_t)numBytes;
1501 memmove(contents, bytes, numBytes);
1502 if (useNullByte) contents[numBytes] = 0;
1503 } else {
1504 __CFStrSetContentPtr(str, bytes);
1505 if (__CFStrHasContentsDeallocator(str)) __CFStrSetContentsDeallocator(str, contentsDeallocator);
1506 }
1507 } else {
1508 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1509 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1510 }
1511 }
1512 }
1513 if (vBuf.shouldFreeChars) CFAllocatorDeallocate(vBuf.allocator, (void *)bytes);
1514
1515 #if 0
1516 #warning Debug code
1517 const uint8_t *contents = (uint8_t *)__CFStrContents(str);
1518 CFIndex len = __CFStrLength2(str, contents);
1519
1520 if (__CFStrIsEightBit(str)) {
1521 contents += __CFStrSkipAnyLengthByte(str);
1522 if (!__CFBytesInASCII(contents, len)) {
1523 printf("CFString with 8 bit backing store not ASCII: %p, \"%.*s\"\n", str, (int)len, contents);
1524 }
1525 }
1526 #endif
1527
1528 return str;
1529 }
1530
1531 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1532 */
1533 CFStringRef __CFStringCreateImmutableFunnel2(
1534 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1535 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1536 CFAllocatorRef contentsDeallocator) {
1537 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, possiblyExternalFormat, tryToReduceUnicode, hasLengthByte, hasNullByte, noCopy, contentsDeallocator, 0);
1538 }
1539
1540
1541
1542 CFStringRef CFStringCreateWithPascalString(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding) {
1543 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1544 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, false, ALLOCATORSFREEFUNC, 0);
1545 }
1546
1547
1548 CFStringRef CFStringCreateWithCString(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding) {
1549 CFIndex len = strlen(cStr);
1550 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, false, ALLOCATORSFREEFUNC, 0);
1551 }
1552
1553 CFStringRef CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1554 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1555 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, true, contentsDeallocator, 0);
1556 }
1557
1558
1559 CFStringRef CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1560 CFIndex len = strlen(cStr);
1561 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, true, contentsDeallocator, 0);
1562 }
1563
1564
1565 CFStringRef CFStringCreateWithCharacters(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars) {
1566 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1567 }
1568
1569
1570 CFStringRef CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars, CFAllocatorRef contentsDeallocator) {
1571 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, false, false, false, true, contentsDeallocator, 0);
1572 }
1573
1574
1575 CFStringRef CFStringCreateWithBytes(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat) {
1576 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1577 }
1578
1579 CFStringRef _CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1580 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1581 }
1582
1583 CFStringRef CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1584 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1585 }
1586
1587 CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1588 return _CFStringCreateWithFormatAndArgumentsAux(alloc, NULL, formatOptions, format, arguments);
1589 }
1590
1591 CFStringRef _CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1592 CFStringRef str;
1593 CFMutableStringRef outputString = CFStringCreateMutable(kCFAllocatorSystemDefault, 0); //should use alloc if no copy/release
1594 __CFStrSetDesiredCapacity(outputString, 120); // Given this will be tightened later, choosing a larger working string is fine
1595 __CFStringAppendFormatCore(outputString, copyDescFunc, formatOptions, NULL, format, 0, NULL, 0, arguments);
1596 // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1597 // (However, this does make the string inline, and cause the supplied allocator to be used...)
1598 str = (CFStringRef)CFStringCreateCopy(alloc, outputString);
1599 CFRelease(outputString);
1600 return str;
1601 }
1602
1603 CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) {
1604 CFStringRef result;
1605 va_list argList;
1606
1607 va_start(argList, format);
1608 result = CFStringCreateWithFormatAndArguments(alloc, formatOptions, format, argList);
1609 va_end(argList);
1610
1611 return result;
1612 }
1613
1614 CFStringRef CFStringCreateWithSubstring(CFAllocatorRef alloc, CFStringRef str, CFRange range) {
1615 // CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringRef , (NSString *)str, _createSubstringWithRange:NSMakeRange(range.location, range.length));
1616
1617 __CFAssertIsString(str);
1618 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1619
1620 if ((range.location == 0) && (range.length == __CFStrLength(str))) { /* The substring is the whole string... */
1621 return (CFStringRef)CFStringCreateCopy(alloc, str);
1622 } else if (__CFStrIsEightBit(str)) {
1623 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1624 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location + __CFStrSkipAnyLengthByte(str), range.length, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1625 } else {
1626 const UniChar *contents = (UniChar *)__CFStrContents(str);
1627 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location, range.length * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1628 }
1629 }
1630
1631 CFStringRef CFStringCreateCopy(CFAllocatorRef alloc, CFStringRef str) {
1632 // CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringRef, (NSString *)str, copy);
1633
1634 __CFAssertIsString(str);
1635 if (!__CFStrIsMutable((CFStringRef)str) && // If the string is not mutable
1636 ((alloc ? alloc : __CFGetDefaultAllocator()) == __CFGetAllocator(str)) && // and it has the same allocator as the one we're using
1637 (__CFStrIsInline((CFStringRef)str) || __CFStrFreeContentsWhenDone((CFStringRef)str) || __CFStrIsConstant((CFStringRef)str))) { // and the characters are inline, or are owned by the string, or the string is constant
1638 if (!(kCFUseCollectableAllocator && (0))) CFRetain(str); // Then just retain instead of making a true copy
1639 return str;
1640 }
1641 if (__CFStrIsEightBit((CFStringRef)str)) {
1642 const uint8_t *contents = (const uint8_t *)__CFStrContents((CFStringRef)str);
1643 return __CFStringCreateImmutableFunnel3(alloc, contents + __CFStrSkipAnyLengthByte((CFStringRef)str), __CFStrLength2((CFStringRef)str, contents), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1644 } else {
1645 const UniChar *contents = (const UniChar *)__CFStrContents((CFStringRef)str);
1646 return __CFStringCreateImmutableFunnel3(alloc, contents, __CFStrLength2((CFStringRef)str, contents) * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1647 }
1648 }
1649
1650
1651
1652 /*** Constant string stuff... ***/
1653
/* Table which holds constant strings created with CFSTR, when -fconstant-cfstrings option is not used. These dynamically created constant strings are stored in constantStringTable. The keys are the 8-bit constant C-strings from the compiler; the values are the CFStrings created for them. _CFSTRLock protects this table.
*/
// Starts out NULL; __CFStringMakeConstantString() creates the dictionary on demand.
static CFMutableDictionaryRef constantStringTable = NULL;
// Held around the lookups and insertions on constantStringTable in this file.
static CFSpinLock_t _CFSTRLock = CFSpinLockInit;
1658
1659 static CFStringRef __cStrCopyDescription(const void *ptr) {
1660 return CFStringCreateWithCStringNoCopy(kCFAllocatorSystemDefault, (const char *)ptr, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull);
1661 }
1662
1663 static Boolean __cStrEqual(const void *ptr1, const void *ptr2) {
1664 return (strcmp((const char *)ptr1, (const char *)ptr2) == 0);
1665 }
1666
1667 static CFHashCode __cStrHash(const void *ptr) {
1668 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1669 const char *cStr = (const char *)ptr;
1670 CFIndex len = strlen(cStr);
1671 CFHashCode result = 0;
1672 if (len <= 4) { // All chars
1673 unsigned cnt = len;
1674 while (cnt--) result += (result << 8) + *cStr++;
1675 } else { // First and last 2 chars
1676 result += (result << 8) + cStr[0];
1677 result += (result << 8) + cStr[1];
1678 result += (result << 8) + cStr[len-2];
1679 result += (result << 8) + cStr[len-1];
1680 }
1681 result += (result << (len & 31));
1682 return result;
1683 }
1684
1685
1686 CFStringRef __CFStringMakeConstantString(const char *cStr) {
1687 CFStringRef result;
1688 #if defined(DEBUG)
1689 // StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging
1690 if ('\0' == *cStr) return kCFEmptyString;
1691 #endif
1692 if (constantStringTable == NULL) {
1693 CFDictionaryKeyCallBacks constantStringCallBacks = {0, NULL, NULL, __cStrCopyDescription, __cStrEqual, __cStrHash};
1694 CFDictionaryValueCallBacks constantStringValueCallBacks = kCFTypeDictionaryValueCallBacks;
1695 constantStringValueCallBacks.equal = NULL; // So that we only find strings that are ==
1696 CFMutableDictionaryRef table = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, &constantStringCallBacks, &constantStringValueCallBacks);
1697 _CFDictionarySetCapacity(table, 2500); // avoid lots of rehashing
1698 __CFSpinLock(&_CFSTRLock);
1699 if (constantStringTable == NULL) constantStringTable = table;
1700 __CFSpinUnlock(&_CFSTRLock);
1701 if (constantStringTable != table) CFRelease(table);
1702 }
1703
1704 __CFSpinLock(&_CFSTRLock);
1705 if ((result = (CFStringRef)CFDictionaryGetValue(constantStringTable, cStr))) {
1706 __CFSpinUnlock(&_CFSTRLock);
1707 } else {
1708 __CFSpinUnlock(&_CFSTRLock);
1709
1710 {
1711 char *key;
1712 Boolean isASCII = true;
1713 // Given this code path is rarer these days, OK to do this extra work to verify the strings
1714 const char *tmp = cStr;
1715 while (*tmp) {
1716 if (*(tmp++) & 0x80) {
1717 isASCII = false;
1718 break;
1719 }
1720 }
1721 if (!isASCII) {
1722 CFMutableStringRef ms = CFStringCreateMutable(kCFAllocatorSystemDefault, 0);
1723 tmp = cStr;
1724 while (*tmp) {
1725 CFStringAppendFormat(ms, NULL, (*tmp & 0x80) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp);
1726 tmp++;
1727 }
1728 CFLog(kCFLogLevelWarning, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms);
1729 CFRelease(ms);
1730 }
1731 // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility
1732 result = CFStringCreateWithCString(kCFAllocatorSystemDefault, cStr, kCFStringEncodingMacRoman);
1733 if (result == NULL) {
1734 CFLog(__kCFLogAssertion, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1735 HALT;
1736 }
1737 if (__CFOASafe) __CFSetLastAllocationEventName((void *)result, "CFString (CFSTR)");
1738 if (__CFStrIsEightBit(result)) {
1739 key = (char *)__CFStrContents(result) + __CFStrSkipAnyLengthByte(result);
1740 } else { // For some reason the string is not 8-bit!
1741 CFIndex keySize = strlen(cStr) + 1;
1742 key = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, keySize, 0);
1743 if (__CFOASafe) __CFSetLastAllocationEventName((void *)key, "CFString (CFSTR key)");
1744 strlcpy(key, cStr, keySize); // !!! We will leak this, if the string is removed from the table (or table is freed)
1745 }
1746
1747 {
1748 CFStringRef resultToBeReleased = result;
1749 CFIndex count;
1750 __CFSpinLock(&_CFSTRLock);
1751 count = CFDictionaryGetCount(constantStringTable);
1752 CFDictionaryAddValue(constantStringTable, key, result);
1753 if (CFDictionaryGetCount(constantStringTable) == count) { // add did nothing, someone already put it there
1754 result = (CFStringRef)CFDictionaryGetValue(constantStringTable, key);
1755 } else {
1756 #if __LP64__
1757 ((struct __CFString *)result)->base._rc = 0;
1758 #else
1759 ((struct __CFString *)result)->base._cfinfo[CF_RC_BITS] = 0;
1760 #endif
1761 }
1762 __CFSpinUnlock(&_CFSTRLock);
1763 // This either eliminates the extra retain on the freshly created string, or frees it, if it was actually not inserted into the table
1764 CFRelease(resultToBeReleased);
1765 }
1766 }
1767 }
1768 return result;
1769 }
1770
#if defined(DEBUG)
/* DEBUG-only: reports whether 'str' is one of the values stored in constantStringTable.
   Returns false when the table has not been created yet.
*/
static Boolean __CFStrIsConstantString(CFStringRef str) {
    if (!constantStringTable) return false;

    __CFSpinLock(&_CFSTRLock);
    Boolean isRegistered = CFDictionaryContainsValue(constantStringTable, str);
    __CFSpinUnlock(&_CFSTRLock);
    return isRegistered;
}
#endif
1782
1783
#if DEPLOYMENT_TARGET_WINDOWS
/* In case the library is unloaded, releases the store for the constant string table and resets the pointer.
   DEBUG builds raise __CFConstantStringTableBeingFreed for the duration of the release.
*/
void __CFStringCleanup (void) {
    if (constantStringTable == NULL) return;    // Nothing was ever registered

#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = true;
#endif
    CFRelease(constantStringTable);
#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = false;
#endif
    constantStringTable = NULL;
}
#endif
1799
1800
1801 // Can pass in NSString as replacement string
1802 // Call with numRanges > 0, and incrementing ranges
1803
1804 static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) {
1805 int cnt;
1806 CFStringRef copy = NULL;
1807 if (replacement == str) copy = replacement = CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1808 CFIndex replacementLength = CFStringGetLength(replacement);
1809
1810 __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1811
1812 if (__CFStrIsUnicode(str)) {
1813 UniChar *contents = (UniChar *)__CFStrContents(str);
1814 UniChar *firstReplacement = contents + ranges[0].location;
1815 // Extract the replacementString into the first location, then copy from there
1816 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement);
1817 for (cnt = 1; cnt < numRanges; cnt++) {
1818 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1819 contents += replacementLength - ranges[cnt - 1].length;
1820 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar));
1821 }
1822 } else {
1823 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1824 uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str);
1825 // Extract the replacementString into the first location, then copy from there
1826 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL);
1827 contents += __CFStrSkipAnyLengthByte(str); // Now contents will simply track the location to insert next string into
1828 for (cnt = 1; cnt < numRanges; cnt++) {
1829 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1830 contents += replacementLength - ranges[cnt - 1].length;
1831 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength);
1832 }
1833 }
1834 if (copy) CFRelease(copy);
1835 }
1836
1837 // Can pass in NSString as replacement string
1838
1839 CF_INLINE void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
1840 CFStringRef copy = NULL;
1841 if (replacement == str) copy = replacement = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1842 CFIndex replacementLength = CFStringGetLength(replacement);
1843
1844 __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1845
1846 if (__CFStrIsUnicode(str)) {
1847 UniChar *contents = (UniChar *)__CFStrContents(str);
1848 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location);
1849 } else {
1850 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1851 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL);
1852 }
1853
1854 if (copy) CFRelease(copy);
1855 }
1856
/* Desired capacity that __CFStringCreateMutableFunnel() sets when the client does not provide one
   (that is, when maxLength is 0).
*/
#define DEFAULTMINCAPACITY 32
1860
1861 CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) {
1862 CFMutableStringRef str;
1863 if ((0)) additionalInfoBits |= __kCFHasContentsAllocator;
1864 Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false;
1865
1866 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1867
1868 // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
1869 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(struct __notInlineMutable) - (hasExternalContentsAllocator ? 0 : sizeof(CFAllocatorRef)), NULL);
1870 if (str) {
1871 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)");
1872
1873 __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits);
1874 str->variants.notInlineMutable.buffer = NULL;
1875 __CFStrSetExplicitLength(str, 0);
1876 str->variants.notInlineMutable.hasGap = str->variants.notInlineMutable.isFixedCapacity = str->variants.notInlineMutable.isExternalMutable = str->variants.notInlineMutable.capacityProvidedExternally = 0;
1877 if (maxLength != 0) __CFStrSetIsFixed(str);
1878 __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength);
1879 __CFStrSetCapacity(str, 0);
1880 if (__CFStrHasContentsAllocator(str)) {
1881 // contents allocator starts out as the string's own allocator
1882 __CFStrSetContentsAllocator(str, alloc);
1883 }
1884 }
1885 return str;
1886 }
1887
1888 CFMutableStringRef CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc, UniChar *chars, CFIndex numChars, CFIndex capacity, CFAllocatorRef externalCharactersAllocator) {
1889 CFOptionFlags contentsAllocationBits = externalCharactersAllocator ? ((externalCharactersAllocator == kCFAllocatorNull) ? __kCFNotInlineContentsNoFree : __kCFHasContentsAllocator) : __kCFNotInlineContentsDefaultFree;
1890 CFMutableStringRef string = __CFStringCreateMutableFunnel(alloc, 0, contentsAllocationBits | __kCFIsUnicode);
1891 if (string) {
1892 __CFStrSetIsExternalMutable(string);
1893 if (__CFStrHasContentsAllocator(string)) {
1894 CFAllocatorRef allocator = __CFStrContentsAllocator((CFMutableStringRef)string);
1895 if (!(0 || 0)) CFRelease(allocator);
1896 __CFStrSetContentsAllocator(string, externalCharactersAllocator);
1897 }
1898 CFStringSetExternalCharactersNoCopy(string, chars, numChars, capacity);
1899 }
1900 return string;
1901 }
1902
1903 CFMutableStringRef CFStringCreateMutable(CFAllocatorRef alloc, CFIndex maxLength) {
1904 return __CFStringCreateMutableFunnel(alloc, maxLength, __kCFNotInlineContentsDefaultFree);
1905 }
1906
1907 CFMutableStringRef CFStringCreateMutableCopy(CFAllocatorRef alloc, CFIndex maxLength, CFStringRef string) {
1908 CFMutableStringRef newString;
1909
1910 // CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFMutableStringRef, (NSString *)string, mutableCopy);
1911
1912 __CFAssertIsString(string);
1913
1914 newString = CFStringCreateMutable(alloc, maxLength);
1915 __CFStringReplace(newString, CFRangeMake(0, 0), string);
1916
1917 return newString;
1918 }
1919
1920
1921 CF_PRIVATE void _CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex len) {
1922 __CFAssertIsStringAndMutable(str);
1923 __CFStrSetDesiredCapacity(str, len);
1924 }
1925
1926
1927 /* This one is for CF
1928 */
1929 CFIndex CFStringGetLength(CFStringRef str) {
1930 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFIndex, (NSString *)str, length);
1931
1932 __CFAssertIsString(str);
1933 return __CFStrLength(str);
1934 }
1935
1936 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1937 */
1938 CFIndex _CFStringGetLength2(CFStringRef str) {
1939 return __CFStrLength(str);
1940 }
1941
1942
1943 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere.
1944 */
1945 CF_INLINE UniChar __CFStringGetCharacterAtIndexGuts(CFStringRef str, CFIndex idx, const uint8_t *contents) {
1946 if (__CFStrIsEightBit(str)) {
1947 contents += __CFStrSkipAnyLengthByte(str);
1948 #if defined(DEBUG)
1949 if (!__CFCharToUniCharFunc && (contents[idx] >= 128)) {
1950 // Can't do log here, as it might be too early
1951 fprintf(stderr, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
1952 }
1953 #endif
1954 return __CFCharToUniCharTable[contents[idx]];
1955 }
1956
1957 return ((UniChar *)contents)[idx];
1958 }
1959
1960 /* This one is for the CF API
1961 */
1962 UniChar CFStringGetCharacterAtIndex(CFStringRef str, CFIndex idx) {
1963 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, UniChar, (NSString *)str, characterAtIndex:(NSUInteger)idx);
1964
1965 __CFAssertIsString(str);
1966 __CFAssertIndexIsInStringBounds(str, idx);
1967 return __CFStringGetCharacterAtIndexGuts(str, idx, (const uint8_t *)__CFStrContents(str));
1968 }
1969
1970 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1971 */
1972 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str, CFIndex idx, UniChar *ch) {
1973 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1974 if (idx >= __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1975 *ch = __CFStringGetCharacterAtIndexGuts(str, idx, contents);
1976 return _CFStringErrNone;
1977 }
1978
1979
1980 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
1981 */
1982 CF_INLINE void __CFStringGetCharactersGuts(CFStringRef str, CFRange range, UniChar *buffer, const uint8_t *contents) {
1983 if (__CFStrIsEightBit(str)) {
1984 __CFStrConvertBytesToUnicode(((uint8_t *)contents) + (range.location + __CFStrSkipAnyLengthByte(str)), buffer, range.length);
1985 } else {
1986 const UniChar *uContents = ((UniChar *)contents) + range.location;
1987 memmove(buffer, uContents, range.length * sizeof(UniChar));
1988 }
1989 }
1990
1991 /* This one is for the CF API
1992 */
1993 void CFStringGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1994 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)str, getCharacters:(unichar *)buffer range:NSMakeRange(range.location, range.length));
1995
1996 __CFAssertIsString(str);
1997 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1998 __CFStringGetCharactersGuts(str, range, buffer, (const uint8_t *)__CFStrContents(str));
1999 }
2000
2001 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
2002 */
2003 int _CFStringCheckAndGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
2004 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
2005 if (range.location + range.length > __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
2006 __CFStringGetCharactersGuts(str, range, buffer, contents);
2007 return _CFStringErrNone;
2008 }
2009
2010
2011 CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) {
2012
2013 /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */
2014 __CFAssertIsNotNegative(maxBufLen);
2015
2016 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { // If we can grope the ivars, let's do it...
2017 __CFAssertIsString(str);
2018 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
2019
2020 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2021 const unsigned char *contents = (const unsigned char *)__CFStrContents(str);
2022 CFIndex cLength = range.length;
2023
2024 if (buffer) {
2025 if (cLength > maxBufLen) cLength = maxBufLen;
2026 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength);
2027 }
2028 if (usedBufLen) *usedBufLen = cLength;
2029
2030 return cLength;
2031 }
2032 }
2033
2034 return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen);
2035 }
2036
2037
2038 ConstStringPtr CFStringGetPascalStringPtr (CFStringRef str, CFStringEncoding encoding) {
2039
2040 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
2041 __CFAssertIsString(str);
2042 if (__CFStrHasLengthByte(str) && __CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII
2043 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
2044 if (__CFStrHasExplicitLength(str) && (__CFStrLength2(str, contents) != (SInt32)(*contents))) return NULL; // Invalid length byte
2045 return (ConstStringPtr)contents;
2046 }
2047 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
2048 }
2049 return NULL;
2050 }
2051
2052
2053 const char * CFStringGetCStringPtr(CFStringRef str, CFStringEncoding encoding) {
2054
2055 if (encoding != __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII != __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding))) return NULL;
2056 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
2057
2058 if (str == NULL) return NULL; // Should really just crash, but for compatibility... see <rdar://problem/12340248>
2059
2060 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, const char *, (NSString *)str, _fastCStringContents:true);
2061
2062 __CFAssertIsString(str);
2063
2064 if (__CFStrHasNullByte(str)) {
2065 // Note: this is called a lot, 27000 times to open a small xcode project with one file open.
2066 // Of these uses about 1500 are for cStrings/utf8strings.
2067 #if 0
2068 // Only sometimes when the stars are aligned will this call return a gc pointer
2069 // under GC we can only really return a pointer to the start of a GC buffer for cString use
2070 // (Is there a simpler way to ask if contents isGC?)
2071 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
2072 if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
2073 if (__CFStrSkipAnyLengthByte(str) != 0 || !__CFStrIsMutable(str)) {
2074 static int counter = 0;
2075 printf("CFString %dth unsafe safe string %s\n", ++counter, __CFStrContents(str) + __CFStrSkipAnyLengthByte(str));
2076 return NULL;
2077 }
2078 }
2079 #endif
2080 return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str);
2081 } else {
2082 return NULL;
2083 }
2084 }
2085
2086
2087 const UniChar *CFStringGetCharactersPtr(CFStringRef str) {
2088
2089 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, const UniChar *, (NSString *)str, _fastCharacterContents);
2090
2091 __CFAssertIsString(str);
2092 if (__CFStrIsUnicode(str)) return (const UniChar *)__CFStrContents(str);
2093 return NULL;
2094 }
2095
2096
2097 Boolean CFStringGetPascalString(CFStringRef str, Str255 buffer, CFIndex bufferSize, CFStringEncoding encoding) {
2098 CFIndex length;
2099 CFIndex usedLen;
2100
2101 __CFAssertIsNotNegative(bufferSize);
2102 if (bufferSize < 1) return false;
2103
2104 if (CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
2105 length = CFStringGetLength(str);
2106 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
2107 } else {
2108 const uint8_t *contents;
2109
2110 __CFAssertIsString(str);
2111
2112 contents = (const uint8_t *)__CFStrContents(str);
2113 length = __CFStrLength2(str, contents);
2114
2115 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
2116
2117 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2118 if (length >= bufferSize) return false;
2119 memmove((void*)(1 + (const char*)buffer), (__CFStrSkipAnyLengthByte(str) + contents), length);
2120 *buffer = (unsigned char)length;
2121 return true;
2122 }
2123 }
2124
2125 if (__CFStringEncodeByteStream(str, 0, length, false, encoding, false, (UInt8 *)(1 + (uint8_t *)buffer), bufferSize - 1, &usedLen) != length) {
2126
2127 #if defined(DEBUG)
2128 if (bufferSize > 0) {
2129 strlcpy((char *)buffer + 1, CONVERSIONFAILURESTR, bufferSize - 1);
2130 buffer[0] = (unsigned char)((CFIndex)sizeof(CONVERSIONFAILURESTR) < (bufferSize - 1) ? (CFIndex)sizeof(CONVERSIONFAILURESTR) : (bufferSize - 1));
2131 }
2132 #else
2133 if (bufferSize > 0) buffer[0] = 0;
2134 #endif
2135 return false;
2136 }
2137 *buffer = (unsigned char)usedLen;
2138 return true;
2139 }
2140
2141 Boolean CFStringGetCString(CFStringRef str, char *buffer, CFIndex bufferSize, CFStringEncoding encoding) {
2142 const uint8_t *contents;
2143 CFIndex len;
2144
2145 __CFAssertIsNotNegative(bufferSize);
2146 if (bufferSize < 1) return false;
2147
2148 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, Boolean, (NSString *)str, _getCString:buffer maxLength:(NSUInteger)bufferSize - 1 encoding:encoding);
2149
2150 __CFAssertIsString(str);
2151
2152 contents = (const uint8_t *)__CFStrContents(str);
2153 len = __CFStrLength2(str, contents);
2154
2155 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2156 if (len >= bufferSize) return false;
2157 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str), len);
2158 buffer[len] = 0;
2159 return true;
2160 } else {
2161 CFIndex usedLen;
2162
2163 if (__CFStringEncodeByteStream(str, 0, len, false, encoding, false, (unsigned char*) buffer, bufferSize - 1, &usedLen) == len) {
2164 buffer[usedLen] = '\0';
2165 return true;
2166 } else {
2167 #if defined(DEBUG)
2168 strlcpy(buffer, CONVERSIONFAILURESTR, bufferSize);
2169 #else
2170 if (bufferSize > 0) buffer[0] = 0;
2171 #endif
2172 return false;
2173 }
2174 }
2175 }
2176
2177 extern Boolean __CFLocaleGetNullLocale(struct __CFLocale *locale);
2178 extern void __CFLocaleSetNullLocale(struct __CFLocale *locale);
2179
2180 static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale, bool collatorOnly) {
2181 CFStringRef localeID;
2182 const char *langID = NULL;
2183 static const void *lastLocale = NULL;
2184 static const char *lastLangID = NULL;
2185 static CFSpinLock_t lock = CFSpinLockInit;
2186
2187 if (__CFLocaleGetNullLocale((struct __CFLocale *)locale)) return NULL;
2188
2189 __CFSpinLock(&lock);
2190 if ((NULL != lastLocale) && (lastLocale == locale)) {
2191 __CFSpinUnlock(&lock);
2192 return lastLangID;
2193 }
2194 __CFSpinUnlock(&lock);
2195
2196 localeID = (CFStringRef)CFLocaleGetValue(locale, __kCFLocaleCollatorID);
2197
2198 if (!collatorOnly) {
2199 CFIndex length = __CFStrLength(localeID);
2200
2201 if ((length < 2) || ((4 == length) && CFEqual(localeID, CFSTR("root")))) localeID = (CFStringRef)CFLocaleGetIdentifier(locale);
2202 }
2203
2204 // This is somewhat depending on CFLocale implementation always creating CFString for locale identifer ???
2205 if (__CFStrLength(localeID) > 1) {
2206 const void *contents = __CFStrContents(localeID);
2207 const char *string;
2208 char buffer[2];
2209
2210 if (__CFStrIsEightBit(localeID)) {
2211 string = ((const char *)contents) + __CFStrSkipAnyLengthByte(localeID);
2212 } else {
2213 const UTF16Char *characters = (const UTF16Char *)contents;
2214
2215 buffer[0] = (char)*(characters++);
2216 buffer[1] = (char)*characters;
2217 string = buffer;
2218 }
2219
2220 if (!strncmp(string, "az", 2)) { // Azerbaijani
2221 langID = "az";
2222 } else if (!strncmp(string, "lt", 2)) { // Lithuanian
2223 langID = "lt";
2224 } else if (!strncmp(string, "tr", 2)) { // Turkish
2225 langID = "tr";
2226 } else if (!strncmp(string, "nl", 2)) { // Dutch
2227 langID = "nl";
2228 } else if (!strncmp(string, "el", 2)) { // Greek
2229 langID = "el";
2230 }
2231 }
2232
2233 if (langID == NULL) __CFLocaleSetNullLocale((struct __CFLocale *)locale);
2234
2235 __CFSpinLock(&lock);
2236 lastLocale = locale;
2237 lastLangID = langID;
2238 __CFSpinUnlock(&lock);
2239
2240 return langID;
2241 }
2242
2243 CF_INLINE bool _CFCanUseLocale(CFLocaleRef locale) {
2244 if (locale) {
2245 return true;
2246 }
2247 return false;
2248 }
2249
// Size, in UTF16Char elements, of the scratch buffer that __CFStringFoldCharacterClusterAtIndex() hands to CFUniCharMapCaseTo()
2250 #define MAX_CASE_MAPPING_BUF (8)
// U+200D
2251 #define ZERO_WIDTH_JOINER (0x200D)
// U+034F
2252 #define COMBINING_GRAPHEME_JOINER (0x034F)
2253 // Hangul ranges
// First and last code point of each range. NOTE(review): the code using these constants is outside this part of the file.
2254 #define HANGUL_CHOSEONG_START (0x1100)
2255 #define HANGUL_CHOSEONG_END (0x115F)
2256 #define HANGUL_JUNGSEONG_START (0x1160)
2257 #define HANGUL_JUNGSEONG_END (0x11A2)
2258 #define HANGUL_JONGSEONG_START (0x11A8)
2259 #define HANGUL_JONGSEONG_END (0x11F9)
2260
2261 #define HANGUL_SYLLABLE_START (0xAC00)
2262 #define HANGUL_SYLLABLE_END (0xD7AF)
2263
2264
2265 // Returns the length of characters filled into outCharacters. If no change, returns 0. maxBufferLength should be at least 8
// Folds the character cluster that begins at index according to the kCFCompare* bits in flags.
//   character      - the character found at index; when it is 0 nothing is done and 0 is returned
//   buffer         - inline buffer from which the characters following index are read
//   index          - position of character within buffer
//   flags          - the case-, width- and diacritic-insensitive bits and kCFCompareNonliteral are consulted
//   langCode       - language identifier or NULL; "tr" and "az" enable the special handling of 'I'
//   outCharacters  - receives the folded UTF-32 characters
//   maxBufferLength- used as the capacity of outCharacters
//   consumedLength - optional; receives (currentIndex - index), but only when the return value is > 0
2266 static CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength) {
2267 CFIndex filledLength = 0, currentIndex = index;
2268
2269 if (0 != character) {
2270 UTF16Char lowSurrogate;
2271 CFIndex planeNo = (character >> 16);
2272 bool isTurkikCapitalI = false;
2273 static const uint8_t *decompBMP = NULL;
2274 static const uint8_t *graphemeBMP = NULL;
2275
// The plane-0 bitmaps are looked up on first use and kept in the function statics above.
// NOTE(review): this lazy initialization is not synchronized - confirm that is acceptable for concurrent callers.
2276 if (NULL == decompBMP) {
2277 decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
2278 graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2279 }
2280
// Step past the position of the character that was passed in
2281 ++currentIndex;
2282
// ASCII shortcut. It is not taken for 'I' when a langCode is given, so that the Turkic check in the else branch can see it.
2283 if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII
2284 if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) {
2285 character += ('a' - 'A');
2286 *outCharacters = character;
2287 filledLength = 1;
2288 }
2289 } else {
2290 // do width-insensitive mapping
// Only U+FF00..U+FFEF is run through the compatibility decomposition
2291 if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) {
2292 (void)CFUniCharCompatibilityDecompose(&character, 1, 1);
2293 *outCharacters = character;
2294 filledLength = 1;
2295 }
2296
2297 // map surrogates
// A high surrogate followed by a low surrogate is combined into one code point; the low surrogate is consumed too
2298 if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) {
2299 character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
2300 ++currentIndex;
2301 planeNo = (character >> 16);
2302 }
2303
2304 // decompose
2305 if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
2306 if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) {
2307 UTF32Char original = character;
2308
2309 filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength);
2310 character = *outCharacters;
2311
// Diacritic-insensitive and first decomposed character below U+0510: keep only that first character.
// Otherwise the decomposition is kept only when kCFCompareNonliteral is set; without it the original is restored.
2312 if ((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) {
2313 filledLength = 1; // reset if Roman, Greek, Cyrillic
2314 } else if (0 == (flags & kCFCompareNonliteral)) {
2315 character = original;
2316 filledLength = 0;
2317 }
2318 }
2319 }
2320
2321 // fold case
2322 if (flags & kCFCompareCaseInsensitive) {
2323 const uint8_t *nonBaseBitmap;
// filterNonBase: grapheme-extend characters produced by the case mapping are dropped in the fill loop below
2324 bool filterNonBase = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? true : false);
2325 static const uint8_t *lowerBMP = NULL;
2326 static const uint8_t *caseFoldBMP = NULL;
2327
// Same first-use lookup as for decompBMP/graphemeBMP above (NOTE(review): also unsynchronized)
2328 if (NULL == lowerBMP) {
2329 lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0);
2330 caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0);
2331 }
2332
2333 if ((NULL != langCode) && ('I' == character) && ((0 == strcmp((const char *)langCode, "tr")) || (0 == strcmp((const char *)langCode, "az")))) { // do Turkik special-casing
2334 if (filledLength > 1) {
2335 if (0x0307 == outCharacters[1]) {
// Remove the U+0307 at outCharacters[1], closing the gap, and replace the 'I' with 'i'
2336 if (--filledLength > 1) memmove((outCharacters + 1), (outCharacters + 2), sizeof(UTF32Char) * (filledLength - 1));
2337 character = *outCharacters = 'i';
2338 isTurkikCapitalI = true;
2339 }
// Otherwise an 'I' directly followed by U+0307 in the string is consumed as a pair and becomes 'i'
2340 } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) {
2341 character = *outCharacters = 'i';
2342 filledLength = 1;
2343 ++currentIndex;
2344 isTurkikCapitalI = true;
2345 }
2346 }
// General case folding, skipped when the Turkic branch above already produced 'i'
2347 if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) {
2348 UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF];
2349 const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit;
2350 UTF32Char *outCharactersP = outCharacters;
2351 uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode);
2352
2353 bufferLimit = bufferP + bufferLength;
2354
2355 if (filledLength > 0) --filledLength; // decrement filledLength (will add back later)
2356
2357 // make space for casefold characters
2358 if ((filledLength > 0) && (bufferLength > 1)) {
2359 CFIndex totalScalerLength = 0;
2360
// Count the code points in the case-folded result (a surrogate pair counts once)
2361 while (bufferP < bufferLimit) {
2362 if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP;
2363 ++totalScalerLength;
2364 }
2365 memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char));
2366 bufferP = caseFoldBuffer;
2367 }
2368
2369 // fill
2370 while (bufferP < bufferLimit) {
2371 character = *(bufferP++);
2372 if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) {
2373 character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++));
2374 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
2375 } else {
2376 nonBaseBitmap = graphemeBMP;
2377 }
2378
2379 if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2380 *(outCharactersP++) = character;
2381 ++filledLength;
2382 }
2383 }
2384 }
2385 }
2386 }
2387
2388 // collect following combining marks
2389 if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
2390 const uint8_t *nonBaseBitmap;
2391 const uint8_t *decompBitmap;
// doFill is false when diacritics are ignored and the current character is below U+0510:
// following grapheme-extend characters are then stepped over without being stored
2392 bool doFill = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? false : true);
2393
2394 if (0 == filledLength) {
2395 *outCharacters = character; // filledLength will be updated below on demand
2396
2397 if (doFill) { // check if really needs to fill
2398 UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2399
2400 if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2401 nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate);
2402 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (nonBaseCharacter >> 16));
2403 decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16));
2404 } else {
2405 nonBaseBitmap = graphemeBMP;
2406 decompBitmap = decompBMP;
2407 }
2408
2409 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) {
2410 filledLength = 1; // For the base character
2411
2412 if ((0 == (flags & kCFCompareDiacriticInsensitive)) || (nonBaseCharacter > 0x050F)) {
2413 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) {
2414 filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2415 } else {
2416 outCharacters[filledLength++] = nonBaseCharacter;
2417 }
2418 }
// nonBaseBitmap == graphemeBMP identifies the non-surrogate case above, which occupies one position instead of two
2419 currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
2420 } else {
2421 doFill = false;
2422 }
2423 }
2424 }
2425
2426 while (filledLength < maxBufferLength) { // do the rest
2427 character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2428
2429 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2430 character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
2431 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
2432 decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16));
2433 } else {
2434 nonBaseBitmap = graphemeBMP;
2435 decompBitmap = decompBMP;
2436 }
// NOTE(review): this pass only clears the flag; currentIndex is unchanged, so the same character is read again on the next pass
2437 if (isTurkikCapitalI) {
2438 isTurkikCapitalI = false;
2439 } else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2440 if (doFill) {
2441 if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) {
2442 CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2443
2444 if (0 == currentLength) break; // didn't fit
2445
2446 filledLength += currentLength;
2447 } else {
2448 outCharacters[filledLength++] = character;
2449 }
2450 } else if (0 == filledLength) {
2451 filledLength = 1; // For the base character
2452 }
2453 currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
2454 } else {
2455 break;
2456 }
2457 }
2458
// Walk back over the trailing grapheme-extend characters, then priority-sort from the character
// where the walk stopped (or the first slot) through the end of the output
2459 if (filledLength > 1) {
2460 UTF32Char *sortCharactersLimit = outCharacters + filledLength;
2461 UTF32Char *sortCharacters = sortCharactersLimit - 1;
2462
2463 while ((outCharacters < sortCharacters) && CFUniCharIsMemberOfBitmap(*sortCharacters, ((*sortCharacters < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (*sortCharacters >> 16))))) --sortCharacters;
2464
2465 if ((sortCharactersLimit - sortCharacters) > 1) CFUniCharPrioritySort(sortCharacters, (sortCharactersLimit - sortCharacters)); // priority sort
2466 }
2467 }
2468 }
2469
// consumedLength is written only when something was filled
2470 if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index);
2471
2472 return filledLength;
2473 }
2474
2475 static bool __CFStringFillCharacterSetInlineBuffer(CFCharacterSetInlineBuffer *buffer, CFStringCompareFlags compareOptions) {
2476 if (0 != (compareOptions & kCFCompareIgnoreNonAlphanumeric)) {
2477 static CFCharacterSetRef nonAlnumChars = NULL;
2478
2479 if (NULL == nonAlnumChars) {
2480 CFMutableCharacterSetRef cset = CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault, CFCharacterSetGetPredefined(kCFCharacterSetAlphaNumeric));
2481 CFCharacterSetInvert(cset);
2482 if (!OSAtomicCompareAndSwapPtrBarrier(NULL, cset, (void **)&nonAlnumChars)) CFRelease(cset);
2483 }
2484
2485 CFCharacterSetInitInlineBuffer(nonAlnumChars, buffer);
2486
2487 return true;
2488 }
2489
2490 return false;
2491 }
2492
// Element count of the UTF32Char scratch arrays (strBuf1/strBuf2) that CFStringCompareWithOptionsAndLocale() keeps on its stack
2493 #define kCFStringStackBufferLength (__kCFStringInlineBufferLength)
2494
2495 CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions, CFLocaleRef locale) {
2496 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2497 UTF32Char strBuf1[kCFStringStackBufferLength];
2498 UTF32Char strBuf2[kCFStringStackBufferLength];
2499 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2500 UTF32Char str1Char, str2Char;
2501 CFIndex str1UsedLen, str2UsedLen;
2502 CFIndex str1Index = 0, str2Index = 0, strBuf1Index = 0, strBuf2Index = 0, strBuf1Len = 0, strBuf2Len = 0;
2503 CFIndex str1LocalizedIndex = 0, str2LocalizedIndex = 0;
2504 CFIndex forcedIndex1 = 0, forcedIndex2 = 0;
2505 CFIndex str2Len = CFStringGetLength(string2);
2506 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2507 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
2508 bool equalityOptions = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive)) ? true : false);
2509 bool numerically = ((compareOptions & kCFCompareNumerically) ? true : false);
2510 bool forceOrdering = ((compareOptions & kCFCompareForcedOrdering) ? true : false);
2511 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2512 const uint8_t *langCode;
2513 CFComparisonResult compareResult = kCFCompareEqualTo;
2514 UTF16Char otherChar;
2515 Boolean freeLocale = false;
2516 CFCharacterSetInlineBuffer *ignoredChars = NULL;
2517 CFCharacterSetInlineBuffer csetBuffer;
2518 bool numericEquivalence = false;
2519
2520 if ((compareOptions & kCFCompareLocalized) && (NULL == locale)) {
2521 locale = CFLocaleCopyCurrent();
2522 freeLocale = true;
2523 }
2524
2525 langCode = ((NULL == locale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale, true));
2526
2527 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
2528 ignoredChars = &csetBuffer;
2529 equalityOptions = true;
2530 }
2531
2532 if ((NULL == locale) && (NULL == ignoredChars) && !numerically) { // could do binary comp (be careful when adding new flags)
2533 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2534 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2535 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(string2, eightBitEncoding);
2536 CFIndex factor = sizeof(uint8_t);
2537
2538 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2539 compareOptions &= ~kCFCompareNonliteral; // remove non-literal
2540
2541 if ((kCFStringEncodingASCII == eightBitEncoding) && (false == forceOrdering)) {
2542 if (caseInsensitive) {
2543 int cmpResult = strncasecmp_l((const char *)str1Bytes + rangeToCompare.location, (const char *)str2Bytes, __CFMin(rangeToCompare.length, str2Len), NULL);
2544
2545 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2546
2547 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2548 }
2549 } else if (caseInsensitive || diacriticsInsensitive) {
2550 CFIndex limitLength = __CFMin(rangeToCompare.length, str2Len);
2551
2552 str1Bytes += rangeToCompare.location;
2553
2554 while (str1Index < limitLength) {
2555 str1Char = str1Bytes[str1Index];
2556 str2Char = str2Bytes[str1Index];
2557
2558 if (str1Char != str2Char) {
2559 if ((str1Char < 0x80) && (str2Char < 0x80)) {
2560 if (forceOrdering && (kCFCompareEqualTo == compareResult) && (str1Char != str2Char)) compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2561 if (caseInsensitive) {
2562 if ((str1Char >= 'A') && (str1Char <= 'Z')) str1Char += ('a' - 'A');
2563 if ((str2Char >= 'A') && (str2Char <= 'Z')) str2Char += ('a' - 'A');
2564 }
2565
2566 if (str1Char != str2Char) return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2567 } else {
2568 str1Bytes = NULL;
2569 break;
2570 }
2571 }
2572 ++str1Index;
2573 }
2574
2575 str2Index = str1Index;
2576
2577 if (str1Index == limitLength) {
2578 int cmpResult = rangeToCompare.length - str2Len;
2579
2580 return ((0 == cmpResult) ? compareResult : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2581 }
2582 }
2583 } else if (!equalityOptions && (NULL == str1Bytes) && (NULL == str2Bytes)) {
2584 str1Bytes = (const uint8_t *)CFStringGetCharactersPtr(string);
2585 str2Bytes = (const uint8_t *)CFStringGetCharactersPtr(string2);
2586 factor = sizeof(UTF16Char);
2587 #if __LITTLE_ENDIAN__
2588 if ((NULL != str1Bytes) && (NULL != str2Bytes)) { // we cannot use memcmp
2589 const UTF16Char *str1 = ((const UTF16Char *)str1Bytes) + rangeToCompare.location;
2590 const UTF16Char *str1Limit = str1 + __CFMin(rangeToCompare.length, str2Len);
2591 const UTF16Char *str2 = (const UTF16Char *)str2Bytes;
2592 CFIndex cmpResult = 0;
2593
2594 while ((0 == cmpResult) && (str1 < str1Limit)) cmpResult = (CFIndex)*(str1++) - (CFIndex)*(str2++);
2595
2596 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2597
2598 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2599 }
2600 #endif /* __LITTLE_ENDIAN__ */
2601 }
2602 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2603 int cmpResult = memcmp(str1Bytes + (rangeToCompare.location * factor), str2Bytes, __CFMin(rangeToCompare.length, str2Len) * factor);
2604
2605 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2606
2607 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2608 }
2609 }
2610
2611 CFStringInitInlineBuffer(string, &inlineBuf1, rangeToCompare);
2612 CFStringInitInlineBuffer(string2, &inlineBuf2, CFRangeMake(0, str2Len));
2613
2614 if (NULL != locale) {
2615 str1LocalizedIndex = str1Index;
2616 str2LocalizedIndex = str2Index;
2617
2618 // We temporarily disable kCFCompareDiacriticInsensitive for SL <rdar://problem/6767096>. Should be revisited in NMOS <rdar://problem/7003830>
2619 if (forceOrdering) {
2620 diacriticsInsensitive = false;
2621 compareOptions &= ~kCFCompareDiacriticInsensitive;
2622 }
2623 }
2624 while ((str1Index < rangeToCompare.length) && (str2Index < str2Len)) {
2625 if (strBuf1Len == 0) {
2626 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2627 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str1Char += ('a' - 'A');
2628 str1UsedLen = 1;
2629 } else {
2630 str1Char = strBuf1[strBuf1Index++];
2631 }
2632 if (strBuf2Len == 0) {
2633 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2634 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str2Char += ('a' - 'A');
2635 str2UsedLen = 1;
2636 } else {
2637 str2Char = strBuf2[strBuf2Index++];
2638 }
2639
2640 if (numerically && ((0 == strBuf1Len) && (str1Char <= '9') && (str1Char >= '0')) && ((0 == strBuf2Len) && (str2Char <= '9') && (str2Char >= '0'))) { // If both are not ASCII digits, then don't do numerical comparison here
2641 uint64_t intValue1 = 0, intValue2 = 0; // !!! Doesn't work if numbers are > max uint64_t
2642 CFIndex str1NumRangeIndex = str1Index;
2643 CFIndex str2NumRangeIndex = str2Index;
2644
2645 do {
2646 intValue1 = (intValue1 * 10) + (str1Char - '0');
2647 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, ++str1Index);
2648 } while ((str1Char <= '9') && (str1Char >= '0'));
2649
2650 do {
2651 intValue2 = intValue2 * 10 + (str2Char - '0');
2652 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, ++str2Index);
2653 } while ((str2Char <= '9') && (str2Char >= '0'));
2654
2655 if (intValue1 == intValue2) {
2656 if (forceOrdering && (kCFCompareEqualTo == compareResult) && ((str1Index - str1NumRangeIndex) != (str2Index - str2NumRangeIndex))) {
2657 compareResult = (((str1Index - str1NumRangeIndex) < (str2Index - str2NumRangeIndex)) ? kCFCompareLessThan : kCFCompareGreaterThan);
2658 numericEquivalence = true;
2659 forcedIndex1 = str1NumRangeIndex;
2660 forcedIndex2 = str2NumRangeIndex;
2661 }
2662
2663 continue;
2664 } else if (intValue1 < intValue2) {
2665 if (freeLocale && locale) {
2666 CFRelease(locale);
2667 }
2668 return kCFCompareLessThan;
2669 } else {
2670 if (freeLocale && locale) {
2671 CFRelease(locale);
2672 }
2673 return kCFCompareGreaterThan;
2674 }
2675 }
2676
2677 if (str1Char != str2Char) {
2678 if (!equalityOptions) {
2679 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale));
2680 if (freeLocale && locale) {
2681 CFRelease(locale);
2682 }
2683 return compareResult;
2684 }
2685
2686 if (forceOrdering && (kCFCompareEqualTo == compareResult)) {
2687 compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2688 forcedIndex1 = str1LocalizedIndex;
2689 forcedIndex2 = str2LocalizedIndex;
2690 }
2691
2692 if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars)) {
2693 if (NULL != locale) {
2694 compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale);
2695 if (freeLocale && locale) {
2696 CFRelease(locale);
2697 }
2698 return compareResult;
2699 } else if (!caseInsensitive) {
2700 if (freeLocale && locale) {
2701 CFRelease(locale);
2702 }
2703 return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2704 }
2705 }
2706
2707 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2708 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2709 str1UsedLen = 2;
2710 }
2711
2712 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
2713 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2714 str2UsedLen = 2;
2715 }
2716
2717 if (NULL != ignoredChars) {
2718 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
2719 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2720 if (strBuf1Len == 0) str1Index += str1UsedLen;
2721 if (strBuf2Len > 0) --strBuf2Index;
2722 continue;
2723 }
2724 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
2725 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2726 if (strBuf2Len == 0) str2Index += str2UsedLen;
2727 if (strBuf1Len > 0) -- strBuf1Index;
2728 continue;
2729 }
2730 }
2731
2732 if (diacriticsInsensitive && (str1Index > 0)) {
2733 bool str1Skip = false;
2734 bool str2Skip = false;
2735
2736 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
2737 str1Char = str2Char;
2738 str1Skip = true;
2739 }
2740 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
2741 str2Char = str1Char;
2742 str2Skip = true;
2743 }
2744
2745 if (str1Skip != str2Skip) {
2746 if (str1Skip) str2Index -= str2UsedLen;
2747 if (str2Skip) str1Index -= str1UsedLen;
2748 }
2749 }
2750
2751 if (str1Char != str2Char) {
2752 if (0 == strBuf1Len) {
2753 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
2754 if (strBuf1Len > 0) {
2755 str1Char = *strBuf1;
2756 strBuf1Index = 1;
2757 }
2758 }
2759
2760 if ((0 == strBuf1Len) && (0 < strBuf2Len)) {
2761 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2762 if (freeLocale && locale) {
2763 CFRelease(locale);
2764 }
2765 return compareResult;
2766 }
2767
2768 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
2769 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
2770 if (strBuf2Len > 0) {
2771 str2Char = *strBuf2;
2772 strBuf2Index = 1;
2773 }
2774 if ((0 == strBuf2Len) || (str1Char != str2Char)) {
2775 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2776 if (freeLocale && locale) {
2777 CFRelease(locale);
2778 }
2779 return compareResult;
2780 }
2781 }
2782 }
2783
2784 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
2785 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2786 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
2787 ++strBuf1Index; ++strBuf2Index;
2788 }
2789 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2790 CFComparisonResult res = ((NULL == locale) ? ((strBuf1[strBuf1Index] < strBuf2[strBuf2Index]) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2791 if (freeLocale && locale) {
2792 CFRelease(locale);
2793 }
2794 return res;
2795 }
2796 }
2797 }
2798
2799 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2800 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2801
2802 if (strBuf1Len == 0) str1Index += str1UsedLen;
2803 if (strBuf2Len == 0) str2Index += str2UsedLen;
2804 if ((strBuf1Len == 0) && (strBuf2Len == 0)) {
2805 str1LocalizedIndex = str1Index;
2806 str2LocalizedIndex = str2Index;
2807 }
2808 }
2809
2810 if (diacriticsInsensitive || (NULL != ignoredChars)) {
2811 while (str1Index < rangeToCompare.length) {
2812 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2813 if ((str1Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
2814
2815 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2816
2817 if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char))) break;
2818
2819 str1Index += ((str1Char < 0x10000) ? 1 : 2);
2820 }
2821
2822 while (str2Index < str2Len) {
2823 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2824 if ((str2Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
2825
2826 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2827
2828 if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char))) break;
2829
2830 str2Index += ((str2Char < 0x10000) ? 1 : 2);
2831 }
2832 }
2833 // Need to recalc localized result here for forced ordering, ICU cannot do numericEquivalence
2834 if (!numericEquivalence && (NULL != locale) && (kCFCompareEqualTo != compareResult) && (str1Index == rangeToCompare.length) && (str2Index == str2Len)) compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(forcedIndex1, rangeToCompare.length - forcedIndex1), &inlineBuf2, CFRangeMake(forcedIndex2, str2Len - forcedIndex2), compareOptions, locale);
2835
2836 if (freeLocale && locale) {
2837 CFRelease(locale);
2838 }
2839
2840 return ((str1Index < rangeToCompare.length) ? kCFCompareGreaterThan : ((str2Index < str2Len) ? kCFCompareLessThan : compareResult));
2841 }
2842
2843
2844 CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions) { return CFStringCompareWithOptionsAndLocale(string, string2, rangeToCompare, compareOptions, NULL); }
2845
2846 CFComparisonResult CFStringCompare(CFStringRef string, CFStringRef str2, CFStringCompareFlags options) {
2847 return CFStringCompareWithOptions(string, str2, CFRangeMake(0, CFStringGetLength(string)), options);
2848 }
2849
2850 Boolean CFStringFindWithOptionsAndLocale(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFLocaleRef locale, CFRange *result) {
2851 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2852 CFIndex findStrLen = CFStringGetLength(stringToFind);
2853 Boolean didFind = false;
2854 bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive)) ? true : false);
2855 CFCharacterSetInlineBuffer *ignoredChars = NULL;
2856 CFCharacterSetInlineBuffer csetBuffer;
2857
2858 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
2859 ignoredChars = &csetBuffer;
2860 lengthVariants = true;
2861 }
2862
2863 if ((findStrLen > 0) && (rangeToSearch.length > 0) && ((findStrLen <= rangeToSearch.length) || lengthVariants)) {
2864 UTF32Char strBuf1[kCFStringStackBufferLength];
2865 UTF32Char strBuf2[kCFStringStackBufferLength];
2866 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2867 UTF32Char str1Char = 0, str2Char = 0;
2868 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2869 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2870 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(stringToFind, eightBitEncoding);
2871 const UTF32Char *characters, *charactersLimit;
2872 const uint8_t *langCode = NULL;
2873 CFIndex fromLoc, toLoc;
2874 CFIndex str1Index, str2Index;
2875 CFIndex strBuf1Len, strBuf2Len;
2876 CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length);
2877 bool equalityOptions = ((lengthVariants || (compareOptions & kCFCompareWidthInsensitive)) ? true : false);
2878 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2879 bool forwardAnchor = ((kCFCompareAnchored == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
2880 bool backwardAnchor = (((kCFCompareBackwards|kCFCompareAnchored) == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
2881 int8_t delta;
2882
2883 if (NULL == locale) {
2884 if (compareOptions & kCFCompareLocalized) {
2885 CFLocaleRef currentLocale = CFLocaleCopyCurrent();
2886 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(currentLocale, true);
2887 CFRelease(currentLocale);
2888 }
2889 } else {
2890 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale, true);
2891 }
2892
2893 CFStringInitInlineBuffer(string, &inlineBuf1, CFRangeMake(0, rangeToSearch.location + rangeToSearch.length));
2894 CFStringInitInlineBuffer(stringToFind, &inlineBuf2, CFRangeMake(0, findStrLen));
2895
2896 if (compareOptions & kCFCompareBackwards) {
2897 fromLoc = rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen);
2898 toLoc = (((compareOptions & kCFCompareAnchored) && !lengthVariants) ? fromLoc : rangeToSearch.location);
2899 } else {
2900 fromLoc = rangeToSearch.location;
2901 toLoc = ((compareOptions & kCFCompareAnchored) ? fromLoc : rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen));
2902 }
2903
2904 delta = ((fromLoc <= toLoc) ? 1 : -1);
2905
2906 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2907 uint8_t str1Byte, str2Byte;
2908
2909 while (1) {
2910 str1Index = fromLoc;
2911 str2Index = 0;
2912
2913 while ((str1Index < maxStr1Index) && (str2Index < findStrLen)) {
2914 str1Byte = str1Bytes[str1Index];
2915 str2Byte = str2Bytes[str2Index];
2916
2917 if (str1Byte != str2Byte) {
2918 if (equalityOptions) {
2919 if ((str1Byte < 0x80) && ((NULL == langCode) || ('I' != str1Byte))) {
2920 if (caseInsensitive && (str1Byte >= 'A') && (str1Byte <= 'Z')) str1Byte += ('a' - 'A');
2921 *strBuf1 = str1Byte;
2922 strBuf1Len = 1;
2923 } else {
2924 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2925 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2926 if (1 > strBuf1Len) {
2927 *strBuf1 = str1Char;
2928 strBuf1Len = 1;
2929 }
2930 }
2931
2932 if ((NULL != ignoredChars) && (forwardAnchor || (str1Index != fromLoc)) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str1Byte < 0x80) ? str1Byte : str1Char))) {
2933 ++str1Index;
2934 continue;
2935 }
2936
2937 if ((str2Byte < 0x80) && ((NULL == langCode) || ('I' != str2Byte))) {
2938 if (caseInsensitive && (str2Byte >= 'A') && (str2Byte <= 'Z')) str2Byte += ('a' - 'A');
2939 *strBuf2 = str2Byte;
2940 strBuf2Len = 1;
2941 } else {
2942 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2943 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2944 if (1 > strBuf2Len) {
2945 *strBuf2 = str2Char;
2946 strBuf2Len = 1;
2947 }
2948 }
2949
2950 if ((NULL != ignoredChars) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str2Byte < 0x80) ? str2Byte : str2Char))) {
2951 ++str2Index;
2952 continue;
2953 }
2954
2955 if ((1 == strBuf1Len) && (1 == strBuf2Len)) { // normal case
2956 if (*strBuf1 != *strBuf2) break;
2957 } else {
2958 CFIndex delta;
2959
2960 if (!caseInsensitive && (strBuf1Len != strBuf2Len)) break;
2961 if (memcmp(strBuf1, strBuf2, sizeof(UTF32Char) * __CFMin(strBuf1Len, strBuf2Len))) break;
2962
2963 if (strBuf1Len < strBuf2Len) {
2964 delta = strBuf2Len - strBuf1Len;
2965
2966 if ((str1Index + strBuf1Len + delta) > maxStr1Index) break;
2967
2968 characters = &(strBuf2[strBuf1Len]);
2969 charactersLimit = characters + delta;
2970
2971 while (characters < charactersLimit) {
2972 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1), &inlineBuf1, str1Index + 1, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2973 if ((strBuf1Len > 0) || (*characters != *strBuf1)) break;
2974 ++characters; ++str1Index;
2975 }
2976 if (characters < charactersLimit) break;
2977 } else if (strBuf2Len < strBuf1Len) {
2978 delta = strBuf1Len - strBuf2Len;
2979
2980 if ((str2Index + strBuf2Len + delta) > findStrLen) break;
2981
2982 characters = &(strBuf1[strBuf2Len]);
2983 charactersLimit = characters + delta;
2984
2985 while (characters < charactersLimit) {
2986 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str1Index + 1), &inlineBuf2, str2Index + 1, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2987 if ((strBuf2Len > 0) || (*characters != *strBuf2)) break;
2988 ++characters; ++str2Index;
2989 }
2990 if (characters < charactersLimit) break;
2991 }
2992 }
2993 } else {
2994 break;
2995 }
2996 }
2997 ++str1Index; ++str2Index;
2998 }
2999
3000 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
3001 while (str2Index < findStrLen) {
3002 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
3003
3004 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
3005 ++str2Index;
3006 }
3007 }
3008
3009 if (str2Index == findStrLen) {
3010 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
3011 while (str1Index < maxStr1Index) {
3012 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3013
3014 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
3015 ++str1Index;
3016 }
3017 }
3018
3019 if (!backwardAnchor || (str1Index == maxStr1Index)) {
3020 didFind = true;
3021 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
3022 }
3023 break;
3024 }
3025
3026 if (fromLoc == toLoc) break;
3027 fromLoc += delta;
3028 }
3029 } else if (equalityOptions) {
3030 UTF16Char otherChar;
3031 CFIndex str1UsedLen, str2UsedLen, strBuf1Index = 0, strBuf2Index = 0;
3032 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
3033 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
3034 const uint8_t *combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3035
3036 while (1) {
3037 str1Index = fromLoc;
3038 str2Index = 0;
3039
3040 strBuf1Len = strBuf2Len = 0;
3041
3042 while (str2Index < findStrLen) {
3043 if (strBuf1Len == 0) {
3044 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3045 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A');
3046 str1UsedLen = 1;
3047 } else {
3048 str1Char = strBuf1[strBuf1Index++];
3049 }
3050 if (strBuf2Len == 0) {
3051 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
3052 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A');
3053 str2UsedLen = 1;
3054 } else {
3055 str2Char = strBuf2[strBuf2Index++];
3056 }
3057
3058 if (str1Char != str2Char) {
3059 if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars) && ((NULL == langCode) || !caseInsensitive)) break;
3060
3061 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3062 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3063 str1UsedLen = 2;
3064 }
3065
3066 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
3067 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
3068 str2UsedLen = 2;
3069 }
3070
3071 if (NULL != ignoredChars) {
3072 if ((forwardAnchor || (str1Index != fromLoc)) && (str1Index < maxStr1Index) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
3073 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
3074 if (strBuf1Len == 0) str1Index += str1UsedLen;
3075 if (strBuf2Len > 0) --strBuf2Index;
3076 continue;
3077 }
3078 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
3079 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
3080 if (strBuf2Len == 0) str2Index += str2UsedLen;
3081 if (strBuf1Len > 0) -- strBuf1Index;
3082 continue;
3083 }
3084 }
3085
3086 if (diacriticsInsensitive && (str1Index > fromLoc)) {
3087 bool str1Skip = false;
3088 bool str2Skip = false;
3089
3090 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
3091 str1Char = str2Char;
3092 str1Skip = true;
3093 }
3094 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
3095 str2Char = str1Char;
3096 str2Skip = true;
3097 }
3098
3099 if (str1Skip != str2Skip) {
3100 if (str1Skip) str2Index -= str2UsedLen;
3101 if (str2Skip) str1Index -= str1UsedLen;
3102 }
3103 }
3104
3105 if (str1Char != str2Char) {
3106 if (0 == strBuf1Len) {
3107 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
3108 if (strBuf1Len > 0) {
3109 str1Char = *strBuf1;
3110 strBuf1Index = 1;
3111 }
3112 }
3113
3114 if ((0 == strBuf1Len) && (0 < strBuf2Len)) break;
3115
3116 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
3117 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
3118 if ((0 == strBuf2Len) || (str1Char != *strBuf2)) break;
3119 strBuf2Index = 1;
3120 }
3121 }
3122
3123 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
3124 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
3125 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
3126 ++strBuf1Index; ++strBuf2Index;
3127 }
3128 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) break;
3129 }
3130 }
3131
3132 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
3133 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
3134
3135 if (strBuf1Len == 0) str1Index += str1UsedLen;
3136 if (strBuf2Len == 0) str2Index += str2UsedLen;
3137 }
3138
3139 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
3140 while (str2Index < findStrLen) {
3141 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
3142 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
3143 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
3144 }
3145 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
3146 str2Index += ((str2Char < 0x10000) ? 1 : 2);
3147 }
3148 }
3149
3150 if (str2Index == findStrLen) {
3151 bool match = true;
3152
3153 if (strBuf1Len > 0) {
3154 match = false;
3155
3156 if (diacriticsInsensitive && (strBuf1[0] < 0x0510)) {
3157 while (strBuf1Index < strBuf1Len) {
3158 if (!CFUniCharIsMemberOfBitmap(strBuf1[strBuf1Index], ((strBuf1[strBuf1Index] < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (strBuf1[strBuf1Index] >> 16))))) break;
3159 ++strBuf1Index;
3160 }
3161
3162 if (strBuf1Index == strBuf1Len) {
3163 str1Index += str1UsedLen;
3164 match = true;
3165 }
3166 }
3167 }
3168
3169 if (match && (compareOptions & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) && (str1Index < maxStr1Index)) {
3170 const uint8_t *nonBaseBitmap;
3171
3172 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3173
3174 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3175 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3176 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16));
3177 } else {
3178 nonBaseBitmap = graphemeBMP;
3179 }
3180
3181 if (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBitmap)) {
3182 if (diacriticsInsensitive) {
3183 if (str1Char < 0x10000) {
3184 CFIndex index = str1Index;
3185
3186 do {
3187 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, --index);
3188 } while (CFUniCharIsMemberOfBitmap(str1Char, graphemeBMP), (rangeToSearch.location < index));
3189
3190 if (str1Char < 0x0510) {
3191 while (++str1Index < maxStr1Index) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), graphemeBMP)) break;
3192 }
3193 }
3194 } else {
3195 match = false;
3196 }
3197 } else if (!diacriticsInsensitive) {
3198 otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index - 1);
3199
3200 // this is assuming viramas are only in BMP ???
3201 if ((str1Char == COMBINING_GRAPHEME_JOINER) || (otherChar == COMBINING_GRAPHEME_JOINER) || (otherChar == ZERO_WIDTH_JOINER) || ((otherChar >= HANGUL_CHOSEONG_START) && (otherChar <= HANGUL_JONGSEONG_END)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar, combClassBMP) == 9)) {
3202 CFRange clusterRange = CFStringGetRangeOfCharacterClusterAtIndex(string, str1Index - 1, kCFStringGraphemeCluster);
3203
3204 if (str1Index < (clusterRange.location + clusterRange.length)) match = false;
3205 }
3206 }
3207 }
3208
3209 if (match) {
3210 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
3211 while (str1Index < maxStr1Index) {
3212 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3213 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3214 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3215 }
3216 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
3217 str1Index += ((str1Char < 0x10000) ? 1 : 2);
3218 }
3219 }
3220
3221 if (!backwardAnchor || (str1Index == maxStr1Index)) {
3222 didFind = true;
3223 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
3224 }
3225 break;
3226 }
3227 }
3228
3229 if (fromLoc == toLoc) break;
3230 fromLoc += delta;
3231 }
3232 } else {
3233 while (1) {
3234 str1Index = fromLoc;
3235 str2Index = 0;
3236
3237 while (str2Index < findStrLen) {
3238 if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index)) break;
3239
3240 ++str1Index; ++str2Index;
3241 }
3242
3243 if (str2Index == findStrLen) {
3244 didFind = true;
3245 if (NULL != result) *result = CFRangeMake(fromLoc, findStrLen);
3246 break;
3247 }
3248
3249 if (fromLoc == toLoc) break;
3250 fromLoc += delta;
3251 }
3252 }
3253 }
3254
3255 return didFind;
3256 }
3257
3258
3259 Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFRange *result) { return CFStringFindWithOptionsAndLocale(string, stringToFind, rangeToSearch, compareOptions, NULL, result); }
3260
3261 // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
3262
3263 static const void *__rangeRetain(CFAllocatorRef allocator, const void *ptr) {
3264 CFRetain(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
3265 return ptr;
3266 }
3267
3268 static void __rangeRelease(CFAllocatorRef allocator, const void *ptr) {
3269 CFRelease(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
3270 }
3271
3272 static CFStringRef __rangeCopyDescription(const void *ptr) {
3273 CFRange range = *(CFRange *)ptr;
3274 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("{%ld, %ld}"), (long)range.location, (long)range.length);
3275 }
3276
3277 static Boolean __rangeEqual(const void *ptr1, const void *ptr2) {
3278 CFRange range1 = *(CFRange *)ptr1;
3279 CFRange range2 = *(CFRange *)ptr2;
3280 return (range1.location == range2.location) && (range1.length == range2.length);
3281 }
3282
3283
3284 CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
3285 CFRange foundRange;
3286 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
3287 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
3288 CFMutableDataRef rangeStorage = NULL; // Basically an array of CFRange, CFDataRef (packed)
3289 uint8_t *rangeStorageBytes = NULL;
3290 CFIndex foundCount = 0;
3291 CFIndex capacity = 0; // Number of CFRange, CFDataRef element slots in rangeStorage
3292
3293 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3294
3295 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
3296 // Determine the next range
3297 if (backwards) {
3298 rangeToSearch.length = foundRange.location - rangeToSearch.location;
3299 } else {
3300 rangeToSearch.location = foundRange.location + foundRange.length;
3301 rangeToSearch.length = endIndex - rangeToSearch.location;
3302 }
3303
3304 // If necessary, grow the data and squirrel away the found range
3305 if (foundCount >= capacity) {
3306 if (rangeStorage == NULL) rangeStorage = CFDataCreateMutable(alloc, 0);
3307 capacity = (capacity + 4) * 2;
3308 CFDataSetLength(rangeStorage, capacity * (sizeof(CFRange) + sizeof(CFDataRef)));
3309 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage) + foundCount * (sizeof(CFRange) + sizeof(CFDataRef));
3310 }
3311 memmove(rangeStorageBytes, &foundRange, sizeof(CFRange)); // The range
3312 memmove(rangeStorageBytes + sizeof(CFRange), &rangeStorage, sizeof(CFDataRef)); // The data
3313 rangeStorageBytes += (sizeof(CFRange) + sizeof(CFDataRef));
3314 foundCount++;
3315 }
3316
3317 if (foundCount > 0) {
3318 CFIndex cnt;
3319 CFMutableArrayRef array;
3320 const CFArrayCallBacks callbacks = {0, __rangeRetain, __rangeRelease, __rangeCopyDescription, __rangeEqual};
3321
3322 CFDataSetLength(rangeStorage, foundCount * (sizeof(CFRange) + sizeof(CFDataRef))); // Tighten storage up
3323 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage);
3324
3325 array = CFArrayCreateMutable(alloc, foundCount * sizeof(CFRange *), &callbacks);
3326 for (cnt = 0; cnt < foundCount; cnt++) {
3327 // Each element points to the appropriate CFRange in the CFData
3328 CFArrayAppendValue(array, rangeStorageBytes + cnt * (sizeof(CFRange) + sizeof(CFDataRef)));
3329 }
3330 CFRelease(rangeStorage); // We want the data to go away when all CFRanges inside it are released...
3331 return array;
3332 } else {
3333 return NULL;
3334 }
3335 }
3336
3337
3338 CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFStringCompareFlags compareOptions) {
3339 CFRange foundRange;
3340
3341 if (CFStringFindWithOptions(string, stringToFind, CFRangeMake(0, CFStringGetLength(string)), compareOptions, &foundRange)) {
3342 return foundRange;
3343 } else {
3344 return CFRangeMake(kCFNotFound, 0);
3345 }
3346 }
3347
3348 Boolean CFStringHasPrefix(CFStringRef string, CFStringRef prefix) {
3349 return CFStringFindWithOptions(string, prefix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored, NULL);
3350 }
3351
3352 Boolean CFStringHasSuffix(CFStringRef string, CFStringRef suffix) {
3353 return CFStringFindWithOptions(string, suffix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored|kCFCompareBackwards, NULL);
3354 }
3355
3356 #define MAX_TRANSCODING_LENGTH 4
3357
3358 #define HANGUL_JONGSEONG_COUNT (28)
3359
3360 CF_INLINE bool _CFStringIsHangulLVT(UTF32Char character) {
3361 return (((character - HANGUL_SYLLABLE_START) % HANGUL_JONGSEONG_COUNT) ? true : false);
3362 }
3363
// 16-entry length table. NOTE(review): presumably indexed by the low 4 bits of a transcoding hint character - confirm at the use site
static uint8_t __CFTranscodingHintLength[] = {
    2, 3, 4, 4,		/* 0x0 - 0x3 */
    4, 4, 4, 2,		/* 0x4 - 0x7 */
    2, 2, 2, 4,		/* 0x8 - 0xB */
    0, 0, 0, 0		/* 0xC - 0xF */
};
3367
// States of the Hangul syllable scanner used when computing composed character ranges
enum {
    kCFStringHangulStateL = 0,		// conjoining jamo below HANGUL_JUNGSEONG_START
    kCFStringHangulStateV = 1,		// conjoining jamo below HANGUL_JONGSEONG_START
    kCFStringHangulStateT = 2,		// conjoining jamo below HANGUL_SYLLABLE_START
    kCFStringHangulStateLV = 3,		// precomposed syllable for which _CFStringIsHangulLVT() is false
    kCFStringHangulStateLVT = 4,	// precomposed syllable for which _CFStringIsHangulLVT() is true
    kCFStringHangulStateBreak = 5	// sequence cannot be extended any further
};
3376
/*
 * Computes the range of the composed character sequence that contains index `start` of `buffer`.
 *
 * buffer    - inline buffer the characters are read from.
 * start     - index of the character whose cluster is wanted.
 * type      - cluster type; for kCFStringBackwardDeletionCluster, characters in 0x0530..0x194F are never combined.
 * bmpBitmap - membership bitmap used for BMP (plane 0) characters.
 * csetType  - character set identifier passed to CFUniCharGetBitmapPtrForPlane() to fetch the bitmap of other planes.
 *
 * The work is done in three passes: (1) walk backward while the current character is a member of the bitmap
 * (or U+FF9E, U+FF9F, U+F870..U+F87F), (2) if the character reached is Hangul, extend backward and forward
 * with a small jamo/syllable state machine, (3) walk forward over following members of the bitmap.
 * Returns CFRangeMake(start, end - start) with the adjusted start and end.
 */
3377 static CFRange _CFStringInlineBufferGetComposedRange(CFStringInlineBuffer *buffer, CFIndex start, CFStringCharacterClusterType type, const uint8_t *bmpBitmap, CFIndex csetType) {
3378 CFIndex end = start + 1;
3379 const uint8_t *bitmap = bmpBitmap;
3380 UTF32Char character;
3381 UTF16Char otherSurrogate;
3382 uint8_t step;
3383
3384 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3385
3386 // We don't combine characters in Armenian ~ Limbu range for backward deletion
3387 if ((type != kCFStringBackwardDeletionCluster) || (character < 0x0530) || (character > 0x194F)) {
3388 // Check if the current is surrogate
3389 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start + 1)))) {
3390 ++end;
3391 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3392 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3393 }
3394
3395 // Extend backward
3396 while (start > 0) {
3397 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3398
3399 if (character < 0x10000) { // the first round could already be non-BMP
// A low surrogate preceded by a high surrogate is merged into one code point and start is moved onto the high surrogate.
3400 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)))) {
3401 character = CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate, character);
3402 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3403 if (--start == 0) break; // starting with non-BMP combining mark
3404 } else {
3405 bitmap = bmpBitmap;
3406 }
3407 }
3408
// Stop at the first character that is neither in the bitmap nor one of U+FF9E, U+FF9F, U+F870..U+F87F.
3409 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3410
3411 --start;
3412
3413 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3414 }
3415 }
3416
3417 // Hangul
3418 if (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END))) {
3419 uint8_t state;
3420 uint8_t initialState;
3421
// Classify the character at start: leading jamo (L), vowel jamo (V), trailing jamo (T) or a precomposed LV / LVT syllable.
3422 if (character < HANGUL_JUNGSEONG_START) {
3423 state = kCFStringHangulStateL;
3424 } else if (character < HANGUL_JONGSEONG_START) {
3425 state = kCFStringHangulStateV;
3426 } else if (character < HANGUL_SYLLABLE_START) {
3427 state = kCFStringHangulStateT;
3428 } else {
3429 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3430 }
3431 initialState = state;
3432
3433 // Extend backward
// NOTE(review): when start is 0 this reads index -1; presumably the inline-buffer accessor returns 0 for an out-of-range index, which ends the loop - confirm.
3434 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)) >= HANGUL_CHOSEONG_START) && (character <= HANGUL_SYLLABLE_END) && ((character <= HANGUL_JONGSEONG_END) || (character >= HANGUL_SYLLABLE_START))) {
3435 switch (state) {
3436 case kCFStringHangulStateV:
3437 if (character <= HANGUL_CHOSEONG_END) {
3438 state = kCFStringHangulStateL;
3439 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END) && !_CFStringIsHangulLVT(character)) {
3440 state = kCFStringHangulStateLV;
3441 } else if (character > HANGUL_JUNGSEONG_END) {
3442 state = kCFStringHangulStateBreak;
3443 }
3444 break;
3445
3446 case kCFStringHangulStateT:
3447 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JUNGSEONG_END)) {
3448 state = kCFStringHangulStateV;
3449 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)) {
3450 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3451 } else if (character < HANGUL_JUNGSEONG_START) {
3452 state = kCFStringHangulStateBreak;
3453 }
3454 break;
3455
3456 default:
3457 state = ((character < HANGUL_JUNGSEONG_START) ? kCFStringHangulStateL : kCFStringHangulStateBreak);
3458 break;
3459 }
3460
3461 if (state == kCFStringHangulStateBreak) break;
3462 --start;
3463 }
3464
3465 // Extend forward
// The forward scan restarts from the classification of the original character, not from where the backward scan ended.
3466 state = initialState;
3467 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) && (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)))) {
3468 switch (state) {
3469 case kCFStringHangulStateLV:
3470 case kCFStringHangulStateV:
3471 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) {
3472 state = ((character < HANGUL_JONGSEONG_START) ? kCFStringHangulStateV : kCFStringHangulStateT);
3473 } else {
3474 state = kCFStringHangulStateBreak;
3475 }
3476 break;
3477
3478 case kCFStringHangulStateLVT:
3479 case kCFStringHangulStateT:
3480 state = (((character >= HANGUL_JONGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) ? kCFStringHangulStateT : kCFStringHangulStateBreak);
3481 break;
3482
3483 default:
3484 if (character < HANGUL_JUNGSEONG_START) {
3485 state = kCFStringHangulStateL;
3486 } else if (character < HANGUL_JONGSEONG_START) {
3487 state = kCFStringHangulStateV;
3488 } else if (character >= HANGUL_SYLLABLE_START) {
3489 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3490 } else {
3491 state = kCFStringHangulStateBreak;
3492 }
3493 break;
3494 }
3495
3496 if (state == kCFStringHangulStateBreak) break;
3497 ++end;
3498 }
3499 }
3500
3501 // Extend forward
// The loop ends when the accessor yields 0 or a character that is not a member; step is 2 when a surrogate pair was consumed.
3502 while ((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) {
3503 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3504
3505 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, end + 1)))) {
3506 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3507 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3508 step = 2;
3509 } else {
3510 bitmap = bmpBitmap;
3511 step = 1;
3512 }
3513
3514 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3515
3516 end += step;
3517 }
3518
3519 return CFRangeMake(start, end - start);
3520 }
3521
3522 CF_INLINE bool _CFStringIsVirama(UTF32Char character, const uint8_t *combClassBMP) {
3523 return ((character == COMBINING_GRAPHEME_JOINER) || (CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)((character < 0x10000) ? combClassBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (character >> 16)))) == 9) ? true : false);
3524 }
3525
3526 CFRange CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string, CFIndex charIndex, CFStringCharacterClusterType type) {
3527 CFRange range;
3528 CFIndex currentIndex;
3529 CFIndex length = CFStringGetLength(string);
3530 CFIndex csetType = ((kCFStringGraphemeCluster == type) ? kCFUniCharGraphemeExtendCharacterSet : kCFUniCharNonBaseCharacterSet);
3531 CFStringInlineBuffer stringBuffer;
3532 const uint8_t *bmpBitmap;
3533 const uint8_t *letterBMP;
3534 static const uint8_t *combClassBMP = NULL;
3535 UTF32Char character;
3536 UTF16Char otherSurrogate;
3537
3538 if (charIndex >= length) return CFRangeMake(kCFNotFound, 0);
3539
3540 /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
3541 */
3542 if (!CF_IS_OBJC(__kCFStringTypeID, string) && __CFStrIsEightBit(string)) return CFRangeMake(charIndex, 1);
3543
3544 bmpBitmap = CFUniCharGetBitmapPtrForPlane(csetType, 0);
3545 letterBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, 0);
3546 if (NULL == combClassBMP) combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3547
3548 CFStringInitInlineBuffer(string, &stringBuffer, CFRangeMake(0, length));
3549
3550 // Get composed character sequence first
3551 range = _CFStringInlineBufferGetComposedRange(&stringBuffer, charIndex, type, bmpBitmap, csetType);
3552
3553 // Do grapheme joiners
3554 if (type < kCFStringCursorMovementCluster) {
3555 const uint8_t *letter = letterBMP;
3556
3557 // Check to see if we have a letter at the beginning of initial cluster
3558 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location);
3559
3560 if ((range.length > 1) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location + 1)))) {
3561 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3562 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3563 }
3564
3565 if ((character == ZERO_WIDTH_JOINER) || CFUniCharIsMemberOfBitmap(character, letter)) {
3566 CFRange otherRange;
3567
3568 // Check if preceded by grapheme joiners (U034F and viramas)
3569 otherRange.location = currentIndex = range.location;
3570
3571 while (currentIndex > 1) {
3572 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex);
3573
3574 // ??? We're assuming viramas only in BMP
3575 if ((_CFStringIsVirama(character, combClassBMP) || ((character == ZERO_WIDTH_JOINER) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex), combClassBMP))) && (currentIndex > 0)) {
3576 --currentIndex;
3577 } else {
3578 break;
3579 }
3580
3581 currentIndex = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType).location;
3582
3583 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3584
3585 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1)))) {
3586 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3587 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3588 --currentIndex;
3589 } else {
3590 letter = letterBMP;
3591 }
3592
3593 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3594 range.location = currentIndex;
3595 }
3596
3597 range.length += otherRange.location - range.location;
3598
3599 // Check if followed by grapheme joiners
3600 if ((range.length > 1) && ((range.location + range.length) < length)) {
3601 otherRange = range;
3602 currentIndex = otherRange.location + otherRange.length;
3603
3604 do {
3605 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1);
3606
3607 // ??? We're assuming viramas only in BMP
3608 if ((character != ZERO_WIDTH_JOINER) && !_CFStringIsVirama(character, combClassBMP)) break;
3609
3610 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3611
3612 if (character == ZERO_WIDTH_JOINER) character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, ++currentIndex);
3613
3614 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex + 1)))) {
3615 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3616 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3617 } else {
3618 letter = letterBMP;
3619 }
3620
3621 // We only conjoin letters
3622 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3623 otherRange = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType);
3624 currentIndex = otherRange.location + otherRange.length;
3625 } while ((otherRange.location + otherRange.length) < length);
3626 range.length = currentIndex - range.location;
3627 }
3628 }
3629 }
3630
3631 // Check if we're part of prefix transcoding hints
3632 CFIndex otherIndex;
3633
3634 currentIndex = (range.location + range.length) - (MAX_TRANSCODING_LENGTH + 1);
3635 if (currentIndex < 0) currentIndex = 0;
3636
3637 while (currentIndex <= range.location) {
3638 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3639
3640 if ((character & 0x1FFFF0) == 0xF860) { // transcoding hint
3641 otherIndex = currentIndex + __CFTranscodingHintLength[(character - 0xF860)] + 1;
3642 if (otherIndex >= (range.location + range.length)) {
3643 if (otherIndex <= length) {
3644 range.location = currentIndex;
3645 range.length = otherIndex - currentIndex;
3646 }
3647 break;
3648 }
3649 }
3650 ++currentIndex;
3651 }
3652
3653 return range;
3654 }
3655
/* Convenience wrapper: returns the result of CFStringGetRangeOfCharacterClusterAtIndex() for theIndex
   with the cluster type fixed to kCFStringComposedCharacterCluster. */
3656 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
3657 return CFStringGetRangeOfCharacterClusterAtIndex(theString, theIndex, kCFStringComposedCharacterCluster);
3658 }
3659
3660 /*!
3661 @function CFStringFindCharacterFromSet
3662 Query the range of characters contained in the specified character set.
3663 @param theString The CFString which is to be searched. If this
3664 parameter is not a valid CFString, the behavior is
3665 undefined.
3666 @param theSet The CFCharacterSet against which the membership
3667 of characters is checked. If this parameter is not a valid
3668 CFCharacterSet, the behavior is undefined.
3669 @param range The range of characters within the string to search. If
3670 the range location or end point (defined by the location
3671 plus length minus 1) are outside the index space of the
3672 string (0 to N-1 inclusive, where N is the length of the
3673 string), the behavior is undefined. If the range length is
3674 negative, the behavior is undefined. The range may be empty
3675 (length 0), in which case no search is performed.
3676 @param searchOptions The bitwise-or'ed option flags to control
3677 the search behavior. The supported options are
3678 kCFCompareBackwards and kCFCompareAnchored.
3679 If other option flags are specified, the behavior
3680 is undefined.
3681 @param result The pointer to a CFRange supplied by the caller in
3682 which the search result is stored. If a pointer to an invalid
3683 memory is specified, the behavior is undefined.
3684 @result true, if at least a character which is a member of the character
3685 set is found and result is filled, otherwise, false.
3686 */
3687 #define SURROGATE_START 0xD800
3688 #define SURROGATE_END 0xDFFF
3689
3690 CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFStringCompareFlags searchOptions, CFRange *result) {
3691 CFStringInlineBuffer stringBuffer;
3692 CFCharacterSetInlineBuffer csetBuffer;
3693 UniChar ch;
3694 CFIndex step;
3695 CFIndex fromLoc, toLoc, cnt; // fromLoc and toLoc are inclusive
3696 Boolean found = false;
3697 Boolean done = false;
3698
3699 //#warning FIX ME !! Should support kCFCompareNonliteral
3700
3701 if ((rangeToSearch.location + rangeToSearch.length > CFStringGetLength(theString)) || (rangeToSearch.length == 0)) return false;
3702
3703 if (searchOptions & kCFCompareBackwards) {
3704 fromLoc = rangeToSearch.location + rangeToSearch.length - 1;
3705 toLoc = rangeToSearch.location;
3706 } else {
3707 fromLoc = rangeToSearch.location;
3708 toLoc = rangeToSearch.location + rangeToSearch.length - 1;
3709 }
3710 if (searchOptions & kCFCompareAnchored) {
3711 toLoc = fromLoc;
3712 }
3713
3714 step = (fromLoc <= toLoc) ? 1 : -1;
3715 cnt = fromLoc;
3716
3717 CFStringInitInlineBuffer(theString, &stringBuffer, rangeToSearch);
3718 CFCharacterSetInitInlineBuffer(theSet, &csetBuffer);
3719
3720 do {
3721 ch = CFStringGetCharacterFromInlineBuffer(&stringBuffer, cnt - rangeToSearch.location);
3722 if ((ch >= SURROGATE_START) && (ch <= SURROGATE_END)) {
3723 int otherCharIndex = cnt + step;
3724
3725 if (((step < 0) && (otherCharIndex < toLoc)) || ((step > 0) && (otherCharIndex > toLoc))) {
3726 done = true;
3727 } else {
3728 UniChar highChar;
3729 UniChar lowChar = CFStringGetCharacterFromInlineBuffer(&stringBuffer, otherCharIndex - rangeToSearch.location);
3730
3731 if (cnt < otherCharIndex) {
3732 highChar = ch;
3733 } else {
3734 highChar = lowChar;
3735 lowChar = ch;
3736 }
3737
3738 if (CFUniCharIsSurrogateHighCharacter(highChar) && CFUniCharIsSurrogateLowCharacter(lowChar) && CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, CFUniCharGetLongCharacterForSurrogatePair(highChar, lowChar))) {
3739 if (result) *result = CFRangeMake((cnt < otherCharIndex ? cnt : otherCharIndex), 2);
3740 return true;
3741 } else if (otherCharIndex == toLoc) {
3742 done = true;
3743 } else {
3744 cnt = otherCharIndex + step;
3745 }
3746 }
3747 } else if (CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, ch)) {
3748 done = found = true;
3749 } else if (cnt == toLoc) {
3750 done = true;
3751 } else {
3752 cnt += step;
3753 }
3754 } while (!done);
3755
3756 if (found && result) *result = CFRangeMake(cnt, 1);
3757 return found;
3758 }
3759
3760 /* Line range code */
3761
3762 #define CarriageReturn '\r' /* 0x0d */
3763 #define NewLine '\n' /* 0x0a */
3764 #define NextLine 0x0085
3765 #define LineSeparator 0x2028
3766 #define ParaSeparator 0x2029
3767
3768 CF_INLINE Boolean isALineSeparatorTypeCharacter(UniChar ch, Boolean includeLineEndings) {
3769 if (ch > CarriageReturn && ch < NextLine) return false; /* Quick test to cover most chars */
3770 return (ch == NewLine || ch == CarriageReturn || ch == ParaSeparator || (includeLineEndings && (ch == NextLine || ch == LineSeparator))) ? true : false;
3771 }
3772
3773 static void __CFStringGetLineOrParagraphBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex, Boolean includeLineEndings) {
3774 CFIndex len;
3775 CFStringInlineBuffer buf;
3776 UniChar ch;
3777
3778 __CFAssertIsString(string);
3779 __CFAssertRangeIsInStringBounds(string, range.location, range.length);
3780
3781 len = __CFStrLength(string);
3782
3783 if (lineBeginIndex) {
3784 CFIndex start;
3785 if (range.location == 0) {
3786 start = 0;
3787 } else {
3788 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3789 CFIndex buf_idx = range.location;
3790
3791 /* Take care of the special case where start happens to fall right between \r and \n */
3792 ch = CFStringGetCharacterFromInlineBuffer(&buf, buf_idx);
3793 buf_idx--;
3794 if ((ch == NewLine) && (CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) == CarriageReturn)) {
3795 buf_idx--;
3796 }
3797 while (1) {
3798 if (buf_idx < 0) {
3799 start = 0;
3800 break;
3801 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx), includeLineEndings)) {
3802 start = buf_idx + 1;
3803 break;
3804 } else {
3805 buf_idx--;
3806 }
3807 }
3808 }
3809 *lineBeginIndex = start;
3810 }
3811
3812 /* Now find the ending point */
3813 if (lineEndIndex || contentsEndIndex) {
3814 CFIndex endOfContents, lineSeparatorLength = 1; /* 1 by default */
3815 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3816 CFIndex buf_idx = range.location + range.length - (range.length ? 1 : 0);
3817 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
3818 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3819 if (ch == NewLine) {
3820 endOfContents = buf_idx;
3821 buf_idx--;
3822 if (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == CarriageReturn) {
3823 lineSeparatorLength = 2;
3824 endOfContents--;
3825 }
3826 } else {
3827 while (1) {
3828 if (isALineSeparatorTypeCharacter(ch, includeLineEndings)) {
3829 endOfContents = buf_idx; /* This is actually end of contentsRange */
3830 buf_idx++; /* OK for this to go past the end */
3831 if ((ch == CarriageReturn) && (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == NewLine)) {
3832 lineSeparatorLength = 2;
3833 }
3834 break;
3835 } else if (buf_idx >= len) {
3836 endOfContents = len;
3837 lineSeparatorLength = 0;
3838 break;
3839 } else {
3840 buf_idx++;
3841 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3842 }
3843 }
3844 }
3845 if (contentsEndIndex) *contentsEndIndex = endOfContents;
3846 if (lineEndIndex) *lineEndIndex = endOfContents + lineSeparatorLength;
3847 }
3848 }
3849
/* Reports the bounds of the line(s) containing `range` through the three optional out-parameters
   by calling __CFStringGetLineOrParagraphBounds() with includeLineEndings set to true.
   NOTE(review): CF_OBJC_FUNCDISPATCHV presumably forwards to -getLineStart:end:contentsEnd:forRange:
   and returns when `string` is an Objective-C object - confirm against the macro definition. */
3850 void CFStringGetLineBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex) {
3851 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)string, getLineStart:(NSUInteger *)lineBeginIndex end:(NSUInteger *)lineEndIndex contentsEnd:(NSUInteger *)contentsEndIndex forRange:NSMakeRange(range.location, range.length));
3852 __CFStringGetLineOrParagraphBounds(string, range, lineBeginIndex, lineEndIndex, contentsEndIndex, true);
3853 }
3854
/* Reports the bounds of the paragraph(s) containing `range` through the three optional out-parameters
   by calling __CFStringGetLineOrParagraphBounds() with includeLineEndings set to false.
   NOTE(review): CF_OBJC_FUNCDISPATCHV presumably forwards to -getParagraphStart:end:contentsEnd:forRange:
   and returns when `string` is an Objective-C object - confirm against the macro definition. */
3855 void CFStringGetParagraphBounds(CFStringRef string, CFRange range, CFIndex *parBeginIndex, CFIndex *parEndIndex, CFIndex *contentsEndIndex) {
3856 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)string, getParagraphStart:(NSUInteger *)parBeginIndex end:(NSUInteger *)parEndIndex contentsEnd:(NSUInteger *)contentsEndIndex forRange:NSMakeRange(range.location, range.length));
3857 __CFStringGetLineOrParagraphBounds(string, range, parBeginIndex, parEndIndex, contentsEndIndex, false);
3858 }
3859
3860
3861 CFStringRef CFStringCreateByCombiningStrings(CFAllocatorRef alloc, CFArrayRef array, CFStringRef separatorString) {
3862 CFIndex numChars;
3863 CFIndex separatorNumByte;
3864 CFIndex stringCount = CFArrayGetCount(array);
3865 Boolean isSepCFString = !CF_IS_OBJC(__kCFStringTypeID, separatorString);
3866 Boolean canBeEightbit = isSepCFString && __CFStrIsEightBit(separatorString);
3867 CFIndex idx;
3868 CFStringRef otherString;
3869 void *buffer;
3870 uint8_t *bufPtr;
3871 const void *separatorContents = NULL;
3872
3873 if (stringCount == 0) {
3874 return CFStringCreateWithCharacters(alloc, NULL, 0);
3875 } else if (stringCount == 1) {
3876 return (CFStringRef)CFStringCreateCopy(alloc, (CFStringRef)CFArrayGetValueAtIndex(array, 0));
3877 }
3878
3879 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3880
3881 numChars = CFStringGetLength(separatorString) * (stringCount - 1);
3882 for (idx = 0; idx < stringCount; idx++) {
3883 otherString = (CFStringRef)CFArrayGetValueAtIndex(array, idx);
3884 numChars += CFStringGetLength(otherString);
3885 // canBeEightbit is already false if the separator is an NSString...
3886 if (CF_IS_OBJC(__kCFStringTypeID, otherString) || ! __CFStrIsEightBit(otherString)) canBeEightbit = false;
3887 }
3888
3889 buffer = (uint8_t *)CFAllocatorAllocate(alloc, canBeEightbit ? ((numChars + 1) * sizeof(uint8_t)) : (numChars * sizeof(UniChar)), 0);
3890 bufPtr = (uint8_t *)buffer;
3891 if (__CFOASafe) __CFSetLastAllocationEventName(buffer, "CFString (store)");
3892 separatorNumByte = CFStringGetLength(separatorString) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3893
3894 for (idx = 0; idx < stringCount; idx++) {
3895 if (idx) { // add separator here unless first string
3896 if (separatorContents) {
3897 memmove(bufPtr, separatorContents, separatorNumByte);
3898 } else {
3899 if (!isSepCFString) { // NSString
3900 CFStringGetCharacters(separatorString, CFRangeMake(0, CFStringGetLength(separatorString)), (UniChar *)bufPtr);
3901 } else if (canBeEightbit) {
3902 memmove(bufPtr, (const uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), separatorNumByte);
3903 } else {
3904 __CFStrConvertBytesToUnicode((uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), (UniChar *)bufPtr, __CFStrLength(separatorString));
3905 }
3906 separatorContents = bufPtr;
3907 }
3908 bufPtr += separatorNumByte;
3909 }
3910
3911 otherString = (CFStringRef )CFArrayGetValueAtIndex(array, idx);
3912 if (CF_IS_OBJC(__kCFStringTypeID, otherString)) {
3913 CFIndex otherLength = CFStringGetLength(otherString);
3914 CFStringGetCharacters(otherString, CFRangeMake(0, otherLength), (UniChar *)bufPtr);
3915 bufPtr += otherLength * sizeof(UniChar);
3916 } else {
3917 const uint8_t * otherContents = (const uint8_t *)__CFStrContents(otherString);
3918 CFIndex otherNumByte = __CFStrLength2(otherString, otherContents) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3919
3920 if (canBeEightbit || __CFStrIsUnicode(otherString)) {
3921 memmove(bufPtr, otherContents + __CFStrSkipAnyLengthByte(otherString), otherNumByte);
3922 } else {
3923 __CFStrConvertBytesToUnicode(otherContents + __CFStrSkipAnyLengthByte(otherString), (UniChar *)bufPtr, __CFStrLength2(otherString, otherContents));
3924 }
3925 bufPtr += otherNumByte;
3926 }
3927 }
3928 if (canBeEightbit) *bufPtr = 0; // NULL byte;
3929
3930 return canBeEightbit ?
3931 CFStringCreateWithCStringNoCopy(alloc, (const char*)buffer, __CFStringGetEightBitStringEncoding(), alloc) :
3932 CFStringCreateWithCharactersNoCopy(alloc, (UniChar *)buffer, numChars, alloc);
3933 }
3934
3935
3936 CFArrayRef CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc, CFStringRef string, CFStringRef separatorString) {
3937 CFArrayRef separatorRanges;
3938 CFIndex length = CFStringGetLength(string);
3939 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
3940 if (!(separatorRanges = CFStringCreateArrayWithFindResults(alloc, string, separatorString, CFRangeMake(0, length), 0))) {
3941 return CFArrayCreate(alloc, (const void **)&string, 1, & kCFTypeArrayCallBacks);
3942 } else {
3943 CFIndex idx;
3944 CFIndex count = CFArrayGetCount(separatorRanges);
3945 CFIndex startIndex = 0;
3946 CFIndex numChars;
3947 CFMutableArrayRef array = CFArrayCreateMutable(alloc, count + 2, & kCFTypeArrayCallBacks);
3948 const CFRange *currentRange;
3949 CFStringRef substring;
3950
3951 for (idx = 0;idx < count;idx++) {
3952 currentRange = (const CFRange *)CFArrayGetValueAtIndex(separatorRanges, idx);
3953 numChars = currentRange->location - startIndex;
3954 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, numChars));
3955 CFArrayAppendValue(array, substring);
3956 CFRelease(substring);
3957 startIndex = currentRange->location + currentRange->length;
3958 }
3959 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, length - startIndex));
3960 CFArrayAppendValue(array, substring);
3961 CFRelease(substring);
3962
3963 CFRelease(separatorRanges);
3964
3965 return array;
3966 }
3967 }
3968
3969 CFStringRef CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc, CFDataRef data, CFStringEncoding encoding) {
3970 return CFStringCreateWithBytes(alloc, CFDataGetBytePtr(data), CFDataGetLength(data), encoding, true);
3971 }
3972
3973
3974 CFDataRef CFStringCreateExternalRepresentation(CFAllocatorRef alloc, CFStringRef string, CFStringEncoding encoding, uint8_t lossByte) {
3975 CFIndex length;
3976 CFIndex guessedByteLength;
3977 uint8_t *bytes;
3978 CFIndex usedLength;
3979 SInt32 result;
3980
3981 if (CF_IS_OBJC(__kCFStringTypeID, string)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3982 length = CFStringGetLength(string);
3983 } else {
3984 __CFAssertIsString(string);
3985 length = __CFStrLength(string);
3986 if (__CFStrIsEightBit(string) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
3987 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
3988 }
3989 }
3990
3991 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3992
3993 if (((encoding & 0x0FFF) == kCFStringEncodingUnicode) && ((encoding == kCFStringEncodingUnicode) || ((encoding > kCFStringEncodingUTF8) && (encoding <= kCFStringEncodingUTF32LE)))) {
3994 guessedByteLength = (length + 1) * ((((encoding >> 26) & 2) == 0) ? sizeof(UTF16Char) : sizeof(UTF32Char)); // UTF32 format has the bit set
3995 } else if (((guessedByteLength = CFStringGetMaximumSizeForEncoding(length, encoding)) > length) && !CF_IS_OBJC(__kCFStringTypeID, string)) { // Multi byte encoding
3996 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
3997 if (__CFStrIsUnicode(string)) {
3998 CFIndex aLength = CFStringEncodingByteLengthForCharacters(encoding, kCFStringEncodingPrependBOM, __CFStrContents(string), __CFStrLength(string));
3999 if (aLength > 0) guessedByteLength = aLength;
4000 } else {
4001 #endif
4002 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, NULL, LONG_MAX, &guessedByteLength);
4003 // if result == length, we always succeed
4004 // otherwise, if result == 0, we fail
4005 // otherwise, if there was a lossByte but still result != length, we fail
4006 if ((result != length) && (!result || !lossByte)) return NULL;
4007 if (guessedByteLength == length && __CFStrIsEightBit(string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // It's all ASCII !!
4008 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
4009 }
4010 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
4011 }
4012 #endif
4013 }
4014 bytes = (uint8_t *)CFAllocatorAllocate(alloc, guessedByteLength, 0);
4015 if (__CFOASafe) __CFSetLastAllocationEventName(bytes, "CFData (store)");
4016
4017 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, bytes, guessedByteLength, &usedLength);
4018
4019 if ((result != length) && (!result || !lossByte)) { // see comment above about what this means
4020 CFAllocatorDeallocate(alloc, bytes);
4021 return NULL;
4022 }
4023
4024 return CFDataCreateWithBytesNoCopy(alloc, (uint8_t *)bytes, usedLength, alloc);
4025 }
4026
4027
4028 CFStringEncoding CFStringGetSmallestEncoding(CFStringRef str) {
4029 CFIndex len;
4030 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringEncoding, (NSString *)str, _smallestEncodingInCFStringEncoding);
4031 __CFAssertIsString(str);
4032
4033 if (__CFStrIsEightBit(str)) return __CFStringGetEightBitStringEncoding();
4034 len = __CFStrLength(str);
4035 if (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetEightBitStringEncoding(), 0, NULL, LONG_MAX, NULL) == len) return __CFStringGetEightBitStringEncoding();
4036 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetSystemEncoding(), 0, NULL, LONG_MAX, NULL) == len)) return __CFStringGetSystemEncoding();
4037 return kCFStringEncodingUnicode; /* ??? */
4038 }
4039
4040
4041 CFStringEncoding CFStringGetFastestEncoding(CFStringRef str) {
4042 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringEncoding, (NSString *)str, _fastestEncodingInCFStringEncoding);
4043 __CFAssertIsString(str);
4044 return __CFStrIsEightBit(str) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode; /* ??? */
4045 }
4046
4047
4048 SInt32 CFStringGetIntValue(CFStringRef str) {
4049 Boolean success;
4050 SInt32 result;
4051 SInt32 idx = 0;
4052 CFStringInlineBuffer buf;
4053 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
4054 success = __CFStringScanInteger(&buf, NULL, &idx, false, &result);
4055 return success ? result : 0;
4056 }
4057
4058
4059 double CFStringGetDoubleValue(CFStringRef str) {
4060 Boolean success;
4061 double result;
4062 SInt32 idx = 0;
4063 CFStringInlineBuffer buf;
4064 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
4065 success = __CFStringScanDouble(&buf, NULL, &idx, &result);
4066 return success ? result : 0.0;
4067 }
4068
4069
4070 /*** Mutable functions... ***/
4071
4072 void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string, UniChar *chars, CFIndex length, CFIndex capacity) {
4073 __CFAssertIsNotNegative(length);
4074 __CFAssertIsStringAndExternalMutable(string);
4075 CFAssert4((length <= capacity) && ((capacity == 0) || ((capacity > 0) && chars)), __kCFLogAssertion, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__, chars, length, capacity);
4076 __CFStrSetContentPtr(string, chars);
4077 __CFStrSetExplicitLength(string, length);
4078 __CFStrSetCapacity(string, capacity * sizeof(UniChar));
4079 __CFStrSetCapacityProvidedExternally(string);
4080 }
4081
4082
4083
4084 void CFStringInsert(CFMutableStringRef str, CFIndex idx, CFStringRef insertedStr) {
4085 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, insertString:(NSString *)insertedStr atIndex:(NSUInteger)idx);
4086 __CFAssertIsStringAndMutable(str);
4087 CFAssert3(idx >= 0 && idx <= __CFStrLength(str), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(str));
4088 __CFStringReplace(str, CFRangeMake(idx, 0), insertedStr);
4089 }
4090
4091
/* Removes the characters in `range` from the mutable string `str`. The string and the range are
   asserted first; the removal is done by asking __CFStringChangeSize() to replace the range with
   zero characters.
   NOTE(review): CF_OBJC_FUNCDISPATCHV presumably forwards to -deleteCharactersInRange: and returns
   when `str` is an Objective-C object - confirm against the macro definition. */
4092 void CFStringDelete(CFMutableStringRef str, CFRange range) {
4093 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, deleteCharactersInRange:NSMakeRange(range.location, range.length));
4094 __CFAssertIsStringAndMutable(str);
4095 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4096 __CFStringChangeSize(str, range, 0, false);
4097 }
4098
4099
/* Replaces the characters in `range` of the mutable string `str` with `replacement`. The string and
   the range are asserted first; the work is delegated to __CFStringReplace().
   NOTE(review): CF_OBJC_FUNCDISPATCHV presumably forwards to -replaceCharactersInRange:withString:
   and returns when `str` is an Objective-C object - confirm against the macro definition. */
4100 void CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
4101 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, replaceCharactersInRange:NSMakeRange(range.location, range.length) withString:(NSString *)replacement);
4102 __CFAssertIsStringAndMutable(str);
4103 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4104 __CFStringReplace(str, range, replacement);
4105 }
4106
4107
4108 void CFStringReplaceAll(CFMutableStringRef str, CFStringRef replacement) {
4109 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, setString:(NSString *)replacement);
4110 __CFAssertIsStringAndMutable(str);
4111 __CFStringReplace(str, CFRangeMake(0, __CFStrLength(str)), replacement);
4112 }
4113
4114
4115 void CFStringAppend(CFMutableStringRef str, CFStringRef appended) {
4116 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendString:(NSString *)appended);
4117 __CFAssertIsStringAndMutable(str);
4118 __CFStringReplace(str, CFRangeMake(__CFStrLength(str), 0), appended);
4119 }
4120
4121
4122 void CFStringAppendCharacters(CFMutableStringRef str, const UniChar *chars, CFIndex appendedLength) {
4123 CFIndex strLength, idx;
4124
4125 __CFAssertIsNotNegative(appendedLength);
4126
4127 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendCharacters:chars length:(NSUInteger)appendedLength);
4128
4129 __CFAssertIsStringAndMutable(str);
4130
4131 strLength = __CFStrLength(str);
4132 if (__CFStrIsUnicode(str)) {
4133 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, true);
4134 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4135 } else {
4136 uint8_t *contents;
4137 bool isASCII = true;
4138 for (idx = 0; isASCII && idx < appendedLength; idx++) isASCII = (chars[idx] < 0x80);
4139 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, !isASCII);
4140 if (!isASCII) {
4141 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4142 } else {
4143 contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4144 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4145 }
4146 }
4147 }
4148
4149
4150 void __CFStringAppendBytes(CFMutableStringRef str, const char *cStr, CFIndex appendedLength, CFStringEncoding encoding) {
4151 Boolean appendedIsUnicode = false;
4152 Boolean freeCStrWhenDone = false;
4153 Boolean demoteAppendedUnicode = false;
4154 CFVarWidthCharBuffer vBuf;
4155
4156 __CFAssertIsNotNegative(appendedLength);
4157
4158 if (encoding == kCFStringEncodingASCII || encoding == __CFStringGetEightBitStringEncoding()) {
4159 // appendedLength now denotes length in UniChars
4160 } else if (encoding == kCFStringEncodingUnicode) {
4161 UniChar *chars = (UniChar *)cStr;
4162 CFIndex idx, length = appendedLength / sizeof(UniChar);
4163 bool isASCII = true;
4164 for (idx = 0; isASCII && idx < length; idx++) isASCII = (chars[idx] < 0x80);
4165 if (!isASCII) {
4166 appendedIsUnicode = true;
4167 } else {
4168 demoteAppendedUnicode = true;
4169 }
4170 appendedLength = length;
4171 } else {
4172 Boolean usingPassedInMemory = false;
4173
4174 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
4175 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
4176
4177 if (!__CFStringDecodeByteStream3((const uint8_t *)cStr, appendedLength, encoding, __CFStrIsUnicode(str), &vBuf, &usingPassedInMemory, 0)) {
4178 CFAssert1(0, __kCFLogAssertion, "Supplied bytes could not be converted specified encoding %d", encoding);
4179 return;
4180 }
4181
4182 // If not ASCII, appendedLength now denotes length in UniChars
4183 appendedLength = vBuf.numChars;
4184 appendedIsUnicode = !vBuf.isASCII;
4185 cStr = (const char *)vBuf.chars.ascii;
4186 freeCStrWhenDone = !usingPassedInMemory && vBuf.shouldFreeChars;
4187 }
4188
4189 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
4190 if (!appendedIsUnicode && !demoteAppendedUnicode) {
4191 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, _cfAppendCString:(const unsigned char *)cStr length:(NSInteger)appendedLength);
4192 } else {
4193 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendCharacters:(const unichar *)cStr length:(NSUInteger)appendedLength);
4194 }
4195 } else {
4196 CFIndex strLength;
4197 __CFAssertIsStringAndMutable(str);
4198 strLength = __CFStrLength(str);
4199
4200 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, appendedIsUnicode || __CFStrIsUnicode(str));
4201
4202 if (__CFStrIsUnicode(str)) {
4203 UniChar *contents = (UniChar *)__CFStrContents(str);
4204 if (appendedIsUnicode) {
4205 memmove(contents + strLength, cStr, appendedLength * sizeof(UniChar));
4206 } else {
4207 __CFStrConvertBytesToUnicode((const uint8_t *)cStr, contents + strLength, appendedLength);
4208 }
4209 } else {
4210 if (demoteAppendedUnicode) {
4211 UniChar *chars = (UniChar *)cStr;
4212 CFIndex idx;
4213 uint8_t *contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4214 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4215 } else {
4216 uint8_t *contents = (uint8_t *)__CFStrContents(str);
4217 memmove(contents + strLength + __CFStrSkipAnyLengthByte(str), cStr, appendedLength);
4218 }
4219 }
4220 }
4221
4222 if (freeCStrWhenDone) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr);
4223 }
4224
4225 void CFStringAppendPascalString(CFMutableStringRef str, ConstStringPtr pStr, CFStringEncoding encoding) {
4226 __CFStringAppendBytes(str, (const char *)(pStr + 1), (CFIndex)*pStr, encoding);
4227 }
4228
4229 void CFStringAppendCString(CFMutableStringRef str, const char *cStr, CFStringEncoding encoding) {
4230 __CFStringAppendBytes(str, cStr, strlen(cStr), encoding);
4231 }
4232
4233
4234 void CFStringAppendFormat(CFMutableStringRef str, CFDictionaryRef formatOptions, CFStringRef format, ...) {
4235 va_list argList;
4236
4237 va_start(argList, format);
4238 CFStringAppendFormatAndArguments(str, formatOptions, format, argList);
4239 va_end(argList);
4240 }
4241
4242
4243 CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
4244 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFIndex, (NSMutableString *)string, replaceOccurrencesOfString:(NSString *)stringToFind withString:(NSString *)replacementString options:(NSStringCompareOptions)compareOptions range:NSMakeRange(rangeToSearch.location, rangeToSearch.length));
4245 CFRange foundRange;
4246 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
4247 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
4248 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
4249 CFRange rangeBuffer[MAX_RANGES_ON_STACK]; // Used to avoid allocating memory
4250 CFRange *ranges = rangeBuffer;
4251 CFIndex foundCount = 0;
4252 CFIndex capacity = MAX_RANGES_ON_STACK;
4253
4254 __CFAssertIsStringAndMutable(string);
4255 __CFAssertRangeIsInStringBounds(string, rangeToSearch.location, rangeToSearch.length);
4256
4257 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
4258 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
4259 // Determine the next range
4260 if (backwards) {
4261 rangeToSearch.length = foundRange.location - rangeToSearch.location;
4262 } else {
4263 rangeToSearch.location = foundRange.location + foundRange.length;
4264 rangeToSearch.length = endIndex - rangeToSearch.location;
4265 }
4266
4267 // If necessary, grow the array
4268 if (foundCount >= capacity) {
4269 bool firstAlloc = (ranges == rangeBuffer) ? true : false;
4270 capacity = (capacity + 4) * 2;
4271 // Note that reallocate with NULL previous pointer is same as allocate
4272 ranges = (CFRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, firstAlloc ? NULL : ranges, capacity * sizeof(CFRange), 0);
4273 if (firstAlloc) memmove(ranges, rangeBuffer, MAX_RANGES_ON_STACK * sizeof(CFRange));
4274 }
4275 ranges[foundCount] = foundRange;
4276 foundCount++;
4277 }
4278
4279 if (foundCount > 0) {
4280 if (backwards) { // Reorder the ranges to be incrementing (better to do this here, then to check other places)
4281 int head = 0;
4282 int tail = foundCount - 1;
4283 while (head < tail) {
4284 CFRange temp = ranges[head];
4285 ranges[head] = ranges[tail];
4286 ranges[tail] = temp;
4287 head++;
4288 tail--;
4289 }
4290 }
4291 __CFStringReplaceMultiple(string, ranges, foundCount, replacementString);
4292 if (ranges != rangeBuffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, ranges);
4293 }
4294
4295 return foundCount;
4296 }
4297
4298
4299 // This function is here for NSString purposes
4300 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
4301
4302 int __CFStringCheckAndReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
4303 if (!__CFStrIsMutable(str)) return _CFStringErrNotMutable; // These three ifs are always here, for NSString usage
4304 if (!replacement && __CFStringNoteErrors()) return _CFStringErrNilArg;
4305 // This attempts to catch bad ranges including those described in 3375535 (-1,1)
4306 unsigned long endOfRange = (unsigned long)(range.location) + (unsigned long)(range.length); // NSRange uses unsigned quantities, hence the casting
4307 if (((endOfRange > (unsigned long)__CFStrLength(str)) || (endOfRange < (unsigned long)(range.location))) && __CFStringNoteErrors()) return _CFStringErrBounds;
4308
4309 __CFAssertIsStringAndMutable(str);
4310 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4311 __CFStringReplace(str, range, replacement);
4312 return _CFStringErrNone;
4313 }
4314
4315 // This function determines whether errors which would cause string exceptions should
4316 // be ignored or not
4317
4318 Boolean __CFStringNoteErrors(void) {
4319 return true;
4320 }
4321
4322
4323
4324 void CFStringPad(CFMutableStringRef string, CFStringRef padString, CFIndex length, CFIndex indexIntoPad) {
4325 CFIndex originalLength;
4326
4327 __CFAssertIsNotNegative(length);
4328 __CFAssertIsNotNegative(indexIntoPad);
4329
4330 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfPad:padString length:(uint32_t)length padIndex:(uint32_t)indexIntoPad);
4331
4332 __CFAssertIsStringAndMutable(string);
4333
4334 originalLength = __CFStrLength(string);
4335 if (length < originalLength) {
4336 __CFStringChangeSize(string, CFRangeMake(length, originalLength - length), 0, false);
4337 } else if (originalLength < length) {
4338 uint8_t *contents;
4339 Boolean isUnicode;
4340 CFIndex charSize;
4341 CFIndex padStringLength;
4342 CFIndex padLength;
4343 CFIndex padRemaining = length - originalLength;
4344
4345 if (CF_IS_OBJC(__kCFStringTypeID, padString)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
4346 padStringLength = CFStringGetLength(padString);
4347 isUnicode = true; /* !!! Bad for now */
4348 } else {
4349 __CFAssertIsString(padString);
4350 padStringLength = __CFStrLength(padString);
4351 isUnicode = __CFStrIsUnicode(string) || __CFStrIsUnicode(padString);
4352 }
4353
4354 charSize = isUnicode ? sizeof(UniChar) : sizeof(uint8_t);
4355
4356 __CFStringChangeSize(string, CFRangeMake(originalLength, 0), padRemaining, isUnicode);
4357
4358 contents = (uint8_t *)__CFStrContents(string) + charSize * originalLength + __CFStrSkipAnyLengthByte(string);
4359 padLength = padStringLength - indexIntoPad;
4360 padLength = padRemaining < padLength ? padRemaining : padLength;
4361
4362 while (padRemaining > 0) {
4363 if (isUnicode) {
4364 CFStringGetCharacters(padString, CFRangeMake(indexIntoPad, padLength), (UniChar *)contents);
4365 } else {
4366 CFStringGetBytes(padString, CFRangeMake(indexIntoPad, padLength), __CFStringGetEightBitStringEncoding(), 0, false, contents, padRemaining * charSize, NULL);
4367 }
4368 contents += padLength * charSize;
4369 padRemaining -= padLength;
4370 indexIntoPad = 0;
4371 padLength = padRemaining < padLength ? padRemaining : padStringLength;
4372 }
4373 }
4374 }
4375
4376 void CFStringTrim(CFMutableStringRef string, CFStringRef trimString) {
4377 CFRange range;
4378 CFIndex newStartIndex;
4379 CFIndex length;
4380
4381 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfTrim:trimString);
4382
4383 __CFAssertIsStringAndMutable(string);
4384 __CFAssertIsString(trimString);
4385
4386 newStartIndex = 0;
4387 length = __CFStrLength(string);
4388
4389 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length - newStartIndex), kCFCompareAnchored, &range)) {
4390 newStartIndex = range.location + range.length;
4391 }
4392
4393 if (newStartIndex < length) {
4394 CFIndex charSize = __CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t);
4395 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4396
4397 length -= newStartIndex;
4398 if (__CFStrLength(trimString) < length) {
4399 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length), kCFCompareAnchored|kCFCompareBackwards, &range)) {
4400 length = range.location - newStartIndex;
4401 }
4402 }
4403 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4404 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4405 } else { // Only trimString in string, trim all
4406 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4407 }
4408 }
4409
4410 void CFStringTrimWhitespace(CFMutableStringRef string) {
4411 CFIndex newStartIndex;
4412 CFIndex length;
4413 CFStringInlineBuffer buffer;
4414
4415 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfTrimWS);
4416
4417 __CFAssertIsStringAndMutable(string);
4418
4419 newStartIndex = 0;
4420 length = __CFStrLength(string);
4421
4422 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
4423 CFIndex buffer_idx = 0;
4424
4425 while (buffer_idx < length && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4426 buffer_idx++;
4427 newStartIndex = buffer_idx;
4428
4429 if (newStartIndex < length) {
4430 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4431 CFIndex charSize = (__CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t));
4432
4433 buffer_idx = length - 1;
4434 while (0 <= buffer_idx && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4435 buffer_idx--;
4436 length = buffer_idx - newStartIndex + 1;
4437
4438 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4439 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4440 } else { // Whitespace only string
4441 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4442 }
4443 }
4444
4445 void CFStringLowercase(CFMutableStringRef string, CFLocaleRef locale) {
4446 CFIndex currentIndex = 0;
4447 CFIndex length;
4448 const uint8_t *langCode;
4449 Boolean isEightBit = __CFStrIsEightBit(string);
4450
4451 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfLowercase:(const void *)locale);
4452
4453 __CFAssertIsStringAndMutable(string);
4454
4455 length = __CFStrLength(string);
4456
4457 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale, false) : NULL);
4458
4459 if (!langCode && isEightBit) {
4460 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4461 for (;currentIndex < length;currentIndex++) {
4462 if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4463 contents[currentIndex] += 'a' - 'A';
4464 } else if (contents[currentIndex] > 127) {
4465 break;
4466 }
4467 }
4468 }
4469
4470 if (currentIndex < length) {
4471 UTF16Char *contents;
4472 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4473 CFIndex mappedLength;
4474 UTF32Char currentChar;
4475 UInt32 flags = 0;
4476
4477 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4478
4479 contents = (UniChar *)__CFStrContents(string);
4480
4481 for (;currentIndex < length;currentIndex++) {
4482
4483 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4484 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4485 } else {
4486 currentChar = contents[currentIndex];
4487 }
4488 flags = ((langCode || (currentChar == 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToLowercase, langCode, flags) : 0);
4489
4490 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, flags, langCode);
4491 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4492
4493 if (currentChar > 0xFFFF) { // Non-BMP char
4494 switch (mappedLength) {
4495 case 0:
4496 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4497 contents = (UniChar *)__CFStrContents(string);
4498 length -= 2;
4499 break;
4500
4501 case 1:
4502 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4503 contents = (UniChar *)__CFStrContents(string);
4504 --length;
4505 break;
4506
4507 case 2:
4508 contents[++currentIndex] = mappedCharacters[1];
4509 break;
4510
4511 default:
4512 --mappedLength; // Skip the current char
4513 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4514 contents = (UniChar *)__CFStrContents(string);
4515 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4516 length += (mappedLength - 1);
4517 currentIndex += mappedLength;
4518 break;
4519 }
4520 } else if (mappedLength == 0) {
4521 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4522 contents = (UniChar *)__CFStrContents(string);
4523 --length;
4524 } else if (mappedLength > 1) {
4525 --mappedLength; // Skip the current char
4526 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4527 contents = (UniChar *)__CFStrContents(string);
4528 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4529 length += mappedLength;
4530 currentIndex += mappedLength;
4531 }
4532 }
4533 }
4534 }
4535
4536 void CFStringUppercase(CFMutableStringRef string, CFLocaleRef locale) {
4537 CFIndex currentIndex = 0;
4538 CFIndex length;
4539 const uint8_t *langCode;
4540 Boolean isEightBit = __CFStrIsEightBit(string);
4541
4542 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfUppercase:(const void *)locale);
4543
4544 __CFAssertIsStringAndMutable(string);
4545
4546 length = __CFStrLength(string);
4547
4548 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale, false) : NULL);
4549
4550 if (!langCode && isEightBit) {
4551 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4552 for (;currentIndex < length;currentIndex++) {
4553 if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4554 contents[currentIndex] -= 'a' - 'A';
4555 } else if (contents[currentIndex] > 127) {
4556 break;
4557 }
4558 }
4559 }
4560
4561 if (currentIndex < length) {
4562 UniChar *contents;
4563 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4564 CFIndex mappedLength;
4565 UTF32Char currentChar;
4566 UInt32 flags = 0;
4567
4568 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4569
4570 contents = (UniChar *)__CFStrContents(string);
4571
4572 for (;currentIndex < length;currentIndex++) {
4573 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4574 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4575 } else {
4576 currentChar = contents[currentIndex];
4577 }
4578
4579 flags = (langCode ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToUppercase, langCode, flags) : 0);
4580
4581 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToUppercase, flags, langCode);
4582 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4583
4584 if (currentChar > 0xFFFF) { // Non-BMP char
4585 switch (mappedLength) {
4586 case 0:
4587 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4588 contents = (UniChar *)__CFStrContents(string);
4589 length -= 2;
4590 break;
4591
4592 case 1:
4593 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4594 contents = (UniChar *)__CFStrContents(string);
4595 --length;
4596 break;
4597
4598 case 2:
4599 contents[++currentIndex] = mappedCharacters[1];
4600 break;
4601
4602 default:
4603 --mappedLength; // Skip the current char
4604 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4605 contents = (UniChar *)__CFStrContents(string);
4606 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4607 length += (mappedLength - 1);
4608 currentIndex += mappedLength;
4609 break;
4610 }
4611 } else if (mappedLength == 0) {
4612 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4613 contents = (UniChar *)__CFStrContents(string);
4614 --length;
4615 } else if (mappedLength > 1) {
4616 --mappedLength; // Skip the current char
4617 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4618 contents = (UniChar *)__CFStrContents(string);
4619 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4620 length += mappedLength;
4621 currentIndex += mappedLength;
4622 }
4623 }
4624 }
4625 }
4626
4627
4628 void CFStringCapitalize(CFMutableStringRef string, CFLocaleRef locale) {
4629 CFIndex currentIndex = 0;
4630 CFIndex length;
4631 const uint8_t *langCode;
4632 Boolean isEightBit = __CFStrIsEightBit(string);
4633 Boolean isLastCased = false;
4634 const uint8_t *caseIgnorableForBMP;
4635
4636 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfCapitalize:(const void *)locale);
4637
4638 __CFAssertIsStringAndMutable(string);
4639
4640 length = __CFStrLength(string);
4641
4642 caseIgnorableForBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet, 0);
4643
4644 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale, false) : NULL);
4645
4646 if (!langCode && isEightBit) {
4647 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4648 for (;currentIndex < length;currentIndex++) {
4649 if (contents[currentIndex] > 127) {
4650 break;
4651 } else if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4652 contents[currentIndex] += (isLastCased ? 'a' - 'A' : 0);
4653 isLastCased = true;
4654 } else if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4655 contents[currentIndex] -= (!isLastCased ? 'a' - 'A' : 0);
4656 isLastCased = true;
4657 } else if (!CFUniCharIsMemberOfBitmap(contents[currentIndex], caseIgnorableForBMP)) {
4658 isLastCased = false;
4659 }
4660 }
4661 }
4662
4663 if (currentIndex < length) {
4664 UniChar *contents;
4665 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4666 CFIndex mappedLength;
4667 UTF32Char currentChar;
4668 UInt32 flags = 0;
4669
4670 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4671
4672 contents = (UniChar *)__CFStrContents(string);
4673
4674 for (;currentIndex < length;currentIndex++) {
4675 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4676 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4677 } else {
4678 currentChar = contents[currentIndex];
4679 }
4680 flags = ((langCode || ((currentChar == 0x03A3) && isLastCased)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), langCode, flags) : 0);
4681
4682 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), flags, langCode);
4683 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4684
4685 if (currentChar > 0xFFFF) { // Non-BMP char
4686 switch (mappedLength) {
4687 case 0:
4688 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4689 contents = (UniChar *)__CFStrContents(string);
4690 length -= 2;
4691 break;
4692
4693 case 1:
4694 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4695 contents = (UniChar *)__CFStrContents(string);
4696 --length;
4697 break;
4698
4699 case 2:
4700 contents[++currentIndex] = mappedCharacters[1];
4701 break;
4702
4703 default:
4704 --mappedLength; // Skip the current char
4705 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4706 contents = (UniChar *)__CFStrContents(string);
4707 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4708 length += (mappedLength - 1);
4709 currentIndex += mappedLength;
4710 break;
4711 }
4712 } else if (mappedLength == 0) {
4713 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4714 contents = (UniChar *)__CFStrContents(string);
4715 --length;
4716 } else if (mappedLength > 1) {
4717 --mappedLength; // Skip the current char
4718 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4719 contents = (UniChar *)__CFStrContents(string);
4720 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4721 length += mappedLength;
4722 currentIndex += mappedLength;
4723 }
4724
4725 if (!((currentChar > 0xFFFF) ? CFUniCharIsMemberOf(currentChar, kCFUniCharCaseIgnorableCharacterSet) : CFUniCharIsMemberOfBitmap(currentChar, caseIgnorableForBMP))) { // We have non-caseignorable here
4726 isLastCased = ((CFUniCharIsMemberOf(currentChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(currentChar, kCFUniCharLowercaseLetterCharacterSet)) ? true : false);
4727 }
4728 }
4729 }
4730 }
4731
4732
/* Number of UTF-32 slots in the on-stack buffer that receives decomposition results */
#define MAX_DECOMP_BUF 64

/* Hangul syllable arithmetic: base code points of the precomposed syllable block (S)
   and of the leading consonant (L), vowel (V) and trailing consonant (T) jamo ranges */
#define HANGUL_SBASE 0xAC00
#define HANGUL_LBASE 0x1100
#define HANGUL_VBASE 0x1161
#define HANGUL_TBASE 0x11A7

/* Sizes of those ranges; NCOUNT is the number of syllables per leading consonant */
#define HANGUL_SCOUNT 11172
#define HANGUL_LCOUNT 19
#define HANGUL_VCOUNT 21
#define HANGUL_TCOUNT 28
#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
4744
4745 CF_INLINE uint32_t __CFGetUTF16Length(const UTF32Char *characters, uint32_t utf32Length) {
4746 const UTF32Char *limit = characters + utf32Length;
4747 uint32_t length = 0;
4748
4749 while (characters < limit) length += (*(characters++) > 0xFFFF ? 2 : 1);
4750
4751 return length;
4752 }
4753
4754 CF_INLINE void __CFFillInUTF16(const UTF32Char *characters, UTF16Char *dst, uint32_t utf32Length) {
4755 const UTF32Char *limit = characters + utf32Length;
4756 UTF32Char currentChar;
4757
4758 while (characters < limit) {
4759 currentChar = *(characters++);
4760 if (currentChar > 0xFFFF) {
4761 currentChar -= 0x10000;
4762 *(dst++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
4763 *(dst++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
4764 } else {
4765 *(dst++) = currentChar;
4766 }
4767 }
4768 }
4769
4770 void CFStringNormalize(CFMutableStringRef string, CFStringNormalizationForm theForm) {
4771 CFIndex currentIndex = 0;
4772 CFIndex length;
4773 bool needToReorder = true;
4774
4775 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfNormalize:theForm);
4776
4777 __CFAssertIsStringAndMutable(string);
4778
4779 length = __CFStrLength(string);
4780
4781 if (__CFStrIsEightBit(string)) {
4782 uint8_t *contents;
4783
4784 if (theForm == kCFStringNormalizationFormC) return; // 8bit form has no decomposition
4785
4786 contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4787
4788 for (;currentIndex < length;currentIndex++) {
4789 if (contents[currentIndex] > 127) {
4790 __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); // need to do harm way
4791 needToReorder = false;
4792 break;
4793 }
4794 }
4795 }
4796
4797 if (currentIndex < length) {
4798 UTF16Char *limit = (UTF16Char *)__CFStrContents(string) + length;
4799 UTF16Char *contents = (UTF16Char *)__CFStrContents(string) + currentIndex;
4800 UTF32Char buffer[MAX_DECOMP_BUF];
4801 UTF32Char *mappedCharacters = buffer;
4802 CFIndex allocatedLength = MAX_DECOMP_BUF;
4803 CFIndex mappedLength;
4804 CFIndex currentLength;
4805 UTF32Char currentChar;
4806 const uint8_t *decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
4807 const uint8_t *nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
4808 const uint8_t *combiningBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
4809
4810 while (contents < limit) {
4811 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4812 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4813 currentLength = 2;
4814 contents += 2;
4815 } else {
4816 currentChar = *(contents++);
4817 currentLength = 1;
4818 }
4819
4820 mappedLength = 0;
4821
4822 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16)))) && (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, ((currentChar < 0x10000) ? combiningBMP : (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16)))))) {
4823 if ((theForm & kCFStringNormalizationFormC) == 0 || currentChar < HANGUL_SBASE || currentChar > (HANGUL_SBASE + HANGUL_SCOUNT)) { // We don't have to decompose Hangul Syllables if we're precomposing again
4824 mappedLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters, MAX_DECOMP_BUF);
4825 }
4826 }
4827
4828 if ((needToReorder || (theForm & kCFStringNormalizationFormC)) && ((contents < limit) || (mappedLength == 0))) {
4829 if (mappedLength > 0) {
4830 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4831 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4832 } else {
4833 currentChar = *contents;
4834 }
4835 }
4836
4837 if (0 != CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) {
4838 uint32_t decompLength;
4839
4840 if (mappedLength == 0) {
4841 contents -= (currentChar & 0xFFFF0000 ? 2 : 1);
4842 if (currentIndex > 0) {
4843 if (CFUniCharIsSurrogateLowCharacter(*(contents - 1)) && (currentIndex > 1) && CFUniCharIsSurrogateHighCharacter(*(contents - 2))) {
4844 *mappedCharacters = CFUniCharGetLongCharacterForSurrogatePair(*(contents - 2), *(contents - 1));
4845 currentIndex -= 2;
4846 currentLength += 2;
4847 } else {
4848 *mappedCharacters = *(contents - 1);
4849 --currentIndex;
4850 ++currentLength;
4851 }
4852 mappedLength = 1;
4853 }
4854 } else {
4855 currentLength += (currentChar & 0xFFFF0000 ? 2 : 1);
4856 }
4857 contents += (currentChar & 0xFFFF0000 ? 2 : 1);
4858
4859 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
4860 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4861 mappedLength += decompLength;
4862 } else {
4863 mappedCharacters[mappedLength++] = currentChar;
4864 }
4865
4866 while (contents < limit) {
4867 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4868 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4869 } else {
4870 currentChar = *contents;
4871 }
4872 if (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) break;
4873 if (currentChar & 0xFFFF0000) {
4874 contents += 2;
4875 currentLength += 2;
4876 } else {
4877 ++contents;
4878 ++currentLength;
4879 }
4880 if (mappedLength == allocatedLength) {
4881 allocatedLength += MAX_DECOMP_BUF;
4882 if (mappedCharacters == buffer) {
4883 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
4884 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4885 } else {
4886 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4887 }
4888 }
4889 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
4890 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4891 mappedLength += decompLength;
4892 } else {
4893 mappedCharacters[mappedLength++] = currentChar;
4894 }
4895 }
4896 }
4897 if (needToReorder && mappedLength > 1) CFUniCharPrioritySort(mappedCharacters, mappedLength);
4898 }
4899
4900 if (theForm & kCFStringNormalizationFormKD) {
4901 CFIndex newLength = 0;
4902
4903 if (mappedLength == 0 && CFUniCharIsMemberOf(currentChar, kCFUniCharCompatibilityDecomposableCharacterSet)) {
4904 mappedCharacters[mappedLength++] = currentChar;
4905 }
4906 while (newLength < mappedLength) {
4907 newLength = CFUniCharCompatibilityDecompose(mappedCharacters, mappedLength, allocatedLength);
4908 if (newLength == 0) {
4909 allocatedLength += MAX_DECOMP_BUF;
4910 if (mappedCharacters == buffer) {
4911 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
4912 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4913 } else {
4914 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4915 }
4916 }
4917 }
4918 mappedLength = newLength;
4919 }
4920
4921 if (theForm & kCFStringNormalizationFormC) {
4922 UTF32Char nextChar;
4923
4924 if (mappedLength > 1) {
4925 CFIndex consumedLength = 1;
4926 UTF32Char *currentBase = mappedCharacters;
4927 uint8_t currentClass, lastClass = 0;
4928 bool didCombine = false;
4929
4930 currentChar = *mappedCharacters;
4931
4932 while (consumedLength < mappedLength) {
4933 nextChar = mappedCharacters[consumedLength];
4934 currentClass = CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)((nextChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))));
4935
4936 if (theForm & kCFStringNormalizationFormKD) {
4937 if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) {
4938 SInt8 lIndex = currentChar - HANGUL_LBASE;
4939
4940 if ((0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4941 SInt16 vIndex = nextChar - HANGUL_VBASE;
4942
4943 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4944 SInt16 tIndex = 0;
4945 CFIndex usedLength = mappedLength;
4946
4947 mappedCharacters[consumedLength++] = 0xFFFD;
4948
4949 if (consumedLength < mappedLength) {
4950 tIndex = mappedCharacters[consumedLength] - HANGUL_TBASE;
4951 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4952 tIndex = 0;
4953 } else {
4954 mappedCharacters[consumedLength++] = 0xFFFD;
4955 }
4956 }
4957 *currentBase = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4958
4959 while (--usedLength > 0) {
4960 if (mappedCharacters[usedLength] == 0xFFFD) {
4961 --mappedLength;
4962 --consumedLength;
4963 memmove(mappedCharacters + usedLength, mappedCharacters + usedLength + 1, (mappedLength - usedLength) * sizeof(UTF32Char));
4964 }
4965 }
4966 currentBase = mappedCharacters + consumedLength;
4967 currentChar = *currentBase;
4968 ++consumedLength;
4969
4970 continue;
4971 }
4972 }
4973 }
4974 if (!CFUniCharIsMemberOfBitmap(nextChar, ((nextChar < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))))) {
4975 *currentBase = currentChar;
4976 currentBase = mappedCharacters + consumedLength;
4977 currentChar = nextChar;
4978 ++consumedLength;
4979 continue;
4980 }
4981 }
4982
4983 if ((lastClass == 0) || (currentClass > lastClass)) {
4984 nextChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
4985 if (nextChar == 0xFFFD) {
4986 lastClass = currentClass;
4987 } else {
4988 mappedCharacters[consumedLength] = 0xFFFD;
4989 didCombine = true;
4990 currentChar = nextChar;
4991 }
4992 }
4993 ++consumedLength;
4994 }
4995
4996 *currentBase = currentChar;
4997 if (didCombine) {
4998 consumedLength = mappedLength;
4999 while (--consumedLength > 0) {
5000 if (mappedCharacters[consumedLength] == 0xFFFD) {
5001 --mappedLength;
5002 memmove(mappedCharacters + consumedLength, mappedCharacters + consumedLength + 1, (mappedLength - consumedLength) * sizeof(UTF32Char));
5003 }
5004 }
5005 }
5006 } else if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo
5007 SInt8 lIndex = currentChar - HANGUL_LBASE;
5008
5009 if ((contents < limit) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
5010 SInt16 vIndex = *contents - HANGUL_VBASE;
5011
5012 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
5013 SInt16 tIndex = 0;
5014
5015 ++contents; ++currentLength;
5016
5017 if (contents < limit) {
5018 tIndex = *contents - HANGUL_TBASE;
5019 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
5020 tIndex = 0;
5021 } else {
5022 ++contents; ++currentLength;
5023 }
5024 }
5025 *mappedCharacters = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
5026 mappedLength = 1;
5027 }
5028 }
5029 } else { // collect class 0 non-base characters
5030 while (contents < limit) {
5031 nextChar = *contents;
5032 if (CFUniCharIsSurrogateHighCharacter(nextChar) && ((contents + 1) < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
5033 nextChar = CFUniCharGetLongCharacterForSurrogatePair(nextChar, *(contents + 1));
5034 if (!CFUniCharIsMemberOfBitmap(nextChar, (const uint8_t *)CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))))) break;
5035 } else {
5036 if (!CFUniCharIsMemberOfBitmap(nextChar, nonBaseBMP) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, combiningBMP))) break;
5037 }
5038 currentChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
5039 if (0xFFFD == currentChar) break;
5040
5041 if (nextChar < 0x10000) {
5042 ++contents; ++currentLength;
5043 } else {
5044 contents += 2;
5045 currentLength += 2;
5046 }
5047
5048 *mappedCharacters = currentChar;
5049 mappedLength = 1;
5050 }
5051 }
5052 }
5053
5054 if (mappedLength > 0) {
5055 CFIndex utf16Length = __CFGetUTF16Length(mappedCharacters, mappedLength);
5056
5057 if (utf16Length != currentLength) {
5058 __CFStringChangeSize(string, CFRangeMake(currentIndex, currentLength), utf16Length, true);
5059 currentLength = utf16Length;
5060 }
5061 contents = (UTF16Char *)__CFStrContents(string);
5062 limit = contents + __CFStrLength(string);
5063 contents += currentIndex;
5064 __CFFillInUTF16(mappedCharacters, contents, mappedLength);
5065 contents += utf16Length;
5066 }
5067 currentIndex += currentLength;
5068 }
5069
5070 if (mappedCharacters != buffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, mappedCharacters);
5071 }
5072 }
5073
/* CFStringFold
   Folds theString in place according to theFlags.
   - theString: the mutable string to modify.
   - theFlags:  only kCFCompareCaseInsensitive, kCFCompareDiacriticInsensitive and
                kCFCompareWidthInsensitive are acted upon (all other bits are masked off below).
                kCFCompareLocalized is only consulted to decide whether the current locale
                should be substituted when locale is NULL.
   - locale:    optional locale; a language identifier derived from it is passed to the
                per-cluster folding routine.
   Returns nothing. The function is a no-op when none of the three folding flags is set or
   when the string is empty.
*/
5074 void CFStringFold(CFMutableStringRef theString, CFStringCompareFlags theFlags, CFLocaleRef locale) {
5075 CFStringInlineBuffer stringBuffer;
5076 CFIndex length = CFStringGetLength(theString);
5077 CFIndex currentIndex = 0;
5078 CFIndex bufferLength = 0;
5079 UTF32Char buffer[kCFStringStackBufferLength];
5080 const uint8_t *cString;
5081 const uint8_t *langCode;
5082 CFStringEncoding eightBitEncoding;
5083 bool caseInsensitive = ((theFlags & kCFCompareCaseInsensitive) ? true : false);
5084 bool isObjc = CF_IS_OBJC(__kCFStringTypeID, theString);
5085 CFLocaleRef theLocale = locale;
5086
// Localized folding requested without an explicit locale: use a copy of the current locale.
// That copy is released at the bail label (only when the caller passed NULL).
5087 if ((theFlags & kCFCompareLocalized) && (NULL == locale)) {
5088 theLocale = CFLocaleCopyCurrent();
5089 }
5090
// From here on theFlags only carries the three folding options.
5091 theFlags &= (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive);
5092
5093 if ((0 == theFlags) || (0 == length)) goto bail; // nothing to do
5094
// langCode stays NULL when there is no locale; a NULL langCode enables the plain-ASCII shortcuts below.
5095 langCode = ((NULL == theLocale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(theLocale, true));
5096
// cString is non-NULL only when the string's characters are directly available in the 8-bit encoding.
5097 eightBitEncoding = __CFStringGetEightBitStringEncoding();
5098 cString = (const uint8_t *)CFStringGetCStringPtr(theString, eightBitEncoding);
5099
// 8-bit ASCII contents with no case folding requested: nothing is changed.
5100 if ((NULL != cString) && !caseInsensitive && (kCFStringEncodingASCII == eightBitEncoding)) goto bail; // All ASCII
5101
5102 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5103
// Fast path for 8-bit storage: fold byte by byte, writing results directly into the string's
// own bytes for as long as each folded result is a single character <= 0x7F.
// For isObjc strings cStringContents is NULL, so the loop stops at the first character that
// would need to be modified.
5104 if ((NULL != cString) && (theFlags & (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive))) {
5105 const uint8_t *cStringPtr = cString;
5106 const uint8_t *cStringLimit = cString + length;
5107 uint8_t *cStringContents = (isObjc ? NULL : (uint8_t *)__CFStrContents(theString) + __CFStrSkipAnyLengthByte(theString));
5108
5109 while (cStringPtr < cStringLimit) {
// ASCII byte and no language-specific rules: only 'A'..'Z' can change (to lowercase).
5110 if ((*cStringPtr < 0x80) && (NULL == langCode)) {
5111 if (caseInsensitive && (*cStringPtr >= 'A') && (*cStringPtr <= 'Z')) {
5112 if (NULL == cStringContents) {
5113 break;
5114 } else {
5115 cStringContents[cStringPtr - cString] += ('a' - 'A');
5116 }
5117 }
5118 } else {
// General case: fold the cluster at this index. A positive bufferLength means the character folds to
// something in buffer. If that result cannot be stored in place (non-ASCII, more than one
// character, or no writable contents) leave the loop; bufferLength/buffer then carry the
// pending result to the general path below.
5119 if ((bufferLength = __CFStringFoldCharacterClusterAtIndex((UTF32Char)__CFCharToUniCharTable[*cStringPtr], &stringBuffer, cStringPtr - cString, theFlags, langCode, buffer, kCFStringStackBufferLength, NULL)) > 0) {
5120 if ((*buffer > 0x7F) || (bufferLength > 1) || (NULL == cStringContents)) break;
5121 cStringContents[cStringPtr - cString] = *buffer;
5122 }
5123 }
5124 ++cStringPtr;
5125 }
5126
// Number of leading characters already handled by the fast path.
5127 currentIndex = cStringPtr - cString;
5128 }
5129
// Anything from currentIndex onward still needs the general (UTF-16) treatment.
5130 if (currentIndex < length) {
5131 UTF16Char *contents;
5132
5133 if (isObjc) {
// isObjc strings are not edited through the internal accessors: the remaining range is copied
// into a temporary mutable CFString, that copy is folded recursively, and the result replaces
// the range in theString.
5134 CFMutableStringRef cfString;
5135 CFRange range = CFRangeMake(currentIndex, length - currentIndex);
5136
5137 contents = (UTF16Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * range.length, 0);
5138
5139 CFStringGetCharacters(theString, range, contents);
5140
// NOTE(review): contents is never deallocated explicitly in this function; presumably the
// no-copy string takes ownership of it (external characters allocator passed as NULL) - confirm.
5141 cfString = CFStringCreateMutableWithExternalCharactersNoCopy(kCFAllocatorSystemDefault, contents, range.length, range.length, NULL);
5142
5143 CFStringFold(cfString, theFlags, theLocale);
5144
5145 CFStringReplace(theString, range, cfString);
5146
5147 CFRelease(cfString);
5148 } else {
5149 const UTF32Char *characters;
5150 const UTF32Char *charactersLimit;
5151 UTF32Char character;
5152 CFIndex consumedLength;
5153
5154 contents = NULL;
5155
// bufferLength > 0 here is the pending result left by the fast path's break: make room for
// bufferLength - 1 extra characters after currentIndex, then store the folded characters
// as UTF-16 over the original character.
// NOTE(review): the trailing `true` passed to __CFStringChangeSize presumably requests
// Unicode storage - confirm against its definition.
5156 if (bufferLength > 0) {
5157 __CFStringChangeSize(theString, CFRangeMake(currentIndex + 1, 0), bufferLength - 1, true);
5158 length = __CFStrLength(theString);
// The storage may have changed, so the inline buffer is set up again.
5159 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5160
5161 contents = (UTF16Char *)__CFStrContents(theString) + currentIndex;
5162 characters = buffer;
5163 charactersLimit = characters + bufferLength;
5164 while (characters < charactersLimit) *(contents++) = (UTF16Char)*(characters++);
5165 ++currentIndex;
5166 }
5167
// Main loop: one character (or cluster) per iteration.
5168 while (currentIndex < length) {
5169 character = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex);
5170
// consumedLength == 0 after the folding step means "leave this character unchanged".
5171 consumedLength = 0;
5172
// ASCII shortcut (no language rules, no diacritic folding): only uppercase letters change.
5173 if ((NULL == langCode) && (character < 0x80) && (0 == (theFlags & kCFCompareDiacriticInsensitive))) {
5174 if (caseInsensitive && (character >= 'A') && (character <= 'Z')) {
5175 consumedLength = 1;
5176 bufferLength = 1;
5177 *buffer = character + ('a' - 'A');
5178 }
5179 } else {
// Combine a valid surrogate pair into one UTF-32 character before folding.
5180 if (CFUniCharIsSurrogateHighCharacter(character) && ((currentIndex + 1) < length)) {
5181 UTF16Char lowSurrogate = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex + 1);
5182 if (CFUniCharIsSurrogateLowCharacter(lowSurrogate)) character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
5183 }
5184
5185 bufferLength = __CFStringFoldCharacterClusterAtIndex(character, &stringBuffer, currentIndex, theFlags, langCode, buffer, kCFStringStackBufferLength, &consumedLength);
5186 }
5187
5188 if (consumedLength > 0) {
// utf16Length: number of UTF-16 units needed for the UTF-32 result (characters above 0xFFFF take two).
5189 CFIndex utf16Length = bufferLength;
5190
5191 characters = buffer;
5192 charactersLimit = characters + bufferLength;
5193
5194 while (characters < charactersLimit) if (*(characters++) > 0xFFFF) ++utf16Length; // Extend bufferLength to the UTF-16 length
5195
// Resize when the replacement differs in length from what it replaces; the call is also made
// (possibly with an empty range and zero insert length) while the string is still 8-bit.
5196 if ((utf16Length != consumedLength) || __CFStrIsEightBit(theString)) {
5197 CFRange range;
5198 CFIndex insertLength;
5199
5200 if (consumedLength < utf16Length) { // Need to expand
5201 range = CFRangeMake(currentIndex + consumedLength, 0);
5202 insertLength = utf16Length - consumedLength;
5203 } else {
// Shrinking (or equal length): delete the surplus characters after the replacement.
5204 range = CFRangeMake(currentIndex + utf16Length, consumedLength - utf16Length);
5205 insertLength = 0;
5206 }
5207 __CFStringChangeSize(theString, range, insertLength, true);
5208 length = __CFStrLength(theString);
5209 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5210 }
5211
// Write the folded UTF-32 characters into the string's contents as UTF-16 at currentIndex.
5212 (void)CFUniCharFromUTF32(buffer, bufferLength, (UTF16Char *)__CFStrContents(theString) + currentIndex, true, __CF_BIG_ENDIAN__);
5213
5214 currentIndex += utf16Length;
5215 } else {
5216 ++currentIndex;
5217 }
5218 }
5219 }
5220 }
5221
// Common exit: release the locale only if it was created here (caller passed NULL).
5222 bail:
5223 if (NULL == locale && theLocale) {
5224 CFRelease(theLocale);
5225 }
5226 }
5227
/* Bit flags stored in CFFormatSpec.flags while a format specification is parsed. */
enum {
    kCFStringFormatZeroFlag         = 0x01,  /* zero padding; when clear, padding is the space char */
    kCFStringFormatMinusFlag        = 0x02,  /* '-' given; when clear, no flag implied */
    kCFStringFormatPlusFlag         = 0x04,  /* '+' given; when clear, no flag implied; overrides space */
    kCFStringFormatSpaceFlag        = 0x08,  /* ' ' given; when clear, no flag implied */
    kCFStringFormatExternalSpecFlag = 0x10,  /* spec is resolved using the config dict */
    kCFStringFormatLocalizable      = 0x20   /* explicitly marks the specs we can localize */
};
5236
5237 typedef struct {
5238 int16_t size;
5239 int16_t type;
5240 SInt32 loc;
5241 SInt32 len;
5242 SInt32 widthArg;
5243 SInt32 precArg;
5244 uint32_t flags;
5245 int8_t mainArgNum;
5246 int8_t precArgNum;
5247 int8_t widthArgNum;
5248 int8_t configDictIndex;
5249 int8_t numericFormatStyle; // Only set for localizable numeric quantities
5250 } CFFormatSpec;
5251
/* One argument value collected for formatting, tagged with its type and size. */
typedef struct {
    int16_t type;   /* CFFormat*Type constant selecting which union member is meaningful */
    int16_t size;   /* CFFormatSize* constant */
    union {
        int64_t     int64Value;
        double      doubleValue;
#if LONG_DOUBLE_SUPPORT
        long double longDoubleValue;
#endif
        void       *pointerValue;
    } value;
} CFPrintValue;
5264
/* Size codes for format arguments. The numeric values are sequential codes
   (0 through 5), not byte counts. */
enum {
    CFFormatDefaultSize = 0,
    CFFormatSize1,      /* 1 */
    CFFormatSize2,      /* 2 */
    CFFormatSize4,      /* 3 */
    CFFormatSize8,      /* 4 */
    CFFormatSize16,     /* 5 */
    /* long and pointer arguments use the 8-byte code on LP64 and the 4-byte code otherwise */
#if __LP64__
    CFFormatSizeLong = CFFormatSize8,
    CFFormatSizePointer = CFFormatSize8
#else
    CFFormatSizeLong = CFFormatSize4,
    CFFormatSizePointer = CFFormatSize4
#endif
};
5280
/* Numeric format styles stored in CFFormatSpec.numericFormatStyle. */
enum {
    CFFormatStyleDecimal             = 0x1,
    CFFormatStyleScientific          = 0x2,
    CFFormatStyleDecimalOrScientific = (CFFormatStyleDecimal | CFFormatStyleScientific),
    CFFormatStyleUnsigned            = 0x4
};
5287
/* Argument type codes stored in CFFormatSpec.type and CFPrintValue.type.
   Codes 36 through 41 are the ones marked "handled specially". */
enum {
    CFFormatLiteralType       = 32,
    CFFormatLongType          = 33,
    CFFormatDoubleType        = 34,
    CFFormatPointerType       = 35,
    CFFormatObjectType        = 36,  // handled specially; ??? not used anymore, can be removed?
    CFFormatCFType            = 37,  // handled specially
    CFFormatUnicharsType      = 38,  // handled specially
    CFFormatCharsType         = 39,  // handled specially
    CFFormatPascalCharsType   = 40,  // handled specially
    CFFormatSingleUnicharType = 41,  // handled specially
    CFFormatDummyPointerType  = 42   // special case for %n
};
5301
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
/* Appends a locale-aware rendering of one numeric format argument to output.

   Only come in here if spec->type is CFFormatLongType or CFFormatDoubleType. Pass in 0 for width or
   precision if not specified.

   Parameters:
     output       - string the formatted number is appended to (untouched on failure)
     locale       - locale used to create (or re-create) the cached number formatters
     values       - argument array; values[spec->mainArgNum] is the number to format
     spec         - parsed format spec; numericFormatStyle, type and flags are consulted
     width        - minimum field width (0 if unspecified)
     precision    - precision, only meaningful when hasPrecision is true
     hasPrecision - whether a precision was given in the format

   Returns true if the number was appended. Returns false if localized formatting is disabled through
   the CFStringDisableLocalizedNumberFormatting environment variable, if no formatter is available
   for spec->numericFormatStyle, or if the formatter produced no string (with the assumption the
   caller falls back to unlocalized).

   The three cached formatters are shared process-wide and are only touched while formatterLock is held.
*/
static Boolean __CFStringFormatLocalizedNumber(CFMutableStringRef output, CFLocaleRef locale, const CFPrintValue *values, const CFFormatSpec *spec, SInt32 width, SInt32 precision, Boolean hasPrecision) {
    static CFSpinLock_t formatterLock = CFSpinLockInit;
    // These formatters are recached if the locale argument is different
    static CFNumberFormatterRef decimalFormatter = NULL;
    static CFNumberFormatterRef scientificFormatter = NULL;
    static CFNumberFormatterRef gFormatter = NULL;  // for %g
    static SInt32 groupingSize = 0;
    static SInt32 secondaryGroupingSize = 0;

    // !!! This code should be removed before shipping
    // Explicitly signed: with a plain char that happens to be unsigned, the -1 sentinel would never
    // compare equal to -1 and the function would be permanently disabled.
    static signed char disableLocalizedFormatting = -1;
    if (disableLocalizedFormatting == -1) disableLocalizedFormatting = (getenv("CFStringDisableLocalizedNumberFormatting") != NULL) ? 1 : 0;
    if (disableLocalizedFormatting) return false;

    // Stays NULL if the style is not one we know about or if creating the formatter failed.
    CFNumberFormatterRef formatter = NULL;

    __CFSpinLock(&formatterLock); // We use the formatter from one thread at one time; if this proves to be a bottleneck we need to get fancier

    switch (spec->numericFormatStyle) {
        case CFFormatStyleUnsigned:
        case CFFormatStyleDecimal:
            if (!decimalFormatter || !CFEqual(CFNumberFormatterGetLocale(decimalFormatter), locale)) { // cache or recache if the locale is different
                if (decimalFormatter) CFRelease(decimalFormatter);
                decimalFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterDecimalStyle); // since this is shared, remember to reset all its properties!
            }
            formatter = decimalFormatter;
            break;
        case CFFormatStyleScientific:
            if (!scientificFormatter || !CFEqual(CFNumberFormatterGetLocale(scientificFormatter), locale)) { // cache or recache if the locale is different
                if (scientificFormatter) CFRelease(scientificFormatter);
                scientificFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterScientificStyle);
                if (scientificFormatter) {
                    CFStringRef pattern = CFSTR("#E+00");  // the default pattern does not have the sign if the exponent is positive and it is single digit
                    CFNumberFormatterSetFormat(scientificFormatter, pattern);
                    CFNumberFormatterSetProperty(scientificFormatter, kCFNumberFormatterUseSignificantDigitsKey, kCFBooleanTrue);
                }
            }
            formatter = scientificFormatter;
            break;
        case CFFormatStyleDecimalOrScientific:
            if (!gFormatter || !CFEqual(CFNumberFormatterGetLocale(gFormatter), locale)) { // cache or recache if the locale is different
                if (gFormatter) CFRelease(gFormatter);
                gFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterDecimalStyle);
                // when we update the locale in gFormatter, we also need to update the two grouping sizes;
                // start from 0 so a missing property cannot leave the previous locale's value behind
                groupingSize = 0;
                secondaryGroupingSize = 0;
                if (gFormatter) {
                    CFNumberRef num = (CFNumberRef) CFNumberFormatterCopyProperty(gFormatter, kCFNumberFormatterGroupingSizeKey);
                    if (num) {
                        CFNumberGetValue(num, kCFNumberSInt32Type, &groupingSize);
                        CFRelease(num);
                    }
                    num = (CFNumberRef) CFNumberFormatterCopyProperty(gFormatter, kCFNumberFormatterSecondaryGroupingSizeKey);
                    if (num) {
                        CFNumberGetValue(num, kCFNumberSInt32Type, &secondaryGroupingSize);
                        CFRelease(num);
                    }
                }
            }
            formatter = gFormatter;
            break;
        default:
            // Unknown style: no formatter to use; handled just below.
            break;
    }

    if (NULL == formatter) {
        // Nothing to format with; release the lock and let the caller fall back to unlocalized output.
        __CFSpinUnlock(&formatterLock);
        return false;
    }

    SInt32 prec = hasPrecision ? precision : ((spec->type == CFFormatLongType) ? 0 : 6); // default precision of printf is 6

    // pattern must be set before setting width and padding
    // otherwise, the pattern will take over those settings
    if (spec->numericFormatStyle == CFFormatStyleDecimalOrScientific) {
        if (prec == 0) prec = 1; // at least one sig fig
        CFMutableStringRef pattern = CFStringCreateMutable(NULL, 0);
        // use significant digits pattern
        CFStringAppendCString(pattern, "@", kCFStringEncodingASCII);
        CFStringPad(pattern, CFSTR("#"), prec, 0);
        double targetValue = values[spec->mainArgNum].value.doubleValue;
#if LONG_DOUBLE_SUPPORT
        if (CFFormatSize16 == values[spec->mainArgNum].size) {
            targetValue = values[spec->mainArgNum].value.longDoubleValue; // losing precision
        }
#endif
        double max = pow(10.0, (double)prec);   // if the value requires more digits than the number of sig figs, we need to use scientific format
        double min = 0.0001;                    // if the value is less than 10E-4, scientific format is the shorter form
        // NOTE(review): printf's %g switches to scientific once the exponent reaches the precision,
        // i.e. at |value| >= max; the comparison here is strict - confirm whether that is intentional.
        if (((targetValue > 0 && (targetValue > max || targetValue < min)) || (targetValue < 0 && (targetValue < -max || targetValue > -min)))){
            CFStringAppendCString(pattern, "E+00", kCFStringEncodingASCII);
        } else if (prec > groupingSize && groupingSize != 0) {
            CFStringInsert(pattern, prec-groupingSize, CFSTR(","));  // if we are not using scientific format, we need to set the pattern to use grouping separator
            if (secondaryGroupingSize != 0 && prec > (groupingSize + secondaryGroupingSize)) CFStringInsert(pattern, prec-groupingSize-secondaryGroupingSize, CFSTR(","));
        }
        CFNumberFormatterSetFormat(formatter, pattern);
        CFRelease(pattern);
    }

    // clear the padding, we will add it later if we need it
    const SInt32 z = 0;
    CFNumberRef zero = CFNumberCreate(NULL, kCFNumberSInt32Type, &z);
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterFormatWidthKey, zero);

    // Fraction digits: max is always prec; min is prec for floating point and 0 for integers.
    CFNumberRef tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec);
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMaxFractionDigitsKey, tmp);
    if (spec->type == CFFormatDoubleType) {
        CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinFractionDigitsKey, tmp);
    } else {
        CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinFractionDigitsKey, zero);
    }
    CFRelease(tmp);
    CFRelease(zero);


    // ??? use the right zero here for Arabic
    Boolean padZero = spec->flags & kCFStringFormatZeroFlag;
    if (hasPrecision && spec->type == CFFormatLongType) {  // if we have precision and %d or %u, we pad 0
        padZero = true;
    }
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterPaddingCharacterKey, padZero ? CFSTR("0") : CFSTR(" "));


    // Left (default) or right padding
    SInt32 p = (spec->flags & kCFStringFormatMinusFlag) ? kCFNumberFormatterPadAfterSuffix : (padZero ? kCFNumberFormatterPadAfterPrefix : kCFNumberFormatterPadBeforePrefix);
    if (hasPrecision && spec->type == CFFormatLongType) {
        SInt32 tmpP = kCFNumberFormatterPadAfterPrefix;
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &tmpP);
    } else {
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &p);
    }
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterPaddingPositionKey, tmp);
    CFRelease(tmp);

    Boolean isNegative = false;
    switch (values[spec->mainArgNum].type) {
        case CFFormatLongType:
            if (values[spec->mainArgNum].value.int64Value < 0) isNegative = true;
            break;
        case CFFormatDoubleType:
#if LONG_DOUBLE_SUPPORT
            if ((CFFormatSize16 == values[spec->mainArgNum].size) && (values[spec->mainArgNum].value.longDoubleValue < 0)) isNegative = true;
            else
#endif
            if (values[spec->mainArgNum].value.doubleValue < 0) isNegative = true;
            break;
    }

    // The formatter is shared, so a leading '+' may be left in its pattern from an earlier call:
    // add it when this call wants it, strip it when it does not. The pattern is checked for
    // NULL/empty before its first character is read.
    CFStringRef pattern = CFNumberFormatterGetFormat(formatter);
    Boolean patternHasPlus = (pattern && (CFStringGetLength(pattern) > 0) && (CFStringGetCharacterAtIndex(pattern, 0) == '+'));
    if ((spec->flags & kCFStringFormatPlusFlag) && !isNegative) {
        if (!patternHasPlus) {
            CFMutableStringRef newPattern = CFStringCreateMutableCopy(NULL, 0, CFSTR("+"));
            if (pattern) CFStringAppend(newPattern, pattern);
            CFNumberFormatterSetFormat(formatter, newPattern);
            CFRelease(newPattern);
        }
    } else {
        if (patternHasPlus) {
            CFStringRef newPattern = CFStringCreateWithSubstring(NULL, pattern, CFRangeMake(1, CFStringGetLength(pattern)-1));
            CFNumberFormatterSetFormat(formatter, newPattern);
            CFRelease(newPattern);
        }
    }

    // width == 0 seems to be CFNumberFormatter's default setting
    if (hasPrecision && spec->type == CFFormatLongType) { // if we have precision and %d or %u, we pad 0 according to precision first
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec);
    } else {
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &width);
    }
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterFormatWidthKey, tmp);
    CFRelease(tmp);

    if (spec->numericFormatStyle == CFFormatStyleScientific) {
        prec++;  // for %e, precision+1 is the number of sig fig
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec);
        CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinSignificantDigitsKey, tmp);
        CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMaxSignificantDigitsKey, tmp);
        CFRelease(tmp);
    }

    CFStringRef localizedNumberString = NULL;
    switch (spec->type) {
        case CFFormatLongType:
            // ??? Need to do unsigned
            localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberSInt64Type, &(values[spec->mainArgNum].value.int64Value));
            break;
        case CFFormatDoubleType: {
#if LONG_DOUBLE_SUPPORT
            if (CFFormatSize16 == values[spec->mainArgNum].size) {
                double doubleValue = values[spec->mainArgNum].value.longDoubleValue; // losing precision
                localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberDoubleType, &doubleValue);
            } else
#endif
            {
                localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberDoubleType, &(values[spec->mainArgNum].value.doubleValue));
            }
            break;
        }
    }
    // Done with the shared formatter; everything below works on local strings only.
    __CFSpinUnlock(&formatterLock);

    if (localizedNumberString) {
        // we need to pad space if we have %d or %u
        // (the formatter's width was set to the precision above, so the field width is applied here)
        if (spec->type == CFFormatLongType && hasPrecision && CFStringGetLength(localizedNumberString) < width) {
            CFMutableStringRef finalStr = NULL;
            if (p == kCFNumberFormatterPadAfterSuffix) {
                finalStr = CFStringCreateMutableCopy(NULL, 0, localizedNumberString);
                CFStringPad(finalStr, CFSTR(" "), width, 0);
            } else {
                finalStr = CFStringCreateMutable(NULL, 0);
                CFStringPad(finalStr, CFSTR(" "), width - CFStringGetLength(localizedNumberString), 0);
                CFStringAppend(finalStr, localizedNumberString);
            }
            CFRelease(localizedNumberString);
            localizedNumberString = finalStr;
        }
        CFStringAppend(output, localizedNumberString);
        CFRelease(localizedNumberString);
        return true;
    }
    return false;
}
#endif
5511
5512 CF_INLINE void __CFParseFormatSpec(const UniChar *uformat, const uint8_t *cformat, SInt32 *fmtIdx, SInt32 fmtLen, CFFormatSpec *spec, CFStringRef *configKeyPointer) {
5513 Boolean seenDot = false;
5514 Boolean seenSharp = false;
5515 CFIndex keyIndex = kCFNotFound;
5516
5517 for (;;) {
5518 UniChar ch;
5519 if (fmtLen <= *fmtIdx) return; /* no type */
5520 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5521
5522 if (keyIndex >= 0) {
5523 if ((ch < '0') || ((ch > '9') && (ch < 'A')) || ((ch > 'Z') && (ch < 'a') && (ch != '_')) || (ch > 'z')) {
5524 if (ch == '@') { // found the key
5525 CFIndex length = (*fmtIdx) - 1 - keyIndex;
5526
5527 spec->flags |= kCFStringFormatExternalSpecFlag;
5528 spec->type = CFFormatCFType;
5529 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5530
5531 if ((NULL != configKeyPointer) && (length > 0)) {
5532 if (cformat) {
5533 *configKeyPointer = CFStringCreateWithBytes(NULL, cformat + keyIndex, length, __CFStringGetEightBitStringEncoding(), FALSE);
5534 } else {
5535 *configKeyPointer = CFStringCreateWithCharactersNoCopy(NULL, uformat + keyIndex, length, kCFAllocatorNull);
5536 }
5537 }
5538 return;
5539 }
5540 keyIndex = kCFNotFound;
5541 }
5542 continue;
5543 }
5544
5545 reswtch:switch (ch) {
5546 case '#': // ignored for now
5547 seenSharp = true;
5548 break;
5549 case 0x20:
5550 if (!(spec->flags & kCFStringFormatPlusFlag)) spec->flags |= kCFStringFormatSpaceFlag;
5551 break;
5552 case '-':
5553 spec->flags |= kCFStringFormatMinusFlag;
5554 spec->flags &= ~kCFStringFormatZeroFlag; // remove zero flag
5555 break;
5556 case '+':
5557 spec->flags |= kCFStringFormatPlusFlag;
5558 spec->flags &= ~kCFStringFormatSpaceFlag; // remove space flag
5559 break;
5560 case '0':
5561 if (seenDot) { // after we see '.' and then we see '0', it is 0 precision. We should not see '.' after '0' if '0' is the zero padding flag
5562 spec->precArg = 0;
5563 break;
5564 }
5565 if (!(spec->flags & kCFStringFormatMinusFlag)) spec->flags |= kCFStringFormatZeroFlag;
5566 break;
5567 case 'h':
5568 if (*fmtIdx < fmtLen) {
5569 // fetch next character, don't increment fmtIdx
5570 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
5571 if ('h' == ch) { // 'hh' for char, like 'c'
5572 (*fmtIdx)++;
5573 spec->size = CFFormatSize1;
5574 break;
5575 }
5576 }
5577 spec->size = CFFormatSize2;
5578 break;
5579 case 'l':
5580 if (*fmtIdx < fmtLen) {
5581 // fetch next character, don't increment fmtIdx
5582 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
5583 if ('l' == ch) { // 'll' for long long, like 'q'
5584 (*fmtIdx)++;
5585 spec->size = CFFormatSize8;
5586 break;
5587 }
5588 }
5589 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5590 break;
5591 #if LONG_DOUBLE_SUPPORT
5592 case 'L':
5593 spec->size = CFFormatSize16;
5594 break;
5595 #endif
5596 case 'q':
5597 spec->size = CFFormatSize8;
5598 break;
5599 case 't': case 'z':
5600 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5601 break;
5602 case 'j':
5603 spec->size = CFFormatSize8;
5604 break;
5605 case 'c':
5606 spec->type = CFFormatLongType;
5607 spec->size = CFFormatSize1;
5608 return;
5609 case 'D': case 'd': case 'i': case 'U': case 'u':
5610 // we can localize all but octal or hex
5611 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionMountainLion)) spec->flags |= kCFStringFormatLocalizable;
5612 spec->numericFormatStyle = CFFormatStyleDecimal;
5613 if (ch == 'u' || ch == 'U') spec->numericFormatStyle = CFFormatStyleUnsigned;
5614 // fall thru
5615 case 'O': case 'o': case 'x': case 'X':
5616 spec->type = CFFormatLongType;
5617 // Seems like if spec->size == 0, we should spec->size = CFFormatSize4. However, 0 is handled correctly.
5618 return;
5619 case 'f': case 'F': case 'g': case 'G': case 'e': case 'E': {
5620 // we can localize all but hex float output
5621 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionMountainLion)) spec->flags |= kCFStringFormatLocalizable;
5622 char lch = (ch >= 'A' && ch <= 'Z') ? (ch - 'A' + 'a') : ch;
5623 spec->numericFormatStyle = ((lch == 'e' || lch == 'g') ? CFFormatStyleScientific : 0) | ((lch == 'f' || lch == 'g') ? CFFormatStyleDecimal : 0);
5624 if (seenDot && spec->precArg == -1 && spec->precArgNum == -1) { // for the cases that we have '.' but no precision followed, not even '*'
5625 spec->precArg = 0;
5626 }
5627 }
5628 // fall thru
5629 case 'a': case 'A':
5630 spec->type = CFFormatDoubleType;
5631 if (spec->size != CFFormatSize16) spec->size = CFFormatSize8;
5632 return;
5633 case 'n': /* %n is not handled correctly; for Leopard or newer apps, we disable it further */
5634 spec->type = 1 ? CFFormatDummyPointerType : CFFormatPointerType;
5635 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5636 return;
5637 case 'p':
5638 spec->type = CFFormatPointerType;
5639 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5640 return;
5641 case 's':
5642 spec->type = CFFormatCharsType;
5643 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5644 return;
5645 case 'S':
5646 spec->type = CFFormatUnicharsType;
5647 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5648 return;
5649 case 'C':
5650 spec->type = CFFormatSingleUnicharType;
5651 spec->size = CFFormatSize2;
5652 return;
5653 case 'P':
5654 spec->type = CFFormatPascalCharsType;
5655 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5656 return;
5657 case '@':
5658 if (seenSharp) {
5659 seenSharp = false;
5660 keyIndex = *fmtIdx;
5661 break;
5662 } else {
5663 spec->type = CFFormatCFType;
5664 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5665 return;
5666 }
5667 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
5668 int64_t number = 0;
5669 do {
5670 number = 10 * number + (ch - '0');
5671 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5672 } while ((UInt32)(ch - '0') <= 9);
5673 if ('$' == ch) {
5674 if (-2 == spec->precArgNum) {
5675 spec->precArgNum = (int8_t)number - 1; // Arg numbers start from 1
5676 } else if (-2 == spec->widthArgNum) {
5677 spec->widthArgNum = (int8_t)number - 1; // Arg numbers start from 1
5678 } else {
5679 spec->mainArgNum = (int8_t)number - 1; // Arg numbers start from 1
5680 }
5681 break;
5682 } else if (seenDot) { /* else it's either precision or width */
5683 spec->precArg = (SInt32)number;
5684 } else {
5685 spec->widthArg = (SInt32)number;
5686 }
5687 goto reswtch;
5688 }
5689 case '*':
5690 spec->widthArgNum = -2;
5691 break;
5692 case '.':
5693 seenDot = true;
5694 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5695 if ('*' == ch) {
5696 spec->precArgNum = -2;
5697 break;
5698 }
5699 goto reswtch;
5700 default:
5701 spec->type = CFFormatLiteralType;
5702 return;
5703 }
5704 }
5705 }
5706
5707 /* ??? %s depends on handling of encodings by __CFStringAppendBytes
5708 */
5709 void CFStringAppendFormatAndArguments(CFMutableStringRef outputString, CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
5710 __CFStringAppendFormatCore(outputString, NULL, formatOptions, NULL, formatString, 0, NULL, 0, args);
5711 }
5712
// Length of the buffer to call sprintf() with
#define BUFFER_LEN 512

/* SNPRINTF(TYPE, WHAT) formats one value into `buffer` using `formatBuffer`.
   It expands to a braced block (call sites do not add a trailing semicolon)
   and relies on these names being in scope at the expansion site:
   specs, curSpec, buffer, formatBuffer, width and precision.
   WHAT is converted to TYPE first; `width` and/or `precision` are passed ahead
   of the value only when the current spec's widthArgNum / precArgNum is not -1
   (i.e. the format text contains a '*' the C library expects an argument for).
   The Apple variant prints through snprintf_l() with a NULL locale and a
   BUFFER_LEN-1 limit; every other platform uses plain sprintf().
*/
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
#define SNPRINTF(TYPE, WHAT) { \
    TYPE typedValue = (TYPE) WHAT; \
    if (-1 == specs[curSpec].widthArgNum && -1 == specs[curSpec].precArgNum) { \
        snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, typedValue); \
    } else if (-1 == specs[curSpec].widthArgNum) { \
        snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, precision, typedValue); \
    } else if (-1 == specs[curSpec].precArgNum) { \
        snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, width, typedValue); \
    } else { \
        snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, width, precision, typedValue); \
    }}
#else
#define SNPRINTF(TYPE, WHAT) { \
    TYPE typedValue = (TYPE) WHAT; \
    if (-1 == specs[curSpec].widthArgNum && -1 == specs[curSpec].precArgNum) { \
        sprintf(buffer, formatBuffer, typedValue); \
    } else if (-1 == specs[curSpec].widthArgNum) { \
        sprintf(buffer, formatBuffer, precision, typedValue); \
    } else if (-1 == specs[curSpec].precArgNum) { \
        sprintf(buffer, formatBuffer, width, typedValue); \
    } else { \
        sprintf(buffer, formatBuffer, width, precision, typedValue); \
    }}
#endif
5749
5750 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) { __CFStringAppendFormatCore(outputString, copyDescFunc, formatOptions, NULL, formatString, 0, NULL, 0, args); }
5751
5752 static void __CFStringAppendFormatCore(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFDictionaryRef stringsDictConfig, CFStringRef formatString, CFIndex initialArgPosition, const void *origValues, CFIndex originalValuesSize, va_list args) {
5753 SInt32 numSpecs, sizeSpecs, sizeArgNum, formatIdx, curSpec, argNum;
5754 CFIndex formatLen;
5755 #define FORMAT_BUFFER_LEN 400
5756 const uint8_t *cformat = NULL;
5757 const UniChar *uformat = NULL;
5758 UniChar *formatChars = NULL;
5759 UniChar localFormatBuffer[FORMAT_BUFFER_LEN];
5760
5761 #define VPRINTF_BUFFER_LEN 61
5762 CFFormatSpec localSpecsBuffer[VPRINTF_BUFFER_LEN];
5763 CFFormatSpec *specs;
5764 CFPrintValue localValuesBuffer[VPRINTF_BUFFER_LEN];
5765 CFPrintValue *values;
5766 const CFPrintValue *originalValues = (const CFPrintValue *)origValues;
5767 CFDictionaryRef localConfigs[VPRINTF_BUFFER_LEN];
5768 CFDictionaryRef *configs;
5769 CFIndex numConfigs;
5770 CFAllocatorRef tmpAlloc = NULL;
5771 intmax_t dummyLocation; // A place for %n to do its thing in; should be the widest possible int value
5772
5773 numSpecs = 0;
5774 sizeSpecs = 0;
5775 sizeArgNum = 0;
5776 numConfigs = 0;
5777 specs = NULL;
5778 values = NULL;
5779 configs = NULL;
5780
5781
5782 formatLen = CFStringGetLength(formatString);
5783 if (!CF_IS_OBJC(__kCFStringTypeID, formatString)) {
5784 __CFAssertIsString(formatString);
5785 if (!__CFStrIsUnicode(formatString)) {
5786 cformat = (const uint8_t *)__CFStrContents(formatString);
5787 if (cformat) cformat += __CFStrSkipAnyLengthByte(formatString);
5788 } else {
5789 uformat = (const UniChar *)__CFStrContents(formatString);
5790 }
5791 }
5792 if (!cformat && !uformat) {
5793 formatChars = (formatLen > FORMAT_BUFFER_LEN) ? (UniChar *)CFAllocatorAllocate(tmpAlloc = __CFGetDefaultAllocator(), formatLen * sizeof(UniChar), 0) : localFormatBuffer;
5794 if (formatChars != localFormatBuffer && __CFOASafe) __CFSetLastAllocationEventName(formatChars, "CFString (temp)");
5795 CFStringGetCharacters(formatString, CFRangeMake(0, formatLen), formatChars);
5796 uformat = formatChars;
5797 }
5798
5799 /* Compute an upper bound for the number of format specifications */
5800 if (cformat) {
5801 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == cformat[formatIdx]) sizeSpecs++;
5802 } else {
5803 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == uformat[formatIdx]) sizeSpecs++;
5804 }
5805 tmpAlloc = __CFGetDefaultAllocator();
5806 specs = ((2 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? (CFFormatSpec *)CFAllocatorAllocate(tmpAlloc, (2 * sizeSpecs + 1) * sizeof(CFFormatSpec), 0) : localSpecsBuffer;
5807 if (specs != localSpecsBuffer && __CFOASafe) __CFSetLastAllocationEventName(specs, "CFString (temp)");
5808
5809 configs = ((sizeSpecs < VPRINTF_BUFFER_LEN) ? localConfigs : (CFDictionaryRef *)CFAllocatorAllocate(tmpAlloc, sizeof(CFStringRef) * sizeSpecs, 0));
5810
5811 /* Collect format specification information from the format string */
5812 for (curSpec = 0, formatIdx = 0; formatIdx < formatLen; curSpec++) {
5813 SInt32 newFmtIdx;
5814 specs[curSpec].loc = formatIdx;
5815 specs[curSpec].len = 0;
5816 specs[curSpec].size = 0;
5817 specs[curSpec].type = 0;
5818 specs[curSpec].flags = 0;
5819 specs[curSpec].widthArg = -1;
5820 specs[curSpec].precArg = -1;
5821 specs[curSpec].mainArgNum = -1;
5822 specs[curSpec].precArgNum = -1;
5823 specs[curSpec].widthArgNum = -1;
5824 specs[curSpec].configDictIndex = -1;
5825 if (cformat) {
5826 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != cformat[newFmtIdx]; newFmtIdx++);
5827 } else {
5828 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != uformat[newFmtIdx]; newFmtIdx++);
5829 }
5830 if (newFmtIdx != formatIdx) { /* Literal chunk */
5831 specs[curSpec].type = CFFormatLiteralType;
5832 specs[curSpec].len = newFmtIdx - formatIdx;
5833 } else {
5834 CFStringRef configKey = NULL;
5835 newFmtIdx++; /* Skip % */
5836 __CFParseFormatSpec(uformat, cformat, &newFmtIdx, formatLen, &(specs[curSpec]), &configKey);
5837 if (CFFormatLiteralType == specs[curSpec].type) {
5838 specs[curSpec].loc = formatIdx + 1;
5839 specs[curSpec].len = 1;
5840 } else {
5841 specs[curSpec].len = newFmtIdx - formatIdx;
5842 }
5843 }
5844 formatIdx = newFmtIdx;
5845
5846 // fprintf(stderr, "specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
5847
5848 }
5849 numSpecs = curSpec;
5850
5851 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
5852 sizeArgNum = ((NULL == originalValues) ? (3 * sizeSpecs + 1) : originalValuesSize);
5853
5854 values = (sizeArgNum > VPRINTF_BUFFER_LEN) ? (CFPrintValue *)CFAllocatorAllocate(tmpAlloc, sizeArgNum * sizeof(CFPrintValue), 0) : localValuesBuffer;
5855 if (values != localValuesBuffer && __CFOASafe) __CFSetLastAllocationEventName(values, "CFString (temp)");
5856 memset(values, 0, sizeArgNum * sizeof(CFPrintValue));
5857
5858 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
5859 // va_copy is a C99 extension. No support on Windows
5860 va_list copiedArgs;
5861 if (numConfigs > 0) va_copy(copiedArgs, args); // we need to preserve the original state for passing down
5862 #endif
5863
5864 /* Compute values array */
5865 argNum = initialArgPosition;
5866 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
5867 SInt32 newMaxArgNum;
5868 if (0 == specs[curSpec].type) continue;
5869 if (CFFormatLiteralType == specs[curSpec].type) continue;
5870 newMaxArgNum = sizeArgNum;
5871 if (newMaxArgNum < specs[curSpec].mainArgNum) {
5872 newMaxArgNum = specs[curSpec].mainArgNum;
5873 }
5874 if (newMaxArgNum < specs[curSpec].precArgNum) {
5875 newMaxArgNum = specs[curSpec].precArgNum;
5876 }
5877 if (newMaxArgNum < specs[curSpec].widthArgNum) {
5878 newMaxArgNum = specs[curSpec].widthArgNum;
5879 }
5880 if (sizeArgNum < newMaxArgNum) {
5881 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
5882 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
5883 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
5884 return; // more args than we expected!
5885 }
5886 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
5887 if (-2 == specs[curSpec].widthArgNum) {
5888 specs[curSpec].widthArgNum = argNum++;
5889 }
5890 if (-2 == specs[curSpec].precArgNum) {
5891 specs[curSpec].precArgNum = argNum++;
5892 }
5893 if (-1 == specs[curSpec].mainArgNum) {
5894 specs[curSpec].mainArgNum = argNum++;
5895 }
5896
5897 values[specs[curSpec].mainArgNum].size = specs[curSpec].size;
5898 values[specs[curSpec].mainArgNum].type = specs[curSpec].type;
5899
5900
5901 if (-1 != specs[curSpec].widthArgNum) {
5902 values[specs[curSpec].widthArgNum].size = 0;
5903 values[specs[curSpec].widthArgNum].type = CFFormatLongType;
5904 }
5905 if (-1 != specs[curSpec].precArgNum) {
5906 values[specs[curSpec].precArgNum].size = 0;
5907 values[specs[curSpec].precArgNum].type = CFFormatLongType;
5908 }
5909 }
5910
5911 /* Collect the arguments in correct type from vararg list */
5912 for (argNum = 0; argNum < sizeArgNum; argNum++) {
5913 if ((NULL != originalValues) && (0 == values[argNum].type)) values[argNum] = originalValues[argNum];
5914 switch (values[argNum].type) {
5915 case 0:
5916 case CFFormatLiteralType:
5917 break;
5918 case CFFormatLongType:
5919 case CFFormatSingleUnicharType:
5920 if (CFFormatSize1 == values[argNum].size) {
5921 values[argNum].value.int64Value = (int64_t)(int8_t)va_arg(args, int);
5922 } else if (CFFormatSize2 == values[argNum].size) {
5923 values[argNum].value.int64Value = (int64_t)(int16_t)va_arg(args, int);
5924 } else if (CFFormatSize4 == values[argNum].size) {
5925 values[argNum].value.int64Value = (int64_t)va_arg(args, int32_t);
5926 } else if (CFFormatSize8 == values[argNum].size) {
5927 values[argNum].value.int64Value = (int64_t)va_arg(args, int64_t);
5928 } else {
5929 values[argNum].value.int64Value = (int64_t)va_arg(args, int);
5930 }
5931 break;
5932 case CFFormatDoubleType:
5933 #if LONG_DOUBLE_SUPPORT
5934 if (CFFormatSize16 == values[argNum].size) {
5935 values[argNum].value.longDoubleValue = va_arg(args, long double);
5936 } else
5937 #endif
5938 {
5939 values[argNum].value.doubleValue = va_arg(args, double);
5940 }
5941 break;
5942 case CFFormatPointerType:
5943 case CFFormatObjectType:
5944 case CFFormatCFType:
5945 case CFFormatUnicharsType:
5946 case CFFormatCharsType:
5947 case CFFormatPascalCharsType:
5948 values[argNum].value.pointerValue = va_arg(args, void *);
5949 break;
5950 case CFFormatDummyPointerType:
5951 (void)va_arg(args, void *); // Skip the provided argument
5952 values[argNum].value.pointerValue = &dummyLocation;
5953 break;
5954 }
5955 }
5956 va_end(args);
5957
5958 /* Format the pieces together */
5959
5960 if (NULL == originalValues) {
5961 originalValues = values;
5962 originalValuesSize = sizeArgNum;
5963 }
5964
5965 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
5966 SInt32 width = 0, precision = 0;
5967 UniChar *up, ch;
5968 Boolean hasWidth = false, hasPrecision = false;
5969
5970 // widthArgNum and widthArg are never set at the same time; same for precArg*
5971 if (-1 != specs[curSpec].widthArgNum) {
5972 width = (SInt32)values[specs[curSpec].widthArgNum].value.int64Value;
5973 hasWidth = true;
5974 }
5975 if (-1 != specs[curSpec].precArgNum) {
5976 precision = (SInt32)values[specs[curSpec].precArgNum].value.int64Value;
5977 hasPrecision = true;
5978 }
5979 if (-1 != specs[curSpec].widthArg) {
5980 width = specs[curSpec].widthArg;
5981 hasWidth = true;
5982 }
5983 if (-1 != specs[curSpec].precArg) {
5984 precision = specs[curSpec].precArg;
5985 hasPrecision = true;
5986 }
5987
5988 switch (specs[curSpec].type) {
5989 case CFFormatLongType:
5990 case CFFormatDoubleType:
5991 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
5992 if (formatOptions && (specs[curSpec].flags & kCFStringFormatLocalizable) && (CFGetTypeID(formatOptions) == CFLocaleGetTypeID())) { // We have a locale, so we do localized formatting
5993 if (__CFStringFormatLocalizedNumber(outputString, (CFLocaleRef)formatOptions, values, &specs[curSpec], width, precision, hasPrecision)) break;
5994 }
5995 /* Otherwise fall-thru to the next case! */
5996 #endif
5997 case CFFormatPointerType: {
5998 char formatBuffer[128];
5999 #if defined(__GNUC__)
6000 char buffer[BUFFER_LEN + width + precision];
6001 #else
6002 char stackBuffer[BUFFER_LEN];
6003 char *dynamicBuffer = NULL;
6004 char *buffer = stackBuffer;
6005 if (256+width+precision > BUFFER_LEN) {
6006 dynamicBuffer = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, 256+width+precision, 0);
6007 buffer = dynamicBuffer;
6008 }
6009 #endif
6010 SInt32 cidx, idx, loc;
6011 Boolean appended = false;
6012 loc = specs[curSpec].loc;
6013 // In preparation to call snprintf(), copy the format string out
6014 if (cformat) {
6015 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
6016 if ('$' == cformat[loc + cidx]) {
6017 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
6018 } else {
6019 formatBuffer[idx] = cformat[loc + cidx];
6020 }
6021 }
6022 } else {
6023 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
6024 if ('$' == uformat[loc + cidx]) {
6025 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
6026 } else {
6027 formatBuffer[idx] = (int8_t)uformat[loc + cidx];
6028 }
6029 }
6030 }
6031 formatBuffer[idx] = '\0';
6032 // Should modify format buffer here if necessary; for example, to translate %qd to
6033 // the equivalent, on architectures which do not have %q.
6034 buffer[sizeof(buffer) - 1] = '\0';
6035 switch (specs[curSpec].type) {
6036 case CFFormatLongType:
6037 if (CFFormatSize8 == specs[curSpec].size) {
6038 SNPRINTF(int64_t, values[specs[curSpec].mainArgNum].value.int64Value)
6039 } else {
6040 SNPRINTF(SInt32, values[specs[curSpec].mainArgNum].value.int64Value)
6041 }
6042 break;
6043 case CFFormatPointerType:
6044 case CFFormatDummyPointerType:
6045 SNPRINTF(void *, values[specs[curSpec].mainArgNum].value.pointerValue)
6046 break;
6047
6048 case CFFormatDoubleType:
6049 #if LONG_DOUBLE_SUPPORT
6050 if (CFFormatSize16 == specs[curSpec].size) {
6051 SNPRINTF(long double, values[specs[curSpec].mainArgNum].value.longDoubleValue)
6052 } else
6053 #endif
6054 {
6055 SNPRINTF(double, values[specs[curSpec].mainArgNum].value.doubleValue)
6056 }
6057 // See if we need to localize the decimal point
6058 if (formatOptions) { // We have localization info
6059 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
6060 CFStringRef decimalSeparator = (CFGetTypeID(formatOptions) == CFLocaleGetTypeID()) ? (CFStringRef)CFLocaleGetValue((CFLocaleRef)formatOptions, kCFLocaleDecimalSeparatorKey) : (CFStringRef)CFDictionaryGetValue(formatOptions, CFSTR("NSDecimalSeparator"));
6061 #else
6062 CFStringRef decimalSeparator = CFSTR(".");
6063 #endif
6064 if (decimalSeparator != NULL) { // We have a decimal separator in there
6065 CFIndex decimalPointLoc = 0;
6066 while (buffer[decimalPointLoc] != 0 && buffer[decimalPointLoc] != '.') decimalPointLoc++;
6067 if (buffer[decimalPointLoc] == '.') { // And we have a decimal point in the formatted string
6068 buffer[decimalPointLoc] = 0;
6069 CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
6070 CFStringAppend(outputString, decimalSeparator);
6071 CFStringAppendCString(outputString, (const char *)(buffer + decimalPointLoc + 1), __CFStringGetEightBitStringEncoding());
6072 appended = true;
6073 }
6074 }
6075 }
6076 break;
6077 }
6078 if (!appended) CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
6079 #if !defined(__GNUC__)
6080 if (dynamicBuffer) {
6081 CFAllocatorDeallocate(kCFAllocatorSystemDefault, dynamicBuffer);
6082 }
6083 #endif
6084 }
6085 break;
6086 case CFFormatLiteralType:
6087 if (cformat) {
6088 __CFStringAppendBytes(outputString, (const char *)(cformat+specs[curSpec].loc), specs[curSpec].len, __CFStringGetEightBitStringEncoding());
6089 } else {
6090 CFStringAppendCharacters(outputString, uformat+specs[curSpec].loc, specs[curSpec].len);
6091 }
6092 break;
6093 case CFFormatPascalCharsType:
6094 case CFFormatCharsType:
6095 if (values[specs[curSpec].mainArgNum].value.pointerValue == NULL) {
6096 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
6097 } else {
6098 int len;
6099 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue;
6100 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
6101 len = ((unsigned char *)str)[0];
6102 str++;
6103 if (hasPrecision && precision < len) len = precision;
6104 } else { // C-string case
6105 if (!hasPrecision) { // No precision, so rely on the terminating null character
6106 len = strlen(str);
6107 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
6108 const char *terminatingNull = (const char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
6109 if (terminatingNull) { // There was a null in the first precision characters
6110 len = terminatingNull - str;
6111 } else {
6112 len = precision;
6113 }
6114 }
6115 }
6116 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6117 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6118 // to ignore those flags (and, say, never pad with '0' instead of space).
6119 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6120 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
6121 if (hasWidth && width > len) {
6122 int w = width - len; // We need this many spaces; do it ten at a time
6123 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6124 }
6125 } else {
6126 if (hasWidth && width > len) {
6127 int w = width - len; // We need this many spaces; do it ten at a time
6128 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6129 }
6130 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
6131 }
6132 }
6133 break;
6134 case CFFormatSingleUnicharType:
6135 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value;
6136 CFStringAppendCharacters(outputString, &ch, 1);
6137 break;
6138 case CFFormatUnicharsType:
6139 //??? need to handle width, precision, and padding arguments
6140 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue;
6141 if (NULL == up) {
6142 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
6143 } else {
6144 int len;
6145 for (len = 0; 0 != up[len]; len++);
6146 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6147 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6148 // to ignore those flags (and, say, never pad with '0' instead of space).
6149 if (hasPrecision && precision < len) len = precision;
6150 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6151 CFStringAppendCharacters(outputString, up, len);
6152 if (hasWidth && width > len) {
6153 int w = width - len; // We need this many spaces; do it ten at a time
6154 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6155 }
6156 } else {
6157 if (hasWidth && width > len) {
6158 int w = width - len; // We need this many spaces; do it ten at a time
6159 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6160 }
6161 CFStringAppendCharacters(outputString, up, len);
6162 }
6163 }
6164 break;
6165 case CFFormatCFType:
6166 case CFFormatObjectType:
6167 if (specs[curSpec].configDictIndex != -1) { // config dict
6168 CFTypeRef object = NULL;
6169 CFStringRef innerFormat = NULL;
6170
6171 switch (values[specs[curSpec].mainArgNum].type) {
6172 case CFFormatLongType:
6173 object = CFNumberCreate(tmpAlloc, kCFNumberSInt64Type, &(values[specs[curSpec].mainArgNum].value.int64Value));
6174 break;
6175
6176 case CFFormatDoubleType:
6177 #if LONG_DOUBLE_SUPPORT
6178 if (CFFormatSize16 == values[specs[curSpec].mainArgNum].size) {
6179 double aValue = values[specs[curSpec].mainArgNum].value.longDoubleValue; // losing precision
6180
6181 object = CFNumberCreate(tmpAlloc, kCFNumberDoubleType, &aValue);
6182 } else
6183 #endif
6184 {
6185 object = CFNumberCreate(tmpAlloc, kCFNumberDoubleType, &(values[specs[curSpec].mainArgNum].value.doubleValue));
6186 }
6187 break;
6188
6189 case CFFormatPointerType:
6190 object = CFNumberCreate(tmpAlloc, kCFNumberCFIndexType, &(values[specs[curSpec].mainArgNum].value.pointerValue));
6191 break;
6192
6193 case CFFormatPascalCharsType:
6194 case CFFormatCharsType:
6195 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
6196 CFMutableStringRef aString = CFStringCreateMutable(tmpAlloc, 0);
6197 int len;
6198 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue;
6199 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
6200 len = ((unsigned char *)str)[0];
6201 str++;
6202 if (hasPrecision && precision < len) len = precision;
6203 } else { // C-string case
6204 if (!hasPrecision) { // No precision, so rely on the terminating null character
6205 len = strlen(str);
6206 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
6207 const char *terminatingNull = (const char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
6208 if (terminatingNull) { // There was a null in the first precision characters
6209 len = terminatingNull - str;
6210 } else {
6211 len = precision;
6212 }
6213 }
6214 }
6215 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6216 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6217 // to ignore those flags (and, say, never pad with '0' instead of space).
6218 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6219 __CFStringAppendBytes(aString, str, len, __CFStringGetSystemEncoding());
6220 if (hasWidth && width > len) {
6221 int w = width - len; // We need this many spaces; do it ten at a time
6222 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6223 }
6224 } else {
6225 if (hasWidth && width > len) {
6226 int w = width - len; // We need this many spaces; do it ten at a time
6227 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6228 }
6229 __CFStringAppendBytes(aString, str, len, __CFStringGetSystemEncoding());
6230 }
6231
6232 object = aString;
6233 }
6234 break;
6235
6236 case CFFormatSingleUnicharType:
6237 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value;
6238 object = CFStringCreateWithCharactersNoCopy(tmpAlloc, &ch, 1, kCFAllocatorNull);
6239 break;
6240
6241 case CFFormatUnicharsType:
6242 //??? need to handle width, precision, and padding arguments
6243 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue;
6244 if (NULL != up) {
6245 CFMutableStringRef aString = CFStringCreateMutable(tmpAlloc, 0);
6246 int len;
6247 for (len = 0; 0 != up[len]; len++);
6248 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6249 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6250 // to ignore those flags (and, say, never pad with '0' instead of space).
6251 if (hasPrecision && precision < len) len = precision;
6252 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6253 CFStringAppendCharacters(aString, up, len);
6254 if (hasWidth && width > len) {
6255 int w = width - len; // We need this many spaces; do it ten at a time
6256 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6257 }
6258 } else {
6259 if (hasWidth && width > len) {
6260 int w = width - len; // We need this many spaces; do it ten at a time
6261 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6262 }
6263 CFStringAppendCharacters(aString, up, len);
6264 }
6265 object = aString;
6266 }
6267 break;
6268
6269 case CFFormatCFType:
6270 case CFFormatObjectType:
6271 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) object = CFRetain(values[specs[curSpec].mainArgNum].value.pointerValue);
6272 break;
6273 }
6274
6275 if (NULL != object) CFRelease(object);
6276
6277 } else if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
6278 CFStringRef str = NULL;
6279 if (copyDescFunc) {
6280 str = copyDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
6281 } else {
6282 str = __CFCopyFormattingDescription(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
6283 if (NULL == str) {
6284 str = CFCopyDescription(values[specs[curSpec].mainArgNum].value.pointerValue);
6285 }
6286 }
6287 if (str) {
6288 CFStringAppend(outputString, str);
6289 CFRelease(str);
6290 } else {
6291 CFStringAppendCString(outputString, "(null description)", kCFStringEncodingASCII);
6292 }
6293 } else {
6294 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
6295 }
6296 break;
6297 }
6298 }
6299
6300 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
6301 // va_copy is a C99 extension. No support on Windows
6302 if (numConfigs > 0) va_end(copiedArgs);
6303 #endif
6304 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
6305 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
6306 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
6307 if (configs != localConfigs) CFAllocatorDeallocate(tmpAlloc, configs);
6308 }
6309
6310 #undef SNPRINTF
6311
6312 void CFShowStr(CFStringRef str) {
6313 CFAllocatorRef alloc;
6314
6315 if (!str) {
6316 fprintf(stdout, "(null)\n");
6317 return;
6318 }
6319
6320 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
6321 fprintf(stdout, "This is an NSString, not CFString\n");
6322 return;
6323 }
6324
6325 alloc = CFGetAllocator(str);
6326
6327 fprintf(stdout, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str), __CFStrIsEightBit(str));
6328 fprintf(stdout, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
6329 __CFStrHasLengthByte(str), __CFStrHasNullByte(str), __CFStrIsInline(str));
6330
6331 fprintf(stdout, "Allocator ");
6332 if (alloc != kCFAllocatorSystemDefault) {
6333 fprintf(stdout, "%p\n", (void *)alloc);
6334 } else {
6335 fprintf(stdout, "SystemDefault\n");
6336 }
6337 fprintf(stdout, "Mutable %d\n", __CFStrIsMutable(str));
6338 if (!__CFStrIsMutable(str) && __CFStrHasContentsDeallocator(str)) {
6339 if (__CFStrContentsDeallocator(str)) fprintf(stdout, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str));
6340 else fprintf(stdout, "ContentsDeallocatorFunc None\n");
6341 } else if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str)) {
6342 fprintf(stdout, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef)str));
6343 }
6344
6345 if (__CFStrIsMutable(str)) {
6346 fprintf(stdout, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str), __CFStrIsFixed(str) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str));
6347 }
6348 fprintf(stdout, "Contents %p\n", (void *)__CFStrContents(str));
6349 }
6350
6351
6352
6353 #undef HANGUL_SBASE
6354 #undef HANGUL_LBASE
6355 #undef HANGUL_VBASE
6356 #undef HANGUL_TBASE
6357 #undef HANGUL_SCOUNT
6358 #undef HANGUL_LCOUNT
6359 #undef HANGUL_VCOUNT
6360 #undef HANGUL_TCOUNT
6361 #undef HANGUL_NCOUNT
6362