]> git.saurik.com Git - apple/cf.git/blob - CFString.c
CF-744.12.tar.gz
[apple/cf.git] / CFString.c
1 /*
2 * Copyright (c) 2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFString.c
25 Copyright (c) 1998-2012, Apple Inc. All rights reserved.
26 Responsibility: Ali Ozer
27
28 !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined.
29 */
30
31 #include <CoreFoundation/CFBase.h>
32 #include <CoreFoundation/CFString.h>
33 #include <CoreFoundation/CFDictionary.h>
34 #include <CoreFoundation/CFStringEncodingConverterExt.h>
35 #include <CoreFoundation/CFUniChar.h>
36 #include <CoreFoundation/CFUnicodeDecomposition.h>
37 #include <CoreFoundation/CFUnicodePrecomposition.h>
38 #include <CoreFoundation/CFPriv.h>
39 #include <CoreFoundation/CFNumber.h>
40 #include <CoreFoundation/CFNumberFormatter.h>
41 #include "CFInternal.h"
42 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
43 #include "CFLocaleInternal.h"
44 #endif
45 #include <stdarg.h>
46 #include <stdio.h>
47 #include <string.h>
48 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
49 #include <unistd.h>
50 #endif
51
// LONG_DOUBLE_SUPPORT is 1 when the compiler defines __GNUC__ and 0 otherwise.
// NOTE(review): presumably gates long double handling in the format code; the use site is outside this section - confirm.
52 #if defined(__GNUC__)
53 #define LONG_DOUBLE_SUPPORT 1
54 #else
55 #define LONG_DOUBLE_SUPPORT 0
56 #endif
57
58
59
// Hard-wired to 0 here. NOTE(review): use sites are outside this section - confirm what it disables.
60 #define USE_STRING_ROM 0
61
62
// Defaults to 0 unless the build defines it; a non-zero value compiles in __CFRecordStringAllocationEvent().
63 #ifndef INSTRUMENT_SHARED_STRINGS
64 #define INSTRUMENT_SHARED_STRINGS 0
65 #endif
66
67 __private_extern__ const CFStringRef __kCFLocaleCollatorID;
68
69 #if INSTRUMENT_SHARED_STRINGS
70 #include <sys/stat.h> /* for umask() */
71
72 static void __CFRecordStringAllocationEvent(const char *encoding, const char *bytes, CFIndex byteCount) {
73 static CFSpinLock_t lock = CFSpinLockInit;
74
75 if (memchr(bytes, '\n', byteCount)) return; //never record string allocation events for strings with newlines, because those confuse our parser and because they'll never go into the ROM
76
77 __CFSpinLock(&lock);
78 static int fd;
79 if (! fd) {
80 extern char **_NSGetProgname(void);
81 const char *name = *_NSGetProgname();
82 if (! name) name = "UNKNOWN";
83 umask(0);
84 char path[1024];
85 snprintf(path, sizeof(path), "/tmp/CFSharedStringInstrumentation_%s_%d.txt", name, getpid());
86 fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0666);
87 if (fd <= 0) {
88 int error = errno;
89 const char *errString = strerror(error);
90 fprintf(stderr, "open() failed with error %d (%s)\n", error, errString);
91 }
92 }
93 if (fd > 0) {
94 char *buffer = NULL;
95 char formatString[256];
96 snprintf(formatString, sizeof(formatString), "%%-8d\t%%-16s\t%%.%lds\n", byteCount);
97 int resultCount = asprintf(&buffer, formatString, getpid(), encoding, bytes);
98 if (buffer && resultCount > 0) write(fd, buffer, resultCount);
99 else puts("Couldn't record allocation event");
100 free(buffer);
101 }
102 __CFSpinUnlock(&lock);
103 }
104 #endif //INSTRUMENT_SHARED_STRINGS
105
106
107
// Signature of a single-byte-to-UniChar conversion callback; CFStringGetSystemEncoding() casts a converter's toUnicode entry to this type.
108 typedef Boolean (*UNI_CHAR_FUNC)(UInt32 flags, UInt8 ch, UniChar *unicodeChar);
109
// malloc_good_size() is declared only on these targets; __CFStrNewCapacity() calls it under the same condition.
110 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
111 extern size_t malloc_good_size(size_t size);
112 #endif
// Defined outside this file. NOTE(review): presumably widens numChars bytes into UniChars - confirm against the definition.
113 extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars);
114
// Forward declaration; the definition is outside this section of the file.
115 static void __CFStringAppendFormatCore(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef formatString, CFIndex initialArgPosition, const void *origValues, CFIndex originalValuesSize, va_list args);
116
// Everything up to the matching #endif exists only when DEBUG is defined.
117 #if defined(DEBUG)
118
119 // We put this into C & Pascal strings if we can't convert
120 #define CONVERSIONFAILURESTR "CFString conversion failed"
121
122 // We set this to true when purging the constant string table, so CFStringDeallocate doesn't assert
123 static Boolean __CFConstantStringTableBeingFreed = false;
124
125 #endif
126
127
128
129 // This section is for CFString compatibility and other behaviors...
130
131 static CFOptionFlags _CFStringCompatibilityMask = 0;
132
133 void _CFStringSetCompatibility(CFOptionFlags mask) {
134 _CFStringCompatibilityMask |= mask;
135 }
136
137 CF_INLINE Boolean __CFStringGetCompatibility(CFOptionFlags mask) {
138 return (_CFStringCompatibilityMask & mask) == mask;
139 }
140
141
142
143 // Two constant strings used by CFString; these are initialized in CFStringInitialize
// NOTE(review): only kCFEmptyString is declared at this point; the comment above mentions two strings - confirm whether a second one still exists.
144 CONST_STRING_DECL(kCFEmptyString, "")
145
146 // This is separate for C++
/* Field layout used by mutable strings; embedded in struct __CFString as variants.notInlineMutable.
   'buffer' and 'length' are the first two members, matching the order in __notInlineImmutable1.
   The four 1-bit flags plus desiredCapacity are declared as bitfields: 4 + 60 bits under __LP64__, 4 + 28 bits otherwise.
*/
147 struct __notInlineMutable {
148 void *buffer;
149 CFIndex length;
150 CFIndex capacity; // Capacity in bytes
151 unsigned int hasGap:1; // Currently unused
152 unsigned int isFixedCapacity:1;
153 unsigned int isExternalMutable:1;
154 unsigned int capacityProvidedExternally:1;
155 #if __LP64__
156 unsigned long desiredCapacity:60;
157 #else
158 unsigned long desiredCapacity:28;
159 #endif
160 CFAllocatorRef contentsAllocator; // Optional
161 }; // The only mutable variant for CFString
162
163
164 /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
165 */
// Which member of 'variants' is valid for a given object is determined by the info bits in base._cfinfo[CF_INFO_BITS]
// (see the __CFStr... inline helpers that follow this struct).
166 struct __CFString {
167 CFRuntimeBase base;
168 union { // In many cases the allocated structs are smaller than these
169 struct __inline1 {
170 CFIndex length;
171 } inline1; // Bytes follow the length
172 struct __notInlineImmutable1 {
173 void *buffer; // Note that the buffer is in the same place for all non-inline variants of CFString
174 CFIndex length;
175 CFAllocatorRef contentsDeallocator; // Optional; just the dealloc func is used
176 } notInlineImmutable1; // This is the usual not-inline immutable CFString
177 struct __notInlineImmutable2 {
178 void *buffer;
179 CFAllocatorRef contentsDeallocator; // Optional; just the dealloc func is used
180 } notInlineImmutable2; // This is the not-inline immutable CFString when length is stored with the contents (first byte)
181 struct __notInlineMutable notInlineMutable;
182 } variants;
183 };
184
185 /*
186 I = is immutable
187 E = not inline contents
188 U = is Unicode
189 N = has NULL byte
190 L = has length byte
191 D = explicit deallocator for contents (for mutable objects, allocator)
192 C = length field is CFIndex (rather than UInt32); only meaningful for 64-bit, really
193 if needed this bit (valuable real-estate) can be given up for another bit elsewhere, since this info is needed just for 64-bit
194
195 Also need (only for mutable)
196 F = is fixed
197 G = has gap
198 Cap, DesCap = capacity
199
200 B7 B6 B5 B4 B3 B2 B1 B0
201 U N L C I
202
203 B6 B5
204 0 0 inline contents
205 0 1 E (freed with default allocator)
206 1 0 E (not freed)
207 1 1 E D
208
209 !!! Note: Constant CFStrings use the bit patterns:
210 C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable)
211 D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable)
212 The bit usages should not be modified in a way that would affect these bit patterns.
213 */
214
// Masks and values for the info bits of a CFString. Each "...Mask" constant selects the bits that are then
// compared against the value constant(s) listed after it. Several names are aliases for the same 0x060 / 0x020 patterns.
215 enum {
216 __kCFFreeContentsWhenDoneMask = 0x020,
217 __kCFFreeContentsWhenDone = 0x020,
218 __kCFContentsMask = 0x060,
219 __kCFHasInlineContents = 0x000,
220 __kCFNotInlineContentsNoFree = 0x040, // Don't free
221 __kCFNotInlineContentsDefaultFree = 0x020, // Use allocator's free function
222 __kCFNotInlineContentsCustomFree = 0x060, // Use a specially provided free function
223 __kCFHasContentsAllocatorMask = 0x060,
224 __kCFHasContentsAllocator = 0x060, // (For mutable strings) use a specially provided allocator
225 __kCFHasContentsDeallocatorMask = 0x060,
226 __kCFHasContentsDeallocator = 0x060,
227 __kCFIsMutableMask = 0x01,
228 __kCFIsMutable = 0x01,
229 __kCFIsUnicodeMask = 0x10,
230 __kCFIsUnicode = 0x10,
231 __kCFHasNullByteMask = 0x08,
232 __kCFHasNullByte = 0x08,
233 __kCFHasLengthByteMask = 0x04,
234 __kCFHasLengthByte = 0x04,
235 // !!! Bit 0x02 has been freed up
236 };
237
238
239 // !!! Assumptions:
240 // Mutable strings are not inline
241 // Compile-time constant strings are not inline
242 // Mutable strings always have explicit length (but they might also have length byte and null byte)
243 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
244 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
245
246 /* The following set of functions and macros need to be updated on change to the bit configuration
247 */
248 CF_INLINE Boolean __CFStrIsMutable(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsMutableMask) == __kCFIsMutable;}
249 CF_INLINE Boolean __CFStrIsInline(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFContentsMask) == __kCFHasInlineContents;}
250 CF_INLINE Boolean __CFStrFreeContentsWhenDone(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFFreeContentsWhenDoneMask) == __kCFFreeContentsWhenDone;}
251 CF_INLINE Boolean __CFStrHasContentsDeallocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsDeallocatorMask) == __kCFHasContentsDeallocator;}
252 CF_INLINE Boolean __CFStrIsUnicode(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) == __kCFIsUnicode;}
253 CF_INLINE Boolean __CFStrIsEightBit(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) != __kCFIsUnicode;}
254 CF_INLINE Boolean __CFStrHasNullByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasNullByteMask) == __kCFHasNullByte;}
255 CF_INLINE Boolean __CFStrHasLengthByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte;}
256 CF_INLINE Boolean __CFStrHasExplicitLength(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & (__kCFIsMutableMask | __kCFHasLengthByteMask)) != __kCFHasLengthByte;} // Has explicit length if (1) mutable or (2) not mutable and no length byte
257 CF_INLINE Boolean __CFStrIsConstant(CFStringRef str) {
258 #if __LP64__
259 return str->base._rc == 0;
260 #else
261 return (str->base._cfinfo[CF_RC_BITS]) == 0;
262 #endif
263 }
264
265 CF_INLINE SInt32 __CFStrSkipAnyLengthByte(CFStringRef str) {return ((str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents
266
267 /* Returns ptr to the buffer (which might include the length byte)
268 */
269 CF_INLINE const void *__CFStrContents(CFStringRef str) {
270 if (__CFStrIsInline(str)) {
271 return (const void *)(((uintptr_t)&(str->variants)) + (__CFStrHasExplicitLength(str) ? sizeof(CFIndex) : 0));
272 } else { // Not inline; pointer is always word 2
273 return str->variants.notInlineImmutable1.buffer;
274 }
275 }
276
277 static CFAllocatorRef *__CFStrContentsDeallocatorPtr(CFStringRef str) {
278 return __CFStrHasExplicitLength(str) ? &(((CFMutableStringRef)str)->variants.notInlineImmutable1.contentsDeallocator) : &(((CFMutableStringRef)str)->variants.notInlineImmutable2.contentsDeallocator); }
279
280 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
281 CF_INLINE CFAllocatorRef __CFStrContentsDeallocator(CFStringRef str) {
282 return *__CFStrContentsDeallocatorPtr(str);
283 }
284
285 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
286 CF_INLINE void __CFStrSetContentsDeallocator(CFStringRef str, CFAllocatorRef allocator) {
287 allocator = kCFUseCollectableAllocator ? allocator : _CFConvertAllocatorToNonGCRefZeroEquivalent(allocator);
288 if (!(kCFAllocatorSystemDefaultGCRefZero == allocator || kCFAllocatorDefaultGCRefZero == allocator)) CFRetain(allocator);
289 *__CFStrContentsDeallocatorPtr(str) = allocator;
290 }
291
292 static CFAllocatorRef *__CFStrContentsAllocatorPtr(CFStringRef str) {
293 CFAssert(!__CFStrIsInline(str), __kCFLogAssertion, "Asking for contents allocator of inline string");
294 CFAssert(__CFStrIsMutable(str), __kCFLogAssertion, "Asking for contents allocator of an immutable string");
295 return (CFAllocatorRef *)&(str->variants.notInlineMutable.contentsAllocator);
296 }
297
298 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
299 CF_INLINE CFAllocatorRef __CFStrContentsAllocator(CFMutableStringRef str) {
300 return *(__CFStrContentsAllocatorPtr(str));
301 }
302
303 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
304 CF_INLINE void __CFStrSetContentsAllocator(CFMutableStringRef str, CFAllocatorRef allocator) {
305 allocator = kCFUseCollectableAllocator ? allocator : _CFConvertAllocatorToNonGCRefZeroEquivalent(allocator);
306 if (!(kCFAllocatorSystemDefaultGCRefZero == allocator || kCFAllocatorDefaultGCRefZero == allocator)) CFRetain(allocator);
307 *(__CFStrContentsAllocatorPtr(str)) = allocator;
308 }
309
310 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
311 */
312 CF_INLINE CFIndex __CFStrLength(CFStringRef str) {
313 if (__CFStrHasExplicitLength(str)) {
314 if (__CFStrIsInline(str)) {
315 return str->variants.inline1.length;
316 } else {
317 return str->variants.notInlineImmutable1.length;
318 }
319 } else {
320 return (CFIndex)(*((uint8_t *)__CFStrContents(str)));
321 }
322 }
323
324 CF_INLINE CFIndex __CFStrLength2(CFStringRef str, const void *buffer) {
325 if (__CFStrHasExplicitLength(str)) {
326 if (__CFStrIsInline(str)) {
327 return str->variants.inline1.length;
328 } else {
329 return str->variants.notInlineImmutable1.length;
330 }
331 } else {
332 return (CFIndex)(*((uint8_t *)buffer));
333 }
334 }
335
336
337 Boolean __CFStringIsEightBit(CFStringRef str) {
338 return __CFStrIsEightBit(str);
339 }
340
341 /* Sets the content pointer for immutable or mutable strings.
342 */
343 CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p) {
344 // XXX_PCB catch all writes for mutable string case.
345 __CFAssignWithWriteBarrier((void **)&((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p);
346 }
347 CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v) {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._cfinfo[CF_INFO_BITS], 6, 0, v);}
348
349 CF_INLINE void __CFStrSetExplicitLength(CFStringRef str, CFIndex v) {
350 if (__CFStrIsInline(str)) {
351 ((CFMutableStringRef)str)->variants.inline1.length = v;
352 } else {
353 ((CFMutableStringRef)str)->variants.notInlineImmutable1.length = v;
354 }
355 }
356
357 CF_INLINE void __CFStrSetUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= __kCFIsUnicode;}
358 CF_INLINE void __CFStrClearUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~__kCFIsUnicode;}
359 CF_INLINE void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= (__kCFHasLengthByte | __kCFHasNullByte);}
360 CF_INLINE void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~(__kCFHasLengthByte | __kCFHasNullByte);}
361
362
363 // Assumption: The following set of inlines (using str->variants.notInlineMutable) are called with mutable strings only
364 CF_INLINE Boolean __CFStrIsFixed(CFStringRef str) {return str->variants.notInlineMutable.isFixedCapacity;}
365 CF_INLINE Boolean __CFStrIsExternalMutable(CFStringRef str) {return str->variants.notInlineMutable.isExternalMutable;}
366 CF_INLINE Boolean __CFStrHasContentsAllocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsAllocatorMask) == __kCFHasContentsAllocator;}
367 CF_INLINE void __CFStrSetIsFixed(CFMutableStringRef str) {str->variants.notInlineMutable.isFixedCapacity = 1;}
368 CF_INLINE void __CFStrSetIsExternalMutable(CFMutableStringRef str) {str->variants.notInlineMutable.isExternalMutable = 1;}
369 CF_INLINE void __CFStrSetHasGap(CFMutableStringRef str) {str->variants.notInlineMutable.hasGap = 1;}
370
371 // If capacity is provided externally, we only change it when we need to grow beyond it
372 CF_INLINE Boolean __CFStrCapacityProvidedExternally(CFStringRef str) {return str->variants.notInlineMutable.capacityProvidedExternally;}
373 CF_INLINE void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 1;}
374 CF_INLINE void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 0;}
375
376 // "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
377 CF_INLINE CFIndex __CFStrCapacity(CFStringRef str) {return str->variants.notInlineMutable.capacity;}
378 CF_INLINE void __CFStrSetCapacity(CFMutableStringRef str, CFIndex cap) {str->variants.notInlineMutable.capacity = cap;}
379
380 // "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
381 CF_INLINE CFIndex __CFStrDesiredCapacity(CFStringRef str) {return str->variants.notInlineMutable.desiredCapacity;}
382 CF_INLINE void __CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex size) {str->variants.notInlineMutable.desiredCapacity = size;}
383
384
385 static void *__CFStrAllocateMutableContents(CFMutableStringRef str, CFIndex size) {
386 void *ptr;
387 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
388 ptr = CFAllocatorAllocate(alloc, size, 0);
389 if (__CFOASafe) __CFSetLastAllocationEventName(ptr, "CFString (store)");
390 return ptr;
391 }
392
393 static void __CFStrDeallocateMutableContents(CFMutableStringRef str, void *buffer) {
394 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
395 if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str) && _CFAllocatorIsGCRefZero(alloc)) {
396 // do nothing
397 } else if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
398 // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle.
399 auto_zone_release(objc_collectableZone(), buffer);
400 } else {
401 CFAllocatorDeallocate(alloc, buffer);
402 }
403 }
404
405
406
407
408 /* CFString specific init flags
409 Note that you cannot count on the external buffer not being copied.
410 Also, if you specify an external buffer, you should not change it behind the CFString's back.
411 */
// Numerically, kCFStringNoCopyNoFreeProvidedContents (0x30000) equals kCFStringPascal | kCFStringNoCopyProvidedContents.
// NOTE(review): confirm at the use sites how these values are told apart.
412 enum {
413 __kCFThinUnicodeIfPossible = 0x1000000, /* See if the Unicode contents can be thinned down to 8-bit */
414 kCFStringPascal = 0x10000, /* Indicating that the string data has a Pascal string structure (length byte at start) */
415 kCFStringNoCopyProvidedContents = 0x20000, /* Don't copy the provided string contents if possible; free it when no longer needed */
416 kCFStringNoCopyNoFreeProvidedContents = 0x30000 /* Don't copy the provided string contents if possible; don't free it when no longer needed */
417 };
418
419 /* System Encoding.
420 */
// Cached encodings; kCFStringEncodingInvalidId means "not computed yet".
// Filled in on first use by CFStringGetSystemEncoding(), CFStringFileSystemEncoding() and
// __CFStringComputeEightBitStringEncoding() respectively. The third one is not static.
421 static CFStringEncoding __CFDefaultSystemEncoding = kCFStringEncodingInvalidId;
422 static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId;
423 CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId;
424
425
// Per-platform value that CFStringGetSystemEncoding() installs as the system encoding:
// MacRoman on Mac OS X / embedded / Linux targets, WindowsLatin1 on Windows, ISOLatin1 (with a build warning) elsewhere.
426 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
427 #define __defaultEncoding kCFStringEncodingMacRoman
428 #elif DEPLOYMENT_TARGET_WINDOWS
429 #define __defaultEncoding kCFStringEncodingWindowsLatin1
430 #else
431 #warning This value must match __CFGetConverter condition in CFStringEncodingConverter.c
432 #define __defaultEncoding kCFStringEncodingISOLatin1
433 #endif
434
435 CFStringEncoding CFStringGetSystemEncoding(void) {
436 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) {
437 __CFDefaultSystemEncoding = __defaultEncoding;
438 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding);
439 __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? (UNI_CHAR_FUNC)converter->toUnicode : NULL);
440 }
441 return __CFDefaultSystemEncoding;
442 }
443
444 // Fast version for internal use
445
446 CF_INLINE CFStringEncoding __CFStringGetSystemEncoding(void) {
447 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) (void)CFStringGetSystemEncoding();
448 return __CFDefaultSystemEncoding;
449 }
450
451 CFStringEncoding CFStringFileSystemEncoding(void) {
452 if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) {
453 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS
454 __CFDefaultFileSystemEncoding = kCFStringEncodingUTF8;
455 #else
456 __CFDefaultFileSystemEncoding = CFStringGetSystemEncoding();
457 #endif
458 }
459
460 return __CFDefaultFileSystemEncoding;
461 }
462
463 /* ??? Is returning length when no other answer is available the right thing?
464 !!! All of the (length > (LONG_MAX / N)) type checks are to avoid wrap-around and eventual malloc overflow in the client
465 */
466 CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) {
467 if (encoding == kCFStringEncodingUTF8) {
468 return (length > (LONG_MAX / 3)) ? kCFNotFound : (length * 3);
469 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32
470 return (length > (LONG_MAX / sizeof(UTF32Char))) ? kCFNotFound : (length * sizeof(UTF32Char));
471 } else {
472 encoding &= 0xFFF; // Mask off non-base part
473 }
474 switch (encoding) {
475 case kCFStringEncodingUnicode:
476 return (length > (LONG_MAX / sizeof(UniChar))) ? kCFNotFound : (length * sizeof(UniChar));
477
478 case kCFStringEncodingNonLossyASCII:
479 return (length > (LONG_MAX / 6)) ? kCFNotFound : (length * 6); // 1 Unichar can expand to 6 bytes
480
481 case kCFStringEncodingMacRoman:
482 case kCFStringEncodingWindowsLatin1:
483 case kCFStringEncodingISOLatin1:
484 case kCFStringEncodingNextStepLatin:
485 case kCFStringEncodingASCII:
486 return length / sizeof(uint8_t);
487
488 default:
489 return length / sizeof(uint8_t);
490 }
491 }
492
493
494 /* Returns whether the indicated encoding can be stored in 8-bit chars
495 */
496 CF_INLINE Boolean __CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding) {
497 switch (encoding & 0xFFF) { // just use encoding base
498 case kCFStringEncodingInvalidId:
499 case kCFStringEncodingUnicode:
500 case kCFStringEncodingNonLossyASCII:
501 return false;
502
503 case kCFStringEncodingMacRoman:
504 case kCFStringEncodingWindowsLatin1:
505 case kCFStringEncodingISOLatin1:
506 case kCFStringEncodingNextStepLatin:
507 case kCFStringEncodingASCII:
508 return true;
509
510 default: return false;
511 }
512 }
513
514 /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
515 ??? Perhaps only ASCII fits the bill due to Unicode decomposition.
516 */
517 CFStringEncoding __CFStringComputeEightBitStringEncoding(void) {
518 if (__CFDefaultEightBitStringEncoding == kCFStringEncodingInvalidId) {
519 CFStringEncoding systemEncoding = CFStringGetSystemEncoding();
520 if (systemEncoding == kCFStringEncodingInvalidId) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
521 return kCFStringEncodingASCII;
522 } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding)) {
523 __CFDefaultEightBitStringEncoding = systemEncoding;
524 } else {
525 __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII;
526 }
527 }
528
529 return __CFDefaultEightBitStringEncoding;
530 }
531
532 /* Returns whether the provided bytes can be stored in ASCII
533 */
534 CF_INLINE Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
535 #if __LP64__
536 /* A bit of unrolling; go by 32s, 16s, and 8s first */
537 while (len >= 32) {
538 uint64_t val = *(const uint64_t *)bytes;
539 uint64_t hiBits = (val & 0x8080808080808080ULL); // More efficient to collect this rather than do a conditional at every step
540 bytes += 8;
541 val = *(const uint64_t *)bytes;
542 hiBits |= (val & 0x8080808080808080ULL);
543 bytes += 8;
544 val = *(const uint64_t *)bytes;
545 hiBits |= (val & 0x8080808080808080ULL);
546 bytes += 8;
547 val = *(const uint64_t *)bytes;
548 if (hiBits | (val & 0x8080808080808080ULL)) return false;
549 bytes += 8;
550 len -= 32;
551 }
552
553 while (len >= 16) {
554 uint64_t val = *(const uint64_t *)bytes;
555 uint64_t hiBits = (val & 0x8080808080808080ULL);
556 bytes += 8;
557 val = *(const uint64_t *)bytes;
558 if (hiBits | (val & 0x8080808080808080ULL)) return false;
559 bytes += 8;
560 len -= 16;
561 }
562
563 while (len >= 8) {
564 uint64_t val = *(const uint64_t *)bytes;
565 if (val & 0x8080808080808080ULL) return false;
566 bytes += 8;
567 len -= 8;
568 }
569 #endif
570 /* Go by 4s */
571 while (len >= 4) {
572 uint32_t val = *(const uint32_t *)bytes;
573 if (val & 0x80808080U) return false;
574 bytes += 4;
575 len -= 4;
576 }
577 /* Handle the rest one byte at a time */
578 while (len--) {
579 if (*bytes++ & 0x80) return false;
580 }
581
582 return true;
583 }
584
585 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
586 */
587 CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) {
588 // If the encoding is the same as the 8-bit CFString encoding, we can just use the bytes as-is.
589 // One exception is ASCII, which unfortunately needs to mean ISOLatin1 for compatibility reasons <rdar://problem/5458321>.
590 if (encoding == __CFStringGetEightBitStringEncoding() && encoding != kCFStringEncodingASCII) return true;
591 if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true;
592 return false;
593 }
594
595
596 /* Returns whether a length byte can be tacked on to a string of the indicated length.
597 */
598 CF_INLINE Boolean __CFCanUseLengthByte(CFIndex len) {
599 #define __kCFMaxPascalStrLen 255
600 return (len <= __kCFMaxPascalStrLen) ? true : false;
601 }
602
603 /* Various string assertions
604 */
// Assertion helpers built on CFAssertN / __CFGenericValidateType.
// Macro arguments are parenthesized wherever they appear inside a condition, so that an expression argument
// (e.g. "a ? b : c" or "x | y") is evaluated as a unit. The message strings are unchanged.
// NOTE(review): the messages print index values with %d - confirm this matches the width of the values passed.
#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && ((idx) + (count)) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsNotNegative(idx) CFAssert2((idx) >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
612
613
614 /* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity. This function will return -1 if the new capacity is just too big (> LONG_MAX).
615 Additional complications are applied in the following order:
616 - desiredCapacity, which is the minimum (except initially things can be at zero)
617 - rounding up to factor of 8
618 - compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256
619 - we need to make sure GROWFACTOR computation doesn't suffer from overflow issues on 32-bit, hence the casting to unsigned. Normally for required capacity of C bytes, the allocated space is (3C+1)/2. If C > ULONG_MAX/3, we instead simply return LONG_MAX
620 */
/* SHRINKFACTOR(c): half of c. GROWFACTOR(c): (3c + 1) / 2.
   On non-LP64 builds GROWFACTOR does its arithmetic in unsigned long and, once c reaches ULONG_MAX / 3,
   yields the larger of (LONG_MAX - 4095) and c instead of multiplying.
   The argument is parenthesized everywhere so that an expression such as "a + b" is scaled as a whole.
*/
#define SHRINKFACTOR(c) ((c) / 2)

#if __LP64__
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
#else
#define GROWFACTOR(c) (((c) >= (ULONG_MAX / 3UL)) ? __CFMax(LONG_MAX - 4095, (c)) : (((unsigned long)(c) * 3 + 1) / 2))
#endif
628
/* Computes the capacity, in bytes, that the contents buffer of 'str' should have.
   str            - mutable string whose flags (capacityProvidedExternally, fixed, contents allocator, desired capacity) are consulted
   reqCapacity    - number of bytes that must fit
   capacity       - current capacity in bytes
   leaveExtraRoom - when true, a resize grows to GROWFACTOR(reqCapacity) rather than exactly reqCapacity
   charSize       - bytes per character; used to convert the desired capacity (in characters) to bytes
   Returns 'capacity' unchanged when no resize is called for, the new capacity otherwise, or -1 when
   reqCapacity or the computed capacity exceeds LONG_MAX.
   NOTE(review): several comparisons mix CFIndex and unsigned long operands; presumably this relies on
   capacity and the desired capacity never being negative - confirm before changing any of the types.
*/
629 CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, unsigned long reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) {
630 if (capacity != 0 || reqCapacity != 0) { /* If initially zero, and space not needed, leave it at that... */
631 if ((capacity < reqCapacity) || /* We definitely need the room... */
632 (!__CFStrCapacityProvidedExternally(str) && /* Assuming we control the capacity... */
633 ((reqCapacity < SHRINKFACTOR(capacity)) || /* ...we have too much room! */
634 (!leaveExtraRoom && (reqCapacity < capacity))))) { /* ...we need to eliminate the extra space... */
635 if (reqCapacity > LONG_MAX) return -1; /* Too big any way you cut it */
636 unsigned long newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity; /* Grow by 3/2 if extra room is desired */
637 CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize;
638 if (newCapacity < desiredCapacity) { /* If less than desired, bump up to desired */
639 newCapacity = desiredCapacity;
640 } else if (__CFStrIsFixed(str)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
641 newCapacity = __CFMax(desiredCapacity, reqCapacity); /* !!! So, fixed is not really fixed, but "tight" */
642 }
643 if (__CFStrHasContentsAllocator(str)) { /* Also apply any preferred size from the allocator */
644 newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0);
// The "else" branch below exists only on targets that declare malloc_good_size(); elsewhere the if has no else.
645 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
646 } else {
647 newCapacity = malloc_good_size(newCapacity);
648 #endif
649 }
650 return (newCapacity > LONG_MAX) ? -1 : (CFIndex)newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
651 }
652 }
653 return capacity;
654 }
655
656
657 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
658 numBlocks is current total number of blocks within buffer.
659 blockSize is the size of each block in bytes
660 ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
661 insertLength is the final spacing between the remaining blocks
662
663 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO
664 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
665 if insertLength = 0, result = A B D G H
666
667 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
668 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
669
670 */
671 typedef struct _CFStringDeferredRange {
672 CFIndex beginning;
673 CFIndex length;
674 CFIndex shift;
675 } CFStringDeferredRange;
676
677 typedef struct _CFStringStackInfo {
678 CFIndex capacity; // Capacity (if capacity == count, need to realloc to add another)
679 CFIndex count; // Number of elements actually stored
680 CFStringDeferredRange *stack;
681 Boolean hasMalloced; // Indicates "stack" is allocated and needs to be deallocated when done
682 char _padding[3];
683 } CFStringStackInfo;
684
685 CF_INLINE void pop (CFStringStackInfo *si, CFStringDeferredRange *topRange) {
686 si->count = si->count - 1;
687 *topRange = si->stack[si->count];
688 }
689
690 CF_INLINE void push (CFStringStackInfo *si, const CFStringDeferredRange *newRange) {
691 if (si->count == si->capacity) {
692 // increase size of the stack
693 si->capacity = (si->capacity + 4) * 2;
694 if (si->hasMalloced) {
695 si->stack = (CFStringDeferredRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, si->stack, si->capacity * sizeof(CFStringDeferredRange), 0);
696 } else {
697 CFStringDeferredRange *newStack = (CFStringDeferredRange *)CFAllocatorAllocate(kCFAllocatorSystemDefault, si->capacity * sizeof(CFStringDeferredRange), 0);
698 memmove(newStack, si->stack, si->count * sizeof(CFStringDeferredRange));
699 si->stack = newStack;
700 si->hasMalloced = true;
701 }
702 }
703 si->stack[si->count] = *newRange;
704 si->count = si->count + 1;
705 }
706
707 static void rearrangeBlocks(
708 uint8_t *buffer,
709 CFIndex numBlocks,
710 CFIndex blockSize,
711 const CFRange *ranges,
712 CFIndex numRanges,
713 CFIndex insertLength) {
714
715 #define origStackSize 10
716 CFStringDeferredRange origStack[origStackSize];
717 CFStringStackInfo si = {origStackSize, 0, origStack, false, {0, 0, 0}};
718 CFStringDeferredRange currentNonRange = {0, 0, 0};
719 CFIndex currentRange = 0;
720 CFIndex amountShifted = 0;
721
722 // must have at least 1 range left.
723
724 while (currentRange < numRanges) {
725 currentNonRange.beginning = (ranges[currentRange].location + ranges[currentRange].length) * blockSize;
726 if ((numRanges - currentRange) == 1) {
727 // at the end.
728 currentNonRange.length = numBlocks * blockSize - currentNonRange.beginning;
729 if (currentNonRange.length == 0) break;
730 } else {
731 currentNonRange.length = (ranges[currentRange + 1].location * blockSize) - currentNonRange.beginning;
732 }
733 currentNonRange.shift = amountShifted + (insertLength * blockSize) - (ranges[currentRange].length * blockSize);
734 amountShifted = currentNonRange.shift;
735 if (amountShifted <= 0) {
736 // process current item and rest of stack
737 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
738 while (si.count > 0) {
739 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
740 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
741 }
742 } else {
743 // add currentNonRange to stack.
744 push (&si, &currentNonRange);
745 }
746 currentRange++;
747 }
748
749 // no more ranges. if anything is on the stack, process.
750
751 while (si.count > 0) {
752 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
753 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
754 }
755 if (si.hasMalloced) CFAllocatorDeallocate (kCFAllocatorSystemDefault, si.stack);
756 }
757
758 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
759 */
760 static void copyBlocks(
761 const uint8_t *srcBuffer,
762 uint8_t *dstBuffer,
763 CFIndex srcLength,
764 Boolean srcIsUnicode,
765 Boolean dstIsUnicode,
766 const CFRange *ranges,
767 CFIndex numRanges,
768 CFIndex insertLength) {
769
770 CFIndex srcLocationInBytes = 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
771 CFIndex dstLocationInBytes = 0; // ditto
772 CFIndex srcBlockSize = srcIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
773 CFIndex insertLengthInBytes = insertLength * (dstIsUnicode ? sizeof(UniChar) : sizeof(uint8_t));
774 CFIndex rangeIndex = 0;
775 CFIndex srcToDstMultiplier = (srcIsUnicode == dstIsUnicode) ? 1 : (sizeof(UniChar) / sizeof(uint8_t));
776
777 // Loop over the ranges, copying the range to be preserved (right before each range)
778 while (rangeIndex < numRanges) {
779 CFIndex srcLengthInBytes = ranges[rangeIndex].location * srcBlockSize - srcLocationInBytes; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
780 if (srcLengthInBytes > 0) {
781 if (srcIsUnicode == dstIsUnicode) {
782 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLengthInBytes);
783 } else {
784 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLengthInBytes);
785 }
786 }
787 srcLocationInBytes += srcLengthInBytes + ranges[rangeIndex].length * srcBlockSize; // Skip over the just-copied and to-be-deleted stuff
788 dstLocationInBytes += srcLengthInBytes * srcToDstMultiplier + insertLengthInBytes;
789 rangeIndex++;
790 }
791
792 // Do last range (the one beyond last range)
793 if (srcLocationInBytes < srcLength * srcBlockSize) {
794 if (srcIsUnicode == dstIsUnicode) {
795 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLength * srcBlockSize - srcLocationInBytes);
796 } else {
797 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLength * srcBlockSize - srcLocationInBytes);
798 }
799 }
800 }
801
802 /* Call the callback; if it doesn't exist or returns false, then log
803 */
804 static void __CFStringHandleOutOfMemory(CFTypeRef obj) {
805 CFStringRef msg = CFSTR("Out of memory. We suggest restarting the application. If you have an unsaved document, create a backup copy in Finder, then try to save.");
806 {
807 CFLog(kCFLogLevelCritical, CFSTR("%@"), msg);
808 }
809 }
810
811 /* Reallocates the backing store of the string to accommodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)
   str - the mutable string whose storage is being changed
   deleteRanges, numDeleteRanges - character ranges to remove; they are handed to rearrangeBlocks()/copyBlocks(), whose comments require them sorted in increasing order and non-overlapping
   insertLength - number of characters of space to open up in place of EACH delete range (the new length adds insertLength * numDeleteRanges)
   makeUnicode - if true, a non-empty result is stored as UniChars even if the string is currently 8-bit
   The newly opened space is not filled in here.
   NOTE(review): every error path below relies on __CFStringHandleOutOfMemory() not returning; confirm that it really terminates.
812 */
813 static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) {
814 const uint8_t *curContents = (uint8_t *)__CFStrContents(str);
815 CFIndex curLength = curContents ? __CFStrLength2(str, curContents) : 0;
816 unsigned long newLength; // We use unsigned to better keep track of overflow
817
818 // Compute new length of the string
819 if (numDeleteRanges == 1) {
820 newLength = curLength + insertLength - deleteRanges[0].length;
821 } else {
822 CFIndex cnt;
// One insertion per delete range, minus everything that gets deleted
823 newLength = curLength + insertLength * numDeleteRanges;
824 for (cnt = 0; cnt < numDeleteRanges; cnt++) newLength -= deleteRanges[cnt].length;
825 }
826
827 __CFAssertIfFixedLengthIsOK(str, newLength);
828
829 if (newLength == 0) {
830 // A somewhat optimized code-path for this special case, with the following implicit values:
831 // newIsUnicode = false
832 // useLengthAndNullBytes = false
833 // newCharSize = sizeof(uint8_t)
834 // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
835 // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
836 CFIndex curCapacity = __CFStrCapacity(str);
837 CFIndex newCapacity = __CFStrNewCapacity(str, 0, curCapacity, true, sizeof(uint8_t));
838 if (newCapacity != curCapacity) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
839 if (curContents) __CFStrDeallocateMutableContents(str, (uint8_t *)curContents);
840 __CFStrSetContentPtr(str, NULL);
841 __CFStrSetCapacity(str, 0);
842 __CFStrClearCapacityProvidedExternally(str);
843 __CFStrClearHasLengthAndNullBytes(str);
844 if (!__CFStrIsExternalMutable(str)) __CFStrClearUnicode(str); // External mutable implies Unicode
845 } else {
// Keeping the existing buffer: turn it into an empty 8-bit string in place (external mutable strings are left as they are)
846 if (!__CFStrIsExternalMutable(str)) {
847 __CFStrClearUnicode(str);
848 if (curCapacity >= (int)(sizeof(uint8_t) * 2)) { // If there's room
849 __CFStrSetHasLengthAndNullBytes(str);
850 ((uint8_t *)curContents)[0] = ((uint8_t *)curContents)[1] = 0;
851 } else {
852 __CFStrClearHasLengthAndNullBytes(str);
853 }
854 }
855 }
856 __CFStrSetExplicitLength(str, 0);
857 } else { /* This else-clause assumes newLength > 0 */
858 Boolean oldIsUnicode = __CFStrIsUnicode(str);
859 Boolean newIsUnicode = makeUnicode || (oldIsUnicode /* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str);
860 CFIndex newCharSize = newIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
861 Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */;
862 CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0; /* 2 extra bytes to keep the length byte & null... */
863 CFIndex curCapacity = __CFStrCapacity(str);
// Make sure newLength * newCharSize + numExtraBytes (computed next) cannot exceed LONG_MAX
864 if (newLength > (LONG_MAX - numExtraBytes) / newCharSize) __CFStringHandleOutOfMemory(str); // Does not return
865 CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize);
866 if (newCapacity == -1) __CFStringHandleOutOfMemory(str); // Does not return
867 Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
868 uint8_t *newContents;
869 if (allocNewBuffer) {
870 newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
871 if (!newContents) { // Try allocating without extra room
872 newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, false, newCharSize);
873 // Since we checked for this above, it shouldn't be the case here, but just in case
874 if (newCapacity == -1) __CFStringHandleOutOfMemory(str); // Does not return
875 newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
876 if (!newContents) __CFStringHandleOutOfMemory(str); // Does not return
877 }
878 } else {
879 newContents = (uint8_t *)curContents;
880 }
881
// This is the string's CURRENT state (before any flags are changed below); useLengthAndNullBytes is the state we are moving to
882 Boolean hasLengthAndNullBytes = __CFStrHasLengthByte(str);
883
884 CFAssert1(hasLengthAndNullBytes == __CFStrHasNullByte(str), __kCFLogAssertion, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__);
885
886 // Calculate pointers to the actual string content (skipping over the length byte, if present). Note that keeping a reference to the base is needed for newContents under GC, since the copy may take a long time.
887 const uint8_t *curContentsBody = hasLengthAndNullBytes ? (curContents+1) : curContents;
888 uint8_t *newContentsBody = useLengthAndNullBytes ? (newContents+1) : newContents;
889
// Put the surviving characters where they belong: shuffled in place if the body pointer is unchanged, copied otherwise.
// oldIsUnicode implies newIsUnicode (see its computation above), so the only possible width change is 8-bit -> Unicode.
890 if (curContents) {
891 if (oldIsUnicode == newIsUnicode) {
892 if (newContentsBody == curContentsBody) {
893 rearrangeBlocks(newContentsBody, curLength, newCharSize, deleteRanges, numDeleteRanges, insertLength);
894 } else {
895 copyBlocks(curContentsBody, newContentsBody, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
896 }
897 } else if (newIsUnicode) { /* this implies we have a new buffer */
898 copyBlocks(curContentsBody, newContentsBody, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
899 }
900 if (allocNewBuffer && __CFStrFreeContentsWhenDone(str)) __CFStrDeallocateMutableContents(str, (void *)curContents);
901 }
902
// Bring the Unicode and length/null byte flags (and, for 8-bit, the bytes themselves) in line with the new representation
903 if (!newIsUnicode) {
904 if (useLengthAndNullBytes) {
905 newContentsBody[newLength] = 0; /* Always have null byte, if not unicode */
906 newContents[0] = __CFCanUseLengthByte(newLength) ? (uint8_t)newLength : 0;
907 if (!hasLengthAndNullBytes) __CFStrSetHasLengthAndNullBytes(str);
908 } else {
909 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
910 }
911 if (oldIsUnicode) __CFStrClearUnicode(str);
912 } else { // New is unicode...
913 if (!oldIsUnicode) __CFStrSetUnicode(str);
914 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
915 }
916 __CFStrSetExplicitLength(str, newLength);
917
// If a new buffer was allocated, record its capacity and install it as the string's contents
918 if (allocNewBuffer) {
919 __CFStrSetCapacity(str, newCapacity);
920 __CFStrClearCapacityProvidedExternally(str);
921 __CFStrSetContentPtr(str, newContents);
922 }
923 }
924 }
925
926 /* Same as above, but takes one range (very common case)
927 */
928 CF_INLINE void __CFStringChangeSize(CFMutableStringRef str, CFRange range, CFIndex insertLength, Boolean makeUnicode) {
929 __CFStringChangeSizeMultiple(str, &range, 1, insertLength, makeUnicode);
930 }
931
932
933 #if defined(DEBUG)
934 static Boolean __CFStrIsConstantString(CFStringRef str);
935 #endif
936
937 static void __CFStringDeallocate(CFTypeRef cf) {
938 CFStringRef str = (CFStringRef)cf;
939
940 // If in DEBUG mode, check to see if the string a CFSTR, and complain.
941 CFAssert1(__CFConstantStringTableBeingFreed || !__CFStrIsConstantString((CFStringRef)cf), __kCFLogAssertion, "Tried to deallocate CFSTR(\"%@\")", str);
942
943 if (!__CFStrIsInline(str)) {
944 uint8_t *contents;
945 Boolean isMutable = __CFStrIsMutable(str);
946 if (__CFStrFreeContentsWhenDone(str) && (contents = (uint8_t *)__CFStrContents(str))) {
947 if (isMutable) {
948 __CFStrDeallocateMutableContents((CFMutableStringRef)str, contents);
949 } else {
950 if (__CFStrHasContentsDeallocator(str)) {
951 CFAllocatorRef allocator = __CFStrContentsDeallocator(str);
952 CFAllocatorDeallocate(allocator, contents);
953 if (!(kCFAllocatorSystemDefaultGCRefZero == allocator || kCFAllocatorDefaultGCRefZero == allocator)) CFRelease(allocator);
954 } else {
955 CFAllocatorRef alloc = __CFGetAllocator(str);
956 CFAllocatorDeallocate(alloc, contents);
957 }
958 }
959 }
960 if (isMutable && __CFStrHasContentsAllocator(str)) {
961 CFAllocatorRef allocator = __CFStrContentsAllocator((CFMutableStringRef)str);
962 if (!(kCFAllocatorSystemDefaultGCRefZero == allocator || kCFAllocatorDefaultGCRefZero == allocator)) CFRelease(allocator);
963 }
964 }
965 }
966
967 static Boolean __CFStringEqual(CFTypeRef cf1, CFTypeRef cf2) {
968 CFStringRef str1 = (CFStringRef)cf1;
969 CFStringRef str2 = (CFStringRef)cf2;
970 const uint8_t *contents1;
971 const uint8_t *contents2;
972 CFIndex len1;
973
974 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
975 /* !!! We do not need == test, as the CFBase runtime assures this */
976
977 contents1 = (uint8_t *)__CFStrContents(str1);
978 contents2 = (uint8_t *)__CFStrContents(str2);
979 len1 = __CFStrLength2(str1, contents1);
980
981 if (len1 != __CFStrLength2(str2, contents2)) return false;
982
983 contents1 += __CFStrSkipAnyLengthByte(str1);
984 contents2 += __CFStrSkipAnyLengthByte(str2);
985
986 if (__CFStrIsEightBit(str1) && __CFStrIsEightBit(str2)) {
987 return memcmp((const char *)contents1, (const char *)contents2, len1) ? false : true;
988 } else if (__CFStrIsEightBit(str1)) { /* One string has Unicode contents */
989 CFStringInlineBuffer buf;
990 CFIndex buf_idx = 0;
991
992 CFStringInitInlineBuffer(str1, &buf, CFRangeMake(0, len1));
993 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
994 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents2)[buf_idx]) return false;
995 }
996 } else if (__CFStrIsEightBit(str2)) { /* One string has Unicode contents */
997 CFStringInlineBuffer buf;
998 CFIndex buf_idx = 0;
999
1000 CFStringInitInlineBuffer(str2, &buf, CFRangeMake(0, len1));
1001 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
1002 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents1)[buf_idx]) return false;
1003 }
1004 } else { /* Both strings have Unicode contents */
1005 CFIndex idx;
1006 for (idx = 0; idx < len1; idx++) {
1007 if (((UniChar *)contents1)[idx] != ((UniChar *)contents2)[idx]) return false;
1008 }
1009 }
1010 return true;
1011 }
1012
1013
1014 /* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
1015 If the length is less than or equal to 96, then the hash function is simply the
1016 following (n is the nth UniChar character, starting from 0):
1017
1018 hash(-1) = length
1019 hash(n) = hash(n-1) * 257 + unichar(n);
1020 Hash = hash(length-1) + (hash(length-1) << (length & 31)), i.e. hash(length-1) * (2^(length & 31) + 1) in CFHashCode arithmetic
1021
1022 If the length is greater than 96, then the above algorithm applies to
1023 characters 0..31, (length/2)-16..(length/2)+15, and length-32..length-1, inclusive;
1024 thus the first, middle, and last 32 characters.
1025
1026 Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401; 67503105 is 257^4 - 256^4
1027 If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.
1028 !!! We haven't updated for LP64 yet
1029
1030 NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.
1031
1032 Hash function was changed between Panther and Tiger, and Tiger and Leopard.
1033 */
/* Strings of up to this many characters are hashed in full; for longer ones the hashers below look at three windows of 32 characters each (3 * 32 == 96). */
1034 #define HashEverythingLimit 96
1035
/* Folds four characters into the local variable "result" and advances "pointer" by 4. A variable named result must be in scope at the point of use.
   accessStart and accessEnd are token fragments that get wrapped around the indexes 0..3 to form the four accesses; for example the
   arguments (p[, ], p) yield p[0] .. p[3]. The multipliers are the powers of 257 listed in the comment above (67503105 standing in for 257^4).
   NOTE(review): each per-character product is evaluated in the promoted type of the access expression, not in the type of result;
   leave the expression exactly as it is if hash values are to stay the same.
*/
1036 #define HashNextFourUniChars(accessStart, accessEnd, pointer) \
1037 {result = result * 67503105 + (accessStart 0 accessEnd) * 16974593 + (accessStart 1 accessEnd) * 66049 + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;}
1038
/* Single-character version of the above: folds the character at index 0 into "result" and advances "pointer" by 1. */
1039 #define HashNextUniChar(accessStart, accessEnd, pointer) \
1040 {result = result * 257 + (accessStart 0 accessEnd); pointer++;}
1041
1042
1043 /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing.
1044 */
1045 CF_INLINE CFHashCode __CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) {
1046 CFHashCode result = actualLen;
1047 if (len <= HashEverythingLimit) {
1048 const UniChar *end4 = uContents + (len & ~3);
1049 const UniChar *end = uContents + len;
1050 while (uContents < end4) HashNextFourUniChars(uContents[, ], uContents); // First count in fours
1051 while (uContents < end) HashNextUniChar(uContents[, ], uContents); // Then for the last <4 chars, count in ones...
1052 } else {
1053 const UniChar *contents, *end;
1054 contents = uContents;
1055 end = contents + 32;
1056 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1057 contents = uContents + (len >> 1) - 16;
1058 end = contents + 32;
1059 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1060 end = uContents + len;
1061 contents = end - 32;
1062 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1063 }
1064 return result + (result << (actualLen & 31));
1065 }
1066
1067 /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check.
1068 */
1069 CF_INLINE CFHashCode __CFStrHashEightBit(const uint8_t *cContents, CFIndex len) {
1070 #if defined(DEBUG)
1071 if (!__CFCharToUniCharFunc) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
1072 CFIndex cnt;
1073 Boolean err = false;
1074 if (len <= HashEverythingLimit) {
1075 for (cnt = 0; cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1076 } else {
1077 for (cnt = 0; cnt < 32; cnt++) if (cContents[cnt] >= 128) err = true;
1078 for (cnt = (len >> 1) - 16; cnt < (len >> 1) + 16; cnt++) if (cContents[cnt] >= 128) err = true;
1079 for (cnt = (len - 32); cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1080 }
1081 if (err) {
1082 // Can't do log here, as it might be too early
1083 fprintf(stderr, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
1084 }
1085 }
1086 #endif
1087 CFHashCode result = len;
1088 if (len <= HashEverythingLimit) {
1089 const uint8_t *end4 = cContents + (len & ~3);
1090 const uint8_t *end = cContents + len;
1091 while (cContents < end4) HashNextFourUniChars(__CFCharToUniCharTable[cContents[, ]], cContents); // First count in fours
1092 while (cContents < end) HashNextUniChar(__CFCharToUniCharTable[cContents[, ]], cContents); // Then for the last <4 chars, count in ones...
1093 } else {
1094 const uint8_t *contents, *end;
1095 contents = cContents;
1096 end = contents + 32;
1097 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1098 contents = cContents + (len >> 1) - 16;
1099 end = contents + 32;
1100 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1101 end = cContents + len;
1102 contents = end - 32;
1103 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1104 }
1105 return result + (result << (len & 31));
1106 }
1107
1108 CFHashCode CFStringHashISOLatin1CString(const uint8_t *bytes, CFIndex len) {
1109 CFHashCode result = len;
1110 if (len <= HashEverythingLimit) {
1111 const uint8_t *end4 = bytes + (len & ~3);
1112 const uint8_t *end = bytes + len;
1113 while (bytes < end4) HashNextFourUniChars(bytes[, ], bytes); // First count in fours
1114 while (bytes < end) HashNextUniChar(bytes[, ], bytes); // Then for the last <4 chars, count in ones...
1115 } else {
1116 const uint8_t *contents, *end;
1117 contents = bytes;
1118 end = contents + 32;
1119 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1120 contents = bytes + (len >> 1) - 16;
1121 end = contents + 32;
1122 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1123 end = bytes + len;
1124 contents = end - 32;
1125 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1126 }
1127 return result + (result << (len & 31));
1128 }
1129
1130 CFHashCode CFStringHashCString(const uint8_t *bytes, CFIndex len) {
1131 return __CFStrHashEightBit(bytes, len);
1132 }
1133
1134 CFHashCode CFStringHashCharacters(const UniChar *characters, CFIndex len) {
1135 return __CFStrHashCharacters(characters, len, len);
1136 }
1137
1138 /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash.
1139 */
1140 CFHashCode CFStringHashNSString(CFStringRef str) {
1141 UniChar buffer[HashEverythingLimit];
1142 CFIndex bufLen; // Number of characters in the buffer for hashing
1143 CFIndex len = 0; // Actual length of the string
1144
1145 len = CF_OBJC_CALLV((NSString *)str, length);
1146 if (len <= HashEverythingLimit) {
1147 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer range:NSMakeRange(0, len));
1148 bufLen = len;
1149 } else {
1150 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer range:NSMakeRange(0, 32));
1151 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer+32 range:NSMakeRange((len >> 1) - 16, 32));
1152 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer+64 range:NSMakeRange(len - 32, 32));
1153 bufLen = HashEverythingLimit;
1154 }
1155 return __CFStrHashCharacters(buffer, bufLen, len);
1156 }
1157
1158 CFHashCode __CFStringHash(CFTypeRef cf) {
1159 /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
1160 CFStringRef str = (CFStringRef)cf;
1161 const uint8_t *contents = (uint8_t *)__CFStrContents(str);
1162 CFIndex len = __CFStrLength2(str, contents);
1163
1164 if (__CFStrIsEightBit(str)) {
1165 contents += __CFStrSkipAnyLengthByte(str);
1166 return __CFStrHashEightBit(contents, len);
1167 } else {
1168 return __CFStrHashCharacters((const UniChar *)contents, len, len);
1169 }
1170 }
1171
1172
1173 static CFStringRef __CFStringCopyDescription(CFTypeRef cf) {
1174 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf, __CFGetAllocator(cf), cf);
1175 }
1176
1177 static CFStringRef __CFStringCopyFormattingDescription(CFTypeRef cf, CFDictionaryRef formatOptions) {
1178 return (CFStringRef)CFStringCreateCopy(__CFGetAllocator(cf), (CFStringRef)cf);
1179 }
1180
/* Type ID of CFString. Starts out as _kCFRuntimeNotATypeID, is assigned in __CFStringInitialize(), and is handed out by CFStringGetTypeID(). */
1181 static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID;
1182
/* Generic (CFTypeRef-based) signature that CFStringCreateCopy is cast to so it can be placed in the class table below. */
1183 typedef CFTypeRef (*CF_STRING_CREATE_COPY)(CFAllocatorRef alloc, CFTypeRef theString);
1184
/* The class record registered with the runtime for CFString in __CFStringInitialize().
   The initializer is positional, so the order of the entries must match the field order of CFRuntimeClass.
   NOTE(review): except for "init", the slot names are not visible in this file; the roles suggested below are inferred from the functions placed in them -- confirm against CFRuntimeClass.
*/
1185 static const CFRuntimeClass __CFStringClass = {
1186 _kCFRuntimeScannedObject,
1187 "CFString",
1188 NULL, // init
1189 (CF_STRING_CREATE_COPY)CFStringCreateCopy, // presumably the copy slot
1190 __CFStringDeallocate, // presumably the finalize/deallocate slot
1191 __CFStringEqual, // presumably the equality slot
1192 __CFStringHash, // presumably the hash slot
1193 __CFStringCopyFormattingDescription, // presumably the formatting description slot
1194 __CFStringCopyDescription // presumably the (debug) description slot
1195 };
1196
1197 __private_extern__ void __CFStringInitialize(void) {
1198 __kCFStringTypeID = _CFRuntimeRegisterClass(&__CFStringClass);
1199 }
1200
1201 CFTypeID CFStringGetTypeID(void) {
1202 return __kCFStringTypeID;
1203 }
1204
1205
1206 static Boolean CFStrIsUnicode(CFStringRef str) {
1207 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, Boolean, (NSString *)str, _encodingCantBeStoredInEightBitCFString);
1208 return __CFStrIsUnicode(str);
1209 }
1210
1211
1212
/* Sentinel value for the contentsDeallocator argument of __CFStringCreateImmutableFunnel3(); it is not a real allocator and is never called through. The funnel replaces it with the string's own allocator ("alloc"). */
1213 #define ALLOCATORSFREEFUNC ((CFAllocatorRef)-1)
1214
1215 /* contentsDeallocator indicates how to free the data if it's noCopy == true:
1216 kCFAllocatorNull: don't free
1217 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
1218 NULL: default allocator
1219 otherwise it's the allocator that should be used (it will be explicitly stored)
1220 if noCopy == false, then freeFunc should be ALLOCATORSFREEFUNC
1221 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
1222 possiblyExternalFormat indicates that the bytes might have BOM and be swapped
1223 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
1224 numBytes contains the actual number of bytes in "bytes", including Length byte,
1225 BUT not the NULL byte at the end
1226 bytes should not contain BOM characters
1227 !!! Various flags should be combined to reduce number of arguments, if possible
1228 */
1229 __private_extern__ CFStringRef __CFStringCreateImmutableFunnel3(
1230 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1231 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1232 CFAllocatorRef contentsDeallocator, UInt32 converterFlags) {
1233
1234 CFMutableStringRef str;
1235 CFVarWidthCharBuffer vBuf;
1236 CFIndex size;
1237 Boolean useLengthByte = false;
1238 Boolean useNullByte = false;
1239 Boolean useInlineData = false;
1240
1241 #if INSTRUMENT_SHARED_STRINGS
1242 const char *recordedEncoding;
1243 char encodingBuffer[128];
1244 if (encoding == kCFStringEncodingUnicode) recordedEncoding = "Unicode";
1245 else if (encoding == kCFStringEncodingASCII) recordedEncoding = "ASCII";
1246 else if (encoding == kCFStringEncodingUTF8) recordedEncoding = "UTF8";
1247 else if (encoding == kCFStringEncodingMacRoman) recordedEncoding = "MacRoman";
1248 else {
1249 snprintf(encodingBuffer, sizeof(encodingBuffer), "0x%lX", (unsigned long)encoding);
1250 recordedEncoding = encodingBuffer;
1251 }
1252 #endif
1253
1254 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1255
1256 if (contentsDeallocator == ALLOCATORSFREEFUNC) {
1257 contentsDeallocator = alloc;
1258 } else if (contentsDeallocator == NULL) {
1259 contentsDeallocator = __CFGetDefaultAllocator();
1260 }
1261
1262 if ((NULL != kCFEmptyString) && (numBytes == 0) && _CFAllocatorIsSystemDefault(alloc)) { // If we are using the system default allocator, and the string is empty, then use the empty string!
1263 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
1264 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1265 }
1266 return (CFStringRef)CFRetain(kCFEmptyString); // Quick exit; won't catch all empty strings, but most
1267 }
1268
1269 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1270
1271 vBuf.shouldFreeChars = false; // We use this to remember to free the buffer possibly allocated by decode
1272
1273 // Record whether we're starting out with an ASCII-superset string, because we need to know this later for the string ROM; this may get changed later if we successfully convert down from Unicode. We only record this once because __CFCanUseEightBitCFStringForBytes() can be expensive.
1274 Boolean stringSupportsEightBitCFRepresentation = encoding != kCFStringEncodingUnicode && __CFCanUseEightBitCFStringForBytes((const uint8_t *)bytes, numBytes, encoding);
1275
1276 // We may also change noCopy within this function if we have to decode the string into an external buffer. We do not want to avoid the use of the string ROM merely because we tried to be efficient and reuse the decoded buffer for the CFString's external storage. Therefore, we use this variable to track whether we actually can ignore the noCopy flag (which may or may not be set anyways).
1277 Boolean stringROMShouldIgnoreNoCopy = false;
1278
1279 // First check to see if the data needs to be converted...
1280 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
1281
1282 if ((encoding == kCFStringEncodingUnicode && possiblyExternalFormat) || (encoding != kCFStringEncodingUnicode && ! stringSupportsEightBitCFRepresentation)) {
1283 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0);
1284 CFIndex realNumBytes = numBytes - (hasLengthByte ? 1 : 0);
1285 Boolean usingPassedInMemory = false;
1286
1287 vBuf.allocator = kCFAllocatorSystemDefault; // We don't want to use client's allocator for temp stuff
1288 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
1289
1290 if (!__CFStringDecodeByteStream3((const uint8_t *)realBytes, realNumBytes, encoding, false, &vBuf, &usingPassedInMemory, converterFlags)) {
1291 // Note that if the string can't be created, we don't free the buffer, even if there is a contents deallocator. This is on purpose.
1292 return NULL;
1293 }
1294
1295 encoding = vBuf.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUnicode;
1296
1297 // Update our flag according to whether the decoded buffer is ASCII
1298 stringSupportsEightBitCFRepresentation = vBuf.isASCII;
1299
1300 if (!usingPassedInMemory) {
1301
1302 // Because __CFStringDecodeByteStream3() allocated our buffer, it's OK for us to free it if we can get the string from the ROM.
1303 stringROMShouldIgnoreNoCopy = true;
1304
1305 // Make the parameters fit the new situation
1306 numBytes = vBuf.isASCII ? vBuf.numChars : (vBuf.numChars * sizeof(UniChar));
1307 hasLengthByte = hasNullByte = false;
1308
1309 // Get rid of the original buffer if its not being used
1310 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1311 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1312 }
1313 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1314
1315 // See if we can reuse any storage the decode func might have allocated
1316 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1317
1318 if (vBuf.shouldFreeChars && (alloc == vBuf.allocator) && encoding == kCFStringEncodingUnicode) {
1319 vBuf.shouldFreeChars = false; // Transferring ownership to the CFString
1320 bytes = CFAllocatorReallocate(vBuf.allocator, (void *)vBuf.chars.unicode, numBytes, 0); // Tighten up the storage
1321 noCopy = true;
1322 #if INSTRUMENT_SHARED_STRINGS
1323 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-NoCopy";
1324 else recordedEncoding = "ForeignUnicode-NoCopy";
1325 #endif
1326 } else {
1327 #if INSTRUMENT_SHARED_STRINGS
1328 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-Copy";
1329 else recordedEncoding = "ForeignUnicode-Copy";
1330 #endif
1331 bytes = vBuf.chars.unicode;
1332 noCopy = false; // Can't do noCopy anymore
1333 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1334 }
1335
1336 }
1337
1338 // At this point, all necessary input arguments have been changed to reflect the new state
1339
1340 } else if (encoding == kCFStringEncodingUnicode && tryToReduceUnicode) { // Check to see if we can reduce Unicode to ASCII
1341 CFIndex cnt;
1342 CFIndex len = numBytes / sizeof(UniChar);
1343 Boolean allASCII = true;
1344
1345 for (cnt = 0; cnt < len; cnt++) if (((const UniChar *)bytes)[cnt] > 127) {
1346 allASCII = false;
1347 break;
1348 }
1349
1350 if (allASCII) { // Yes we can!
1351 uint8_t *ptr, *mem;
1352 Boolean newHasLengthByte = __CFCanUseLengthByte(len);
1353 numBytes = (len + 1 + (newHasLengthByte ? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte
1354 // See if we can use that temporary local buffer in vBuf...
1355 if (numBytes >= __kCFVarWidthLocalBufferSize) {
1356 mem = ptr = (uint8_t *)CFAllocatorAllocate(alloc, numBytes, 0);
1357 if (__CFOASafe) __CFSetLastAllocationEventName(mem, "CFString (store)");
1358 } else {
1359 mem = ptr = (uint8_t *)(vBuf.localBuffer);
1360 }
1361 if (mem) { // If we can't allocate memory for some reason, use what we had (that is, as if we didn't have all ASCII)
1362 // Copy the Unicode bytes into the new ASCII buffer
1363 hasLengthByte = newHasLengthByte;
1364 hasNullByte = true;
1365 if (hasLengthByte) *ptr++ = (uint8_t)len;
1366 for (cnt = 0; cnt < len; cnt++) ptr[cnt] = (uint8_t)(((const UniChar *)bytes)[cnt]);
1367 ptr[len] = 0;
1368 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1369 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1370 }
1371 // Now make everything look like we had an ASCII buffer to start with
1372 bytes = mem;
1373 encoding = kCFStringEncodingASCII;
1374 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1375 noCopy = (numBytes >= __kCFVarWidthLocalBufferSize); // If we had to allocate it, make sure it's kept around
1376 numBytes--; // Should not contain the NULL byte at end...
1377 stringSupportsEightBitCFRepresentation = true; // We're ASCII now!
1378 stringROMShouldIgnoreNoCopy = true; // We allocated this buffer, so we should feel free to get rid of it if we can use the string ROM
1379 #if INSTRUMENT_SHARED_STRINGS
1380 recordedEncoding = "U->A";
1381 #endif
1382 }
1383 }
1384
1385 // At this point, all necessary input arguments have been changed to reflect the new state
1386 }
1387
1388 // Now determine the necessary size
1389 #if INSTRUMENT_SHARED_STRINGS || USE_STRING_ROM
1390 Boolean stringSupportsROM = stringSupportsEightBitCFRepresentation;
1391 #endif
1392
1393 #if INSTRUMENT_SHARED_STRINGS
1394 if (stringSupportsROM) {
1395 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0);
1396 CFIndex realNumBytes = numBytes - !! hasLengthByte;
1397 __CFRecordStringAllocationEvent(recordedEncoding, realBytes, realNumBytes);
1398 }
1399 #endif
1400
1401 CFStringRef romResult = NULL;
1402
1403 #if USE_STRING_ROM
1404
1405 if (stringSupportsROM) {
1406 // Disable the string ROM if necessary
1407 static char sDisableStringROM = -1;
1408 if (sDisableStringROM == -1) sDisableStringROM = !! __CFgetenv("CFStringDisableROM");
1409
1410 if (sDisableStringROM == 0) romResult = _CFSearchStringROM(bytes + !! hasLengthByte, numBytes - !! hasLengthByte);
1411 }
1412 /* if we get a result from our ROM, and noCopy is set, then deallocate the buffer immediately */
1413 if (romResult) {
1414 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1415 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1416 }
1417
1418 /* these don't get used again, but clear them for consistency */
1419 noCopy = false;
1420 bytes = NULL;
1421
1422 /* set our result to the ROM result which is not really mutable, of course, but that's OK because we don't try to modify it. */
1423 str = (CFMutableStringRef)romResult;
1424 }
1425 #endif
1426
1427 if (! romResult) {
1428 // Now determine the necessary size
1429
1430 if (noCopy) {
1431
1432 size = sizeof(void *); // Pointer to the buffer
1433 // special GCRefZero allocator usage always needs saving
1434 if (_CFAllocatorIsGCRefZero(contentsDeallocator) || (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull)) {
1435 size += sizeof(void *); // The contentsDeallocator
1436 }
1437 if (!hasLengthByte) size += sizeof(CFIndex); // Explicit length
1438 useLengthByte = hasLengthByte;
1439 useNullByte = hasNullByte;
1440
1441 } else { // Inline data; reserve space for it
1442
1443 useInlineData = true;
1444 size = numBytes;
1445
1446 if (hasLengthByte || (encoding != kCFStringEncodingUnicode && __CFCanUseLengthByte(numBytes))) {
1447 useLengthByte = true;
1448 if (!hasLengthByte) size += 1;
1449 } else {
1450 size += sizeof(CFIndex); // Explicit length
1451 }
1452 if (hasNullByte || encoding != kCFStringEncodingUnicode) {
1453 useNullByte = true;
1454 size += 1;
1455 }
1456 }
1457
1458 #ifdef STRING_SIZE_STATS
1459 // Dump alloced CFString size info every so often
1460 static int cnt = 0;
1461 static unsigned sizes[256] = {0};
1462 int allocedSize = size + sizeof(CFRuntimeBase);
1463 if (allocedSize < 255) sizes[allocedSize]++; else sizes[255]++;
1464 if ((++cnt % 1000) == 0) {
1465 printf ("\nTotal: %d\n", cnt);
1466 int i; for (i = 0; i < 256; i++) printf("%03d: %5d%s", i, sizes[i], ((i % 8) == 7) ? "\n" : " ");
1467 }
1468 #endif
1469
1470 // Finally, allocate!
1471
1472 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, size, NULL);
1473 if (str) {
1474 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (immutable)");
1475
1476 CFOptionFlags allocBits = _CFAllocatorIsGCRefZero(contentsDeallocator) ? __kCFHasContentsDeallocator : (contentsDeallocator == alloc ? __kCFNotInlineContentsDefaultFree : (contentsDeallocator == kCFAllocatorNull ? __kCFNotInlineContentsNoFree : __kCFNotInlineContentsCustomFree));
1477 __CFStrSetInfoBits(str,
1478 (useInlineData ? __kCFHasInlineContents : allocBits) |
1479 ((encoding == kCFStringEncodingUnicode) ? __kCFIsUnicode : 0) |
1480 (useNullByte ? __kCFHasNullByte : 0) |
1481 (useLengthByte ? __kCFHasLengthByte : 0));
1482
1483 if (!useLengthByte) {
1484 CFIndex length = numBytes - (hasLengthByte ? 1 : 0);
1485 if (encoding == kCFStringEncodingUnicode) length /= sizeof(UniChar);
1486 __CFStrSetExplicitLength(str, length);
1487 }
1488
1489 if (useInlineData) {
1490 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1491 if (useLengthByte && !hasLengthByte) *contents++ = (uint8_t)numBytes;
1492 memmove(contents, bytes, numBytes);
1493 if (useNullByte) contents[numBytes] = 0;
1494 } else {
1495 __CFStrSetContentPtr(str, bytes);
1496 if (__CFStrHasContentsDeallocator(str)) __CFStrSetContentsDeallocator(str, contentsDeallocator);
1497 }
1498 } else {
1499 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1500 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1501 }
1502 }
1503 }
1504 if (vBuf.shouldFreeChars) CFAllocatorDeallocate(vBuf.allocator, (void *)bytes);
1505
1506 return str;
1507 }
1508
1509 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1510 */
1511 CFStringRef __CFStringCreateImmutableFunnel2(
1512 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1513 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1514 CFAllocatorRef contentsDeallocator) {
1515 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, possiblyExternalFormat, tryToReduceUnicode, hasLengthByte, hasNullByte, noCopy, contentsDeallocator, 0);
1516 }
1517
1518
1519
1520 CFStringRef CFStringCreateWithPascalString(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding) {
1521 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1522 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, false, ALLOCATORSFREEFUNC, 0);
1523 }
1524
1525
1526 CFStringRef CFStringCreateWithCString(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding) {
1527 CFIndex len = strlen(cStr);
1528 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, false, ALLOCATORSFREEFUNC, 0);
1529 }
1530
1531 CFStringRef CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1532 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1533 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, true, contentsDeallocator, 0);
1534 }
1535
1536
1537 CFStringRef CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1538 CFIndex len = strlen(cStr);
1539 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, true, contentsDeallocator, 0);
1540 }
1541
1542
1543 CFStringRef CFStringCreateWithCharacters(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars) {
1544 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1545 }
1546
1547
1548 CFStringRef CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars, CFAllocatorRef contentsDeallocator) {
1549 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, false, false, false, true, contentsDeallocator, 0);
1550 }
1551
1552
1553 CFStringRef CFStringCreateWithBytes(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat) {
1554 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1555 }
1556
1557 CFStringRef _CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1558 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1559 }
1560
1561 CFStringRef CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1562 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1563 }
1564
1565 CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1566 return _CFStringCreateWithFormatAndArgumentsAux(alloc, NULL, formatOptions, format, arguments);
1567 }
1568
1569 CFStringRef _CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1570 CFStringRef str;
1571 CFMutableStringRef outputString = CFStringCreateMutable(kCFAllocatorSystemDefault, 0); //should use alloc if no copy/release
1572 __CFStrSetDesiredCapacity(outputString, 120); // Given this will be tightened later, choosing a larger working string is fine
1573 __CFStringAppendFormatCore(outputString, copyDescFunc, formatOptions, format, 0, NULL, 0, arguments);
1574 // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1575 // (However, this does make the string inline, and cause the supplied allocator to be used...)
1576 str = (CFStringRef)CFStringCreateCopy(alloc, outputString);
1577 CFRelease(outputString);
1578 return str;
1579 }
1580
1581 CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) {
1582 CFStringRef result;
1583 va_list argList;
1584
1585 va_start(argList, format);
1586 result = CFStringCreateWithFormatAndArguments(alloc, formatOptions, format, argList);
1587 va_end(argList);
1588
1589 return result;
1590 }
1591
1592 CFStringRef CFStringCreateWithSubstring(CFAllocatorRef alloc, CFStringRef str, CFRange range) {
1593 // CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringRef , (NSString *)str, _createSubstringWithRange:NSMakeRange(range.location, range.length));
1594
1595 __CFAssertIsString(str);
1596 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1597
1598 if ((range.location == 0) && (range.length == __CFStrLength(str))) { /* The substring is the whole string... */
1599 return (CFStringRef)CFStringCreateCopy(alloc, str);
1600 } else if (__CFStrIsEightBit(str)) {
1601 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1602 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location + __CFStrSkipAnyLengthByte(str), range.length, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1603 } else {
1604 const UniChar *contents = (UniChar *)__CFStrContents(str);
1605 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location, range.length * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1606 }
1607 }
1608
1609 CFStringRef CFStringCreateCopy(CFAllocatorRef alloc, CFStringRef str) {
1610 // CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringRef, (NSString *)str, copy);
1611
1612 __CFAssertIsString(str);
1613 if (!__CFStrIsMutable((CFStringRef)str) && // If the string is not mutable
1614 ((alloc ? _CFConvertAllocatorToNonGCRefZeroEquivalent(alloc) : __CFGetDefaultAllocator()) == __CFGetAllocator(str)) && // and it has the same allocator as the one we're using
1615 (__CFStrIsInline((CFStringRef)str) || __CFStrFreeContentsWhenDone((CFStringRef)str) || __CFStrIsConstant((CFStringRef)str))) { // and the characters are inline, or are owned by the string, or the string is constant
1616 if (!(kCFUseCollectableAllocator && _CFAllocatorIsGCRefZero(alloc))) CFRetain(str); // Then just retain instead of making a true copy
1617 return str;
1618 }
1619 if (__CFStrIsEightBit((CFStringRef)str)) {
1620 const uint8_t *contents = (const uint8_t *)__CFStrContents((CFStringRef)str);
1621 return __CFStringCreateImmutableFunnel3(alloc, contents + __CFStrSkipAnyLengthByte((CFStringRef)str), __CFStrLength2((CFStringRef)str, contents), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1622 } else {
1623 const UniChar *contents = (const UniChar *)__CFStrContents((CFStringRef)str);
1624 return __CFStringCreateImmutableFunnel3(alloc, contents, __CFStrLength2((CFStringRef)str, contents) * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1625 }
1626 }
1627
1628
1629
1630 /*** Constant string stuff... ***/
1631
1632 /* Table which holds constant strings created with CFSTR, when -fconstant-cfstrings option is not used. These dynamically created constant strings are stored in constantStringTable. The keys are the 8-bit constant C-strings from the compiler; the values are the CFStrings created for them. _CFSTRLock protects this table.
1633 */
1634 static CFMutableDictionaryRef constantStringTable = NULL;
1635 static CFSpinLock_t _CFSTRLock = CFSpinLockInit;
1636
1637 static CFStringRef __cStrCopyDescription(const void *ptr) {
1638 return CFStringCreateWithCStringNoCopy(kCFAllocatorSystemDefault, (const char *)ptr, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull);
1639 }
1640
1641 static Boolean __cStrEqual(const void *ptr1, const void *ptr2) {
1642 return (strcmp((const char *)ptr1, (const char *)ptr2) == 0);
1643 }
1644
1645 static CFHashCode __cStrHash(const void *ptr) {
1646 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1647 const char *cStr = (const char *)ptr;
1648 CFIndex len = strlen(cStr);
1649 CFHashCode result = 0;
1650 if (len <= 4) { // All chars
1651 unsigned cnt = len;
1652 while (cnt--) result += (result << 8) + *cStr++;
1653 } else { // First and last 2 chars
1654 result += (result << 8) + cStr[0];
1655 result += (result << 8) + cStr[1];
1656 result += (result << 8) + cStr[len-2];
1657 result += (result << 8) + cStr[len-1];
1658 }
1659 result += (result << (len & 31));
1660 return result;
1661 }
1662
1663
1664 CFStringRef __CFStringMakeConstantString(const char *cStr) {
1665 CFStringRef result;
1666 #if defined(DEBUG)
1667 // StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging
1668 if ('\0' == *cStr) return kCFEmptyString;
1669 #endif
1670 if (constantStringTable == NULL) {
1671 CFDictionaryKeyCallBacks constantStringCallBacks = {0, NULL, NULL, __cStrCopyDescription, __cStrEqual, __cStrHash};
1672 CFDictionaryValueCallBacks constantStringValueCallBacks = kCFTypeDictionaryValueCallBacks;
1673 constantStringValueCallBacks.equal = NULL; // So that we only find strings that are ==
1674 CFMutableDictionaryRef table = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, &constantStringCallBacks, &constantStringValueCallBacks);
1675 _CFDictionarySetCapacity(table, 2500); // avoid lots of rehashing
1676 __CFSpinLock(&_CFSTRLock);
1677 if (constantStringTable == NULL) constantStringTable = table;
1678 __CFSpinUnlock(&_CFSTRLock);
1679 if (constantStringTable != table) CFRelease(table);
1680 }
1681
1682 __CFSpinLock(&_CFSTRLock);
1683 if ((result = (CFStringRef)CFDictionaryGetValue(constantStringTable, cStr))) {
1684 __CFSpinUnlock(&_CFSTRLock);
1685 } else {
1686 __CFSpinUnlock(&_CFSTRLock);
1687
1688 {
1689 char *key;
1690 Boolean isASCII = true;
1691 // Given this code path is rarer these days, OK to do this extra work to verify the strings
1692 const char *tmp = cStr;
1693 while (*tmp) {
1694 if (*(tmp++) & 0x80) {
1695 isASCII = false;
1696 break;
1697 }
1698 }
1699 if (!isASCII) {
1700 CFMutableStringRef ms = CFStringCreateMutable(kCFAllocatorSystemDefault, 0);
1701 tmp = cStr;
1702 while (*tmp) {
1703 CFStringAppendFormat(ms, NULL, (*tmp & 0x80) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp);
1704 tmp++;
1705 }
1706 CFLog(kCFLogLevelWarning, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms);
1707 CFRelease(ms);
1708 }
1709 // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility
1710 result = CFStringCreateWithCString(kCFAllocatorSystemDefault, cStr, kCFStringEncodingMacRoman);
1711 if (result == NULL) {
1712 CFLog(__kCFLogAssertion, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1713 HALT;
1714 }
1715 if (__CFOASafe) __CFSetLastAllocationEventName((void *)result, "CFString (CFSTR)");
1716 if (__CFStrIsEightBit(result)) {
1717 key = (char *)__CFStrContents(result) + __CFStrSkipAnyLengthByte(result);
1718 } else { // For some reason the string is not 8-bit!
1719 CFIndex keySize = strlen(cStr) + 1;
1720 key = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, keySize, 0);
1721 if (__CFOASafe) __CFSetLastAllocationEventName((void *)key, "CFString (CFSTR key)");
1722 strlcpy(key, cStr, keySize); // !!! We will leak this, if the string is removed from the table (or table is freed)
1723 }
1724
1725 {
1726 CFStringRef resultToBeReleased = result;
1727 CFIndex count;
1728 __CFSpinLock(&_CFSTRLock);
1729 count = CFDictionaryGetCount(constantStringTable);
1730 CFDictionaryAddValue(constantStringTable, key, result);
1731 if (CFDictionaryGetCount(constantStringTable) == count) { // add did nothing, someone already put it there
1732 result = (CFStringRef)CFDictionaryGetValue(constantStringTable, key);
1733 } else {
1734 #if __LP64__
1735 ((struct __CFString *)result)->base._rc = 0;
1736 #else
1737 ((struct __CFString *)result)->base._cfinfo[CF_RC_BITS] = 0;
1738 #endif
1739 }
1740 __CFSpinUnlock(&_CFSTRLock);
1741 // This either eliminates the extra retain on the freshly created string, or frees it, if it was actually not inserted into the table
1742 CFRelease(resultToBeReleased);
1743 }
1744 }
1745 }
1746 return result;
1747 }
1748
#if defined(DEBUG)
/* Debug-only: reports whether str is one of the values held in constantStringTable.
   Returns false when the table has not been created; otherwise the lookup is done under _CFSTRLock.
*/
static Boolean __CFStrIsConstantString(CFStringRef str) {
    if (!constantStringTable) return false;

    __CFSpinLock(&_CFSTRLock);
    Boolean present = CFDictionaryContainsValue(constantStringTable, str);
    __CFSpinUnlock(&_CFSTRLock);
    return present;
}
#endif
1760
1761
#if DEPLOYMENT_TARGET_WINDOWS
/* In case the library is unloaded, releases the store for the constant string table and clears the pointer to it.
   In DEBUG builds, __CFConstantStringTableBeingFreed is set for the duration of the release.
*/
void __CFStringCleanup (void) {
    if (constantStringTable == NULL) return;

#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = true;
#endif
    CFRelease(constantStringTable);
#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = false;
#endif
    constantStringTable = NULL;
}
#endif
1777
1778
1779 // Can pass in NSString as replacement string
1780 // Call with numRanges > 0, and incrementing ranges
1781
1782 static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) {
1783 int cnt;
1784 CFStringRef copy = NULL;
1785 if (replacement == str) copy = replacement = CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1786 CFIndex replacementLength = CFStringGetLength(replacement);
1787
1788 __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1789
1790 if (__CFStrIsUnicode(str)) {
1791 UniChar *contents = (UniChar *)__CFStrContents(str);
1792 UniChar *firstReplacement = contents + ranges[0].location;
1793 // Extract the replacementString into the first location, then copy from there
1794 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement);
1795 for (cnt = 1; cnt < numRanges; cnt++) {
1796 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1797 contents += replacementLength - ranges[cnt - 1].length;
1798 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar));
1799 }
1800 } else {
1801 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1802 uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str);
1803 // Extract the replacementString into the first location, then copy from there
1804 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL);
1805 contents += __CFStrSkipAnyLengthByte(str); // Now contents will simply track the location to insert next string into
1806 for (cnt = 1; cnt < numRanges; cnt++) {
1807 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1808 contents += replacementLength - ranges[cnt - 1].length;
1809 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength);
1810 }
1811 }
1812 if (copy) CFRelease(copy);
1813 }
1814
1815 // Can pass in NSString as replacement string
1816
1817 CF_INLINE void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
1818 CFStringRef copy = NULL;
1819 if (replacement == str) copy = replacement = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1820 CFIndex replacementLength = CFStringGetLength(replacement);
1821
1822 __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1823
1824 if (__CFStrIsUnicode(str)) {
1825 UniChar *contents = (UniChar *)__CFStrContents(str);
1826 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location);
1827 } else {
1828 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1829 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL);
1830 }
1831
1832 if (copy) CFRelease(copy);
1833 }
1834
1835 /* Default desired capacity for a new mutable string, used when the client does not provide one (passes a maxLength of 0)
1836 */
1837 #define DEFAULTMINCAPACITY 32
1838
1839 CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) {
1840 CFMutableStringRef str;
1841 if (_CFAllocatorIsGCRefZero(alloc)) additionalInfoBits |= __kCFHasContentsAllocator;
1842 Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false;
1843
1844 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1845
1846 // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
1847 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(struct __notInlineMutable) - (hasExternalContentsAllocator ? 0 : sizeof(CFAllocatorRef)), NULL);
1848 if (str) {
1849 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)");
1850
1851 __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits);
1852 str->variants.notInlineMutable.buffer = NULL;
1853 __CFStrSetExplicitLength(str, 0);
1854 str->variants.notInlineMutable.hasGap = str->variants.notInlineMutable.isFixedCapacity = str->variants.notInlineMutable.isExternalMutable = str->variants.notInlineMutable.capacityProvidedExternally = 0;
1855 if (maxLength != 0) __CFStrSetIsFixed(str);
1856 __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength);
1857 __CFStrSetCapacity(str, 0);
1858 if (__CFStrHasContentsAllocator(str)) {
1859 // contents allocator starts out as the string's own allocator
1860 __CFStrSetContentsAllocator(str, alloc);
1861 }
1862 }
1863 return str;
1864 }
1865
1866 CFMutableStringRef CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc, UniChar *chars, CFIndex numChars, CFIndex capacity, CFAllocatorRef externalCharactersAllocator) {
1867 CFOptionFlags contentsAllocationBits = externalCharactersAllocator ? ((externalCharactersAllocator == kCFAllocatorNull) ? __kCFNotInlineContentsNoFree : __kCFHasContentsAllocator) : __kCFNotInlineContentsDefaultFree;
1868 CFMutableStringRef string = __CFStringCreateMutableFunnel(alloc, 0, contentsAllocationBits | __kCFIsUnicode);
1869 if (string) {
1870 __CFStrSetIsExternalMutable(string);
1871 if (__CFStrHasContentsAllocator(string)) {
1872 CFAllocatorRef allocator = __CFStrContentsAllocator((CFMutableStringRef)string);
1873 if (!(kCFAllocatorSystemDefaultGCRefZero == allocator || kCFAllocatorDefaultGCRefZero == allocator)) CFRelease(allocator);
1874 __CFStrSetContentsAllocator(string, externalCharactersAllocator);
1875 }
1876 CFStringSetExternalCharactersNoCopy(string, chars, numChars, capacity);
1877 }
1878 return string;
1879 }
1880
1881 CFMutableStringRef CFStringCreateMutable(CFAllocatorRef alloc, CFIndex maxLength) {
1882 return __CFStringCreateMutableFunnel(alloc, maxLength, __kCFNotInlineContentsDefaultFree);
1883 }
1884
1885 CFMutableStringRef CFStringCreateMutableCopy(CFAllocatorRef alloc, CFIndex maxLength, CFStringRef string) {
1886 CFMutableStringRef newString;
1887
1888 // CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFMutableStringRef, (NSString *)string, mutableCopy);
1889
1890 __CFAssertIsString(string);
1891
1892 newString = CFStringCreateMutable(alloc, maxLength);
1893 __CFStringReplace(newString, CFRangeMake(0, 0), string);
1894
1895 return newString;
1896 }
1897
1898
1899 __private_extern__ void _CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex len) {
1900 __CFAssertIsStringAndMutable(str);
1901 __CFStrSetDesiredCapacity(str, len);
1902 }
1903
1904
1905 /* This one is for CF
1906 */
1907 CFIndex CFStringGetLength(CFStringRef str) {
1908 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFIndex, (NSString *)str, length);
1909
1910 __CFAssertIsString(str);
1911 return __CFStrLength(str);
1912 }
1913
1914 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1915 */
1916 CFIndex _CFStringGetLength2(CFStringRef str) {
1917 return __CFStrLength(str);
1918 }
1919
1920
1921 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere.
1922 */
1923 CF_INLINE UniChar __CFStringGetCharacterAtIndexGuts(CFStringRef str, CFIndex idx, const uint8_t *contents) {
1924 if (__CFStrIsEightBit(str)) {
1925 contents += __CFStrSkipAnyLengthByte(str);
1926 #if defined(DEBUG)
1927 if (!__CFCharToUniCharFunc && (contents[idx] >= 128)) {
1928 // Can't do log here, as it might be too early
1929 fprintf(stderr, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
1930 }
1931 #endif
1932 return __CFCharToUniCharTable[contents[idx]];
1933 }
1934
1935 return ((UniChar *)contents)[idx];
1936 }
1937
1938 /* This one is for the CF API
1939 */
1940 UniChar CFStringGetCharacterAtIndex(CFStringRef str, CFIndex idx) {
1941 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, UniChar, (NSString *)str, characterAtIndex:(NSUInteger)idx);
1942
1943 __CFAssertIsString(str);
1944 __CFAssertIndexIsInStringBounds(str, idx);
1945 return __CFStringGetCharacterAtIndexGuts(str, idx, (const uint8_t *)__CFStrContents(str));
1946 }
1947
1948 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1949 */
1950 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str, CFIndex idx, UniChar *ch) {
1951 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1952 if (idx >= __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1953 *ch = __CFStringGetCharacterAtIndexGuts(str, idx, contents);
1954 return _CFStringErrNone;
1955 }
1956
1957
1958 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
1959 */
1960 CF_INLINE void __CFStringGetCharactersGuts(CFStringRef str, CFRange range, UniChar *buffer, const uint8_t *contents) {
1961 if (__CFStrIsEightBit(str)) {
1962 __CFStrConvertBytesToUnicode(((uint8_t *)contents) + (range.location + __CFStrSkipAnyLengthByte(str)), buffer, range.length);
1963 } else {
1964 const UniChar *uContents = ((UniChar *)contents) + range.location;
1965 memmove(buffer, uContents, range.length * sizeof(UniChar));
1966 }
1967 }
1968
1969 /* This one is for the CF API
1970 */
1971 void CFStringGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1972 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)str, getCharacters:(unichar *)buffer range:NSMakeRange(range.location, range.length));
1973
1974 __CFAssertIsString(str);
1975 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1976 __CFStringGetCharactersGuts(str, range, buffer, (const uint8_t *)__CFStrContents(str));
1977 }
1978
1979 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1980 */
1981 int _CFStringCheckAndGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1982 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1983 if (range.location + range.length > __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1984 __CFStringGetCharactersGuts(str, range, buffer, contents);
1985 return _CFStringErrNone;
1986 }
1987
1988
1989 CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) {
1990
1991 /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */
1992 __CFAssertIsNotNegative(maxBufLen);
1993
1994 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { // If we can grope the ivars, let's do it...
1995 __CFAssertIsString(str);
1996 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1997
1998 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
1999 const unsigned char *contents = (const unsigned char *)__CFStrContents(str);
2000 CFIndex cLength = range.length;
2001
2002 if (buffer) {
2003 if (cLength > maxBufLen) cLength = maxBufLen;
2004 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength);
2005 }
2006 if (usedBufLen) *usedBufLen = cLength;
2007
2008 return cLength;
2009 }
2010 }
2011
2012 return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen);
2013 }
2014
2015
2016 ConstStringPtr CFStringGetPascalStringPtr (CFStringRef str, CFStringEncoding encoding) {
2017
2018 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
2019 __CFAssertIsString(str);
2020 if (__CFStrHasLengthByte(str) && __CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII
2021 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
2022 if (__CFStrHasExplicitLength(str) && (__CFStrLength2(str, contents) != (SInt32)(*contents))) return NULL; // Invalid length byte
2023 return (ConstStringPtr)contents;
2024 }
2025 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
2026 }
2027 return NULL;
2028 }
2029
2030
2031 const char * CFStringGetCStringPtr(CFStringRef str, CFStringEncoding encoding) {
2032
2033 if (encoding != __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII != __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding))) return NULL;
2034 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
2035
2036 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, const char *, (NSString *)str, _fastCStringContents:true);
2037
2038 __CFAssertIsString(str);
2039
2040 if (__CFStrHasNullByte(str)) {
2041 // Note: this is called a lot, 27000 times to open a small xcode project with one file open.
2042 // Of these uses about 1500 are for cStrings/utf8strings.
2043 #if 0
2044 // Only sometimes when the stars are aligned will this call return a gc pointer
2045 // under GC we can only really return a pointer to the start of a GC buffer for cString use
2046 // (Is there a simpler way to ask if contents isGC?)
2047 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
2048 if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
2049 if (__CFStrSkipAnyLengthByte(str) != 0 || !__CFStrIsMutable(str)) {
2050 static int counter = 0;
2051 printf("CFString %dth unsafe safe string %s\n", ++counter, __CFStrContents(str) + __CFStrSkipAnyLengthByte(str));
2052 return NULL;
2053 }
2054 }
2055 #endif
2056 return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str);
2057 } else {
2058 return NULL;
2059 }
2060 }
2061
2062
2063 const UniChar *CFStringGetCharactersPtr(CFStringRef str) {
2064
2065 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, const UniChar *, (NSString *)str, _fastCharacterContents);
2066
2067 __CFAssertIsString(str);
2068 if (__CFStrIsUnicode(str)) return (const UniChar *)__CFStrContents(str);
2069 return NULL;
2070 }
2071
2072
2073 Boolean CFStringGetPascalString(CFStringRef str, Str255 buffer, CFIndex bufferSize, CFStringEncoding encoding) {
2074 CFIndex length;
2075 CFIndex usedLen;
2076
2077 __CFAssertIsNotNegative(bufferSize);
2078 if (bufferSize < 1) return false;
2079
2080 if (CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
2081 length = CFStringGetLength(str);
2082 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
2083 } else {
2084 const uint8_t *contents;
2085
2086 __CFAssertIsString(str);
2087
2088 contents = (const uint8_t *)__CFStrContents(str);
2089 length = __CFStrLength2(str, contents);
2090
2091 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
2092
2093 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2094 if (length >= bufferSize) return false;
2095 memmove((void*)(1 + (const char*)buffer), (__CFStrSkipAnyLengthByte(str) + contents), length);
2096 *buffer = (unsigned char)length;
2097 return true;
2098 }
2099 }
2100
2101 if (__CFStringEncodeByteStream(str, 0, length, false, encoding, false, (UInt8 *)(1 + (uint8_t *)buffer), bufferSize - 1, &usedLen) != length) {
2102
2103 #if defined(DEBUG)
2104 if (bufferSize > 0) {
2105 strlcpy((char *)buffer + 1, CONVERSIONFAILURESTR, bufferSize - 1);
2106 buffer[0] = (unsigned char)((CFIndex)sizeof(CONVERSIONFAILURESTR) < (bufferSize - 1) ? (CFIndex)sizeof(CONVERSIONFAILURESTR) : (bufferSize - 1));
2107 }
2108 #else
2109 if (bufferSize > 0) buffer[0] = 0;
2110 #endif
2111 return false;
2112 }
2113 *buffer = (unsigned char)usedLen;
2114 return true;
2115 }
2116
2117 Boolean CFStringGetCString(CFStringRef str, char *buffer, CFIndex bufferSize, CFStringEncoding encoding) {
2118 const uint8_t *contents;
2119 CFIndex len;
2120
2121 __CFAssertIsNotNegative(bufferSize);
2122 if (bufferSize < 1) return false;
2123
2124 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, Boolean, (NSString *)str, _getCString:buffer maxLength:(NSUInteger)bufferSize - 1 encoding:encoding);
2125
2126 __CFAssertIsString(str);
2127
2128 contents = (const uint8_t *)__CFStrContents(str);
2129 len = __CFStrLength2(str, contents);
2130
2131 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2132 if (len >= bufferSize) return false;
2133 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str), len);
2134 buffer[len] = 0;
2135 return true;
2136 } else {
2137 CFIndex usedLen;
2138
2139 if (__CFStringEncodeByteStream(str, 0, len, false, encoding, false, (unsigned char*) buffer, bufferSize - 1, &usedLen) == len) {
2140 buffer[usedLen] = '\0';
2141 return true;
2142 } else {
2143 #if defined(DEBUG)
2144 strlcpy(buffer, CONVERSIONFAILURESTR, bufferSize);
2145 #else
2146 if (bufferSize > 0) buffer[0] = 0;
2147 #endif
2148 return false;
2149 }
2150 }
2151 }
2152
/* Accessors for a per-locale flag, defined outside this chunk.
   _CFStrGetLanguageIdentifierForLocale() below sets the flag when it finds no special-cased language
   for a locale, and returns NULL immediately when the flag is already set.
*/
2153 extern Boolean __CFLocaleGetNullLocale(struct __CFLocale *locale);
2154 extern void __CFLocaleSetNullLocale(struct __CFLocale *locale);
2155
2156 static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale) {
2157 CFStringRef collatorID;
2158 const char *langID = NULL;
2159 static const void *lastLocale = NULL;
2160 static const char *lastLangID = NULL;
2161 static CFSpinLock_t lock = CFSpinLockInit;
2162
2163 if (__CFLocaleGetNullLocale((struct __CFLocale *)locale)) return NULL;
2164
2165 __CFSpinLock(&lock);
2166 if ((NULL != lastLocale) && (lastLocale == locale)) {
2167 __CFSpinUnlock(&lock);
2168 return lastLangID;
2169 }
2170 __CFSpinUnlock(&lock);
2171
2172 collatorID = (CFStringRef)CFLocaleGetValue(locale, __kCFLocaleCollatorID);
2173
2174 // This is somewhat depending on CFLocale implementation always creating CFString for locale identifer ???
2175 if (__CFStrLength(collatorID) > 1) {
2176 const void *contents = __CFStrContents(collatorID);
2177 const char *string;
2178 char buffer[2];
2179
2180 if (__CFStrIsEightBit(collatorID)) {
2181 string = ((const char *)contents) + __CFStrSkipAnyLengthByte(collatorID);
2182 } else {
2183 const UTF16Char *characters = (const UTF16Char *)contents;
2184
2185 buffer[0] = (char)*(characters++);
2186 buffer[1] = (char)*characters;
2187 string = buffer;
2188 }
2189
2190 if (!strncmp(string, "az", 2)) { // Azerbaijani
2191 langID = "az";
2192 } else if (!strncmp(string, "lt", 2)) { // Lithuanian
2193 langID = "lt";
2194 } else if (!strncmp(string, "tr", 2)) { // Turkish
2195 langID = "tr";
2196 } else if (!strncmp(string, "nl", 2)) { // Dutch
2197 langID = "nl";
2198 } else if (!strncmp(string, "el", 2)) { // Greek
2199 langID = "el";
2200 }
2201 }
2202
2203
2204 if (langID == NULL) __CFLocaleSetNullLocale((struct __CFLocale *)locale);
2205
2206 __CFSpinLock(&lock);
2207 lastLocale = locale;
2208 lastLangID = langID;
2209 __CFSpinUnlock(&lock);
2210
2211 return langID;
2212 }
2213
2214 CF_INLINE bool _CFCanUseLocale(CFLocaleRef locale) {
2215 if (locale) {
2216 return true;
2217 }
2218 return false;
2219 }
2220
// Element count of the UTF16Char scratch buffer handed to CFUniCharMapCaseTo() in __CFStringFoldCharacterClusterAtIndex()
2221 #define MAX_CASE_MAPPING_BUF (8)
2222 #define ZERO_WIDTH_JOINER (0x200D) // U+200D
2223 #define COMBINING_GRAPHEME_JOINER (0x034F) // U+034F
2224 // Hangul ranges
// Inclusive code point bounds; the three jamo ranges come first, then the precomposed syllable range
2225 #define HANGUL_CHOSEONG_START (0x1100)
2226 #define HANGUL_CHOSEONG_END (0x115F)
2227 #define HANGUL_JUNGSEONG_START (0x1160)
2228 #define HANGUL_JUNGSEONG_END (0x11A2)
2229 #define HANGUL_JONGSEONG_START (0x11A8)
2230 #define HANGUL_JONGSEONG_END (0x11F9)
2231
2232 #define HANGUL_SYLLABLE_START (0xAC00)
2233 #define HANGUL_SYLLABLE_END (0xD7AF)
2234
2235
2236 // Returns the length of characters filled into outCharacters. If no change, returns 0. maxBufferLength should be at least 8
//
// character       - the character found at index; 0 makes the function do nothing and return 0.
// buffer, index   - inline buffer and position that character came from; characters that follow are read
//                   from buffer starting at index + 1.
// flags           - kCFCompare* option bits; the ones consulted here are kCFCompareCaseInsensitive,
//                   kCFCompareWidthInsensitive, kCFCompareDiacriticInsensitive and kCFCompareNonliteral.
// langCode        - language identifier or NULL; "tr" and "az" enable the special treatment of 'I' below.
// outCharacters   - receives the folded UTF-32 characters.
// maxBufferLength - capacity of outCharacters. NOTE(review): the case-fold fill loop stores the output of
//                   CFUniCharMapCaseTo() (capped at MAX_CASE_MAPPING_BUF) without testing this value, which
//                   is presumably why at least 8 is required - confirm.
// consumedLength  - optional; written only when the return value is > 0, with the number of positions in
//                   buffer that were consumed, counted from index.
2237 static CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength) {
2238 CFIndex filledLength = 0, currentIndex = index;
2239
2240 if (0 != character) {
2241 UTF16Char lowSurrogate;
2242 CFIndex planeNo = (character >> 16);
2243 bool isTurkikCapitalI = false;
2244 static const uint8_t *decompBMP = NULL;
2245 static const uint8_t *graphemeBMP = NULL;
2246
// The plane 0 bitmaps are fetched on first use and kept in the function-level statics above
2247 if (NULL == decompBMP) {
2248 decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
2249 graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2250 }
2251
// currentIndex now refers to the position following `character`
2252 ++currentIndex;
2253
// 'I' is kept out of the ASCII shortcut whenever a langCode is supplied, so that the special-casing in the else branch can see it
2254 if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII
2255 if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) {
2256 character += ('a' - 'A');
2257 *outCharacters = character;
2258 filledLength = 1;
2259 }
2260 } else {
2261 // do width-insensitive mapping
2262 if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) {
2263 (void)CFUniCharCompatibilityDecompose(&character, 1, 1);
2264 *outCharacters = character;
2265 filledLength = 1;
2266 }
2267
2268 // map surrogates
2269 if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) {
2270 character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
2271 ++currentIndex;
2272 planeNo = (character >> 16);
2273 }
2274
2275 // decompose
2276 if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
2277 if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) {
2278 UTF32Char original = character;
2279
2280 filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength);
2281 character = *outCharacters;
2282
// Diacritic-insensitive with a base below U+0510: keep only the first decomposed character.
// Otherwise, when non-literal matching was not requested, discard the decomposition again.
2283 if ((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) {
2284 filledLength = 1; // reset if Roman, Greek, Cyrillic
2285 } else if (0 == (flags & kCFCompareNonliteral)) {
2286 character = original;
2287 filledLength = 0;
2288 }
2289 }
2290 }
2291
2292 // fold case
2293 if (flags & kCFCompareCaseInsensitive) {
2294 const uint8_t *nonBaseBitmap;
2295 bool filterNonBase = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? true : false);
2296 static const uint8_t *lowerBMP = NULL;
2297 static const uint8_t *caseFoldBMP = NULL;
2298
2299 if (NULL == lowerBMP) {
2300 lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0);
2301 caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0);
2302 }
2303
// For "tr"/"az", 'I' followed by U+0307 (either already in outCharacters or next in buffer) is replaced by 'i' and the U+0307 is dropped
2304 if ((NULL != langCode) && ('I' == character) && ((0 == strcmp((const char *)langCode, "tr")) || (0 == strcmp((const char *)langCode, "az")))) { // do Turkik special-casing
2305 if (filledLength > 1) {
2306 if (0x0307 == outCharacters[1]) {
2307 if (--filledLength > 1) memmove((outCharacters + 1), (outCharacters + 2), sizeof(UTF32Char) * (filledLength - 1));
2308 character = *outCharacters = 'i';
2309 isTurkikCapitalI = true;
2310 }
2311 } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) {
2312 character = *outCharacters = 'i';
2313 filledLength = 1;
2314 ++currentIndex;
2315 isTurkikCapitalI = true;
2316 }
2317 }
2318 if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) {
2319 UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF];
2320 const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit;
2321 UTF32Char *outCharactersP = outCharacters;
2322 uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode);
2323
2324 bufferLimit = bufferP + bufferLength;
2325
2326 if (filledLength > 0) --filledLength; // decrement filledLength (will add back later)
2327
2328 // make space for casefold characters
2329 if ((filledLength > 0) && (bufferLength > 1)) {
2330 CFIndex totalScalerLength = 0;
2331
// Count the case-fold result in whole characters (a surrogate pair counts once), then shift the characters after the first one up to make room
2332 while (bufferP < bufferLimit) {
2333 if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP;
2334 ++totalScalerLength;
2335 }
2336 memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char));
2337 bufferP = caseFoldBuffer;
2338 }
2339
2340 // fill
2341 while (bufferP < bufferLimit) {
2342 character = *(bufferP++);
2343 if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) {
2344 character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++));
2345 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
2346 } else {
2347 nonBaseBitmap = graphemeBMP;
2348 }
2349
// With filterNonBase set, grapheme-extend characters produced by the case fold are left out
2350 if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2351 *(outCharactersP++) = character;
2352 ++filledLength;
2353 }
2354 }
2355 }
2356 }
2357 }
2358
2359 // collect following combining marks
2360 if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
2361 const uint8_t *nonBaseBitmap;
2362 const uint8_t *decompBitmap;
// doFill == false: following grapheme-extend characters are consumed (currentIndex advances) but not stored
2363 bool doFill = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? false : true);
2364
2365 if (0 == filledLength) {
2366 *outCharacters = character; // filledLength will be updated below on demand
2367
2368 if (doFill) { // check if really needs to fill
2369 UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2370
2371 if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2372 nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate);
2373 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (nonBaseCharacter >> 16));
2374 decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16));
2375 } else {
2376 nonBaseBitmap = graphemeBMP;
2377 decompBitmap = decompBMP;
2378 }
2379
2380 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) {
2381 filledLength = 1; // For the base character
2382
2383 if ((0 == (flags & kCFCompareDiacriticInsensitive)) || (nonBaseCharacter > 0x050F)) {
2384 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) {
2385 filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2386 } else {
2387 outCharacters[filledLength++] = nonBaseCharacter;
2388 }
2389 }
// nonBaseBitmap == graphemeBMP means the character was a single UTF-16 unit; otherwise it was a surrogate pair
2390 currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
2391 } else {
2392 doFill = false;
2393 }
2394 }
2395 }
2396
2397 while (filledLength < maxBufferLength) { // do the rest
2398 character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2399
2400 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2401 character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
2402 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
2403 decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16));
2404 } else {
2405 nonBaseBitmap = graphemeBMP;
2406 decompBitmap = decompBMP;
2407 }
2408 if (isTurkikCapitalI) {
2409 isTurkikCapitalI = false;
2410 } else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2411 if (doFill) {
2412 if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) {
2413 CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2414
2415 if (0 == currentLength) break; // didn't fit
2416
2417 filledLength += currentLength;
2418 } else {
2419 outCharacters[filledLength++] = character;
2420 }
2421 } else if (0 == filledLength) {
2422 filledLength = 1; // For the base character
2423 }
2424 currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
2425 } else {
2426 break;
2427 }
2428 }
2429
// Walk back over the trailing grapheme-extend characters; the span starting at the character where the walk stops is handed to CFUniCharPrioritySort()
2430 if (filledLength > 1) {
2431 UTF32Char *sortCharactersLimit = outCharacters + filledLength;
2432 UTF32Char *sortCharacters = sortCharactersLimit - 1;
2433
2434 while ((outCharacters < sortCharacters) && CFUniCharIsMemberOfBitmap(*sortCharacters, ((*sortCharacters < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (*sortCharacters >> 16))))) --sortCharacters;
2435
2436 if ((sortCharactersLimit - sortCharacters) > 1) CFUniCharPrioritySort(sortCharacters, (sortCharactersLimit - sortCharacters)); // priority sort
2437 }
2438 }
2439 }
2440
// consumedLength is left untouched when nothing was produced
2441 if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index);
2442
2443 return filledLength;
2444 }
2445
2446 static bool __CFStringFillCharacterSetInlineBuffer(CFCharacterSetInlineBuffer *buffer, CFStringCompareFlags compareOptions) {
2447 if (0 != (compareOptions & kCFCompareIgnoreNonAlphanumeric)) {
2448 static CFCharacterSetRef nonAlnumChars = NULL;
2449
2450 if (NULL == nonAlnumChars) {
2451 CFMutableCharacterSetRef cset = CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault, CFCharacterSetGetPredefined(kCFCharacterSetAlphaNumeric));
2452 CFCharacterSetInvert(cset);
2453 if (!OSAtomicCompareAndSwapPtrBarrier(NULL, cset, (void **)&nonAlnumChars)) CFRelease(cset);
2454 }
2455
2456 CFCharacterSetInitInlineBuffer(nonAlnumChars, buffer);
2457
2458 return true;
2459 }
2460
2461 return false;
2462 }
2463
// Element count of the UTF32Char stack buffers (strBuf1/strBuf2) in CFStringCompareWithOptionsAndLocale(); also passed as maxBufferLength to __CFStringFoldCharacterClusterAtIndex()
2464 #define kCFStringStackBufferLength (__kCFStringInlineBufferLength)
2465
2466 CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions, CFLocaleRef locale) {
2467 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2468 UTF32Char strBuf1[kCFStringStackBufferLength];
2469 UTF32Char strBuf2[kCFStringStackBufferLength];
2470 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2471 UTF32Char str1Char, str2Char;
2472 CFIndex str1UsedLen, str2UsedLen;
2473 CFIndex str1Index = 0, str2Index = 0, strBuf1Index = 0, strBuf2Index = 0, strBuf1Len = 0, strBuf2Len = 0;
2474 CFIndex str1LocalizedIndex = 0, str2LocalizedIndex = 0;
2475 CFIndex forcedIndex1 = 0, forcedIndex2 = 0;
2476 CFIndex str2Len = CFStringGetLength(string2);
2477 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2478 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
2479 bool equalityOptions = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive)) ? true : false);
2480 bool numerically = ((compareOptions & kCFCompareNumerically) ? true : false);
2481 bool forceOrdering = ((compareOptions & kCFCompareForcedOrdering) ? true : false);
2482 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2483 const uint8_t *langCode;
2484 CFComparisonResult compareResult = kCFCompareEqualTo;
2485 UTF16Char otherChar;
2486 Boolean freeLocale = false;
2487 CFCharacterSetInlineBuffer *ignoredChars = NULL;
2488 CFCharacterSetInlineBuffer csetBuffer;
2489 bool numericEquivalence = false;
2490
2491 if ((compareOptions & kCFCompareLocalized) && (NULL == locale)) {
2492 locale = CFLocaleCopyCurrent();
2493 freeLocale = true;
2494 }
2495
2496 langCode = ((NULL == locale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale));
2497
2498 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
2499 ignoredChars = &csetBuffer;
2500 equalityOptions = true;
2501 }
2502
2503 if ((NULL == locale) && (NULL == ignoredChars) && !numerically) { // could do binary comp (be careful when adding new flags)
2504 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2505 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2506 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(string2, eightBitEncoding);
2507 CFIndex factor = sizeof(uint8_t);
2508
2509 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2510 compareOptions &= ~kCFCompareNonliteral; // remove non-literal
2511
2512 if ((kCFStringEncodingASCII == eightBitEncoding) && (false == forceOrdering)) {
2513 if (caseInsensitive) {
2514 int cmpResult = strncasecmp_l((const char *)str1Bytes + rangeToCompare.location, (const char *)str2Bytes, __CFMin(rangeToCompare.length, str2Len), NULL);
2515
2516 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2517
2518 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2519 }
2520 } else if (caseInsensitive || diacriticsInsensitive) {
2521 CFIndex limitLength = __CFMin(rangeToCompare.length, str2Len);
2522
2523 str1Bytes += rangeToCompare.location;
2524
2525 while (str1Index < limitLength) {
2526 str1Char = str1Bytes[str1Index];
2527 str2Char = str2Bytes[str1Index];
2528
2529 if (str1Char != str2Char) {
2530 if ((str1Char < 0x80) && (str2Char < 0x80)) {
2531 if (forceOrdering && (kCFCompareEqualTo == compareResult) && (str1Char != str2Char)) compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2532 if (caseInsensitive) {
2533 if ((str1Char >= 'A') && (str1Char <= 'Z')) str1Char += ('a' - 'A');
2534 if ((str2Char >= 'A') && (str2Char <= 'Z')) str2Char += ('a' - 'A');
2535 }
2536
2537 if (str1Char != str2Char) return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2538 } else {
2539 str1Bytes = NULL;
2540 break;
2541 }
2542 }
2543 ++str1Index;
2544 }
2545
2546 str2Index = str1Index;
2547
2548 if (str1Index == limitLength) {
2549 int cmpResult = rangeToCompare.length - str2Len;
2550
2551 return ((0 == cmpResult) ? compareResult : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2552 }
2553 }
2554 } else if (!equalityOptions && (NULL == str1Bytes) && (NULL == str2Bytes)) {
2555 str1Bytes = (const uint8_t *)CFStringGetCharactersPtr(string);
2556 str2Bytes = (const uint8_t *)CFStringGetCharactersPtr(string2);
2557 factor = sizeof(UTF16Char);
2558 #if __LITTLE_ENDIAN__
2559 if ((NULL != str1Bytes) && (NULL != str2Bytes)) { // we cannot use memcmp
2560 const UTF16Char *str1 = ((const UTF16Char *)str1Bytes) + rangeToCompare.location;
2561 const UTF16Char *str1Limit = str1 + __CFMin(rangeToCompare.length, str2Len);
2562 const UTF16Char *str2 = (const UTF16Char *)str2Bytes;
2563 CFIndex cmpResult = 0;
2564
2565 while ((0 == cmpResult) && (str1 < str1Limit)) cmpResult = (CFIndex)*(str1++) - (CFIndex)*(str2++);
2566
2567 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2568
2569 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2570 }
2571 #endif /* __LITTLE_ENDIAN__ */
2572 }
2573 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2574 int cmpResult = memcmp(str1Bytes + (rangeToCompare.location * factor), str2Bytes, __CFMin(rangeToCompare.length, str2Len) * factor);
2575
2576 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2577
2578 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2579 }
2580 }
2581
2582 CFStringInitInlineBuffer(string, &inlineBuf1, rangeToCompare);
2583 CFStringInitInlineBuffer(string2, &inlineBuf2, CFRangeMake(0, str2Len));
2584
2585 if (NULL != locale) {
2586 str1LocalizedIndex = str1Index;
2587 str2LocalizedIndex = str2Index;
2588
2589 // We temporarily disable kCFCompareDiacriticInsensitive for SL <rdar://problem/6767096>. Should be revisited in NMOS <rdar://problem/7003830>
2590 if (forceOrdering) {
2591 diacriticsInsensitive = false;
2592 compareOptions &= ~kCFCompareDiacriticInsensitive;
2593 }
2594 }
2595 while ((str1Index < rangeToCompare.length) && (str2Index < str2Len)) {
2596 if (strBuf1Len == 0) {
2597 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2598 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str1Char += ('a' - 'A');
2599 str1UsedLen = 1;
2600 } else {
2601 str1Char = strBuf1[strBuf1Index++];
2602 }
2603 if (strBuf2Len == 0) {
2604 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2605 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str2Char += ('a' - 'A');
2606 str2UsedLen = 1;
2607 } else {
2608 str2Char = strBuf2[strBuf2Index++];
2609 }
2610
2611 if (numerically && ((0 == strBuf1Len) && (str1Char <= '9') && (str1Char >= '0')) && ((0 == strBuf2Len) && (str2Char <= '9') && (str2Char >= '0'))) { // If both are not ASCII digits, then don't do numerical comparison here
2612 uint64_t intValue1 = 0, intValue2 = 0; // !!! Doesn't work if numbers are > max uint64_t
2613 CFIndex str1NumRangeIndex = str1Index;
2614 CFIndex str2NumRangeIndex = str2Index;
2615
2616 do {
2617 intValue1 = (intValue1 * 10) + (str1Char - '0');
2618 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, ++str1Index);
2619 } while ((str1Char <= '9') && (str1Char >= '0'));
2620
2621 do {
2622 intValue2 = intValue2 * 10 + (str2Char - '0');
2623 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, ++str2Index);
2624 } while ((str2Char <= '9') && (str2Char >= '0'));
2625
2626 if (intValue1 == intValue2) {
2627 if (forceOrdering && (kCFCompareEqualTo == compareResult) && ((str1Index - str1NumRangeIndex) != (str2Index - str2NumRangeIndex))) {
2628 compareResult = (((str1Index - str1NumRangeIndex) < (str2Index - str2NumRangeIndex)) ? kCFCompareLessThan : kCFCompareGreaterThan);
2629 numericEquivalence = true;
2630 forcedIndex1 = str1NumRangeIndex;
2631 forcedIndex2 = str2NumRangeIndex;
2632 }
2633
2634 continue;
2635 } else if (intValue1 < intValue2) {
2636 if (freeLocale && locale) {
2637 CFRelease(locale);
2638 }
2639 return kCFCompareLessThan;
2640 } else {
2641 if (freeLocale && locale) {
2642 CFRelease(locale);
2643 }
2644 return kCFCompareGreaterThan;
2645 }
2646 }
2647
2648 if (str1Char != str2Char) {
2649 if (!equalityOptions) {
2650 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale));
2651 if (freeLocale && locale) {
2652 CFRelease(locale);
2653 }
2654 return compareResult;
2655 }
2656
2657 if (forceOrdering && (kCFCompareEqualTo == compareResult)) {
2658 compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2659 forcedIndex1 = str1LocalizedIndex;
2660 forcedIndex2 = str2LocalizedIndex;
2661 }
2662
2663 if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars)) {
2664 if (NULL != locale) {
2665 compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale);
2666 if (freeLocale && locale) {
2667 CFRelease(locale);
2668 }
2669 return compareResult;
2670 } else if (!caseInsensitive) {
2671 if (freeLocale && locale) {
2672 CFRelease(locale);
2673 }
2674 return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2675 }
2676 }
2677
2678 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2679 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2680 str1UsedLen = 2;
2681 }
2682
2683 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
2684 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2685 str2UsedLen = 2;
2686 }
2687
2688 if (NULL != ignoredChars) {
2689 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
2690 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2691 if (strBuf1Len == 0) str1Index += str1UsedLen;
2692 if (strBuf2Len > 0) --strBuf2Index;
2693 continue;
2694 }
2695 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
2696 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2697 if (strBuf2Len == 0) str2Index += str2UsedLen;
2698 if (strBuf1Len > 0) -- strBuf1Index;
2699 continue;
2700 }
2701 }
2702
2703 if (diacriticsInsensitive && (str1Index > 0)) {
2704 bool str1Skip = false;
2705 bool str2Skip = false;
2706
2707 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
2708 str1Char = str2Char;
2709 str1Skip = true;
2710 }
2711 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
2712 str2Char = str1Char;
2713 str2Skip = true;
2714 }
2715
2716 if (str1Skip != str2Skip) {
2717 if (str1Skip) str2Index -= str2UsedLen;
2718 if (str2Skip) str1Index -= str1UsedLen;
2719 }
2720 }
2721
2722 if (str1Char != str2Char) {
2723 if (0 == strBuf1Len) {
2724 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
2725 if (strBuf1Len > 0) {
2726 str1Char = *strBuf1;
2727 strBuf1Index = 1;
2728 }
2729 }
2730
2731 if ((0 == strBuf1Len) && (0 < strBuf2Len)) {
2732 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2733 if (freeLocale && locale) {
2734 CFRelease(locale);
2735 }
2736 return compareResult;
2737 }
2738
2739 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
2740 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
2741 if (strBuf2Len > 0) {
2742 str2Char = *strBuf2;
2743 strBuf2Index = 1;
2744 }
2745 if ((0 == strBuf2Len) || (str1Char != str2Char)) {
2746 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2747 if (freeLocale && locale) {
2748 CFRelease(locale);
2749 }
2750 return compareResult;
2751 }
2752 }
2753 }
2754
2755 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
2756 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2757 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
2758 ++strBuf1Index; ++strBuf2Index;
2759 }
2760 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2761 CFComparisonResult res = ((NULL == locale) ? ((strBuf1[strBuf1Index] < strBuf2[strBuf2Index]) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2762 if (freeLocale && locale) {
2763 CFRelease(locale);
2764 }
2765 return res;
2766 }
2767 }
2768 }
2769
2770 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2771 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2772
2773 if (strBuf1Len == 0) str1Index += str1UsedLen;
2774 if (strBuf2Len == 0) str2Index += str2UsedLen;
2775 if ((strBuf1Len == 0) && (strBuf2Len == 0)) {
2776 str1LocalizedIndex = str1Index;
2777 str2LocalizedIndex = str2Index;
2778 }
2779 }
2780
2781 if (diacriticsInsensitive || (NULL != ignoredChars)) {
2782 while (str1Index < rangeToCompare.length) {
2783 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2784 if ((str1Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
2785
2786 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2787
2788 if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char))) break;
2789
2790 str1Index += ((str1Char < 0x10000) ? 1 : 2);
2791 }
2792
2793 while (str2Index < str2Len) {
2794 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2795 if ((str2Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
2796
2797 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2798
2799 if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char))) break;
2800
2801 str2Index += ((str2Char < 0x10000) ? 1 : 2);
2802 }
2803 }
2804 // Need to recalc localized result here for forced ordering, ICU cannot do numericEquivalence
2805 if (!numericEquivalence && (NULL != locale) && (kCFCompareEqualTo != compareResult) && (str1Index == rangeToCompare.length) && (str2Index == str2Len)) compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(forcedIndex1, rangeToCompare.length - forcedIndex1), &inlineBuf2, CFRangeMake(forcedIndex2, str2Len - forcedIndex2), compareOptions, locale);
2806
2807 if (freeLocale && locale) {
2808 CFRelease(locale);
2809 }
2810
2811 return ((str1Index < rangeToCompare.length) ? kCFCompareGreaterThan : ((str2Index < str2Len) ? kCFCompareLessThan : compareResult));
2812 }
2813
2814
2815 CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions) { return CFStringCompareWithOptionsAndLocale(string, string2, rangeToCompare, compareOptions, NULL); }
2816
2817 CFComparisonResult CFStringCompare(CFStringRef string, CFStringRef str2, CFStringCompareFlags options) {
2818 return CFStringCompareWithOptions(string, str2, CFRangeMake(0, CFStringGetLength(string)), options);
2819 }
2820
2821 Boolean CFStringFindWithOptionsAndLocale(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFLocaleRef locale, CFRange *result) {
2822 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2823 CFIndex findStrLen = CFStringGetLength(stringToFind);
2824 Boolean didFind = false;
2825 bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive)) ? true : false);
2826 CFCharacterSetInlineBuffer *ignoredChars = NULL;
2827 CFCharacterSetInlineBuffer csetBuffer;
2828
2829 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
2830 ignoredChars = &csetBuffer;
2831 lengthVariants = true;
2832 }
2833
2834 if ((findStrLen > 0) && (rangeToSearch.length > 0) && ((findStrLen <= rangeToSearch.length) || lengthVariants)) {
2835 UTF32Char strBuf1[kCFStringStackBufferLength];
2836 UTF32Char strBuf2[kCFStringStackBufferLength];
2837 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2838 UTF32Char str1Char = 0, str2Char = 0;
2839 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2840 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2841 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(stringToFind, eightBitEncoding);
2842 const UTF32Char *characters, *charactersLimit;
2843 const uint8_t *langCode = NULL;
2844 CFIndex fromLoc, toLoc;
2845 CFIndex str1Index, str2Index;
2846 CFIndex strBuf1Len, strBuf2Len;
2847 CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length);
2848 bool equalityOptions = ((lengthVariants || (compareOptions & kCFCompareWidthInsensitive)) ? true : false);
2849 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2850 bool forwardAnchor = ((kCFCompareAnchored == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
2851 bool backwardAnchor = (((kCFCompareBackwards|kCFCompareAnchored) == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
2852 int8_t delta;
2853
2854 if (NULL == locale) {
2855 if (compareOptions & kCFCompareLocalized) {
2856 CFLocaleRef currentLocale = CFLocaleCopyCurrent();
2857 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(currentLocale);
2858 CFRelease(currentLocale);
2859 }
2860 } else {
2861 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale);
2862 }
2863
2864 CFStringInitInlineBuffer(string, &inlineBuf1, CFRangeMake(0, rangeToSearch.location + rangeToSearch.length));
2865 CFStringInitInlineBuffer(stringToFind, &inlineBuf2, CFRangeMake(0, findStrLen));
2866
2867 if (compareOptions & kCFCompareBackwards) {
2868 fromLoc = rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen);
2869 toLoc = (((compareOptions & kCFCompareAnchored) && !lengthVariants) ? fromLoc : rangeToSearch.location);
2870 } else {
2871 fromLoc = rangeToSearch.location;
2872 toLoc = ((compareOptions & kCFCompareAnchored) ? fromLoc : rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen));
2873 }
2874
2875 delta = ((fromLoc <= toLoc) ? 1 : -1);
2876
2877 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2878 uint8_t str1Byte, str2Byte;
2879
2880 while (1) {
2881 str1Index = fromLoc;
2882 str2Index = 0;
2883
2884 while ((str1Index < maxStr1Index) && (str2Index < findStrLen)) {
2885 str1Byte = str1Bytes[str1Index];
2886 str2Byte = str2Bytes[str2Index];
2887
2888 if (str1Byte != str2Byte) {
2889 if (equalityOptions) {
2890 if ((str1Byte < 0x80) && ((NULL == langCode) || ('I' != str1Byte))) {
2891 if (caseInsensitive && (str1Byte >= 'A') && (str1Byte <= 'Z')) str1Byte += ('a' - 'A');
2892 *strBuf1 = str1Byte;
2893 strBuf1Len = 1;
2894 } else {
2895 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2896 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2897 if (1 > strBuf1Len) {
2898 *strBuf1 = str1Char;
2899 strBuf1Len = 1;
2900 }
2901 }
2902
2903 if ((NULL != ignoredChars) && (forwardAnchor || (str1Index != fromLoc)) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str1Byte < 0x80) ? str1Byte : str1Char))) {
2904 ++str1Index;
2905 continue;
2906 }
2907
2908 if ((str2Byte < 0x80) && ((NULL == langCode) || ('I' != str2Byte))) {
2909 if (caseInsensitive && (str2Byte >= 'A') && (str2Byte <= 'Z')) str2Byte += ('a' - 'A');
2910 *strBuf2 = str2Byte;
2911 strBuf2Len = 1;
2912 } else {
2913 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2914 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2915 if (1 > strBuf2Len) {
2916 *strBuf2 = str2Char;
2917 strBuf2Len = 1;
2918 }
2919 }
2920
2921 if ((NULL != ignoredChars) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str2Byte < 0x80) ? str2Byte : str2Char))) {
2922 ++str2Index;
2923 continue;
2924 }
2925
2926 if ((1 == strBuf1Len) && (1 == strBuf2Len)) { // normal case
2927 if (*strBuf1 != *strBuf2) break;
2928 } else {
2929 CFIndex delta;
2930
2931 if (!caseInsensitive && (strBuf1Len != strBuf2Len)) break;
2932 if (memcmp(strBuf1, strBuf2, sizeof(UTF32Char) * __CFMin(strBuf1Len, strBuf2Len))) break;
2933
2934 if (strBuf1Len < strBuf2Len) {
2935 delta = strBuf2Len - strBuf1Len;
2936
2937 if ((str1Index + strBuf1Len + delta) > maxStr1Index) break;
2938
2939 characters = &(strBuf2[strBuf1Len]);
2940 charactersLimit = characters + delta;
2941
2942 while (characters < charactersLimit) {
2943 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1), &inlineBuf1, str1Index + 1, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2944 if ((strBuf1Len > 0) || (*characters != *strBuf1)) break;
2945 ++characters; ++str1Index;
2946 }
2947 if (characters < charactersLimit) break;
2948 } else if (strBuf2Len < strBuf1Len) {
2949 delta = strBuf1Len - strBuf2Len;
2950
2951 if ((str2Index + strBuf2Len + delta) > findStrLen) break;
2952
2953 characters = &(strBuf1[strBuf2Len]);
2954 charactersLimit = characters + delta;
2955
2956 while (characters < charactersLimit) {
2957 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str1Index + 1), &inlineBuf2, str2Index + 1, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2958 if ((strBuf2Len > 0) || (*characters != *strBuf2)) break;
2959 ++characters; ++str2Index;
2960 }
2961 if (characters < charactersLimit) break;
2962 }
2963 }
2964 } else {
2965 break;
2966 }
2967 }
2968 ++str1Index; ++str2Index;
2969 }
2970
2971 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
2972 while (str2Index < findStrLen) {
2973 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2974
2975 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
2976 ++str2Index;
2977 }
2978 }
2979
2980 if (str2Index == findStrLen) {
2981 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
2982 while (str1Index < maxStr1Index) {
2983 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2984
2985 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
2986 ++str1Index;
2987 }
2988 }
2989
2990 if (!backwardAnchor || (str1Index == maxStr1Index)) {
2991 didFind = true;
2992 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
2993 }
2994 break;
2995 }
2996
2997 if (fromLoc == toLoc) break;
2998 fromLoc += delta;
2999 }
3000 } else if (equalityOptions) {
3001 UTF16Char otherChar;
3002 CFIndex str1UsedLen, str2UsedLen, strBuf1Index = 0, strBuf2Index = 0;
3003 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
3004 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
3005 const uint8_t *combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3006
3007 while (1) {
3008 str1Index = fromLoc;
3009 str2Index = 0;
3010
3011 strBuf1Len = strBuf2Len = 0;
3012
3013 while (str2Index < findStrLen) {
3014 if (strBuf1Len == 0) {
3015 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3016 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A');
3017 str1UsedLen = 1;
3018 } else {
3019 str1Char = strBuf1[strBuf1Index++];
3020 }
3021 if (strBuf2Len == 0) {
3022 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
3023 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A');
3024 str2UsedLen = 1;
3025 } else {
3026 str2Char = strBuf2[strBuf2Index++];
3027 }
3028
3029 if (str1Char != str2Char) {
3030 if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars) && ((NULL == langCode) || !caseInsensitive)) break;
3031
3032 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3033 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3034 str1UsedLen = 2;
3035 }
3036
3037 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
3038 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
3039 str2UsedLen = 2;
3040 }
3041
3042 if (NULL != ignoredChars) {
3043 if ((forwardAnchor || (str1Index != fromLoc)) && (str1Index < maxStr1Index) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
3044 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
3045 if (strBuf1Len == 0) str1Index += str1UsedLen;
3046 if (strBuf2Len > 0) --strBuf2Index;
3047 continue;
3048 }
3049 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
3050 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
3051 if (strBuf2Len == 0) str2Index += str2UsedLen;
3052 if (strBuf1Len > 0) -- strBuf1Index;
3053 continue;
3054 }
3055 }
3056
3057 if (diacriticsInsensitive && (str1Index > fromLoc)) {
3058 bool str1Skip = false;
3059 bool str2Skip = false;
3060
3061 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
3062 str1Char = str2Char;
3063 str1Skip = true;
3064 }
3065 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
3066 str2Char = str1Char;
3067 str2Skip = true;
3068 }
3069
3070 if (str1Skip != str2Skip) {
3071 if (str1Skip) str2Index -= str2UsedLen;
3072 if (str2Skip) str1Index -= str1UsedLen;
3073 }
3074 }
3075
3076 if (str1Char != str2Char) {
3077 if (0 == strBuf1Len) {
3078 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
3079 if (strBuf1Len > 0) {
3080 str1Char = *strBuf1;
3081 strBuf1Index = 1;
3082 }
3083 }
3084
3085 if ((0 == strBuf1Len) && (0 < strBuf2Len)) break;
3086
3087 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
3088 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
3089 if ((0 == strBuf2Len) || (str1Char != *strBuf2)) break;
3090 strBuf2Index = 1;
3091 }
3092 }
3093
3094 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
3095 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
3096 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
3097 ++strBuf1Index; ++strBuf2Index;
3098 }
3099 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) break;
3100 }
3101 }
3102
3103 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
3104 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
3105
3106 if (strBuf1Len == 0) str1Index += str1UsedLen;
3107 if (strBuf2Len == 0) str2Index += str2UsedLen;
3108 }
3109
3110 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
3111 while (str2Index < findStrLen) {
3112 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
3113 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
3114 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
3115 }
3116 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
3117 str2Index += ((str2Char < 0x10000) ? 1 : 2);
3118 }
3119 }
3120
3121 if (str2Index == findStrLen) {
3122 bool match = true;
3123
3124 if (strBuf1Len > 0) {
3125 match = false;
3126
3127 if (diacriticsInsensitive && (strBuf1[0] < 0x0510)) {
3128 while (strBuf1Index < strBuf1Len) {
3129 if (!CFUniCharIsMemberOfBitmap(strBuf1[strBuf1Index], ((strBuf1[strBuf1Index] < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (strBuf1[strBuf1Index] >> 16))))) break;
3130 ++strBuf1Index;
3131 }
3132
3133 if (strBuf1Index == strBuf1Len) {
3134 str1Index += str1UsedLen;
3135 match = true;
3136 }
3137 }
3138 }
3139
3140 if (match && (compareOptions & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) && (str1Index < maxStr1Index)) {
3141 const uint8_t *nonBaseBitmap;
3142
3143 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3144
3145 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3146 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3147 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16));
3148 } else {
3149 nonBaseBitmap = graphemeBMP;
3150 }
3151
3152 if (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBitmap)) {
3153 if (diacriticsInsensitive) {
3154 if (str1Char < 0x10000) {
3155 CFIndex index = str1Index;
3156
3157 do {
3158 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, --index);
3159 } while (CFUniCharIsMemberOfBitmap(str1Char, graphemeBMP), (rangeToSearch.location < index));
3160
3161 if (str1Char < 0x0510) {
3162 while (++str1Index < maxStr1Index) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), graphemeBMP)) break;
3163 }
3164 }
3165 } else {
3166 match = false;
3167 }
3168 } else if (!diacriticsInsensitive) {
3169 otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index - 1);
3170
3171 // this is assuming viramas are only in BMP ???
3172 if ((str1Char == COMBINING_GRAPHEME_JOINER) || (otherChar == COMBINING_GRAPHEME_JOINER) || (otherChar == ZERO_WIDTH_JOINER) || ((otherChar >= HANGUL_CHOSEONG_START) && (otherChar <= HANGUL_JONGSEONG_END)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar, combClassBMP) == 9)) {
3173 CFRange clusterRange = CFStringGetRangeOfCharacterClusterAtIndex(string, str1Index - 1, kCFStringGraphemeCluster);
3174
3175 if (str1Index < (clusterRange.location + clusterRange.length)) match = false;
3176 }
3177 }
3178 }
3179
3180 if (match) {
3181 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
3182 while (str1Index < maxStr1Index) {
3183 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3184 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3185 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3186 }
3187 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
3188 str1Index += ((str1Char < 0x10000) ? 1 : 2);
3189 }
3190 }
3191
3192 if (!backwardAnchor || (str1Index == maxStr1Index)) {
3193 didFind = true;
3194 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
3195 }
3196 break;
3197 }
3198 }
3199
3200 if (fromLoc == toLoc) break;
3201 fromLoc += delta;
3202 }
3203 } else {
3204 while (1) {
3205 str1Index = fromLoc;
3206 str2Index = 0;
3207
3208 while (str2Index < findStrLen) {
3209 if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index)) break;
3210
3211 ++str1Index; ++str2Index;
3212 }
3213
3214 if (str2Index == findStrLen) {
3215 didFind = true;
3216 if (NULL != result) *result = CFRangeMake(fromLoc, findStrLen);
3217 break;
3218 }
3219
3220 if (fromLoc == toLoc) break;
3221 fromLoc += delta;
3222 }
3223 }
3224 }
3225
3226 return didFind;
3227 }
3228
3229
3230 Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFRange *result) { return CFStringFindWithOptionsAndLocale(string, stringToFind, rangeToSearch, compareOptions, NULL, result); }
3231
3232 // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
3233
3234 static const void *__rangeRetain(CFAllocatorRef allocator, const void *ptr) {
3235 CFRetain(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
3236 return ptr;
3237 }
3238
3239 static void __rangeRelease(CFAllocatorRef allocator, const void *ptr) {
3240 CFRelease(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
3241 }
3242
3243 static CFStringRef __rangeCopyDescription(const void *ptr) {
3244 CFRange range = *(CFRange *)ptr;
3245 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("{%d, %d}"), range.location, range.length);
3246 }
3247
3248 static Boolean __rangeEqual(const void *ptr1, const void *ptr2) {
3249 CFRange range1 = *(CFRange *)ptr1;
3250 CFRange range2 = *(CFRange *)ptr2;
3251 return (range1.location == range2.location) && (range1.length == range2.length);
3252 }
3253
3254
3255 CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
3256 CFRange foundRange;
3257 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
3258 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
3259 CFMutableDataRef rangeStorage = NULL; // Basically an array of CFRange, CFDataRef (packed)
3260 uint8_t *rangeStorageBytes = NULL;
3261 CFIndex foundCount = 0;
3262 CFIndex capacity = 0; // Number of CFRange, CFDataRef element slots in rangeStorage
3263
3264 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3265
3266 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
3267 // Determine the next range
3268 if (backwards) {
3269 rangeToSearch.length = foundRange.location - rangeToSearch.location;
3270 } else {
3271 rangeToSearch.location = foundRange.location + foundRange.length;
3272 rangeToSearch.length = endIndex - rangeToSearch.location;
3273 }
3274
3275 // If necessary, grow the data and squirrel away the found range
3276 if (foundCount >= capacity) {
3277 // Note that rangeStorage is not allowed to be allocated from one of the GCRefZero allocators
3278 if (rangeStorage == NULL) rangeStorage = CFDataCreateMutable(_CFConvertAllocatorToNonGCRefZeroEquivalent(alloc), 0);
3279 capacity = (capacity + 4) * 2;
3280 CFDataSetLength(rangeStorage, capacity * (sizeof(CFRange) + sizeof(CFDataRef)));
3281 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage) + foundCount * (sizeof(CFRange) + sizeof(CFDataRef));
3282 }
3283 memmove(rangeStorageBytes, &foundRange, sizeof(CFRange)); // The range
3284 memmove(rangeStorageBytes + sizeof(CFRange), &rangeStorage, sizeof(CFDataRef)); // The data
3285 rangeStorageBytes += (sizeof(CFRange) + sizeof(CFDataRef));
3286 foundCount++;
3287 }
3288
3289 if (foundCount > 0) {
3290 CFIndex cnt;
3291 CFMutableArrayRef array;
3292 const CFArrayCallBacks callbacks = {0, __rangeRetain, __rangeRelease, __rangeCopyDescription, __rangeEqual};
3293
3294 CFDataSetLength(rangeStorage, foundCount * (sizeof(CFRange) + sizeof(CFDataRef))); // Tighten storage up
3295 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage);
3296
3297 array = CFArrayCreateMutable(alloc, foundCount * sizeof(CFRange *), &callbacks);
3298 for (cnt = 0; cnt < foundCount; cnt++) {
3299 // Each element points to the appropriate CFRange in the CFData
3300 CFArrayAppendValue(array, rangeStorageBytes + cnt * (sizeof(CFRange) + sizeof(CFDataRef)));
3301 }
3302 CFRelease(rangeStorage); // We want the data to go away when all CFRanges inside it are released...
3303 return array;
3304 } else {
3305 return NULL;
3306 }
3307 }
3308
3309
3310 CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFStringCompareFlags compareOptions) {
3311 CFRange foundRange;
3312
3313 if (CFStringFindWithOptions(string, stringToFind, CFRangeMake(0, CFStringGetLength(string)), compareOptions, &foundRange)) {
3314 return foundRange;
3315 } else {
3316 return CFRangeMake(kCFNotFound, 0);
3317 }
3318 }
3319
3320 Boolean CFStringHasPrefix(CFStringRef string, CFStringRef prefix) {
3321 return CFStringFindWithOptions(string, prefix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored, NULL);
3322 }
3323
3324 Boolean CFStringHasSuffix(CFStringRef string, CFStringRef suffix) {
3325 return CFStringFindWithOptions(string, suffix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored|kCFCompareBackwards, NULL);
3326 }
3327
3328 #define MAX_TRANSCODING_LENGTH 4
3329
3330 #define HANGUL_JONGSEONG_COUNT (28)
3331
3332 CF_INLINE bool _CFStringIsHangulLVT(UTF32Char character) {
3333 return (((character - HANGUL_SYLLABLE_START) % HANGUL_JONGSEONG_COUNT) ? true : false);
3334 }
3335
/* Table of 16 length values, laid out four per row.
   NOTE(review): the indexing scheme is defined by the code that reads this table, which is not visible here. */
static uint8_t __CFTranscodingHintLength[] = {
    2, 3, 4, 4,
    4, 4, 4, 2,
    2, 2, 2, 4,
    0, 0, 0, 0
};
3339
/* States used while scanning Hangul sequences for composed-range detection. */
enum {
    kCFStringHangulStateL = 0,      // character below HANGUL_JUNGSEONG_START
    kCFStringHangulStateV = 1,      // character below HANGUL_JONGSEONG_START
    kCFStringHangulStateT = 2,      // character below HANGUL_SYLLABLE_START
    kCFStringHangulStateLV = 3,     // precomposed syllable for which _CFStringIsHangulLVT() is false
    kCFStringHangulStateLVT = 4,    // precomposed syllable for which _CFStringIsHangulLVT() is true
    kCFStringHangulStateBreak = 5   // stop extending the cluster
};
3348
/*
 * Computes the range of the composed character sequence that contains the character at index
 * `start` of `buffer`.
 *
 *  buffer     inline buffer the characters are read from
 *  start      index of the character of interest; moved backward while extending
 *  type       cluster type; for kCFStringBackwardDeletionCluster, characters in 0x0530-0x194F
 *             are never combined
 *  bmpBitmap  membership bitmap used for characters below 0x10000
 *  csetType   character set identifier handed to CFUniCharGetBitmapPtrForPlane() to obtain the
 *             bitmap for characters assembled from a surrogate pair
 *
 * Returns CFRangeMake(start, end - start), where `start` has been extended backward and `end`
 * forward over bitmap members, the code points 0xFF9E / 0xFF9F, code points in 0xF870-0xF87F,
 * and (for Hangul) over characters accepted by the state machine below.
 */
3349 static CFRange _CFStringInlineBufferGetComposedRange(CFStringInlineBuffer *buffer, CFIndex start, CFStringCharacterClusterType type, const uint8_t *bmpBitmap, CFIndex csetType) {
3350 CFIndex end = start + 1;
3351 const uint8_t *bitmap = bmpBitmap;
3352 UTF32Char character;
3353 UTF16Char otherSurrogate;
3354 uint8_t step;
3355
3356 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3357
3358 // We don't combine characters in Armenian ~ Limbu range for backward deletion
3359 if ((type != kCFStringBackwardDeletionCluster) || (character < 0x0530) || (character > 0x194F)) {
3360 // Check if the current is surrogate
// A high surrogate followed by a low surrogate is merged into one long character, `end` grows by one, and the bitmap of that character's plane is used
3361 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start + 1)))) {
3362 ++end;
3363 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3364 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3365 }
3366
3367 // Extend backward
// Each pass tests `character` (the character at `start`); while it qualifies, `start` moves back by one and the previous character is loaded
3368 while (start > 0) {
3369 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3370
3371 if (character < 0x10000) { // the first round could be already be non-BMP
// `start` may sit on the low half of a pair: merge with the preceding high surrogate (note the (high, low) argument order) and step onto it
3372 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)))) {
3373 character = CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate, character);
3374 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3375 if (--start == 0) break; // starting with non-BMP combining mark
3376 } else {
3377 bitmap = bmpBitmap;
3378 }
3379 }
3380
// Stop at the first character that is neither in the bitmap, nor 0xFF9E / 0xFF9F, nor within 0xF870-0xF87F
3381 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3382
3383 --start;
3384
3385 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3386 }
3387 }
3388
3389 // Hangul
// Entered only when the character now at `start` lies in the CHOSEONG..JONGSEONG or the SYLLABLE range
3390 if (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END))) {
3391 uint8_t state;
3392 uint8_t initialState;
3393
// Classify the starting character into state L, V, T, LV or LVT by comparing against the range boundaries
3394 if (character < HANGUL_JUNGSEONG_START) {
3395 state = kCFStringHangulStateL;
3396 } else if (character < HANGUL_JONGSEONG_START) {
3397 state = kCFStringHangulStateV;
3398 } else if (character < HANGUL_SYLLABLE_START) {
3399 state = kCFStringHangulStateT;
3400 } else {
3401 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3402 }
// Remembered so the forward pass below can restart from the same classification
3403 initialState = state;
3404
3405 // Extend backward
// Reads index start - 1 without a start > 0 guard.
// NOTE(review): presumably the inline-buffer accessor yields a value below HANGUL_CHOSEONG_START for index -1, which ends the loop - confirm.
3406 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)) >= HANGUL_CHOSEONG_START) && (character <= HANGUL_SYLLABLE_END) && ((character <= HANGUL_JONGSEONG_END) || (character >= HANGUL_SYLLABLE_START))) {
3407 switch (state) {
3408 case kCFStringHangulStateV:
3409 if (character <= HANGUL_CHOSEONG_END) {
3410 state = kCFStringHangulStateL;
3411 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END) && !_CFStringIsHangulLVT(character)) {
3412 state = kCFStringHangulStateLV;
3413 } else if (character > HANGUL_JUNGSEONG_END) {
3414 state = kCFStringHangulStateBreak;
3415 }
3416 break;
3417
3418 case kCFStringHangulStateT:
3419 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JUNGSEONG_END)) {
3420 state = kCFStringHangulStateV;
3421 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)) {
3422 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3423 } else if (character < HANGUL_JUNGSEONG_START) {
3424 state = kCFStringHangulStateBreak;
3425 }
3426 break;
3427
// Every other state (L, LV, LVT) accepts only a preceding character below HANGUL_JUNGSEONG_START
3428 default:
3429 state = ((character < HANGUL_JUNGSEONG_START) ? kCFStringHangulStateL : kCFStringHangulStateBreak);
3430 break;
3431 }
3432
3433 if (state == kCFStringHangulStateBreak) break;
3434 --start;
3435 }
3436
3437 // Extend forward
3438 state = initialState;
// A character value of 0 ends the scan, as does any character outside the two Hangul ranges
3439 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) && (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)))) {
3440 switch (state) {
3441 case kCFStringHangulStateLV:
3442 case kCFStringHangulStateV:
3443 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) {
3444 state = ((character < HANGUL_JONGSEONG_START) ? kCFStringHangulStateV : kCFStringHangulStateT);
3445 } else {
3446 state = kCFStringHangulStateBreak;
3447 }
3448 break;
3449
3450 case kCFStringHangulStateLVT:
3451 case kCFStringHangulStateT:
3452 state = (((character >= HANGUL_JONGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) ? kCFStringHangulStateT : kCFStringHangulStateBreak);
3453 break;
3454
// Remaining state (L): the next character is classified from scratch; only the JONGSEONG range breaks
3455 default:
3456 if (character < HANGUL_JUNGSEONG_START) {
3457 state = kCFStringHangulStateL;
3458 } else if (character < HANGUL_JONGSEONG_START) {
3459 state = kCFStringHangulStateV;
3460 } else if (character >= HANGUL_SYLLABLE_START) {
3461 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3462 } else {
3463 state = kCFStringHangulStateBreak;
3464 }
3465 break;
3466 }
3467
3468 if (state == kCFStringHangulStateBreak) break;
3469 ++end;
3470 }
3471 }
3472
3473 // Extend forward
// Same membership test as the backward pass, applied to the characters starting at `end`; a character value of 0 ends the scan
3474 while ((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) {
3475 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3476
// `step` is the number of UTF-16 units the candidate occupies: 2 for a surrogate pair, otherwise 1
3477 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, end + 1)))) {
3478 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3479 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3480 step = 2;
3481 } else {
3482 bitmap = bmpBitmap;
3483 step = 1;
3484 }
3485
3486 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3487
3488 end += step;
3489 }
3490
3491 return CFRangeMake(start, end - start);
3492 }
3493
3494 CF_INLINE bool _CFStringIsVirama(UTF32Char character, const uint8_t *combClassBMP) {
3495 return ((character == COMBINING_GRAPHEME_JOINER) || (CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)((character < 0x10000) ? combClassBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (character >> 16)))) == 9) ? true : false);
3496 }
3497
3498 CFRange CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string, CFIndex charIndex, CFStringCharacterClusterType type) {
3499 CFRange range;
3500 CFIndex currentIndex;
3501 CFIndex length = CFStringGetLength(string);
3502 CFIndex csetType = ((kCFStringGraphemeCluster == type) ? kCFUniCharGraphemeExtendCharacterSet : kCFUniCharNonBaseCharacterSet);
3503 CFStringInlineBuffer stringBuffer;
3504 const uint8_t *bmpBitmap;
3505 const uint8_t *letterBMP;
3506 static const uint8_t *combClassBMP = NULL;
3507 UTF32Char character;
3508 UTF16Char otherSurrogate;
3509
3510 if (charIndex >= length) return CFRangeMake(kCFNotFound, 0);
3511
3512 /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
3513 */
3514 if (!CF_IS_OBJC(__kCFStringTypeID, string) && __CFStrIsEightBit(string)) return CFRangeMake(charIndex, 1);
3515
3516 bmpBitmap = CFUniCharGetBitmapPtrForPlane(csetType, 0);
3517 letterBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, 0);
3518 if (NULL == combClassBMP) combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3519
3520 CFStringInitInlineBuffer(string, &stringBuffer, CFRangeMake(0, length));
3521
3522 // Get composed character sequence first
3523 range = _CFStringInlineBufferGetComposedRange(&stringBuffer, charIndex, type, bmpBitmap, csetType);
3524
3525 // Do grapheme joiners
3526 if (type < kCFStringCursorMovementCluster) {
3527 const uint8_t *letter = letterBMP;
3528
3529 // Check to see if we have a letter at the beginning of initial cluster
3530 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location);
3531
3532 if ((range.length > 1) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location + 1)))) {
3533 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3534 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3535 }
3536
3537 if ((character == ZERO_WIDTH_JOINER) || CFUniCharIsMemberOfBitmap(character, letter)) {
3538 CFRange otherRange;
3539
3540 // Check if preceded by grapheme joiners (U034F and viramas)
3541 otherRange.location = currentIndex = range.location;
3542
3543 while (currentIndex > 1) {
3544 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex);
3545
3546 // ??? We're assuming viramas only in BMP
3547 if ((_CFStringIsVirama(character, combClassBMP) || ((character == ZERO_WIDTH_JOINER) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex), combClassBMP))) && (currentIndex > 0)) {
3548 --currentIndex;
3549 } else {
3550 break;
3551 }
3552
3553 currentIndex = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType).location;
3554
3555 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3556
3557 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1)))) {
3558 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3559 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3560 --currentIndex;
3561 } else {
3562 letter = letterBMP;
3563 }
3564
3565 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3566 range.location = currentIndex;
3567 }
3568
3569 range.length += otherRange.location - range.location;
3570
3571 // Check if followed by grapheme joiners
3572 if ((range.length > 1) && ((range.location + range.length) < length)) {
3573 otherRange = range;
3574 currentIndex = otherRange.location + otherRange.length;
3575
3576 do {
3577 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1);
3578
3579 // ??? We're assuming viramas only in BMP
3580 if ((character != ZERO_WIDTH_JOINER) && !_CFStringIsVirama(character, combClassBMP)) break;
3581
3582 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3583
3584 if (character == ZERO_WIDTH_JOINER) character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, ++currentIndex);
3585
3586 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex + 1)))) {
3587 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3588 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3589 } else {
3590 letter = letterBMP;
3591 }
3592
3593 // We only conjoin letters
3594 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3595 otherRange = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType);
3596 currentIndex = otherRange.location + otherRange.length;
3597 } while ((otherRange.location + otherRange.length) < length);
3598 range.length = currentIndex - range.location;
3599 }
3600 }
3601 }
3602
3603 // Check if we're part of prefix transcoding hints
3604 CFIndex otherIndex;
3605
3606 currentIndex = (range.location + range.length) - (MAX_TRANSCODING_LENGTH + 1);
3607 if (currentIndex < 0) currentIndex = 0;
3608
3609 while (currentIndex <= range.location) {
3610 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3611
3612 if ((character & 0x1FFFF0) == 0xF860) { // transcoding hint
3613 otherIndex = currentIndex + __CFTranscodingHintLength[(character - 0xF860)] + 1;
3614 if (otherIndex >= (range.location + range.length)) {
3615 if (otherIndex <= length) {
3616 range.location = currentIndex;
3617 range.length = otherIndex - currentIndex;
3618 }
3619 break;
3620 }
3621 }
3622 ++currentIndex;
3623 }
3624
3625 return range;
3626 }
3627
3628 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
3629 return CFStringGetRangeOfCharacterClusterAtIndex(theString, theIndex, kCFStringComposedCharacterCluster);
3630 }
3631
3632 /*!
3633 @function CFStringFindCharacterFromSet
3634 Query the range of characters contained in the specified character set.
3635 @param theString The CFString which is to be searched. If this
3636 parameter is not a valid CFString, the behavior is
3637 undefined.
3638 @param theSet The CFCharacterSet against which the membership
3639 of characters is checked. If this parameter is not a valid
3640 CFCharacterSet, the behavior is undefined.
3641 @param range The range of characters within the string to search. If
3642 the range location or end point (defined by the location
3643 plus length minus 1) are outside the index space of the
3644 string (0 to N-1 inclusive, where N is the length of the
3645 string), the behavior is undefined. If the range length is
3646 negative, the behavior is undefined. The range may be empty
3647 (length 0), in which case no search is performed.
3648 @param searchOptions The bitwise-or'ed option flags to control
3649 the search behavior. The supported options are
3650 kCFCompareBackwards and kCFCompareAnchored.
3651 If other option flags are specified, the behavior
3652 is undefined.
3653 @param result The pointer to a CFRange supplied by the caller in
3654 which the search result is stored. If a pointer to an invalid
3655 memory is specified, the behavior is undefined.
3656 @result true, if at least a character which is a member of the character
3657 set is found and result is filled, otherwise, false.
3658 */
3659 #define SURROGATE_START 0xD800
3660 #define SURROGATE_END 0xDFFF
3661
3662 CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFStringCompareFlags searchOptions, CFRange *result) {
3663 CFStringInlineBuffer stringBuffer;
3664 CFCharacterSetInlineBuffer csetBuffer;
3665 UniChar ch;
3666 CFIndex step;
3667 CFIndex fromLoc, toLoc, cnt; // fromLoc and toLoc are inclusive
3668 Boolean found = false;
3669 Boolean done = false;
3670
3671 //#warning FIX ME !! Should support kCFCompareNonliteral
3672
3673 if ((rangeToSearch.location + rangeToSearch.length > CFStringGetLength(theString)) || (rangeToSearch.length == 0)) return false;
3674
3675 if (searchOptions & kCFCompareBackwards) {
3676 fromLoc = rangeToSearch.location + rangeToSearch.length - 1;
3677 toLoc = rangeToSearch.location;
3678 } else {
3679 fromLoc = rangeToSearch.location;
3680 toLoc = rangeToSearch.location + rangeToSearch.length - 1;
3681 }
3682 if (searchOptions & kCFCompareAnchored) {
3683 toLoc = fromLoc;
3684 }
3685
3686 step = (fromLoc <= toLoc) ? 1 : -1;
3687 cnt = fromLoc;
3688
3689 CFStringInitInlineBuffer(theString, &stringBuffer, rangeToSearch);
3690 CFCharacterSetInitInlineBuffer(theSet, &csetBuffer);
3691
3692 do {
3693 ch = CFStringGetCharacterFromInlineBuffer(&stringBuffer, cnt - rangeToSearch.location);
3694 if ((ch >= SURROGATE_START) && (ch <= SURROGATE_END)) {
3695 int otherCharIndex = cnt + step;
3696
3697 if (((step < 0) && (otherCharIndex < toLoc)) || ((step > 0) && (otherCharIndex > toLoc))) {
3698 done = true;
3699 } else {
3700 UniChar highChar;
3701 UniChar lowChar = CFStringGetCharacterFromInlineBuffer(&stringBuffer, otherCharIndex - rangeToSearch.location);
3702
3703 if (cnt < otherCharIndex) {
3704 highChar = ch;
3705 } else {
3706 highChar = lowChar;
3707 lowChar = ch;
3708 }
3709
3710 if (CFUniCharIsSurrogateHighCharacter(highChar) && CFUniCharIsSurrogateLowCharacter(lowChar) && CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, CFUniCharGetLongCharacterForSurrogatePair(highChar, lowChar))) {
3711 if (result) *result = CFRangeMake((cnt < otherCharIndex ? cnt : otherCharIndex), 2);
3712 return true;
3713 } else if (otherCharIndex == toLoc) {
3714 done = true;
3715 } else {
3716 cnt = otherCharIndex + step;
3717 }
3718 }
3719 } else if (CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, ch)) {
3720 done = found = true;
3721 } else if (cnt == toLoc) {
3722 done = true;
3723 } else {
3724 cnt += step;
3725 }
3726 } while (!done);
3727
3728 if (found && result) *result = CFRangeMake(cnt, 1);
3729 return found;
3730 }
3731
3732 /* Line range code */
3733
3734 #define CarriageReturn '\r' /* 0x0d */
3735 #define NewLine '\n' /* 0x0a */
3736 #define NextLine 0x0085
3737 #define LineSeparator 0x2028
3738 #define ParaSeparator 0x2029
3739
3740 CF_INLINE Boolean isALineSeparatorTypeCharacter(UniChar ch, Boolean includeLineEndings) {
3741 if (ch > CarriageReturn && ch < NextLine) return false; /* Quick test to cover most chars */
3742 return (ch == NewLine || ch == CarriageReturn || ch == ParaSeparator || (includeLineEndings && (ch == NextLine || ch == LineSeparator))) ? true : false;
3743 }
3744
/*
 * Computes the line (or paragraph) bounds surrounding `range` in `string`.
 *
 *  string              the string to examine; asserted to be a CFString
 *  range               the range whose enclosing line(s) are wanted; asserted to be in bounds
 *  lineBeginIndex      optional out: index of the first character of the line containing range.location
 *  lineEndIndex        optional out: index just past the terminator of the line containing the end of range
 *  contentsEndIndex    optional out: index of that line's terminator (i.e. end of the contents)
 *  includeLineEndings  forwarded to isALineSeparatorTypeCharacter(); the callers in this file pass
 *                      true for line bounds and false for paragraph bounds
 *
 * Each out-parameter is written only when its pointer is non-NULL. A "\r\n" pair is treated as a
 * single two-character terminator.
 */
3745 static void __CFStringGetLineOrParagraphBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex, Boolean includeLineEndings) {
3746 CFIndex len;
3747 CFStringInlineBuffer buf;
3748 UniChar ch;
3749
3750 __CFAssertIsString(string);
3751 __CFAssertRangeIsInStringBounds(string, range.location, range.length);
3752
3753 len = __CFStrLength(string);
3754
3755 if (lineBeginIndex) {
3756 CFIndex start;
3757 if (range.location == 0) {
3758 start = 0;
3759 } else {
3760 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3761 CFIndex buf_idx = range.location;
3762
3763 /* Take care of the special case where start happens to fall right between \r and \n */
3764 ch = CFStringGetCharacterFromInlineBuffer(&buf, buf_idx);
3765 buf_idx--;
3766 if ((ch == NewLine) && (CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) == CarriageReturn)) {
3767 buf_idx--;
3768 }
/* Walk backward until a separator is found (the line starts right after it) or the string start is passed */
3769 while (1) {
3770 if (buf_idx < 0) {
3771 start = 0;
3772 break;
3773 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx), includeLineEndings)) {
3774 start = buf_idx + 1;
3775 break;
3776 } else {
3777 buf_idx--;
3778 }
3779 }
3780 }
3781 *lineBeginIndex = start;
3782 }
3783
3784 /* Now find the ending point */
3785 if (lineEndIndex || contentsEndIndex) {
3786 CFIndex endOfContents, lineSeparatorLength = 1; /* 1 by default */
3787 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3788 CFIndex buf_idx = range.location + range.length - (range.length ? 1 : 0);
3789 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
3790 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3791 if (ch == NewLine) {
/* Sitting on '\n': contents end here, or one earlier when the '\n' is the second half of "\r\n" */
3792 endOfContents = buf_idx;
3793 buf_idx--;
3794 if (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == CarriageReturn) {
3795 lineSeparatorLength = 2;
3796 endOfContents--;
3797 }
3798 } else {
/* Otherwise scan forward for the first separator; reaching len means the last line has no terminator */
3799 while (1) {
3800 if (isALineSeparatorTypeCharacter(ch, includeLineEndings)) {
3801 endOfContents = buf_idx; /* This is actually end of contentsRange */
3802 buf_idx++; /* OK for this to go past the end */
3803 if ((ch == CarriageReturn) && (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == NewLine)) {
3804 lineSeparatorLength = 2;
3805 }
3806 break;
3807 } else if (buf_idx >= len) {
3808 endOfContents = len;
3809 lineSeparatorLength = 0;
3810 break;
3811 } else {
3812 buf_idx++;
3813 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3814 }
3815 }
3816 }
3817 if (contentsEndIndex) *contentsEndIndex = endOfContents;
3818 if (lineEndIndex) *lineEndIndex = endOfContents + lineSeparatorLength;
3819 }
3820 }
3821
3822 void CFStringGetLineBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex) {
3823 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)string, getLineStart:(NSUInteger *)lineBeginIndex end:(NSUInteger *)lineEndIndex contentsEnd:(NSUInteger *)contentsEndIndex forRange:NSMakeRange(range.location, range.length));
3824 __CFStringGetLineOrParagraphBounds(string, range, lineBeginIndex, lineEndIndex, contentsEndIndex, true);
3825 }
3826
3827 void CFStringGetParagraphBounds(CFStringRef string, CFRange range, CFIndex *parBeginIndex, CFIndex *parEndIndex, CFIndex *contentsEndIndex) {
3828 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)string, getParagraphStart:(NSUInteger *)parBeginIndex end:(NSUInteger *)parEndIndex contentsEnd:(NSUInteger *)contentsEndIndex forRange:NSMakeRange(range.location, range.length));
3829 __CFStringGetLineOrParagraphBounds(string, range, parBeginIndex, parEndIndex, contentsEndIndex, false);
3830 }
3831
3832
3833 CFStringRef CFStringCreateByCombiningStrings(CFAllocatorRef alloc, CFArrayRef array, CFStringRef separatorString) {
3834 CFIndex numChars;
3835 CFIndex separatorNumByte;
3836 CFIndex stringCount = CFArrayGetCount(array);
3837 Boolean isSepCFString = !CF_IS_OBJC(__kCFStringTypeID, separatorString);
3838 Boolean canBeEightbit = isSepCFString && __CFStrIsEightBit(separatorString);
3839 CFIndex idx;
3840 CFStringRef otherString;
3841 void *buffer;
3842 uint8_t *bufPtr;
3843 const void *separatorContents = NULL;
3844
3845 if (stringCount == 0) {
3846 return CFStringCreateWithCharacters(alloc, NULL, 0);
3847 } else if (stringCount == 1) {
3848 return (CFStringRef)CFStringCreateCopy(alloc, (CFStringRef)CFArrayGetValueAtIndex(array, 0));
3849 }
3850
3851 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3852
3853 numChars = CFStringGetLength(separatorString) * (stringCount - 1);
3854 for (idx = 0; idx < stringCount; idx++) {
3855 otherString = (CFStringRef)CFArrayGetValueAtIndex(array, idx);
3856 numChars += CFStringGetLength(otherString);
3857 // canBeEightbit is already false if the separator is an NSString...
3858 if (CF_IS_OBJC(__kCFStringTypeID, otherString) || ! __CFStrIsEightBit(otherString)) canBeEightbit = false;
3859 }
3860
3861 buffer = (uint8_t *)CFAllocatorAllocate(alloc, canBeEightbit ? ((numChars + 1) * sizeof(uint8_t)) : (numChars * sizeof(UniChar)), 0);
3862 bufPtr = (uint8_t *)buffer;
3863 if (__CFOASafe) __CFSetLastAllocationEventName(buffer, "CFString (store)");
3864 separatorNumByte = CFStringGetLength(separatorString) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3865
3866 for (idx = 0; idx < stringCount; idx++) {
3867 if (idx) { // add separator here unless first string
3868 if (separatorContents) {
3869 memmove(bufPtr, separatorContents, separatorNumByte);
3870 } else {
3871 if (!isSepCFString) { // NSString
3872 CFStringGetCharacters(separatorString, CFRangeMake(0, CFStringGetLength(separatorString)), (UniChar *)bufPtr);
3873 } else if (canBeEightbit) {
3874 memmove(bufPtr, (const uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), separatorNumByte);
3875 } else {
3876 __CFStrConvertBytesToUnicode((uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), (UniChar *)bufPtr, __CFStrLength(separatorString));
3877 }
3878 separatorContents = bufPtr;
3879 }
3880 bufPtr += separatorNumByte;
3881 }
3882
3883 otherString = (CFStringRef )CFArrayGetValueAtIndex(array, idx);
3884 if (CF_IS_OBJC(__kCFStringTypeID, otherString)) {
3885 CFIndex otherLength = CFStringGetLength(otherString);
3886 CFStringGetCharacters(otherString, CFRangeMake(0, otherLength), (UniChar *)bufPtr);
3887 bufPtr += otherLength * sizeof(UniChar);
3888 } else {
3889 const uint8_t * otherContents = (const uint8_t *)__CFStrContents(otherString);
3890 CFIndex otherNumByte = __CFStrLength2(otherString, otherContents) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3891
3892 if (canBeEightbit || __CFStrIsUnicode(otherString)) {
3893 memmove(bufPtr, otherContents + __CFStrSkipAnyLengthByte(otherString), otherNumByte);
3894 } else {
3895 __CFStrConvertBytesToUnicode(otherContents + __CFStrSkipAnyLengthByte(otherString), (UniChar *)bufPtr, __CFStrLength2(otherString, otherContents));
3896 }
3897 bufPtr += otherNumByte;
3898 }
3899 }
3900 if (canBeEightbit) *bufPtr = 0; // NULL byte;
3901
3902 return canBeEightbit ?
3903 CFStringCreateWithCStringNoCopy(alloc, (const char*)buffer, __CFStringGetEightBitStringEncoding(), alloc) :
3904 CFStringCreateWithCharactersNoCopy(alloc, (UniChar *)buffer, numChars, alloc);
3905 }
3906
3907
3908 CFArrayRef CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc, CFStringRef string, CFStringRef separatorString) {
3909 CFArrayRef separatorRanges;
3910 CFIndex length = CFStringGetLength(string);
3911 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
3912 if (!(separatorRanges = CFStringCreateArrayWithFindResults(alloc, string, separatorString, CFRangeMake(0, length), 0))) {
3913 return CFArrayCreate(alloc, (const void **)&string, 1, & kCFTypeArrayCallBacks);
3914 } else {
3915 CFIndex idx;
3916 CFIndex count = CFArrayGetCount(separatorRanges);
3917 CFIndex startIndex = 0;
3918 CFIndex numChars;
3919 CFMutableArrayRef array = CFArrayCreateMutable(alloc, count + 2, & kCFTypeArrayCallBacks);
3920 const CFRange *currentRange;
3921 CFStringRef substring;
3922
3923 for (idx = 0;idx < count;idx++) {
3924 currentRange = (const CFRange *)CFArrayGetValueAtIndex(separatorRanges, idx);
3925 numChars = currentRange->location - startIndex;
3926 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, numChars));
3927 CFArrayAppendValue(array, substring);
3928 if (!_CFAllocatorIsGCRefZero(alloc)) CFRelease(substring);
3929 startIndex = currentRange->location + currentRange->length;
3930 }
3931 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, length - startIndex));
3932 CFArrayAppendValue(array, substring);
3933 if (!_CFAllocatorIsGCRefZero(alloc)) CFRelease(substring);
3934
3935 if (!_CFAllocatorIsGCRefZero(alloc)) CFRelease(separatorRanges);
3936
3937 return array;
3938 }
3939 }
3940
3941 CFStringRef CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc, CFDataRef data, CFStringEncoding encoding) {
3942 return CFStringCreateWithBytes(alloc, CFDataGetBytePtr(data), CFDataGetLength(data), encoding, true);
3943 }
3944
3945
/*
 * Creates a CFData holding `string` converted to `encoding`.
 *
 *  alloc     allocator for the result; NULL selects the default allocator for the conversion buffer
 *  string    the string to convert (CFString or ObjC string)
 *  encoding  the target encoding
 *  lossByte  replacement byte for unconvertible characters; 0 means lossy conversion is not allowed
 *
 * Returns NULL when the conversion fails (see the result checks below). Both calls to
 * __CFStringEncodeByteStream() pass true for the external-representation argument.
 */
3946 CFDataRef CFStringCreateExternalRepresentation(CFAllocatorRef alloc, CFStringRef string, CFStringEncoding encoding, uint8_t lossByte) {
3947 CFIndex length;
3948 CFIndex guessedByteLength;
3949 uint8_t *bytes;
3950 CFIndex usedLength;
3951 SInt32 result;
3952
3953 if (CF_IS_OBJC(__kCFStringTypeID, string)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3954 length = CFStringGetLength(string);
3955 } else {
3956 __CFAssertIsString(string);
3957 length = __CFStrLength(string);
/* Shortcut: an eight-bit string whose storage encoding already satisfies the request is copied byte for byte */
3958 if (__CFStrIsEightBit(string) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
3959 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
3960 }
3961 }
3962
3963 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3964
/* Estimate the output size. Unicode family: (length + 1) code units of 2 or 4 bytes each. */
3965 if (((encoding & 0x0FFF) == kCFStringEncodingUnicode) && ((encoding == kCFStringEncodingUnicode) || ((encoding > kCFStringEncodingUTF8) && (encoding <= kCFStringEncodingUTF32LE)))) {
3966 guessedByteLength = (length + 1) * ((((encoding >> 26) & 2) == 0) ? sizeof(UTF16Char) : sizeof(UTF32Char)); // UTF32 format has the bit set
3967 } else if (((guessedByteLength = CFStringGetMaximumSizeForEncoding(length, encoding)) > length) && !CF_IS_OBJC(__kCFStringTypeID, string)) { // Multi byte encoding
/* NOTE: the braces of the if/else below are split across the two #if blocks; on targets where the condition is false only the dry-run path is compiled */
3968 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
3969 if (__CFStrIsUnicode(string)) {
3970 CFIndex aLength = CFStringEncodingByteLengthForCharacters(encoding, kCFStringEncodingPrependBOM, __CFStrContents(string), __CFStrLength(string));
3971 if (aLength > 0) guessedByteLength = aLength;
3972 } else {
3973 #endif
/* Dry run with a NULL output buffer: only the required byte count is collected in guessedByteLength */
3974 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, NULL, LONG_MAX, &guessedByteLength);
3975 // if result == length, we always succeed
3976 // otherwise, if result == 0, we fail
3977 // otherwise, if there was a lossByte but still result != length, we fail
3978 if ((result != length) && (!result || !lossByte)) return NULL;
3979 if (guessedByteLength == length && __CFStrIsEightBit(string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // It's all ASCII !!
3980 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
3981 }
3982 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
3983 }
3984 #endif
3985 }
3986 bytes = (uint8_t *)CFAllocatorAllocate(alloc, guessedByteLength, 0);
3987 if (__CFOASafe) __CFSetLastAllocationEventName(bytes, "CFData (store)");
3988
/* Real conversion into the freshly allocated buffer; usedLength receives the number of bytes written */
3989 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, bytes, guessedByteLength, &usedLength);
3990
3991 if ((result != length) && (!result || !lossByte)) { // see comment above about what this means
3992 CFAllocatorDeallocate(alloc, bytes);
3993 return NULL;
3994 }
3995
/* The buffer is handed to the CFData without copying, with `alloc` passed as its deallocator */
3996 return CFDataCreateWithBytesNoCopy(alloc, (uint8_t *)bytes, usedLength, alloc);
3997 }
3998
3999
4000 CFStringEncoding CFStringGetSmallestEncoding(CFStringRef str) {
4001 CFIndex len;
4002 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringEncoding, (NSString *)str, _smallestEncodingInCFStringEncoding);
4003 __CFAssertIsString(str);
4004
4005 if (__CFStrIsEightBit(str)) return __CFStringGetEightBitStringEncoding();
4006 len = __CFStrLength(str);
4007 if (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetEightBitStringEncoding(), 0, NULL, LONG_MAX, NULL) == len) return __CFStringGetEightBitStringEncoding();
4008 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetSystemEncoding(), 0, NULL, LONG_MAX, NULL) == len)) return __CFStringGetSystemEncoding();
4009 return kCFStringEncodingUnicode; /* ??? */
4010 }
4011
4012
4013 CFStringEncoding CFStringGetFastestEncoding(CFStringRef str) {
4014 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringEncoding, (NSString *)str, _fastestEncodingInCFStringEncoding);
4015 __CFAssertIsString(str);
4016 return __CFStrIsEightBit(str) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode; /* ??? */
4017 }
4018
4019
4020 SInt32 CFStringGetIntValue(CFStringRef str) {
4021 Boolean success;
4022 SInt32 result;
4023 SInt32 idx = 0;
4024 CFStringInlineBuffer buf;
4025 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
4026 success = __CFStringScanInteger(&buf, NULL, &idx, false, &result);
4027 return success ? result : 0;
4028 }
4029
4030
4031 double CFStringGetDoubleValue(CFStringRef str) {
4032 Boolean success;
4033 double result;
4034 SInt32 idx = 0;
4035 CFStringInlineBuffer buf;
4036 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
4037 success = __CFStringScanDouble(&buf, NULL, &idx, &result);
4038 return success ? result : 0.0;
4039 }
4040
4041
4042 /*** Mutable functions... ***/
4043
/*
 * Points an externally-backed mutable string at a caller-supplied UniChar buffer.
 *
 *  string    the string to update; asserted to be a string that is external-mutable
 *  chars     the new backing buffer; must be non-NULL whenever capacity > 0
 *  length    number of characters currently in use; asserted non-negative and <= capacity
 *  capacity  size of the buffer, in UniChars
 *
 * The buffer pointer is stored as-is (no copy is made here).
 */
4044 void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string, UniChar *chars, CFIndex length, CFIndex capacity) {
4045 __CFAssertIsNotNegative(length);
4046 __CFAssertIsStringAndExternalMutable(string);
4047 CFAssert4((length <= capacity) && ((capacity == 0) || ((capacity > 0) && chars)), __kCFLogAssertion, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__, chars, length, capacity);
4048 __CFStrSetContentPtr(string, chars);
4049 __CFStrSetExplicitLength(string, length);
/* Capacity is recorded in bytes, hence the multiplication by sizeof(UniChar) */
4050 __CFStrSetCapacity(string, capacity * sizeof(UniChar));
4051 __CFStrSetCapacityProvidedExternally(string);
4052 }
4053
4054
4055
4056 void CFStringInsert(CFMutableStringRef str, CFIndex idx, CFStringRef insertedStr) {
4057 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, insertString:(NSString *)insertedStr atIndex:(NSUInteger)idx);
4058 __CFAssertIsStringAndMutable(str);
4059 CFAssert3(idx >= 0 && idx <= __CFStrLength(str), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(str));
4060 __CFStringReplace(str, CFRangeMake(idx, 0), insertedStr);
4061 }
4062
4063
/*
 * Deletes the characters in `range` from `str`.
 * The string is asserted to be mutable and the range to lie within its bounds; the deletion
 * itself is a size change of `range` to zero inserted characters.
 * NOTE(review): CF_OBJC_FUNCDISPATCHV presumably forwards to -deleteCharactersInRange: and returns
 * when `str` is an Objective-C object - confirm against the macro definition.
 */
4064 void CFStringDelete(CFMutableStringRef str, CFRange range) {
4065 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, deleteCharactersInRange:NSMakeRange(range.location, range.length));
4066 __CFAssertIsStringAndMutable(str);
4067 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4068 __CFStringChangeSize(str, range, 0, false);
4069 }
4070
4071
/*
 * Replaces the characters in `range` of `str` with `replacement`.
 * The string is asserted to be mutable and the range to lie within its bounds before the work is
 * delegated to __CFStringReplace().
 * NOTE(review): CF_OBJC_FUNCDISPATCHV presumably forwards to -replaceCharactersInRange:withString:
 * and returns when `str` is an Objective-C object - confirm against the macro definition.
 */
4072 void CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
4073 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, replaceCharactersInRange:NSMakeRange(range.location, range.length) withString:(NSString *)replacement);
4074 __CFAssertIsStringAndMutable(str);
4075 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4076 __CFStringReplace(str, range, replacement);
4077 }
4078
4079
4080 void CFStringReplaceAll(CFMutableStringRef str, CFStringRef replacement) {
4081 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, setString:(NSString *)replacement);
4082 __CFAssertIsStringAndMutable(str);
4083 __CFStringReplace(str, CFRangeMake(0, __CFStrLength(str)), replacement);
4084 }
4085
4086
4087 void CFStringAppend(CFMutableStringRef str, CFStringRef appended) {
4088 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendString:(NSString *)appended);
4089 __CFAssertIsStringAndMutable(str);
4090 __CFStringReplace(str, CFRangeMake(__CFStrLength(str), 0), appended);
4091 }
4092
4093
4094 void CFStringAppendCharacters(CFMutableStringRef str, const UniChar *chars, CFIndex appendedLength) {
4095 CFIndex strLength, idx;
4096
4097 __CFAssertIsNotNegative(appendedLength);
4098
4099 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendCharacters:chars length:(NSUInteger)appendedLength);
4100
4101 __CFAssertIsStringAndMutable(str);
4102
4103 strLength = __CFStrLength(str);
4104 if (__CFStrIsUnicode(str)) {
4105 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, true);
4106 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4107 } else {
4108 uint8_t *contents;
4109 bool isASCII = true;
4110 for (idx = 0; isASCII && idx < appendedLength; idx++) isASCII = (chars[idx] < 0x80);
4111 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, !isASCII);
4112 if (!isASCII) {
4113 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4114 } else {
4115 contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4116 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4117 }
4118 }
4119 }
4120
4121
4122 void __CFStringAppendBytes(CFMutableStringRef str, const char *cStr, CFIndex appendedLength, CFStringEncoding encoding) {
4123 Boolean appendedIsUnicode = false;
4124 Boolean freeCStrWhenDone = false;
4125 Boolean demoteAppendedUnicode = false;
4126 CFVarWidthCharBuffer vBuf;
4127
4128 __CFAssertIsNotNegative(appendedLength);
4129
4130 if (encoding == kCFStringEncodingASCII || encoding == __CFStringGetEightBitStringEncoding()) {
4131 // appendedLength now denotes length in UniChars
4132 } else if (encoding == kCFStringEncodingUnicode) {
4133 UniChar *chars = (UniChar *)cStr;
4134 CFIndex idx, length = appendedLength / sizeof(UniChar);
4135 bool isASCII = true;
4136 for (idx = 0; isASCII && idx < length; idx++) isASCII = (chars[idx] < 0x80);
4137 if (!isASCII) {
4138 appendedIsUnicode = true;
4139 } else {
4140 demoteAppendedUnicode = true;
4141 }
4142 appendedLength = length;
4143 } else {
4144 Boolean usingPassedInMemory = false;
4145
4146 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
4147 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
4148
4149 if (!__CFStringDecodeByteStream3((const uint8_t *)cStr, appendedLength, encoding, __CFStrIsUnicode(str), &vBuf, &usingPassedInMemory, 0)) {
4150 CFAssert1(0, __kCFLogAssertion, "Supplied bytes could not be converted specified encoding %d", encoding);
4151 return;
4152 }
4153
4154 // If not ASCII, appendedLength now denotes length in UniChars
4155 appendedLength = vBuf.numChars;
4156 appendedIsUnicode = !vBuf.isASCII;
4157 cStr = (const char *)vBuf.chars.ascii;
4158 freeCStrWhenDone = !usingPassedInMemory && vBuf.shouldFreeChars;
4159 }
4160
4161 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
4162 if (!appendedIsUnicode && !demoteAppendedUnicode) {
4163 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, _cfAppendCString:(const unsigned char *)cStr length:(NSInteger)appendedLength);
4164 } else {
4165 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendCharacters:(const unichar *)cStr length:(NSUInteger)appendedLength);
4166 }
4167 } else {
4168 CFIndex strLength;
4169 __CFAssertIsStringAndMutable(str);
4170 strLength = __CFStrLength(str);
4171
4172 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, appendedIsUnicode || __CFStrIsUnicode(str));
4173
4174 if (__CFStrIsUnicode(str)) {
4175 UniChar *contents = (UniChar *)__CFStrContents(str);
4176 if (appendedIsUnicode) {
4177 memmove(contents + strLength, cStr, appendedLength * sizeof(UniChar));
4178 } else {
4179 __CFStrConvertBytesToUnicode((const uint8_t *)cStr, contents + strLength, appendedLength);
4180 }
4181 } else {
4182 if (demoteAppendedUnicode) {
4183 UniChar *chars = (UniChar *)cStr;
4184 CFIndex idx;
4185 uint8_t *contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4186 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4187 } else {
4188 uint8_t *contents = (uint8_t *)__CFStrContents(str);
4189 memmove(contents + strLength + __CFStrSkipAnyLengthByte(str), cStr, appendedLength);
4190 }
4191 }
4192 }
4193
4194 if (freeCStrWhenDone) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr);
4195 }
4196
4197 void CFStringAppendPascalString(CFMutableStringRef str, ConstStringPtr pStr, CFStringEncoding encoding) {
4198 __CFStringAppendBytes(str, (const char *)(pStr + 1), (CFIndex)*pStr, encoding);
4199 }
4200
4201 void CFStringAppendCString(CFMutableStringRef str, const char *cStr, CFStringEncoding encoding) {
4202 __CFStringAppendBytes(str, cStr, strlen(cStr), encoding);
4203 }
4204
4205
4206 void CFStringAppendFormat(CFMutableStringRef str, CFDictionaryRef formatOptions, CFStringRef format, ...) {
4207 va_list argList;
4208
4209 va_start(argList, format);
4210 CFStringAppendFormatAndArguments(str, formatOptions, format, argList);
4211 va_end(argList);
4212 }
4213
4214
4215 CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
4216 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFIndex, (NSMutableString *)string, replaceOccurrencesOfString:(NSString *)stringToFind withString:(NSString *)replacementString options:(NSStringCompareOptions)compareOptions range:NSMakeRange(rangeToSearch.location, rangeToSearch.length));
4217 CFRange foundRange;
4218 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
4219 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
4220 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
4221 CFRange rangeBuffer[MAX_RANGES_ON_STACK]; // Used to avoid allocating memory
4222 CFRange *ranges = rangeBuffer;
4223 CFIndex foundCount = 0;
4224 CFIndex capacity = MAX_RANGES_ON_STACK;
4225
4226 __CFAssertIsStringAndMutable(string);
4227 __CFAssertRangeIsInStringBounds(string, rangeToSearch.location, rangeToSearch.length);
4228
4229 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
4230 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
4231 // Determine the next range
4232 if (backwards) {
4233 rangeToSearch.length = foundRange.location - rangeToSearch.location;
4234 } else {
4235 rangeToSearch.location = foundRange.location + foundRange.length;
4236 rangeToSearch.length = endIndex - rangeToSearch.location;
4237 }
4238
4239 // If necessary, grow the array
4240 if (foundCount >= capacity) {
4241 bool firstAlloc = (ranges == rangeBuffer) ? true : false;
4242 capacity = (capacity + 4) * 2;
4243 // Note that reallocate with NULL previous pointer is same as allocate
4244 ranges = (CFRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, firstAlloc ? NULL : ranges, capacity * sizeof(CFRange), 0);
4245 if (firstAlloc) memmove(ranges, rangeBuffer, MAX_RANGES_ON_STACK * sizeof(CFRange));
4246 }
4247 ranges[foundCount] = foundRange;
4248 foundCount++;
4249 }
4250
4251 if (foundCount > 0) {
4252 if (backwards) { // Reorder the ranges to be incrementing (better to do this here, then to check other places)
4253 int head = 0;
4254 int tail = foundCount - 1;
4255 while (head < tail) {
4256 CFRange temp = ranges[head];
4257 ranges[head] = ranges[tail];
4258 ranges[tail] = temp;
4259 head++;
4260 tail--;
4261 }
4262 }
4263 __CFStringReplaceMultiple(string, ranges, foundCount, replacementString);
4264 if (ranges != rangeBuffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, ranges);
4265 }
4266
4267 return foundCount;
4268 }
4269
4270
4271 // This function is here for NSString purposes
4272 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
4273
4274 int __CFStringCheckAndReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
4275 if (!__CFStrIsMutable(str)) return _CFStringErrNotMutable; // These three ifs are always here, for NSString usage
4276 if (!replacement && __CFStringNoteErrors()) return _CFStringErrNilArg;
4277 // This attempts to catch bad ranges including those described in 3375535 (-1,1)
4278 unsigned long endOfRange = (unsigned long)(range.location) + (unsigned long)(range.length); // NSRange uses unsigned quantities, hence the casting
4279 if (((endOfRange > (unsigned long)__CFStrLength(str)) || (endOfRange < (unsigned long)(range.location))) && __CFStringNoteErrors()) return _CFStringErrBounds;
4280
4281 __CFAssertIsStringAndMutable(str);
4282 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4283 __CFStringReplace(str, range, replacement);
4284 return _CFStringErrNone;
4285 }
4286
4287 // This function determines whether errors which would cause string exceptions should
4288 // be ignored or not
4289
4290 Boolean __CFStringNoteErrors(void) {
4291 return true;
4292 }
4293
4294
4295
4296 void CFStringPad(CFMutableStringRef string, CFStringRef padString, CFIndex length, CFIndex indexIntoPad) {
4297 CFIndex originalLength;
4298
4299 __CFAssertIsNotNegative(length);
4300 __CFAssertIsNotNegative(indexIntoPad);
4301
4302 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfPad:padString length:(uint32_t)length padIndex:(uint32_t)indexIntoPad);
4303
4304 __CFAssertIsStringAndMutable(string);
4305
4306 originalLength = __CFStrLength(string);
4307 if (length < originalLength) {
4308 __CFStringChangeSize(string, CFRangeMake(length, originalLength - length), 0, false);
4309 } else if (originalLength < length) {
4310 uint8_t *contents;
4311 Boolean isUnicode;
4312 CFIndex charSize;
4313 CFIndex padStringLength;
4314 CFIndex padLength;
4315 CFIndex padRemaining = length - originalLength;
4316
4317 if (CF_IS_OBJC(__kCFStringTypeID, padString)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
4318 padStringLength = CFStringGetLength(padString);
4319 isUnicode = true; /* !!! Bad for now */
4320 } else {
4321 __CFAssertIsString(padString);
4322 padStringLength = __CFStrLength(padString);
4323 isUnicode = __CFStrIsUnicode(string) || __CFStrIsUnicode(padString);
4324 }
4325
4326 charSize = isUnicode ? sizeof(UniChar) : sizeof(uint8_t);
4327
4328 __CFStringChangeSize(string, CFRangeMake(originalLength, 0), padRemaining, isUnicode);
4329
4330 contents = (uint8_t *)__CFStrContents(string) + charSize * originalLength + __CFStrSkipAnyLengthByte(string);
4331 padLength = padStringLength - indexIntoPad;
4332 padLength = padRemaining < padLength ? padRemaining : padLength;
4333
4334 while (padRemaining > 0) {
4335 if (isUnicode) {
4336 CFStringGetCharacters(padString, CFRangeMake(indexIntoPad, padLength), (UniChar *)contents);
4337 } else {
4338 CFStringGetBytes(padString, CFRangeMake(indexIntoPad, padLength), __CFStringGetEightBitStringEncoding(), 0, false, contents, padRemaining * charSize, NULL);
4339 }
4340 contents += padLength * charSize;
4341 padRemaining -= padLength;
4342 indexIntoPad = 0;
4343 padLength = padRemaining < padLength ? padRemaining : padStringLength;
4344 }
4345 }
4346 }
4347
4348 void CFStringTrim(CFMutableStringRef string, CFStringRef trimString) {
4349 CFRange range;
4350 CFIndex newStartIndex;
4351 CFIndex length;
4352
4353 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfTrim:trimString);
4354
4355 __CFAssertIsStringAndMutable(string);
4356 __CFAssertIsString(trimString);
4357
4358 newStartIndex = 0;
4359 length = __CFStrLength(string);
4360
4361 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length - newStartIndex), kCFCompareAnchored, &range)) {
4362 newStartIndex = range.location + range.length;
4363 }
4364
4365 if (newStartIndex < length) {
4366 CFIndex charSize = __CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t);
4367 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4368
4369 length -= newStartIndex;
4370 if (__CFStrLength(trimString) < length) {
4371 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length), kCFCompareAnchored|kCFCompareBackwards, &range)) {
4372 length = range.location - newStartIndex;
4373 }
4374 }
4375 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4376 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4377 } else { // Only trimString in string, trim all
4378 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4379 }
4380 }
4381
4382 void CFStringTrimWhitespace(CFMutableStringRef string) {
4383 CFIndex newStartIndex;
4384 CFIndex length;
4385 CFStringInlineBuffer buffer;
4386
4387 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfTrimWS);
4388
4389 __CFAssertIsStringAndMutable(string);
4390
4391 newStartIndex = 0;
4392 length = __CFStrLength(string);
4393
4394 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
4395 CFIndex buffer_idx = 0;
4396
4397 while (buffer_idx < length && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4398 buffer_idx++;
4399 newStartIndex = buffer_idx;
4400
4401 if (newStartIndex < length) {
4402 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4403 CFIndex charSize = (__CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t));
4404
4405 buffer_idx = length - 1;
4406 while (0 <= buffer_idx && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4407 buffer_idx--;
4408 length = buffer_idx - newStartIndex + 1;
4409
4410 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4411 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4412 } else { // Whitespace only string
4413 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4414 }
4415 }
4416
4417 void CFStringLowercase(CFMutableStringRef string, CFLocaleRef locale) {
4418 CFIndex currentIndex = 0;
4419 CFIndex length;
4420 const uint8_t *langCode;
4421 Boolean isEightBit = __CFStrIsEightBit(string);
4422
4423 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfLowercase:(const void *)locale);
4424
4425 __CFAssertIsStringAndMutable(string);
4426
4427 length = __CFStrLength(string);
4428
4429 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4430
4431 if (!langCode && isEightBit) {
4432 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4433 for (;currentIndex < length;currentIndex++) {
4434 if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4435 contents[currentIndex] += 'a' - 'A';
4436 } else if (contents[currentIndex] > 127) {
4437 break;
4438 }
4439 }
4440 }
4441
4442 if (currentIndex < length) {
4443 UTF16Char *contents;
4444 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4445 CFIndex mappedLength;
4446 UTF32Char currentChar;
4447 UInt32 flags = 0;
4448
4449 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4450
4451 contents = (UniChar *)__CFStrContents(string);
4452
4453 for (;currentIndex < length;currentIndex++) {
4454
4455 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4456 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4457 } else {
4458 currentChar = contents[currentIndex];
4459 }
4460 flags = ((langCode || (currentChar == 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToLowercase, langCode, flags) : 0);
4461
4462 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, flags, langCode);
4463 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4464
4465 if (currentChar > 0xFFFF) { // Non-BMP char
4466 switch (mappedLength) {
4467 case 0:
4468 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4469 contents = (UniChar *)__CFStrContents(string);
4470 length -= 2;
4471 break;
4472
4473 case 1:
4474 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4475 contents = (UniChar *)__CFStrContents(string);
4476 --length;
4477 break;
4478
4479 case 2:
4480 contents[++currentIndex] = mappedCharacters[1];
4481 break;
4482
4483 default:
4484 --mappedLength; // Skip the current char
4485 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4486 contents = (UniChar *)__CFStrContents(string);
4487 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4488 length += (mappedLength - 1);
4489 currentIndex += mappedLength;
4490 break;
4491 }
4492 } else if (mappedLength == 0) {
4493 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4494 contents = (UniChar *)__CFStrContents(string);
4495 --length;
4496 } else if (mappedLength > 1) {
4497 --mappedLength; // Skip the current char
4498 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4499 contents = (UniChar *)__CFStrContents(string);
4500 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4501 length += mappedLength;
4502 currentIndex += mappedLength;
4503 }
4504 }
4505 }
4506 }
4507
4508 void CFStringUppercase(CFMutableStringRef string, CFLocaleRef locale) {
4509 CFIndex currentIndex = 0;
4510 CFIndex length;
4511 const uint8_t *langCode;
4512 Boolean isEightBit = __CFStrIsEightBit(string);
4513
4514 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfUppercase:(const void *)locale);
4515
4516 __CFAssertIsStringAndMutable(string);
4517
4518 length = __CFStrLength(string);
4519
4520 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4521
4522 if (!langCode && isEightBit) {
4523 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4524 for (;currentIndex < length;currentIndex++) {
4525 if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4526 contents[currentIndex] -= 'a' - 'A';
4527 } else if (contents[currentIndex] > 127) {
4528 break;
4529 }
4530 }
4531 }
4532
4533 if (currentIndex < length) {
4534 UniChar *contents;
4535 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4536 CFIndex mappedLength;
4537 UTF32Char currentChar;
4538 UInt32 flags = 0;
4539
4540 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4541
4542 contents = (UniChar *)__CFStrContents(string);
4543
4544 for (;currentIndex < length;currentIndex++) {
4545 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4546 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4547 } else {
4548 currentChar = contents[currentIndex];
4549 }
4550
4551 flags = (langCode ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToUppercase, langCode, flags) : 0);
4552
4553 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToUppercase, flags, langCode);
4554 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4555
4556 if (currentChar > 0xFFFF) { // Non-BMP char
4557 switch (mappedLength) {
4558 case 0:
4559 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4560 contents = (UniChar *)__CFStrContents(string);
4561 length -= 2;
4562 break;
4563
4564 case 1:
4565 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4566 contents = (UniChar *)__CFStrContents(string);
4567 --length;
4568 break;
4569
4570 case 2:
4571 contents[++currentIndex] = mappedCharacters[1];
4572 break;
4573
4574 default:
4575 --mappedLength; // Skip the current char
4576 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4577 contents = (UniChar *)__CFStrContents(string);
4578 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4579 length += (mappedLength - 1);
4580 currentIndex += mappedLength;
4581 break;
4582 }
4583 } else if (mappedLength == 0) {
4584 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4585 contents = (UniChar *)__CFStrContents(string);
4586 --length;
4587 } else if (mappedLength > 1) {
4588 --mappedLength; // Skip the current char
4589 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4590 contents = (UniChar *)__CFStrContents(string);
4591 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4592 length += mappedLength;
4593 currentIndex += mappedLength;
4594 }
4595 }
4596 }
4597 }
4598
4599
4600 void CFStringCapitalize(CFMutableStringRef string, CFLocaleRef locale) {
4601 CFIndex currentIndex = 0;
4602 CFIndex length;
4603 const uint8_t *langCode;
4604 Boolean isEightBit = __CFStrIsEightBit(string);
4605 Boolean isLastCased = false;
4606 const uint8_t *caseIgnorableForBMP;
4607
4608 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfCapitalize:(const void *)locale);
4609
4610 __CFAssertIsStringAndMutable(string);
4611
4612 length = __CFStrLength(string);
4613
4614 caseIgnorableForBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet, 0);
4615
4616 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4617
4618 if (!langCode && isEightBit) {
4619 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4620 for (;currentIndex < length;currentIndex++) {
4621 if (contents[currentIndex] > 127) {
4622 break;
4623 } else if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4624 contents[currentIndex] += (isLastCased ? 'a' - 'A' : 0);
4625 isLastCased = true;
4626 } else if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4627 contents[currentIndex] -= (!isLastCased ? 'a' - 'A' : 0);
4628 isLastCased = true;
4629 } else if (!CFUniCharIsMemberOfBitmap(contents[currentIndex], caseIgnorableForBMP)) {
4630 isLastCased = false;
4631 }
4632 }
4633 }
4634
4635 if (currentIndex < length) {
4636 UniChar *contents;
4637 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4638 CFIndex mappedLength;
4639 UTF32Char currentChar;
4640 UInt32 flags = 0;
4641
4642 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4643
4644 contents = (UniChar *)__CFStrContents(string);
4645
4646 for (;currentIndex < length;currentIndex++) {
4647 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4648 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4649 } else {
4650 currentChar = contents[currentIndex];
4651 }
4652 flags = ((langCode || ((currentChar == 0x03A3) && isLastCased)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), langCode, flags) : 0);
4653
4654 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), flags, langCode);
4655 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4656
4657 if (currentChar > 0xFFFF) { // Non-BMP char
4658 switch (mappedLength) {
4659 case 0:
4660 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4661 contents = (UniChar *)__CFStrContents(string);
4662 length -= 2;
4663 break;
4664
4665 case 1:
4666 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4667 contents = (UniChar *)__CFStrContents(string);
4668 --length;
4669 break;
4670
4671 case 2:
4672 contents[++currentIndex] = mappedCharacters[1];
4673 break;
4674
4675 default:
4676 --mappedLength; // Skip the current char
4677 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4678 contents = (UniChar *)__CFStrContents(string);
4679 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4680 length += (mappedLength - 1);
4681 currentIndex += mappedLength;
4682 break;
4683 }
4684 } else if (mappedLength == 0) {
4685 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4686 contents = (UniChar *)__CFStrContents(string);
4687 --length;
4688 } else if (mappedLength > 1) {
4689 --mappedLength; // Skip the current char
4690 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4691 contents = (UniChar *)__CFStrContents(string);
4692 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4693 length += mappedLength;
4694 currentIndex += mappedLength;
4695 }
4696
4697 if (!((currentChar > 0xFFFF) ? CFUniCharIsMemberOf(currentChar, kCFUniCharCaseIgnorableCharacterSet) : CFUniCharIsMemberOfBitmap(currentChar, caseIgnorableForBMP))) { // We have non-caseignorable here
4698 isLastCased = ((CFUniCharIsMemberOf(currentChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(currentChar, kCFUniCharLowercaseLetterCharacterSet)) ? true : false);
4699 }
4700 }
4701 }
4702 }
4703
4704
// Size, in UTF32Char elements, of the on-stack scratch buffer CFStringNormalize() starts
// with; the same value is used as the growth step when that buffer has to move to the heap.
4705 #define MAX_DECOMP_BUF 64
4706
// Constants of the arithmetic Hangul syllable composition/decomposition scheme defined
// by the Unicode Standard. In the visible code, HANGUL_SBASE and HANGUL_SCOUNT delimit
// the range of characters treated as precomposed Hangul syllables.
4707 #define HANGUL_SBASE 0xAC00 // first precomposed Hangul syllable
4708 #define HANGUL_LBASE 0x1100 // first leading consonant (L) jamo
4709 #define HANGUL_VBASE 0x1161 // first vowel (V) jamo
4710 #define HANGUL_TBASE 0x11A7 // one before the first trailing consonant (T) jamo
4711 #define HANGUL_SCOUNT 11172 // number of precomposed syllables (19 * 21 * 28)
4712 #define HANGUL_LCOUNT 19 // number of leading consonants
4713 #define HANGUL_VCOUNT 21 // number of vowels
4714 #define HANGUL_TCOUNT 28 // number of trailing consonants, counting "none"
4715 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT) // syllables per leading consonant
4716
4717 CF_INLINE uint32_t __CFGetUTF16Length(const UTF32Char *characters, uint32_t utf32Length) {
4718 const UTF32Char *limit = characters + utf32Length;
4719 uint32_t length = 0;
4720
4721 while (characters < limit) length += (*(characters++) > 0xFFFF ? 2 : 1);
4722
4723 return length;
4724 }
4725
4726 CF_INLINE void __CFFillInUTF16(const UTF32Char *characters, UTF16Char *dst, uint32_t utf32Length) {
4727 const UTF32Char *limit = characters + utf32Length;
4728 UTF32Char currentChar;
4729
4730 while (characters < limit) {
4731 currentChar = *(characters++);
4732 if (currentChar > 0xFFFF) {
4733 currentChar -= 0x10000;
4734 *(dst++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
4735 *(dst++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
4736 } else {
4737 *(dst++) = currentChar;
4738 }
4739 }
4740 }
4741
4742 void CFStringNormalize(CFMutableStringRef string, CFStringNormalizationForm theForm) {
4743 CFIndex currentIndex = 0;
4744 CFIndex length;
4745 bool needToReorder = true;
4746
4747 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfNormalize:theForm);
4748
4749 __CFAssertIsStringAndMutable(string);
4750
4751 length = __CFStrLength(string);
4752
4753 if (__CFStrIsEightBit(string)) {
4754 uint8_t *contents;
4755
4756 if (theForm == kCFStringNormalizationFormC) return; // 8bit form has no decomposition
4757
4758 contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4759
4760 for (;currentIndex < length;currentIndex++) {
4761 if (contents[currentIndex] > 127) {
4762 __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); // need to do harm way
4763 needToReorder = false;
4764 break;
4765 }
4766 }
4767 }
4768
4769 if (currentIndex < length) {
4770 UTF16Char *limit = (UTF16Char *)__CFStrContents(string) + length;
4771 UTF16Char *contents = (UTF16Char *)__CFStrContents(string) + currentIndex;
4772 UTF32Char buffer[MAX_DECOMP_BUF];
4773 UTF32Char *mappedCharacters = buffer;
4774 CFIndex allocatedLength = MAX_DECOMP_BUF;
4775 CFIndex mappedLength;
4776 CFIndex currentLength;
4777 UTF32Char currentChar;
4778 const uint8_t *decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
4779 const uint8_t *nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
4780 const uint8_t *combiningBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
4781
4782 while (contents < limit) {
4783 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4784 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4785 currentLength = 2;
4786 contents += 2;
4787 } else {
4788 currentChar = *(contents++);
4789 currentLength = 1;
4790 }
4791
4792 mappedLength = 0;
4793
4794 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16)))) && (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, ((currentChar < 0x10000) ? combiningBMP : (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16)))))) {
4795 if ((theForm & kCFStringNormalizationFormC) == 0 || currentChar < HANGUL_SBASE || currentChar > (HANGUL_SBASE + HANGUL_SCOUNT)) { // We don't have to decompose Hangul Syllables if we're precomposing again
4796 mappedLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters, MAX_DECOMP_BUF);
4797 }
4798 }
4799
4800 if ((needToReorder || (theForm & kCFStringNormalizationFormC)) && ((contents < limit) || (mappedLength == 0))) {
4801 if (mappedLength > 0) {
4802 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4803 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4804 } else {
4805 currentChar = *contents;
4806 }
4807 }
4808
4809 if (0 != CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) {
4810 uint32_t decompLength;
4811
4812 if (mappedLength == 0) {
4813 contents -= (currentChar & 0xFFFF0000 ? 2 : 1);
4814 if (currentIndex > 0) {
4815 if (CFUniCharIsSurrogateLowCharacter(*(contents - 1)) && (currentIndex > 1) && CFUniCharIsSurrogateHighCharacter(*(contents - 2))) {
4816 *mappedCharacters = CFUniCharGetLongCharacterForSurrogatePair(*(contents - 2), *(contents - 1));
4817 currentIndex -= 2;
4818 currentLength += 2;
4819 } else {
4820 *mappedCharacters = *(contents - 1);
4821 --currentIndex;
4822 ++currentLength;
4823 }
4824 mappedLength = 1;
4825 }
4826 } else {
4827 currentLength += (currentChar & 0xFFFF0000 ? 2 : 1);
4828 }
4829 contents += (currentChar & 0xFFFF0000 ? 2 : 1);
4830
4831 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
4832 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4833 mappedLength += decompLength;
4834 } else {
4835 mappedCharacters[mappedLength++] = currentChar;
4836 }
4837
4838 while (contents < limit) {
4839 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4840 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4841 } else {
4842 currentChar = *contents;
4843 }
4844 if (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) break;
4845 if (currentChar & 0xFFFF0000) {
4846 contents += 2;
4847 currentLength += 2;
4848 } else {
4849 ++contents;
4850 ++currentLength;
4851 }
4852 if (mappedLength == allocatedLength) {
4853 allocatedLength += MAX_DECOMP_BUF;
4854 if (mappedCharacters == buffer) {
4855 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
4856 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4857 } else {
4858 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4859 }
4860 }
4861 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
4862 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4863 mappedLength += decompLength;
4864 } else {
4865 mappedCharacters[mappedLength++] = currentChar;
4866 }
4867 }
4868 }
4869 if (needToReorder && mappedLength > 1) CFUniCharPrioritySort(mappedCharacters, mappedLength);
4870 }
4871
4872 if (theForm & kCFStringNormalizationFormKD) {
4873 CFIndex newLength = 0;
4874
4875 if (mappedLength == 0 && CFUniCharIsMemberOf(currentChar, kCFUniCharCompatibilityDecomposableCharacterSet)) {
4876 mappedCharacters[mappedLength++] = currentChar;
4877 }
4878 while (newLength < mappedLength) {
4879 newLength = CFUniCharCompatibilityDecompose(mappedCharacters, mappedLength, allocatedLength);
4880 if (newLength == 0) {
4881 allocatedLength += MAX_DECOMP_BUF;
4882 if (mappedCharacters == buffer) {
4883 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
4884 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4885 } else {
4886 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4887 }
4888 }
4889 }
4890 mappedLength = newLength;
4891 }
4892
4893 if (theForm & kCFStringNormalizationFormC) {
4894 UTF32Char nextChar;
4895
4896 if (mappedLength > 1) {
4897 CFIndex consumedLength = 1;
4898 UTF32Char *currentBase = mappedCharacters;
4899 uint8_t currentClass, lastClass = 0;
4900 bool didCombine = false;
4901
4902 currentChar = *mappedCharacters;
4903
4904 while (consumedLength < mappedLength) {
4905 nextChar = mappedCharacters[consumedLength];
4906 currentClass = CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)((nextChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))));
4907
4908 if (theForm & kCFStringNormalizationFormKD) {
4909 if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) {
4910 SInt8 lIndex = currentChar - HANGUL_LBASE;
4911
4912 if ((0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4913 SInt16 vIndex = nextChar - HANGUL_VBASE;
4914
4915 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4916 SInt16 tIndex = 0;
4917 CFIndex usedLength = mappedLength;
4918
4919 mappedCharacters[consumedLength++] = 0xFFFD;
4920
4921 if (consumedLength < mappedLength) {
4922 tIndex = mappedCharacters[consumedLength] - HANGUL_TBASE;
4923 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4924 tIndex = 0;
4925 } else {
4926 mappedCharacters[consumedLength++] = 0xFFFD;
4927 }
4928 }
4929 *currentBase = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4930
4931 while (--usedLength > 0) {
4932 if (mappedCharacters[usedLength] == 0xFFFD) {
4933 --mappedLength;
4934 --consumedLength;
4935 memmove(mappedCharacters + usedLength, mappedCharacters + usedLength + 1, (mappedLength - usedLength) * sizeof(UTF32Char));
4936 }
4937 }
4938 currentBase = mappedCharacters + consumedLength;
4939 currentChar = *currentBase;
4940 ++consumedLength;
4941
4942 continue;
4943 }
4944 }
4945 }
4946 if (!CFUniCharIsMemberOfBitmap(nextChar, ((nextChar < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))))) {
4947 *currentBase = currentChar;
4948 currentBase = mappedCharacters + consumedLength;
4949 currentChar = nextChar;
4950 ++consumedLength;
4951 continue;
4952 }
4953 }
4954
4955 if ((lastClass == 0) || (currentClass > lastClass)) {
4956 nextChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
4957 if (nextChar == 0xFFFD) {
4958 lastClass = currentClass;
4959 } else {
4960 mappedCharacters[consumedLength] = 0xFFFD;
4961 didCombine = true;
4962 currentChar = nextChar;
4963 }
4964 }
4965 ++consumedLength;
4966 }
4967
4968 *currentBase = currentChar;
4969 if (didCombine) {
4970 consumedLength = mappedLength;
4971 while (--consumedLength > 0) {
4972 if (mappedCharacters[consumedLength] == 0xFFFD) {
4973 --mappedLength;
4974 memmove(mappedCharacters + consumedLength, mappedCharacters + consumedLength + 1, (mappedLength - consumedLength) * sizeof(UTF32Char));
4975 }
4976 }
4977 }
4978 } else if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo
4979 SInt8 lIndex = currentChar - HANGUL_LBASE;
4980
4981 if ((contents < limit) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4982 SInt16 vIndex = *contents - HANGUL_VBASE;
4983
4984 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4985 SInt16 tIndex = 0;
4986
4987 ++contents; ++currentLength;
4988
4989 if (contents < limit) {
4990 tIndex = *contents - HANGUL_TBASE;
4991 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4992 tIndex = 0;
4993 } else {
4994 ++contents; ++currentLength;
4995 }
4996 }
4997 *mappedCharacters = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4998 mappedLength = 1;
4999 }
5000 }
5001 } else { // collect class 0 non-base characters
5002 while (contents < limit) {
5003 nextChar = *contents;
5004 if (CFUniCharIsSurrogateHighCharacter(nextChar) && ((contents + 1) < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
5005 nextChar = CFUniCharGetLongCharacterForSurrogatePair(nextChar, *(contents + 1));
5006 if (!CFUniCharIsMemberOfBitmap(nextChar, (const uint8_t *)CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))))) break;
5007 } else {
5008 if (!CFUniCharIsMemberOfBitmap(nextChar, nonBaseBMP) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, combiningBMP))) break;
5009 }
5010 currentChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
5011 if (0xFFFD == currentChar) break;
5012
5013 if (nextChar < 0x10000) {
5014 ++contents; ++currentLength;
5015 } else {
5016 contents += 2;
5017 currentLength += 2;
5018 }
5019
5020 *mappedCharacters = currentChar;
5021 mappedLength = 1;
5022 }
5023 }
5024 }
5025
5026 if (mappedLength > 0) {
5027 CFIndex utf16Length = __CFGetUTF16Length(mappedCharacters, mappedLength);
5028
5029 if (utf16Length != currentLength) {
5030 __CFStringChangeSize(string, CFRangeMake(currentIndex, currentLength), utf16Length, true);
5031 currentLength = utf16Length;
5032 }
5033 contents = (UTF16Char *)__CFStrContents(string);
5034 limit = contents + __CFStrLength(string);
5035 contents += currentIndex;
5036 __CFFillInUTF16(mappedCharacters, contents, mappedLength);
5037 contents += utf16Length;
5038 }
5039 currentIndex += currentLength;
5040 }
5041
5042 if (mappedCharacters != buffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, mappedCharacters);
5043 }
5044 }
5045
/* Rewrites theString in place with its folded form.
   theString: mutable string that is modified in place.
   theFlags:  only kCFCompareCaseInsensitive, kCFCompareDiacriticInsensitive and kCFCompareWidthInsensitive select folding work;
              kCFCompareLocalized is consulted once (to pick up the current locale when none is passed) and then masked off.
   locale:    when non-NULL, its language identifier is handed to the folding routine; may be NULL.
   Nothing is done when no folding flag is set or the string is empty.
*/
5046 void CFStringFold(CFMutableStringRef theString, CFOptionFlags theFlags, CFLocaleRef locale) {
5047 CFStringInlineBuffer stringBuffer;
5048 CFIndex length = CFStringGetLength(theString);
5049 CFIndex currentIndex = 0;
5050 CFIndex bufferLength = 0;
5051 UTF32Char buffer[kCFStringStackBufferLength];
5052 const uint8_t *cString;
5053 const uint8_t *langCode;
5054 CFStringEncoding eightBitEncoding;
5055 bool caseInsensitive = ((theFlags & kCFCompareCaseInsensitive) ? true : false);
5056 bool isObjc = CF_IS_OBJC(__kCFStringTypeID, theString);
5057 CFLocaleRef theLocale = locale;
5058
// Localized folding without an explicit locale works on a copy of the current locale; the copy is released at bail.
5059 if ((theFlags & kCFCompareLocalized) && (NULL == locale)) {
5060 theLocale = CFLocaleCopyCurrent();
5061 }
5062
// From here on theFlags carries only the three folding options; every other bit (kCFCompareLocalized included) is cleared.
5063 theFlags &= (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive);
5064
5065 if ((0 == theFlags) || (0 == length)) goto bail; // nothing to do
5066
5067 langCode = ((NULL == theLocale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(theLocale));
5068
5069 eightBitEncoding = __CFStringGetEightBitStringEncoding();
5070 cString = (const uint8_t *)CFStringGetCStringPtr(theString, eightBitEncoding);
5071
5072 if ((NULL != cString) && !caseInsensitive && (kCFStringEncodingASCII == eightBitEncoding)) goto bail; // All ASCII
5073
5074 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5075
// 8-bit fast path: walk the C-string bytes and fold in place for as long as every result is a single character <= 0x7F.
5076 if ((NULL != cString) && (theFlags & (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive))) {
5077 const uint8_t *cStringPtr = cString;
5078 const uint8_t *cStringLimit = cString + length;
// NULL for ObjC strings: their bytes cannot be written directly, so the loop stops at the first byte that would have to change.
5079 uint8_t *cStringContents = (isObjc ? NULL : (uint8_t *)__CFStrContents(theString) + __CFStrSkipAnyLengthByte(theString));
5080
5081 while (cStringPtr < cStringLimit) {
5082 if ((*cStringPtr < 0x80) && (NULL == langCode)) {
// ASCII byte with no language code: only 'A'..'Z' can change, and only for case-insensitive folding.
5083 if (caseInsensitive && (*cStringPtr >= 'A') && (*cStringPtr <= 'Z')) {
5084 if (NULL == cStringContents) {
5085 break;
5086 } else {
5087 cStringContents[cStringPtr - cString] += ('a' - 'A');
5088 }
5089 }
5090 } else {
// Non-ASCII byte, or a language code is in effect: ask the cluster folder. A result that is not one character <= 0x7F
// (or that cannot be written back) ends the fast path with the folded characters still held in buffer/bufferLength.
5091 if ((bufferLength = __CFStringFoldCharacterClusterAtIndex((UTF32Char)__CFCharToUniCharTable[*cStringPtr], &stringBuffer, cStringPtr - cString, theFlags, langCode, buffer, kCFStringStackBufferLength, NULL)) > 0) {
5092 if ((*buffer > 0x7F) || (bufferLength > 1) || (NULL == cStringContents)) break;
5093 cStringContents[cStringPtr - cString] = *buffer;
5094 }
5095 }
5096 ++cStringPtr;
5097 }
5098
// First position the fast path did not finish; equals length when it handled the whole string.
5099 currentIndex = cStringPtr - cString;
5100 }
5101
// General path for whatever remains after the fast path (or for the whole string when the fast path was skipped).
5102 if (currentIndex < length) {
5103 UTF16Char *contents;
5104
5105 if (isObjc) {
// ObjC string: copy the remaining characters into a scratch buffer, fold them through a temporary CF mutable string
// wrapped around that buffer, then replace the original range with the folded result.
5106 CFMutableStringRef cfString;
5107 CFRange range = CFRangeMake(currentIndex, length - currentIndex);
5108
5109 contents = (UTF16Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * range.length, 0);
5110
5111 CFStringGetCharacters(theString, range, contents);
5112
// NOTE(review): contents is never freed explicitly here; presumably cfString takes over the buffer because a NULL
// externalCharactersAllocator is passed - confirm against CFStringCreateMutableWithExternalCharactersNoCopy.
5113 cfString = CFStringCreateMutableWithExternalCharactersNoCopy(kCFAllocatorSystemDefault, contents, range.length, range.length, NULL);
5114
// theFlags no longer contains kCFCompareLocalized, so the resolved locale is passed along explicitly.
5115 CFStringFold(cfString, theFlags, theLocale);
5116
5117 CFStringReplace(theString, range, cfString);
5118
5119 CFRelease(cfString);
5120 } else {
5121 const UTF32Char *characters;
5122 const UTF32Char *charactersLimit;
5123 UTF32Char character;
5124 CFIndex consumedLength;
5125
5126 contents = NULL;
5127
// The fast path stopped on a byte whose folded form is still sitting in buffer: open up room for the extra
// bufferLength - 1 characters, refresh the inline buffer, and store the folded characters at currentIndex.
// (The final 'true' argument of __CFStringChangeSize presumably requests Unicode storage - confirm.)
5128 if (bufferLength > 0) {
5129 __CFStringChangeSize(theString, CFRangeMake(currentIndex + 1, 0), bufferLength - 1, true);
5130 length = __CFStrLength(theString);
5131 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5132
5133 contents = (UTF16Char *)__CFStrContents(theString) + currentIndex;
5134 characters = buffer;
5135 charactersLimit = characters + bufferLength;
5136 while (characters < charactersLimit) *(contents++) = (UTF16Char)*(characters++);
5137 ++currentIndex;
5138 }
5139
5140 while (currentIndex < length) {
5141 character = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex);
5142
// consumedLength stays 0 when the character at currentIndex needs no change.
5143 consumedLength = 0;
5144
5145 if ((NULL == langCode) && (character < 0x80) && (0 == (theFlags & kCFCompareDiacriticInsensitive))) {
// ASCII shortcut: without a language code or diacritic stripping only 'A'..'Z' is rewritten.
5146 if (caseInsensitive && (character >= 'A') && (character <= 'Z')) {
5147 consumedLength = 1;
5148 bufferLength = 1;
5149 *buffer = character + ('a' - 'A');
5150 }
5151 } else {
// Combine a well-formed surrogate pair into one UTF-32 character before handing it to the folder.
5152 if (CFUniCharIsSurrogateHighCharacter(character) && ((currentIndex + 1) < length)) {
5153 UTF16Char lowSurrogate = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex + 1);
5154 if (CFUniCharIsSurrogateLowCharacter(lowSurrogate)) character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
5155 }
5156
5157 bufferLength = __CFStringFoldCharacterClusterAtIndex(character, &stringBuffer, currentIndex, theFlags, langCode, buffer, kCFStringStackBufferLength, &consumedLength);
5158 }
5159
// consumedLength > 0: buffer holds bufferLength UTF-32 characters that replace consumedLength UTF-16 units at currentIndex.
5160 if (consumedLength > 0) {
5161 CFIndex utf16Length = bufferLength;
5162
5163 characters = buffer;
5164 charactersLimit = characters + bufferLength;
5165
5166 while (characters < charactersLimit) if (*(characters++) > 0xFFFF) ++utf16Length; // Extend bufferLength to the UTF-16 length
5167
// Resize when the replacement has a different UTF-16 length; the call is also made (possibly with an empty range and
// zero insertion) while the string is still 8-bit, since the write below stores UTF16Char units.
5168 if ((utf16Length != consumedLength) || __CFStrIsEightBit(theString)) {
5169 CFRange range;
5170 CFIndex insertLength;
5171
5172 if (consumedLength < utf16Length) { // Need to expand
5173 range = CFRangeMake(currentIndex + consumedLength, 0);
5174 insertLength = utf16Length - consumedLength;
5175 } else {
5176 range = CFRangeMake(currentIndex + utf16Length, consumedLength - utf16Length);
5177 insertLength = 0;
5178 }
5179 __CFStringChangeSize(theString, range, insertLength, true);
// The storage may have moved or changed width, so the length and the inline buffer are refreshed.
5180 length = __CFStrLength(theString);
5181 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5182 }
5183
// Store the folded characters as UTF-16 at currentIndex; the return value is deliberately ignored.
5184 (void)CFUniCharFromUTF32(buffer, bufferLength, (UTF16Char *)__CFStrContents(theString) + currentIndex, true, __CF_BIG_ENDIAN__);
5185
5186 currentIndex += utf16Length;
5187 } else {
5188 ++currentIndex;
5189 }
5190 }
5191 }
5192 }
5193
5194 bail:
// Release the locale only when it was created in this function (the caller passed NULL).
5195 if (NULL == locale && theLocale) {
5196 CFRelease(theLocale);
5197 }
5198 }
5199
/* Bit flags kept in CFFormatSpec.flags. Each flag occupies its own bit so they can be OR-ed together. */
enum {
    kCFStringFormatZeroFlag         = 0x01, /* zero padding requested; when clear, padding is the space char */
    kCFStringFormatMinusFlag        = 0x02, /* '-' given; when clear, no flag implied */
    kCFStringFormatPlusFlag         = 0x04, /* '+' given; when clear, no flag implied; overrides the space flag */
    kCFStringFormatSpaceFlag        = 0x08, /* ' ' given; when clear, no flag implied */
    kCFStringFormatExternalSpecFlag = 0x10, /* the spec is resolved using the config dict */
    kCFStringFormatLocalizable      = 0x20  /* explicitly marks the specs we can localize */
};
5208
/* One parsed element of a format string. __CFParseFormatSpec fills in the fields below from the characters of a single
   format specification.
*/
5209 typedef struct {
5210 int16_t size; // one of the CFFormatSize* codes (a code, not a byte count)
5211 int16_t type; // one of the CFFormat*Type codes
// NOTE(review): loc and len are not referenced in this part of the file; presumably the location and length of this
// element within the format string - confirm against the caller.
5212 SInt32 loc;
5213 SInt32 len;
5214 SInt32 widthArg; // literal field width given as digits in the spec
5215 SInt32 precArg; // literal precision given as digits after '.'; the parser treats -1 as "no precision given"
5216 uint32_t flags; // OR of kCFStringFormat* flag bits
5217 int8_t mainArgNum; // zero-based argument index; an explicit "N$" in the spec stores N - 1
5218 int8_t precArgNum; // zero-based index of the argument supplying the precision; the parser stores -2 right after ".*" and treats -1 as "none"
5219 int8_t widthArgNum; // zero-based index of the argument supplying the width; the parser stores -2 right after '*'
5220 int8_t configDictIndex; // NOTE(review): not used in this part of the file - meaning to be confirmed
5221 int8_t numericFormatStyle; // Only set for localizable numeric quantities
5222 } CFFormatSpec;
5223
/* Holds the value of one format argument. The union member that is meaningful depends on type/size:
   __CFStringFormatLocalizedNumber reads int64Value for CFFormatLongType, and for CFFormatDoubleType reads
   longDoubleValue when size is CFFormatSize16 (only with LONG_DOUBLE_SUPPORT) and doubleValue otherwise.
*/
5224 typedef struct {
5225 int16_t type; // presumably a CFFormat*Type code mirroring CFFormatSpec.type - confirm against the code that fills this in
5226 int16_t size; // CFFormatSize* code; compared against CFFormatSize16 to detect long double values
5227 union {
5228 int64_t int64Value;
5229 double doubleValue;
5230 #if LONG_DOUBLE_SUPPORT
5231 long double longDoubleValue;
5232 #endif
5233 void *pointerValue; // NOTE(review): not read in this part of the file; presumably used for pointer-sized arguments - confirm
5234 } value;
5235 } CFPrintValue;
5236
/* Size codes stored in CFFormatSpec.size and CFPrintValue.size.
   The values are consecutive codes, not byte counts (for example CFFormatSize4 is 3 and CFFormatSize8 is 4).
*/
enum {
    CFFormatDefaultSize = 0,
    CFFormatSize1,      /* = 1 */
    CFFormatSize2,      /* = 2 */
    CFFormatSize4,      /* = 3 */
    CFFormatSize8,      /* = 4 */
    CFFormatSize16,     /* = 5 */
    /* long and pointer sizes follow the data model: 8 bytes under LP64, 4 bytes otherwise */
#if __LP64__
    CFFormatSizeLong = CFFormatSize8,
    CFFormatSizePointer = CFFormatSize8
#else
    CFFormatSizeLong = CFFormatSize4,
    CFFormatSizePointer = CFFormatSize4
#endif
};
5252
/* Numeric style bits stored in CFFormatSpec.numericFormatStyle. */
enum {
    CFFormatStyleDecimal = 0x1,
    CFFormatStyleScientific = 0x2,
    CFFormatStyleDecimalOrScientific = (CFFormatStyleDecimal | CFFormatStyleScientific),   /* both bits set, = 3 */
    CFFormatStyleUnsigned = 0x4
};
5259
/* Type codes stored in CFFormatSpec.type. The codes are consecutive, starting at 32. */
enum {
    CFFormatLiteralType = 32,
    CFFormatLongType,           /* 33 */
    CFFormatDoubleType,         /* 34 */
    CFFormatPointerType,        /* 35 */
    CFFormatObjectType,         /* 36; handled specially; ??? not used anymore, can be removed? */
    CFFormatCFType,             /* 37; handled specially */
    CFFormatUnicharsType,       /* 38; handled specially */
    CFFormatCharsType,          /* 39; handled specially */
    CFFormatPascalCharsType,    /* 40; handled specially */
    CFFormatSingleUnicharType,  /* 41; handled specially */
    CFFormatDummyPointerType    /* 42; special case for %n */
};
5273
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
/* Appends a locale-aware rendering of one numeric format argument to output, using shared CFNumberFormatters.
   Only come in here if spec->type is CFFormatLongType or CFFormatDoubleType. Pass in 0 for width or precision if not specified.

   output        string the formatted number is appended to
   locale        locale for the cached formatters; a cached formatter is re-created whenever its locale differs
   values        argument array; the value formatted is values[spec->mainArgNum]
   spec          parsed format specification (type, flags and numericFormatStyle are read)
   width         minimum field width
   precision     precision; only used when hasPrecision is true
   hasPrecision  whether an explicit precision was supplied

   Returns false if couldn't do the format (with the assumption the caller falls back to unlocalized). That includes
   an unrecognized spec->numericFormatStyle and failure to create the number formatter.
*/
static Boolean __CFStringFormatLocalizedNumber(CFMutableStringRef output, CFLocaleRef locale, const CFPrintValue *values, const CFFormatSpec *spec, SInt32 width, SInt32 precision, Boolean hasPrecision) {
    static CFSpinLock_t formatterLock = CFSpinLockInit;
    // These formatters are recached if the locale argument is different
    static CFNumberFormatterRef decimalFormatter = NULL;
    static CFNumberFormatterRef scientificFormatter = NULL;
    static CFNumberFormatterRef gFormatter = NULL;  // for %g
    static SInt32 groupingSize = 0;
    static SInt32 secondaryGroupingSize = 0;

    // !!! This code should be removed before shipping
    // The sentinel must live in an explicitly signed type: where plain char is unsigned, -1 would be stored as 255,
    // never compare equal to -1 again, and localized formatting would end up disabled unconditionally.
    static signed char disableLocalizedFormatting = -1;
    if (disableLocalizedFormatting == -1) disableLocalizedFormatting = (getenv("CFStringDisableLocalizedNumberFormatting") != NULL) ? 1 : 0;
    if (disableLocalizedFormatting) return false;

    CFNumberFormatterRef formatter = NULL;

    __CFSpinLock(&formatterLock);   // We use the formatter from one thread at one time; if this proves to be a bottleneck we need to get fancier

    switch (spec->numericFormatStyle) {
        case CFFormatStyleUnsigned:
        case CFFormatStyleDecimal:
            if (!decimalFormatter || !CFEqual(CFNumberFormatterGetLocale(decimalFormatter), locale)) {  // cache or recache if the locale is different
                if (decimalFormatter) CFRelease(decimalFormatter);
                decimalFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterDecimalStyle);  // since this is shared, remember to reset all its properties!
            }
            formatter = decimalFormatter;
            break;
        case CFFormatStyleScientific:
            if (!scientificFormatter || !CFEqual(CFNumberFormatterGetLocale(scientificFormatter), locale)) {  // cache or recache if the locale is different
                if (scientificFormatter) CFRelease(scientificFormatter);
                scientificFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterScientificStyle);
                if (scientificFormatter) {
                    CFStringRef pattern = CFSTR("#E+00");  // the default pattern does not have the sign if the exponent is positive and it is single digit
                    CFNumberFormatterSetFormat(scientificFormatter, pattern);
                    CFNumberFormatterSetProperty(scientificFormatter, kCFNumberFormatterUseSignificantDigitsKey, kCFBooleanTrue);
                }
            }
            formatter = scientificFormatter;
            break;
        case CFFormatStyleDecimalOrScientific:
            if (!gFormatter || !CFEqual(CFNumberFormatterGetLocale(gFormatter), locale)) {  // cache or recache if the locale is different
                if (gFormatter) CFRelease(gFormatter);
                gFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterDecimalStyle);
                if (gFormatter) {
                    // when we update the locale in gFormatter, we also need to update the two grouping sizes
                    CFNumberRef num = (CFNumberRef) CFNumberFormatterCopyProperty(gFormatter, kCFNumberFormatterGroupingSizeKey);
                    CFNumberGetValue(num, kCFNumberSInt32Type, &groupingSize);
                    CFRelease(num);
                    num = (CFNumberRef) CFNumberFormatterCopyProperty(gFormatter, kCFNumberFormatterSecondaryGroupingSizeKey);
                    CFNumberGetValue(num, kCFNumberSInt32Type, &secondaryGroupingSize);
                    CFRelease(num);
                }
            }
            formatter = gFormatter;
            break;
        default:    // a style this function does not know how to localize; formatter stays NULL
            break;
    }

    // No usable formatter (unknown style or creation failure): give the lock back and let the caller fall back.
    if (NULL == formatter) {
        __CFSpinUnlock(&formatterLock);
        return false;
    }

    SInt32 prec = hasPrecision ? precision : ((spec->type == CFFormatLongType) ? 0 : 6); // default precision of printf is 6

    // pattern must be set before setting width and padding
    // otherwise, the pattern will take over those settings
    if (spec->numericFormatStyle == CFFormatStyleDecimalOrScientific) {
        if (prec == 0) prec = 1;    // at least one sig fig
        CFMutableStringRef pattern = CFStringCreateMutable(NULL, 0);
        // use significant digits pattern
        CFStringAppendCString(pattern, "@", kCFStringEncodingASCII);
        CFStringPad(pattern, CFSTR("#"), prec, 0);
        double targetValue = values[spec->mainArgNum].value.doubleValue;
#if LONG_DOUBLE_SUPPORT
        if (CFFormatSize16 == values[spec->mainArgNum].size) {
            targetValue = values[spec->mainArgNum].value.longDoubleValue; // losing precision
        }
#endif
        double max = pow(10.0, (double)prec);   // if the value requires more digits than the number of sig figs, we need to use scientific format
        double min = 0.0001;                    // if the value is less than 10E-4, scientific format is the shorter form
        // A magnitude of exactly 10^prec already needs prec + 1 digits, so the upper bound is inclusive
        // (printf's %g likewise switches to the exponent style once the exponent is >= the precision).
        if ((targetValue > 0 && (targetValue >= max || targetValue < min)) || (targetValue < 0 && (targetValue <= -max || targetValue > -min))) {
            CFStringAppendCString(pattern, "E+00", kCFStringEncodingASCII);
        } else if (prec > groupingSize && groupingSize != 0) {
            CFStringInsert(pattern, prec-groupingSize, CFSTR(","));  // if we are not using scientific format, we need to set the pattern to use grouping separator
            if (secondaryGroupingSize != 0 && prec > (groupingSize + secondaryGroupingSize)) CFStringInsert(pattern, prec-groupingSize-secondaryGroupingSize, CFSTR(","));
        }
        CFNumberFormatterSetFormat(formatter, pattern);
        CFRelease(pattern);
    }

    CFNumberRef tmp;

    // Maximum fraction digits is always prec; minimum is prec for floating point and 0 for integers.
    tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec);
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMaxFractionDigitsKey, tmp);
    if (spec->type == CFFormatDoubleType) {
        CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinFractionDigitsKey, tmp);
    } else {
        CFRelease(tmp);
        SInt32 zero = 0;
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &zero);
        CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinFractionDigitsKey, tmp);
    }
    CFRelease(tmp);


    // ??? use the right zero here for Arabic
    Boolean padZero = spec->flags & kCFStringFormatZeroFlag;
    if (hasPrecision && spec->type == CFFormatLongType) {  // if we have precision and %d or %u, we pad 0
        padZero = true;
    }
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterPaddingCharacterKey, padZero ? CFSTR("0") : CFSTR(" "));


    // Left (default) or right padding
    SInt32 p = (spec->flags & kCFStringFormatMinusFlag) ? kCFNumberFormatterPadAfterSuffix : (padZero ? kCFNumberFormatterPadAfterPrefix : kCFNumberFormatterPadBeforePrefix);
    if (hasPrecision && spec->type == CFFormatLongType) {
        SInt32 tmpP = kCFNumberFormatterPadAfterPrefix;
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &tmpP);
    } else {
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &p);
    }
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterPaddingPositionKey, tmp);
    CFRelease(tmp);

    // The formatter is shared, so a leading '+' is added to or stripped from its pattern to match this spec's plus flag.
    // The pattern is checked for being non-empty before its first character is read.
    CFStringRef pattern = CFNumberFormatterGetFormat(formatter);
    Boolean patternHasPlus = (pattern && (CFStringGetLength(pattern) > 0) && (CFStringGetCharacterAtIndex(pattern, 0) == '+'));
    if (spec->flags & kCFStringFormatPlusFlag) {
        if (!patternHasPlus) {
            CFMutableStringRef newPattern = CFStringCreateMutableCopy(NULL, 0, CFSTR("+"));
            if (pattern) CFStringAppend(newPattern, pattern);
            CFNumberFormatterSetFormat(formatter, newPattern);
            CFRelease(newPattern);
        }
    } else {
        if (patternHasPlus) {
            CFStringRef newPattern = CFStringCreateWithSubstring(NULL, pattern, CFRangeMake(1, CFStringGetLength(pattern)-1));
            CFNumberFormatterSetFormat(formatter, newPattern);
            CFRelease(newPattern);
        }
    }

    // width == 0 seems to be CFNumberFormatter's default setting
    if (hasPrecision && spec->type == CFFormatLongType) {  // if we have precision and %d or %u, we pad 0 according to precision first
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec);
    } else {
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &width);
    }
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterFormatWidthKey, tmp);
    CFRelease(tmp);

    if (spec->numericFormatStyle == CFFormatStyleScientific) {
        prec++;  // for %e, precision+1 is the number of sig fig
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec);
        CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinSignificantDigitsKey, tmp);
        CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMaxSignificantDigitsKey, tmp);
        CFRelease(tmp);
    }

    CFStringRef localizedNumberString = NULL;
    switch (spec->type) {
        case CFFormatLongType:
            // ??? Need to do unsigned
            localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberSInt64Type, &(values[spec->mainArgNum].value.int64Value));
            break;
        case CFFormatDoubleType: {
#if LONG_DOUBLE_SUPPORT
            if (CFFormatSize16 == values[spec->mainArgNum].size) {
                double doubleValue = values[spec->mainArgNum].value.longDoubleValue; // losing precision
                localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberDoubleType, &doubleValue);
            } else
#endif
            {
                localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberDoubleType, &(values[spec->mainArgNum].value.doubleValue));
            }
            break;
        }
    }
    __CFSpinUnlock(&formatterLock);

    if (localizedNumberString) {
        // we need to pad space if we have %d or %u
        // (the formatter's own width was set to the precision above, so the field width is applied here by hand)
        if (spec->type == CFFormatLongType && hasPrecision && CFStringGetLength(localizedNumberString) < width) {
            CFMutableStringRef finalStr = NULL;
            if (p == kCFNumberFormatterPadAfterSuffix) {
                finalStr = CFStringCreateMutableCopy(NULL, 0, localizedNumberString);
                CFStringPad(finalStr, CFSTR(" "), width, 0);
            } else {
                finalStr = CFStringCreateMutable(NULL, 0);
                CFStringPad(finalStr, CFSTR(" "), width - CFStringGetLength(localizedNumberString), 0);
                CFStringAppend(finalStr, localizedNumberString);
            }
            CFRelease(localizedNumberString);
            localizedNumberString = finalStr;
        }
        CFStringAppend(output, localizedNumberString);
        CFRelease(localizedNumberString);
        return true;
    }
    return false;
}
#endif
5468
5469 CF_INLINE void __CFParseFormatSpec(const UniChar *uformat, const uint8_t *cformat, SInt32 *fmtIdx, SInt32 fmtLen, CFFormatSpec *spec, CFStringRef *configKeyPointer) {
5470 Boolean seenDot = false;
5471 Boolean seenSharp = false;
5472 CFIndex keyIndex = kCFNotFound;
5473
5474 for (;;) {
5475 UniChar ch;
5476 if (fmtLen <= *fmtIdx) return; /* no type */
5477 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5478
5479 if (keyIndex >= 0) {
5480 if ((ch < '0') || ((ch > '9') && (ch < 'A')) || ((ch > 'Z') && (ch < 'a') && (ch != '_')) || (ch > 'z')) {
5481 if (ch == '@') { // found the key
5482 CFIndex length = (*fmtIdx) - 1 - keyIndex;
5483
5484 spec->flags |= kCFStringFormatExternalSpecFlag;
5485 spec->type = CFFormatCFType;
5486 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5487
5488 if ((NULL != configKeyPointer) && (length > 0)) {
5489 if (cformat) {
5490 *configKeyPointer = CFStringCreateWithBytes(NULL, cformat + keyIndex, length, __CFStringGetEightBitStringEncoding(), FALSE);
5491 } else {
5492 *configKeyPointer = CFStringCreateWithCharactersNoCopy(NULL, uformat + keyIndex, length, kCFAllocatorNull);
5493 }
5494 }
5495 return;
5496 }
5497 keyIndex = kCFNotFound;
5498 }
5499 continue;
5500 }
5501
5502 reswtch:switch (ch) {
5503 case '#': // ignored for now
5504 seenSharp = true;
5505 break;
5506 case 0x20:
5507 if (!(spec->flags & kCFStringFormatPlusFlag)) spec->flags |= kCFStringFormatSpaceFlag;
5508 break;
5509 case '-':
5510 spec->flags |= kCFStringFormatMinusFlag;
5511 spec->flags &= ~kCFStringFormatZeroFlag; // remove zero flag
5512 break;
5513 case '+':
5514 spec->flags |= kCFStringFormatPlusFlag;
5515 spec->flags &= ~kCFStringFormatSpaceFlag; // remove space flag
5516 break;
5517 case '0':
5518 if (seenDot) { // after we see '.' and then we see '0', it is 0 precision. We should not see '.' after '0' if '0' is the zero padding flag
5519 spec->precArg = 0;
5520 break;
5521 }
5522 if (!(spec->flags & kCFStringFormatMinusFlag)) spec->flags |= kCFStringFormatZeroFlag;
5523 break;
5524 case 'h':
5525 if (*fmtIdx < fmtLen) {
5526 // fetch next character, don't increment fmtIdx
5527 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
5528 if ('h' == ch) { // 'hh' for char, like 'c'
5529 (*fmtIdx)++;
5530 spec->size = CFFormatSize1;
5531 break;
5532 }
5533 }
5534 spec->size = CFFormatSize2;
5535 break;
5536 case 'l':
5537 if (*fmtIdx < fmtLen) {
5538 // fetch next character, don't increment fmtIdx
5539 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
5540 if ('l' == ch) { // 'll' for long long, like 'q'
5541 (*fmtIdx)++;
5542 spec->size = CFFormatSize8;
5543 break;
5544 }
5545 }
5546 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5547 break;
5548 #if LONG_DOUBLE_SUPPORT
5549 case 'L':
5550 spec->size = CFFormatSize16;
5551 break;
5552 #endif
5553 case 'q':
5554 spec->size = CFFormatSize8;
5555 break;
5556 case 't': case 'z':
5557 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5558 break;
5559 case 'j':
5560 spec->size = CFFormatSize8;
5561 break;
5562 case 'c':
5563 spec->type = CFFormatLongType;
5564 spec->size = CFFormatSize1;
5565 return;
5566 case 'D': case 'd': case 'i': case 'U': case 'u':
5567 // we can localize all but octal or hex
5568 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionMountainLion)) spec->flags |= kCFStringFormatLocalizable;
5569 spec->numericFormatStyle = CFFormatStyleDecimal;
5570 if (ch == 'u' || ch == 'U') spec->numericFormatStyle = CFFormatStyleUnsigned;
5571 // fall thru
5572 case 'O': case 'o': case 'x': case 'X':
5573 spec->type = CFFormatLongType;
5574 // Seems like if spec->size == 0, we should spec->size = CFFormatSize4. However, 0 is handled correctly.
5575 return;
5576 case 'f': case 'F': case 'g': case 'G': case 'e': case 'E': {
5577 // we can localize all but hex float output
5578 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionMountainLion)) spec->flags |= kCFStringFormatLocalizable;
5579 char lch = (ch >= 'A' && ch <= 'Z') ? (ch - 'A' + 'a') : ch;
5580 spec->numericFormatStyle = ((lch == 'e' || lch == 'g') ? CFFormatStyleScientific : 0) | ((lch == 'f' || lch == 'g') ? CFFormatStyleDecimal : 0);
5581 if (seenDot && spec->precArg == -1 && spec->precArgNum == -1) { // for the cases that we have '.' but no precision followed, not even '*'
5582 spec->precArg = 0;
5583 }
5584 }
5585 // fall thru
5586 case 'a': case 'A':
5587 spec->type = CFFormatDoubleType;
5588 if (spec->size != CFFormatSize16) spec->size = CFFormatSize8;
5589 return;
5590 case 'n': /* %n is not handled correctly; for Leopard or newer apps, we disable it further */
5591 spec->type = 1 ? CFFormatDummyPointerType : CFFormatPointerType;
5592 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5593 return;
5594 case 'p':
5595 spec->type = CFFormatPointerType;
5596 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5597 return;
5598 case 's':
5599 spec->type = CFFormatCharsType;
5600 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5601 return;
5602 case 'S':
5603 spec->type = CFFormatUnicharsType;
5604 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5605 return;
5606 case 'C':
5607 spec->type = CFFormatSingleUnicharType;
5608 spec->size = CFFormatSize2;
5609 return;
5610 case 'P':
5611 spec->type = CFFormatPascalCharsType;
5612 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5613 return;
5614 case '@':
5615 if (seenSharp) {
5616 seenSharp = false;
5617 keyIndex = *fmtIdx;
5618 break;
5619 } else {
5620 spec->type = CFFormatCFType;
5621 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5622 return;
5623 }
5624 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
5625 int64_t number = 0;
5626 do {
5627 number = 10 * number + (ch - '0');
5628 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5629 } while ((UInt32)(ch - '0') <= 9);
5630 if ('$' == ch) {
5631 if (-2 == spec->precArgNum) {
5632 spec->precArgNum = (int8_t)number - 1; // Arg numbers start from 1
5633 } else if (-2 == spec->widthArgNum) {
5634 spec->widthArgNum = (int8_t)number - 1; // Arg numbers start from 1
5635 } else {
5636 spec->mainArgNum = (int8_t)number - 1; // Arg numbers start from 1
5637 }
5638 break;
5639 } else if (seenDot) { /* else it's either precision or width */
5640 spec->precArg = (SInt32)number;
5641 } else {
5642 spec->widthArg = (SInt32)number;
5643 }
5644 goto reswtch;
5645 }
5646 case '*':
5647 spec->widthArgNum = -2;
5648 break;
5649 case '.':
5650 seenDot = true;
5651 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5652 if ('*' == ch) {
5653 spec->precArgNum = -2;
5654 break;
5655 }
5656 goto reswtch;
5657 default:
5658 spec->type = CFFormatLiteralType;
5659 return;
5660 }
5661 }
5662 }
5663
5664 /* ??? %s depends on handling of encodings by __CFStringAppendBytes
5665 */
5666 void CFStringAppendFormatAndArguments(CFMutableStringRef outputString, CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
5667 __CFStringAppendFormatCore(outputString, NULL, formatOptions, formatString, 0, NULL, 0, args);
5668 }
5669
// Length of the buffer to call sprintf() with
#define BUFFER_LEN 512

/* SNPRINTF(TYPE, WHAT) converts WHAT to TYPE and formats it into `buffer` using `formatBuffer`.

   The macro is not self-contained; it reads these names from the scope it is expanded in:
   specs, curSpec, buffer, formatBuffer, width and precision.  When specs[curSpec].widthArgNum
   (resp. precArgNum) is not -1, width (resp. precision) is passed as an extra argument ahead
   of the value, so formatBuffer is expected to contain the matching '*'.

   It expands to a braced block, not a do { } while (0), and is invoked without a trailing ';'.

   WHAT is parenthesized before the cast so that a compound expression is converted as a whole
   rather than only its first operand.

   NOTE(review): the snprintf_l() flavor limits output to BUFFER_LEN-1 bytes whatever the real size
   of `buffer`; the sprintf() flavor does no bounds checking at all, so `buffer` must be large enough.
*/
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
#define SNPRINTF(TYPE, WHAT) {				\
    TYPE value = (TYPE) (WHAT);				\
    if (-1 != specs[curSpec].widthArgNum) {		\
        if (-1 != specs[curSpec].precArgNum) {		\
            snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, width, precision, value); \
        } else {					\
            snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, width, value); \
        }						\
    } else {						\
        if (-1 != specs[curSpec].precArgNum) {		\
            snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, precision, value); \
        } else {					\
            snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, value); \
        }						\
    }}
#else
#define SNPRINTF(TYPE, WHAT) {				\
    TYPE value = (TYPE) (WHAT);				\
    if (-1 != specs[curSpec].widthArgNum) {		\
        if (-1 != specs[curSpec].precArgNum) {		\
            sprintf(buffer, formatBuffer, width, precision, value); \
        } else {					\
            sprintf(buffer, formatBuffer, width, value); \
        }						\
    } else {						\
        if (-1 != specs[curSpec].precArgNum) {		\
            sprintf(buffer, formatBuffer, precision, value); \
        } else {					\
            sprintf(buffer, formatBuffer, value);	\
        }						\
    }}
#endif
5706
5707 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) { __CFStringAppendFormatCore(outputString, copyDescFunc, formatOptions, formatString, 0, NULL, 0, args); }
5708
5709 static void __CFStringAppendFormatCore(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef formatString, CFIndex initialArgPosition, const void *origValues, CFIndex originalValuesSize, va_list args) {
5710 SInt32 numSpecs, sizeSpecs, sizeArgNum, formatIdx, curSpec, argNum;
5711 CFIndex formatLen;
5712 #define FORMAT_BUFFER_LEN 400
5713 const uint8_t *cformat = NULL;
5714 const UniChar *uformat = NULL;
5715 UniChar *formatChars = NULL;
5716 UniChar localFormatBuffer[FORMAT_BUFFER_LEN];
5717
5718 #define VPRINTF_BUFFER_LEN 61
5719 CFFormatSpec localSpecsBuffer[VPRINTF_BUFFER_LEN];
5720 CFFormatSpec *specs;
5721 CFPrintValue localValuesBuffer[VPRINTF_BUFFER_LEN];
5722 CFPrintValue *values;
5723 const CFPrintValue *originalValues = (const CFPrintValue *)origValues;
5724 CFDictionaryRef localConfigs[VPRINTF_BUFFER_LEN];
5725 CFDictionaryRef *configs;
5726 CFIndex numConfigs;
5727 CFAllocatorRef tmpAlloc = NULL;
5728 intmax_t dummyLocation; // A place for %n to do its thing in; should be the widest possible int value
5729
5730 numSpecs = 0;
5731 sizeSpecs = 0;
5732 sizeArgNum = 0;
5733 numConfigs = 0;
5734 specs = NULL;
5735 values = NULL;
5736 configs = NULL;
5737
5738
5739 formatLen = CFStringGetLength(formatString);
5740 if (!CF_IS_OBJC(__kCFStringTypeID, formatString)) {
5741 __CFAssertIsString(formatString);
5742 if (!__CFStrIsUnicode(formatString)) {
5743 cformat = (const uint8_t *)__CFStrContents(formatString);
5744 if (cformat) cformat += __CFStrSkipAnyLengthByte(formatString);
5745 } else {
5746 uformat = (const UniChar *)__CFStrContents(formatString);
5747 }
5748 }
5749 if (!cformat && !uformat) {
5750 formatChars = (formatLen > FORMAT_BUFFER_LEN) ? (UniChar *)CFAllocatorAllocate(tmpAlloc = __CFGetDefaultAllocator(), formatLen * sizeof(UniChar), 0) : localFormatBuffer;
5751 if (formatChars != localFormatBuffer && __CFOASafe) __CFSetLastAllocationEventName(formatChars, "CFString (temp)");
5752 CFStringGetCharacters(formatString, CFRangeMake(0, formatLen), formatChars);
5753 uformat = formatChars;
5754 }
5755
5756 /* Compute an upper bound for the number of format specifications */
5757 if (cformat) {
5758 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == cformat[formatIdx]) sizeSpecs++;
5759 } else {
5760 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == uformat[formatIdx]) sizeSpecs++;
5761 }
5762 tmpAlloc = __CFGetDefaultAllocator();
5763 specs = ((2 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? (CFFormatSpec *)CFAllocatorAllocate(tmpAlloc, (2 * sizeSpecs + 1) * sizeof(CFFormatSpec), 0) : localSpecsBuffer;
5764 if (specs != localSpecsBuffer && __CFOASafe) __CFSetLastAllocationEventName(specs, "CFString (temp)");
5765
5766 configs = ((sizeSpecs < VPRINTF_BUFFER_LEN) ? localConfigs : (CFDictionaryRef *)CFAllocatorAllocate(tmpAlloc, sizeof(CFStringRef) * sizeSpecs, 0));
5767
5768 /* Collect format specification information from the format string */
5769 for (curSpec = 0, formatIdx = 0; formatIdx < formatLen; curSpec++) {
5770 SInt32 newFmtIdx;
5771 specs[curSpec].loc = formatIdx;
5772 specs[curSpec].len = 0;
5773 specs[curSpec].size = 0;
5774 specs[curSpec].type = 0;
5775 specs[curSpec].flags = 0;
5776 specs[curSpec].widthArg = -1;
5777 specs[curSpec].precArg = -1;
5778 specs[curSpec].mainArgNum = -1;
5779 specs[curSpec].precArgNum = -1;
5780 specs[curSpec].widthArgNum = -1;
5781 specs[curSpec].configDictIndex = -1;
5782 if (cformat) {
5783 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != cformat[newFmtIdx]; newFmtIdx++);
5784 } else {
5785 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != uformat[newFmtIdx]; newFmtIdx++);
5786 }
5787 if (newFmtIdx != formatIdx) { /* Literal chunk */
5788 specs[curSpec].type = CFFormatLiteralType;
5789 specs[curSpec].len = newFmtIdx - formatIdx;
5790 } else {
5791 CFStringRef configKey = NULL;
5792 newFmtIdx++; /* Skip % */
5793 __CFParseFormatSpec(uformat, cformat, &newFmtIdx, formatLen, &(specs[curSpec]), &configKey);
5794 if (CFFormatLiteralType == specs[curSpec].type) {
5795 specs[curSpec].loc = formatIdx + 1;
5796 specs[curSpec].len = 1;
5797 } else {
5798 specs[curSpec].len = newFmtIdx - formatIdx;
5799 }
5800 }
5801 formatIdx = newFmtIdx;
5802
5803 // fprintf(stderr, "specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
5804
5805 }
5806 numSpecs = curSpec;
5807
5808 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
5809 sizeArgNum = ((NULL == originalValues) ? (3 * sizeSpecs + 1) : originalValuesSize);
5810
5811 values = (sizeArgNum > VPRINTF_BUFFER_LEN) ? (CFPrintValue *)CFAllocatorAllocate(tmpAlloc, sizeArgNum * sizeof(CFPrintValue), 0) : localValuesBuffer;
5812 if (values != localValuesBuffer && __CFOASafe) __CFSetLastAllocationEventName(values, "CFString (temp)");
5813 memset(values, 0, sizeArgNum * sizeof(CFPrintValue));
5814
5815 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
5816 // va_copy is a C99 extension. No support on Windows
5817 va_list copiedArgs;
5818 if (numConfigs > 0) va_copy(copiedArgs, args); // we need to preserve the original state for passing down
5819 #endif
5820
5821 /* Compute values array */
5822 argNum = initialArgPosition;
5823 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
5824 SInt32 newMaxArgNum;
5825 if (0 == specs[curSpec].type) continue;
5826 if (CFFormatLiteralType == specs[curSpec].type) continue;
5827 newMaxArgNum = sizeArgNum;
5828 if (newMaxArgNum < specs[curSpec].mainArgNum) {
5829 newMaxArgNum = specs[curSpec].mainArgNum;
5830 }
5831 if (newMaxArgNum < specs[curSpec].precArgNum) {
5832 newMaxArgNum = specs[curSpec].precArgNum;
5833 }
5834 if (newMaxArgNum < specs[curSpec].widthArgNum) {
5835 newMaxArgNum = specs[curSpec].widthArgNum;
5836 }
5837 if (sizeArgNum < newMaxArgNum) {
5838 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
5839 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
5840 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
5841 return; // more args than we expected!
5842 }
5843 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
5844 if (-2 == specs[curSpec].widthArgNum) {
5845 specs[curSpec].widthArgNum = argNum++;
5846 }
5847 if (-2 == specs[curSpec].precArgNum) {
5848 specs[curSpec].precArgNum = argNum++;
5849 }
5850 if (-1 == specs[curSpec].mainArgNum) {
5851 specs[curSpec].mainArgNum = argNum++;
5852 }
5853
5854 values[specs[curSpec].mainArgNum].size = specs[curSpec].size;
5855 values[specs[curSpec].mainArgNum].type = specs[curSpec].type;
5856
5857
5858 if (-1 != specs[curSpec].widthArgNum) {
5859 values[specs[curSpec].widthArgNum].size = 0;
5860 values[specs[curSpec].widthArgNum].type = CFFormatLongType;
5861 }
5862 if (-1 != specs[curSpec].precArgNum) {
5863 values[specs[curSpec].precArgNum].size = 0;
5864 values[specs[curSpec].precArgNum].type = CFFormatLongType;
5865 }
5866 }
5867
5868 /* Collect the arguments in correct type from vararg list */
5869 for (argNum = 0; argNum < sizeArgNum; argNum++) {
5870 if ((NULL != originalValues) && (0 == values[argNum].type)) values[argNum] = originalValues[argNum];
5871 switch (values[argNum].type) {
5872 case 0:
5873 case CFFormatLiteralType:
5874 break;
5875 case CFFormatLongType:
5876 case CFFormatSingleUnicharType:
5877 if (CFFormatSize1 == values[argNum].size) {
5878 values[argNum].value.int64Value = (int64_t)(int8_t)va_arg(args, int);
5879 } else if (CFFormatSize2 == values[argNum].size) {
5880 values[argNum].value.int64Value = (int64_t)(int16_t)va_arg(args, int);
5881 } else if (CFFormatSize4 == values[argNum].size) {
5882 values[argNum].value.int64Value = (int64_t)va_arg(args, int32_t);
5883 } else if (CFFormatSize8 == values[argNum].size) {
5884 values[argNum].value.int64Value = (int64_t)va_arg(args, int64_t);
5885 } else {
5886 values[argNum].value.int64Value = (int64_t)va_arg(args, int);
5887 }
5888 break;
5889 case CFFormatDoubleType:
5890 #if LONG_DOUBLE_SUPPORT
5891 if (CFFormatSize16 == values[argNum].size) {
5892 values[argNum].value.longDoubleValue = va_arg(args, long double);
5893 } else
5894 #endif
5895 {
5896 values[argNum].value.doubleValue = va_arg(args, double);
5897 }
5898 break;
5899 case CFFormatPointerType:
5900 case CFFormatObjectType:
5901 case CFFormatCFType:
5902 case CFFormatUnicharsType:
5903 case CFFormatCharsType:
5904 case CFFormatPascalCharsType:
5905 values[argNum].value.pointerValue = va_arg(args, void *);
5906 break;
5907 case CFFormatDummyPointerType:
5908 (void)va_arg(args, void *); // Skip the provided argument
5909 values[argNum].value.pointerValue = &dummyLocation;
5910 break;
5911 }
5912 }
5913 va_end(args);
5914
5915 /* Format the pieces together */
5916
5917 if (NULL == originalValues) {
5918 originalValues = values;
5919 originalValuesSize = sizeArgNum;
5920 }
5921
5922 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
5923 SInt32 width = 0, precision = 0;
5924 UniChar *up, ch;
5925 Boolean hasWidth = false, hasPrecision = false;
5926
5927 // widthArgNum and widthArg are never set at the same time; same for precArg*
5928 if (-1 != specs[curSpec].widthArgNum) {
5929 width = (SInt32)values[specs[curSpec].widthArgNum].value.int64Value;
5930 hasWidth = true;
5931 }
5932 if (-1 != specs[curSpec].precArgNum) {
5933 precision = (SInt32)values[specs[curSpec].precArgNum].value.int64Value;
5934 hasPrecision = true;
5935 }
5936 if (-1 != specs[curSpec].widthArg) {
5937 width = specs[curSpec].widthArg;
5938 hasWidth = true;
5939 }
5940 if (-1 != specs[curSpec].precArg) {
5941 precision = specs[curSpec].precArg;
5942 hasPrecision = true;
5943 }
5944
5945 switch (specs[curSpec].type) {
5946 case CFFormatLongType:
5947 case CFFormatDoubleType:
5948 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
5949 if (formatOptions && (specs[curSpec].flags & kCFStringFormatLocalizable) && (CFGetTypeID(formatOptions) == CFLocaleGetTypeID())) { // We have a locale, so we do localized formatting
5950 if (__CFStringFormatLocalizedNumber(outputString, (CFLocaleRef)formatOptions, values, &specs[curSpec], width, precision, hasPrecision)) break;
5951 }
5952 /* Otherwise fall-thru to the next case! */
5953 #endif
5954 case CFFormatPointerType: {
5955 char formatBuffer[128];
5956 #if defined(__GNUC__)
5957 char buffer[BUFFER_LEN + width + precision];
5958 #else
5959 char stackBuffer[BUFFER_LEN];
5960 char *dynamicBuffer = NULL;
5961 char *buffer = stackBuffer;
5962 if (256+width+precision > BUFFER_LEN) {
5963 dynamicBuffer = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, 256+width+precision, 0);
5964 buffer = dynamicBuffer;
5965 }
5966 #endif
5967 SInt32 cidx, idx, loc;
5968 Boolean appended = false;
5969 loc = specs[curSpec].loc;
5970 // In preparation to call snprintf(), copy the format string out
5971 if (cformat) {
5972 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
5973 if ('$' == cformat[loc + cidx]) {
5974 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
5975 } else {
5976 formatBuffer[idx] = cformat[loc + cidx];
5977 }
5978 }
5979 } else {
5980 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
5981 if ('$' == uformat[loc + cidx]) {
5982 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
5983 } else {
5984 formatBuffer[idx] = (int8_t)uformat[loc + cidx];
5985 }
5986 }
5987 }
5988 formatBuffer[idx] = '\0';
5989 // Should modify format buffer here if necessary; for example, to translate %qd to
5990 // the equivalent, on architectures which do not have %q.
5991 buffer[sizeof(buffer) - 1] = '\0';
5992 switch (specs[curSpec].type) {
5993 case CFFormatLongType:
5994 if (CFFormatSize8 == specs[curSpec].size) {
5995 SNPRINTF(int64_t, values[specs[curSpec].mainArgNum].value.int64Value)
5996 } else {
5997 SNPRINTF(SInt32, values[specs[curSpec].mainArgNum].value.int64Value)
5998 }
5999 break;
6000 case CFFormatPointerType:
6001 case CFFormatDummyPointerType:
6002 SNPRINTF(void *, values[specs[curSpec].mainArgNum].value.pointerValue)
6003 break;
6004
6005 case CFFormatDoubleType:
6006 #if LONG_DOUBLE_SUPPORT
6007 if (CFFormatSize16 == specs[curSpec].size) {
6008 SNPRINTF(long double, values[specs[curSpec].mainArgNum].value.longDoubleValue)
6009 } else
6010 #endif
6011 {
6012 SNPRINTF(double, values[specs[curSpec].mainArgNum].value.doubleValue)
6013 }
6014 // See if we need to localize the decimal point
6015 if (formatOptions) { // We have localization info
6016 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
6017 CFStringRef decimalSeparator = (CFGetTypeID(formatOptions) == CFLocaleGetTypeID()) ? (CFStringRef)CFLocaleGetValue((CFLocaleRef)formatOptions, kCFLocaleDecimalSeparatorKey) : (CFStringRef)CFDictionaryGetValue(formatOptions, CFSTR("NSDecimalSeparator"));
6018 #else
6019 CFStringRef decimalSeparator = CFSTR(".");
6020 #endif
6021 if (decimalSeparator != NULL) { // We have a decimal separator in there
6022 CFIndex decimalPointLoc = 0;
6023 while (buffer[decimalPointLoc] != 0 && buffer[decimalPointLoc] != '.') decimalPointLoc++;
6024 if (buffer[decimalPointLoc] == '.') { // And we have a decimal point in the formatted string
6025 buffer[decimalPointLoc] = 0;
6026 CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
6027 CFStringAppend(outputString, decimalSeparator);
6028 CFStringAppendCString(outputString, (const char *)(buffer + decimalPointLoc + 1), __CFStringGetEightBitStringEncoding());
6029 appended = true;
6030 }
6031 }
6032 }
6033 break;
6034 }
6035 if (!appended) CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
6036 #if !defined(__GNUC__)
6037 if (dynamicBuffer) {
6038 CFAllocatorDeallocate(kCFAllocatorSystemDefault, dynamicBuffer);
6039 }
6040 #endif
6041 }
6042 break;
6043 case CFFormatLiteralType:
6044 if (cformat) {
6045 __CFStringAppendBytes(outputString, (const char *)(cformat+specs[curSpec].loc), specs[curSpec].len, __CFStringGetEightBitStringEncoding());
6046 } else {
6047 CFStringAppendCharacters(outputString, uformat+specs[curSpec].loc, specs[curSpec].len);
6048 }
6049 break;
6050 case CFFormatPascalCharsType:
6051 case CFFormatCharsType:
6052 if (values[specs[curSpec].mainArgNum].value.pointerValue == NULL) {
6053 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
6054 } else {
6055 int len;
6056 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue;
6057 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
6058 len = ((unsigned char *)str)[0];
6059 str++;
6060 if (hasPrecision && precision < len) len = precision;
6061 } else { // C-string case
6062 if (!hasPrecision) { // No precision, so rely on the terminating null character
6063 len = strlen(str);
6064 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
6065 const char *terminatingNull = (const char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
6066 if (terminatingNull) { // There was a null in the first precision characters
6067 len = terminatingNull - str;
6068 } else {
6069 len = precision;
6070 }
6071 }
6072 }
6073 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6074 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6075 // to ignore those flags (and, say, never pad with '0' instead of space).
6076 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6077 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
6078 if (hasWidth && width > len) {
6079 int w = width - len; // We need this many spaces; do it ten at a time
6080 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6081 }
6082 } else {
6083 if (hasWidth && width > len) {
6084 int w = width - len; // We need this many spaces; do it ten at a time
6085 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6086 }
6087 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
6088 }
6089 }
6090 break;
6091 case CFFormatSingleUnicharType:
6092 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value;
6093 CFStringAppendCharacters(outputString, &ch, 1);
6094 break;
6095 case CFFormatUnicharsType:
6096 //??? need to handle width, precision, and padding arguments
6097 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue;
6098 if (NULL == up) {
6099 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
6100 } else {
6101 int len;
6102 for (len = 0; 0 != up[len]; len++);
6103 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6104 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6105 // to ignore those flags (and, say, never pad with '0' instead of space).
6106 if (hasPrecision && precision < len) len = precision;
6107 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6108 CFStringAppendCharacters(outputString, up, len);
6109 if (hasWidth && width > len) {
6110 int w = width - len; // We need this many spaces; do it ten at a time
6111 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6112 }
6113 } else {
6114 if (hasWidth && width > len) {
6115 int w = width - len; // We need this many spaces; do it ten at a time
6116 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6117 }
6118 CFStringAppendCharacters(outputString, up, len);
6119 }
6120 }
6121 break;
6122 case CFFormatCFType:
6123 case CFFormatObjectType:
6124 if (specs[curSpec].configDictIndex != -1) { // config dict
6125 CFTypeRef object = NULL;
6126 CFStringRef innerFormat = NULL;
6127
6128 switch (values[specs[curSpec].mainArgNum].type) {
6129 case CFFormatLongType:
6130 object = CFNumberCreate(tmpAlloc, kCFNumberSInt64Type, &(values[specs[curSpec].mainArgNum].value.int64Value));
6131 break;
6132
6133 case CFFormatDoubleType:
6134 #if LONG_DOUBLE_SUPPORT
6135 if (CFFormatSize16 == values[specs[curSpec].mainArgNum].size) {
6136 double aValue = values[specs[curSpec].mainArgNum].value.longDoubleValue; // losing precision
6137
6138 object = CFNumberCreate(tmpAlloc, kCFNumberDoubleType, &aValue);
6139 } else
6140 #endif
6141 {
6142 object = CFNumberCreate(tmpAlloc, kCFNumberDoubleType, &(values[specs[curSpec].mainArgNum].value.doubleValue));
6143 }
6144 break;
6145
6146 case CFFormatPointerType:
6147 object = CFNumberCreate(tmpAlloc, kCFNumberCFIndexType, &(values[specs[curSpec].mainArgNum].value.pointerValue));
6148 break;
6149
6150 case CFFormatPascalCharsType:
6151 case CFFormatCharsType:
6152 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
6153 CFMutableStringRef aString = CFStringCreateMutable(tmpAlloc, 0);
6154 int len;
6155 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue;
6156 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
6157 len = ((unsigned char *)str)[0];
6158 str++;
6159 if (hasPrecision && precision < len) len = precision;
6160 } else { // C-string case
6161 if (!hasPrecision) { // No precision, so rely on the terminating null character
6162 len = strlen(str);
6163 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
6164 const char *terminatingNull = (const char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
6165 if (terminatingNull) { // There was a null in the first precision characters
6166 len = terminatingNull - str;
6167 } else {
6168 len = precision;
6169 }
6170 }
6171 }
6172 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6173 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6174 // to ignore those flags (and, say, never pad with '0' instead of space).
6175 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6176 __CFStringAppendBytes(aString, str, len, __CFStringGetSystemEncoding());
6177 if (hasWidth && width > len) {
6178 int w = width - len; // We need this many spaces; do it ten at a time
6179 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6180 }
6181 } else {
6182 if (hasWidth && width > len) {
6183 int w = width - len; // We need this many spaces; do it ten at a time
6184 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6185 }
6186 __CFStringAppendBytes(aString, str, len, __CFStringGetSystemEncoding());
6187 }
6188
6189 object = aString;
6190 }
6191 break;
6192
6193 case CFFormatSingleUnicharType:
6194 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value;
6195 object = CFStringCreateWithCharactersNoCopy(tmpAlloc, &ch, 1, kCFAllocatorNull);
6196 break;
6197
6198 case CFFormatUnicharsType:
6199 //??? need to handle width, precision, and padding arguments
6200 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue;
6201 if (NULL != up) {
6202 CFMutableStringRef aString = CFStringCreateMutable(tmpAlloc, 0);
6203 int len;
6204 for (len = 0; 0 != up[len]; len++);
6205 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6206 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6207 // to ignore those flags (and, say, never pad with '0' instead of space).
6208 if (hasPrecision && precision < len) len = precision;
6209 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6210 CFStringAppendCharacters(aString, up, len);
6211 if (hasWidth && width > len) {
6212 int w = width - len; // We need this many spaces; do it ten at a time
6213 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6214 }
6215 } else {
6216 if (hasWidth && width > len) {
6217 int w = width - len; // We need this many spaces; do it ten at a time
6218 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6219 }
6220 CFStringAppendCharacters(aString, up, len);
6221 }
6222 object = aString;
6223 }
6224 break;
6225
6226 case CFFormatCFType:
6227 case CFFormatObjectType:
6228 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) object = CFRetain(values[specs[curSpec].mainArgNum].value.pointerValue);
6229 break;
6230 }
6231
6232 if (NULL != object) CFRelease(object);
6233
6234 } else if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
6235 CFStringRef str = NULL;
6236 if (copyDescFunc) {
6237 str = copyDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
6238 } else {
6239 str = __CFCopyFormattingDescription(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
6240 if (NULL == str) {
6241 str = CFCopyDescription(values[specs[curSpec].mainArgNum].value.pointerValue);
6242 }
6243 }
6244 if (str) {
6245 CFStringAppend(outputString, str);
6246 CFRelease(str);
6247 } else {
6248 CFStringAppendCString(outputString, "(null description)", kCFStringEncodingASCII);
6249 }
6250 } else {
6251 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
6252 }
6253 break;
6254 }
6255 }
6256
6257 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
6258 // va_copy is a C99 extension. No support on Windows
6259 if (numConfigs > 0) va_end(copiedArgs);
6260 #endif
6261 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
6262 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
6263 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
6264 if (configs != localConfigs) CFAllocatorDeallocate(tmpAlloc, configs);
6265 }
6266
6267 #undef SNPRINTF
6268
6269 void CFShowStr(CFStringRef str) {
6270 CFAllocatorRef alloc;
6271
6272 if (!str) {
6273 fprintf(stdout, "(null)\n");
6274 return;
6275 }
6276
6277 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
6278 fprintf(stdout, "This is an NSString, not CFString\n");
6279 return;
6280 }
6281
6282 alloc = CFGetAllocator(str);
6283
6284 fprintf(stdout, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str), __CFStrIsEightBit(str));
6285 fprintf(stdout, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
6286 __CFStrHasLengthByte(str), __CFStrHasNullByte(str), __CFStrIsInline(str));
6287
6288 fprintf(stdout, "Allocator ");
6289 if (alloc != kCFAllocatorSystemDefault) {
6290 fprintf(stdout, "%p\n", (void *)alloc);
6291 } else {
6292 fprintf(stdout, "SystemDefault\n");
6293 }
6294 fprintf(stdout, "Mutable %d\n", __CFStrIsMutable(str));
6295 if (!__CFStrIsMutable(str) && __CFStrHasContentsDeallocator(str)) {
6296 if (__CFStrContentsDeallocator(str)) fprintf(stdout, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str));
6297 else fprintf(stdout, "ContentsDeallocatorFunc None\n");
6298 } else if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str)) {
6299 fprintf(stdout, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef)str));
6300 }
6301
6302 if (__CFStrIsMutable(str)) {
6303 fprintf(stdout, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str), __CFStrIsFixed(str) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str));
6304 }
6305 fprintf(stdout, "Contents %p\n", (void *)__CFStrContents(str));
6306 }
6307
6308
6309
6310 #undef HANGUL_SBASE
6311 #undef HANGUL_LBASE
6312 #undef HANGUL_VBASE
6313 #undef HANGUL_TBASE
6314 #undef HANGUL_SCOUNT
6315 #undef HANGUL_LCOUNT
6316 #undef HANGUL_VCOUNT
6317 #undef HANGUL_TCOUNT
6318 #undef HANGUL_NCOUNT
6319