]> git.saurik.com Git - apple/cf.git/blob - CFString.c
CF-1151.16.tar.gz
[apple/cf.git] / CFString.c
1 /*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFString.c
25 Copyright (c) 1998-2014, Apple Inc. All rights reserved.
26 Responsibility: Ali Ozer
27
28 !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined.
29 */
30
31 #include <CoreFoundation/CFBase.h>
32 #include <CoreFoundation/CFString.h>
33 #include <CoreFoundation/CFDictionary.h>
34 #include <CoreFoundation/CFStringEncodingConverterExt.h>
35 #include <CoreFoundation/CFUniChar.h>
36 #include <CoreFoundation/CFUnicodeDecomposition.h>
37 #include <CoreFoundation/CFUnicodePrecomposition.h>
38 #include <CoreFoundation/CFPriv.h>
39 #include <CoreFoundation/CFNumber.h>
40 #include <CoreFoundation/CFNumberFormatter.h>
41 #include "CFInternal.h"
42 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
43 #include "CFLocaleInternal.h"
44 #endif
45 #include <stdarg.h>
46 #include <stdio.h>
47 #include <string.h>
48 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
49 #include <unistd.h>
50 #endif
51
/* Build-time switches for this file. */

// LONG_DOUBLE_SUPPORT is 1 only when the compiler defines __GNUC__
#if defined(__GNUC__)
#define LONG_DOUBLE_SUPPORT 1
#else
#define LONG_DOUBLE_SUPPORT 0
#endif



// USE_STRING_ROM is hard-wired to 0 in this file
#define USE_STRING_ROM 0


// Shared-string instrumentation is off unless the build defines INSTRUMENT_SHARED_STRINGS itself
#ifndef INSTRUMENT_SHARED_STRINGS
#define INSTRUMENT_SHARED_STRINGS 0
#endif

// Only declared here; NOTE(review): presumably defined by the locale code - confirm
CF_PRIVATE const CFStringRef __kCFLocaleCollatorID;
68
69 #if INSTRUMENT_SHARED_STRINGS
70 #include <sys/stat.h> /* for umask() */
71
72 static void __CFRecordStringAllocationEvent(const char *encoding, const char *bytes, CFIndex byteCount) {
73 static CFLock_t lock = CFLockInit;
74
75 if (memchr(bytes, '\n', byteCount)) return; //never record string allocation events for strings with newlines, because those confuse our parser and because they'll never go into the ROM
76
77 __CFLock(&lock);
78 static int fd;
79 if (! fd) {
80 extern char **_NSGetProgname(void);
81 const char *name = *_NSGetProgname();
82 if (! name) name = "UNKNOWN";
83 umask(0);
84 char path[1024];
85 snprintf(path, sizeof(path), "/tmp/CFSharedStringInstrumentation_%s_%d.txt", name, getpid());
86 fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0666);
87 if (fd <= 0) {
88 int error = errno;
89 const char *errString = strerror(error);
90 fprintf(stderr, "open() failed with error %d (%s)\n", error, errString);
91 }
92 }
93 if (fd > 0) {
94 char *buffer = NULL;
95 char formatString[256];
96 snprintf(formatString, sizeof(formatString), "%%-8d\t%%-16s\t%%.%lds\n", byteCount);
97 int resultCount = asprintf(&buffer, formatString, getpid(), encoding, bytes);
98 if (buffer && resultCount > 0) write(fd, buffer, resultCount);
99 else puts("Couldn't record allocation event");
100 free(buffer);
101 }
102 __CFUnlock(&lock);
103 }
104 #endif //INSTRUMENT_SHARED_STRINGS
105
// Pointer to a routine taking conversion flags, one 8-bit character and an output UniChar, and returning a Boolean
// (presumably "conversion succeeded" - confirm against the converter implementations)
typedef Boolean (*UNI_CHAR_FUNC)(UInt32 flags, UInt8 ch, UniChar *unicodeChar);

// Declared by hand here rather than through a header; only on the Apple deployment targets
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
extern size_t malloc_good_size(size_t size);
#endif
// Defined outside this file
extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars);

// Forward declaration; the definition is not in this part of the file
static void __CFStringAppendFormatCore(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFStringRef (*contextDescFunc)(void *, const void *, const void *, bool, bool *), CFDictionaryRef formatOptions, CFDictionaryRef stringsDictConfig, CFStringRef formatString, CFIndex initialArgPosition, const void *origValues, CFIndex originalValuesSize, va_list args);

// The following two items exist only in DEBUG builds
#if defined(DEBUG)

// We put this into C & Pascal strings if we can't convert
#define CONVERSIONFAILURESTR "CFString conversion failed"

// We set this to true when purging the constant string table, so CFStringDeallocate doesn't assert
static Boolean __CFConstantStringTableBeingFreed = false;

#endif
124
125
126
127 // This section is for CFString compatibility and other behaviors...
128
129 static CFOptionFlags _CFStringCompatibilityMask = 0;
130
131 void _CFStringSetCompatibility(CFOptionFlags mask) {
132 _CFStringCompatibilityMask |= mask;
133 }
134
135 __attribute__((used))
136 CF_INLINE Boolean __CFStringGetCompatibility(CFOptionFlags mask) {
137 return (_CFStringCompatibilityMask & mask) == mask;
138 }
139
140
141
// The constant empty string used by CFString; per the original note it is initialized in CFStringInitialize
CONST_STRING_DECL(kCFEmptyString, "")
144
// This is separate for C++
/* Storage fields of a mutable CFString. It is embedded in the "variants" union of struct __CFString;
   the buffer pointer comes first, as it does in the other not-inline variants.
*/
struct __notInlineMutable {
    void *buffer;                               // Contents; allocated and freed through __CFStrAllocateMutableContents / __CFStrDeallocateMutableContents
    CFIndex length;                             // Explicit length (mutable strings always have one)
    CFIndex capacity;                           // Capacity in bytes
    unsigned int hasGap:1;                      // Currently unused
    unsigned int isFixedCapacity:1;             // When set, desiredCapacity acts as an upper bound on the backing store
    unsigned int isExternalMutable:1;
    unsigned int capacityProvidedExternally:1;  // When set, capacity is only changed when the string must grow beyond it
    // Client-requested capacity, in characters; shares one word with the four flag bits above
#if __LP64__
    unsigned long desiredCapacity:60;
#else
    unsigned long desiredCapacity:28;
#endif
    CFAllocatorRef contentsAllocator;           // Optional
};                             // The only mutable variant for CFString
161
162
/* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
   Which member of "variants" is valid for a given string is determined by the flag bits kept in
   base._cfinfo[CF_INFO_BITS] (see the __CFStr... inline functions that test those bits).
*/
struct __CFString {
    CFRuntimeBase base;
    union {	// In many cases the allocated structs are smaller than these
	struct __inline1 {
	    CFIndex length;
        } inline1;                                      // Bytes follow the length
	struct __notInlineImmutable1 {
	    void *buffer;                               // Note that the buffer is in the same place for all non-inline variants of CFString
	    CFIndex length;
	    CFAllocatorRef contentsDeallocator;		// Optional; just the dealloc func is used
	} notInlineImmutable1;                          // This is the usual not-inline immutable CFString
	struct __notInlineImmutable2 {
	    void *buffer;
	    CFAllocatorRef contentsDeallocator;		// Optional; just the dealloc func is used
	} notInlineImmutable2;                          // This is the not-inline immutable CFString when length is stored with the contents (first byte)
	struct __notInlineMutable notInlineMutable;     // Used by mutable strings only
    } variants;
};
183
184 /*
185 I = is immutable
186 E = not inline contents
187 U = is Unicode
188 N = has NULL byte
189 L = has length byte
190 D = explicit deallocator for contents (for mutable objects, allocator)
191 C = length field is CFIndex (rather than UInt32); only meaningful for 64-bit, really
192 if needed this bit (valuable real-estate) can be given up for another bit elsewhere, since this info is needed just for 64-bit
193
194 Also need (only for mutable)
195 F = is fixed
196 G = has gap
197 Cap, DesCap = capacity
198
B7 B6 B5 B4 B3 B2 B1 B0
         U  N  L  C  I

B6 B5
 0  0   inline contents
 0  1   E (freed with default allocator)
 1  0   E (not freed)
 1  1   E D
207
208 !!! Note: Constant CFStrings use the bit patterns:
209 C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable)
210 D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable)
The bit usages should not be modified in a way that would affect these bit patterns.
212 */
213
/* Masks and values for the flag bits kept in base._cfinfo[CF_INFO_BITS].
   Several names deliberately share a value: they describe the same bits from different points of view
   (contents storage pattern, deallocator for immutable strings, allocator for mutable strings).
*/
enum {
    __kCFFreeContentsWhenDoneMask = 0x020,
    __kCFFreeContentsWhenDone = 0x020,          // This bit is set in both the "default free" (0x020) and "custom free" (0x060) patterns
    __kCFContentsMask = 0x060,                  // The two bits selecting one of the four storage patterns below
    __kCFHasInlineContents = 0x000,
    __kCFNotInlineContentsNoFree = 0x040,		// Don't free
    __kCFNotInlineContentsDefaultFree = 0x020,	// Use allocator's free function
    __kCFNotInlineContentsCustomFree = 0x060,		// Use a specially provided free function
    __kCFHasContentsAllocatorMask = 0x060,
    __kCFHasContentsAllocator = 0x060,		// (For mutable strings) use a specially provided allocator
    __kCFHasContentsDeallocatorMask = 0x060,
    __kCFHasContentsDeallocator = 0x060,        // Same bit pattern as __kCFNotInlineContentsCustomFree
    __kCFIsMutableMask = 0x01,
    __kCFIsMutable = 0x01,
    __kCFIsUnicodeMask = 0x10,
    __kCFIsUnicode = 0x10,                      // Clear means 8-bit contents
    __kCFHasNullByteMask = 0x08,
    __kCFHasNullByte = 0x08,
    __kCFHasLengthByteMask = 0x04,
    __kCFHasLengthByte = 0x04,
    // !!! Bit 0x02 has been freed up
};
236
237
238 // !!! Assumptions:
239 // Mutable strings are not inline
240 // Compile-time constant strings are not inline
241 // Mutable strings always have explicit length (but they might also have length byte and null byte)
242 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
243 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
244
245 /* The following set of functions and macros need to be updated on change to the bit configuration
246 */
247 CF_INLINE Boolean __CFStrIsMutable(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsMutableMask) == __kCFIsMutable;}
248 CF_INLINE Boolean __CFStrIsInline(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFContentsMask) == __kCFHasInlineContents;}
249 CF_INLINE Boolean __CFStrFreeContentsWhenDone(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFFreeContentsWhenDoneMask) == __kCFFreeContentsWhenDone;}
250 CF_INLINE Boolean __CFStrHasContentsDeallocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsDeallocatorMask) == __kCFHasContentsDeallocator;}
251 CF_INLINE Boolean __CFStrIsUnicode(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) == __kCFIsUnicode;}
252 CF_INLINE Boolean __CFStrIsEightBit(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) != __kCFIsUnicode;}
253 CF_INLINE Boolean __CFStrHasNullByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasNullByteMask) == __kCFHasNullByte;}
254 CF_INLINE Boolean __CFStrHasLengthByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte;}
255 CF_INLINE Boolean __CFStrHasExplicitLength(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & (__kCFIsMutableMask | __kCFHasLengthByteMask)) != __kCFHasLengthByte;} // Has explicit length if (1) mutable or (2) not mutable and no length byte
256 CF_INLINE Boolean __CFStrIsConstant(CFStringRef str) {
257 #if __LP64__
258 return str->base._rc == 0;
259 #else
260 return (str->base._cfinfo[CF_RC_BITS]) == 0;
261 #endif
262 }
263
264 CF_INLINE SInt32 __CFStrSkipAnyLengthByte(CFStringRef str) {return ((str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents
265
266 /* Returns ptr to the buffer (which might include the length byte).
267 */
268 CF_INLINE const void *__CFStrContents(CFStringRef str) {
269 if (__CFStrIsInline(str)) {
270 return (const void *)(((uintptr_t)&(str->variants)) + (__CFStrHasExplicitLength(str) ? sizeof(CFIndex) : 0));
271 } else { // Not inline; pointer is always word 2
272 return str->variants.notInlineImmutable1.buffer;
273 }
274 }
275
276 static CFAllocatorRef *__CFStrContentsDeallocatorPtr(CFStringRef str) {
277 return __CFStrHasExplicitLength(str) ? &(((CFMutableStringRef)str)->variants.notInlineImmutable1.contentsDeallocator) : &(((CFMutableStringRef)str)->variants.notInlineImmutable2.contentsDeallocator); }
278
279 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
280 CF_INLINE CFAllocatorRef __CFStrContentsDeallocator(CFStringRef str) {
281 return *__CFStrContentsDeallocatorPtr(str);
282 }
283
284 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
285 CF_INLINE void __CFStrSetContentsDeallocator(CFStringRef str, CFAllocatorRef allocator) {
286 if (!(0 || 0)) CFRetain(allocator);
287 *__CFStrContentsDeallocatorPtr(str) = allocator;
288 }
289
290 static CFAllocatorRef *__CFStrContentsAllocatorPtr(CFStringRef str) {
291 CFAssert(!__CFStrIsInline(str), __kCFLogAssertion, "Asking for contents allocator of inline string");
292 CFAssert(__CFStrIsMutable(str), __kCFLogAssertion, "Asking for contents allocator of an immutable string");
293 return (CFAllocatorRef *)&(str->variants.notInlineMutable.contentsAllocator);
294 }
295
296 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
297 CF_INLINE CFAllocatorRef __CFStrContentsAllocator(CFMutableStringRef str) {
298 return *(__CFStrContentsAllocatorPtr(str));
299 }
300
301 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
302 CF_INLINE void __CFStrSetContentsAllocator(CFMutableStringRef str, CFAllocatorRef allocator) {
303 if (!(0 || 0)) CFRetain(allocator);
304 *(__CFStrContentsAllocatorPtr(str)) = allocator;
305 }
306
307 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
308 */
309 CF_INLINE CFIndex __CFStrLength(CFStringRef str) {
310 if (__CFStrHasExplicitLength(str)) {
311 if (__CFStrIsInline(str)) {
312 return str->variants.inline1.length;
313 } else {
314 return str->variants.notInlineImmutable1.length;
315 }
316 } else {
317 return (CFIndex)(*((uint8_t *)__CFStrContents(str)));
318 }
319 }
320
321 CF_INLINE CFIndex __CFStrLength2(CFStringRef str, const void *buffer) {
322 if (__CFStrHasExplicitLength(str)) {
323 if (__CFStrIsInline(str)) {
324 return str->variants.inline1.length;
325 } else {
326 return str->variants.notInlineImmutable1.length;
327 }
328 } else {
329 return (CFIndex)(*((uint8_t *)buffer));
330 }
331 }
332
333
334 Boolean __CFStringIsEightBit(CFStringRef str) {
335 return __CFStrIsEightBit(str);
336 }
337
338 /* Sets the content pointer for immutable or mutable strings.
339 */
340 CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p) {
341 // XXX_PCB catch all writes for mutable string case.
342 __CFAssignWithWriteBarrier((void **)&((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p);
343 }
344 CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v) {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._cfinfo[CF_INFO_BITS], 6, 0, v);}
345
346 CF_INLINE void __CFStrSetExplicitLength(CFStringRef str, CFIndex v) {
347 if (__CFStrIsInline(str)) {
348 ((CFMutableStringRef)str)->variants.inline1.length = v;
349 } else {
350 ((CFMutableStringRef)str)->variants.notInlineImmutable1.length = v;
351 }
352 }
353
354 CF_INLINE void __CFStrSetUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= __kCFIsUnicode;}
355 CF_INLINE void __CFStrClearUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~__kCFIsUnicode;}
356 CF_INLINE void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= (__kCFHasLengthByte | __kCFHasNullByte);}
357 CF_INLINE void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~(__kCFHasLengthByte | __kCFHasNullByte);}
358
359
360 // Assumption: The following set of inlines (using str->variants.notInlineMutable) are called with mutable strings only
361 CF_INLINE Boolean __CFStrIsFixed(CFStringRef str) {return str->variants.notInlineMutable.isFixedCapacity;}
362 CF_INLINE Boolean __CFStrIsExternalMutable(CFStringRef str) {return str->variants.notInlineMutable.isExternalMutable;}
363 CF_INLINE Boolean __CFStrHasContentsAllocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsAllocatorMask) == __kCFHasContentsAllocator;}
364 CF_INLINE void __CFStrSetIsFixed(CFMutableStringRef str) {str->variants.notInlineMutable.isFixedCapacity = 1;}
365 CF_INLINE void __CFStrSetIsExternalMutable(CFMutableStringRef str) {str->variants.notInlineMutable.isExternalMutable = 1;}
366 //CF_INLINE void __CFStrSetHasGap(CFMutableStringRef str) {str->variants.notInlineMutable.hasGap = 1;} currently unused
367
368 // If capacity is provided externally, we only change it when we need to grow beyond it
369 CF_INLINE Boolean __CFStrCapacityProvidedExternally(CFStringRef str) {return str->variants.notInlineMutable.capacityProvidedExternally;}
370 CF_INLINE void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 1;}
371 CF_INLINE void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 0;}
372
373 // "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
374 CF_INLINE CFIndex __CFStrCapacity(CFStringRef str) {return str->variants.notInlineMutable.capacity;}
375 CF_INLINE void __CFStrSetCapacity(CFMutableStringRef str, CFIndex cap) {str->variants.notInlineMutable.capacity = cap;}
376
377 // "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
378 CF_INLINE CFIndex __CFStrDesiredCapacity(CFStringRef str) {return str->variants.notInlineMutable.desiredCapacity;}
379 CF_INLINE void __CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex size) {str->variants.notInlineMutable.desiredCapacity = size;}
380
381
382 static void *__CFStrAllocateMutableContents(CFMutableStringRef str, CFIndex size) {
383 void *ptr;
384 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
385 ptr = CFAllocatorAllocate(alloc, size, 0);
386 if (__CFOASafe) __CFSetLastAllocationEventName(ptr, "CFString (store)");
387 return ptr;
388 }
389
390 static void __CFStrDeallocateMutableContents(CFMutableStringRef str, void *buffer) {
391 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
392 if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str) && (0)) {
393 // do nothing
394 } else if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
395 // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle.
396 auto_zone_release(objc_collectableZone(), buffer);
397 } else {
398 CFAllocatorDeallocate(alloc, buffer);
399 }
400 }
401
402
403
404
/* CFString specific init flags
   Note that you cannot count on the external buffer not being copied.
   Also, if you specify an external buffer, you should not change it behind the CFString's back.
   NOTE(review): kCFStringNoCopyNoFreeProvidedContents (0x30000) is numerically equal to
   kCFStringPascal | kCFStringNoCopyProvidedContents, so these three values are not independent bits;
   confirm how the code consuming these flags tells them apart.
*/
enum {
    __kCFThinUnicodeIfPossible = 0x1000000,		/* See if the Unicode contents can be thinned down to 8-bit */
    kCFStringPascal = 0x10000,				/* Indicating that the string data has a Pascal string structure (length byte at start) */
    kCFStringNoCopyProvidedContents = 0x20000,		/* Don't copy the provided string contents if possible; free it when no longer needed */
    kCFStringNoCopyNoFreeProvidedContents = 0x30000	/* Don't copy the provided string contents if possible; don't free it when no longer needed */
};
415
/* System Encoding.
   The three cached encodings below start out as kCFStringEncodingInvalidId, meaning "not determined yet".
*/
static CFStringEncoding __CFDefaultSystemEncoding = kCFStringEncodingInvalidId;        // Filled in by CFStringGetSystemEncoding()
static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId;    // Filled in by CFStringFileSystemEncoding()
CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId;       // Filled in by __CFStringComputeEightBitStringEncoding(); not static, so visible outside this file


/* __defaultEncoding is the compile-time choice, per deployment target, of the value CFStringGetSystemEncoding() installs on first use. */
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
#define __defaultEncoding kCFStringEncodingMacRoman
#elif DEPLOYMENT_TARGET_WINDOWS
#define __defaultEncoding kCFStringEncodingWindowsLatin1
#else
#warning This value must match __CFGetConverter condition in CFStringEncodingConverter.c
#define __defaultEncoding kCFStringEncodingISOLatin1
#endif
431
432 CFStringEncoding CFStringGetSystemEncoding(void) {
433 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) {
434 __CFDefaultSystemEncoding = __defaultEncoding;
435 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding);
436 __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? (UNI_CHAR_FUNC)converter->toUnicode : NULL);
437 }
438 return __CFDefaultSystemEncoding;
439 }
440
441 // Fast version for internal use
442
443 CF_INLINE CFStringEncoding __CFStringGetSystemEncoding(void) {
444 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) (void)CFStringGetSystemEncoding();
445 return __CFDefaultSystemEncoding;
446 }
447
448 CFStringEncoding CFStringFileSystemEncoding(void) {
449 if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) {
450 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS
451 __CFDefaultFileSystemEncoding = kCFStringEncodingUTF8;
452 #else
453 __CFDefaultFileSystemEncoding = CFStringGetSystemEncoding();
454 #endif
455 }
456
457 return __CFDefaultFileSystemEncoding;
458 }
459
460 /* ??? Is returning length when no other answer is available the right thing?
461 !!! All of the (length > (LONG_MAX / N)) type checks are to avoid wrap-around and eventual malloc overflow in the client
462 */
463 CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) {
464 if (encoding == kCFStringEncodingUTF8) {
465 return (length > (LONG_MAX / 3)) ? kCFNotFound : (length * 3);
466 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32
467 return (length > (LONG_MAX / sizeof(UTF32Char))) ? kCFNotFound : (length * sizeof(UTF32Char));
468 } else {
469 encoding &= 0xFFF; // Mask off non-base part
470 }
471 switch (encoding) {
472 case kCFStringEncodingUnicode:
473 return (length > (LONG_MAX / sizeof(UniChar))) ? kCFNotFound : (length * sizeof(UniChar));
474
475 case kCFStringEncodingNonLossyASCII:
476 return (length > (LONG_MAX / 6)) ? kCFNotFound : (length * 6); // 1 Unichar can expand to 6 bytes
477
478 case kCFStringEncodingMacRoman:
479 case kCFStringEncodingWindowsLatin1:
480 case kCFStringEncodingISOLatin1:
481 case kCFStringEncodingNextStepLatin:
482 case kCFStringEncodingASCII:
483 return length / sizeof(uint8_t);
484
485 default:
486 return length / sizeof(uint8_t);
487 }
488 }
489
490
491 /* Returns whether the indicated encoding can be stored in 8-bit chars
492 */
493 CF_INLINE Boolean __CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding) {
494 switch (encoding & 0xFFF) { // just use encoding base
495 case kCFStringEncodingInvalidId:
496 case kCFStringEncodingUnicode:
497 case kCFStringEncodingNonLossyASCII:
498 return false;
499
500 case kCFStringEncodingMacRoman:
501 case kCFStringEncodingWindowsLatin1:
502 case kCFStringEncodingISOLatin1:
503 case kCFStringEncodingNextStepLatin:
504 case kCFStringEncodingASCII:
505 return true;
506
507 default: return false;
508 }
509 }
510
/* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
   For 10.9-linked apps, we've set this encoding to ASCII for all cases; see <rdar://problem/3597233>

   As written, useAscii is a local that is always true, so every call stores kCFStringEncodingASCII in
   __CFDefaultEightBitStringEncoding and returns it; the else branch that consults the system encoding
   is never taken. The re-entry flag is set and cleared again before any other function is called.
*/
CFStringEncoding __CFStringComputeEightBitStringEncoding(void) {
    // This flag prevents recursive entry into __CFStringComputeEightBitStringEncoding
    static Boolean __CFStringIsBeingInitialized2 = false;
    if (__CFStringIsBeingInitialized2) return kCFStringEncodingASCII;
    __CFStringIsBeingInitialized2 = true;

    Boolean useAscii = true;
    __CFStringIsBeingInitialized2 = false;
    if (useAscii) {
        __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII;
    } else {
        // Not reached while useAscii is constant: pick the system encoding if it fits in 8 bits, else ASCII
        if (__CFDefaultEightBitStringEncoding == kCFStringEncodingInvalidId) {
            CFStringEncoding systemEncoding = CFStringGetSystemEncoding();
            if (systemEncoding == kCFStringEncodingInvalidId) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
                return kCFStringEncodingASCII;
            } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding)) {
                __CFDefaultEightBitStringEncoding = systemEncoding;
            } else {
                __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII;
            }
        }
    }
    return __CFDefaultEightBitStringEncoding;
}
538
539 /* Returns whether the provided bytes can be stored in ASCII
540 */
541 CF_INLINE Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
542 #if __LP64__
543 /* A bit of unrolling; go by 32s, 16s, and 8s first */
544 while (len >= 32) {
545 uint64_t val = *(const uint64_t *)bytes;
546 uint64_t hiBits = (val & 0x8080808080808080ULL); // More efficient to collect this rather than do a conditional at every step
547 bytes += 8;
548 val = *(const uint64_t *)bytes;
549 hiBits |= (val & 0x8080808080808080ULL);
550 bytes += 8;
551 val = *(const uint64_t *)bytes;
552 hiBits |= (val & 0x8080808080808080ULL);
553 bytes += 8;
554 val = *(const uint64_t *)bytes;
555 if (hiBits | (val & 0x8080808080808080ULL)) return false;
556 bytes += 8;
557 len -= 32;
558 }
559
560 while (len >= 16) {
561 uint64_t val = *(const uint64_t *)bytes;
562 uint64_t hiBits = (val & 0x8080808080808080ULL);
563 bytes += 8;
564 val = *(const uint64_t *)bytes;
565 if (hiBits | (val & 0x8080808080808080ULL)) return false;
566 bytes += 8;
567 len -= 16;
568 }
569
570 while (len >= 8) {
571 uint64_t val = *(const uint64_t *)bytes;
572 if (val & 0x8080808080808080ULL) return false;
573 bytes += 8;
574 len -= 8;
575 }
576 #endif
577 /* Go by 4s */
578 while (len >= 4) {
579 uint32_t val = *(const uint32_t *)bytes;
580 if (val & 0x80808080U) return false;
581 bytes += 4;
582 len -= 4;
583 }
584 /* Handle the rest one byte at a time */
585 while (len--) {
586 if (*bytes++ & 0x80) return false;
587 }
588
589 return true;
590 }
591
592 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
593 */
594 CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) {
595 // If the encoding is the same as the 8-bit CFString encoding, we can just use the bytes as-is.
596 // One exception is ASCII, which unfortunately needs to mean ISOLatin1 for compatibility reasons <rdar://problem/5458321>.
597 if (encoding == __CFStringGetEightBitStringEncoding() && encoding != kCFStringEncodingASCII) return true;
598 if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true;
599 return false;
600 }
601
602
603 /* Returns whether a length byte can be tacked on to a string of the indicated length.
604 */
605 CF_INLINE Boolean __CFCanUseLengthByte(CFIndex len) {
606 #define __kCFMaxPascalStrLen 255
607 return (len <= __kCFMaxPascalStrLen) ? true : false;
608 }
609
/* Various string assertions
   Each macro expands to a CFAssert-family check (or to __CFGenericValidateType). Every macro parameter is
   parenthesized where it is used inside a condition, so expression arguments (a + b, c ? x : y, ...) are
   evaluated as a unit. Arguments may still be evaluated more than once; don't pass expressions with side
   effects. The two brace-wrapped macros expand to a block, not to a single expression.
*/
#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && ((idx) + (count)) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsNotNegative(idx) CFAssert2((idx) >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
619
620
621 /* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity. This function will return -1 if the new capacity is just too big (> LONG_MAX).
622 Additional complications are applied in the following order:
623 - desiredCapacity, which is the minimum (except initially things can be at zero)
624 - rounding up to factor of 8
- compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256
- we need to make sure GROWFACTOR computation doesn't suffer from overflow issues on 32-bit, hence the casting to unsigned. Normally for required capacity of C bytes, the allocated space is (3C+1)/2. If C >= ULONG_MAX/3, we instead use the larger of (LONG_MAX - 4095) and C
627 */
/* SHRINKFACTOR(c): half of c; the buffer is shrunk once the required capacity drops below this.
   GROWFACTOR(c):   (3c + 1) / 2, the capacity to allocate when extra room is wanted. On 32-bit the
                    multiplication is done in unsigned long and, for c >= ULONG_MAX / 3, the larger of
                    (LONG_MAX - 4095) and c is used instead so the computation cannot wrap.
   The argument is parenthesized everywhere so that expression arguments such as GROWFACTOR(a + b) are
   scaled as a whole; it may be evaluated more than once, so avoid arguments with side effects.
*/
#define SHRINKFACTOR(c) ((c) / 2)

#if __LP64__
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
#else
#define GROWFACTOR(c) (((c) >= (ULONG_MAX / 3UL)) ? __CFMax(LONG_MAX - 4095, (c)) : (((unsigned long)(c) * 3 + 1) / 2))
#endif
635
636 CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, unsigned long reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) {
637 if (capacity != 0 || reqCapacity != 0) { /* If initially zero, and space not needed, leave it at that... */
638 if ((capacity < reqCapacity) || /* We definitely need the room... */
639 (!__CFStrCapacityProvidedExternally(str) && /* Assuming we control the capacity... */
640 ((reqCapacity < SHRINKFACTOR(capacity)) || /* ...we have too much room! */
641 (!leaveExtraRoom && (reqCapacity < capacity))))) { /* ...we need to eliminate the extra space... */
642 if (reqCapacity > LONG_MAX) return -1; /* Too big any way you cut it */
643 unsigned long newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity; /* Grow by 3/2 if extra room is desired */
644 CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize;
645 if (newCapacity < desiredCapacity) { /* If less than desired, bump up to desired */
646 newCapacity = desiredCapacity;
647 } else if (__CFStrIsFixed(str)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
648 newCapacity = __CFMax(desiredCapacity, reqCapacity); /* !!! So, fixed is not really fixed, but "tight" */
649 }
650 if (__CFStrHasContentsAllocator(str)) { /* Also apply any preferred size from the allocator */
651 newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0);
652 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
653 } else {
654 newCapacity = malloc_good_size(newCapacity);
655 #endif
656 }
657 return (newCapacity > LONG_MAX) ? -1 : (CFIndex)newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
658 }
659 }
660 return capacity;
661 }
662
663
664 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
665 numBlocks is current total number of blocks within buffer.
666 blockSize is the size of each block in bytes
667 ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
668 insertLength is the final spacing between the remaining blocks
669
670 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO
671 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
672 if insertLength = 0, result = A B D G H
673
674 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
675 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
676
677 */
/* Describes one run of bytes that rearrangeBlocks() has to keep, plus how far that run has to move.
   rearrangeBlocks() fills all three fields with byte quantities (block counts multiplied by blockSize) and uses them directly as offsets into its buffer.
*/
678 typedef struct _CFStringDeferredRange {
679 CFIndex beginning; // Byte offset at which the run currently starts
680 CFIndex length; // Number of bytes in the run
681 CFIndex shift; // Signed number of bytes to move the run by; the destination is beginning + shift
682 } CFStringDeferredRange;
683
/* Bookkeeping for the growable stack of CFStringDeferredRange elements manipulated by push() and pop().
   rearrangeBlocks() starts it out pointing at a local array; push() switches to allocator-provided storage once that array is full.
*/
684 typedef struct _CFStringStackInfo {
685 CFIndex capacity; // Capacity (if capacity == count, need to realloc to add another)
686 CFIndex count; // Number of elements actually stored
687 CFStringDeferredRange *stack; // Storage for the elements; element count-1 is the top
688 Boolean hasMalloced; // Indicates "stack" is allocated and needs to be deallocated when done
689 char _padding[3]; // Not read anywhere in the visible code; rearrangeBlocks() initializes it to zeros
690 } CFStringStackInfo;
691
692 CF_INLINE void pop (CFStringStackInfo *si, CFStringDeferredRange *topRange) {
693 si->count = si->count - 1;
694 *topRange = si->stack[si->count];
695 }
696
697 CF_INLINE void push (CFStringStackInfo *si, const CFStringDeferredRange *newRange) {
698 if (si->count == si->capacity) {
699 // increase size of the stack
700 si->capacity = (si->capacity + 4) * 2;
701 if (si->hasMalloced) {
702 si->stack = (CFStringDeferredRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, si->stack, si->capacity * sizeof(CFStringDeferredRange), 0);
703 } else {
704 CFStringDeferredRange *newStack = (CFStringDeferredRange *)CFAllocatorAllocate(kCFAllocatorSystemDefault, si->capacity * sizeof(CFStringDeferredRange), 0);
705 memmove(newStack, si->stack, si->count * sizeof(CFStringDeferredRange));
706 si->stack = newStack;
707 si->hasMalloced = true;
708 }
709 }
710 si->stack[si->count] = *newRange;
711 si->count = si->count + 1;
712 }
713
714 static void rearrangeBlocks(
715 uint8_t *buffer,
716 CFIndex numBlocks,
717 CFIndex blockSize,
718 const CFRange *ranges,
719 CFIndex numRanges,
720 CFIndex insertLength) {
721
722 #define origStackSize 10
723 CFStringDeferredRange origStack[origStackSize];
724 CFStringStackInfo si = {origStackSize, 0, origStack, false, {0, 0, 0}};
725 CFStringDeferredRange currentNonRange = {0, 0, 0};
726 CFIndex currentRange = 0;
727 CFIndex amountShifted = 0;
728
729 // must have at least 1 range left.
730
731 while (currentRange < numRanges) {
732 currentNonRange.beginning = (ranges[currentRange].location + ranges[currentRange].length) * blockSize;
733 if ((numRanges - currentRange) == 1) {
734 // at the end.
735 currentNonRange.length = numBlocks * blockSize - currentNonRange.beginning;
736 if (currentNonRange.length == 0) break;
737 } else {
738 currentNonRange.length = (ranges[currentRange + 1].location * blockSize) - currentNonRange.beginning;
739 }
740 currentNonRange.shift = amountShifted + (insertLength * blockSize) - (ranges[currentRange].length * blockSize);
741 amountShifted = currentNonRange.shift;
742 if (amountShifted <= 0) {
743 // process current item and rest of stack
744 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
745 while (si.count > 0) {
746 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
747 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
748 }
749 } else {
750 // add currentNonRange to stack.
751 push (&si, &currentNonRange);
752 }
753 currentRange++;
754 }
755
756 // no more ranges. if anything is on the stack, process.
757
758 while (si.count > 0) {
759 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
760 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
761 }
762 if (si.hasMalloced) CFAllocatorDeallocate (kCFAllocatorSystemDefault, si.stack);
763 }
764
765 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
766 */
767 static void copyBlocks(
768 const uint8_t *srcBuffer,
769 uint8_t *dstBuffer,
770 CFIndex srcLength,
771 Boolean srcIsUnicode,
772 Boolean dstIsUnicode,
773 const CFRange *ranges,
774 CFIndex numRanges,
775 CFIndex insertLength) {
776
777 CFIndex srcLocationInBytes = 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
778 CFIndex dstLocationInBytes = 0; // ditto
779 CFIndex srcBlockSize = srcIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
780 CFIndex insertLengthInBytes = insertLength * (dstIsUnicode ? sizeof(UniChar) : sizeof(uint8_t));
781 CFIndex rangeIndex = 0;
782 CFIndex srcToDstMultiplier = (srcIsUnicode == dstIsUnicode) ? 1 : (sizeof(UniChar) / sizeof(uint8_t));
783
784 // Loop over the ranges, copying the range to be preserved (right before each range)
785 while (rangeIndex < numRanges) {
786 CFIndex srcLengthInBytes = ranges[rangeIndex].location * srcBlockSize - srcLocationInBytes; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
787 if (srcLengthInBytes > 0) {
788 if (srcIsUnicode == dstIsUnicode) {
789 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLengthInBytes);
790 } else {
791 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLengthInBytes);
792 }
793 }
794 srcLocationInBytes += srcLengthInBytes + ranges[rangeIndex].length * srcBlockSize; // Skip over the just-copied and to-be-deleted stuff
795 dstLocationInBytes += srcLengthInBytes * srcToDstMultiplier + insertLengthInBytes;
796 rangeIndex++;
797 }
798
799 // Do last range (the one beyond last range)
800 if (srcLocationInBytes < srcLength * srcBlockSize) {
801 if (srcIsUnicode == dstIsUnicode) {
802 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLength * srcBlockSize - srcLocationInBytes);
803 } else {
804 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLength * srcBlockSize - srcLocationInBytes);
805 }
806 }
807 }
808
809 /* Call the callback; if it doesn't exist or returns false, then log
810 */
811 static void __CFStringHandleOutOfMemory(CFTypeRef obj) {
812 CFStringRef msg = CFSTR("Out of memory. We suggest restarting the application. If you have an unsaved document, create a backup copy in Finder, then try to save.");
813 {
814 CFLog(kCFLogLevelCritical, CFSTR("%@"), msg);
815 }
816 }
817
818 /* Reallocates the backing store of the string to accommodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)

   str:             the mutable string to resize
   deleteRanges:    the numDeleteRanges ranges (in characters) to remove; these are passed straight on to rearrangeBlocks()/copyBlocks()
   insertLength:    number of characters of room to open up in place of EACH deleted range; the resulting length is curLength + insertLength * numDeleteRanges - (sum of the deleted lengths)
   makeUnicode:     if true, the resulting storage is UniChar-based even if the string is currently 8-bit

   The reserved room is not filled in here; only the surviving characters are moved or copied.
   NOTE(review): the out-of-memory paths below are only safe if __CFStringHandleOutOfMemory() never returns; nothing after those calls re-checks newCapacity or newContents.
819 */
820 static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) {
821 const uint8_t *curContents = (uint8_t *)__CFStrContents(str);
822 CFIndex curLength = curContents ? __CFStrLength2(str, curContents) : 0;
823 unsigned long newLength; // We use unsigned to better keep track of overflow
824
825 // Compute new length of the string
826 if (numDeleteRanges == 1) {
827 newLength = curLength + insertLength - deleteRanges[0].length;
828 } else {
829 CFIndex cnt;
830 newLength = curLength + insertLength * numDeleteRanges;
831 for (cnt = 0; cnt < numDeleteRanges; cnt++) newLength -= deleteRanges[cnt].length;
832 }
833
834 __CFAssertIfFixedLengthIsOK(str, newLength);
835
836 if (newLength == 0) {
837 // A somewhat optimized code-path for this special case, with the following implicit values:
838 // newIsUnicode = false
839 // useLengthAndNullBytes = false
840 // newCharSize = sizeof(uint8_t)
841 // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
842 // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
843 CFIndex curCapacity = __CFStrCapacity(str);
844 CFIndex newCapacity = __CFStrNewCapacity(str, 0, curCapacity, true, sizeof(uint8_t));
845 if (newCapacity != curCapacity) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
846 if (curContents) __CFStrDeallocateMutableContents(str, (uint8_t *)curContents);
847 __CFStrSetContentPtr(str, NULL);
848 __CFStrSetCapacity(str, 0);
849 __CFStrClearCapacityProvidedExternally(str);
850 __CFStrClearHasLengthAndNullBytes(str);
851 if (!__CFStrIsExternalMutable(str)) __CFStrClearUnicode(str); // External mutable implies Unicode
852 } else {
// The buffer is kept; turn it into an empty 8-bit string (unless the string is external mutable, whose flags are left alone)
853 if (!__CFStrIsExternalMutable(str)) {
854 __CFStrClearUnicode(str);
855 if (curCapacity >= (int)(sizeof(uint8_t) * 2)) { // If there's room
// Byte 0 is the length byte and byte 1 the terminating null of the now empty string
856 __CFStrSetHasLengthAndNullBytes(str);
857 ((uint8_t *)curContents)[0] = ((uint8_t *)curContents)[1] = 0;
858 } else {
859 __CFStrClearHasLengthAndNullBytes(str);
860 }
861 }
862 }
863 __CFStrSetExplicitLength(str, 0);
864 } else { /* This else-clause assumes newLength > 0 */
865 Boolean oldIsUnicode = __CFStrIsUnicode(str);
866 Boolean newIsUnicode = makeUnicode || (oldIsUnicode /* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str);
867 CFIndex newCharSize = newIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
868 Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */;
869 CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0; /* 2 extra bytes to keep the length byte & null... */
870 CFIndex curCapacity = __CFStrCapacity(str);
// Make sure newLength * newCharSize + numExtraBytes (computed just below) cannot exceed LONG_MAX
871 if (newLength > (LONG_MAX - numExtraBytes) / newCharSize) __CFStringHandleOutOfMemory(str); // Does not return
872 CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize);
873 if (newCapacity == -1) __CFStringHandleOutOfMemory(str); // Does not return
874 Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
875 uint8_t *newContents;
876 if (allocNewBuffer) {
877 newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
878 if (!newContents) { // Try allocating without extra room
879 newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, false, newCharSize);
880 // Since we checked for this above, it shouldn't be the case here, but just in case
881 if (newCapacity == -1) __CFStringHandleOutOfMemory(str); // Does not return
882 newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
883 if (!newContents) __CFStringHandleOutOfMemory(str); // Does not return
884 }
885 } else {
886 newContents = (uint8_t *)curContents;
887 }
888
// State of the string BEFORE the change; the flags on str are only updated further down, after the contents have been moved
889 Boolean hasLengthAndNullBytes = __CFStrHasLengthByte(str);
890
891 CFAssert1(hasLengthAndNullBytes == __CFStrHasNullByte(str), __kCFLogAssertion, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__);
892
893 // Calculate pointers to the actual string content (skipping over the length byte, if present). Note that keeping a reference to the base is needed for newContents under GC, since the copy may take a long time.
894 const uint8_t *curContentsBody = hasLengthAndNullBytes ? (curContents+1) : curContents;
895 uint8_t *newContentsBody = useLengthAndNullBytes ? (newContents+1) : newContents;
896
897 if (curContents) {
898 if (oldIsUnicode == newIsUnicode) {
// Same character width: shuffle in place if the body did not move, otherwise copy into the new buffer
899 if (newContentsBody == curContentsBody) {
900 rearrangeBlocks(newContentsBody, curLength, newCharSize, deleteRanges, numDeleteRanges, insertLength);
901 } else {
902 copyBlocks(curContentsBody, newContentsBody, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
903 }
904 } else if (newIsUnicode) { /* this implies we have a new buffer */
905 copyBlocks(curContentsBody, newContentsBody, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
906 }
// Note: there is no branch for oldIsUnicode && !newIsUnicode; newIsUnicode is always true when oldIsUnicode is (see its computation above)
907 if (allocNewBuffer && __CFStrFreeContentsWhenDone(str)) __CFStrDeallocateMutableContents(str, (void *)curContents);
908 }
909
// Bring the length/null bytes and the flags on str in line with the new representation
910 if (!newIsUnicode) {
911 if (useLengthAndNullBytes) {
912 newContentsBody[newLength] = 0; /* Always have null byte, if not unicode */
913 newContents[0] = __CFCanUseLengthByte(newLength) ? (uint8_t)newLength : 0;
914 if (!hasLengthAndNullBytes) __CFStrSetHasLengthAndNullBytes(str);
915 } else {
916 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
917 }
918 if (oldIsUnicode) __CFStrClearUnicode(str);
919 } else { // New is unicode...
920 if (!oldIsUnicode) __CFStrSetUnicode(str);
921 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
922 }
923 __CFStrSetExplicitLength(str, newLength);
924
// Only now switch the string over to the new buffer (if there is one)
925 if (allocNewBuffer) {
926 __CFStrSetCapacity(str, newCapacity);
927 __CFStrClearCapacityProvidedExternally(str);
928 __CFStrSetContentPtr(str, newContents);
929 }
930 }
931 }
932
933 /* Same as above, but takes one range (very common case)
934 */
935 CF_INLINE void __CFStringChangeSize(CFMutableStringRef str, CFRange range, CFIndex insertLength, Boolean makeUnicode) {
936 __CFStringChangeSizeMultiple(str, &range, 1, insertLength, makeUnicode);
937 }
938
939
940 #if defined(DEBUG)
941 static Boolean __CFStrIsConstantString(CFStringRef str);
942 #endif
943
944 static void __CFStringDeallocate(CFTypeRef cf) {
945 CFStringRef str = (CFStringRef)cf;
946
947 // If in DEBUG mode, check to see if the string a CFSTR, and complain.
948 CFAssert1(__CFConstantStringTableBeingFreed || !__CFStrIsConstantString((CFStringRef)cf), __kCFLogAssertion, "Tried to deallocate CFSTR(\"%@\")", str);
949
950 if (!__CFStrIsInline(str)) {
951 uint8_t *contents;
952 Boolean isMutable = __CFStrIsMutable(str);
953 if (__CFStrFreeContentsWhenDone(str) && (contents = (uint8_t *)__CFStrContents(str))) {
954 if (isMutable) {
955 __CFStrDeallocateMutableContents((CFMutableStringRef)str, contents);
956 } else {
957 if (__CFStrHasContentsDeallocator(str)) {
958 CFAllocatorRef allocator = __CFStrContentsDeallocator(str);
959 CFAllocatorDeallocate(allocator, contents);
960 if (!(0 || 0 )) CFRelease(allocator);
961 } else {
962 CFAllocatorRef alloc = __CFGetAllocator(str);
963 CFAllocatorDeallocate(alloc, contents);
964 }
965 }
966 }
967 if (isMutable && __CFStrHasContentsAllocator(str)) {
968 CFAllocatorRef allocator = __CFStrContentsAllocator((CFMutableStringRef)str);
969 if (!(0 || 0)) CFRelease(allocator);
970 }
971 }
972 }
973
974 static Boolean __CFStringEqual(CFTypeRef cf1, CFTypeRef cf2) {
975 CFStringRef str1 = (CFStringRef)cf1;
976 CFStringRef str2 = (CFStringRef)cf2;
977 const uint8_t *contents1;
978 const uint8_t *contents2;
979 CFIndex len1;
980
981 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
982 /* !!! We do not need == test, as the CFBase runtime assures this */
983
984 contents1 = (uint8_t *)__CFStrContents(str1);
985 contents2 = (uint8_t *)__CFStrContents(str2);
986 len1 = __CFStrLength2(str1, contents1);
987
988 if (len1 != __CFStrLength2(str2, contents2)) return false;
989
990 contents1 += __CFStrSkipAnyLengthByte(str1);
991 contents2 += __CFStrSkipAnyLengthByte(str2);
992
993 if (__CFStrIsEightBit(str1) && __CFStrIsEightBit(str2)) {
994 return memcmp((const char *)contents1, (const char *)contents2, len1) ? false : true;
995 } else if (__CFStrIsEightBit(str1)) { /* One string has Unicode contents */
996 CFStringInlineBuffer buf;
997 CFIndex buf_idx = 0;
998
999 CFStringInitInlineBuffer(str1, &buf, CFRangeMake(0, len1));
1000 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
1001 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents2)[buf_idx]) return false;
1002 }
1003 } else if (__CFStrIsEightBit(str2)) { /* One string has Unicode contents */
1004 CFStringInlineBuffer buf;
1005 CFIndex buf_idx = 0;
1006
1007 CFStringInitInlineBuffer(str2, &buf, CFRangeMake(0, len1));
1008 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
1009 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents1)[buf_idx]) return false;
1010 }
1011 } else { /* Both strings have Unicode contents */
1012 CFIndex idx;
1013 for (idx = 0; idx < len1; idx++) {
1014 if (((UniChar *)contents1)[idx] != ((UniChar *)contents2)[idx]) return false;
1015 }
1016 }
1017 return true;
1018 }
1019
1020
1021 /* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
1022 If the length is less than or equal to 96, then the hash function is simply the
1023 following (n is the nth UniChar character, starting from 0):
1024
1025 hash(-1) = length
1026 hash(n) = hash(n-1) * 257 + unichar(n);
1027 Hash = hash(length-1) + (hash(length-1) << (length & 31)), i.e. hash(length-1) * (2^(length & 31) + 1)
1028
1029 If the length is greater than 96, then the above algorithm applies to
1030 characters 0..31, (length/2)-16..(length/2)+15, and length-32..length-1, inclusive;
1031 thus the first, middle, and last 32 characters.
1032
1033 Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401; 67503105 is 257^4 - 256^4
1034 If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.
1035 !!! We haven't updated for LP64 yet
1036
1037 NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.
1038
1039 Hash function was changed between Panther and Tiger, and Tiger and Leopard.
1040 */
/* Strings of up to this many characters are hashed in full; for longer ones the hash functions below only look at 32 characters each from the beginning, the middle and the end (3 * 32 = 96). */
1041 #define HashEverythingLimit 96
1042
/* Folds the next four characters into the hash and advances "pointer" by four.
   The accumulator is not a parameter: a variable named "result" has to be in scope at the point of use.
   accessStart and accessEnd are token fragments that get pasted around the index (0..3) to form the expression that reads a character; e.g. "p[" and "]" read p[0]..p[3] directly, while "table[p[" and "]]" read through a lookup table.
   The constants are the powers of 257 mentioned in the comment above.
*/
1043 #define HashNextFourUniChars(accessStart, accessEnd, pointer) \
1044 {result = result * 67503105 + (accessStart 0 accessEnd) * 16974593 + (accessStart 1 accessEnd) * 66049 + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;}
1045
/* Single-character version of the above: folds one character into "result" and advances "pointer" by one. */
1046 #define HashNextUniChar(accessStart, accessEnd, pointer) \
1047 {result = result * 257 + (accessStart 0 accessEnd); pointer++;}
1048
1049
1050 /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing.
1051 */
1052 CF_INLINE CFHashCode __CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) {
1053 CFHashCode result = actualLen;
1054 if (len <= HashEverythingLimit) {
1055 const UniChar *end4 = uContents + (len & ~3);
1056 const UniChar *end = uContents + len;
1057 while (uContents < end4) HashNextFourUniChars(uContents[, ], uContents); // First count in fours
1058 while (uContents < end) HashNextUniChar(uContents[, ], uContents); // Then for the last <4 chars, count in ones...
1059 } else {
1060 const UniChar *contents, *end;
1061 contents = uContents;
1062 end = contents + 32;
1063 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1064 contents = uContents + (len >> 1) - 16;
1065 end = contents + 32;
1066 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1067 end = uContents + len;
1068 contents = end - 32;
1069 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1070 }
1071 return result + (result << (actualLen & 31));
1072 }
1073
1074 /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check.
1075 */
1076 CF_INLINE CFHashCode __CFStrHashEightBit(const uint8_t *cContents, CFIndex len) {
1077 #if defined(DEBUG)
1078 if (!__CFCharToUniCharFunc) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
1079 CFIndex cnt;
1080 Boolean err = false;
1081 if (len <= HashEverythingLimit) {
1082 for (cnt = 0; cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1083 } else {
1084 for (cnt = 0; cnt < 32; cnt++) if (cContents[cnt] >= 128) err = true;
1085 for (cnt = (len >> 1) - 16; cnt < (len >> 1) + 16; cnt++) if (cContents[cnt] >= 128) err = true;
1086 for (cnt = (len - 32); cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1087 }
1088 if (err) {
1089 // Can't do log here, as it might be too early
1090 fprintf(stderr, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
1091 }
1092 }
1093 #endif
1094 CFHashCode result = len;
1095 if (len <= HashEverythingLimit) {
1096 const uint8_t *end4 = cContents + (len & ~3);
1097 const uint8_t *end = cContents + len;
1098 while (cContents < end4) HashNextFourUniChars(__CFCharToUniCharTable[cContents[, ]], cContents); // First count in fours
1099 while (cContents < end) HashNextUniChar(__CFCharToUniCharTable[cContents[, ]], cContents); // Then for the last <4 chars, count in ones...
1100 } else {
1101 const uint8_t *contents, *end;
1102 contents = cContents;
1103 end = contents + 32;
1104 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1105 contents = cContents + (len >> 1) - 16;
1106 end = contents + 32;
1107 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1108 end = cContents + len;
1109 contents = end - 32;
1110 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1111 }
1112 return result + (result << (len & 31));
1113 }
1114
1115 // This is for NSStringROMKeySet.
1116 CF_PRIVATE CFHashCode __CFStrHashEightBit2(const uint8_t *cContents, CFIndex len) {
1117 return __CFStrHashEightBit(cContents, len);
1118 }
1119
1120 CFHashCode CFStringHashISOLatin1CString(const uint8_t *bytes, CFIndex len) {
1121 CFHashCode result = len;
1122 if (len <= HashEverythingLimit) {
1123 const uint8_t *end4 = bytes + (len & ~3);
1124 const uint8_t *end = bytes + len;
1125 while (bytes < end4) HashNextFourUniChars(bytes[, ], bytes); // First count in fours
1126 while (bytes < end) HashNextUniChar(bytes[, ], bytes); // Then for the last <4 chars, count in ones...
1127 } else {
1128 const uint8_t *contents, *end;
1129 contents = bytes;
1130 end = contents + 32;
1131 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1132 contents = bytes + (len >> 1) - 16;
1133 end = contents + 32;
1134 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1135 end = bytes + len;
1136 contents = end - 32;
1137 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1138 }
1139 return result + (result << (len & 31));
1140 }
1141
1142 CFHashCode CFStringHashCString(const uint8_t *bytes, CFIndex len) {
1143 return __CFStrHashEightBit(bytes, len);
1144 }
1145
1146 CFHashCode CFStringHashCharacters(const UniChar *characters, CFIndex len) {
1147 return __CFStrHashCharacters(characters, len, len);
1148 }
1149
1150 /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash.
1151 */
1152 CFHashCode CFStringHashNSString(CFStringRef str) {
1153 UniChar buffer[HashEverythingLimit];
1154 CFIndex bufLen; // Number of characters in the buffer for hashing
1155 CFIndex len = 0; // Actual length of the string
1156
1157 len = CF_OBJC_CALLV((NSString *)str, length);
1158 if (len <= HashEverythingLimit) {
1159 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer range:NSMakeRange(0, len));
1160 bufLen = len;
1161 } else {
1162 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer range:NSMakeRange(0, 32));
1163 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer+32 range:NSMakeRange((len >> 1) - 16, 32));
1164 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer+64 range:NSMakeRange(len - 32, 32));
1165 bufLen = HashEverythingLimit;
1166 }
1167 return __CFStrHashCharacters(buffer, bufLen, len);
1168 }
1169
1170 CFHashCode __CFStringHash(CFTypeRef cf) {
1171 /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
1172 CFStringRef str = (CFStringRef)cf;
1173 const uint8_t *contents = (uint8_t *)__CFStrContents(str);
1174 CFIndex len = __CFStrLength2(str, contents);
1175
1176 if (__CFStrIsEightBit(str)) {
1177 contents += __CFStrSkipAnyLengthByte(str);
1178 return __CFStrHashEightBit(contents, len);
1179 } else {
1180 return __CFStrHashCharacters((const UniChar *)contents, len, len);
1181 }
1182 }
1183
1184
1185 static CFStringRef __CFStringCopyDescription(CFTypeRef cf) {
1186 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf, __CFGetAllocator(cf), cf);
1187 }
1188
1189 static CFStringRef __CFStringCopyFormattingDescription(CFTypeRef cf, CFDictionaryRef formatOptions) {
1190 return (CFStringRef)CFStringCreateCopy(__CFGetAllocator(cf), (CFStringRef)cf);
1191 }
1192
/* Type ID of the CFString class. Holds the "not a type ID" value until __CFStringInitialize() has registered the class. */
1193 static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID;
1194
/* Function pointer type that CFStringCreateCopy is cast to for the class table below, where the string arguments are expressed as CFTypeRef. */
1195 typedef CFTypeRef (*CF_STRING_CREATE_COPY)(CFAllocatorRef alloc, CFTypeRef theString);
1196
/* The class record __CFStringInitialize() hands to _CFRuntimeRegisterClass().
   NOTE(review): the per-slot notes below assume the field order of CFRuntimeClass as declared in CFRuntime.h, which is not visible here - verify against that header.
*/
1197 static const CFRuntimeClass __CFStringClass = {
1198 _kCFRuntimeScannedObject, // flags / version word
1199 "CFString", // class name
1200 NULL, // init
1201 (CF_STRING_CREATE_COPY)CFStringCreateCopy, // copy
1202 __CFStringDeallocate, // finalizer; releases the character storage
1203 __CFStringEqual, // equality test
1204 __CFStringHash, // hash
1205 __CFStringCopyFormattingDescription, // description used when formatting; a copy of the string itself
1206 __CFStringCopyDescription // debug description
1207 };
1208
1209 CF_PRIVATE void __CFStringInitialize(void) {
1210 static dispatch_once_t initOnce;
1211 dispatch_once(&initOnce, ^{ __kCFStringTypeID = _CFRuntimeRegisterClass(&__CFStringClass); });
1212 }
1213
1214
1215 CFTypeID CFStringGetTypeID(void) {
1216 return __kCFStringTypeID;
1217 }
1218
1219
1220 static Boolean CFStrIsUnicode(CFStringRef str) {
1221 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, Boolean, (NSString *)str, _encodingCantBeStoredInEightBitCFString);
1222 return __CFStrIsUnicode(str);
1223 }
1224
1225
1226 #define ALLOCATORSFREEFUNC ((CFAllocatorRef)-1)
1227
1228 /* contentsDeallocator indicates how to free the data if it's noCopy == true:
1229 kCFAllocatorNull: don't free
1230 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
1231 NULL: default allocator
1232 otherwise it's the allocator that should be used (it will be explicitly stored)
1233 if noCopy == false, then freeFunc should be ALLOCATORSFREEFUNC
1234 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
1235 possiblyExternalFormat indicates that the bytes might have BOM and be swapped
1236 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
1237 numBytes contains the actual number of bytes in "bytes", including Length byte,
1238 BUT not the NULL byte at the end
1239 bytes should not contain BOM characters
1240 !!! Various flags should be combined to reduce number of arguments, if possible
1241 */
1242 CF_PRIVATE CFStringRef __CFStringCreateImmutableFunnel3(
1243 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1244 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1245 CFAllocatorRef contentsDeallocator, UInt32 converterFlags) {
1246
1247 CFMutableStringRef str = NULL;
1248 CFVarWidthCharBuffer vBuf;
1249 CFIndex size;
1250 Boolean useLengthByte = false;
1251 Boolean useNullByte = false;
1252 Boolean useInlineData = false;
1253
1254 #if INSTRUMENT_SHARED_STRINGS
1255 const char *recordedEncoding;
1256 char encodingBuffer[128];
1257 if (encoding == kCFStringEncodingUnicode) recordedEncoding = "Unicode";
1258 else if (encoding == kCFStringEncodingASCII) recordedEncoding = "ASCII";
1259 else if (encoding == kCFStringEncodingUTF8) recordedEncoding = "UTF8";
1260 else if (encoding == kCFStringEncodingMacRoman) recordedEncoding = "MacRoman";
1261 else {
1262 snprintf(encodingBuffer, sizeof(encodingBuffer), "0x%lX", (unsigned long)encoding);
1263 recordedEncoding = encodingBuffer;
1264 }
1265 #endif
1266
1267 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1268
1269 if (contentsDeallocator == ALLOCATORSFREEFUNC) {
1270 contentsDeallocator = alloc;
1271 } else if (contentsDeallocator == NULL) {
1272 contentsDeallocator = __CFGetDefaultAllocator();
1273 }
1274
1275 if ((NULL != kCFEmptyString) && (numBytes == 0) && _CFAllocatorIsSystemDefault(alloc)) { // If we are using the system default allocator, and the string is empty, then use the empty string!
1276 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
1277 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1278 }
1279 return (CFStringRef)CFRetain(kCFEmptyString); // Quick exit; won't catch all empty strings, but most
1280 }
1281
1282 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1283
1284 vBuf.shouldFreeChars = false; // We use this to remember to free the buffer possibly allocated by decode
1285
1286 // Record whether we're starting out with an ASCII-superset string, because we need to know this later for the string ROM; this may get changed later if we successfully convert down from Unicode. We only record this once because __CFCanUseEightBitCFStringForBytes() can be expensive.
1287 Boolean stringSupportsEightBitCFRepresentation = encoding != kCFStringEncodingUnicode && __CFCanUseEightBitCFStringForBytes((const uint8_t *)bytes, numBytes, encoding);
1288
1289 // We may also change noCopy within this function if we have to decode the string into an external buffer. We do not want to avoid the use of the string ROM merely because we tried to be efficient and reuse the decoded buffer for the CFString's external storage. Therefore, we use this variable to track whether we actually can ignore the noCopy flag (which may or may not be set anyways).
1290 Boolean stringROMShouldIgnoreNoCopy = false;
1291
1292 // First check to see if the data needs to be converted...
1293 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
1294
1295 if ((encoding == kCFStringEncodingUnicode && possiblyExternalFormat) || (encoding != kCFStringEncodingUnicode && ! stringSupportsEightBitCFRepresentation)) {
1296 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0);
1297 CFIndex realNumBytes = numBytes - (hasLengthByte ? 1 : 0);
1298 Boolean usingPassedInMemory = false;
1299
1300 vBuf.allocator = kCFAllocatorSystemDefault; // We don't want to use client's allocator for temp stuff
1301 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
1302
1303 if (!__CFStringDecodeByteStream3((const uint8_t *)realBytes, realNumBytes, encoding, false, &vBuf, &usingPassedInMemory, converterFlags)) {
1304 // Note that if the string can't be created, we don't free the buffer, even if there is a contents deallocator. This is on purpose.
1305 return NULL;
1306 }
1307
1308 encoding = vBuf.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUnicode;
1309
1310 // Update our flag according to whether the decoded buffer is ASCII
1311 stringSupportsEightBitCFRepresentation = vBuf.isASCII;
1312
1313 if (!usingPassedInMemory) {
1314
1315 // Because __CFStringDecodeByteStream3() allocated our buffer, it's OK for us to free it if we can get the string from the ROM.
1316 stringROMShouldIgnoreNoCopy = true;
1317
1318 // Make the parameters fit the new situation
1319 numBytes = vBuf.isASCII ? vBuf.numChars : (vBuf.numChars * sizeof(UniChar));
1320 hasLengthByte = hasNullByte = false;
1321
1322 // Get rid of the original buffer if its not being used
1323 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1324 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1325 }
1326 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1327
1328 // See if we can reuse any storage the decode func might have allocated
1329 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1330
1331 if (vBuf.shouldFreeChars && (alloc == vBuf.allocator) && encoding == kCFStringEncodingUnicode) {
1332 vBuf.shouldFreeChars = false; // Transferring ownership to the CFString
1333 bytes = CFAllocatorReallocate(vBuf.allocator, (void *)vBuf.chars.unicode, numBytes, 0); // Tighten up the storage
1334 noCopy = true;
1335 #if INSTRUMENT_SHARED_STRINGS
1336 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-NoCopy";
1337 else recordedEncoding = "ForeignUnicode-NoCopy";
1338 #endif
1339 } else {
1340 #if INSTRUMENT_SHARED_STRINGS
1341 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-Copy";
1342 else recordedEncoding = "ForeignUnicode-Copy";
1343 #endif
1344 bytes = vBuf.chars.unicode;
1345 noCopy = false; // Can't do noCopy anymore
1346 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1347 }
1348
1349 }
1350
1351 // At this point, all necessary input arguments have been changed to reflect the new state
1352
1353 } else if (encoding == kCFStringEncodingUnicode && tryToReduceUnicode) { // Check to see if we can reduce Unicode to ASCII
1354 CFIndex cnt;
1355 CFIndex len = numBytes / sizeof(UniChar);
1356 Boolean allASCII = true;
1357
1358 for (cnt = 0; cnt < len; cnt++) if (((const UniChar *)bytes)[cnt] > 127) {
1359 allASCII = false;
1360 break;
1361 }
1362
1363 if (allASCII) { // Yes we can!
1364 uint8_t *ptr, *mem;
1365 Boolean newHasLengthByte = __CFCanUseLengthByte(len);
1366 numBytes = (len + 1 + (newHasLengthByte ? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte
1367 // See if we can use that temporary local buffer in vBuf...
1368 if (numBytes >= __kCFVarWidthLocalBufferSize) {
1369 mem = ptr = (uint8_t *)CFAllocatorAllocate(alloc, numBytes, 0);
1370 if (__CFOASafe) __CFSetLastAllocationEventName(mem, "CFString (store)");
1371 } else {
1372 mem = ptr = (uint8_t *)(vBuf.localBuffer);
1373 }
1374 if (mem) { // If we can't allocate memory for some reason, use what we had (that is, as if we didn't have all ASCII)
1375 // Copy the Unicode bytes into the new ASCII buffer
1376 hasLengthByte = newHasLengthByte;
1377 hasNullByte = true;
1378 if (hasLengthByte) *ptr++ = (uint8_t)len;
1379 for (cnt = 0; cnt < len; cnt++) ptr[cnt] = (uint8_t)(((const UniChar *)bytes)[cnt]);
1380 ptr[len] = 0;
1381 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1382 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1383 }
1384 // Now make everything look like we had an ASCII buffer to start with
1385 bytes = mem;
1386 encoding = kCFStringEncodingASCII;
1387 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1388 noCopy = (numBytes >= __kCFVarWidthLocalBufferSize); // If we had to allocate it, make sure it's kept around
1389 numBytes--; // Should not contain the NULL byte at end...
1390 stringSupportsEightBitCFRepresentation = true; // We're ASCII now!
1391 stringROMShouldIgnoreNoCopy = true; // We allocated this buffer, so we should feel free to get rid of it if we can use the string ROM
1392 #if INSTRUMENT_SHARED_STRINGS
1393 recordedEncoding = "U->A";
1394 #endif
1395 }
1396 }
1397
1398 // At this point, all necessary input arguments have been changed to reflect the new state
1399 }
1400
1401 #if USE_STRING_ROM || ENABLE_TAGGED_POINTER_STRINGS || INSTRUMENT_SHARED_STRINGS
1402 CFIndex lengthByte = (hasLengthByte ? 1 : 0);
1403 CFIndex realNumBytes = numBytes - lengthByte;
1404 const uint8_t *realBytes = bytes + lengthByte;
1405 #endif
1406
1407
1408 if (!str) {
1409 // Now determine the necessary size
1410 #if INSTRUMENT_SHARED_STRINGS || USE_STRING_ROM
1411 Boolean stringSupportsROM = stringSupportsEightBitCFRepresentation;
1412 #endif
1413
1414 #if INSTRUMENT_SHARED_STRINGS
1415 if (stringSupportsROM) __CFRecordStringAllocationEvent(recordedEncoding, realBytes, realNumBytes);
1416 #endif
1417
1418 #if USE_STRING_ROM
1419 CFStringRef romResult = NULL;
1420
1421
1422 if (stringSupportsROM) {
1423 // Disable the string ROM if necessary
1424 static char sDisableStringROM = -1;
1425 if (sDisableStringROM == -1) sDisableStringROM = !! __CFgetenv("CFStringDisableROM");
1426
1427 if (sDisableStringROM == 0) romResult = __CFSearchStringROM((const char *)realBytes, realNumBytes);
1428 }
1429 /* if we get a result from our ROM, and noCopy is set, then deallocate the buffer immediately */
1430 if (romResult) {
1431 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1432 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1433 }
1434
1435 /* these don't get used again, but clear them for consistency */
1436 noCopy = false;
1437 bytes = NULL;
1438
1439 /* set our result to the ROM result which is not really mutable, of course, but that's OK because we don't try to modify it. */
1440 str = (CFMutableStringRef)romResult;
1441
1442 #if INSTRUMENT_TAGGED_POINTER_STRINGS
1443 _CFTaggedPointerStringStats.stringROMCount++;
1444 #endif
1445 }
1446
1447 if (! romResult) {
1448 #else
1449 if (1) {
1450 #endif
1451
1452 #if INSTRUMENT_SHARED_STRINGS
1453 if (stringSupportsROM) __CFRecordStringAllocationEvent(recordedEncoding, realBytes, realNumBytes);
1454 #endif
1455 #if INSTRUMENT_TAGGED_POINTER_STRINGS
1456 _CFTaggedPointerStringStats.otherStringCount++;
1457 #endif
1458
1459 // Now determine the necessary size
1460
1461 if (noCopy) {
1462
1463 size = sizeof(void *); // Pointer to the buffer
1464 if ((0) || (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull)) {
1465 size += sizeof(void *); // The contentsDeallocator
1466 }
1467 if (!hasLengthByte) size += sizeof(CFIndex); // Explicit length
1468 useLengthByte = hasLengthByte;
1469 useNullByte = hasNullByte;
1470
1471 } else { // Inline data; reserve space for it
1472
1473 useInlineData = true;
1474 size = numBytes;
1475
1476 if (hasLengthByte || (encoding != kCFStringEncodingUnicode && __CFCanUseLengthByte(numBytes))) {
1477 useLengthByte = true;
1478 if (!hasLengthByte) size += 1;
1479 } else {
1480 size += sizeof(CFIndex); // Explicit length
1481 }
1482 if (hasNullByte || encoding != kCFStringEncodingUnicode) {
1483 useNullByte = true;
1484 size += 1;
1485 }
1486 }
1487
1488 #ifdef STRING_SIZE_STATS
1489 // Dump alloced CFString size info every so often
1490 static int cnt = 0;
1491 static unsigned sizes[256] = {0};
1492 int allocedSize = size + sizeof(CFRuntimeBase);
1493 if (allocedSize < 255) sizes[allocedSize]++; else sizes[255]++;
1494 if ((++cnt % 1000) == 0) {
1495 printf ("\nTotal: %d\n", cnt);
1496 int i; for (i = 0; i < 256; i++) printf("%03d: %5d%s", i, sizes[i], ((i % 8) == 7) ? "\n" : " ");
1497 }
1498 #endif
1499
1500 // Finally, allocate!
1501
1502 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, size, NULL);
1503 if (str) {
1504 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (immutable)");
1505
1506 CFOptionFlags allocBits = (0) ? __kCFHasContentsDeallocator : (contentsDeallocator == alloc ? __kCFNotInlineContentsDefaultFree : (contentsDeallocator == kCFAllocatorNull ? __kCFNotInlineContentsNoFree : __kCFNotInlineContentsCustomFree));
1507 __CFStrSetInfoBits(str,
1508 (useInlineData ? __kCFHasInlineContents : allocBits) |
1509 ((encoding == kCFStringEncodingUnicode) ? __kCFIsUnicode : 0) |
1510 (useNullByte ? __kCFHasNullByte : 0) |
1511 (useLengthByte ? __kCFHasLengthByte : 0));
1512
1513 if (!useLengthByte) {
1514 CFIndex length = numBytes - (hasLengthByte ? 1 : 0);
1515 if (encoding == kCFStringEncodingUnicode) length /= sizeof(UniChar);
1516 __CFStrSetExplicitLength(str, length);
1517 }
1518
1519 if (useInlineData) {
1520 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1521 if (useLengthByte && !hasLengthByte) *contents++ = (uint8_t)numBytes;
1522 memmove(contents, bytes, numBytes);
1523 if (useNullByte) contents[numBytes] = 0;
1524 } else {
1525 __CFStrSetContentPtr(str, bytes);
1526 if (__CFStrHasContentsDeallocator(str)) __CFStrSetContentsDeallocator(str, contentsDeallocator);
1527 }
1528 } else {
1529 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1530 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1531 }
1532 }
1533 }
1534 }
1535 if (vBuf.shouldFreeChars) CFAllocatorDeallocate(vBuf.allocator, (void *)bytes);
1536
1537 #if 0
1538 #warning Debug code
1539 const uint8_t *contents = (uint8_t *)__CFStrContents(str);
1540 CFIndex len = __CFStrLength2(str, contents);
1541
1542 if (__CFStrIsEightBit(str)) {
1543 contents += __CFStrSkipAnyLengthByte(str);
1544 if (!__CFBytesInASCII(contents, len)) {
1545 printf("CFString with 8 bit backing store not ASCII: %p, \"%.*s\"\n", str, (int)len, contents);
1546 }
1547 }
1548 #endif
1549
1550 return str;
1551 }
1552
1553 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1554 */
1555 CFStringRef __CFStringCreateImmutableFunnel2(
1556 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1557 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1558 CFAllocatorRef contentsDeallocator) {
1559 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, possiblyExternalFormat, tryToReduceUnicode, hasLengthByte, hasNullByte, noCopy, contentsDeallocator, 0);
1560 }
1561
1562
1563
1564 CFStringRef CFStringCreateWithPascalString(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding) {
1565 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1566 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, false, ALLOCATORSFREEFUNC, 0);
1567 }
1568
1569
1570 CFStringRef CFStringCreateWithCString(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding) {
1571 CFIndex len = strlen(cStr);
1572 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, false, ALLOCATORSFREEFUNC, 0);
1573 }
1574
1575 CFStringRef CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1576 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1577 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, true, contentsDeallocator, 0);
1578 }
1579
1580
1581 CFStringRef CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1582 CFIndex len = strlen(cStr);
1583 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, true, contentsDeallocator, 0);
1584 }
1585
1586
1587 CFStringRef CFStringCreateWithCharacters(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars) {
1588 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1589 }
1590
1591
1592 CFStringRef CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars, CFAllocatorRef contentsDeallocator) {
1593 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, false, false, false, true, contentsDeallocator, 0);
1594 }
1595
1596
1597 CFStringRef CFStringCreateWithBytes(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat) {
1598 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1599 }
1600
1601 CFStringRef _CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1602 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1603 }
1604
1605 CFStringRef CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1606 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1607 }
1608
1609 CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1610 return _CFStringCreateWithFormatAndArgumentsAux2(alloc, NULL, NULL, formatOptions, format, arguments);
1611 }
1612
1613 CFStringRef _CFStringCreateWithFormatAndArgumentsAux2(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, const void *), CFStringRef (*contextDescFunc)(void *, const void *, const void *, bool , bool *), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1614 CFStringRef str;
1615 CFMutableStringRef outputString = CFStringCreateMutable(kCFAllocatorSystemDefault, 0); //should use alloc if no copy/release
1616 __CFStrSetDesiredCapacity(outputString, 120); // Given this will be tightened later, choosing a larger working string is fine
1617 __CFStringAppendFormatCore(outputString, copyDescFunc, contextDescFunc, formatOptions, NULL, format, 0, NULL, 0, arguments);
1618 // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1619 // (However, this does make the string inline, and cause the supplied allocator to be used...)
1620 str = (CFStringRef)CFStringCreateCopy(alloc, outputString);
1621 CFRelease(outputString);
1622 return str;
1623 }
1624
1625 CFStringRef _CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1626 return _CFStringCreateWithFormatAndArgumentsAux2(alloc, copyDescFunc, NULL, formatOptions, format, arguments);
1627 }
1628
1629 CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) {
1630 CFStringRef result;
1631 va_list argList;
1632
1633 va_start(argList, format);
1634 result = CFStringCreateWithFormatAndArguments(alloc, formatOptions, format, argList);
1635 va_end(argList);
1636
1637 return result;
1638 }
1639
1640 CFStringRef CFStringCreateWithSubstring(CFAllocatorRef alloc, CFStringRef str, CFRange range) {
1641 // CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringRef , (NSString *)str, _createSubstringWithRange:NSMakeRange(range.location, range.length));
1642
1643 __CFAssertIsString(str);
1644 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1645
1646 if ((range.location == 0) && (range.length == __CFStrLength(str))) { /* The substring is the whole string... */
1647 return (CFStringRef)CFStringCreateCopy(alloc, str);
1648 } else if (__CFStrIsEightBit(str)) {
1649 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1650 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location + __CFStrSkipAnyLengthByte(str), range.length, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1651 } else {
1652 const UniChar *contents = (UniChar *)__CFStrContents(str);
1653 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location, range.length * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1654 }
1655 }
1656
1657 CFStringRef CFStringCreateCopy(CFAllocatorRef alloc, CFStringRef str) {
1658 // CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringRef, (NSString *)str, copy);
1659
1660 __CFAssertIsString(str);
1661 if (!__CFStrIsMutable((CFStringRef)str) && // If the string is not mutable
1662 ((alloc ? alloc : __CFGetDefaultAllocator()) == __CFGetAllocator(str)) && // and it has the same allocator as the one we're using
1663 (__CFStrIsInline((CFStringRef)str) || __CFStrFreeContentsWhenDone((CFStringRef)str) || __CFStrIsConstant((CFStringRef)str))) { // and the characters are inline, or are owned by the string, or the string is constant
1664 if (!(kCFUseCollectableAllocator && (0))) CFRetain(str); // Then just retain instead of making a true copy
1665 return str;
1666 }
1667 if (__CFStrIsEightBit((CFStringRef)str)) {
1668 const uint8_t *contents = (const uint8_t *)__CFStrContents((CFStringRef)str);
1669 return __CFStringCreateImmutableFunnel3(alloc, contents + __CFStrSkipAnyLengthByte((CFStringRef)str), __CFStrLength2((CFStringRef)str, contents), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1670 } else {
1671 const UniChar *contents = (const UniChar *)__CFStrContents((CFStringRef)str);
1672 return __CFStringCreateImmutableFunnel3(alloc, contents, __CFStrLength2((CFStringRef)str, contents) * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1673 }
1674 }
1675
1676
1677
1678 /*** Constant string stuff... ***/
1679
/* Table which holds constant strings created with CFSTR, when -fconstant-cfstrings option is not used. These dynamically created constant strings are stored in constantStringTable. The keys are 8-bit C strings (either the created CFString's own 8-bit contents or a heap-allocated copy of the compiler's C string); the values are the CFStrings created for them. _CFSTRLock protects this table.
*/
static CFMutableDictionaryRef constantStringTable = NULL;    // Created lazily by __CFStringMakeConstantString()
static CFLock_t _CFSTRLock = CFLockInit;    // Held around lookups and insertions in constantStringTable
1684
1685 static CFStringRef __cStrCopyDescription(const void *ptr) {
1686 return CFStringCreateWithCStringNoCopy(kCFAllocatorSystemDefault, (const char *)ptr, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull);
1687 }
1688
1689 static Boolean __cStrEqual(const void *ptr1, const void *ptr2) {
1690 return (strcmp((const char *)ptr1, (const char *)ptr2) == 0);
1691 }
1692
1693 static CFHashCode __cStrHash(const void *ptr) {
1694 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1695 const char *cStr = (const char *)ptr;
1696 CFIndex len = strlen(cStr);
1697 CFHashCode result = 0;
1698 if (len <= 4) { // All chars
1699 unsigned cnt = len;
1700 while (cnt--) result += (result << 8) + *cStr++;
1701 } else { // First and last 2 chars
1702 result += (result << 8) + cStr[0];
1703 result += (result << 8) + cStr[1];
1704 result += (result << 8) + cStr[len-2];
1705 result += (result << 8) + cStr[len-1];
1706 }
1707 result += (result << (len & 31));
1708 return result;
1709 }
1710
1711
1712 CFStringRef __CFStringMakeConstantString(const char *cStr) {
1713 CFStringRef result;
1714 #if defined(DEBUG)
1715 // StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging
1716 if ('\0' == *cStr) return kCFEmptyString;
1717 #endif
1718 if (constantStringTable == NULL) {
1719 CFDictionaryKeyCallBacks constantStringCallBacks = {0, NULL, NULL, __cStrCopyDescription, __cStrEqual, __cStrHash};
1720 CFDictionaryValueCallBacks constantStringValueCallBacks = kCFTypeDictionaryValueCallBacks;
1721 constantStringValueCallBacks.equal = NULL; // So that we only find strings that are ==
1722 CFMutableDictionaryRef table = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, &constantStringCallBacks, &constantStringValueCallBacks);
1723 _CFDictionarySetCapacity(table, 2500); // avoid lots of rehashing
1724 __CFLock(&_CFSTRLock);
1725 if (constantStringTable == NULL) constantStringTable = table;
1726 __CFUnlock(&_CFSTRLock);
1727 if (constantStringTable != table) CFRelease(table);
1728 }
1729
1730 __CFLock(&_CFSTRLock);
1731 if ((result = (CFStringRef)CFDictionaryGetValue(constantStringTable, cStr))) {
1732 __CFUnlock(&_CFSTRLock);
1733 } else {
1734 __CFUnlock(&_CFSTRLock);
1735
1736 {
1737 char *key = NULL;
1738 Boolean isASCII = true;
1739 // Given this code path is rarer these days, OK to do this extra work to verify the strings
1740 const char *tmp = cStr;
1741 while (*tmp) {
1742 if (*(tmp++) & 0x80) {
1743 isASCII = false;
1744 break;
1745 }
1746 }
1747 if (!isASCII) {
1748 CFMutableStringRef ms = CFStringCreateMutable(kCFAllocatorSystemDefault, 0);
1749 tmp = cStr;
1750 while (*tmp) {
1751 CFStringAppendFormat(ms, NULL, (*tmp & 0x80) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp);
1752 tmp++;
1753 }
1754 CFLog(kCFLogLevelWarning, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms);
1755 CFRelease(ms);
1756 }
1757 // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility
1758 result = CFStringCreateWithCString(kCFAllocatorSystemDefault, cStr, kCFStringEncodingMacRoman);
1759 if (result == NULL) {
1760 CFLog(__kCFLogAssertion, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1761 HALT;
1762 }
1763 Boolean isTaggedPointerString = CF_IS_OBJC(__kCFStringTypeID, result);
1764
1765 if (!isTaggedPointerString) {
1766 if (__CFOASafe) __CFSetLastAllocationEventName((void *)result, "CFString (CFSTR)");
1767 if (__CFStrIsEightBit(result)) key = (char *)__CFStrContents(result) + __CFStrSkipAnyLengthByte(result);
1768 }
1769 if (!key) { // Either the string is not 8-bit or it's a tagged pointer string
1770 CFIndex keySize = strlen(cStr) + 1;
1771 key = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, keySize, 0);
1772 if (__CFOASafe) __CFSetLastAllocationEventName((void *)key, "CFString (CFSTR key)");
1773 strlcpy(key, cStr, keySize); // !!! We will leak this, if the string is removed from the table (or table is freed)
1774 }
1775
1776 {
1777 CFStringRef resultToBeReleased = result;
1778 CFIndex count;
1779 __CFLock(&_CFSTRLock);
1780 count = CFDictionaryGetCount(constantStringTable);
1781 CFDictionaryAddValue(constantStringTable, key, result);
1782 if (CFDictionaryGetCount(constantStringTable) == count) { // add did nothing, someone already put it there
1783 result = (CFStringRef)CFDictionaryGetValue(constantStringTable, key);
1784 } else if (!isTaggedPointerString) {
1785 #if __LP64__
1786 ((struct __CFString *)result)->base._rc = 0;
1787 #else
1788 ((struct __CFString *)result)->base._cfinfo[CF_RC_BITS] = 0;
1789 #endif
1790 }
1791 __CFUnlock(&_CFSTRLock);
1792 // This either eliminates the extra retain on the freshly created string, or frees it, if it was actually not inserted into the table
1793 CFRelease(resultToBeReleased);
1794 }
1795 }
1796 }
1797 return result;
1798 }
1799
#if defined(DEBUG)
/* Debug-only: reports whether str is one of the values stored in constantStringTable.
   Returns false when the table has not been created yet.
*/
static Boolean __CFStrIsConstantString(CFStringRef str) {
    if (!constantStringTable) return false;

    __CFLock(&_CFSTRLock);
    Boolean present = CFDictionaryContainsValue(constantStringTable, str);
    __CFUnlock(&_CFSTRLock);
    return present;
}
#endif
1811
1812
#if DEPLOYMENT_TARGET_WINDOWS
/* In case the library is unloaded, release the store for the constant string table.
   In DEBUG builds __CFConstantStringTableBeingFreed is raised for the duration of the release.
*/
void __CFStringCleanup (void) {
    if (constantStringTable == NULL) return;

#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = true;
#endif
    CFRelease(constantStringTable);
#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = false;
#endif
    constantStringTable = NULL;
}
#endif
1828
1829
1830 // Can pass in NSString as replacement string
1831 // Call with numRanges > 0, and incrementing ranges
1832
1833 static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) {
1834 int cnt;
1835 CFStringRef copy = NULL;
1836 if (replacement == str) copy = replacement = CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1837 CFIndex replacementLength = CFStringGetLength(replacement);
1838
1839 __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1840
1841 if (__CFStrIsUnicode(str)) {
1842 UniChar *contents = (UniChar *)__CFStrContents(str);
1843 UniChar *firstReplacement = contents + ranges[0].location;
1844 // Extract the replacementString into the first location, then copy from there
1845 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement);
1846 for (cnt = 1; cnt < numRanges; cnt++) {
1847 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1848 contents += replacementLength - ranges[cnt - 1].length;
1849 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar));
1850 }
1851 } else {
1852 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1853 uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str);
1854 // Extract the replacementString into the first location, then copy from there
1855 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL);
1856 contents += __CFStrSkipAnyLengthByte(str); // Now contents will simply track the location to insert next string into
1857 for (cnt = 1; cnt < numRanges; cnt++) {
1858 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1859 contents += replacementLength - ranges[cnt - 1].length;
1860 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength);
1861 }
1862 }
1863 if (copy) CFRelease(copy);
1864 }
1865
1866 // Can pass in NSString as replacement string
1867
1868 CF_INLINE void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
1869 CFStringRef copy = NULL;
1870 if (replacement == str) copy = replacement = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1871 CFIndex replacementLength = CFStringGetLength(replacement);
1872
1873 __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1874
1875 if (__CFStrIsUnicode(str)) {
1876 UniChar *contents = (UniChar *)__CFStrContents(str);
1877 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location);
1878 } else {
1879 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1880 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL);
1881 }
1882
1883 if (copy) CFRelease(copy);
1884 }
1885
/* Desired capacity given to a new mutable string when the client does not provide one
   (that is, when __CFStringCreateMutableFunnel() is called with maxLength == 0).
*/
#define DEFAULTMINCAPACITY 32
1889
1890 CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) {
1891 CFMutableStringRef str;
1892 if ((0)) additionalInfoBits |= __kCFHasContentsAllocator;
1893 Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false;
1894
1895 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1896
1897 // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
1898 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(struct __notInlineMutable) - (hasExternalContentsAllocator ? 0 : sizeof(CFAllocatorRef)), NULL);
1899 if (str) {
1900 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)");
1901
1902 __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits);
1903 str->variants.notInlineMutable.buffer = NULL;
1904 __CFStrSetExplicitLength(str, 0);
1905 str->variants.notInlineMutable.hasGap = str->variants.notInlineMutable.isFixedCapacity = str->variants.notInlineMutable.isExternalMutable = str->variants.notInlineMutable.capacityProvidedExternally = 0;
1906 if (maxLength != 0) __CFStrSetIsFixed(str);
1907 __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength);
1908 __CFStrSetCapacity(str, 0);
1909 if (__CFStrHasContentsAllocator(str)) {
1910 // contents allocator starts out as the string's own allocator
1911 __CFStrSetContentsAllocator(str, alloc);
1912 }
1913 }
1914 return str;
1915 }
1916
1917 CFMutableStringRef CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc, UniChar *chars, CFIndex numChars, CFIndex capacity, CFAllocatorRef externalCharactersAllocator) {
1918 CFOptionFlags contentsAllocationBits = externalCharactersAllocator ? ((externalCharactersAllocator == kCFAllocatorNull) ? __kCFNotInlineContentsNoFree : __kCFHasContentsAllocator) : __kCFNotInlineContentsDefaultFree;
1919 CFMutableStringRef string = __CFStringCreateMutableFunnel(alloc, 0, contentsAllocationBits | __kCFIsUnicode);
1920 if (string) {
1921 __CFStrSetIsExternalMutable(string);
1922 if (__CFStrHasContentsAllocator(string)) {
1923 CFAllocatorRef allocator = __CFStrContentsAllocator((CFMutableStringRef)string);
1924 if (!(0 || 0)) CFRelease(allocator);
1925 __CFStrSetContentsAllocator(string, externalCharactersAllocator);
1926 }
1927 CFStringSetExternalCharactersNoCopy(string, chars, numChars, capacity);
1928 }
1929 return string;
1930 }
1931
1932 CFMutableStringRef CFStringCreateMutable(CFAllocatorRef alloc, CFIndex maxLength) {
1933 return __CFStringCreateMutableFunnel(alloc, maxLength, __kCFNotInlineContentsDefaultFree);
1934 }
1935
1936 CFMutableStringRef CFStringCreateMutableCopy(CFAllocatorRef alloc, CFIndex maxLength, CFStringRef string) {
1937 CFMutableStringRef newString;
1938
1939 // CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFMutableStringRef, (NSString *)string, mutableCopy);
1940
1941 __CFAssertIsString(string);
1942
1943 newString = CFStringCreateMutable(alloc, maxLength);
1944 __CFStringReplace(newString, CFRangeMake(0, 0), string);
1945
1946 return newString;
1947 }
1948
1949
1950 CF_PRIVATE void _CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex len) {
1951 __CFAssertIsStringAndMutable(str);
1952 __CFStrSetDesiredCapacity(str, len);
1953 }
1954
1955
1956 /* This one is for CF
1957 */
1958 CFIndex CFStringGetLength(CFStringRef str) {
1959 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFIndex, (NSString *)str, length);
1960
1961 __CFAssertIsString(str);
1962 return __CFStrLength(str);
1963 }
1964
1965 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1966 */
1967 CFIndex _CFStringGetLength2(CFStringRef str) {
1968 return __CFStrLength(str);
1969 }
1970
1971
1972 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere.
1973 */
1974 CF_INLINE UniChar __CFStringGetCharacterAtIndexGuts(CFStringRef str, CFIndex idx, const uint8_t *contents) {
1975 if (__CFStrIsEightBit(str)) {
1976 contents += __CFStrSkipAnyLengthByte(str);
1977 #if defined(DEBUG)
1978 if (!__CFCharToUniCharFunc && (contents[idx] >= 128)) {
1979 // Can't do log here, as it might be too early
1980 fprintf(stderr, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
1981 }
1982 #endif
1983 return __CFCharToUniCharTable[contents[idx]];
1984 }
1985
1986 return ((UniChar *)contents)[idx];
1987 }
1988
/* This one is for the CF API.
   str is first handed to the ObjC dispatch macro with the characterAtIndex: selector. Past that point
   str is asserted to be a CFString and idx is asserted to be within the string's bounds; both are
   assertions only, after which the shared guts routine reads the character from the string's storage.
*/
UniChar CFStringGetCharacterAtIndex(CFStringRef str, CFIndex idx) {
    CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, UniChar, (NSString *)str, characterAtIndex:(NSUInteger)idx);

    __CFAssertIsString(str);
    __CFAssertIndexIsInStringBounds(str, idx);
    return __CFStringGetCharacterAtIndexGuts(str, idx, (const uint8_t *)__CFStrContents(str));
}
1998
1999 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
2000 */
2001 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str, CFIndex idx, UniChar *ch) {
2002 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
2003 if (idx >= __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
2004 *ch = __CFStringGetCharacterAtIndexGuts(str, idx, contents);
2005 return _CFStringErrNone;
2006 }
2007
2008
2009 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
2010 */
2011 CF_INLINE void __CFStringGetCharactersGuts(CFStringRef str, CFRange range, UniChar *buffer, const uint8_t *contents) {
2012 if (__CFStrIsEightBit(str)) {
2013 __CFStrConvertBytesToUnicode(((uint8_t *)contents) + (range.location + __CFStrSkipAnyLengthByte(str)), buffer, range.length);
2014 } else {
2015 const UniChar *uContents = ((UniChar *)contents) + range.location;
2016 memmove(buffer, uContents, range.length * sizeof(UniChar));
2017 }
2018 }
2019
/* This one is for the CF API.
   str is first handed to the ObjC dispatch macro with the getCharacters:range: selector. Past that
   point str is asserted to be a CFString and range is asserted to lie within it (assertions only),
   and the shared guts routine copies the requested characters into buffer.
   The caller must supply a buffer with room for range.length UniChars.
*/
void CFStringGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
    CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)str, getCharacters:(unichar *)buffer range:NSMakeRange(range.location, range.length));

    __CFAssertIsString(str);
    __CFAssertRangeIsInStringBounds(str, range.location, range.length);
    __CFStringGetCharactersGuts(str, range, buffer, (const uint8_t *)__CFStrContents(str));
}
2029
2030 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
2031 */
2032 int _CFStringCheckAndGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
2033 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
2034 if (range.location + range.length > __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
2035 __CFStringGetCharactersGuts(str, range, buffer, contents);
2036 return _CFStringErrNone;
2037 }
2038
2039
2040 CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) {
2041
2042 __CFAssertIsNotNegative(maxBufLen);
2043
2044 {
2045 __CFAssertIsString(str);
2046 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
2047
2048 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2049 const unsigned char *contents = (const unsigned char *)__CFStrContents(str);
2050 CFIndex cLength = range.length;
2051
2052 if (buffer) {
2053 if (cLength > maxBufLen) cLength = maxBufLen;
2054 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength);
2055 }
2056 if (usedBufLen) *usedBufLen = cLength;
2057
2058 return cLength;
2059 }
2060 }
2061
2062 return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen);
2063 }
2064
2065
2066 ConstStringPtr CFStringGetPascalStringPtr (CFStringRef str, CFStringEncoding encoding) {
2067
2068 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
2069 __CFAssertIsString(str);
2070 if (__CFStrHasLengthByte(str) && __CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII
2071 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
2072 if (__CFStrHasExplicitLength(str) && (__CFStrLength2(str, contents) != (SInt32)(*contents))) return NULL; // Invalid length byte
2073 return (ConstStringPtr)contents;
2074 }
2075 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
2076 }
2077 return NULL;
2078 }
2079
2080
2081 const char * CFStringGetCStringPtr(CFStringRef str, CFStringEncoding encoding) {
2082
2083 if (encoding != __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII != __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding))) return NULL;
2084 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
2085
2086 if (str == NULL) return NULL; // Should really just crash, but for compatibility... see <rdar://problem/12340248>
2087
2088 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, const char *, (NSString *)str, _fastCStringContents:true);
2089
2090 __CFAssertIsString(str);
2091
2092 if (__CFStrHasNullByte(str)) {
2093 // Note: this is called a lot, 27000 times to open a small xcode project with one file open.
2094 // Of these uses about 1500 are for cStrings/utf8strings.
2095 #if 0
2096 // Only sometimes when the stars are aligned will this call return a gc pointer
2097 // under GC we can only really return a pointer to the start of a GC buffer for cString use
2098 // (Is there a simpler way to ask if contents isGC?)
2099 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
2100 if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
2101 if (__CFStrSkipAnyLengthByte(str) != 0 || !__CFStrIsMutable(str)) {
2102 static int counter = 0;
2103 printf("CFString %dth unsafe safe string %s\n", ++counter, __CFStrContents(str) + __CFStrSkipAnyLengthByte(str));
2104 return NULL;
2105 }
2106 }
2107 #endif
2108 return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str);
2109 } else {
2110 return NULL;
2111 }
2112 }
2113
2114
2115 const UniChar *CFStringGetCharactersPtr(CFStringRef str) {
2116
2117 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, const UniChar *, (NSString *)str, _fastCharacterContents);
2118
2119 __CFAssertIsString(str);
2120 if (__CFStrIsUnicode(str)) return (const UniChar *)__CFStrContents(str);
2121 return NULL;
2122 }
2123
2124
2125 Boolean CFStringGetPascalString(CFStringRef str, Str255 buffer, CFIndex bufferSize, CFStringEncoding encoding) {
2126 CFIndex length;
2127 CFIndex usedLen;
2128
2129 __CFAssertIsNotNegative(bufferSize);
2130 if (bufferSize < 1) return false;
2131
2132 if (CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
2133 length = CFStringGetLength(str);
2134 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
2135 } else {
2136 const uint8_t *contents;
2137
2138 __CFAssertIsString(str);
2139
2140 contents = (const uint8_t *)__CFStrContents(str);
2141 length = __CFStrLength2(str, contents);
2142
2143 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
2144
2145 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2146 if (length >= bufferSize) return false;
2147 memmove((void*)(1 + (const char*)buffer), (__CFStrSkipAnyLengthByte(str) + contents), length);
2148 *buffer = (unsigned char)length;
2149 return true;
2150 }
2151 }
2152
2153 if (__CFStringEncodeByteStream(str, 0, length, false, encoding, false, (UInt8 *)(1 + (uint8_t *)buffer), bufferSize - 1, &usedLen) != length) {
2154
2155 #if defined(DEBUG)
2156 if (bufferSize > 0) {
2157 strlcpy((char *)buffer + 1, CONVERSIONFAILURESTR, bufferSize - 1);
2158 buffer[0] = (unsigned char)((CFIndex)sizeof(CONVERSIONFAILURESTR) < (bufferSize - 1) ? (CFIndex)sizeof(CONVERSIONFAILURESTR) : (bufferSize - 1));
2159 }
2160 #else
2161 if (bufferSize > 0) buffer[0] = 0;
2162 #endif
2163 return false;
2164 }
2165 *buffer = (unsigned char)usedLen;
2166 return true;
2167 }
2168
2169 Boolean CFStringGetCString(CFStringRef str, char *buffer, CFIndex bufferSize, CFStringEncoding encoding) {
2170 const uint8_t *contents;
2171 CFIndex len;
2172
2173 __CFAssertIsNotNegative(bufferSize);
2174 if (bufferSize < 1) return false;
2175
2176 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, Boolean, (NSString *)str, _getCString:buffer maxLength:(NSUInteger)bufferSize - 1 encoding:encoding);
2177
2178 __CFAssertIsString(str);
2179
2180 contents = (const uint8_t *)__CFStrContents(str);
2181 len = __CFStrLength2(str, contents);
2182
2183 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2184 if (len >= bufferSize) return false;
2185 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str), len);
2186 buffer[len] = 0;
2187 return true;
2188 } else {
2189 CFIndex usedLen;
2190
2191 if (__CFStringEncodeByteStream(str, 0, len, false, encoding, false, (unsigned char*) buffer, bufferSize - 1, &usedLen) == len) {
2192 buffer[usedLen] = '\0';
2193 return true;
2194 } else {
2195 #if defined(DEBUG)
2196 strlcpy(buffer, CONVERSIONFAILURESTR, bufferSize);
2197 #else
2198 if (bufferSize > 0) buffer[0] = 0;
2199 #endif
2200 return false;
2201 }
2202 }
2203 }
2204
/* Accessors for a per-locale "null locale" flag, defined outside this file.
   _CFStrGetLanguageIdentifierForLocale() below sets the flag when it finds no special-cased language
   for a locale, and returns NULL right away for any locale that already has the flag set.
*/
extern Boolean __CFLocaleGetNullLocale(struct __CFLocale *locale);
extern void __CFLocaleSetNullLocale(struct __CFLocale *locale);
2207
2208 static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale, bool collatorOnly) {
2209 CFStringRef localeID;
2210 const char *langID = NULL;
2211 static const void *lastLocale = NULL;
2212 static const char *lastLangID = NULL;
2213 static CFLock_t lock = CFLockInit;
2214
2215 if (__CFLocaleGetNullLocale((struct __CFLocale *)locale)) return NULL;
2216
2217 __CFLock(&lock);
2218 if ((NULL != lastLocale) && (lastLocale == locale)) {
2219 __CFUnlock(&lock);
2220 return lastLangID;
2221 }
2222 __CFUnlock(&lock);
2223
2224 localeID = (CFStringRef)CFLocaleGetValue(locale, __kCFLocaleCollatorID);
2225 CFIndex length = CFStringGetLength(localeID);
2226
2227 if (!collatorOnly) {
2228 if ((length < 2) || ((4 == length) && CFEqual(localeID, CFSTR("root")))) {
2229 localeID = (CFStringRef)CFLocaleGetIdentifier(locale);
2230 length = CFStringGetLength(localeID);
2231 }
2232 }
2233
2234 if (length > 1) {
2235 uint8_t buffer[2];
2236 const uint8_t *contents = (const uint8_t *)CFStringGetCStringPtr(localeID, kCFStringEncodingUTF8);
2237 if (!contents) {
2238 if (2 == CFStringGetBytes(localeID, CFRangeMake(0,2), kCFStringEncodingUTF8, 0, false, buffer, sizeof(buffer), NULL)) contents = buffer;
2239 }
2240 if (contents) {
2241 const char *string = (const char *)contents;
2242 if (!strncmp(string, "az", 2)) { // Azerbaijani
2243 langID = "az";
2244 } else if (!strncmp(string, "lt", 2)) { // Lithuanian
2245 langID = "lt";
2246 } else if (!strncmp(string, "tr", 2)) { // Turkish
2247 langID = "tr";
2248 } else if (!strncmp(string, "nl", 2)) { // Dutch
2249 langID = "nl";
2250 } else if (!strncmp(string, "el", 2)) { // Greek
2251 langID = "el";
2252 }
2253 }
2254 }
2255
2256 if (langID == NULL) __CFLocaleSetNullLocale((struct __CFLocale *)locale);
2257
2258 __CFLock(&lock);
2259 lastLocale = locale;
2260 lastLangID = langID;
2261 __CFUnlock(&lock);
2262
2263 return langID;
2264 }
2265
2266 CF_INLINE bool _CFCanUseLocale(CFLocaleRef locale) {
2267 if (locale) {
2268 return true;
2269 }
2270 return false;
2271 }
2272
// Number of UTF16Char slots in the scratch buffer handed to CFUniCharMapCaseTo() when case folding
#define MAX_CASE_MAPPING_BUF (8)
// Code point values for the characters named by the macros
#define ZERO_WIDTH_JOINER (0x200D)
#define COMBINING_GRAPHEME_JOINER (0x034F)
// Hangul ranges (inclusive start/end code points; going by the names: leading, vowel and trailing jamo)
#define HANGUL_CHOSEONG_START (0x1100)
#define HANGUL_CHOSEONG_END (0x115F)
#define HANGUL_JUNGSEONG_START (0x1160)
#define HANGUL_JUNGSEONG_END (0x11A2)
#define HANGUL_JONGSEONG_START (0x11A8)
#define HANGUL_JONGSEONG_END (0x11F9)

// Range of the Hangul syllable block
#define HANGUL_SYLLABLE_START (0xAC00)
#define HANGUL_SYLLABLE_END (0xD7AF)
2286
2287
// Folds the character cluster that starts at index in buffer, according to the comparison flags.
//   character       - first character of the cluster, already fetched by the caller; 0 means "nothing to do"
//   buffer, index   - inline buffer and position the cluster was read from; following characters are read from it
//   flags           - kCFCompareCaseInsensitive / WidthInsensitive / DiacriticInsensitive / Nonliteral select the mappings applied
//   langCode        - NULL or a language code string; "tr" and "az" enable special handling of 'I'
//   outCharacters   - receives the folded characters
//   maxBufferLength - capacity of outCharacters
//   consumedLength  - if non-NULL and the result is > 0, receives how many index positions of buffer were consumed
// Returns the length of characters filled into outCharacters. If no change, returns 0. maxBufferLength should be at least 8
static CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength) {
    CFIndex filledLength = 0, currentIndex = index;

    if (0 != character) {
        UTF16Char lowSurrogate;
        CFIndex planeNo = (character >> 16);
        bool isTurkikCapitalI = false;
        static const uint8_t *decompBMP = NULL;
        static const uint8_t *graphemeBMP = NULL;

        // The plane 0 bitmaps are looked up once and kept in the function-level statics above
        if (NULL == decompBMP) {
            decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
            graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
        }

        // Step past the lead character: two index positions when it is above 0xFFFF, otherwise one
        currentIndex += ((character > 0xFFFF) ? 2 : 1);

        if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII
            if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) {
                character += ('a' - 'A');
                *outCharacters = character;
                filledLength = 1;
            }
        } else {
            // do width-insensitive mapping
            if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) {
                (void)CFUniCharCompatibilityDecompose(&character, 1, 1);
                *outCharacters = character;
                filledLength = 1;
            }

            // map surrogates
            if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) {
                character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
                ++currentIndex;
                planeNo = (character >> 16);
            }

            // decompose
            if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
                if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) {
                    UTF32Char original = character;

                    filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength);
                    character = *outCharacters;

                    if ((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) {
                        filledLength = 1; // reset if Roman, Greek, Cyrillic
                    } else if (0 == (flags & kCFCompareNonliteral)) {
                        // Decomposition was only wanted for diacritic stripping, which does not apply here: undo it
                        character = original;
                        filledLength = 0;
                    }
                }
            }

            // fold case
            if (flags & kCFCompareCaseInsensitive) {
                const uint8_t *nonBaseBitmap;
                bool filterNonBase = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? true : false);
                static const uint8_t *lowerBMP = NULL;
                static const uint8_t *caseFoldBMP = NULL;

                if (NULL == lowerBMP) {
                    lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0);
                    caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0);
                }

                // For "tr"/"az", an 'I' followed by 0x0307 (either in the decomposition already in
                // outCharacters or as the next character in buffer) becomes 'i' and the 0x0307 is dropped
                if ((NULL != langCode) && ('I' == character) && ((0 == strcmp((const char *)langCode, "tr")) || (0 == strcmp((const char *)langCode, "az")))) { // do Turkik special-casing
                    if (filledLength > 1) {
                        if (0x0307 == outCharacters[1]) {
                            if (--filledLength > 1) memmove((outCharacters + 1), (outCharacters + 2), sizeof(UTF32Char) * (filledLength - 1));
                            character = *outCharacters = 'i';
                            isTurkikCapitalI = true;
                        }
                    } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) {
                        character = *outCharacters = 'i';
                        filledLength = 1;
                        ++currentIndex;
                        isTurkikCapitalI = true;
                    }
                }
                if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) {
                    UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF];
                    const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit;
                    UTF32Char *outCharactersP = outCharacters;
                    uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode);

                    bufferLimit = bufferP + bufferLength;

                    if (filledLength > 0) --filledLength; // decrement filledLength (will add back later)

                    // make space for casefold characters
                    if ((filledLength > 0) && (bufferLength > 1)) {
                        CFIndex totalScalerLength = 0;

                        // Count the case-folded result in whole characters (a surrogate pair counts once)
                        while (bufferP < bufferLimit) {
                            if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP;
                            ++totalScalerLength;
                        }
                        memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char));
                        bufferP = caseFoldBuffer;
                    }

                    // fill
                    while (bufferP < bufferLimit) {
                        character = *(bufferP++);
                        if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) {
                            character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++));
                            nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
                        } else {
                            nonBaseBitmap = graphemeBMP;
                        }

                        if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
                            *(outCharactersP++) = character;
                            ++filledLength;
                        }
                    }
                }
            }
        }

        // collect following combining marks
        if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
            const uint8_t *nonBaseBitmap;
            const uint8_t *decompBitmap;
            bool doFill = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? false : true);

            if (0 == filledLength) {
                *outCharacters = character; // filledLength will be updated below on demand

                if (doFill) { // check if really needs to fill
                    UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);

                    if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
                        nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate);
                        nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (nonBaseCharacter >> 16));
                        decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16));
                    } else {
                        nonBaseBitmap = graphemeBMP;
                        decompBitmap = decompBMP;
                    }

                    if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) {
                        filledLength = 1; // For the base character

                        if ((0 == (flags & kCFCompareDiacriticInsensitive)) || (nonBaseCharacter > 0x050F)) {
                            if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) {
                                filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength);
                            } else {
                                outCharacters[filledLength++] = nonBaseCharacter;
                            }
                        }
                        // nonBaseBitmap == graphemeBMP means the mark came from a single (non-surrogate) unit
                        currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
                    } else {
                        doFill = false;
                    }
                }
            }

            while (filledLength < maxBufferLength) { // do the rest
                character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);

                if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
                    character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
                    nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
                    decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16));
                } else {
                    nonBaseBitmap = graphemeBMP;
                    decompBitmap = decompBMP;
                }
                if (isTurkikCapitalI) {
                    // First pass only clears the flag; currentIndex is not advanced, so the same
                    // character is examined again on the next pass
                    isTurkikCapitalI = false;
                } else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
                    if (doFill) {
                        if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) {
                            CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength);

                            if (0 == currentLength) break; // didn't fit

                            filledLength += currentLength;
                        } else {
                            outCharacters[filledLength++] = character;
                        }
                    } else if (0 == filledLength) {
                        filledLength = 1; // For the base character
                    }
                    currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
                } else {
                    break;
                }
            }

            if (filledLength > 1) {
                UTF32Char *sortCharactersLimit = outCharacters + filledLength;
                UTF32Char *sortCharacters = sortCharactersLimit - 1;

                // Walk back from the end over the trailing run of grapheme-extend characters
                while ((outCharacters < sortCharacters) && CFUniCharIsMemberOfBitmap(*sortCharacters, ((*sortCharacters < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (*sortCharacters >> 16))))) --sortCharacters;

                if ((sortCharactersLimit - sortCharacters) > 1) CFUniCharPrioritySort(sortCharacters, (sortCharactersLimit - sortCharacters)); // priority sort
            }
        }
    }

    // consumedLength is only written when something was produced
    if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index);

    return filledLength;
}
2497
2498 static bool __CFStringFillCharacterSetInlineBuffer(CFCharacterSetInlineBuffer *buffer, CFStringCompareFlags compareOptions) {
2499 if (0 != (compareOptions & kCFCompareIgnoreNonAlphanumeric)) {
2500 static CFCharacterSetRef nonAlnumChars = NULL;
2501
2502 if (NULL == nonAlnumChars) {
2503 CFMutableCharacterSetRef cset = CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault, CFCharacterSetGetPredefined(kCFCharacterSetAlphaNumeric));
2504 CFCharacterSetInvert(cset);
2505 if (!OSAtomicCompareAndSwapPtrBarrier(NULL, cset, (void **)&nonAlnumChars)) CFRelease(cset);
2506 }
2507
2508 CFCharacterSetInitInlineBuffer(nonAlnumChars, buffer);
2509
2510 return true;
2511 }
2512
2513 return false;
2514 }
2515
// Element count of the UTF32Char stack buffers (strBuf1/strBuf2) declared by CFStringCompareWithOptionsAndLocale()
#define kCFStringStackBufferLength (__kCFStringInlineBufferLength)
2517
2518 CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions, CFLocaleRef locale) {
2519 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2520 UTF32Char strBuf1[kCFStringStackBufferLength];
2521 UTF32Char strBuf2[kCFStringStackBufferLength];
2522 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2523 UTF32Char str1Char, str2Char;
2524 CFIndex str1UsedLen, str2UsedLen;
2525 CFIndex str1Index = 0, str2Index = 0, strBuf1Index = 0, strBuf2Index = 0, strBuf1Len = 0, strBuf2Len = 0;
2526 CFIndex str1LocalizedIndex = 0, str2LocalizedIndex = 0;
2527 CFIndex forcedIndex1 = 0, forcedIndex2 = 0;
2528 CFIndex str2Len = CFStringGetLength(string2);
2529 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2530 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
2531 bool equalityOptions = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive)) ? true : false);
2532 bool numerically = ((compareOptions & kCFCompareNumerically) ? true : false);
2533 bool forceOrdering = ((compareOptions & kCFCompareForcedOrdering) ? true : false);
2534 const uint8_t *langCode;
2535 CFComparisonResult compareResult = kCFCompareEqualTo;
2536 UTF16Char otherChar;
2537 Boolean freeLocale = false;
2538 CFCharacterSetInlineBuffer *ignoredChars = NULL;
2539 CFCharacterSetInlineBuffer csetBuffer;
2540 bool numericEquivalence = false;
2541
2542 if ((compareOptions & kCFCompareLocalized) && (NULL == locale)) {
2543 locale = CFLocaleCopyCurrent();
2544 freeLocale = true;
2545 }
2546
2547 langCode = ((NULL == locale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale, true));
2548
2549 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
2550 ignoredChars = &csetBuffer;
2551 equalityOptions = true;
2552 }
2553
2554 if ((NULL == locale) && (NULL == ignoredChars) && !numerically) { // could do binary comp (be careful when adding new flags)
2555 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2556 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2557 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(string2, eightBitEncoding);
2558 CFIndex factor = sizeof(uint8_t);
2559
2560 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2561 compareOptions &= ~kCFCompareNonliteral; // remove non-literal
2562
2563 if ((kCFStringEncodingASCII == eightBitEncoding) && (false == forceOrdering)) {
2564 if (caseInsensitive) {
2565 int cmpResult = strncasecmp_l((const char *)str1Bytes + rangeToCompare.location, (const char *)str2Bytes, __CFMin(rangeToCompare.length, str2Len), NULL);
2566
2567 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2568
2569 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2570 }
2571 } else if (caseInsensitive || diacriticsInsensitive) {
2572 CFIndex limitLength = __CFMin(rangeToCompare.length, str2Len);
2573
2574 str1Bytes += rangeToCompare.location;
2575
2576 while (str1Index < limitLength) {
2577 str1Char = str1Bytes[str1Index];
2578 str2Char = str2Bytes[str1Index];
2579
2580 if (str1Char != str2Char) {
2581 if ((str1Char < 0x80) && (str2Char < 0x80)) {
2582 if (forceOrdering && (kCFCompareEqualTo == compareResult) && (str1Char != str2Char)) compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2583 if (caseInsensitive) {
2584 if ((str1Char >= 'A') && (str1Char <= 'Z')) str1Char += ('a' - 'A');
2585 if ((str2Char >= 'A') && (str2Char <= 'Z')) str2Char += ('a' - 'A');
2586 }
2587
2588 if (str1Char != str2Char) return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2589 } else {
2590 str1Bytes = NULL;
2591 break;
2592 }
2593 }
2594 ++str1Index;
2595 }
2596
2597 str2Index = str1Index;
2598
2599 if (str1Index == limitLength) {
2600 int cmpResult = rangeToCompare.length - str2Len;
2601
2602 return ((0 == cmpResult) ? compareResult : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2603 }
2604 }
2605 } else if (!equalityOptions && (NULL == str1Bytes) && (NULL == str2Bytes)) {
2606 str1Bytes = (const uint8_t *)CFStringGetCharactersPtr(string);
2607 str2Bytes = (const uint8_t *)CFStringGetCharactersPtr(string2);
2608 factor = sizeof(UTF16Char);
2609 #if __LITTLE_ENDIAN__
2610 if ((NULL != str1Bytes) && (NULL != str2Bytes)) { // we cannot use memcmp
2611 const UTF16Char *str1 = ((const UTF16Char *)str1Bytes) + rangeToCompare.location;
2612 const UTF16Char *str1Limit = str1 + __CFMin(rangeToCompare.length, str2Len);
2613 const UTF16Char *str2 = (const UTF16Char *)str2Bytes;
2614 CFIndex cmpResult = 0;
2615
2616 while ((0 == cmpResult) && (str1 < str1Limit)) cmpResult = (CFIndex)*(str1++) - (CFIndex)*(str2++);
2617
2618 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2619
2620 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2621 }
2622 #endif /* __LITTLE_ENDIAN__ */
2623 }
2624 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2625 int cmpResult = memcmp(str1Bytes + (rangeToCompare.location * factor), str2Bytes, __CFMin(rangeToCompare.length, str2Len) * factor);
2626
2627 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2628
2629 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2630 }
2631 }
2632
2633 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2634
2635 CFStringInitInlineBuffer(string, &inlineBuf1, rangeToCompare);
2636 CFStringInitInlineBuffer(string2, &inlineBuf2, CFRangeMake(0, str2Len));
2637
2638 if (NULL != locale) {
2639 str1LocalizedIndex = str1Index;
2640 str2LocalizedIndex = str2Index;
2641
2642 // We temporarily disable kCFCompareDiacriticInsensitive for SL <rdar://problem/6767096>. Should be revisited in NMOS <rdar://problem/7003830>
2643 if (forceOrdering) {
2644 diacriticsInsensitive = false;
2645 compareOptions &= ~kCFCompareDiacriticInsensitive;
2646 }
2647 }
2648 while ((str1Index < rangeToCompare.length) && (str2Index < str2Len)) {
2649 if (strBuf1Len == 0) {
2650 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2651 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str1Char += ('a' - 'A');
2652 str1UsedLen = 1;
2653 } else {
2654 str1Char = strBuf1[strBuf1Index++];
2655 }
2656 if (strBuf2Len == 0) {
2657 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2658 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str2Char += ('a' - 'A');
2659 str2UsedLen = 1;
2660 } else {
2661 str2Char = strBuf2[strBuf2Index++];
2662 }
2663
2664 if (numerically && ((0 == strBuf1Len) && (str1Char <= '9') && (str1Char >= '0')) && ((0 == strBuf2Len) && (str2Char <= '9') && (str2Char >= '0'))) { // If both are not ASCII digits, then don't do numerical comparison here
2665 uint64_t intValue1 = 0, intValue2 = 0; // !!! Doesn't work if numbers are > max uint64_t
2666 CFIndex str1NumRangeIndex = str1Index;
2667 CFIndex str2NumRangeIndex = str2Index;
2668
2669 do {
2670 intValue1 = (intValue1 * 10) + (str1Char - '0');
2671 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, ++str1Index);
2672 } while ((str1Char <= '9') && (str1Char >= '0'));
2673
2674 do {
2675 intValue2 = intValue2 * 10 + (str2Char - '0');
2676 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, ++str2Index);
2677 } while ((str2Char <= '9') && (str2Char >= '0'));
2678
2679 if (intValue1 == intValue2) {
2680 if (forceOrdering && (kCFCompareEqualTo == compareResult) && ((str1Index - str1NumRangeIndex) != (str2Index - str2NumRangeIndex))) {
2681 compareResult = (((str1Index - str1NumRangeIndex) < (str2Index - str2NumRangeIndex)) ? kCFCompareLessThan : kCFCompareGreaterThan);
2682 numericEquivalence = true;
2683 forcedIndex1 = str1NumRangeIndex;
2684 forcedIndex2 = str2NumRangeIndex;
2685 }
2686
2687 continue;
2688 } else if (intValue1 < intValue2) {
2689 if (freeLocale && locale) {
2690 CFRelease(locale);
2691 }
2692 return kCFCompareLessThan;
2693 } else {
2694 if (freeLocale && locale) {
2695 CFRelease(locale);
2696 }
2697 return kCFCompareGreaterThan;
2698 }
2699 }
2700
2701 if (str1Char != str2Char) {
2702 if (!equalityOptions) {
2703 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale));
2704 if (freeLocale && locale) {
2705 CFRelease(locale);
2706 }
2707 return compareResult;
2708 }
2709
2710 if (forceOrdering && (kCFCompareEqualTo == compareResult)) {
2711 compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2712 forcedIndex1 = str1LocalizedIndex;
2713 forcedIndex2 = str2LocalizedIndex;
2714 }
2715
2716 if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars)) {
2717 if (NULL != locale) {
2718 compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale);
2719 if (freeLocale && locale) {
2720 CFRelease(locale);
2721 }
2722 return compareResult;
2723 } else if (!caseInsensitive) {
2724 if (freeLocale && locale) {
2725 CFRelease(locale);
2726 }
2727 return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2728 }
2729 }
2730
2731 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2732 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2733 str1UsedLen = 2;
2734 }
2735
2736 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
2737 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2738 str2UsedLen = 2;
2739 }
2740
2741 if (NULL != ignoredChars) {
2742 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
2743 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2744 if (strBuf1Len == 0) str1Index += str1UsedLen;
2745 if (strBuf2Len > 0) --strBuf2Index;
2746 continue;
2747 }
2748 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
2749 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2750 if (strBuf2Len == 0) str2Index += str2UsedLen;
2751 if (strBuf1Len > 0) -- strBuf1Index;
2752 continue;
2753 }
2754 }
2755
2756 if (diacriticsInsensitive && (str1Index > 0)) {
2757 bool str1Skip = false;
2758 bool str2Skip = false;
2759
2760 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
2761 str1Char = str2Char;
2762 str1Skip = true;
2763 }
2764 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
2765 str2Char = str1Char;
2766 str2Skip = true;
2767 }
2768
2769 if (str1Skip != str2Skip) {
2770 if (str1Skip) str2Index -= str2UsedLen;
2771 if (str2Skip) str1Index -= str1UsedLen;
2772 }
2773 }
2774
2775 if (str1Char != str2Char) {
2776 if (0 == strBuf1Len) {
2777 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
2778 if (strBuf1Len > 0) {
2779 str1Char = *strBuf1;
2780 strBuf1Index = 1;
2781 }
2782 }
2783
2784 if ((0 == strBuf1Len) && (0 < strBuf2Len)) {
2785 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2786 if (freeLocale && locale) {
2787 CFRelease(locale);
2788 }
2789 return compareResult;
2790 }
2791
2792 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
2793 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
2794 if (strBuf2Len > 0) {
2795 str2Char = *strBuf2;
2796 strBuf2Index = 1;
2797 }
2798 if ((0 == strBuf2Len) || (str1Char != str2Char)) {
2799 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2800 if (freeLocale && locale) {
2801 CFRelease(locale);
2802 }
2803 return compareResult;
2804 }
2805 }
2806 }
2807
2808 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
2809 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2810 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
2811 ++strBuf1Index; ++strBuf2Index;
2812 }
2813 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2814 CFComparisonResult res = ((NULL == locale) ? ((strBuf1[strBuf1Index] < strBuf2[strBuf2Index]) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2815 if (freeLocale && locale) {
2816 CFRelease(locale);
2817 }
2818 return res;
2819 }
2820 }
2821 }
2822
2823 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2824 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2825
2826 if (strBuf1Len == 0) str1Index += str1UsedLen;
2827 if (strBuf2Len == 0) str2Index += str2UsedLen;
2828 if ((strBuf1Len == 0) && (strBuf2Len == 0)) {
2829 str1LocalizedIndex = str1Index;
2830 str2LocalizedIndex = str2Index;
2831 }
2832 }
2833
2834 if (diacriticsInsensitive || (NULL != ignoredChars)) {
2835 while (str1Index < rangeToCompare.length) {
2836 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2837 if ((str1Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
2838
2839 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2840
2841 if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char))) break;
2842
2843 str1Index += ((str1Char < 0x10000) ? 1 : 2);
2844 }
2845
2846 while (str2Index < str2Len) {
2847 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2848 if ((str2Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
2849
2850 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2851
2852 if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char))) break;
2853
2854 str2Index += ((str2Char < 0x10000) ? 1 : 2);
2855 }
2856 }
2857 // Need to recalc localized result here for forced ordering, ICU cannot do numericEquivalence
2858 if (!numericEquivalence && (NULL != locale) && (kCFCompareEqualTo != compareResult) && (str1Index == rangeToCompare.length) && (str2Index == str2Len)) compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(forcedIndex1, rangeToCompare.length - forcedIndex1), &inlineBuf2, CFRangeMake(forcedIndex2, str2Len - forcedIndex2), compareOptions, locale);
2859
2860 if (freeLocale && locale) {
2861 CFRelease(locale);
2862 }
2863
2864 return ((str1Index < rangeToCompare.length) ? kCFCompareGreaterThan : ((str2Index < str2Len) ? kCFCompareLessThan : compareResult));
2865 }
2866
2867
2868 CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions) { return CFStringCompareWithOptionsAndLocale(string, string2, rangeToCompare, compareOptions, NULL); }
2869
2870 CFComparisonResult CFStringCompare(CFStringRef string, CFStringRef str2, CFStringCompareFlags options) {
2871 return CFStringCompareWithOptions(string, str2, CFRangeMake(0, CFStringGetLength(string)), options);
2872 }
2873
2874 Boolean CFStringFindWithOptionsAndLocale(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFLocaleRef locale, CFRange *result) {
2875 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2876 CFIndex findStrLen = CFStringGetLength(stringToFind);
2877 Boolean didFind = false;
2878 bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive)) ? true : false);
2879 CFCharacterSetInlineBuffer *ignoredChars = NULL;
2880 CFCharacterSetInlineBuffer csetBuffer;
2881
2882 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
2883 ignoredChars = &csetBuffer;
2884 lengthVariants = true;
2885 }
2886
2887 if ((findStrLen > 0) && (rangeToSearch.length > 0) && ((findStrLen <= rangeToSearch.length) || lengthVariants)) {
2888 UTF32Char strBuf1[kCFStringStackBufferLength];
2889 UTF32Char strBuf2[kCFStringStackBufferLength];
2890 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2891 UTF32Char str1Char = 0, str2Char = 0;
2892 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2893 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2894 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(stringToFind, eightBitEncoding);
2895 const UTF32Char *characters, *charactersLimit;
2896 const uint8_t *langCode = NULL;
2897 CFIndex fromLoc, toLoc;
2898 CFIndex str1Index, str2Index;
2899 CFIndex strBuf1Len, strBuf2Len;
2900 CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length);
2901 bool equalityOptions = ((lengthVariants || (compareOptions & kCFCompareWidthInsensitive)) ? true : false);
2902 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2903 bool forwardAnchor = ((kCFCompareAnchored == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
2904 bool backwardAnchor = (((kCFCompareBackwards|kCFCompareAnchored) == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
2905 int8_t delta;
2906
2907 if (NULL == locale) {
2908 if (compareOptions & kCFCompareLocalized) {
2909 CFLocaleRef currentLocale = CFLocaleCopyCurrent();
2910 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(currentLocale, true);
2911 CFRelease(currentLocale);
2912 }
2913 } else {
2914 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale, true);
2915 }
2916
2917 CFStringInitInlineBuffer(string, &inlineBuf1, CFRangeMake(0, rangeToSearch.location + rangeToSearch.length));
2918 CFStringInitInlineBuffer(stringToFind, &inlineBuf2, CFRangeMake(0, findStrLen));
2919
2920 if (compareOptions & kCFCompareBackwards) {
2921 fromLoc = rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen);
2922 toLoc = (((compareOptions & kCFCompareAnchored) && !lengthVariants) ? fromLoc : rangeToSearch.location);
2923 } else {
2924 fromLoc = rangeToSearch.location;
2925 toLoc = ((compareOptions & kCFCompareAnchored) ? fromLoc : rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen));
2926 }
2927
2928 delta = ((fromLoc <= toLoc) ? 1 : -1);
2929
2930 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2931 uint8_t str1Byte, str2Byte;
2932
2933 while (1) {
2934 str1Index = fromLoc;
2935 str2Index = 0;
2936
2937 while ((str1Index < maxStr1Index) && (str2Index < findStrLen)) {
2938 str1Byte = str1Bytes[str1Index];
2939 str2Byte = str2Bytes[str2Index];
2940
2941 if (str1Byte != str2Byte) {
2942 if (equalityOptions) {
2943 if ((str1Byte < 0x80) && ((NULL == langCode) || ('I' != str1Byte))) {
2944 if (caseInsensitive && (str1Byte >= 'A') && (str1Byte <= 'Z')) str1Byte += ('a' - 'A');
2945 *strBuf1 = str1Byte;
2946 strBuf1Len = 1;
2947 } else {
2948 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2949 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2950 if (1 > strBuf1Len) {
2951 *strBuf1 = str1Char;
2952 strBuf1Len = 1;
2953 }
2954 }
2955
2956 if ((NULL != ignoredChars) && (forwardAnchor || (str1Index != fromLoc)) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str1Byte < 0x80) ? str1Byte : str1Char))) {
2957 ++str1Index;
2958 continue;
2959 }
2960
2961 if ((str2Byte < 0x80) && ((NULL == langCode) || ('I' != str2Byte))) {
2962 if (caseInsensitive && (str2Byte >= 'A') && (str2Byte <= 'Z')) str2Byte += ('a' - 'A');
2963 *strBuf2 = str2Byte;
2964 strBuf2Len = 1;
2965 } else {
2966 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2967 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2968 if (1 > strBuf2Len) {
2969 *strBuf2 = str2Char;
2970 strBuf2Len = 1;
2971 }
2972 }
2973
2974 if ((NULL != ignoredChars) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str2Byte < 0x80) ? str2Byte : str2Char))) {
2975 ++str2Index;
2976 continue;
2977 }
2978
2979 if ((1 == strBuf1Len) && (1 == strBuf2Len)) { // normal case
2980 if (*strBuf1 != *strBuf2) break;
2981 } else {
2982 CFIndex delta;
2983
2984 if (!caseInsensitive && (strBuf1Len != strBuf2Len)) break;
2985 if (memcmp(strBuf1, strBuf2, sizeof(UTF32Char) * __CFMin(strBuf1Len, strBuf2Len))) break;
2986
2987 if (strBuf1Len < strBuf2Len) {
2988 delta = strBuf2Len - strBuf1Len;
2989
2990 if ((str1Index + strBuf1Len + delta) > maxStr1Index) break;
2991
2992 characters = &(strBuf2[strBuf1Len]);
2993 charactersLimit = characters + delta;
2994
2995 while (characters < charactersLimit) {
2996 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1), &inlineBuf1, str1Index + 1, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2997 if ((strBuf1Len > 0) || (*characters != *strBuf1)) break;
2998 ++characters; ++str1Index;
2999 }
3000 if (characters < charactersLimit) break;
3001 } else if (strBuf2Len < strBuf1Len) {
3002 delta = strBuf1Len - strBuf2Len;
3003
3004 if ((str2Index + strBuf2Len + delta) > findStrLen) break;
3005
3006 characters = &(strBuf1[strBuf2Len]);
3007 charactersLimit = characters + delta;
3008
3009 while (characters < charactersLimit) {
3010 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str1Index + 1), &inlineBuf2, str2Index + 1, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
3011 if ((strBuf2Len > 0) || (*characters != *strBuf2)) break;
3012 ++characters; ++str2Index;
3013 }
3014 if (characters < charactersLimit) break;
3015 }
3016 }
3017 } else {
3018 break;
3019 }
3020 }
3021 ++str1Index; ++str2Index;
3022 }
3023
3024 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
3025 while (str2Index < findStrLen) {
3026 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
3027
3028 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
3029 ++str2Index;
3030 }
3031 }
3032
3033 if (str2Index == findStrLen) {
3034 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
3035 while (str1Index < maxStr1Index) {
3036 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3037
3038 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
3039 ++str1Index;
3040 }
3041 }
3042
3043 if (!backwardAnchor || (str1Index == maxStr1Index)) {
3044 didFind = true;
3045 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
3046 }
3047 break;
3048 }
3049
3050 if (fromLoc == toLoc) break;
3051 fromLoc += delta;
3052 }
3053 } else if (equalityOptions) {
3054 UTF16Char otherChar;
3055 CFIndex str1UsedLen, str2UsedLen, strBuf1Index = 0, strBuf2Index = 0;
3056 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
3057 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
3058 const uint8_t *combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3059
3060 while (1) {
3061 str1Index = fromLoc;
3062 str2Index = 0;
3063
3064 strBuf1Len = strBuf2Len = 0;
3065
3066 while (str2Index < findStrLen) {
3067 if (strBuf1Len == 0) {
3068 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3069 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A');
3070 str1UsedLen = 1;
3071 } else {
3072 str1Char = strBuf1[strBuf1Index++];
3073 }
3074 if (strBuf2Len == 0) {
3075 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
3076 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A');
3077 str2UsedLen = 1;
3078 } else {
3079 str2Char = strBuf2[strBuf2Index++];
3080 }
3081
3082 if (str1Char != str2Char) {
3083 if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars) && ((NULL == langCode) || !caseInsensitive)) break;
3084
3085 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3086 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3087 str1UsedLen = 2;
3088 }
3089
3090 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
3091 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
3092 str2UsedLen = 2;
3093 }
3094
3095 if (NULL != ignoredChars) {
3096 if ((forwardAnchor || (str1Index != fromLoc)) && (str1Index < maxStr1Index) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
3097 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
3098 if (strBuf1Len == 0) str1Index += str1UsedLen;
3099 if (strBuf2Len > 0) --strBuf2Index;
3100 continue;
3101 }
3102 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
3103 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
3104 if (strBuf2Len == 0) str2Index += str2UsedLen;
3105 if (strBuf1Len > 0) -- strBuf1Index;
3106 continue;
3107 }
3108 }
3109
3110 if (diacriticsInsensitive && (str1Index > fromLoc)) {
3111 bool str1Skip = false;
3112 bool str2Skip = false;
3113
3114 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
3115 str1Char = str2Char;
3116 str1Skip = true;
3117 }
3118 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
3119 str2Char = str1Char;
3120 str2Skip = true;
3121 }
3122
3123 if (str1Skip != str2Skip) {
3124 if (str1Skip) str2Index -= str2UsedLen;
3125 if (str2Skip) str1Index -= str1UsedLen;
3126 }
3127 }
3128
3129 if (str1Char != str2Char) {
3130 if (0 == strBuf1Len) {
3131 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
3132 if (strBuf1Len > 0) {
3133 str1Char = *strBuf1;
3134 strBuf1Index = 1;
3135 }
3136 }
3137
3138 if ((0 == strBuf1Len) && (0 < strBuf2Len)) break;
3139
3140 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
3141 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
3142 if ((0 == strBuf2Len) || (str1Char != *strBuf2)) break;
3143 strBuf2Index = 1;
3144 }
3145 }
3146
3147 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
3148 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
3149 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
3150 ++strBuf1Index; ++strBuf2Index;
3151 }
3152 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) break;
3153 }
3154 }
3155
3156 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
3157 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
3158
3159 if (strBuf1Len == 0) str1Index += str1UsedLen;
3160 if (strBuf2Len == 0) str2Index += str2UsedLen;
3161 }
3162
3163 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
3164 while (str2Index < findStrLen) {
3165 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
3166 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
3167 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
3168 }
3169 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
3170 str2Index += ((str2Char < 0x10000) ? 1 : 2);
3171 }
3172 }
3173
3174 if (str2Index == findStrLen) {
3175 bool match = true;
3176
3177 if (strBuf1Len > 0) {
3178 match = false;
3179
3180 if (diacriticsInsensitive && (strBuf1[0] < 0x0510)) {
3181 while (strBuf1Index < strBuf1Len) {
3182 if (!CFUniCharIsMemberOfBitmap(strBuf1[strBuf1Index], ((strBuf1[strBuf1Index] < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (strBuf1[strBuf1Index] >> 16))))) break;
3183 ++strBuf1Index;
3184 }
3185
3186 if (strBuf1Index == strBuf1Len) {
3187 str1Index += str1UsedLen;
3188 match = true;
3189 }
3190 }
3191 }
3192
3193 if (match && (compareOptions & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) && (str1Index < maxStr1Index)) {
3194 const uint8_t *nonBaseBitmap;
3195
3196 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3197
3198 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3199 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3200 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16));
3201 } else {
3202 nonBaseBitmap = graphemeBMP;
3203 }
3204
3205 if (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBitmap)) {
3206 if (diacriticsInsensitive) {
3207 if (str1Char < 0x10000) {
3208 CFIndex index = str1Index;
3209
3210 do {
3211 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, --index);
3212 } while (CFUniCharIsMemberOfBitmap(str1Char, graphemeBMP), (rangeToSearch.location < index));
3213
3214 if (str1Char < 0x0510) {
3215 while (++str1Index < maxStr1Index) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), graphemeBMP)) break;
3216 }
3217 }
3218 } else {
3219 match = false;
3220 }
3221 } else if (!diacriticsInsensitive) {
3222 otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index - 1);
3223
3224 // this is assuming viramas are only in BMP ???
3225 if ((str1Char == COMBINING_GRAPHEME_JOINER) || (otherChar == COMBINING_GRAPHEME_JOINER) || (otherChar == ZERO_WIDTH_JOINER) || ((otherChar >= HANGUL_CHOSEONG_START) && (otherChar <= HANGUL_JONGSEONG_END)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar, combClassBMP) == 9)) {
3226 CFRange clusterRange = CFStringGetRangeOfCharacterClusterAtIndex(string, str1Index - 1, kCFStringGraphemeCluster);
3227
3228 if (str1Index < (clusterRange.location + clusterRange.length)) match = false;
3229 }
3230 }
3231 }
3232
3233 if (match) {
3234 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
3235 while (str1Index < maxStr1Index) {
3236 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3237 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3238 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3239 }
3240 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
3241 str1Index += ((str1Char < 0x10000) ? 1 : 2);
3242 }
3243 }
3244
3245 if (!backwardAnchor || (str1Index == maxStr1Index)) {
3246 didFind = true;
3247 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
3248 }
3249 break;
3250 }
3251 }
3252
3253 if (fromLoc == toLoc) break;
3254 fromLoc += delta;
3255 }
3256 } else {
3257 while (1) {
3258 str1Index = fromLoc;
3259 str2Index = 0;
3260
3261 while (str2Index < findStrLen) {
3262 if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index)) break;
3263
3264 ++str1Index; ++str2Index;
3265 }
3266
3267 if (str2Index == findStrLen) {
3268 didFind = true;
3269 if (NULL != result) *result = CFRangeMake(fromLoc, findStrLen);
3270 break;
3271 }
3272
3273 if (fromLoc == toLoc) break;
3274 fromLoc += delta;
3275 }
3276 }
3277 }
3278
3279 return didFind;
3280 }
3281
3282
3283 Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFRange *result) { return CFStringFindWithOptionsAndLocale(string, stringToFind, rangeToSearch, compareOptions, NULL, result); }
3284
3285 // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
3286
3287 static const void *__rangeRetain(CFAllocatorRef allocator, const void *ptr) {
3288 CFRetain(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
3289 return ptr;
3290 }
3291
3292 static void __rangeRelease(CFAllocatorRef allocator, const void *ptr) {
3293 CFRelease(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
3294 }
3295
3296 static CFStringRef __rangeCopyDescription(const void *ptr) {
3297 CFRange range = *(CFRange *)ptr;
3298 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("{%ld, %ld}"), (long)range.location, (long)range.length);
3299 }
3300
3301 static Boolean __rangeEqual(const void *ptr1, const void *ptr2) {
3302 CFRange range1 = *(CFRange *)ptr1;
3303 CFRange range2 = *(CFRange *)ptr2;
3304 return (range1.location == range2.location) && (range1.length == range2.length);
3305 }
3306
3307
3308 CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
3309 CFRange foundRange;
3310 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
3311 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
3312 CFMutableDataRef rangeStorage = NULL; // Basically an array of CFRange, CFDataRef (packed)
3313 uint8_t *rangeStorageBytes = NULL;
3314 CFIndex foundCount = 0;
3315 CFIndex capacity = 0; // Number of CFRange, CFDataRef element slots in rangeStorage
3316
3317 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3318
3319 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
3320 // Determine the next range
3321 if (backwards) {
3322 rangeToSearch.length = foundRange.location - rangeToSearch.location;
3323 } else {
3324 rangeToSearch.location = foundRange.location + foundRange.length;
3325 rangeToSearch.length = endIndex - rangeToSearch.location;
3326 }
3327
3328 // If necessary, grow the data and squirrel away the found range
3329 if (foundCount >= capacity) {
3330 if (rangeStorage == NULL) rangeStorage = CFDataCreateMutable(alloc, 0);
3331 capacity = (capacity + 4) * 2;
3332 CFDataSetLength(rangeStorage, capacity * (sizeof(CFRange) + sizeof(CFDataRef)));
3333 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage) + foundCount * (sizeof(CFRange) + sizeof(CFDataRef));
3334 }
3335 memmove(rangeStorageBytes, &foundRange, sizeof(CFRange)); // The range
3336 memmove(rangeStorageBytes + sizeof(CFRange), &rangeStorage, sizeof(CFDataRef)); // The data
3337 rangeStorageBytes += (sizeof(CFRange) + sizeof(CFDataRef));
3338 foundCount++;
3339 }
3340
3341 if (foundCount > 0) {
3342 CFIndex cnt;
3343 CFMutableArrayRef array;
3344 const CFArrayCallBacks callbacks = {0, __rangeRetain, __rangeRelease, __rangeCopyDescription, __rangeEqual};
3345
3346 CFDataSetLength(rangeStorage, foundCount * (sizeof(CFRange) + sizeof(CFDataRef))); // Tighten storage up
3347 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage);
3348
3349 array = CFArrayCreateMutable(alloc, foundCount * sizeof(CFRange *), &callbacks);
3350 for (cnt = 0; cnt < foundCount; cnt++) {
3351 // Each element points to the appropriate CFRange in the CFData
3352 CFArrayAppendValue(array, rangeStorageBytes + cnt * (sizeof(CFRange) + sizeof(CFDataRef)));
3353 }
3354 CFRelease(rangeStorage); // We want the data to go away when all CFRanges inside it are released...
3355 return array;
3356 } else {
3357 return NULL;
3358 }
3359 }
3360
3361
3362 CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFStringCompareFlags compareOptions) {
3363 CFRange foundRange;
3364
3365 if (CFStringFindWithOptions(string, stringToFind, CFRangeMake(0, CFStringGetLength(string)), compareOptions, &foundRange)) {
3366 return foundRange;
3367 } else {
3368 return CFRangeMake(kCFNotFound, 0);
3369 }
3370 }
3371
3372 Boolean CFStringHasPrefix(CFStringRef string, CFStringRef prefix) {
3373 return CFStringFindWithOptions(string, prefix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored, NULL);
3374 }
3375
3376 Boolean CFStringHasSuffix(CFStringRef string, CFStringRef suffix) {
3377 return CFStringFindWithOptions(string, suffix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored|kCFCompareBackwards, NULL);
3378 }
3379
3380 #define MAX_TRANSCODING_LENGTH 4
3381
3382 #define HANGUL_JONGSEONG_COUNT (28)
3383
3384 CF_INLINE bool _CFStringIsHangulLVT(UTF32Char character) {
3385 return (((character - HANGUL_SYLLABLE_START) % HANGUL_JONGSEONG_COUNT) ? true : false);
3386 }
3387
/* 16-entry lookup table of lengths. NOTE(review): presumably indexed by a
   4-bit transcoding hint -- confirm against the code that reads it. */
static uint8_t __CFTranscodingHintLength[] = {
    2, 3, 4, 4,
    4, 4, 4, 2,
    2, 2, 2, 4,
    0, 0, 0, 0
};
3391
/* States used while scanning Hangul sequences. The names follow the jamo
   classes: L, V, T, the precomposed LV / LVT forms, and a final "break" state. */
enum {
    kCFStringHangulStateL     = 0,
    kCFStringHangulStateV     = 1,
    kCFStringHangulStateT     = 2,
    kCFStringHangulStateLV    = 3,
    kCFStringHangulStateLVT   = 4,
    kCFStringHangulStateBreak = 5
};
3400
3401 static CFRange _CFStringInlineBufferGetComposedRange(CFStringInlineBuffer *buffer, CFIndex start, CFStringCharacterClusterType type, const uint8_t *bmpBitmap, CFIndex csetType) {
3402 CFIndex end = start + 1;
3403 const uint8_t *bitmap = bmpBitmap;
3404 UTF32Char character;
3405 UTF16Char otherSurrogate;
3406 uint8_t step;
3407
3408 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3409
3410 // We don't combine characters in Armenian ~ Limbu range for backward deletion
3411 if ((type != kCFStringBackwardDeletionCluster) || (character < 0x0530) || (character > 0x194F)) {
3412 // Check if the current is surrogate
3413 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start + 1)))) {
3414 ++end;
3415 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3416 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3417 }
3418
3419 // Extend backward
3420 while (start > 0) {
3421 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3422
3423 if (character < 0x10000) { // the first round could be already be non-BMP
3424 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)))) {
3425 character = CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate, character);
3426 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3427 if (--start == 0) break; // starting with non-BMP combining mark
3428 } else {
3429 bitmap = bmpBitmap;
3430 }
3431 }
3432
3433 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3434
3435 --start;
3436
3437 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3438 }
3439 }
3440
3441 // Hangul
3442 if (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END))) {
3443 uint8_t state;
3444 uint8_t initialState;
3445
3446 if (character < HANGUL_JUNGSEONG_START) {
3447 state = kCFStringHangulStateL;
3448 } else if (character < HANGUL_JONGSEONG_START) {
3449 state = kCFStringHangulStateV;
3450 } else if (character < HANGUL_SYLLABLE_START) {
3451 state = kCFStringHangulStateT;
3452 } else {
3453 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3454 }
3455 initialState = state;
3456
3457 // Extend backward
3458 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)) >= HANGUL_CHOSEONG_START) && (character <= HANGUL_SYLLABLE_END) && ((character <= HANGUL_JONGSEONG_END) || (character >= HANGUL_SYLLABLE_START))) {
3459 switch (state) {
3460 case kCFStringHangulStateV:
3461 if (character <= HANGUL_CHOSEONG_END) {
3462 state = kCFStringHangulStateL;
3463 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END) && !_CFStringIsHangulLVT(character)) {
3464 state = kCFStringHangulStateLV;
3465 } else if (character > HANGUL_JUNGSEONG_END) {
3466 state = kCFStringHangulStateBreak;
3467 }
3468 break;
3469
3470 case kCFStringHangulStateT:
3471 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JUNGSEONG_END)) {
3472 state = kCFStringHangulStateV;
3473 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)) {
3474 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3475 } else if (character < HANGUL_JUNGSEONG_START) {
3476 state = kCFStringHangulStateBreak;
3477 }
3478 break;
3479
3480 default:
3481 state = ((character < HANGUL_JUNGSEONG_START) ? kCFStringHangulStateL : kCFStringHangulStateBreak);
3482 break;
3483 }
3484
3485 if (state == kCFStringHangulStateBreak) break;
3486 --start;
3487 }
3488
3489 // Extend forward
3490 state = initialState;
3491 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) && (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)))) {
3492 switch (state) {
3493 case kCFStringHangulStateLV:
3494 case kCFStringHangulStateV:
3495 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) {
3496 state = ((character < HANGUL_JONGSEONG_START) ? kCFStringHangulStateV : kCFStringHangulStateT);
3497 } else {
3498 state = kCFStringHangulStateBreak;
3499 }
3500 break;
3501
3502 case kCFStringHangulStateLVT:
3503 case kCFStringHangulStateT:
3504 state = (((character >= HANGUL_JONGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) ? kCFStringHangulStateT : kCFStringHangulStateBreak);
3505 break;
3506
3507 default:
3508 if (character < HANGUL_JUNGSEONG_START) {
3509 state = kCFStringHangulStateL;
3510 } else if (character < HANGUL_JONGSEONG_START) {
3511 state = kCFStringHangulStateV;
3512 } else if (character >= HANGUL_SYLLABLE_START) {
3513 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3514 } else {
3515 state = kCFStringHangulStateBreak;
3516 }
3517 break;
3518 }
3519
3520 if (state == kCFStringHangulStateBreak) break;
3521 ++end;
3522 }
3523 }
3524
3525 // Extend forward
3526 while ((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) {
3527 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3528
3529 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, end + 1)))) {
3530 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3531 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3532 step = 2;
3533 } else {
3534 bitmap = bmpBitmap;
3535 step = 1;
3536 }
3537
3538 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3539
3540 end += step;
3541 }
3542
3543 return CFRangeMake(start, end - start);
3544 }
3545
3546 CF_INLINE bool _CFStringIsVirama(UTF32Char character, const uint8_t *combClassBMP) {
3547 return ((character == COMBINING_GRAPHEME_JOINER) || (CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)((character < 0x10000) ? combClassBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (character >> 16)))) == 9) ? true : false);
3548 }
3549
3550 CFRange CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string, CFIndex charIndex, CFStringCharacterClusterType type) {
3551 CFRange range;
3552 CFIndex currentIndex;
3553 CFIndex length = CFStringGetLength(string);
3554 CFIndex csetType = ((kCFStringGraphemeCluster == type) ? kCFUniCharGraphemeExtendCharacterSet : kCFUniCharNonBaseCharacterSet);
3555 CFStringInlineBuffer stringBuffer;
3556 const uint8_t *bmpBitmap;
3557 const uint8_t *letterBMP;
3558 static const uint8_t *combClassBMP = NULL;
3559 UTF32Char character;
3560 UTF16Char otherSurrogate;
3561
3562 if (charIndex >= length) return CFRangeMake(kCFNotFound, 0);
3563
3564 /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
3565 */
3566 if (!CF_IS_OBJC(__kCFStringTypeID, string) && __CFStrIsEightBit(string)) return CFRangeMake(charIndex, 1);
3567
3568 bmpBitmap = CFUniCharGetBitmapPtrForPlane(csetType, 0);
3569 letterBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, 0);
3570 if (NULL == combClassBMP) combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3571
3572 CFStringInitInlineBuffer(string, &stringBuffer, CFRangeMake(0, length));
3573
3574 // Get composed character sequence first
3575 range = _CFStringInlineBufferGetComposedRange(&stringBuffer, charIndex, type, bmpBitmap, csetType);
3576
3577 // Do grapheme joiners
3578 if (type < kCFStringCursorMovementCluster) {
3579 const uint8_t *letter = letterBMP;
3580
3581 // Check to see if we have a letter at the beginning of initial cluster
3582 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location);
3583
3584 if ((range.length > 1) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location + 1)))) {
3585 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3586 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3587 }
3588
3589 if ((character == ZERO_WIDTH_JOINER) || CFUniCharIsMemberOfBitmap(character, letter)) {
3590 CFRange otherRange;
3591
3592 // Check if preceded by grapheme joiners (U034F and viramas)
3593 otherRange.location = currentIndex = range.location;
3594
3595 while (currentIndex > 1) {
3596 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex);
3597
3598 // ??? We're assuming viramas only in BMP
3599 if ((_CFStringIsVirama(character, combClassBMP) || ((character == ZERO_WIDTH_JOINER) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex), combClassBMP))) && (currentIndex > 0)) {
3600 --currentIndex;
3601 } else {
3602 break;
3603 }
3604
3605 currentIndex = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType).location;
3606
3607 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3608
3609 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1)))) {
3610 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3611 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3612 --currentIndex;
3613 } else {
3614 letter = letterBMP;
3615 }
3616
3617 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3618 range.location = currentIndex;
3619 }
3620
3621 range.length += otherRange.location - range.location;
3622
3623 // Check if followed by grapheme joiners
3624 if ((range.length > 1) && ((range.location + range.length) < length)) {
3625 otherRange = range;
3626 currentIndex = otherRange.location + otherRange.length;
3627
3628 do {
3629 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1);
3630
3631 // ??? We're assuming viramas only in BMP
3632 if ((character != ZERO_WIDTH_JOINER) && !_CFStringIsVirama(character, combClassBMP)) break;
3633
3634 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3635
3636 if (character == ZERO_WIDTH_JOINER) character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, ++currentIndex);
3637
3638 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex + 1)))) {
3639 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3640 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3641 } else {
3642 letter = letterBMP;
3643 }
3644
3645 // We only conjoin letters
3646 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3647 otherRange = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType);
3648 currentIndex = otherRange.location + otherRange.length;
3649 } while ((otherRange.location + otherRange.length) < length);
3650 range.length = currentIndex - range.location;
3651 }
3652 }
3653 }
3654
3655 // Check if we're part of prefix transcoding hints
3656 CFIndex otherIndex;
3657
3658 currentIndex = (range.location + range.length) - (MAX_TRANSCODING_LENGTH + 1);
3659 if (currentIndex < 0) currentIndex = 0;
3660
3661 while (currentIndex <= range.location) {
3662 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3663
3664 if ((character & 0x1FFFF0) == 0xF860) { // transcoding hint
3665 otherIndex = currentIndex + __CFTranscodingHintLength[(character - 0xF860)] + 1;
3666 if (otherIndex >= (range.location + range.length)) {
3667 if (otherIndex <= length) {
3668 range.location = currentIndex;
3669 range.length = otherIndex - currentIndex;
3670 }
3671 break;
3672 }
3673 }
3674 ++currentIndex;
3675 }
3676
3677 return range;
3678 }
3679
3680 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
3681 return CFStringGetRangeOfCharacterClusterAtIndex(theString, theIndex, kCFStringComposedCharacterCluster);
3682 }
3683
3684 /*!
3685 @function CFStringFindCharacterFromSet
3686 Query the range of characters contained in the specified character set.
3687 @param theString The CFString which is to be searched. If this
3688 parameter is not a valid CFString, the behavior is
3689 undefined.
3690 @param theSet The CFCharacterSet against which the membership
3691 of characters is checked. If this parameter is not a valid
3692 CFCharacterSet, the behavior is undefined.
3693 @param range The range of characters within the string to search. If
3694 the range location or end point (defined by the location
3695 plus length minus 1) are outside the index space of the
3696 string (0 to N-1 inclusive, where N is the length of the
3697 string), the behavior is undefined. If the range length is
3698 negative, the behavior is undefined. The range may be empty
3699 (length 0), in which case no search is performed.
3700 @param searchOptions The bitwise-or'ed option flags to control
3701 the search behavior. The supported options are
3702 kCFCompareBackwards andkCFCompareAnchored.
3703 If other option flags are specified, the behavior
3704 is undefined.
3705 @param result The pointer to a CFRange supplied by the caller in
3706 which the search result is stored. If a pointer to an invalid
3707 memory is specified, the behavior is undefined.
3708 @result true, if at least a character which is a member of the character
3709 set is found and result is filled, otherwise, false.
3710 */
3711 #define SURROGATE_START 0xD800
3712 #define SURROGATE_END 0xDFFF
3713
3714 CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFStringCompareFlags searchOptions, CFRange *result) {
3715 CFStringInlineBuffer stringBuffer;
3716 CFCharacterSetInlineBuffer csetBuffer;
3717 UniChar ch;
3718 CFIndex step;
3719 CFIndex fromLoc, toLoc, cnt; // fromLoc and toLoc are inclusive
3720 Boolean found = false;
3721 Boolean done = false;
3722
3723 //#warning FIX ME !! Should support kCFCompareNonliteral
3724
3725 if ((rangeToSearch.location + rangeToSearch.length > CFStringGetLength(theString)) || (rangeToSearch.length == 0)) return false;
3726
3727 if (searchOptions & kCFCompareBackwards) {
3728 fromLoc = rangeToSearch.location + rangeToSearch.length - 1;
3729 toLoc = rangeToSearch.location;
3730 } else {
3731 fromLoc = rangeToSearch.location;
3732 toLoc = rangeToSearch.location + rangeToSearch.length - 1;
3733 }
3734 if (searchOptions & kCFCompareAnchored) {
3735 toLoc = fromLoc;
3736 }
3737
3738 step = (fromLoc <= toLoc) ? 1 : -1;
3739 cnt = fromLoc;
3740
3741 CFStringInitInlineBuffer(theString, &stringBuffer, rangeToSearch);
3742 CFCharacterSetInitInlineBuffer(theSet, &csetBuffer);
3743
3744 do {
3745 ch = CFStringGetCharacterFromInlineBuffer(&stringBuffer, cnt - rangeToSearch.location);
3746 if ((ch >= SURROGATE_START) && (ch <= SURROGATE_END)) {
3747 int otherCharIndex = cnt + step;
3748
3749 if (((step < 0) && (otherCharIndex < toLoc)) || ((step > 0) && (otherCharIndex > toLoc))) {
3750 done = true;
3751 } else {
3752 UniChar highChar;
3753 UniChar lowChar = CFStringGetCharacterFromInlineBuffer(&stringBuffer, otherCharIndex - rangeToSearch.location);
3754
3755 if (cnt < otherCharIndex) {
3756 highChar = ch;
3757 } else {
3758 highChar = lowChar;
3759 lowChar = ch;
3760 }
3761
3762 if (CFUniCharIsSurrogateHighCharacter(highChar) && CFUniCharIsSurrogateLowCharacter(lowChar) && CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, CFUniCharGetLongCharacterForSurrogatePair(highChar, lowChar))) {
3763 if (result) *result = CFRangeMake((cnt < otherCharIndex ? cnt : otherCharIndex), 2);
3764 return true;
3765 } else if (otherCharIndex == toLoc) {
3766 done = true;
3767 } else {
3768 cnt = otherCharIndex + step;
3769 }
3770 }
3771 } else if (CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, ch)) {
3772 done = found = true;
3773 } else if (cnt == toLoc) {
3774 done = true;
3775 } else {
3776 cnt += step;
3777 }
3778 } while (!done);
3779
3780 if (found && result) *result = CFRangeMake(cnt, 1);
3781 return found;
3782 }
3783
3784 /* Line range code */
3785
3786 #define CarriageReturn '\r' /* 0x0d */
3787 #define NewLine '\n' /* 0x0a */
3788 #define NextLine 0x0085
3789 #define LineSeparator 0x2028
3790 #define ParaSeparator 0x2029
3791
3792 CF_INLINE Boolean isALineSeparatorTypeCharacter(UniChar ch, Boolean includeLineEndings) {
3793 if (ch > CarriageReturn && ch < NextLine) return false; /* Quick test to cover most chars */
3794 return (ch == NewLine || ch == CarriageReturn || ch == ParaSeparator || (includeLineEndings && (ch == NextLine || ch == LineSeparator))) ? true : false;
3795 }
3796
3797 static void __CFStringGetLineOrParagraphBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex, Boolean includeLineEndings) {
3798 CFIndex len;
3799 CFStringInlineBuffer buf;
3800 UniChar ch;
3801
3802 __CFAssertIsString(string);
3803 __CFAssertRangeIsInStringBounds(string, range.location, range.length);
3804
3805 len = __CFStrLength(string);
3806
3807 if (lineBeginIndex) {
3808 CFIndex start;
3809 if (range.location == 0) {
3810 start = 0;
3811 } else {
3812 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3813 CFIndex buf_idx = range.location;
3814
3815 /* Take care of the special case where start happens to fall right between \r and \n */
3816 ch = CFStringGetCharacterFromInlineBuffer(&buf, buf_idx);
3817 buf_idx--;
3818 if ((ch == NewLine) && (CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) == CarriageReturn)) {
3819 buf_idx--;
3820 }
3821 while (1) {
3822 if (buf_idx < 0) {
3823 start = 0;
3824 break;
3825 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx), includeLineEndings)) {
3826 start = buf_idx + 1;
3827 break;
3828 } else {
3829 buf_idx--;
3830 }
3831 }
3832 }
3833 *lineBeginIndex = start;
3834 }
3835
3836 /* Now find the ending point */
3837 if (lineEndIndex || contentsEndIndex) {
3838 CFIndex endOfContents, lineSeparatorLength = 1; /* 1 by default */
3839 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3840 CFIndex buf_idx = range.location + range.length - (range.length ? 1 : 0);
3841 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
3842 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3843 if (ch == NewLine) {
3844 endOfContents = buf_idx;
3845 buf_idx--;
3846 if (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == CarriageReturn) {
3847 lineSeparatorLength = 2;
3848 endOfContents--;
3849 }
3850 } else {
3851 while (1) {
3852 if (isALineSeparatorTypeCharacter(ch, includeLineEndings)) {
3853 endOfContents = buf_idx; /* This is actually end of contentsRange */
3854 buf_idx++; /* OK for this to go past the end */
3855 if ((ch == CarriageReturn) && (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == NewLine)) {
3856 lineSeparatorLength = 2;
3857 }
3858 break;
3859 } else if (buf_idx >= len) {
3860 endOfContents = len;
3861 lineSeparatorLength = 0;
3862 break;
3863 } else {
3864 buf_idx++;
3865 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3866 }
3867 }
3868 }
3869 if (contentsEndIndex) *contentsEndIndex = endOfContents;
3870 if (lineEndIndex) *lineEndIndex = endOfContents + lineSeparatorLength;
3871 }
3872 }
3873
3874 void CFStringGetLineBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex) {
3875 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)string, getLineStart:(NSUInteger *)lineBeginIndex end:(NSUInteger *)lineEndIndex contentsEnd:(NSUInteger *)contentsEndIndex forRange:NSMakeRange(range.location, range.length));
3876 __CFStringGetLineOrParagraphBounds(string, range, lineBeginIndex, lineEndIndex, contentsEndIndex, true);
3877 }
3878
3879 void CFStringGetParagraphBounds(CFStringRef string, CFRange range, CFIndex *parBeginIndex, CFIndex *parEndIndex, CFIndex *contentsEndIndex) {
3880 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)string, getParagraphStart:(NSUInteger *)parBeginIndex end:(NSUInteger *)parEndIndex contentsEnd:(NSUInteger *)contentsEndIndex forRange:NSMakeRange(range.location, range.length));
3881 __CFStringGetLineOrParagraphBounds(string, range, parBeginIndex, parEndIndex, contentsEndIndex, false);
3882 }
3883
3884
3885 CFStringRef CFStringCreateByCombiningStrings(CFAllocatorRef alloc, CFArrayRef array, CFStringRef separatorString) {
3886 CFIndex numChars;
3887 CFIndex separatorNumByte;
3888 CFIndex stringCount = CFArrayGetCount(array);
3889 Boolean isSepCFString = !CF_IS_OBJC(__kCFStringTypeID, separatorString);
3890 Boolean canBeEightbit = isSepCFString && __CFStrIsEightBit(separatorString);
3891 CFIndex idx;
3892 CFStringRef otherString;
3893 void *buffer;
3894 uint8_t *bufPtr;
3895 const void *separatorContents = NULL;
3896
3897 if (stringCount == 0) {
3898 return CFStringCreateWithCharacters(alloc, NULL, 0);
3899 } else if (stringCount == 1) {
3900 return (CFStringRef)CFStringCreateCopy(alloc, (CFStringRef)CFArrayGetValueAtIndex(array, 0));
3901 }
3902
3903 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3904
3905 numChars = CFStringGetLength(separatorString) * (stringCount - 1);
3906 for (idx = 0; idx < stringCount; idx++) {
3907 otherString = (CFStringRef)CFArrayGetValueAtIndex(array, idx);
3908 numChars += CFStringGetLength(otherString);
3909 // canBeEightbit is already false if the separator is an NSString...
3910 if (CF_IS_OBJC(__kCFStringTypeID, otherString) || ! __CFStrIsEightBit(otherString)) canBeEightbit = false;
3911 }
3912
3913 buffer = (uint8_t *)CFAllocatorAllocate(alloc, canBeEightbit ? ((numChars + 1) * sizeof(uint8_t)) : (numChars * sizeof(UniChar)), 0);
3914 bufPtr = (uint8_t *)buffer;
3915 if (__CFOASafe) __CFSetLastAllocationEventName(buffer, "CFString (store)");
3916 separatorNumByte = CFStringGetLength(separatorString) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3917
3918 for (idx = 0; idx < stringCount; idx++) {
3919 if (idx) { // add separator here unless first string
3920 if (separatorContents) {
3921 memmove(bufPtr, separatorContents, separatorNumByte);
3922 } else {
3923 if (!isSepCFString) { // NSString
3924 CFStringGetCharacters(separatorString, CFRangeMake(0, CFStringGetLength(separatorString)), (UniChar *)bufPtr);
3925 } else if (canBeEightbit) {
3926 memmove(bufPtr, (const uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), separatorNumByte);
3927 } else {
3928 __CFStrConvertBytesToUnicode((uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), (UniChar *)bufPtr, __CFStrLength(separatorString));
3929 }
3930 separatorContents = bufPtr;
3931 }
3932 bufPtr += separatorNumByte;
3933 }
3934
3935 otherString = (CFStringRef )CFArrayGetValueAtIndex(array, idx);
3936 if (CF_IS_OBJC(__kCFStringTypeID, otherString)) {
3937 CFIndex otherLength = CFStringGetLength(otherString);
3938 CFStringGetCharacters(otherString, CFRangeMake(0, otherLength), (UniChar *)bufPtr);
3939 bufPtr += otherLength * sizeof(UniChar);
3940 } else {
3941 const uint8_t * otherContents = (const uint8_t *)__CFStrContents(otherString);
3942 CFIndex otherNumByte = __CFStrLength2(otherString, otherContents) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3943
3944 if (canBeEightbit || __CFStrIsUnicode(otherString)) {
3945 memmove(bufPtr, otherContents + __CFStrSkipAnyLengthByte(otherString), otherNumByte);
3946 } else {
3947 __CFStrConvertBytesToUnicode(otherContents + __CFStrSkipAnyLengthByte(otherString), (UniChar *)bufPtr, __CFStrLength2(otherString, otherContents));
3948 }
3949 bufPtr += otherNumByte;
3950 }
3951 }
3952 if (canBeEightbit) *bufPtr = 0; // NULL byte;
3953
3954 return canBeEightbit ?
3955 CFStringCreateWithCStringNoCopy(alloc, (const char*)buffer, __CFStringGetEightBitStringEncoding(), alloc) :
3956 CFStringCreateWithCharactersNoCopy(alloc, (UniChar *)buffer, numChars, alloc);
3957 }
3958
3959
3960 CFArrayRef CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc, CFStringRef string, CFStringRef separatorString) {
3961 CFArrayRef separatorRanges;
3962 CFIndex length = CFStringGetLength(string);
3963 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
3964 if (!(separatorRanges = CFStringCreateArrayWithFindResults(alloc, string, separatorString, CFRangeMake(0, length), 0))) {
3965 return CFArrayCreate(alloc, (const void **)&string, 1, & kCFTypeArrayCallBacks);
3966 } else {
3967 CFIndex idx;
3968 CFIndex count = CFArrayGetCount(separatorRanges);
3969 CFIndex startIndex = 0;
3970 CFIndex numChars;
3971 CFMutableArrayRef array = CFArrayCreateMutable(alloc, count + 2, & kCFTypeArrayCallBacks);
3972 const CFRange *currentRange;
3973 CFStringRef substring;
3974
3975 for (idx = 0;idx < count;idx++) {
3976 currentRange = (const CFRange *)CFArrayGetValueAtIndex(separatorRanges, idx);
3977 numChars = currentRange->location - startIndex;
3978 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, numChars));
3979 CFArrayAppendValue(array, substring);
3980 CFRelease(substring);
3981 startIndex = currentRange->location + currentRange->length;
3982 }
3983 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, length - startIndex));
3984 CFArrayAppendValue(array, substring);
3985 CFRelease(substring);
3986
3987 CFRelease(separatorRanges);
3988
3989 return array;
3990 }
3991 }
3992
3993 CFStringRef CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc, CFDataRef data, CFStringEncoding encoding) {
3994 return CFStringCreateWithBytes(alloc, CFDataGetBytePtr(data), CFDataGetLength(data), encoding, true);
3995 }
3996
3997
3998 CFDataRef CFStringCreateExternalRepresentation(CFAllocatorRef alloc, CFStringRef string, CFStringEncoding encoding, uint8_t lossByte) {
3999 CFIndex length;
4000 CFIndex guessedByteLength;
4001 uint8_t *bytes;
4002 CFIndex usedLength;
4003 SInt32 result;
4004
4005 if (CF_IS_OBJC(__kCFStringTypeID, string)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
4006 length = CFStringGetLength(string);
4007 } else {
4008 __CFAssertIsString(string);
4009 length = __CFStrLength(string);
4010 if (__CFStrIsEightBit(string) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
4011 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
4012 }
4013 }
4014
4015 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
4016
4017 if (((encoding & 0x0FFF) == kCFStringEncodingUnicode) && ((encoding == kCFStringEncodingUnicode) || ((encoding > kCFStringEncodingUTF8) && (encoding <= kCFStringEncodingUTF32LE)))) {
4018 guessedByteLength = (length + 1) * ((((encoding >> 26) & 2) == 0) ? sizeof(UTF16Char) : sizeof(UTF32Char)); // UTF32 format has the bit set
4019 } else if (((guessedByteLength = CFStringGetMaximumSizeForEncoding(length, encoding)) > length) && !CF_IS_OBJC(__kCFStringTypeID, string)) { // Multi byte encoding
4020 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
4021 if (__CFStrIsUnicode(string)) {
4022 CFIndex aLength = CFStringEncodingByteLengthForCharacters(encoding, kCFStringEncodingPrependBOM, __CFStrContents(string), __CFStrLength(string));
4023 if (aLength > 0) guessedByteLength = aLength;
4024 } else {
4025 #endif
4026 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, NULL, LONG_MAX, &guessedByteLength);
4027 // if result == length, we always succeed
4028 // otherwise, if result == 0, we fail
4029 // otherwise, if there was a lossByte but still result != length, we fail
4030 if ((result != length) && (!result || !lossByte)) return NULL;
4031 if (guessedByteLength == length && __CFStrIsEightBit(string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // It's all ASCII !!
4032 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
4033 }
4034 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
4035 }
4036 #endif
4037 }
4038 bytes = (uint8_t *)CFAllocatorAllocate(alloc, guessedByteLength, 0);
4039 if (__CFOASafe) __CFSetLastAllocationEventName(bytes, "CFData (store)");
4040
4041 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, bytes, guessedByteLength, &usedLength);
4042
4043 if ((result != length) && (!result || !lossByte)) { // see comment above about what this means
4044 CFAllocatorDeallocate(alloc, bytes);
4045 return NULL;
4046 }
4047
4048 return CFDataCreateWithBytesNoCopy(alloc, (uint8_t *)bytes, usedLength, alloc);
4049 }
4050
4051
4052 CFStringEncoding CFStringGetSmallestEncoding(CFStringRef str) {
4053 CFIndex len;
4054 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringEncoding, (NSString *)str, _smallestEncodingInCFStringEncoding);
4055 __CFAssertIsString(str);
4056
4057 if (__CFStrIsEightBit(str)) return __CFStringGetEightBitStringEncoding();
4058 len = __CFStrLength(str);
4059 if (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetEightBitStringEncoding(), 0, NULL, LONG_MAX, NULL) == len) return __CFStringGetEightBitStringEncoding();
4060 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetSystemEncoding(), 0, NULL, LONG_MAX, NULL) == len)) return __CFStringGetSystemEncoding();
4061 return kCFStringEncodingUnicode; /* ??? */
4062 }
4063
4064
4065 CFStringEncoding CFStringGetFastestEncoding(CFStringRef str) {
4066 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringEncoding, (NSString *)str, _fastestEncodingInCFStringEncoding);
4067 __CFAssertIsString(str);
4068 return __CFStrIsEightBit(str) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode; /* ??? */
4069 }
4070
4071
4072 SInt32 CFStringGetIntValue(CFStringRef str) {
4073 Boolean success;
4074 SInt32 result;
4075 SInt32 idx = 0;
4076 CFStringInlineBuffer buf;
4077 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
4078 success = __CFStringScanInteger(&buf, NULL, &idx, false, &result);
4079 return success ? result : 0;
4080 }
4081
4082
4083 double CFStringGetDoubleValue(CFStringRef str) {
4084 Boolean success;
4085 double result;
4086 SInt32 idx = 0;
4087 CFStringInlineBuffer buf;
4088 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
4089 success = __CFStringScanDouble(&buf, NULL, &idx, &result);
4090 return success ? result : 0.0;
4091 }
4092
4093
4094 /*** Mutable functions... ***/
4095
4096 void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string, UniChar *chars, CFIndex length, CFIndex capacity) {
4097 __CFAssertIsNotNegative(length);
4098 __CFAssertIsStringAndExternalMutable(string);
4099 CFAssert4((length <= capacity) && ((capacity == 0) || ((capacity > 0) && chars)), __kCFLogAssertion, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__, chars, length, capacity);
4100 __CFStrSetContentPtr(string, chars);
4101 __CFStrSetExplicitLength(string, length);
4102 __CFStrSetCapacity(string, capacity * sizeof(UniChar));
4103 __CFStrSetCapacityProvidedExternally(string);
4104 }
4105
4106
4107
4108 void CFStringInsert(CFMutableStringRef str, CFIndex idx, CFStringRef insertedStr) {
4109 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, insertString:(NSString *)insertedStr atIndex:(NSUInteger)idx);
4110 __CFAssertIsStringAndMutable(str);
4111 CFAssert3(idx >= 0 && idx <= __CFStrLength(str), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(str));
4112 __CFStringReplace(str, CFRangeMake(idx, 0), insertedStr);
4113 }
4114
4115
4116 void CFStringDelete(CFMutableStringRef str, CFRange range) {
4117 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, deleteCharactersInRange:NSMakeRange(range.location, range.length));
4118 __CFAssertIsStringAndMutable(str);
4119 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4120 __CFStringChangeSize(str, range, 0, false);
4121 }
4122
4123
4124 void CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
4125 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, replaceCharactersInRange:NSMakeRange(range.location, range.length) withString:(NSString *)replacement);
4126 __CFAssertIsStringAndMutable(str);
4127 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4128 __CFStringReplace(str, range, replacement);
4129 }
4130
4131
4132 void CFStringReplaceAll(CFMutableStringRef str, CFStringRef replacement) {
4133 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, setString:(NSString *)replacement);
4134 __CFAssertIsStringAndMutable(str);
4135 __CFStringReplace(str, CFRangeMake(0, __CFStrLength(str)), replacement);
4136 }
4137
4138
4139 void CFStringAppend(CFMutableStringRef str, CFStringRef appended) {
4140 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendString:(NSString *)appended);
4141 __CFAssertIsStringAndMutable(str);
4142 __CFStringReplace(str, CFRangeMake(__CFStrLength(str), 0), appended);
4143 }
4144
4145
4146 void CFStringAppendCharacters(CFMutableStringRef str, const UniChar *chars, CFIndex appendedLength) {
4147 CFIndex strLength, idx;
4148
4149 __CFAssertIsNotNegative(appendedLength);
4150
4151 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendCharacters:chars length:(NSUInteger)appendedLength);
4152
4153 __CFAssertIsStringAndMutable(str);
4154
4155 strLength = __CFStrLength(str);
4156 if (__CFStrIsUnicode(str)) {
4157 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, true);
4158 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4159 } else {
4160 uint8_t *contents;
4161 bool isASCII = true;
4162 for (idx = 0; isASCII && idx < appendedLength; idx++) isASCII = (chars[idx] < 0x80);
4163 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, !isASCII);
4164 if (!isASCII) {
4165 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4166 } else {
4167 contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4168 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4169 }
4170 }
4171 }
4172
4173
4174 void __CFStringAppendBytes(CFMutableStringRef str, const char *cStr, CFIndex appendedLength, CFStringEncoding encoding) {
4175 Boolean appendedIsUnicode = false;
4176 Boolean freeCStrWhenDone = false;
4177 Boolean demoteAppendedUnicode = false;
4178 CFVarWidthCharBuffer vBuf;
4179
4180 __CFAssertIsNotNegative(appendedLength);
4181
4182 if (encoding == kCFStringEncodingASCII || encoding == __CFStringGetEightBitStringEncoding()) {
4183 // appendedLength now denotes length in UniChars
4184 } else if (encoding == kCFStringEncodingUnicode) {
4185 UniChar *chars = (UniChar *)cStr;
4186 CFIndex idx, length = appendedLength / sizeof(UniChar);
4187 bool isASCII = true;
4188 for (idx = 0; isASCII && idx < length; idx++) isASCII = (chars[idx] < 0x80);
4189 if (!isASCII) {
4190 appendedIsUnicode = true;
4191 } else {
4192 demoteAppendedUnicode = true;
4193 }
4194 appendedLength = length;
4195 } else {
4196 Boolean usingPassedInMemory = false;
4197
4198 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
4199 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
4200
4201 if (!__CFStringDecodeByteStream3((const uint8_t *)cStr, appendedLength, encoding, __CFStrIsUnicode(str), &vBuf, &usingPassedInMemory, 0)) {
4202 CFAssert1(0, __kCFLogAssertion, "Supplied bytes could not be converted specified encoding %d", encoding);
4203 return;
4204 }
4205
4206 // If not ASCII, appendedLength now denotes length in UniChars
4207 appendedLength = vBuf.numChars;
4208 appendedIsUnicode = !vBuf.isASCII;
4209 cStr = (const char *)vBuf.chars.ascii;
4210 freeCStrWhenDone = !usingPassedInMemory && vBuf.shouldFreeChars;
4211 }
4212
4213 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
4214 if (!appendedIsUnicode && !demoteAppendedUnicode) {
4215 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, _cfAppendCString:(const unsigned char *)cStr length:(NSInteger)appendedLength);
4216 } else {
4217 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendCharacters:(const unichar *)cStr length:(NSUInteger)appendedLength);
4218 }
4219 } else {
4220 CFIndex strLength;
4221 __CFAssertIsStringAndMutable(str);
4222 strLength = __CFStrLength(str);
4223
4224 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, appendedIsUnicode || __CFStrIsUnicode(str));
4225
4226 if (__CFStrIsUnicode(str)) {
4227 UniChar *contents = (UniChar *)__CFStrContents(str);
4228 if (appendedIsUnicode) {
4229 memmove(contents + strLength, cStr, appendedLength * sizeof(UniChar));
4230 } else {
4231 __CFStrConvertBytesToUnicode((const uint8_t *)cStr, contents + strLength, appendedLength);
4232 }
4233 } else {
4234 if (demoteAppendedUnicode) {
4235 UniChar *chars = (UniChar *)cStr;
4236 CFIndex idx;
4237 uint8_t *contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4238 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4239 } else {
4240 uint8_t *contents = (uint8_t *)__CFStrContents(str);
4241 memmove(contents + strLength + __CFStrSkipAnyLengthByte(str), cStr, appendedLength);
4242 }
4243 }
4244 }
4245
4246 if (freeCStrWhenDone) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr);
4247 }
4248
4249 void CFStringAppendPascalString(CFMutableStringRef str, ConstStringPtr pStr, CFStringEncoding encoding) {
4250 __CFStringAppendBytes(str, (const char *)(pStr + 1), (CFIndex)*pStr, encoding);
4251 }
4252
4253 void CFStringAppendCString(CFMutableStringRef str, const char *cStr, CFStringEncoding encoding) {
4254 __CFStringAppendBytes(str, cStr, strlen(cStr), encoding);
4255 }
4256
4257
4258 void CFStringAppendFormat(CFMutableStringRef str, CFDictionaryRef formatOptions, CFStringRef format, ...) {
4259 va_list argList;
4260
4261 va_start(argList, format);
4262 CFStringAppendFormatAndArguments(str, formatOptions, format, argList);
4263 va_end(argList);
4264 }
4265
4266
4267 CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
4268 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFIndex, (NSMutableString *)string, replaceOccurrencesOfString:(NSString *)stringToFind withString:(NSString *)replacementString options:(NSStringCompareOptions)compareOptions range:NSMakeRange(rangeToSearch.location, rangeToSearch.length));
4269 CFRange foundRange;
4270 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
4271 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
4272 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
4273 CFRange rangeBuffer[MAX_RANGES_ON_STACK]; // Used to avoid allocating memory
4274 CFRange *ranges = rangeBuffer;
4275 CFIndex foundCount = 0;
4276 CFIndex capacity = MAX_RANGES_ON_STACK;
4277
4278 __CFAssertIsStringAndMutable(string);
4279 __CFAssertRangeIsInStringBounds(string, rangeToSearch.location, rangeToSearch.length);
4280
4281 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
4282 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
4283 // Determine the next range
4284 if (backwards) {
4285 rangeToSearch.length = foundRange.location - rangeToSearch.location;
4286 } else {
4287 rangeToSearch.location = foundRange.location + foundRange.length;
4288 rangeToSearch.length = endIndex - rangeToSearch.location;
4289 }
4290
4291 // If necessary, grow the array
4292 if (foundCount >= capacity) {
4293 bool firstAlloc = (ranges == rangeBuffer) ? true : false;
4294 capacity = (capacity + 4) * 2;
4295 // Note that reallocate with NULL previous pointer is same as allocate
4296 ranges = (CFRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, firstAlloc ? NULL : ranges, capacity * sizeof(CFRange), 0);
4297 if (firstAlloc) memmove(ranges, rangeBuffer, MAX_RANGES_ON_STACK * sizeof(CFRange));
4298 }
4299 ranges[foundCount] = foundRange;
4300 foundCount++;
4301 }
4302
4303 if (foundCount > 0) {
4304 if (backwards) { // Reorder the ranges to be incrementing (better to do this here, then to check other places)
4305 int head = 0;
4306 int tail = foundCount - 1;
4307 while (head < tail) {
4308 CFRange temp = ranges[head];
4309 ranges[head] = ranges[tail];
4310 ranges[tail] = temp;
4311 head++;
4312 tail--;
4313 }
4314 }
4315 __CFStringReplaceMultiple(string, ranges, foundCount, replacementString);
4316 if (ranges != rangeBuffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, ranges);
4317 }
4318
4319 return foundCount;
4320 }
4321
4322
4323 // This function is here for NSString purposes
4324 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
4325
4326 int __CFStringCheckAndReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
4327 if (!__CFStrIsMutable(str)) return _CFStringErrNotMutable; // These three ifs are always here, for NSString usage
4328 if (!replacement && __CFStringNoteErrors()) return _CFStringErrNilArg;
4329 // This attempts to catch bad ranges including those described in 3375535 (-1,1)
4330 unsigned long endOfRange = (unsigned long)(range.location) + (unsigned long)(range.length); // NSRange uses unsigned quantities, hence the casting
4331 if (((endOfRange > (unsigned long)__CFStrLength(str)) || (endOfRange < (unsigned long)(range.location))) && __CFStringNoteErrors()) return _CFStringErrBounds;
4332
4333 __CFAssertIsStringAndMutable(str);
4334 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4335 __CFStringReplace(str, range, replacement);
4336 return _CFStringErrNone;
4337 }
4338
4339 // This function determines whether errors which would cause string exceptions should
4340 // be ignored or not
4341
4342 Boolean __CFStringNoteErrors(void) {
4343 return true;
4344 }
4345
4346
4347
4348 void CFStringPad(CFMutableStringRef string, CFStringRef padString, CFIndex length, CFIndex indexIntoPad) {
4349 CFIndex originalLength;
4350
4351 __CFAssertIsNotNegative(length);
4352 __CFAssertIsNotNegative(indexIntoPad);
4353
4354 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfPad:padString length:(uint32_t)length padIndex:(uint32_t)indexIntoPad);
4355
4356 __CFAssertIsStringAndMutable(string);
4357
4358 originalLength = __CFStrLength(string);
4359 if (length < originalLength) {
4360 __CFStringChangeSize(string, CFRangeMake(length, originalLength - length), 0, false);
4361 } else if (originalLength < length) {
4362 uint8_t *contents;
4363 Boolean isUnicode;
4364 CFIndex charSize;
4365 CFIndex padStringLength;
4366 CFIndex padLength;
4367 CFIndex padRemaining = length - originalLength;
4368
4369 if (CF_IS_OBJC(__kCFStringTypeID, padString)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
4370 padStringLength = CFStringGetLength(padString);
4371 isUnicode = true; /* !!! Bad for now */
4372 } else {
4373 __CFAssertIsString(padString);
4374 padStringLength = __CFStrLength(padString);
4375 isUnicode = __CFStrIsUnicode(string) || __CFStrIsUnicode(padString);
4376 }
4377
4378 charSize = isUnicode ? sizeof(UniChar) : sizeof(uint8_t);
4379
4380 __CFStringChangeSize(string, CFRangeMake(originalLength, 0), padRemaining, isUnicode);
4381
4382 contents = (uint8_t *)__CFStrContents(string) + charSize * originalLength + __CFStrSkipAnyLengthByte(string);
4383 padLength = padStringLength - indexIntoPad;
4384 padLength = padRemaining < padLength ? padRemaining : padLength;
4385
4386 while (padRemaining > 0) {
4387 if (isUnicode) {
4388 CFStringGetCharacters(padString, CFRangeMake(indexIntoPad, padLength), (UniChar *)contents);
4389 } else {
4390 CFStringGetBytes(padString, CFRangeMake(indexIntoPad, padLength), __CFStringGetEightBitStringEncoding(), 0, false, contents, padRemaining * charSize, NULL);
4391 }
4392 contents += padLength * charSize;
4393 padRemaining -= padLength;
4394 indexIntoPad = 0;
4395 padLength = padRemaining < padLength ? padRemaining : padStringLength;
4396 }
4397 }
4398 }
4399
4400 void CFStringTrim(CFMutableStringRef string, CFStringRef trimString) {
4401 CFRange range;
4402 CFIndex newStartIndex;
4403 CFIndex length;
4404
4405 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfTrim:trimString);
4406
4407 __CFAssertIsStringAndMutable(string);
4408 __CFAssertIsString(trimString);
4409
4410 newStartIndex = 0;
4411 length = __CFStrLength(string);
4412
4413 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length - newStartIndex), kCFCompareAnchored, &range)) {
4414 newStartIndex = range.location + range.length;
4415 }
4416
4417 if (newStartIndex < length) {
4418 CFIndex charSize = __CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t);
4419 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4420
4421 length -= newStartIndex;
4422 if (CFStringGetLength(trimString) < length) {
4423 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length), kCFCompareAnchored|kCFCompareBackwards, &range)) {
4424 length = range.location - newStartIndex;
4425 }
4426 }
4427 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4428 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4429 } else { // Only trimString in string, trim all
4430 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4431 }
4432 }
4433
4434 void CFStringTrimWhitespace(CFMutableStringRef string) {
4435 CFIndex newStartIndex;
4436 CFIndex length;
4437 CFStringInlineBuffer buffer;
4438
4439 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfTrimWS);
4440
4441 __CFAssertIsStringAndMutable(string);
4442
4443 newStartIndex = 0;
4444 length = __CFStrLength(string);
4445
4446 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
4447 CFIndex buffer_idx = 0;
4448
4449 while (buffer_idx < length && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4450 buffer_idx++;
4451 newStartIndex = buffer_idx;
4452
4453 if (newStartIndex < length) {
4454 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4455 CFIndex charSize = (__CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t));
4456
4457 buffer_idx = length - 1;
4458 while (0 <= buffer_idx && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4459 buffer_idx--;
4460 length = buffer_idx - newStartIndex + 1;
4461
4462 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4463 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4464 } else { // Whitespace only string
4465 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4466 }
4467 }
4468
4469 void CFStringLowercase(CFMutableStringRef string, CFLocaleRef locale) {
4470 CFIndex currentIndex = 0;
4471 CFIndex length;
4472 const uint8_t *langCode;
4473 Boolean isEightBit = __CFStrIsEightBit(string);
4474
4475 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfLowercase:(const void *)locale);
4476
4477 __CFAssertIsStringAndMutable(string);
4478
4479 length = __CFStrLength(string);
4480
4481 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale, false) : NULL);
4482
4483 if (!langCode && isEightBit) {
4484 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4485 for (;currentIndex < length;currentIndex++) {
4486 if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4487 contents[currentIndex] += 'a' - 'A';
4488 } else if (contents[currentIndex] > 127) {
4489 break;
4490 }
4491 }
4492 }
4493
4494 if (currentIndex < length) {
4495 UTF16Char *contents;
4496 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4497 CFIndex mappedLength;
4498 UTF32Char currentChar;
4499 UInt32 flags = 0;
4500
4501 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4502
4503 contents = (UniChar *)__CFStrContents(string);
4504
4505 for (;currentIndex < length;currentIndex++) {
4506
4507 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4508 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4509 } else {
4510 currentChar = contents[currentIndex];
4511 }
4512 flags = ((langCode || (currentChar == 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToLowercase, langCode, flags) : 0);
4513
4514 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, flags, langCode);
4515 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4516
4517 if (currentChar > 0xFFFF) { // Non-BMP char
4518 switch (mappedLength) {
4519 case 0:
4520 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4521 contents = (UniChar *)__CFStrContents(string);
4522 length -= 2;
4523 break;
4524
4525 case 1:
4526 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4527 contents = (UniChar *)__CFStrContents(string);
4528 --length;
4529 break;
4530
4531 case 2:
4532 contents[++currentIndex] = mappedCharacters[1];
4533 break;
4534
4535 default:
4536 --mappedLength; // Skip the current char
4537 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4538 contents = (UniChar *)__CFStrContents(string);
4539 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4540 length += (mappedLength - 1);
4541 currentIndex += mappedLength;
4542 break;
4543 }
4544 } else if (mappedLength == 0) {
4545 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4546 contents = (UniChar *)__CFStrContents(string);
4547 --length;
4548 } else if (mappedLength > 1) {
4549 --mappedLength; // Skip the current char
4550 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4551 contents = (UniChar *)__CFStrContents(string);
4552 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4553 length += mappedLength;
4554 currentIndex += mappedLength;
4555 }
4556 }
4557 }
4558 }
4559
4560 void CFStringUppercase(CFMutableStringRef string, CFLocaleRef locale) {
4561 CFIndex currentIndex = 0;
4562 CFIndex length;
4563 const uint8_t *langCode;
4564 Boolean isEightBit = __CFStrIsEightBit(string);
4565
4566 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfUppercase:(const void *)locale);
4567
4568 __CFAssertIsStringAndMutable(string);
4569
4570 length = __CFStrLength(string);
4571
4572 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale, false) : NULL);
4573
4574 if (!langCode && isEightBit) {
4575 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4576 for (;currentIndex < length;currentIndex++) {
4577 if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4578 contents[currentIndex] -= 'a' - 'A';
4579 } else if (contents[currentIndex] > 127) {
4580 break;
4581 }
4582 }
4583 }
4584
4585 if (currentIndex < length) {
4586 UniChar *contents;
4587 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4588 CFIndex mappedLength;
4589 UTF32Char currentChar;
4590 UInt32 flags = 0;
4591
4592 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4593
4594 contents = (UniChar *)__CFStrContents(string);
4595
4596 for (;currentIndex < length;currentIndex++) {
4597 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4598 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4599 } else {
4600 currentChar = contents[currentIndex];
4601 }
4602
4603 flags = (langCode ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToUppercase, langCode, flags) : 0);
4604
4605 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToUppercase, flags, langCode);
4606 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4607
4608 if (currentChar > 0xFFFF) { // Non-BMP char
4609 switch (mappedLength) {
4610 case 0:
4611 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4612 contents = (UniChar *)__CFStrContents(string);
4613 length -= 2;
4614 break;
4615
4616 case 1:
4617 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4618 contents = (UniChar *)__CFStrContents(string);
4619 --length;
4620 break;
4621
4622 case 2:
4623 contents[++currentIndex] = mappedCharacters[1];
4624 break;
4625
4626 default:
4627 --mappedLength; // Skip the current char
4628 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4629 contents = (UniChar *)__CFStrContents(string);
4630 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4631 length += (mappedLength - 1);
4632 currentIndex += mappedLength;
4633 break;
4634 }
4635 } else if (mappedLength == 0) {
4636 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4637 contents = (UniChar *)__CFStrContents(string);
4638 --length;
4639 } else if (mappedLength > 1) {
4640 --mappedLength; // Skip the current char
4641 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4642 contents = (UniChar *)__CFStrContents(string);
4643 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4644 length += mappedLength;
4645 currentIndex += mappedLength;
4646 }
4647 }
4648 }
4649 }
4650
4651
4652 void CFStringCapitalize(CFMutableStringRef string, CFLocaleRef locale) {
4653 CFIndex currentIndex = 0;
4654 CFIndex length;
4655 const uint8_t *langCode;
4656 Boolean isEightBit = __CFStrIsEightBit(string);
4657 Boolean isLastCased = false;
4658 const uint8_t *caseIgnorableForBMP;
4659
4660 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfCapitalize:(const void *)locale);
4661
4662 __CFAssertIsStringAndMutable(string);
4663
4664 length = __CFStrLength(string);
4665
4666 caseIgnorableForBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet, 0);
4667
4668 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale, false) : NULL);
4669
4670 if (!langCode && isEightBit) {
4671 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4672 for (;currentIndex < length;currentIndex++) {
4673 if (contents[currentIndex] > 127) {
4674 break;
4675 } else if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4676 contents[currentIndex] += (isLastCased ? 'a' - 'A' : 0);
4677 isLastCased = true;
4678 } else if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4679 contents[currentIndex] -= (!isLastCased ? 'a' - 'A' : 0);
4680 isLastCased = true;
4681 } else if (!CFUniCharIsMemberOfBitmap(contents[currentIndex], caseIgnorableForBMP)) {
4682 isLastCased = false;
4683 }
4684 }
4685 }
4686
4687 if (currentIndex < length) {
4688 UniChar *contents;
4689 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4690 CFIndex mappedLength;
4691 UTF32Char currentChar;
4692 UInt32 flags = 0;
4693
4694 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4695
4696 contents = (UniChar *)__CFStrContents(string);
4697
4698 for (;currentIndex < length;currentIndex++) {
4699 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4700 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4701 } else {
4702 currentChar = contents[currentIndex];
4703 }
4704 flags = ((langCode || ((currentChar == 0x03A3) && isLastCased)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), langCode, flags) : 0);
4705
4706 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), flags, langCode);
4707 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4708
4709 if (currentChar > 0xFFFF) { // Non-BMP char
4710 switch (mappedLength) {
4711 case 0:
4712 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4713 contents = (UniChar *)__CFStrContents(string);
4714 length -= 2;
4715 break;
4716
4717 case 1:
4718 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4719 contents = (UniChar *)__CFStrContents(string);
4720 --length;
4721 break;
4722
4723 case 2:
4724 contents[++currentIndex] = mappedCharacters[1];
4725 break;
4726
4727 default:
4728 --mappedLength; // Skip the current char
4729 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4730 contents = (UniChar *)__CFStrContents(string);
4731 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4732 length += (mappedLength - 1);
4733 currentIndex += mappedLength;
4734 break;
4735 }
4736 } else if (mappedLength == 0) {
4737 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4738 contents = (UniChar *)__CFStrContents(string);
4739 --length;
4740 } else if (mappedLength > 1) {
4741 --mappedLength; // Skip the current char
4742 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4743 contents = (UniChar *)__CFStrContents(string);
4744 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4745 length += mappedLength;
4746 currentIndex += mappedLength;
4747 }
4748
4749 if (!((currentChar > 0xFFFF) ? CFUniCharIsMemberOf(currentChar, kCFUniCharCaseIgnorableCharacterSet) : CFUniCharIsMemberOfBitmap(currentChar, caseIgnorableForBMP))) { // We have non-caseignorable here
4750 isLastCased = ((CFUniCharIsMemberOf(currentChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(currentChar, kCFUniCharLowercaseLetterCharacterSet)) ? true : false);
4751 }
4752 }
4753 }
4754 }
4755
4756
/* Number of UTF32Char slots in the on-stack buffer used for decomposition */
#define MAX_DECOMP_BUF 64

/* Hangul constants: base code points ... */
#define HANGUL_SBASE 0xAC00
#define HANGUL_LBASE 0x1100
#define HANGUL_VBASE 0x1161
#define HANGUL_TBASE 0x11A7

/* ... and counts */
#define HANGUL_LCOUNT 19
#define HANGUL_VCOUNT 21
#define HANGUL_TCOUNT 28
#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
#define HANGUL_SCOUNT 11172
4768
4769 CF_INLINE uint32_t __CFGetUTF16Length(const UTF32Char *characters, uint32_t utf32Length) {
4770 const UTF32Char *limit = characters + utf32Length;
4771 uint32_t length = 0;
4772
4773 while (characters < limit) length += (*(characters++) > 0xFFFF ? 2 : 1);
4774
4775 return length;
4776 }
4777
4778 CF_INLINE void __CFFillInUTF16(const UTF32Char *characters, UTF16Char *dst, uint32_t utf32Length) {
4779 const UTF32Char *limit = characters + utf32Length;
4780 UTF32Char currentChar;
4781
4782 while (characters < limit) {
4783 currentChar = *(characters++);
4784 if (currentChar > 0xFFFF) {
4785 currentChar -= 0x10000;
4786 *(dst++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
4787 *(dst++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
4788 } else {
4789 *(dst++) = currentChar;
4790 }
4791 }
4792 }
4793
4794 void CFStringNormalize(CFMutableStringRef string, CFStringNormalizationForm theForm) {
4795 CFIndex currentIndex = 0;
4796 CFIndex length;
4797 bool needToReorder = true;
4798
4799 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfNormalize:theForm);
4800
4801 __CFAssertIsStringAndMutable(string);
4802
4803 length = __CFStrLength(string);
4804
4805 if (__CFStrIsEightBit(string)) {
4806 uint8_t *contents;
4807
4808 if (theForm == kCFStringNormalizationFormC) return; // 8bit form has no decomposition
4809
4810 contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4811
4812 for (;currentIndex < length;currentIndex++) {
4813 if (contents[currentIndex] > 127) {
4814 __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); // need to do harm way
4815 needToReorder = false;
4816 break;
4817 }
4818 }
4819 }
4820
4821 if (currentIndex < length) {
4822 UTF16Char *limit = (UTF16Char *)__CFStrContents(string) + length;
4823 UTF16Char *contents = (UTF16Char *)__CFStrContents(string) + currentIndex;
4824 UTF32Char buffer[MAX_DECOMP_BUF];
4825 UTF32Char *mappedCharacters = buffer;
4826 CFIndex allocatedLength = MAX_DECOMP_BUF;
4827 CFIndex mappedLength;
4828 CFIndex currentLength;
4829 UTF32Char currentChar;
4830 const uint8_t *decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
4831 const uint8_t *nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
4832 const uint8_t *combiningBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
4833
4834 while (contents < limit) {
4835 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4836 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4837 currentLength = 2;
4838 contents += 2;
4839 } else {
4840 currentChar = *(contents++);
4841 currentLength = 1;
4842 }
4843
4844 mappedLength = 0;
4845
4846 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16)))) && (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, ((currentChar < 0x10000) ? combiningBMP : (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16)))))) {
4847 if ((theForm & kCFStringNormalizationFormC) == 0 || currentChar < HANGUL_SBASE || currentChar > (HANGUL_SBASE + HANGUL_SCOUNT)) { // We don't have to decompose Hangul Syllables if we're precomposing again
4848 mappedLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters, MAX_DECOMP_BUF);
4849 }
4850 }
4851
4852 if ((needToReorder || (theForm & kCFStringNormalizationFormC)) && ((contents < limit) || (mappedLength == 0))) {
4853 if (mappedLength > 0) {
4854 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4855 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4856 } else {
4857 currentChar = *contents;
4858 }
4859 }
4860
4861 if (0 != CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) {
4862 uint32_t decompLength;
4863
4864 if (mappedLength == 0) {
4865 contents -= (currentChar & 0xFFFF0000 ? 2 : 1);
4866 if (currentIndex > 0) {
4867 if (CFUniCharIsSurrogateLowCharacter(*(contents - 1)) && (currentIndex > 1) && CFUniCharIsSurrogateHighCharacter(*(contents - 2))) {
4868 *mappedCharacters = CFUniCharGetLongCharacterForSurrogatePair(*(contents - 2), *(contents - 1));
4869 currentIndex -= 2;
4870 currentLength += 2;
4871 } else {
4872 *mappedCharacters = *(contents - 1);
4873 --currentIndex;
4874 ++currentLength;
4875 }
4876 mappedLength = 1;
4877 }
4878 } else {
4879 currentLength += (currentChar & 0xFFFF0000 ? 2 : 1);
4880 }
4881 contents += (currentChar & 0xFFFF0000 ? 2 : 1);
4882
4883 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
4884 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4885 mappedLength += decompLength;
4886 } else {
4887 mappedCharacters[mappedLength++] = currentChar;
4888 }
4889
4890 while (contents < limit) {
4891 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4892 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4893 } else {
4894 currentChar = *contents;
4895 }
4896 if (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) break;
4897 if (currentChar & 0xFFFF0000) {
4898 contents += 2;
4899 currentLength += 2;
4900 } else {
4901 ++contents;
4902 ++currentLength;
4903 }
4904 if (mappedLength == allocatedLength) {
4905 allocatedLength += MAX_DECOMP_BUF;
4906 if (mappedCharacters == buffer) {
4907 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
4908 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4909 } else {
4910 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4911 }
4912 }
4913 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
4914 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4915 mappedLength += decompLength;
4916 } else {
4917 mappedCharacters[mappedLength++] = currentChar;
4918 }
4919 }
4920 }
4921 if (needToReorder && mappedLength > 1) CFUniCharPrioritySort(mappedCharacters, mappedLength);
4922 }
4923
4924 if (theForm & kCFStringNormalizationFormKD) {
4925 CFIndex newLength = 0;
4926
4927 if (mappedLength == 0 && CFUniCharIsMemberOf(currentChar, kCFUniCharCompatibilityDecomposableCharacterSet)) {
4928 mappedCharacters[mappedLength++] = currentChar;
4929 }
4930 while (newLength < mappedLength) {
4931 newLength = CFUniCharCompatibilityDecompose(mappedCharacters, mappedLength, allocatedLength);
4932 if (newLength == 0) {
4933 allocatedLength += MAX_DECOMP_BUF;
4934 if (mappedCharacters == buffer) {
4935 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
4936 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4937 } else {
4938 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4939 }
4940 }
4941 }
4942 mappedLength = newLength;
4943 }
4944
4945 if (theForm & kCFStringNormalizationFormC) {
4946 UTF32Char nextChar;
4947
4948 if (mappedLength > 1) {
4949 CFIndex consumedLength = 1;
4950 UTF32Char *currentBase = mappedCharacters;
4951 uint8_t currentClass, lastClass = 0;
4952 bool didCombine = false;
4953
4954 currentChar = *mappedCharacters;
4955
4956 while (consumedLength < mappedLength) {
4957 nextChar = mappedCharacters[consumedLength];
4958 currentClass = CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)((nextChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))));
4959
4960 if (theForm & kCFStringNormalizationFormKD) {
4961 if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) {
4962 SInt8 lIndex = currentChar - HANGUL_LBASE;
4963
4964 if ((0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4965 SInt16 vIndex = nextChar - HANGUL_VBASE;
4966
4967 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4968 SInt16 tIndex = 0;
4969 CFIndex usedLength = mappedLength;
4970
4971 mappedCharacters[consumedLength++] = 0xFFFD;
4972
4973 if (consumedLength < mappedLength) {
4974 tIndex = mappedCharacters[consumedLength] - HANGUL_TBASE;
4975 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4976 tIndex = 0;
4977 } else {
4978 mappedCharacters[consumedLength++] = 0xFFFD;
4979 }
4980 }
4981 *currentBase = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4982
4983 while (--usedLength > 0) {
4984 if (mappedCharacters[usedLength] == 0xFFFD) {
4985 --mappedLength;
4986 --consumedLength;
4987 memmove(mappedCharacters + usedLength, mappedCharacters + usedLength + 1, (mappedLength - usedLength) * sizeof(UTF32Char));
4988 }
4989 }
4990 currentBase = mappedCharacters + consumedLength;
4991 currentChar = *currentBase;
4992 ++consumedLength;
4993
4994 continue;
4995 }
4996 }
4997 }
4998 if (!CFUniCharIsMemberOfBitmap(nextChar, ((nextChar < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))))) {
4999 *currentBase = currentChar;
5000 currentBase = mappedCharacters + consumedLength;
5001 currentChar = nextChar;
5002 ++consumedLength;
5003 continue;
5004 }
5005 }
5006
5007 if ((lastClass == 0) || (currentClass > lastClass)) {
5008 nextChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
5009 if (nextChar == 0xFFFD) {
5010 lastClass = currentClass;
5011 } else {
5012 mappedCharacters[consumedLength] = 0xFFFD;
5013 didCombine = true;
5014 currentChar = nextChar;
5015 }
5016 }
5017 ++consumedLength;
5018 }
5019
5020 *currentBase = currentChar;
5021 if (didCombine) {
5022 consumedLength = mappedLength;
5023 while (--consumedLength > 0) {
5024 if (mappedCharacters[consumedLength] == 0xFFFD) {
5025 --mappedLength;
5026 memmove(mappedCharacters + consumedLength, mappedCharacters + consumedLength + 1, (mappedLength - consumedLength) * sizeof(UTF32Char));
5027 }
5028 }
5029 }
5030 } else if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo
5031 SInt8 lIndex = currentChar - HANGUL_LBASE;
5032
5033 if ((contents < limit) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
5034 SInt16 vIndex = *contents - HANGUL_VBASE;
5035
5036 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
5037 SInt16 tIndex = 0;
5038
5039 ++contents; ++currentLength;
5040
5041 if (contents < limit) {
5042 tIndex = *contents - HANGUL_TBASE;
5043 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
5044 tIndex = 0;
5045 } else {
5046 ++contents; ++currentLength;
5047 }
5048 }
5049 *mappedCharacters = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
5050 mappedLength = 1;
5051 }
5052 }
5053 } else { // collect class 0 non-base characters
5054 while (contents < limit) {
5055 nextChar = *contents;
5056 if (CFUniCharIsSurrogateHighCharacter(nextChar) && ((contents + 1) < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
5057 nextChar = CFUniCharGetLongCharacterForSurrogatePair(nextChar, *(contents + 1));
5058 if (!CFUniCharIsMemberOfBitmap(nextChar, (const uint8_t *)CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))))) break;
5059 } else {
5060 if (!CFUniCharIsMemberOfBitmap(nextChar, nonBaseBMP) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, combiningBMP))) break;
5061 }
5062 currentChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
5063 if (0xFFFD == currentChar) break;
5064
5065 if (nextChar < 0x10000) {
5066 ++contents; ++currentLength;
5067 } else {
5068 contents += 2;
5069 currentLength += 2;
5070 }
5071
5072 *mappedCharacters = currentChar;
5073 mappedLength = 1;
5074 }
5075 }
5076 }
5077
5078 if (mappedLength > 0) {
5079 CFIndex utf16Length = __CFGetUTF16Length(mappedCharacters, mappedLength);
5080
5081 if (utf16Length != currentLength) {
5082 __CFStringChangeSize(string, CFRangeMake(currentIndex, currentLength), utf16Length, true);
5083 currentLength = utf16Length;
5084 }
5085 contents = (UTF16Char *)__CFStrContents(string);
5086 limit = contents + __CFStrLength(string);
5087 contents += currentIndex;
5088 __CFFillInUTF16(mappedCharacters, contents, mappedLength);
5089 contents += utf16Length;
5090 }
5091 currentIndex += currentLength;
5092 }
5093
5094 if (mappedCharacters != buffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, mappedCharacters);
5095 }
5096 }
5097
/* Folds theString in place according to theFlags.
   Only kCFCompareCaseInsensitive, kCFCompareDiacriticInsensitive and kCFCompareWidthInsensitive are acted on; if none of them is set, or the string is empty, the string is left untouched.
   If kCFCompareLocalized is set and locale is NULL, the current locale is copied for the duration of the call and released before returning.
   The work is done in up to two passes: an in-place pass over the eight-bit contents (only when a C-string pointer is available), followed by a UTF-16 pass that starts at the first character the eight-bit pass could not handle.
*/
5098 void CFStringFold(CFMutableStringRef theString, CFStringCompareFlags theFlags, CFLocaleRef locale) {
5099 CFStringInlineBuffer stringBuffer;
5100 CFIndex length = CFStringGetLength(theString);
5101 CFIndex currentIndex = 0; // index of the next character that still has to be folded
5102 CFIndex bufferLength = 0; // number of UTF32Chars of folded output currently held in buffer
5103 UTF32Char buffer[kCFStringStackBufferLength];
5104 const uint8_t *cString;
5105 const uint8_t *langCode;
5106 CFStringEncoding eightBitEncoding;
5107 bool caseInsensitive = ((theFlags & kCFCompareCaseInsensitive) ? true : false);
5108 bool isObjc = CF_IS_OBJC(__kCFStringTypeID, theString);
5109 CFLocaleRef theLocale = locale;
5110
5111 if ((theFlags & kCFCompareLocalized) && (NULL == locale)) {
5112 theLocale = CFLocaleCopyCurrent();
5113 }
5114
// From here on only the three folding options remain in theFlags; every other bit, including kCFCompareLocalized, is dropped
5115 theFlags &= (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive);
5116
5117 if ((0 == theFlags) || (0 == length)) goto bail; // nothing to do
5118
// langCode stays NULL when there is no locale; the ASCII shortcuts below are only taken in that case
5119 langCode = ((NULL == theLocale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(theLocale, true));
5120
5121 eightBitEncoding = __CFStringGetEightBitStringEncoding();
5122 cString = (const uint8_t *)CFStringGetCStringPtr(theString, eightBitEncoding);
5123
5124 if ((NULL != cString) && !caseInsensitive && (kCFStringEncodingASCII == eightBitEncoding)) goto bail; // All ASCII
5125
5126 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5127
// Eight-bit pass: rewrite bytes in place for as long as each folded result is a single byte below 0x80
5128 if ((NULL != cString) && (theFlags & (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive))) {
5129 const uint8_t *cStringPtr = cString;
5130 const uint8_t *cStringLimit = cString + length;
// ObjC strings get no writable pointer here, so the loop stops at the first byte that would have to change
5131 uint8_t *cStringContents = (isObjc ? NULL : (uint8_t *)__CFStrContents(theString) + __CFStrSkipAnyLengthByte(theString));
5132
5133 while (cStringPtr < cStringLimit) {
5134 if ((*cStringPtr < 0x80) && (NULL == langCode)) {
5135 if (caseInsensitive && (*cStringPtr >= 'A') && (*cStringPtr <= 'Z')) {
5136 if (NULL == cStringContents) {
5137 break;
5138 } else {
5139 cStringContents[cStringPtr - cString] += ('a' - 'A');
5140 }
5141 }
5142 } else {
// The byte is widened through __CFCharToUniCharTable before folding; a result of 0 leaves the byte as it is
5143 if ((bufferLength = __CFStringFoldCharacterClusterAtIndex((UTF32Char)__CFCharToUniCharTable[*cStringPtr], &stringBuffer, cStringPtr - cString, theFlags, langCode, buffer, kCFStringStackBufferLength, NULL)) > 0) {
// Leave the loop with the folded cluster still in buffer/bufferLength when it cannot be stored as one byte below 0x80
5144 if ((*buffer > 0x7F) || (bufferLength > 1) || (NULL == cStringContents)) break;
5145 cStringContents[cStringPtr - cString] = *buffer;
5146 }
5147 }
5148 ++cStringPtr;
5149 }
5150
// Everything before this index has been handled by the eight-bit pass
5151 currentIndex = cStringPtr - cString;
5152 }
5153
5154 if (currentIndex < length) {
5155 UTF16Char *contents;
5156
5157 if (isObjc) {
// ObjC string: copy the unprocessed tail into a temporary CF mutable string, fold that, then replace the tail of the original with the result
5158 CFMutableStringRef cfString;
5159 CFRange range = CFRangeMake(currentIndex, length - currentIndex);
5160
5161 contents = (UTF16Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * range.length, 0);
5162
5163 CFStringGetCharacters(theString, range, contents);
5164
// NOTE(review): contents is handed over through the NoCopy creator and is not deallocated explicitly in this function; presumably cfString owns it from here on -- confirm
5165 cfString = CFStringCreateMutableWithExternalCharactersNoCopy(kCFAllocatorSystemDefault, contents, range.length, range.length, NULL);
5166
// theLocale is passed explicitly because kCFCompareLocalized has already been masked out of theFlags
5167 CFStringFold(cfString, theFlags, theLocale);
5168
5169 CFStringReplace(theString, range, cfString);
5170
5171 CFRelease(cfString);
5172 } else {
5173 const UTF32Char *characters;
5174 const UTF32Char *charactersLimit;
5175 UTF32Char character;
5176 CFIndex consumedLength;
5177
5178 contents = NULL;
5179
// bufferLength > 0 here means the eight-bit pass stopped on a cluster it had already folded into buffer
5180 if (bufferLength > 0) {
// Make room for the extra bufferLength - 1 characters right after currentIndex (the final `true` presumably requests Unicode storage -- confirm against __CFStringChangeSize)
5181 __CFStringChangeSize(theString, CFRangeMake(currentIndex + 1, 0), bufferLength - 1, true);
5182 length = __CFStrLength(theString);
5183 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5184
5185 contents = (UTF16Char *)__CFStrContents(theString) + currentIndex;
5186 characters = buffer;
5187 charactersLimit = characters + bufferLength;
// Each folded value is narrowed to UTF16Char as it is stored
5188 while (characters < charactersLimit) *(contents++) = (UTF16Char)*(characters++);
5189 ++currentIndex;
5190 }
5191
// UTF-16 pass over the rest of the string
5192 while (currentIndex < length) {
5193 character = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex);
5194
// consumedLength stays 0 when the character at currentIndex needs no replacement
5195 consumedLength = 0;
5196
5197 if ((NULL == langCode) && (character < 0x80) && (0 == (theFlags & kCFCompareDiacriticInsensitive))) {
// ASCII with no locale and no diacritic folding: only A-Z can change
5198 if (caseInsensitive && (character >= 'A') && (character <= 'Z')) {
5199 consumedLength = 1;
5200 bufferLength = 1;
5201 *buffer = character + ('a' - 'A');
5202 }
5203 } else {
// Combine a high/low surrogate pair into one UTF-32 character before folding
5204 if (CFUniCharIsSurrogateHighCharacter(character) && ((currentIndex + 1) < length)) {
5205 UTF16Char lowSurrogate = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex + 1);
5206 if (CFUniCharIsSurrogateLowCharacter(lowSurrogate)) character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
5207 }
5208
// consumedLength is filled in by the callee; it is treated below as the number of string characters the folded cluster replaces
5209 bufferLength = __CFStringFoldCharacterClusterAtIndex(character, &stringBuffer, currentIndex, theFlags, langCode, buffer, kCFStringStackBufferLength, &consumedLength);
5210 }
5211
5212 if (consumedLength > 0) {
5213 CFIndex utf16Length = bufferLength;
5214
5215 characters = buffer;
5216 charactersLimit = characters + bufferLength;
5217
5218 while (characters < charactersLimit) if (*(characters++) > 0xFFFF) ++utf16Length; // Extend bufferLength to the UTF-16 length
5219
// The storage has to change when the replacement differs in length or the string is still eight-bit
5220 if ((utf16Length != consumedLength) || __CFStrIsEightBit(theString)) {
5221 CFRange range;
5222 CFIndex insertLength;
5223
5224 if (consumedLength < utf16Length) { // Need to expand
5225 range = CFRangeMake(currentIndex + consumedLength, 0);
5226 insertLength = utf16Length - consumedLength;
5227 } else {
// Shrink (or keep the length): delete the surplus characters that follow the replacement
5228 range = CFRangeMake(currentIndex + utf16Length, consumedLength - utf16Length);
5229 insertLength = 0;
5230 }
5231 __CFStringChangeSize(theString, range, insertLength, true);
// The length has changed, so reload it and re-initialise the inline buffer over the whole string
5232 length = __CFStrLength(theString);
5233 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5234 }
5235
// Store the folded characters as UTF-16 at currentIndex
5236 (void)CFUniCharFromUTF32(buffer, bufferLength, (UTF16Char *)__CFStrContents(theString) + currentIndex, true, __CF_BIG_ENDIAN__);
5237
5238 currentIndex += utf16Length;
5239 } else {
5240 ++currentIndex;
5241 }
5242 }
5243 }
5244 }
5245
5246 bail:
// Release the locale only when it was copied above, i.e. the caller passed NULL
5247 if (NULL == locale && theLocale) {
5248 CFRelease(theLocale);
5249 }
5250 }
5251
/* Option bits stored in CFFormatSpec.flags. */
enum {
    kCFStringFormatZeroFlag         = 0x01, /* when clear, padding is the space char */
    kCFStringFormatMinusFlag        = 0x02, /* when clear, no flag implied */
    kCFStringFormatPlusFlag         = 0x04, /* when clear, no flag implied; takes precedence over the space flag */
    kCFStringFormatSpaceFlag        = 0x08, /* when clear, no flag implied */
    kCFStringFormatExternalSpecFlag = 0x10, /* the spec uses the config dict */
    kCFStringFormatLocalizable      = 0x20  /* set explicitly on the specs we can localize */
};
5260
/* Description of one parsed format directive. __CFParseFormatSpec fills in type, size, flags and the precision fields from the directive's characters. */
5261 typedef struct {
5262 int16_t size; // one of the CFFormatSize* codes
5263 int16_t type; // one of the CFFormat*Type codes
5264 SInt32 loc; // NOTE(review): presumably where the directive starts in the format string -- confirm
5265 SInt32 len; // NOTE(review): presumably the length of the directive -- confirm
5266 SInt32 widthArg; // NOTE(review): presumably the literal field width -- confirm
5267 SInt32 precArg; // literal precision; the parser compares it with -1 to detect "none given" and sets it to 0 for a bare '.'
5268 uint32_t flags; // combination of the kCFStringFormat* bits
5269 int8_t mainArgNum; // index into the CFPrintValue array of the value to format
5270 int8_t precArgNum; // the parser compares it with -1 to detect that no '*' precision was given
5271 int8_t widthArgNum; // NOTE(review): presumably the '*' width counterpart of precArgNum -- confirm
5272 int8_t configDictIndex; // NOTE(review): meaning not visible here; presumably tied to kCFStringFormatExternalSpecFlag -- confirm
5273 int8_t numericFormatStyle; // Only set for localizable numeric quantities; holds a CFFormatStyle* value
5274 } CFFormatSpec;
5275
/* One argument value to be formatted; type and size select which member of the union is read. */
5276 typedef struct {
5277 int16_t type; // one of the CFFormat*Type codes
5278 int16_t size; // one of the CFFormatSize* codes; CFFormatSize16 selects longDoubleValue
5279 union {
5280 int64_t int64Value; // read for CFFormatLongType
5281 double doubleValue; // read for CFFormatDoubleType
5282 #if LONG_DOUBLE_SUPPORT
5283 long double longDoubleValue; // read for CFFormatDoubleType when size is CFFormatSize16
5284 #endif
5285 void *pointerValue; // NOTE(review): presumably used by the pointer-sized types -- confirm
5286 } value;
5287 } CFPrintValue;
5288
/* Argument size codes. The enumerators are codes (0 through 5), not byte counts. */
enum {
    CFFormatDefaultSize,    /* 0 */
    CFFormatSize1,          /* 1 */
    CFFormatSize2,          /* 2 */
    CFFormatSize4,          /* 3 */
    CFFormatSize8,          /* 4 */
    CFFormatSize16,         /* 5 */
    /* long is the 8-byte code under LP64 and the 4-byte code otherwise; pointers always use the same code as long */
#if __LP64__
    CFFormatSizeLong = CFFormatSize8,
#else
    CFFormatSizeLong = CFFormatSize4,
#endif
    CFFormatSizePointer = CFFormatSizeLong
};
5304
/* Values for CFFormatSpec.numericFormatStyle. Decimal and Scientific are separate bits so that the combined value can be built by OR-ing them. */
enum {
    CFFormatStyleDecimal = 0x1,
    CFFormatStyleScientific = 0x2,
    CFFormatStyleDecimalOrScientific = (CFFormatStyleDecimal | CFFormatStyleScientific),
    CFFormatStyleUnsigned = 0x4
};
5311
/* Argument type codes. Numbering starts at 32 and runs consecutively. */
enum {
    CFFormatLiteralType = 32,
    CFFormatLongType,           /* 33 */
    CFFormatDoubleType,         /* 34 */
    CFFormatPointerType,        /* 35 */
    CFFormatObjectType,         /* 36; handled specially. ??? not used anymore, can be removed? */
    CFFormatCFType,             /* 37; handled specially */
    CFFormatUnicharsType,       /* 38; handled specially */
    CFFormatCharsType,          /* 39; handled specially */
    CFFormatPascalCharsType,    /* 40; handled specially */
    CFFormatSingleUnicharType,  /* 41; handled specially */
    CFFormatDummyPointerType    /* 42; special case for %n */
};
5325
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
/* Appends a locale-aware rendering of one numeric argument to output.
   Only come in here if spec->type is CFFormatLongType or CFFormatDoubleType. Pass in 0 for width or precision if not specified.

   output       - string the formatted number is appended to
   locale       - locale used to create (or recache) the shared number formatters
   values       - argument array; the number formatted is values[spec->mainArgNum]
   spec         - parsed format directive (numericFormatStyle, type and flags are consulted)
   width        - minimum field width
   precision    - precision; only used when hasPrecision is true
   hasPrecision - whether the directive carried an explicit precision

   Returns true if the number was appended. Returns false if it couldn't do the format (with the assumption the caller falls back to unlocalized); this includes a numericFormatStyle that is not one of the handled CFFormatStyle* values and a formatter that could not be created.

   The formatters are shared statics; all use of them happens while formatterLock is held.
*/
static Boolean __CFStringFormatLocalizedNumber(CFMutableStringRef output, CFLocaleRef locale, const CFPrintValue *values, const CFFormatSpec *spec, SInt32 width, SInt32 precision, Boolean hasPrecision) {
    static CFLock_t formatterLock = CFLockInit;
    // These formatters are recached if the locale argument is different
    static CFNumberFormatterRef decimalFormatter = NULL;
    static CFNumberFormatterRef scientificFormatter = NULL;
    static CFNumberFormatterRef gFormatter = NULL; // for %g
    static SInt32 groupingSize = 0;
    static SInt32 secondaryGroupingSize = 0;

    // !!! This code should be removed before shipping
    // -1 means "environment not checked yet". The type must be explicitly signed: where plain char is unsigned the -1 test would never match and the non-zero value would disable localized formatting for good.
    static signed char disableLocalizedFormatting = -1;
    if (disableLocalizedFormatting == -1) disableLocalizedFormatting = (getenv("CFStringDisableLocalizedNumberFormatting") != NULL) ? 1 : 0;
    if (disableLocalizedFormatting) return false;

    CFNumberFormatterRef formatter = NULL;

    __CFLock(&formatterLock); // We use the formatter from one thread at one time; if this proves to be a bottleneck we need to get fancier

    switch (spec->numericFormatStyle) {
        case CFFormatStyleUnsigned:
        case CFFormatStyleDecimal:
            if (!decimalFormatter || !CFEqual(CFNumberFormatterGetLocale(decimalFormatter), locale)) { // cache or recache if the locale is different
                if (decimalFormatter) CFRelease(decimalFormatter);
                decimalFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterDecimalStyle); // since this is shared, remember to reset all its properties!
            }
            formatter = decimalFormatter;
            break;
        case CFFormatStyleScientific:
            if (!scientificFormatter || !CFEqual(CFNumberFormatterGetLocale(scientificFormatter), locale)) { // cache or recache if the locale is different
                if (scientificFormatter) CFRelease(scientificFormatter);
                scientificFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterScientificStyle);
                if (scientificFormatter) {
                    CFStringRef pattern = CFSTR("#E+00"); // the default pattern does not have the sign if the exponent is positive and it is single digit
                    CFNumberFormatterSetFormat(scientificFormatter, pattern);
                    CFNumberFormatterSetProperty(scientificFormatter, kCFNumberFormatterUseSignificantDigitsKey, kCFBooleanTrue);
                }
            }
            formatter = scientificFormatter;
            break;
        case CFFormatStyleDecimalOrScientific:
            if (!gFormatter || !CFEqual(CFNumberFormatterGetLocale(gFormatter), locale)) { // cache or recache if the locale is different
                if (gFormatter) CFRelease(gFormatter);
                gFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterDecimalStyle);
                if (gFormatter) {
                    // when we update the locale in gFormatter, we also need to update the two grouping sizes
                    CFNumberRef num = (CFNumberRef)CFNumberFormatterCopyProperty(gFormatter, kCFNumberFormatterGroupingSizeKey);
                    groupingSize = 0;
                    if (num) {
                        CFNumberGetValue(num, kCFNumberSInt32Type, &groupingSize);
                        CFRelease(num);
                    }
                    num = (CFNumberRef)CFNumberFormatterCopyProperty(gFormatter, kCFNumberFormatterSecondaryGroupingSizeKey);
                    secondaryGroupingSize = 0;
                    if (num) {
                        CFNumberGetValue(num, kCFNumberSInt32Type, &secondaryGroupingSize);
                        CFRelease(num);
                    }
                }
            }
            formatter = gFormatter;
            break;
        default: // no formatter exists for this style
            break;
    }

    // Unknown style, or the formatter could not be created: release the lock and let the caller fall back to unlocalized output
    if (NULL == formatter) {
        __CFUnlock(&formatterLock);
        return false;
    }

    SInt32 prec = hasPrecision ? precision : ((spec->type == CFFormatLongType) ? 0 : 6); // default precision of printf is 6

    // pattern must be set before setting width and padding
    // otherwise, the pattern will take over those settings
    if (spec->numericFormatStyle == CFFormatStyleDecimalOrScientific) {
        if (prec == 0) prec = 1; // at least one sig fig
        CFMutableStringRef pattern = CFStringCreateMutable(NULL, 0);
        // use significant digits pattern
        CFStringAppendCString(pattern, "@", kCFStringEncodingASCII);
        CFStringPad(pattern, CFSTR("#"), prec, 0);
        double targetValue = values[spec->mainArgNum].value.doubleValue;
#if LONG_DOUBLE_SUPPORT
        if (CFFormatSize16 == values[spec->mainArgNum].size) {
            targetValue = values[spec->mainArgNum].value.longDoubleValue; // losing precision
        }
#endif
        double max = pow(10.0, (double)prec); // a magnitude of 10^prec or more needs more digits than the number of sig figs, so we need to use scientific format
        double min = 0.0001; // if the magnitude is less than 1E-4, scientific format is the shorter form
        // The upper bound is inclusive: exactly 10^prec already has prec + 1 digits (printf's %g switches to exponent style when the exponent is >= the precision)
        if (((targetValue > 0 && (targetValue >= max || targetValue < min)) || (targetValue < 0 && (targetValue <= -max || targetValue > -min)))) {
            CFStringAppendCString(pattern, "E+00", kCFStringEncodingASCII);
        } else if (prec > groupingSize && groupingSize != 0) {
            CFStringInsert(pattern, prec-groupingSize, CFSTR(",")); // if we are not using scientific format, we need to set the pattern to use grouping separator
            if (secondaryGroupingSize != 0 && prec > (groupingSize + secondaryGroupingSize)) CFStringInsert(pattern, prec-groupingSize-secondaryGroupingSize, CFSTR(","));
        }
        CFNumberFormatterSetFormat(formatter, pattern);
        CFRelease(pattern);
    }

    // clear the padding, we will add it later if we need it
    const SInt32 z = 0;
    CFNumberRef zero = CFNumberCreate(NULL, kCFNumberSInt32Type, &z);
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterFormatWidthKey, zero);

    CFNumberRef tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec);
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMaxFractionDigitsKey, tmp);
    if (spec->type == CFFormatDoubleType) {
        CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinFractionDigitsKey, tmp);
    } else {
        CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinFractionDigitsKey, zero);
    }
    CFRelease(tmp);
    CFRelease(zero);

    Boolean isNegative = false;
    switch (values[spec->mainArgNum].type) {
        case CFFormatLongType:
            if (values[spec->mainArgNum].value.int64Value < 0) isNegative = true;
            break;
        case CFFormatDoubleType:
#if LONG_DOUBLE_SUPPORT
            if ((CFFormatSize16 == values[spec->mainArgNum].size) && (values[spec->mainArgNum].value.longDoubleValue < 0)) isNegative = true;
            else
#endif
            if (values[spec->mainArgNum].value.doubleValue < 0) isNegative = true;
            break;
        default:
            break;
    }

    // The formatter is shared, so a leading '+' may be left over from an earlier call: add or strip it to match this spec
    CFStringRef pattern = CFNumberFormatterGetFormat(formatter);
    Boolean patternHasPlus = (CFStringGetLength(pattern) > 0) && (CFStringGetCharacterAtIndex(pattern, 0) == '+'); // never index into an empty pattern
    if ((spec->flags & kCFStringFormatPlusFlag) && !isNegative) {
        if (!patternHasPlus) {
            CFMutableStringRef newPattern = CFStringCreateMutableCopy(NULL, 0, CFSTR("+"));
            CFStringAppend(newPattern, pattern);
            CFNumberFormatterSetFormat(formatter, newPattern);
            CFRelease(newPattern);
        }
    } else {
        if (patternHasPlus) {
            CFStringRef newPattern = CFStringCreateWithSubstring(NULL, pattern, CFRangeMake(1, CFStringGetLength(pattern)-1));
            CFNumberFormatterSetFormat(formatter, newPattern);
            CFRelease(newPattern);
        }
    }

    // width == 0 seems to be CFNumberFormatter's default setting
    if (hasPrecision && spec->type == CFFormatLongType) { // if we have precision and %d or %u, we pad 0 according to precision first
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec);
    } else {
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &width);
    }
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterFormatWidthKey, tmp);
    CFRelease(tmp);

    // ??? use the right zero here for Arabic
    Boolean padZero = (spec->flags & kCFStringFormatZeroFlag) != 0; // explicit test so the result does not depend on which bit the flag occupies
    if (hasPrecision && spec->type == CFFormatLongType) { // if we have precision and %d or %u, we pad 0
        padZero = true;
    }
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterPaddingCharacterKey, padZero ? CFSTR("0") : CFSTR(" "));


    // Left (default) or right padding
    SInt32 p = (spec->flags & kCFStringFormatMinusFlag) ? kCFNumberFormatterPadAfterSuffix : (padZero ? kCFNumberFormatterPadAfterPrefix : kCFNumberFormatterPadBeforePrefix);
    if (hasPrecision && spec->type == CFFormatLongType) {
        SInt32 tmpP = kCFNumberFormatterPadAfterPrefix;
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &tmpP);
    } else {
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &p);
    }
    CFNumberFormatterSetProperty(formatter, kCFNumberFormatterPaddingPositionKey, tmp);
    CFRelease(tmp);

    if (spec->numericFormatStyle == CFFormatStyleScientific) {
        prec++; // for %e, precision+1 is the number of sig fig
        tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec);
        CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinSignificantDigitsKey, tmp);
        CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMaxSignificantDigitsKey, tmp);
        CFRelease(tmp);
    }

    CFStringRef localizedNumberString = NULL;
    switch (spec->type) {
        case CFFormatLongType:
            // ??? Need to do unsigned
            localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberSInt64Type, &(values[spec->mainArgNum].value.int64Value));
            break;
        case CFFormatDoubleType: {
#if LONG_DOUBLE_SUPPORT
            if (CFFormatSize16 == values[spec->mainArgNum].size) {
                double doubleValue = values[spec->mainArgNum].value.longDoubleValue; // losing precision
                localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberDoubleType, &doubleValue);
            } else
#endif
            {
                localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberDoubleType, &(values[spec->mainArgNum].value.doubleValue));
            }
            break;
        }
        default: // any other type leaves localizedNumberString NULL and the function returns false
            break;
    }
    __CFUnlock(&formatterLock);

    if (localizedNumberString) {
        // we need to pad space if we have %d or %u
        // (with an explicit precision the formatter's width was set to the precision above, so the field width is applied here by hand)
        if (spec->type == CFFormatLongType && hasPrecision && CFStringGetLength(localizedNumberString) < width) {
            CFMutableStringRef finalStr = NULL;
            if (p == kCFNumberFormatterPadAfterSuffix) {
                finalStr = CFStringCreateMutableCopy(NULL, 0, localizedNumberString);
                CFStringPad(finalStr, CFSTR(" "), width, 0);
            } else {
                finalStr = CFStringCreateMutable(NULL, 0);
                CFStringPad(finalStr, CFSTR(" "), width - CFStringGetLength(localizedNumberString), 0);
                CFStringAppend(finalStr, localizedNumberString);
            }
            CFRelease(localizedNumberString);
            localizedNumberString = finalStr;
        }
        CFStringAppend(output, localizedNumberString);
        CFRelease(localizedNumberString);
        return true;
    }
    return false;
}
#endif
5534
5535 CF_INLINE void __CFParseFormatSpec(const UniChar *uformat, const uint8_t *cformat, SInt32 *fmtIdx, SInt32 fmtLen, CFFormatSpec *spec, CFStringRef *configKeyPointer) {
5536 Boolean seenDot = false;
5537 Boolean seenSharp = false;
5538 CFIndex keyIndex = kCFNotFound;
5539
5540 for (;;) {
5541 UniChar ch;
5542 if (fmtLen <= *fmtIdx) return; /* no type */
5543 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5544
5545 if (keyIndex >= 0) {
5546 if ((ch < '0') || ((ch > '9') && (ch < 'A')) || ((ch > 'Z') && (ch < 'a') && (ch != '_')) || (ch > 'z')) {
5547 if (ch == '@') { // found the key
5548 CFIndex length = (*fmtIdx) - 1 - keyIndex;
5549
5550 spec->flags |= kCFStringFormatExternalSpecFlag;
5551 spec->type = CFFormatCFType;
5552 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5553
5554 if ((NULL != configKeyPointer) && (length > 0)) {
5555 if (cformat) {
5556 *configKeyPointer = CFStringCreateWithBytes(NULL, cformat + keyIndex, length, __CFStringGetEightBitStringEncoding(), FALSE);
5557 } else {
5558 *configKeyPointer = CFStringCreateWithCharactersNoCopy(NULL, uformat + keyIndex, length, kCFAllocatorNull);
5559 }
5560 }
5561 return;
5562 }
5563 keyIndex = kCFNotFound;
5564 }
5565 continue;
5566 }
5567
5568 reswtch:switch (ch) {
5569 case '#': // ignored for now
5570 seenSharp = true;
5571 break;
5572 case 0x20:
5573 if (!(spec->flags & kCFStringFormatPlusFlag)) spec->flags |= kCFStringFormatSpaceFlag;
5574 break;
5575 case '-':
5576 spec->flags |= kCFStringFormatMinusFlag;
5577 spec->flags &= ~kCFStringFormatZeroFlag; // remove zero flag
5578 break;
5579 case '+':
5580 spec->flags |= kCFStringFormatPlusFlag;
5581 spec->flags &= ~kCFStringFormatSpaceFlag; // remove space flag
5582 break;
5583 case '0':
5584 if (seenDot) { // after we see '.' and then we see '0', it is 0 precision. We should not see '.' after '0' if '0' is the zero padding flag
5585 spec->precArg = 0;
5586 break;
5587 }
5588 if (!(spec->flags & kCFStringFormatMinusFlag)) spec->flags |= kCFStringFormatZeroFlag;
5589 break;
5590 case 'h':
5591 if (*fmtIdx < fmtLen) {
5592 // fetch next character, don't increment fmtIdx
5593 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
5594 if ('h' == ch) { // 'hh' for char, like 'c'
5595 (*fmtIdx)++;
5596 spec->size = CFFormatSize1;
5597 break;
5598 }
5599 }
5600 spec->size = CFFormatSize2;
5601 break;
5602 case 'l':
5603 if (*fmtIdx < fmtLen) {
5604 // fetch next character, don't increment fmtIdx
5605 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
5606 if ('l' == ch) { // 'll' for long long, like 'q'
5607 (*fmtIdx)++;
5608 spec->size = CFFormatSize8;
5609 break;
5610 }
5611 }
5612 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5613 break;
5614 #if LONG_DOUBLE_SUPPORT
5615 case 'L':
5616 spec->size = CFFormatSize16;
5617 break;
5618 #endif
5619 case 'q':
5620 spec->size = CFFormatSize8;
5621 break;
5622 case 't': case 'z':
5623 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5624 break;
5625 case 'j':
5626 spec->size = CFFormatSize8;
5627 break;
5628 case 'c':
5629 spec->type = CFFormatLongType;
5630 spec->size = CFFormatSize1;
5631 return;
5632 case 'D': case 'd': case 'i': case 'U': case 'u':
5633 // we can localize all but octal or hex
5634 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionMountainLion)) spec->flags |= kCFStringFormatLocalizable;
5635 spec->numericFormatStyle = CFFormatStyleDecimal;
5636 if (ch == 'u' || ch == 'U') spec->numericFormatStyle = CFFormatStyleUnsigned;
5637 // fall thru
5638 case 'O': case 'o': case 'x': case 'X':
5639 spec->type = CFFormatLongType;
5640 // Seems like if spec->size == 0, we should spec->size = CFFormatSize4. However, 0 is handled correctly.
5641 return;
5642 case 'f': case 'F': case 'g': case 'G': case 'e': case 'E': {
5643 // we can localize all but hex float output
5644 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionMountainLion)) spec->flags |= kCFStringFormatLocalizable;
5645 char lch = (ch >= 'A' && ch <= 'Z') ? (ch - 'A' + 'a') : ch;
5646 spec->numericFormatStyle = ((lch == 'e' || lch == 'g') ? CFFormatStyleScientific : 0) | ((lch == 'f' || lch == 'g') ? CFFormatStyleDecimal : 0);
5647 if (seenDot && spec->precArg == -1 && spec->precArgNum == -1) { // for the cases that we have '.' but no precision followed, not even '*'
5648 spec->precArg = 0;
5649 }
5650 }
5651 // fall thru
5652 case 'a': case 'A':
5653 spec->type = CFFormatDoubleType;
5654 if (spec->size != CFFormatSize16) spec->size = CFFormatSize8;
5655 return;
5656 case 'n': /* %n is not handled correctly; for Leopard or newer apps, we disable it further */
5657 spec->type = 1 ? CFFormatDummyPointerType : CFFormatPointerType;
5658 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5659 return;
5660 case 'p':
5661 spec->type = CFFormatPointerType;
5662 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5663 return;
5664 case 's':
5665 spec->type = CFFormatCharsType;
5666 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5667 return;
5668 case 'S':
5669 spec->type = CFFormatUnicharsType;
5670 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5671 return;
5672 case 'C':
5673 spec->type = CFFormatSingleUnicharType;
5674 spec->size = CFFormatSize2;
5675 return;
5676 case 'P':
5677 spec->type = CFFormatPascalCharsType;
5678 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5679 return;
5680 case '@':
5681 if (seenSharp) {
5682 seenSharp = false;
5683 keyIndex = *fmtIdx;
5684 break;
5685 } else {
5686 spec->type = CFFormatCFType;
5687 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5688 return;
5689 }
5690 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
5691 int64_t number = 0;
5692 do {
5693 number = 10 * number + (ch - '0');
5694 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5695 } while ((UInt32)(ch - '0') <= 9);
5696 if ('$' == ch) {
5697 if (-2 == spec->precArgNum) {
5698 spec->precArgNum = (int8_t)number - 1; // Arg numbers start from 1
5699 } else if (-2 == spec->widthArgNum) {
5700 spec->widthArgNum = (int8_t)number - 1; // Arg numbers start from 1
5701 } else {
5702 spec->mainArgNum = (int8_t)number - 1; // Arg numbers start from 1
5703 }
5704 break;
5705 } else if (seenDot) { /* else it's either precision or width */
5706 spec->precArg = (SInt32)number;
5707 } else {
5708 spec->widthArg = (SInt32)number;
5709 }
5710 goto reswtch;
5711 }
5712 case '*':
5713 spec->widthArgNum = -2;
5714 break;
5715 case '.':
5716 seenDot = true;
5717 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5718 if ('*' == ch) {
5719 spec->precArgNum = -2;
5720 break;
5721 }
5722 goto reswtch;
5723 default:
5724 spec->type = CFFormatLiteralType;
5725 return;
5726 }
5727 }
5728 }
5729
5730 /* ??? %s depends on handling of encodings by __CFStringAppendBytes
5731 */
5732 void CFStringAppendFormatAndArguments(CFMutableStringRef outputString, CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
5733 __CFStringAppendFormatCore(outputString, NULL, NULL, formatOptions, NULL, formatString, 0, NULL, 0, args);
5734 }
5735
// Length of the scratch buffer that SNPRINTF formats into
#define BUFFER_LEN 512

/* SNPRINTF(TYPE, WHAT) converts WHAT to TYPE and formats it into `buffer` using `formatBuffer`.
   The expansion relies on these names being in scope at the call site:
   specs, curSpec, buffer, formatBuffer, width, precision.
   width and/or precision are passed as extra arguments only when the spec takes them from the
   argument list (widthArgNum / precArgNum other than -1).
   Every platform variant limits the output to BUFFER_LEN-1 bytes, so an oversized conversion is
   truncated instead of running past the end of buffer.
   The expansion is a brace-enclosed block; existing call sites do not put a semicolon after it.
*/
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
#define SNPRINTF(TYPE, WHAT) {				\
    TYPE value = (TYPE)(WHAT);				\
    if (-1 != specs[curSpec].widthArgNum) {		\
	if (-1 != specs[curSpec].precArgNum) {		\
	    snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, width, precision, value); \
	} else {					\
	    snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, width, value); \
	}						\
    } else {						\
	if (-1 != specs[curSpec].precArgNum) {		\
	    snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, precision, value); \
        } else {					\
	    snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, value);	\
	}						\
    }}
#else
#define SNPRINTF(TYPE, WHAT) {				\
    TYPE value = (TYPE)(WHAT);				\
    if (-1 != specs[curSpec].widthArgNum) {		\
	if (-1 != specs[curSpec].precArgNum) {		\
	    snprintf(buffer, BUFFER_LEN-1, formatBuffer, width, precision, value); \
	} else {					\
	    snprintf(buffer, BUFFER_LEN-1, formatBuffer, width, value); \
	}						\
    } else {						\
	if (-1 != specs[curSpec].precArgNum) {		\
	    snprintf(buffer, BUFFER_LEN-1, formatBuffer, precision, value); \
        } else {					\
	    snprintf(buffer, BUFFER_LEN-1, formatBuffer, value);	\
	}						\
    }}
#endif
5772
5773 void _CFStringAppendFormatAndArgumentsAux2(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFStringRef (*contextDescFunc)(void *, const void *, const void *, bool, bool *), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
5774 __CFStringAppendFormatCore(outputString, copyDescFunc, contextDescFunc, formatOptions, NULL, formatString, 0, NULL, 0, args);
5775 }
5776
5777 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
5778 _CFStringAppendFormatAndArgumentsAux2(outputString, copyDescFunc, NULL, formatOptions, formatString, args);
5779 }
5780
5781 static void __CFStringAppendFormatCore(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFStringRef (*contextDescFunc)(void *, const void *, const void *, bool, bool *), CFDictionaryRef formatOptions, CFDictionaryRef stringsDictConfig, CFStringRef formatString, CFIndex initialArgPosition, const void *origValues, CFIndex originalValuesSize, va_list args) {
5782 SInt32 numSpecs, sizeSpecs, sizeArgNum, formatIdx, curSpec, argNum;
5783 CFIndex formatLen;
5784 #define FORMAT_BUFFER_LEN 400
5785 const uint8_t *cformat = NULL;
5786 const UniChar *uformat = NULL;
5787 UniChar *formatChars = NULL;
5788 UniChar localFormatBuffer[FORMAT_BUFFER_LEN];
5789
5790 #define VPRINTF_BUFFER_LEN 61
5791 CFFormatSpec localSpecsBuffer[VPRINTF_BUFFER_LEN];
5792 CFFormatSpec *specs;
5793 CFPrintValue localValuesBuffer[VPRINTF_BUFFER_LEN];
5794 CFPrintValue *values;
5795 const CFPrintValue *originalValues = (const CFPrintValue *)origValues;
5796 CFDictionaryRef localConfigs[VPRINTF_BUFFER_LEN];
5797 CFDictionaryRef *configs;
5798 CFIndex numConfigs;
5799 CFAllocatorRef tmpAlloc = NULL;
5800 intmax_t dummyLocation; // A place for %n to do its thing in; should be the widest possible int value
5801
5802 numSpecs = 0;
5803 sizeSpecs = 0;
5804 sizeArgNum = 0;
5805 numConfigs = 0;
5806 specs = NULL;
5807 values = NULL;
5808 configs = NULL;
5809
5810
5811 formatLen = CFStringGetLength(formatString);
5812 if (!CF_IS_OBJC(__kCFStringTypeID, formatString)) {
5813 __CFAssertIsString(formatString);
5814 if (!__CFStrIsUnicode(formatString)) {
5815 cformat = (const uint8_t *)__CFStrContents(formatString);
5816 if (cformat) cformat += __CFStrSkipAnyLengthByte(formatString);
5817 } else {
5818 uformat = (const UniChar *)__CFStrContents(formatString);
5819 }
5820 }
5821 if (!cformat && !uformat) {
5822 formatChars = (formatLen > FORMAT_BUFFER_LEN) ? (UniChar *)CFAllocatorAllocate(tmpAlloc = __CFGetDefaultAllocator(), formatLen * sizeof(UniChar), 0) : localFormatBuffer;
5823 if (formatChars != localFormatBuffer && __CFOASafe) __CFSetLastAllocationEventName(formatChars, "CFString (temp)");
5824 CFStringGetCharacters(formatString, CFRangeMake(0, formatLen), formatChars);
5825 uformat = formatChars;
5826 }
5827
5828 /* Compute an upper bound for the number of format specifications */
5829 if (cformat) {
5830 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == cformat[formatIdx]) sizeSpecs++;
5831 } else {
5832 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == uformat[formatIdx]) sizeSpecs++;
5833 }
5834 tmpAlloc = __CFGetDefaultAllocator();
5835 specs = ((2 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? (CFFormatSpec *)CFAllocatorAllocate(tmpAlloc, (2 * sizeSpecs + 1) * sizeof(CFFormatSpec), 0) : localSpecsBuffer;
5836 if (specs != localSpecsBuffer && __CFOASafe) __CFSetLastAllocationEventName(specs, "CFString (temp)");
5837
5838 configs = ((sizeSpecs < VPRINTF_BUFFER_LEN) ? localConfigs : (CFDictionaryRef *)CFAllocatorAllocate(tmpAlloc, sizeof(CFStringRef) * sizeSpecs, 0));
5839
5840 /* Collect format specification information from the format string */
5841 for (curSpec = 0, formatIdx = 0; formatIdx < formatLen; curSpec++) {
5842 SInt32 newFmtIdx;
5843 specs[curSpec].loc = formatIdx;
5844 specs[curSpec].len = 0;
5845 specs[curSpec].size = 0;
5846 specs[curSpec].type = 0;
5847 specs[curSpec].flags = 0;
5848 specs[curSpec].widthArg = -1;
5849 specs[curSpec].precArg = -1;
5850 specs[curSpec].mainArgNum = -1;
5851 specs[curSpec].precArgNum = -1;
5852 specs[curSpec].widthArgNum = -1;
5853 specs[curSpec].configDictIndex = -1;
5854 if (cformat) {
5855 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != cformat[newFmtIdx]; newFmtIdx++);
5856 } else {
5857 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != uformat[newFmtIdx]; newFmtIdx++);
5858 }
5859 if (newFmtIdx != formatIdx) { /* Literal chunk */
5860 specs[curSpec].type = CFFormatLiteralType;
5861 specs[curSpec].len = newFmtIdx - formatIdx;
5862 } else {
5863 CFStringRef configKey = NULL;
5864 newFmtIdx++; /* Skip % */
5865 __CFParseFormatSpec(uformat, cformat, &newFmtIdx, formatLen, &(specs[curSpec]), &configKey);
5866 if (CFFormatLiteralType == specs[curSpec].type) {
5867 specs[curSpec].loc = formatIdx + 1;
5868 specs[curSpec].len = 1;
5869 } else {
5870 specs[curSpec].len = newFmtIdx - formatIdx;
5871 }
5872 }
5873 formatIdx = newFmtIdx;
5874
5875 // fprintf(stderr, "specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
5876
5877 }
5878 numSpecs = curSpec;
5879
5880 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
5881 sizeArgNum = ((NULL == originalValues) ? (3 * sizeSpecs + 1) : originalValuesSize);
5882
5883 values = (sizeArgNum > VPRINTF_BUFFER_LEN) ? (CFPrintValue *)CFAllocatorAllocate(tmpAlloc, sizeArgNum * sizeof(CFPrintValue), 0) : localValuesBuffer;
5884 if (values != localValuesBuffer && __CFOASafe) __CFSetLastAllocationEventName(values, "CFString (temp)");
5885 memset(values, 0, sizeArgNum * sizeof(CFPrintValue));
5886
5887 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
5888 // va_copy is a C99 extension. No support on Windows
5889 va_list copiedArgs;
5890 if (numConfigs > 0) va_copy(copiedArgs, args); // we need to preserve the original state for passing down
5891 #endif
5892
5893 /* Compute values array */
5894 argNum = initialArgPosition;
5895 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
5896 SInt32 newMaxArgNum;
5897 if (0 == specs[curSpec].type) continue;
5898 if (CFFormatLiteralType == specs[curSpec].type) continue;
5899 newMaxArgNum = sizeArgNum;
5900 if (newMaxArgNum < specs[curSpec].mainArgNum) {
5901 newMaxArgNum = specs[curSpec].mainArgNum;
5902 }
5903 if (newMaxArgNum < specs[curSpec].precArgNum) {
5904 newMaxArgNum = specs[curSpec].precArgNum;
5905 }
5906 if (newMaxArgNum < specs[curSpec].widthArgNum) {
5907 newMaxArgNum = specs[curSpec].widthArgNum;
5908 }
5909 if (sizeArgNum < newMaxArgNum) {
5910 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
5911 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
5912 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
5913 return; // more args than we expected!
5914 }
5915 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
5916 if (-2 == specs[curSpec].widthArgNum) {
5917 specs[curSpec].widthArgNum = argNum++;
5918 }
5919 if (-2 == specs[curSpec].precArgNum) {
5920 specs[curSpec].precArgNum = argNum++;
5921 }
5922 if (-1 == specs[curSpec].mainArgNum) {
5923 specs[curSpec].mainArgNum = argNum++;
5924 }
5925
5926 values[specs[curSpec].mainArgNum].size = specs[curSpec].size;
5927 values[specs[curSpec].mainArgNum].type = specs[curSpec].type;
5928
5929
5930 if (-1 != specs[curSpec].widthArgNum) {
5931 values[specs[curSpec].widthArgNum].size = 0;
5932 values[specs[curSpec].widthArgNum].type = CFFormatLongType;
5933 }
5934 if (-1 != specs[curSpec].precArgNum) {
5935 values[specs[curSpec].precArgNum].size = 0;
5936 values[specs[curSpec].precArgNum].type = CFFormatLongType;
5937 }
5938 }
5939
5940 /* Collect the arguments in correct type from vararg list */
5941 for (argNum = 0; argNum < sizeArgNum; argNum++) {
5942 if ((NULL != originalValues) && (0 == values[argNum].type)) values[argNum] = originalValues[argNum];
5943 switch (values[argNum].type) {
5944 case 0:
5945 case CFFormatLiteralType:
5946 break;
5947 case CFFormatLongType:
5948 case CFFormatSingleUnicharType:
5949 if (CFFormatSize1 == values[argNum].size) {
5950 values[argNum].value.int64Value = (int64_t)(int8_t)va_arg(args, int);
5951 } else if (CFFormatSize2 == values[argNum].size) {
5952 values[argNum].value.int64Value = (int64_t)(int16_t)va_arg(args, int);
5953 } else if (CFFormatSize4 == values[argNum].size) {
5954 values[argNum].value.int64Value = (int64_t)va_arg(args, int32_t);
5955 } else if (CFFormatSize8 == values[argNum].size) {
5956 values[argNum].value.int64Value = (int64_t)va_arg(args, int64_t);
5957 } else {
5958 values[argNum].value.int64Value = (int64_t)va_arg(args, int);
5959 }
5960 break;
5961 case CFFormatDoubleType:
5962 #if LONG_DOUBLE_SUPPORT
5963 if (CFFormatSize16 == values[argNum].size) {
5964 values[argNum].value.longDoubleValue = va_arg(args, long double);
5965 } else
5966 #endif
5967 {
5968 values[argNum].value.doubleValue = va_arg(args, double);
5969 }
5970 break;
5971 case CFFormatPointerType:
5972 case CFFormatObjectType:
5973 case CFFormatCFType:
5974 case CFFormatUnicharsType:
5975 case CFFormatCharsType:
5976 case CFFormatPascalCharsType:
5977 values[argNum].value.pointerValue = va_arg(args, void *);
5978 break;
5979 case CFFormatDummyPointerType:
5980 (void)va_arg(args, void *); // Skip the provided argument
5981 values[argNum].value.pointerValue = &dummyLocation;
5982 break;
5983 }
5984 }
5985 va_end(args);
5986
5987 /* Format the pieces together */
5988
5989 if (NULL == originalValues) {
5990 originalValues = values;
5991 originalValuesSize = sizeArgNum;
5992 }
5993
5994 SInt32 numSpecsContext = 0;
5995 CFFormatSpec *specsContext = (CFFormatSpec *)calloc(numSpecs, sizeof(CFFormatSpec));
5996 static const CFStringRef replacement = CFSTR("%@NSCONTEXT");
5997
5998 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
5999 SInt32 width = 0, precision = 0;
6000 UniChar *up, ch;
6001 Boolean hasWidth = false, hasPrecision = false;
6002
6003 // widthArgNum and widthArg are never set at the same time; same for precArg*
6004 if (-1 != specs[curSpec].widthArgNum) {
6005 width = (SInt32)values[specs[curSpec].widthArgNum].value.int64Value;
6006 hasWidth = true;
6007 }
6008 if (-1 != specs[curSpec].precArgNum) {
6009 precision = (SInt32)values[specs[curSpec].precArgNum].value.int64Value;
6010 hasPrecision = true;
6011 }
6012 if (-1 != specs[curSpec].widthArg) {
6013 width = specs[curSpec].widthArg;
6014 hasWidth = true;
6015 }
6016 if (-1 != specs[curSpec].precArg) {
6017 precision = specs[curSpec].precArg;
6018 hasPrecision = true;
6019 }
6020
6021 switch (specs[curSpec].type) {
6022 case CFFormatLongType:
6023 case CFFormatDoubleType:
6024 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
6025 if (formatOptions && (specs[curSpec].flags & kCFStringFormatLocalizable) && (CFGetTypeID(formatOptions) == CFLocaleGetTypeID())) { // We have a locale, so we do localized formatting
6026 if (__CFStringFormatLocalizedNumber(outputString, (CFLocaleRef)formatOptions, values, &specs[curSpec], width, precision, hasPrecision)) break;
6027 }
6028 /* Otherwise fall-thru to the next case! */
6029 #endif
6030 case CFFormatPointerType: {
6031 char formatBuffer[128];
6032 #if defined(__GNUC__)
6033 char buffer[BUFFER_LEN + width + precision];
6034 #else
6035 char stackBuffer[BUFFER_LEN];
6036 char *dynamicBuffer = NULL;
6037 char *buffer = stackBuffer;
6038 if (256+width+precision > BUFFER_LEN) {
6039 dynamicBuffer = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, 256+width+precision, 0);
6040 buffer = dynamicBuffer;
6041 }
6042 #endif
6043 SInt32 cidx, idx, loc;
6044 Boolean appended = false;
6045 loc = specs[curSpec].loc;
6046 // In preparation to call snprintf(), copy the format string out
6047 if (cformat) {
6048 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
6049 if ('$' == cformat[loc + cidx]) {
6050 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
6051 } else {
6052 formatBuffer[idx] = cformat[loc + cidx];
6053 }
6054 }
6055 } else {
6056 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
6057 if ('$' == uformat[loc + cidx]) {
6058 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
6059 } else {
6060 formatBuffer[idx] = (int8_t)uformat[loc + cidx];
6061 }
6062 }
6063 }
6064 formatBuffer[idx] = '\0';
6065 // Should modify format buffer here if necessary; for example, to translate %qd to
6066 // the equivalent, on architectures which do not have %q.
6067 buffer[sizeof(buffer) - 1] = '\0';
6068 switch (specs[curSpec].type) {
6069 case CFFormatLongType:
6070 if (CFFormatSize8 == specs[curSpec].size) {
6071 SNPRINTF(int64_t, values[specs[curSpec].mainArgNum].value.int64Value)
6072 } else {
6073 SNPRINTF(SInt32, values[specs[curSpec].mainArgNum].value.int64Value)
6074 }
6075 break;
6076 case CFFormatPointerType:
6077 case CFFormatDummyPointerType:
6078 SNPRINTF(void *, values[specs[curSpec].mainArgNum].value.pointerValue)
6079 break;
6080
6081 case CFFormatDoubleType:
6082 #if LONG_DOUBLE_SUPPORT
6083 if (CFFormatSize16 == specs[curSpec].size) {
6084 SNPRINTF(long double, values[specs[curSpec].mainArgNum].value.longDoubleValue)
6085 } else
6086 #endif
6087 {
6088 SNPRINTF(double, values[specs[curSpec].mainArgNum].value.doubleValue)
6089 }
6090 // See if we need to localize the decimal point
6091 if (formatOptions) { // We have localization info
6092 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
6093 CFStringRef decimalSeparator = (CFGetTypeID(formatOptions) == CFLocaleGetTypeID()) ? (CFStringRef)CFLocaleGetValue((CFLocaleRef)formatOptions, kCFLocaleDecimalSeparatorKey) : (CFStringRef)CFDictionaryGetValue(formatOptions, CFSTR("NSDecimalSeparator"));
6094 #else
6095 CFStringRef decimalSeparator = CFSTR(".");
6096 #endif
6097 if (decimalSeparator != NULL) { // We have a decimal separator in there
6098 CFIndex decimalPointLoc = 0;
6099 while (buffer[decimalPointLoc] != 0 && buffer[decimalPointLoc] != '.') decimalPointLoc++;
6100 if (buffer[decimalPointLoc] == '.') { // And we have a decimal point in the formatted string
6101 buffer[decimalPointLoc] = 0;
6102 CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
6103 CFStringAppend(outputString, decimalSeparator);
6104 CFStringAppendCString(outputString, (const char *)(buffer + decimalPointLoc + 1), __CFStringGetEightBitStringEncoding());
6105 appended = true;
6106 }
6107 }
6108 }
6109 break;
6110 }
6111 if (!appended) CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
6112 #if !defined(__GNUC__)
6113 if (dynamicBuffer) {
6114 CFAllocatorDeallocate(kCFAllocatorSystemDefault, dynamicBuffer);
6115 }
6116 #endif
6117 }
6118 break;
6119 case CFFormatLiteralType:
6120 if (cformat) {
6121 __CFStringAppendBytes(outputString, (const char *)(cformat+specs[curSpec].loc), specs[curSpec].len, __CFStringGetEightBitStringEncoding());
6122 } else {
6123 CFStringAppendCharacters(outputString, uformat+specs[curSpec].loc, specs[curSpec].len);
6124 }
6125 break;
6126 case CFFormatPascalCharsType:
6127 case CFFormatCharsType:
6128 if (values[specs[curSpec].mainArgNum].value.pointerValue == NULL) {
6129 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
6130 } else {
6131 int len;
6132 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue;
6133 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
6134 len = ((unsigned char *)str)[0];
6135 str++;
6136 if (hasPrecision && precision < len) len = precision;
6137 } else { // C-string case
6138 if (!hasPrecision) { // No precision, so rely on the terminating null character
6139 len = strlen(str);
6140 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
6141 const char *terminatingNull = (const char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
6142 if (terminatingNull) { // There was a null in the first precision characters
6143 len = terminatingNull - str;
6144 } else {
6145 len = precision;
6146 }
6147 }
6148 }
6149 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6150 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6151 // to ignore those flags (and, say, never pad with '0' instead of space).
6152 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6153 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
6154 if (hasWidth && width > len) {
6155 int w = width - len; // We need this many spaces; do it ten at a time
6156 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6157 }
6158 } else {
6159 if (hasWidth && width > len) {
6160 int w = width - len; // We need this many spaces; do it ten at a time
6161 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6162 }
6163 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
6164 }
6165 }
6166 break;
6167 case CFFormatSingleUnicharType:
6168 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value;
6169 CFStringAppendCharacters(outputString, &ch, 1);
6170 break;
6171 case CFFormatUnicharsType:
6172 //??? need to handle width, precision, and padding arguments
6173 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue;
6174 if (NULL == up) {
6175 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
6176 } else {
6177 int len;
6178 for (len = 0; 0 != up[len]; len++);
6179 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6180 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6181 // to ignore those flags (and, say, never pad with '0' instead of space).
6182 if (hasPrecision && precision < len) len = precision;
6183 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6184 CFStringAppendCharacters(outputString, up, len);
6185 if (hasWidth && width > len) {
6186 int w = width - len; // We need this many spaces; do it ten at a time
6187 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6188 }
6189 } else {
6190 if (hasWidth && width > len) {
6191 int w = width - len; // We need this many spaces; do it ten at a time
6192 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6193 }
6194 CFStringAppendCharacters(outputString, up, len);
6195 }
6196 }
6197 break;
6198 case CFFormatCFType:
6199 case CFFormatObjectType:
6200 if (specs[curSpec].configDictIndex != -1) { // config dict
6201 CFTypeRef object = NULL;
6202 switch (values[specs[curSpec].mainArgNum].type) {
6203 case CFFormatLongType:
6204 object = CFNumberCreate(tmpAlloc, kCFNumberSInt64Type, &(values[specs[curSpec].mainArgNum].value.int64Value));
6205 break;
6206
6207 case CFFormatDoubleType:
6208 #if LONG_DOUBLE_SUPPORT
6209 if (CFFormatSize16 == values[specs[curSpec].mainArgNum].size) {
6210 double aValue = values[specs[curSpec].mainArgNum].value.longDoubleValue; // losing precision
6211
6212 object = CFNumberCreate(tmpAlloc, kCFNumberDoubleType, &aValue);
6213 } else
6214 #endif
6215 {
6216 object = CFNumberCreate(tmpAlloc, kCFNumberDoubleType, &(values[specs[curSpec].mainArgNum].value.doubleValue));
6217 }
6218 break;
6219
6220 case CFFormatPointerType:
6221 object = CFNumberCreate(tmpAlloc, kCFNumberCFIndexType, &(values[specs[curSpec].mainArgNum].value.pointerValue));
6222 break;
6223
6224 case CFFormatPascalCharsType:
6225 case CFFormatCharsType:
6226 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
6227 CFMutableStringRef aString = CFStringCreateMutable(tmpAlloc, 0);
6228 int len;
6229 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue;
6230 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
6231 len = ((unsigned char *)str)[0];
6232 str++;
6233 if (hasPrecision && precision < len) len = precision;
6234 } else { // C-string case
6235 if (!hasPrecision) { // No precision, so rely on the terminating null character
6236 len = strlen(str);
6237 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
6238 const char *terminatingNull = (const char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
6239 if (terminatingNull) { // There was a null in the first precision characters
6240 len = terminatingNull - str;
6241 } else {
6242 len = precision;
6243 }
6244 }
6245 }
6246 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6247 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6248 // to ignore those flags (and, say, never pad with '0' instead of space).
6249 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6250 __CFStringAppendBytes(aString, str, len, __CFStringGetSystemEncoding());
6251 if (hasWidth && width > len) {
6252 int w = width - len; // We need this many spaces; do it ten at a time
6253 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6254 }
6255 } else {
6256 if (hasWidth && width > len) {
6257 int w = width - len; // We need this many spaces; do it ten at a time
6258 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6259 }
6260 __CFStringAppendBytes(aString, str, len, __CFStringGetSystemEncoding());
6261 }
6262
6263 object = aString;
6264 }
6265 break;
6266
6267 case CFFormatSingleUnicharType:
6268 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value;
6269 object = CFStringCreateWithCharactersNoCopy(tmpAlloc, &ch, 1, kCFAllocatorNull);
6270 break;
6271
6272 case CFFormatUnicharsType:
6273 //??? need to handle width, precision, and padding arguments
6274 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue;
6275 if (NULL != up) {
6276 CFMutableStringRef aString = CFStringCreateMutable(tmpAlloc, 0);
6277 int len;
6278 for (len = 0; 0 != up[len]; len++);
6279 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6280 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6281 // to ignore those flags (and, say, never pad with '0' instead of space).
6282 if (hasPrecision && precision < len) len = precision;
6283 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6284 CFStringAppendCharacters(aString, up, len);
6285 if (hasWidth && width > len) {
6286 int w = width - len; // We need this many spaces; do it ten at a time
6287 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6288 }
6289 } else {
6290 if (hasWidth && width > len) {
6291 int w = width - len; // We need this many spaces; do it ten at a time
6292 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6293 }
6294 CFStringAppendCharacters(aString, up, len);
6295 }
6296 object = aString;
6297 }
6298 break;
6299
6300 case CFFormatCFType:
6301 case CFFormatObjectType:
6302 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) object = CFRetain(values[specs[curSpec].mainArgNum].value.pointerValue);
6303 break;
6304 }
6305
6306 if (NULL != object) CFRelease(object);
6307
6308 } else if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
6309 CFStringRef str = NULL;
6310 if (contextDescFunc) {
6311 bool found = NO;
6312 str = contextDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatString, replacement, NO, &found);
6313 if (found) {
6314 str = CFRetain(replacement);
6315 specsContext[numSpecsContext] = specs[curSpec];
6316 numSpecsContext++;
6317 }
6318 }
6319 if (!str) {
6320 if (copyDescFunc) {
6321 str = copyDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
6322 } else {
6323 str = __CFCopyFormattingDescription(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
6324 if (NULL == str) {
6325 str = CFCopyDescription(values[specs[curSpec].mainArgNum].value.pointerValue);
6326 }
6327 }
6328 }
6329 if (str) {
6330 CFStringAppend(outputString, str);
6331 CFRelease(str);
6332 } else {
6333 CFStringAppendCString(outputString, "(null description)", kCFStringEncodingASCII);
6334 }
6335 } else {
6336 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
6337 }
6338 break;
6339 }
6340 }
6341
6342 for (SInt32 i = 0; i < numSpecsContext; i++) {
6343 CFRange range = CFStringFind(outputString, replacement, 0);
6344 CFStringRef str = contextDescFunc(values[specsContext[i].mainArgNum].value.pointerValue, outputString, replacement, true, NULL);
6345 if (str) {
6346 CFStringReplace(outputString, range, str);
6347 CFRelease(str);
6348 }
6349 }
6350
6351 free(specsContext);
6352
6353 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
6354 // va_copy is a C99 extension. No support on Windows
6355 if (numConfigs > 0) va_end(copiedArgs);
6356 #endif
6357 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
6358 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
6359 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
6360 if (configs != localConfigs) CFAllocatorDeallocate(tmpAlloc, configs);
6361 }
6362
6363 #undef SNPRINTF
6364
6365 void CFShowStr(CFStringRef str) {
6366 CFAllocatorRef alloc;
6367
6368 if (!str) {
6369 fprintf(stdout, "(null)\n");
6370 return;
6371 }
6372
6373 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
6374 fprintf(stdout, "This is an NSString, not CFString\n");
6375 return;
6376 }
6377
6378 alloc = CFGetAllocator(str);
6379
6380 fprintf(stdout, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str), __CFStrIsEightBit(str));
6381 fprintf(stdout, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
6382 __CFStrHasLengthByte(str), __CFStrHasNullByte(str), __CFStrIsInline(str));
6383
6384 fprintf(stdout, "Allocator ");
6385 if (alloc != kCFAllocatorSystemDefault) {
6386 fprintf(stdout, "%p\n", (void *)alloc);
6387 } else {
6388 fprintf(stdout, "SystemDefault\n");
6389 }
6390 fprintf(stdout, "Mutable %d\n", __CFStrIsMutable(str));
6391 if (!__CFStrIsMutable(str) && __CFStrHasContentsDeallocator(str)) {
6392 if (__CFStrContentsDeallocator(str)) fprintf(stdout, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str));
6393 else fprintf(stdout, "ContentsDeallocatorFunc None\n");
6394 } else if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str)) {
6395 fprintf(stdout, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef)str));
6396 }
6397
6398 if (__CFStrIsMutable(str)) {
6399 fprintf(stdout, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str), __CFStrIsFixed(str) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str));
6400 }
6401 fprintf(stdout, "Contents %p\n", (void *)__CFStrContents(str));
6402 }
6403
6404
6405
6406 #undef HANGUL_SBASE
6407 #undef HANGUL_LBASE
6408 #undef HANGUL_VBASE
6409 #undef HANGUL_TBASE
6410 #undef HANGUL_SCOUNT
6411 #undef HANGUL_LCOUNT
6412 #undef HANGUL_VCOUNT
6413 #undef HANGUL_TCOUNT
6414 #undef HANGUL_NCOUNT
6415