]> git.saurik.com Git - apple/cf.git/blob - CFString.c
CF-1153.18.tar.gz
[apple/cf.git] / CFString.c
1 /*
2 * Copyright (c) 2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFString.c
25 Copyright (c) 1998-2014, Apple Inc. All rights reserved.
26 Responsibility: Ali Ozer
27
28 !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined.
29 */
30
31 #include <CoreFoundation/CFBase.h>
32 #include <CoreFoundation/CFString.h>
33 #include <CoreFoundation/CFDictionary.h>
34 #include <CoreFoundation/CFStringEncodingConverterExt.h>
35 #include <CoreFoundation/CFUniChar.h>
36 #include <CoreFoundation/CFUnicodeDecomposition.h>
37 #include <CoreFoundation/CFUnicodePrecomposition.h>
38 #include <CoreFoundation/CFPriv.h>
39 #include <CoreFoundation/CFNumber.h>
40 #include <CoreFoundation/CFNumberFormatter.h>
41 #include "CFInternal.h"
42 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
43 #include "CFLocaleInternal.h"
44 #endif
45 #include <stdarg.h>
46 #include <stdio.h>
47 #include <string.h>
48 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
49 #include <unistd.h>
50 #endif
51
52 #if defined(__GNUC__)
53 #define LONG_DOUBLE_SUPPORT 1
54 #else
55 #define LONG_DOUBLE_SUPPORT 0
56 #endif
57
58
59
60 #define USE_STRING_ROM 0
61
62
63 #ifndef INSTRUMENT_SHARED_STRINGS
64 #define INSTRUMENT_SHARED_STRINGS 0
65 #endif
66
67 CF_PRIVATE const CFStringRef __kCFLocaleCollatorID;
68
69 #if INSTRUMENT_SHARED_STRINGS
70 #include <sys/stat.h> /* for umask() */
71
72 static void __CFRecordStringAllocationEvent(const char *encoding, const char *bytes, CFIndex byteCount) {
73 static CFLock_t lock = CFLockInit;
74
75 if (memchr(bytes, '\n', byteCount)) return; //never record string allocation events for strings with newlines, because those confuse our parser and because they'll never go into the ROM
76
77 __CFLock(&lock);
78 static int fd;
79 if (! fd) {
80 extern char **_NSGetProgname(void);
81 const char *name = *_NSGetProgname();
82 if (! name) name = "UNKNOWN";
83 umask(0);
84 char path[1024];
85 snprintf(path, sizeof(path), "/tmp/CFSharedStringInstrumentation_%s_%d.txt", name, getpid());
86 fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0666);
87 if (fd <= 0) {
88 int error = errno;
89 const char *errString = strerror(error);
90 fprintf(stderr, "open() failed with error %d (%s)\n", error, errString);
91 }
92 }
93 if (fd > 0) {
94 char *buffer = NULL;
95 char formatString[256];
96 snprintf(formatString, sizeof(formatString), "%%-8d\t%%-16s\t%%.%lds\n", byteCount);
97 int resultCount = asprintf(&buffer, formatString, getpid(), encoding, bytes);
98 if (buffer && resultCount > 0) write(fd, buffer, resultCount);
99 else puts("Couldn't record allocation event");
100 free(buffer);
101 }
102 __CFUnlock(&lock);
103 }
104 #endif //INSTRUMENT_SHARED_STRINGS
105
106 typedef Boolean (*UNI_CHAR_FUNC)(UInt32 flags, UInt8 ch, UniChar *unicodeChar);
107
108 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
109 extern size_t malloc_good_size(size_t size);
110 #endif
111 extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars);
112
113 static void __CFStringAppendFormatCore(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFStringRef (*contextDescFunc)(void *, const void *, const void *, bool, bool *), CFDictionaryRef formatOptions, CFDictionaryRef stringsDictConfig, CFStringRef formatString, CFIndex initialArgPosition, const void *origValues, CFIndex originalValuesSize, va_list args);
114
115 #if defined(DEBUG)
116
117 // We put this into C & Pascal strings if we can't convert
118 #define CONVERSIONFAILURESTR "CFString conversion failed"
119
120 // We set this to true when purging the constant string table, so CFStringDeallocate doesn't assert
121 static Boolean __CFConstantStringTableBeingFreed = false;
122
123 #endif
124
125
126
127 // This section is for CFString compatibility and other behaviors...
128
129 static CFOptionFlags _CFStringCompatibilityMask = 0;
130
131 void _CFStringSetCompatibility(CFOptionFlags mask) {
132 _CFStringCompatibilityMask |= mask;
133 }
134
135 __attribute__((used))
136 CF_INLINE Boolean __CFStringGetCompatibility(CFOptionFlags mask) {
137 return (_CFStringCompatibilityMask & mask) == mask;
138 }
139
140
141
142 // Two constant strings used by CFString; these are initialized in CFStringInitialize
143 CONST_STRING_DECL(kCFEmptyString, "")
144
145 // This is separate for C++
// Payload of the mutable CFString variant; a member of the variants union in struct __CFString.
struct __notInlineMutable {
    void *buffer;                               // Contents buffer
    CFIndex length;                             // String length
    CFIndex capacity;                           // Capacity in bytes
    unsigned int hasGap:1;                      // Currently unused
    unsigned int isFixedCapacity:1;             // Accessed via __CFStrIsFixed() / __CFStrSetIsFixed()
    unsigned int isExternalMutable:1;           // Accessed via __CFStrIsExternalMutable() / __CFStrSetIsExternalMutable()
    unsigned int capacityProvidedExternally:1;  // If set, capacity is only changed when it must grow
#if __LP64__
    unsigned long desiredCapacity:60;           // Client-requested capacity, in characters
#else
    unsigned long desiredCapacity:28;           // Client-requested capacity, in characters
#endif
    CFAllocatorRef contentsAllocator;           // Optional
};                              // The only mutable variant for CFString
161
162
163 /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
164 */
// In-memory layout of a CFString: the runtime base followed by one of several variant payloads.
// Which variant is live is determined by the info bits in base (see the flag predicates below).
struct __CFString {
    CFRuntimeBase base;
    union {     // In many cases the allocated structs are smaller than these
        struct __inline1 {
            CFIndex length;
        } inline1;                              // Bytes follow the length
        struct __notInlineImmutable1 {
            void *buffer;                       // Note that the buffer is in the same place for all non-inline variants of CFString
            CFIndex length;
            CFAllocatorRef contentsDeallocator; // Optional; just the dealloc func is used
        } notInlineImmutable1;                  // This is the usual not-inline immutable CFString
        struct __notInlineImmutable2 {
            void *buffer;
            CFAllocatorRef contentsDeallocator; // Optional; just the dealloc func is used
        } notInlineImmutable2;                  // This is the not-inline immutable CFString when length is stored with the contents (first byte)
        struct __notInlineMutable notInlineMutable; // Mutable strings
    } variants;
};
183
184 /*
185 I = is immutable
186 E = not inline contents
187 U = is Unicode
188 N = has NULL byte
189 L = has length byte
190 D = explicit deallocator for contents (for mutable objects, allocator)
191 C = length field is CFIndex (rather than UInt32); only meaningful for 64-bit, really
192 if needed this bit (valuable real-estate) can be given up for another bit elsewhere, since this info is needed just for 64-bit
193
194 Also need (only for mutable)
195 F = is fixed
196 G = has gap
197 Cap, DesCap = capacity
198
199 B7 B6 B5 B4 B3 B2 B1 B0
200          U  N  L  C  I
201
202 B6 B5
203 0 0 inline contents
204 0 1 E (freed with default allocator)
205 1 0 E (not freed)
206 1 1 E D
207
208 !!! Note: Constant CFStrings use the bit patterns:
209 C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable)
210 D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable)
211 The bit usages should not be modified in a way that would affect these bit patterns.
212 */
213
// Masks and values for the CFString info bits stored in base._cfinfo[CF_INFO_BITS].
// Each Mask/value pair is used as (bits & Mask) == value by the predicates that follow.
enum {
    __kCFFreeContentsWhenDoneMask = 0x020,
    __kCFFreeContentsWhenDone = 0x020,
    __kCFContentsMask = 0x060,                  // The two bits describing contents storage
    __kCFHasInlineContents = 0x000,
    __kCFNotInlineContentsNoFree = 0x040,       // Don't free
    __kCFNotInlineContentsDefaultFree = 0x020,  // Use allocator's free function
    __kCFNotInlineContentsCustomFree = 0x060,   // Use a specially provided free function
    __kCFHasContentsAllocatorMask = 0x060,
    __kCFHasContentsAllocator = 0x060,          // (For mutable strings) use a specially provided allocator
    __kCFHasContentsDeallocatorMask = 0x060,
    __kCFHasContentsDeallocator = 0x060,
    __kCFIsMutableMask = 0x01,
    __kCFIsMutable = 0x01,
    __kCFIsUnicodeMask = 0x10,
    __kCFIsUnicode = 0x10,
    __kCFHasNullByteMask = 0x08,
    __kCFHasNullByte = 0x08,
    __kCFHasLengthByteMask = 0x04,
    __kCFHasLengthByte = 0x04,
    // !!! Bit 0x02 has been freed up
};
236
237
238 // !!! Assumptions:
239 // Mutable strings are not inline
240 // Compile-time constant strings are not inline
241 // Mutable strings always have explicit length (but they might also have length byte and null byte)
242 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
243 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
244
245 /* The following set of functions and macros need to be updated on change to the bit configuration
246 */
247 CF_INLINE Boolean __CFStrIsMutable(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsMutableMask) == __kCFIsMutable;}
248 CF_INLINE Boolean __CFStrIsInline(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFContentsMask) == __kCFHasInlineContents;}
249 CF_INLINE Boolean __CFStrFreeContentsWhenDone(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFFreeContentsWhenDoneMask) == __kCFFreeContentsWhenDone;}
250 CF_INLINE Boolean __CFStrHasContentsDeallocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsDeallocatorMask) == __kCFHasContentsDeallocator;}
251 CF_INLINE Boolean __CFStrIsUnicode(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) == __kCFIsUnicode;}
252 CF_INLINE Boolean __CFStrIsEightBit(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) != __kCFIsUnicode;}
253 CF_INLINE Boolean __CFStrHasNullByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasNullByteMask) == __kCFHasNullByte;}
254 CF_INLINE Boolean __CFStrHasLengthByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte;}
255 CF_INLINE Boolean __CFStrHasExplicitLength(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & (__kCFIsMutableMask | __kCFHasLengthByteMask)) != __kCFHasLengthByte;} // Has explicit length if (1) mutable or (2) not mutable and no length byte
256 CF_INLINE Boolean __CFStrIsConstant(CFStringRef str) {
257 #if __LP64__
258 return str->base._rc == 0;
259 #else
260 return (str->base._cfinfo[CF_RC_BITS]) == 0;
261 #endif
262 }
263
264 CF_INLINE SInt32 __CFStrSkipAnyLengthByte(CFStringRef str) {return ((str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents
265
266 /* Returns ptr to the buffer (which might include the length byte).
267 */
268 CF_INLINE const void *__CFStrContents(CFStringRef str) {
269 if (__CFStrIsInline(str)) {
270 return (const void *)(((uintptr_t)&(str->variants)) + (__CFStrHasExplicitLength(str) ? sizeof(CFIndex) : 0));
271 } else { // Not inline; pointer is always word 2
272 return str->variants.notInlineImmutable1.buffer;
273 }
274 }
275
276 static CFAllocatorRef *__CFStrContentsDeallocatorPtr(CFStringRef str) {
277 return __CFStrHasExplicitLength(str) ? &(((CFMutableStringRef)str)->variants.notInlineImmutable1.contentsDeallocator) : &(((CFMutableStringRef)str)->variants.notInlineImmutable2.contentsDeallocator); }
278
279 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
280 CF_INLINE CFAllocatorRef __CFStrContentsDeallocator(CFStringRef str) {
281 return *__CFStrContentsDeallocatorPtr(str);
282 }
283
284 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
285 CF_INLINE void __CFStrSetContentsDeallocator(CFStringRef str, CFAllocatorRef allocator) {
286 if (!(0 || 0)) CFRetain(allocator);
287 *__CFStrContentsDeallocatorPtr(str) = allocator;
288 }
289
290 static CFAllocatorRef *__CFStrContentsAllocatorPtr(CFStringRef str) {
291 CFAssert(!__CFStrIsInline(str), __kCFLogAssertion, "Asking for contents allocator of inline string");
292 CFAssert(__CFStrIsMutable(str), __kCFLogAssertion, "Asking for contents allocator of an immutable string");
293 return (CFAllocatorRef *)&(str->variants.notInlineMutable.contentsAllocator);
294 }
295
296 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
297 CF_INLINE CFAllocatorRef __CFStrContentsAllocator(CFMutableStringRef str) {
298 return *(__CFStrContentsAllocatorPtr(str));
299 }
300
301 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
302 CF_INLINE void __CFStrSetContentsAllocator(CFMutableStringRef str, CFAllocatorRef allocator) {
303 if (!(0 || 0)) CFRetain(allocator);
304 *(__CFStrContentsAllocatorPtr(str)) = allocator;
305 }
306
307 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
308 */
309 CF_INLINE CFIndex __CFStrLength(CFStringRef str) {
310 if (__CFStrHasExplicitLength(str)) {
311 if (__CFStrIsInline(str)) {
312 return str->variants.inline1.length;
313 } else {
314 return str->variants.notInlineImmutable1.length;
315 }
316 } else {
317 return (CFIndex)(*((uint8_t *)__CFStrContents(str)));
318 }
319 }
320
321 CF_INLINE CFIndex __CFStrLength2(CFStringRef str, const void *buffer) {
322 if (__CFStrHasExplicitLength(str)) {
323 if (__CFStrIsInline(str)) {
324 return str->variants.inline1.length;
325 } else {
326 return str->variants.notInlineImmutable1.length;
327 }
328 } else {
329 return (CFIndex)(*((uint8_t *)buffer));
330 }
331 }
332
333
334 Boolean __CFStringIsEightBit(CFStringRef str) {
335 return __CFStrIsEightBit(str);
336 }
337
338 /* Sets the content pointer for immutable or mutable strings.
339 */
340 CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p) {
341 // XXX_PCB catch all writes for mutable string case.
342 __CFAssignWithWriteBarrier((void **)&((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p);
343 }
344 CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v) {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._cfinfo[CF_INFO_BITS], 6, 0, v);}
345
346 CF_INLINE void __CFStrSetExplicitLength(CFStringRef str, CFIndex v) {
347 if (__CFStrIsInline(str)) {
348 ((CFMutableStringRef)str)->variants.inline1.length = v;
349 } else {
350 ((CFMutableStringRef)str)->variants.notInlineImmutable1.length = v;
351 }
352 }
353
354 CF_INLINE void __CFStrSetUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= __kCFIsUnicode;}
355 CF_INLINE void __CFStrClearUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~__kCFIsUnicode;}
356 CF_INLINE void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= (__kCFHasLengthByte | __kCFHasNullByte);}
357 CF_INLINE void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~(__kCFHasLengthByte | __kCFHasNullByte);}
358
359
360 // Assumption: The following set of inlines (using str->variants.notInlineMutable) are called with mutable strings only
361 CF_INLINE Boolean __CFStrIsFixed(CFStringRef str) {return str->variants.notInlineMutable.isFixedCapacity;}
362 CF_INLINE Boolean __CFStrIsExternalMutable(CFStringRef str) {return str->variants.notInlineMutable.isExternalMutable;}
363 CF_INLINE Boolean __CFStrHasContentsAllocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsAllocatorMask) == __kCFHasContentsAllocator;}
364 CF_INLINE void __CFStrSetIsFixed(CFMutableStringRef str) {str->variants.notInlineMutable.isFixedCapacity = 1;}
365 CF_INLINE void __CFStrSetIsExternalMutable(CFMutableStringRef str) {str->variants.notInlineMutable.isExternalMutable = 1;}
366 //CF_INLINE void __CFStrSetHasGap(CFMutableStringRef str) {str->variants.notInlineMutable.hasGap = 1;} currently unused
367
368 // If capacity is provided externally, we only change it when we need to grow beyond it
369 CF_INLINE Boolean __CFStrCapacityProvidedExternally(CFStringRef str) {return str->variants.notInlineMutable.capacityProvidedExternally;}
370 CF_INLINE void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 1;}
371 CF_INLINE void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 0;}
372
373 // "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
374 CF_INLINE CFIndex __CFStrCapacity(CFStringRef str) {return str->variants.notInlineMutable.capacity;}
375 CF_INLINE void __CFStrSetCapacity(CFMutableStringRef str, CFIndex cap) {str->variants.notInlineMutable.capacity = cap;}
376
377 // "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
378 CF_INLINE CFIndex __CFStrDesiredCapacity(CFStringRef str) {return str->variants.notInlineMutable.desiredCapacity;}
379 CF_INLINE void __CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex size) {str->variants.notInlineMutable.desiredCapacity = size;}
380
381
382 static void *__CFStrAllocateMutableContents(CFMutableStringRef str, CFIndex size) {
383 void *ptr;
384 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
385 ptr = CFAllocatorAllocate(alloc, size, 0);
386 if (__CFOASafe) __CFSetLastAllocationEventName(ptr, "CFString (store)");
387 return ptr;
388 }
389
390 static void __CFStrDeallocateMutableContents(CFMutableStringRef str, void *buffer) {
391 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
392 if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str) && (0)) {
393 // do nothing
394 } else if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
395 // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle.
396 auto_zone_release(objc_collectableZone(), buffer);
397 } else {
398 CFAllocatorDeallocate(alloc, buffer);
399 }
400 }
401
402
403
404
405 /* CFString specific init flags
406 Note that you cannot count on the external buffer not being copied.
407 Also, if you specify an external buffer, you should not change it behind the CFString's back.
408 */
// CFString-specific creation flags. Note that kCFStringNoCopyNoFreeProvidedContents (0x30000)
// contains the kCFStringNoCopyProvidedContents bit (0x20000) and the kCFStringPascal bit (0x10000).
enum {
    __kCFThinUnicodeIfPossible = 0x1000000,     /* See if the Unicode contents can be thinned down to 8-bit */
    kCFStringPascal = 0x10000,                  /* Indicating that the string data has a Pascal string structure (length byte at start) */
    kCFStringNoCopyProvidedContents = 0x20000,  /* Don't copy the provided string contents if possible; free it when no longer needed */
    kCFStringNoCopyNoFreeProvidedContents = 0x30000 /* Don't copy the provided string contents if possible; don't free it when no longer needed */
};
415
416 /* System Encoding.
417 */
418 static CFStringEncoding __CFDefaultSystemEncoding = kCFStringEncodingInvalidId;
419 static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId;
420 CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId;
421
422
423 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
424 #define __defaultEncoding kCFStringEncodingMacRoman
425 #elif DEPLOYMENT_TARGET_WINDOWS
426 #define __defaultEncoding kCFStringEncodingWindowsLatin1
427 #else
428 #warning This value must match __CFGetConverter condition in CFStringEncodingConverter.c
429 #define __defaultEncoding kCFStringEncodingISOLatin1
430 #endif
431
432 CFStringEncoding CFStringGetSystemEncoding(void) {
433 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) {
434 __CFDefaultSystemEncoding = __defaultEncoding;
435 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding);
436 __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? (UNI_CHAR_FUNC)converter->toUnicode : NULL);
437 }
438 return __CFDefaultSystemEncoding;
439 }
440
441 // Fast version for internal use
442
443 CF_INLINE CFStringEncoding __CFStringGetSystemEncoding(void) {
444 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) (void)CFStringGetSystemEncoding();
445 return __CFDefaultSystemEncoding;
446 }
447
448 CFStringEncoding CFStringFileSystemEncoding(void) {
449 if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) {
450 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS
451 __CFDefaultFileSystemEncoding = kCFStringEncodingUTF8;
452 #else
453 __CFDefaultFileSystemEncoding = CFStringGetSystemEncoding();
454 #endif
455 }
456
457 return __CFDefaultFileSystemEncoding;
458 }
459
460 /* ??? Is returning length when no other answer is available the right thing?
461 !!! All of the (length > (LONG_MAX / N)) type checks are to avoid wrap-around and eventual malloc overflow in the client
462 */
463 CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) {
464 if (encoding == kCFStringEncodingUTF8) {
465 return (length > (LONG_MAX / 3)) ? kCFNotFound : (length * 3);
466 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32
467 return (length > (LONG_MAX / sizeof(UTF32Char))) ? kCFNotFound : (length * sizeof(UTF32Char));
468 } else {
469 encoding &= 0xFFF; // Mask off non-base part
470 }
471 switch (encoding) {
472 case kCFStringEncodingUnicode:
473 return (length > (LONG_MAX / sizeof(UniChar))) ? kCFNotFound : (length * sizeof(UniChar));
474
475 case kCFStringEncodingNonLossyASCII:
476 return (length > (LONG_MAX / 6)) ? kCFNotFound : (length * 6); // 1 Unichar can expand to 6 bytes
477
478 case kCFStringEncodingMacRoman:
479 case kCFStringEncodingWindowsLatin1:
480 case kCFStringEncodingISOLatin1:
481 case kCFStringEncodingNextStepLatin:
482 case kCFStringEncodingASCII:
483 return length / sizeof(uint8_t);
484
485 default:
486 return length / sizeof(uint8_t);
487 }
488 }
489
490
491 /* Returns whether the indicated encoding can be stored in 8-bit chars
492 */
493 CF_INLINE Boolean __CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding) {
494 switch (encoding & 0xFFF) { // just use encoding base
495 case kCFStringEncodingInvalidId:
496 case kCFStringEncodingUnicode:
497 case kCFStringEncodingNonLossyASCII:
498 return false;
499
500 case kCFStringEncodingMacRoman:
501 case kCFStringEncodingWindowsLatin1:
502 case kCFStringEncodingISOLatin1:
503 case kCFStringEncodingNextStepLatin:
504 case kCFStringEncodingASCII:
505 return true;
506
507 default: return false;
508 }
509 }
510
511 /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
512 For 10.9-linked apps, we've set this encoding to ASCII for all cases; see <rdar://problem/3597233>
513 */
514 CFStringEncoding __CFStringComputeEightBitStringEncoding(void) {
515 // This flag prevents recursive entry into __CFStringComputeEightBitStringEncoding
516 static Boolean __CFStringIsBeingInitialized2 = false;
517 if (__CFStringIsBeingInitialized2) return kCFStringEncodingASCII;
518 __CFStringIsBeingInitialized2 = true;
519
520 Boolean useAscii = true;
521 __CFStringIsBeingInitialized2 = false;
522 if (useAscii) {
523 __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII;
524 } else {
525 if (__CFDefaultEightBitStringEncoding == kCFStringEncodingInvalidId) {
526 CFStringEncoding systemEncoding = CFStringGetSystemEncoding();
527 if (systemEncoding == kCFStringEncodingInvalidId) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
528 return kCFStringEncodingASCII;
529 } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding)) {
530 __CFDefaultEightBitStringEncoding = systemEncoding;
531 } else {
532 __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII;
533 }
534 }
535 }
536 return __CFDefaultEightBitStringEncoding;
537 }
538
539 /* Returns whether the provided bytes can be stored in ASCII
540 */
541 CF_INLINE Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
542 #if __LP64__
543 /* A bit of unrolling; go by 32s, 16s, and 8s first */
544 while (len >= 32) {
545 uint64_t val = *(const uint64_t *)bytes;
546 uint64_t hiBits = (val & 0x8080808080808080ULL); // More efficient to collect this rather than do a conditional at every step
547 bytes += 8;
548 val = *(const uint64_t *)bytes;
549 hiBits |= (val & 0x8080808080808080ULL);
550 bytes += 8;
551 val = *(const uint64_t *)bytes;
552 hiBits |= (val & 0x8080808080808080ULL);
553 bytes += 8;
554 val = *(const uint64_t *)bytes;
555 if (hiBits | (val & 0x8080808080808080ULL)) return false;
556 bytes += 8;
557 len -= 32;
558 }
559
560 while (len >= 16) {
561 uint64_t val = *(const uint64_t *)bytes;
562 uint64_t hiBits = (val & 0x8080808080808080ULL);
563 bytes += 8;
564 val = *(const uint64_t *)bytes;
565 if (hiBits | (val & 0x8080808080808080ULL)) return false;
566 bytes += 8;
567 len -= 16;
568 }
569
570 while (len >= 8) {
571 uint64_t val = *(const uint64_t *)bytes;
572 if (val & 0x8080808080808080ULL) return false;
573 bytes += 8;
574 len -= 8;
575 }
576 #endif
577 /* Go by 4s */
578 while (len >= 4) {
579 uint32_t val = *(const uint32_t *)bytes;
580 if (val & 0x80808080U) return false;
581 bytes += 4;
582 len -= 4;
583 }
584 /* Handle the rest one byte at a time */
585 while (len--) {
586 if (*bytes++ & 0x80) return false;
587 }
588
589 return true;
590 }
591
592 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
593 */
594 CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) {
595 // If the encoding is the same as the 8-bit CFString encoding, we can just use the bytes as-is.
596 // One exception is ASCII, which unfortunately needs to mean ISOLatin1 for compatibility reasons <rdar://problem/5458321>.
597 if (encoding == __CFStringGetEightBitStringEncoding() && encoding != kCFStringEncodingASCII) return true;
598 if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true;
599 return false;
600 }
601
602
603 /* Returns whether a length byte can be tacked on to a string of the indicated length.
604 */
605 CF_INLINE Boolean __CFCanUseLengthByte(CFIndex len) {
606 #define __kCFMaxPascalStrLen 255
607 return (len <= __kCFMaxPascalStrLen) ? true : false;
608 }
609
610 /* Various string assertions
611 */
// Validates that cf has the CFString type ID.
#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
// Asserts 0 <= idx < length of cf.
#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
// Asserts idx >= 0 and idx + count <= length of cf. Arguments are parenthesized so that expression arguments keep their meaning.
#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && ((idx) + (count)) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
// Validates the type of cf and asserts that it is mutable.
#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
// Validates the type of cf and asserts that it is mutable and external-mutable.
#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
// Asserts idx >= 0.
#define __CFAssertIsNotNegative(idx) CFAssert2((idx) >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
// Asserts that reqLen does not exceed the desired capacity when cf has fixed capacity.
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
619
620
621 /* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity. This function will return -1 if the new capacity is just too big (> LONG_MAX).
622 Additional complications are applied in the following order:
623 - desiredCapacity, which is the minimum (except initially things can be at zero)
624 - rounding up to factor of 8
625 - compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256
626 - we need to make sure GROWFACTOR computation doesn't suffer from overflow issues on 32-bit, hence the casting to unsigned. Normally for required capacity of C bytes, the allocated space is (3C+1)/2. If C > ULONG_MAX/3, we instead simply return LONG_MAX
627 */
/* Capacity below which the buffer is considered oversized: half of c.
   The argument is parenthesized so that expression arguments such as (a + b) are halved as a whole. */
#define SHRINKFACTOR(c) ((c) / 2)

/* Capacity to allocate when extra room is wanted: (3c + 1) / 2.
   On 32-bit, values at or above ULONG_MAX / 3 are clamped instead of multiplied, to avoid wrap-around. */
#if __LP64__
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
#else
#define GROWFACTOR(c) (((c) >= (ULONG_MAX / 3UL)) ? __CFMax(LONG_MAX - 4095, (c)) : (((unsigned long)(c) * 3 + 1) / 2))
#endif
635
/* Computes the capacity (in bytes) the contents buffer of str should have.
     str            - mutable string; its desired capacity, fixed flag and allocator are consulted
     reqCapacity    - number of bytes required
     capacity       - current capacity in bytes
     leaveExtraRoom - if true, grow by GROWFACTOR rather than to exactly reqCapacity
     charSize       - multiplier applied to the desired capacity (which is kept in characters)
   Returns `capacity` unchanged when no resize is called for, the new capacity otherwise, or -1
   when the required or resulting capacity exceeds LONG_MAX.
   NOTE(review): reqCapacity/newCapacity are unsigned long while capacity/desiredCapacity are
   CFIndex, so the comparisons between them are mixed signed/unsigned -- presumably all values
   are non-negative here; confirm against callers.
*/
CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, unsigned long reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) {
    if (capacity != 0 || reqCapacity != 0) {    /* If initially zero, and space not needed, leave it at that... */
        if ((capacity < reqCapacity) ||         /* We definitely need the room... */
            (!__CFStrCapacityProvidedExternally(str) &&         /* Assuming we control the capacity... */
             ((reqCapacity < SHRINKFACTOR(capacity)) ||         /* ...we have too much room! */
              (!leaveExtraRoom && (reqCapacity < capacity))))) {        /* ...we need to eliminate the extra space... */
            if (reqCapacity > LONG_MAX) return -1;  /* Too big any way you cut it */
            unsigned long newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity; /* Grow by 3/2 if extra room is desired */
            CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize;   // desired capacity converted to bytes
            if (newCapacity < desiredCapacity) {        /* If less than desired, bump up to desired */
                newCapacity = desiredCapacity;
            } else if (__CFStrIsFixed(str)) {           /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
                newCapacity = __CFMax(desiredCapacity, reqCapacity);    /* !!! So, fixed is not really fixed, but "tight" */
            }
            if (__CFStrHasContentsAllocator(str)) {     /* Also apply any preferred size from the allocator */
                newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0);
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
            } else {
                // No custom contents allocator: round to a size malloc would hand out anyway
                newCapacity = malloc_good_size(newCapacity);
#endif
            }
            return (newCapacity > LONG_MAX) ? -1 : (CFIndex)newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
        }
    }
    return capacity;    // No change needed
}
662
663
664 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
665 numBlocks is current total number of blocks within buffer.
666 blockSize is the size of each block in bytes
667 ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
668 insertLength is the final spacing between the remaining blocks
669
670 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO
671 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
672 if insertLength = 0, result = A B D G H
673
674 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
675 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
676
677 */
/* A run of bytes that rearrangeBlocks() keeps, together with how far it has to move.
   All three fields are in bytes (rearrangeBlocks() multiplies block counts by blockSize before storing them).
*/
typedef struct _CFStringDeferredRange {
    CFIndex beginning;  // Byte offset in the buffer where the run currently starts
    CFIndex length;     // Number of bytes in the run
    CFIndex shift;      // Signed byte distance the run is moved by (destination = beginning + shift)
} CFStringDeferredRange;
683
/* Bookkeeping for the stack of CFStringDeferredRange elements used by rearrangeBlocks(), push() and pop().
   NOTE: rearrangeBlocks() initializes this struct positionally, so the field order must not change.
*/
typedef struct _CFStringStackInfo {
    CFIndex capacity;               // Capacity (if capacity == count, need to realloc to add another)
    CFIndex count;                  // Number of elements actually stored
    CFStringDeferredRange *stack;   // Element storage; starts out as a caller-provided array, replaced by heap storage when push() grows it
    Boolean hasMalloced;            // Indicates "stack" is allocated and needs to be deallocated when done
    char _padding[3];               // Explicit filler after the Boolean; never read by the code here
} CFStringStackInfo;
691
692 CF_INLINE void pop (CFStringStackInfo *si, CFStringDeferredRange *topRange) {
693 si->count = si->count - 1;
694 *topRange = si->stack[si->count];
695 }
696
697 CF_INLINE void push (CFStringStackInfo *si, const CFStringDeferredRange *newRange) {
698 if (si->count == si->capacity) {
699 // increase size of the stack
700 si->capacity = (si->capacity + 4) * 2;
701 if (si->hasMalloced) {
702 si->stack = (CFStringDeferredRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, si->stack, si->capacity * sizeof(CFStringDeferredRange), 0);
703 } else {
704 CFStringDeferredRange *newStack = (CFStringDeferredRange *)CFAllocatorAllocate(kCFAllocatorSystemDefault, si->capacity * sizeof(CFStringDeferredRange), 0);
705 memmove(newStack, si->stack, si->count * sizeof(CFStringDeferredRange));
706 si->stack = newStack;
707 si->hasMalloced = true;
708 }
709 }
710 si->stack[si->count] = *newRange;
711 si->count = si->count + 1;
712 }
713
714 static void rearrangeBlocks(
715 uint8_t *buffer,
716 CFIndex numBlocks,
717 CFIndex blockSize,
718 const CFRange *ranges,
719 CFIndex numRanges,
720 CFIndex insertLength) {
721
722 #define origStackSize 10
723 CFStringDeferredRange origStack[origStackSize];
724 CFStringStackInfo si = {origStackSize, 0, origStack, false, {0, 0, 0}};
725 CFStringDeferredRange currentNonRange = {0, 0, 0};
726 CFIndex currentRange = 0;
727 CFIndex amountShifted = 0;
728
729 // must have at least 1 range left.
730
731 while (currentRange < numRanges) {
732 currentNonRange.beginning = (ranges[currentRange].location + ranges[currentRange].length) * blockSize;
733 if ((numRanges - currentRange) == 1) {
734 // at the end.
735 currentNonRange.length = numBlocks * blockSize - currentNonRange.beginning;
736 if (currentNonRange.length == 0) break;
737 } else {
738 currentNonRange.length = (ranges[currentRange + 1].location * blockSize) - currentNonRange.beginning;
739 }
740 currentNonRange.shift = amountShifted + (insertLength * blockSize) - (ranges[currentRange].length * blockSize);
741 amountShifted = currentNonRange.shift;
742 if (amountShifted <= 0) {
743 // process current item and rest of stack
744 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
745 while (si.count > 0) {
746 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
747 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
748 }
749 } else {
750 // add currentNonRange to stack.
751 push (&si, &currentNonRange);
752 }
753 currentRange++;
754 }
755
756 // no more ranges. if anything is on the stack, process.
757
758 while (si.count > 0) {
759 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
760 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
761 }
762 if (si.hasMalloced) CFAllocatorDeallocate (kCFAllocatorSystemDefault, si.stack);
763 }
764
765 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
766 */
767 static void copyBlocks(
768 const uint8_t *srcBuffer,
769 uint8_t *dstBuffer,
770 CFIndex srcLength,
771 Boolean srcIsUnicode,
772 Boolean dstIsUnicode,
773 const CFRange *ranges,
774 CFIndex numRanges,
775 CFIndex insertLength) {
776
777 CFIndex srcLocationInBytes = 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
778 CFIndex dstLocationInBytes = 0; // ditto
779 CFIndex srcBlockSize = srcIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
780 CFIndex insertLengthInBytes = insertLength * (dstIsUnicode ? sizeof(UniChar) : sizeof(uint8_t));
781 CFIndex rangeIndex = 0;
782 CFIndex srcToDstMultiplier = (srcIsUnicode == dstIsUnicode) ? 1 : (sizeof(UniChar) / sizeof(uint8_t));
783
784 // Loop over the ranges, copying the range to be preserved (right before each range)
785 while (rangeIndex < numRanges) {
786 CFIndex srcLengthInBytes = ranges[rangeIndex].location * srcBlockSize - srcLocationInBytes; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
787 if (srcLengthInBytes > 0) {
788 if (srcIsUnicode == dstIsUnicode) {
789 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLengthInBytes);
790 } else {
791 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLengthInBytes);
792 }
793 }
794 srcLocationInBytes += srcLengthInBytes + ranges[rangeIndex].length * srcBlockSize; // Skip over the just-copied and to-be-deleted stuff
795 dstLocationInBytes += srcLengthInBytes * srcToDstMultiplier + insertLengthInBytes;
796 rangeIndex++;
797 }
798
799 // Do last range (the one beyond last range)
800 if (srcLocationInBytes < srcLength * srcBlockSize) {
801 if (srcIsUnicode == dstIsUnicode) {
802 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLength * srcBlockSize - srcLocationInBytes);
803 } else {
804 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLength * srcBlockSize - srcLocationInBytes);
805 }
806 }
807 }
808
809 /* Call the callback; if it doesn't exist or returns false, then log
810 */
811 static void __CFStringHandleOutOfMemory(CFTypeRef obj) {
812 CFStringRef msg = CFSTR("Out of memory. We suggest restarting the application. If you have an unsaved document, create a backup copy in Finder, then try to save.");
813 {
814 CFLog(kCFLogLevelCritical, CFSTR("%@"), msg);
815 }
816 }
817
/* Reallocates the backing store of the string to accommodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)

   str              the mutable string to resize
   deleteRanges     numDeleteRanges ranges (in characters) whose contents are dropped
   insertLength     number of characters of room left in place of EACH range (new length = old length + insertLength * numDeleteRanges - sum of range lengths)
   makeUnicode      if true, the resulting storage is Unicode even if the string was 8-bit before

   The room that is opened up is not filled in here. The code after each __CFStringHandleOutOfMemory() call is not written to cope with that call returning.
*/
static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) {
    const uint8_t *curContents = (uint8_t *)__CFStrContents(str);
    CFIndex curLength = curContents ? __CFStrLength2(str, curContents) : 0;
    unsigned long newLength;    // We use unsigned to better keep track of overflow

    // Compute new length of the string
    if (numDeleteRanges == 1) {
        newLength = curLength + insertLength - deleteRanges[0].length;
    } else {
        CFIndex cnt;
        newLength = curLength + insertLength * numDeleteRanges;
        for (cnt = 0; cnt < numDeleteRanges; cnt++) newLength -= deleteRanges[cnt].length;
    }

    __CFAssertIfFixedLengthIsOK(str, newLength);

    if (newLength == 0) {
        // A somewhat optimized code-path for this special case, with the following implicit values:
        //   newIsUnicode = false
        //   useLengthAndNullBytes = false
        //   newCharSize = sizeof(uint8_t)
        // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
        // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
        CFIndex curCapacity = __CFStrCapacity(str);
        CFIndex newCapacity = __CFStrNewCapacity(str, 0, curCapacity, true, sizeof(uint8_t));
        if (newCapacity != curCapacity) {   // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
            if (curContents) __CFStrDeallocateMutableContents(str, (uint8_t *)curContents);
            __CFStrSetContentPtr(str, NULL);
            __CFStrSetCapacity(str, 0);
            __CFStrClearCapacityProvidedExternally(str);
            __CFStrClearHasLengthAndNullBytes(str);
            if (!__CFStrIsExternalMutable(str)) __CFStrClearUnicode(str);   // External mutable implies Unicode
        } else {
            // Buffer is kept as is; turn it into an empty 8-bit string (unless it is external mutable, which stays Unicode)
            if (!__CFStrIsExternalMutable(str)) {
                __CFStrClearUnicode(str);
                if (curCapacity >= (int)(sizeof(uint8_t) * 2)) {    // If there's room
                    __CFStrSetHasLengthAndNullBytes(str);
                    ((uint8_t *)curContents)[0] = ((uint8_t *)curContents)[1] = 0;  // Length byte of 0, followed by the null byte
                } else {
                    __CFStrClearHasLengthAndNullBytes(str);
                }
            }
        }
        __CFStrSetExplicitLength(str, 0);
    } else {    /* This else-clause assumes newLength > 0 */
        Boolean oldIsUnicode = __CFStrIsUnicode(str);
        Boolean newIsUnicode = makeUnicode || (oldIsUnicode /* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str);
        CFIndex newCharSize = newIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
        Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */;
        CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0;  /* 2 extra bytes to keep the length byte & null... */
        CFIndex curCapacity = __CFStrCapacity(str);
        // Guard the byte-size computation (newLength * newCharSize + numExtraBytes) against exceeding LONG_MAX
        if (newLength > (LONG_MAX - numExtraBytes) / newCharSize) __CFStringHandleOutOfMemory(str);     // Not expected to return
        CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize);
        if (newCapacity == -1) __CFStringHandleOutOfMemory(str);    // Not expected to return
        Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode);  /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
        uint8_t *newContents;
        if (allocNewBuffer) {
            newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
            if (!newContents) {     // Try allocating without extra room
                newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, false, newCharSize);
                // Since we checked for this above, it shouldn't be the case here, but just in case
                if (newCapacity == -1) __CFStringHandleOutOfMemory(str);    // Not expected to return
                newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
                if (!newContents) __CFStringHandleOutOfMemory(str);         // Not expected to return
            }
        } else {
            newContents = (uint8_t *)curContents;
        }

        // State of the string BEFORE the change; the flags themselves are only updated further down
        Boolean hasLengthAndNullBytes = __CFStrHasLengthByte(str);

        CFAssert1(hasLengthAndNullBytes == __CFStrHasNullByte(str), __kCFLogAssertion, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__);

        // Calculate pointers to the actual string content (skipping over the length byte, if present).  Note that keeping a reference to the base is needed for newContents under GC, since the copy may take a long time.
        const uint8_t *curContentsBody = hasLengthAndNullBytes ? (curContents+1) : curContents;
        uint8_t *newContentsBody = useLengthAndNullBytes ? (newContents+1) : newContents;

        if (curContents) {
            if (oldIsUnicode == newIsUnicode) {
                if (newContentsBody == curContentsBody) {
                    // Same buffer, same character width: shuffle the surviving runs in place
                    rearrangeBlocks(newContentsBody, curLength, newCharSize, deleteRanges, numDeleteRanges, insertLength);
                } else {
                    copyBlocks(curContentsBody, newContentsBody, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
                }
            } else if (newIsUnicode) {  /* this implies we have a new buffer */
                copyBlocks(curContentsBody, newContentsBody, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
            }
            if (allocNewBuffer && __CFStrFreeContentsWhenDone(str)) __CFStrDeallocateMutableContents(str, (void *)curContents);
        }

        // Bring the length/null bytes and the flag bits in line with the new representation
        if (!newIsUnicode) {
            if (useLengthAndNullBytes) {
                newContentsBody[newLength] = 0;     /* Always have null byte, if not unicode */
                newContents[0] = __CFCanUseLengthByte(newLength) ? (uint8_t)newLength : 0;
                if (!hasLengthAndNullBytes) __CFStrSetHasLengthAndNullBytes(str);
            } else {
                if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
            }
            if (oldIsUnicode) __CFStrClearUnicode(str);
        } else {    // New is unicode...
            if (!oldIsUnicode) __CFStrSetUnicode(str);
            if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
        }
        __CFStrSetExplicitLength(str, newLength);

        if (allocNewBuffer) {
            __CFStrSetCapacity(str, newCapacity);
            __CFStrClearCapacityProvidedExternally(str);
            __CFStrSetContentPtr(str, newContents);
        }
    }
}
932
933 /* Same as above, but takes one range (very common case)
934 */
935 CF_INLINE void __CFStringChangeSize(CFMutableStringRef str, CFRange range, CFIndex insertLength, Boolean makeUnicode) {
936 __CFStringChangeSizeMultiple(str, &range, 1, insertLength, makeUnicode);
937 }
938
939
940 #if defined(DEBUG)
941 static Boolean __CFStrIsConstantString(CFStringRef str);
942 #endif
943
944 static void __CFStringDeallocate(CFTypeRef cf) {
945 CFStringRef str = (CFStringRef)cf;
946
947 // If in DEBUG mode, check to see if the string a CFSTR, and complain.
948 CFAssert1(__CFConstantStringTableBeingFreed || !__CFStrIsConstantString((CFStringRef)cf), __kCFLogAssertion, "Tried to deallocate CFSTR(\"%@\")", str);
949
950 if (!__CFStrIsInline(str)) {
951 uint8_t *contents;
952 Boolean isMutable = __CFStrIsMutable(str);
953 if (__CFStrFreeContentsWhenDone(str) && (contents = (uint8_t *)__CFStrContents(str))) {
954 if (isMutable) {
955 __CFStrDeallocateMutableContents((CFMutableStringRef)str, contents);
956 } else {
957 if (__CFStrHasContentsDeallocator(str)) {
958 CFAllocatorRef allocator = __CFStrContentsDeallocator(str);
959 CFAllocatorDeallocate(allocator, contents);
960 if (!(0 || 0 )) CFRelease(allocator);
961 } else {
962 CFAllocatorRef alloc = __CFGetAllocator(str);
963 CFAllocatorDeallocate(alloc, contents);
964 }
965 }
966 }
967 if (isMutable && __CFStrHasContentsAllocator(str)) {
968 CFAllocatorRef allocator = __CFStrContentsAllocator((CFMutableStringRef)str);
969 if (!(0 || 0)) CFRelease(allocator);
970 }
971 }
972 }
973
974 static Boolean __CFStringEqual(CFTypeRef cf1, CFTypeRef cf2) {
975 CFStringRef str1 = (CFStringRef)cf1;
976 CFStringRef str2 = (CFStringRef)cf2;
977 const uint8_t *contents1;
978 const uint8_t *contents2;
979 CFIndex len1;
980
981 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
982 /* !!! We do not need == test, as the CFBase runtime assures this */
983
984 contents1 = (uint8_t *)__CFStrContents(str1);
985 contents2 = (uint8_t *)__CFStrContents(str2);
986 len1 = __CFStrLength2(str1, contents1);
987
988 if (len1 != __CFStrLength2(str2, contents2)) return false;
989
990 contents1 += __CFStrSkipAnyLengthByte(str1);
991 contents2 += __CFStrSkipAnyLengthByte(str2);
992
993 if (__CFStrIsEightBit(str1) && __CFStrIsEightBit(str2)) {
994 return memcmp((const char *)contents1, (const char *)contents2, len1) ? false : true;
995 } else if (__CFStrIsEightBit(str1)) { /* One string has Unicode contents */
996 CFStringInlineBuffer buf;
997 CFIndex buf_idx = 0;
998
999 CFStringInitInlineBuffer(str1, &buf, CFRangeMake(0, len1));
1000 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
1001 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents2)[buf_idx]) return false;
1002 }
1003 } else if (__CFStrIsEightBit(str2)) { /* One string has Unicode contents */
1004 CFStringInlineBuffer buf;
1005 CFIndex buf_idx = 0;
1006
1007 CFStringInitInlineBuffer(str2, &buf, CFRangeMake(0, len1));
1008 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
1009 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents1)[buf_idx]) return false;
1010 }
1011 } else { /* Both strings have Unicode contents */
1012 CFIndex idx;
1013 for (idx = 0; idx < len1; idx++) {
1014 if (((UniChar *)contents1)[idx] != ((UniChar *)contents2)[idx]) return false;
1015 }
1016 }
1017 return true;
1018 }
1019
1020
/* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
If the length is less than or equal to 96, then the hash function is simply the
following (n is the nth UniChar character, starting from 0):

   hash(-1) = length
   hash(n) = hash(n-1) * 257 + unichar(n);
   Hash = hash(length-1) + (hash(length-1) << (length & 31))

If the length is greater than 96, then the above algorithm applies to
characters 0..31, (length/2)-16..(length/2)+15, and length-32..length-1, inclusive;
thus the first, middle, and last 32 characters.

Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401;  67503105 is 257^4 - 256^4
If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.
!!! We haven't updated for LP64 yet

NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.

Hash function was changed between Panther and Tiger, and Tiger and Leopard.

Usage of the two macros: both update a variable named "result" that must be in scope at the point of use.
accessStart and accessEnd are token fragments placed around an index to form the character access
(for example "p[" and "]" give p[0], p[1], ...), and pointer is advanced past the characters consumed.
Both macros expand to a single statement (do { ... } while (0)) and must be followed by a semicolon,
which keeps them safe inside unbraced if/else and loop bodies.
*/
#define HashEverythingLimit 96

#define HashNextFourUniChars(accessStart, accessEnd, pointer) \
    do {result = result * 67503105 + (accessStart 0 accessEnd) * 16974593  + (accessStart 1 accessEnd) * 66049  + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;} while (0)

#define HashNextUniChar(accessStart, accessEnd, pointer) \
    do {result = result * 257 + (accessStart 0 accessEnd); pointer++;} while (0)
1048
1049
1050 /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing.
1051 */
1052 CF_INLINE CFHashCode __CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) {
1053 CFHashCode result = actualLen;
1054 if (len <= HashEverythingLimit) {
1055 const UniChar *end4 = uContents + (len & ~3);
1056 const UniChar *end = uContents + len;
1057 while (uContents < end4) HashNextFourUniChars(uContents[, ], uContents); // First count in fours
1058 while (uContents < end) HashNextUniChar(uContents[, ], uContents); // Then for the last <4 chars, count in ones...
1059 } else {
1060 const UniChar *contents, *end;
1061 contents = uContents;
1062 end = contents + 32;
1063 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1064 contents = uContents + (len >> 1) - 16;
1065 end = contents + 32;
1066 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1067 end = uContents + len;
1068 contents = end - 32;
1069 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1070 }
1071 return result + (result << (actualLen & 31));
1072 }
1073
1074 /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check.
1075 */
1076 CF_INLINE CFHashCode __CFStrHashEightBit(const uint8_t *cContents, CFIndex len) {
1077 #if defined(DEBUG)
1078 if (!__CFCharToUniCharFunc) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
1079 CFIndex cnt;
1080 Boolean err = false;
1081 if (len <= HashEverythingLimit) {
1082 for (cnt = 0; cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1083 } else {
1084 for (cnt = 0; cnt < 32; cnt++) if (cContents[cnt] >= 128) err = true;
1085 for (cnt = (len >> 1) - 16; cnt < (len >> 1) + 16; cnt++) if (cContents[cnt] >= 128) err = true;
1086 for (cnt = (len - 32); cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1087 }
1088 if (err) {
1089 // Can't do log here, as it might be too early
1090 fprintf(stderr, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
1091 }
1092 }
1093 #endif
1094 CFHashCode result = len;
1095 if (len <= HashEverythingLimit) {
1096 const uint8_t *end4 = cContents + (len & ~3);
1097 const uint8_t *end = cContents + len;
1098 while (cContents < end4) HashNextFourUniChars(__CFCharToUniCharTable[cContents[, ]], cContents); // First count in fours
1099 while (cContents < end) HashNextUniChar(__CFCharToUniCharTable[cContents[, ]], cContents); // Then for the last <4 chars, count in ones...
1100 } else {
1101 const uint8_t *contents, *end;
1102 contents = cContents;
1103 end = contents + 32;
1104 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1105 contents = cContents + (len >> 1) - 16;
1106 end = contents + 32;
1107 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1108 end = cContents + len;
1109 contents = end - 32;
1110 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1111 }
1112 return result + (result << (len & 31));
1113 }
1114
1115 // This is for NSStringROMKeySet.
1116 CF_PRIVATE CFHashCode __CFStrHashEightBit2(const uint8_t *cContents, CFIndex len) {
1117 return __CFStrHashEightBit(cContents, len);
1118 }
1119
1120 CFHashCode CFStringHashISOLatin1CString(const uint8_t *bytes, CFIndex len) {
1121 CFHashCode result = len;
1122 if (len <= HashEverythingLimit) {
1123 const uint8_t *end4 = bytes + (len & ~3);
1124 const uint8_t *end = bytes + len;
1125 while (bytes < end4) HashNextFourUniChars(bytes[, ], bytes); // First count in fours
1126 while (bytes < end) HashNextUniChar(bytes[, ], bytes); // Then for the last <4 chars, count in ones...
1127 } else {
1128 const uint8_t *contents, *end;
1129 contents = bytes;
1130 end = contents + 32;
1131 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1132 contents = bytes + (len >> 1) - 16;
1133 end = contents + 32;
1134 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1135 end = bytes + len;
1136 contents = end - 32;
1137 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1138 }
1139 return result + (result << (len & 31));
1140 }
1141
1142 CFHashCode CFStringHashCString(const uint8_t *bytes, CFIndex len) {
1143 return __CFStrHashEightBit(bytes, len);
1144 }
1145
1146 CFHashCode CFStringHashCharacters(const UniChar *characters, CFIndex len) {
1147 return __CFStrHashCharacters(characters, len, len);
1148 }
1149
1150 /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash.
1151 */
1152 CFHashCode CFStringHashNSString(CFStringRef str) {
1153 UniChar buffer[HashEverythingLimit];
1154 CFIndex bufLen; // Number of characters in the buffer for hashing
1155 CFIndex len = 0; // Actual length of the string
1156
1157 len = CF_OBJC_CALLV((NSString *)str, length);
1158 if (len <= HashEverythingLimit) {
1159 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer range:NSMakeRange(0, len));
1160 bufLen = len;
1161 } else {
1162 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer range:NSMakeRange(0, 32));
1163 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer+32 range:NSMakeRange((len >> 1) - 16, 32));
1164 (void)CF_OBJC_CALLV((NSString *)str, getCharacters:buffer+64 range:NSMakeRange(len - 32, 32));
1165 bufLen = HashEverythingLimit;
1166 }
1167 return __CFStrHashCharacters(buffer, bufLen, len);
1168 }
1169
1170 CFHashCode __CFStringHash(CFTypeRef cf) {
1171 /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
1172 CFStringRef str = (CFStringRef)cf;
1173 const uint8_t *contents = (uint8_t *)__CFStrContents(str);
1174 CFIndex len = __CFStrLength2(str, contents);
1175
1176 if (__CFStrIsEightBit(str)) {
1177 contents += __CFStrSkipAnyLengthByte(str);
1178 return __CFStrHashEightBit(contents, len);
1179 } else {
1180 return __CFStrHashCharacters((const UniChar *)contents, len, len);
1181 }
1182 }
1183
1184
1185 static CFStringRef __CFStringCopyDescription(CFTypeRef cf) {
1186 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf, __CFGetAllocator(cf), cf);
1187 }
1188
1189 static CFStringRef __CFStringCopyFormattingDescription(CFTypeRef cf, CFDictionaryRef formatOptions) {
1190 return (CFStringRef)CFStringCreateCopy(__CFGetAllocator(cf), (CFStringRef)cf);
1191 }
1192
// Type ID of CFString; stays _kCFRuntimeNotATypeID until __CFStringInitialize() has registered the class
static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID;

// Function pointer type CFStringCreateCopy is cast to so that it can be stored in the class structure below
typedef CFTypeRef (*CF_STRING_CREATE_COPY)(CFAllocatorRef alloc, CFTypeRef theString);

/* Runtime class description of CFString, registered by __CFStringInitialize().
   The initializer is positional; the slot names given below (other than "init") are inferred from the
   functions stored in them -- confirm against the CFRuntimeClass declaration before relying on them.
*/
static const CFRuntimeClass __CFStringClass = {
    _kCFRuntimeScannedObject,
    "CFString",
    NULL,      // init
    (CF_STRING_CREATE_COPY)CFStringCreateCopy,     // presumably the copy callback
    __CFStringDeallocate,                          // presumably the finalize/deallocate callback
    __CFStringEqual,                               // presumably the equality callback
    __CFStringHash,                                // presumably the hash callback
    __CFStringCopyFormattingDescription,           // presumably the formatting description callback
    __CFStringCopyDescription                      // presumably the (debug) description callback
};
1208
1209 CF_PRIVATE void __CFStringInitialize(void) {
1210 static dispatch_once_t initOnce;
1211 dispatch_once(&initOnce, ^{ __kCFStringTypeID = _CFRuntimeRegisterClass(&__CFStringClass); });
1212 }
1213
1214
/* Returns the type ID recorded for CFString in __kCFStringTypeID
   (_kCFRuntimeNotATypeID as long as __CFStringInitialize() has not run).
*/
CFTypeID CFStringGetTypeID(void) {
    return __kCFStringTypeID;
}
1218
1219
/* Tells whether str has Unicode (as opposed to 8-bit) contents.
   The dispatch macro comes first: presumably, when str is really an Objective-C NSString, it answers by sending
   _encodingCantBeStoredInEightBitCFString to the object and returns from this function -- confirm against the
   macro's definition. Otherwise the answer comes from the CFString's own flag.
*/
static Boolean CFStrIsUnicode(CFStringRef str) {
    CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, Boolean, (NSString *)str, _encodingCantBeStoredInEightBitCFString);
    return __CFStrIsUnicode(str);
}
1224
1225
/* Sentinel (not a real allocator, never to be dereferenced) that can be passed as contentsDeallocator to mean
   "free the contents with the string's own allocator"; the funnel function replaces it with that allocator.
*/
#define ALLOCATORSFREEFUNC ((CFAllocatorRef)-1)
1227
1228 /* contentsDeallocator indicates how to free the data if it's noCopy == true:
1229 kCFAllocatorNull: don't free
1230 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
1231 NULL: default allocator
1232 otherwise it's the allocator that should be used (it will be explicitly stored)
1233 if noCopy == false, then freeFunc should be ALLOCATORSFREEFUNC
1234 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
1235 possiblyExternalFormat indicates that the bytes might have BOM and be swapped
1236 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
1237 numBytes contains the actual number of bytes in "bytes", including Length byte,
1238 BUT not the NULL byte at the end
1239 bytes should not contain BOM characters
1240 !!! Various flags should be combined to reduce number of arguments, if possible
1241 */
1242 CF_PRIVATE CFStringRef __CFStringCreateImmutableFunnel3(
1243 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1244 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1245 CFAllocatorRef contentsDeallocator, UInt32 converterFlags) {
1246
1247 CFMutableStringRef str = NULL;
1248 CFVarWidthCharBuffer vBuf;
1249 CFIndex size;
1250 Boolean useLengthByte = false;
1251 Boolean useNullByte = false;
1252 Boolean useInlineData = false;
1253
1254 #if INSTRUMENT_SHARED_STRINGS
1255 const char *recordedEncoding;
1256 char encodingBuffer[128];
1257 if (encoding == kCFStringEncodingUnicode) recordedEncoding = "Unicode";
1258 else if (encoding == kCFStringEncodingASCII) recordedEncoding = "ASCII";
1259 else if (encoding == kCFStringEncodingUTF8) recordedEncoding = "UTF8";
1260 else if (encoding == kCFStringEncodingMacRoman) recordedEncoding = "MacRoman";
1261 else {
1262 snprintf(encodingBuffer, sizeof(encodingBuffer), "0x%lX", (unsigned long)encoding);
1263 recordedEncoding = encodingBuffer;
1264 }
1265 #endif
1266
1267 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1268
1269 if (contentsDeallocator == ALLOCATORSFREEFUNC) {
1270 contentsDeallocator = alloc;
1271 } else if (contentsDeallocator == NULL) {
1272 contentsDeallocator = __CFGetDefaultAllocator();
1273 }
1274
1275 if ((NULL != kCFEmptyString) && (numBytes == 0) && _CFAllocatorIsSystemDefault(alloc)) { // If we are using the system default allocator, and the string is empty, then use the empty string!
1276 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
1277 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1278 }
1279 return (CFStringRef)CFRetain(kCFEmptyString); // Quick exit; won't catch all empty strings, but most
1280 }
1281
1282 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1283
1284 vBuf.shouldFreeChars = false; // We use this to remember to free the buffer possibly allocated by decode
1285
1286 // Record whether we're starting out with an ASCII-superset string, because we need to know this later for the string ROM; this may get changed later if we successfully convert down from Unicode. We only record this once because __CFCanUseEightBitCFStringForBytes() can be expensive.
1287 Boolean stringSupportsEightBitCFRepresentation = encoding != kCFStringEncodingUnicode && __CFCanUseEightBitCFStringForBytes((const uint8_t *)bytes, numBytes, encoding);
1288
1289 // We may also change noCopy within this function if we have to decode the string into an external buffer. We do not want to avoid the use of the string ROM merely because we tried to be efficient and reuse the decoded buffer for the CFString's external storage. Therefore, we use this variable to track whether we actually can ignore the noCopy flag (which may or may not be set anyways).
1290 Boolean stringROMShouldIgnoreNoCopy = false;
1291
1292 // First check to see if the data needs to be converted...
1293 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
1294
1295 if ((encoding == kCFStringEncodingUnicode && possiblyExternalFormat) || (encoding != kCFStringEncodingUnicode && ! stringSupportsEightBitCFRepresentation)) {
1296 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0);
1297 CFIndex realNumBytes = numBytes - (hasLengthByte ? 1 : 0);
1298 Boolean usingPassedInMemory = false;
1299
1300 vBuf.allocator = kCFAllocatorSystemDefault; // We don't want to use client's allocator for temp stuff
1301 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
1302
1303 if (!__CFStringDecodeByteStream3((const uint8_t *)realBytes, realNumBytes, encoding, false, &vBuf, &usingPassedInMemory, converterFlags)) {
1304 // Note that if the string can't be created, we don't free the buffer, even if there is a contents deallocator. This is on purpose.
1305 return NULL;
1306 }
1307
1308 encoding = vBuf.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUnicode;
1309
1310 // Update our flag according to whether the decoded buffer is ASCII
1311 stringSupportsEightBitCFRepresentation = vBuf.isASCII;
1312
1313 if (!usingPassedInMemory) {
1314
1315 // Because __CFStringDecodeByteStream3() allocated our buffer, it's OK for us to free it if we can get the string from the ROM.
1316 stringROMShouldIgnoreNoCopy = true;
1317
1318 // Make the parameters fit the new situation
1319 numBytes = vBuf.isASCII ? vBuf.numChars : (vBuf.numChars * sizeof(UniChar));
1320 hasLengthByte = hasNullByte = false;
1321
1322 // Get rid of the original buffer if its not being used
1323 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1324 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1325 }
1326 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1327
1328 // See if we can reuse any storage the decode func might have allocated
1329 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1330
1331 if (vBuf.shouldFreeChars && (alloc == vBuf.allocator) && encoding == kCFStringEncodingUnicode) {
1332 vBuf.shouldFreeChars = false; // Transferring ownership to the CFString
1333 bytes = CFAllocatorReallocate(vBuf.allocator, (void *)vBuf.chars.unicode, numBytes, 0); // Tighten up the storage
1334 noCopy = true;
1335 #if INSTRUMENT_SHARED_STRINGS
1336 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-NoCopy";
1337 else recordedEncoding = "ForeignUnicode-NoCopy";
1338 #endif
1339 } else {
1340 #if INSTRUMENT_SHARED_STRINGS
1341 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-Copy";
1342 else recordedEncoding = "ForeignUnicode-Copy";
1343 #endif
1344 bytes = vBuf.chars.unicode;
1345 noCopy = false; // Can't do noCopy anymore
1346 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1347 }
1348
1349 }
1350
1351 // At this point, all necessary input arguments have been changed to reflect the new state
1352
1353 } else if (encoding == kCFStringEncodingUnicode && tryToReduceUnicode) { // Check to see if we can reduce Unicode to ASCII
1354 CFIndex cnt;
1355 CFIndex len = numBytes / sizeof(UniChar);
1356 Boolean allASCII = true;
1357
1358 for (cnt = 0; cnt < len; cnt++) if (((const UniChar *)bytes)[cnt] > 127) {
1359 allASCII = false;
1360 break;
1361 }
1362
1363 if (allASCII) { // Yes we can!
1364 uint8_t *ptr, *mem;
1365 Boolean newHasLengthByte = __CFCanUseLengthByte(len);
1366 numBytes = (len + 1 + (newHasLengthByte ? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte
1367 // See if we can use that temporary local buffer in vBuf...
1368 if (numBytes >= __kCFVarWidthLocalBufferSize) {
1369 mem = ptr = (uint8_t *)CFAllocatorAllocate(alloc, numBytes, 0);
1370 if (__CFOASafe) __CFSetLastAllocationEventName(mem, "CFString (store)");
1371 } else {
1372 mem = ptr = (uint8_t *)(vBuf.localBuffer);
1373 }
1374 if (mem) { // If we can't allocate memory for some reason, use what we had (that is, as if we didn't have all ASCII)
1375 // Copy the Unicode bytes into the new ASCII buffer
1376 hasLengthByte = newHasLengthByte;
1377 hasNullByte = true;
1378 if (hasLengthByte) *ptr++ = (uint8_t)len;
1379 for (cnt = 0; cnt < len; cnt++) ptr[cnt] = (uint8_t)(((const UniChar *)bytes)[cnt]);
1380 ptr[len] = 0;
1381 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1382 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1383 }
1384 // Now make everything look like we had an ASCII buffer to start with
1385 bytes = mem;
1386 encoding = kCFStringEncodingASCII;
1387 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1388 noCopy = (numBytes >= __kCFVarWidthLocalBufferSize); // If we had to allocate it, make sure it's kept around
1389 numBytes--; // Should not contain the NULL byte at end...
1390 stringSupportsEightBitCFRepresentation = true; // We're ASCII now!
1391 stringROMShouldIgnoreNoCopy = true; // We allocated this buffer, so we should feel free to get rid of it if we can use the string ROM
1392 #if INSTRUMENT_SHARED_STRINGS
1393 recordedEncoding = "U->A";
1394 #endif
1395 }
1396 }
1397
1398 // At this point, all necessary input arguments have been changed to reflect the new state
1399 }
1400
1401 #if USE_STRING_ROM || ENABLE_TAGGED_POINTER_STRINGS || INSTRUMENT_SHARED_STRINGS
1402 CFIndex lengthByte = (hasLengthByte ? 1 : 0);
1403 CFIndex realNumBytes = numBytes - lengthByte;
1404 const uint8_t *realBytes = bytes + lengthByte;
1405 #endif
1406
1407
1408 if (!str) {
1409 // Now determine the necessary size
1410 #if INSTRUMENT_SHARED_STRINGS || USE_STRING_ROM
1411 Boolean stringSupportsROM = stringSupportsEightBitCFRepresentation;
1412 #endif
1413
1414 #if INSTRUMENT_SHARED_STRINGS
1415 if (stringSupportsROM) __CFRecordStringAllocationEvent(recordedEncoding, realBytes, realNumBytes);
1416 #endif
1417
1418 #if USE_STRING_ROM
1419 CFStringRef romResult = NULL;
1420
1421
1422 if (stringSupportsROM) {
1423 // Disable the string ROM if necessary
1424 static char sDisableStringROM = -1;
1425 if (sDisableStringROM == -1) sDisableStringROM = !! __CFgetenv("CFStringDisableROM");
1426
1427 if (sDisableStringROM == 0) romResult = __CFSearchStringROM((const char *)realBytes, realNumBytes);
1428 }
1429 /* if we get a result from our ROM, and noCopy is set, then deallocate the buffer immediately */
1430 if (romResult) {
1431 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1432 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1433 }
1434
1435 /* these don't get used again, but clear them for consistency */
1436 noCopy = false;
1437 bytes = NULL;
1438
1439 /* set our result to the ROM result which is not really mutable, of course, but that's OK because we don't try to modify it. */
1440 str = (CFMutableStringRef)romResult;
1441
1442 #if INSTRUMENT_TAGGED_POINTER_STRINGS
1443 _CFTaggedPointerStringStats.stringROMCount++;
1444 #endif
1445 }
1446
1447 if (! romResult) {
1448 #else
1449 if (1) {
1450 #endif
1451
1452 #if INSTRUMENT_SHARED_STRINGS
1453 if (stringSupportsROM) __CFRecordStringAllocationEvent(recordedEncoding, realBytes, realNumBytes);
1454 #endif
1455 #if INSTRUMENT_TAGGED_POINTER_STRINGS
1456 _CFTaggedPointerStringStats.otherStringCount++;
1457 #endif
1458
1459 // Now determine the necessary size
1460
1461 if (noCopy) {
1462
1463 size = sizeof(void *); // Pointer to the buffer
1464 if ((0) || (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull)) {
1465 size += sizeof(void *); // The contentsDeallocator
1466 }
1467 if (!hasLengthByte) size += sizeof(CFIndex); // Explicit length
1468 useLengthByte = hasLengthByte;
1469 useNullByte = hasNullByte;
1470
1471 } else { // Inline data; reserve space for it
1472
1473 useInlineData = true;
1474 size = numBytes;
1475
1476 if (hasLengthByte || (encoding != kCFStringEncodingUnicode && __CFCanUseLengthByte(numBytes))) {
1477 useLengthByte = true;
1478 if (!hasLengthByte) size += 1;
1479 } else {
1480 size += sizeof(CFIndex); // Explicit length
1481 }
1482 if (hasNullByte || encoding != kCFStringEncodingUnicode) {
1483 useNullByte = true;
1484 size += 1;
1485 }
1486 }
1487
1488 #ifdef STRING_SIZE_STATS
1489 // Dump alloced CFString size info every so often
1490 static int cnt = 0;
1491 static unsigned sizes[256] = {0};
1492 int allocedSize = size + sizeof(CFRuntimeBase);
1493 if (allocedSize < 255) sizes[allocedSize]++; else sizes[255]++;
1494 if ((++cnt % 1000) == 0) {
1495 printf ("\nTotal: %d\n", cnt);
1496 int i; for (i = 0; i < 256; i++) printf("%03d: %5d%s", i, sizes[i], ((i % 8) == 7) ? "\n" : " ");
1497 }
1498 #endif
1499
1500 // Finally, allocate!
1501
1502 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, size, NULL);
1503 if (str) {
1504 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (immutable)");
1505
1506 CFOptionFlags allocBits = (0) ? __kCFHasContentsDeallocator : (contentsDeallocator == alloc ? __kCFNotInlineContentsDefaultFree : (contentsDeallocator == kCFAllocatorNull ? __kCFNotInlineContentsNoFree : __kCFNotInlineContentsCustomFree));
1507 __CFStrSetInfoBits(str,
1508 (useInlineData ? __kCFHasInlineContents : allocBits) |
1509 ((encoding == kCFStringEncodingUnicode) ? __kCFIsUnicode : 0) |
1510 (useNullByte ? __kCFHasNullByte : 0) |
1511 (useLengthByte ? __kCFHasLengthByte : 0));
1512
1513 if (!useLengthByte) {
1514 CFIndex length = numBytes - (hasLengthByte ? 1 : 0);
1515 if (encoding == kCFStringEncodingUnicode) length /= sizeof(UniChar);
1516 __CFStrSetExplicitLength(str, length);
1517 }
1518
1519 if (useInlineData) {
1520 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1521 if (useLengthByte && !hasLengthByte) *contents++ = (uint8_t)numBytes;
1522 memmove(contents, bytes, numBytes);
1523 if (useNullByte) contents[numBytes] = 0;
1524 } else {
1525 __CFStrSetContentPtr(str, bytes);
1526 if (__CFStrHasContentsDeallocator(str)) __CFStrSetContentsDeallocator(str, contentsDeallocator);
1527 }
1528 } else {
1529 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1530 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1531 }
1532 }
1533 }
1534 }
1535 if (vBuf.shouldFreeChars) CFAllocatorDeallocate(vBuf.allocator, (void *)bytes);
1536
1537 #if 0
1538 #warning Debug code
1539 const uint8_t *contents = (uint8_t *)__CFStrContents(str);
1540 CFIndex len = __CFStrLength2(str, contents);
1541
1542 if (__CFStrIsEightBit(str)) {
1543 contents += __CFStrSkipAnyLengthByte(str);
1544 if (!__CFBytesInASCII(contents, len)) {
1545 printf("CFString with 8 bit backing store not ASCII: %p, \"%.*s\"\n", str, (int)len, contents);
1546 }
1547 }
1548 #endif
1549
1550 return str;
1551 }
1552
1553 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1554 */
1555 CFStringRef __CFStringCreateImmutableFunnel2(
1556 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1557 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1558 CFAllocatorRef contentsDeallocator) {
1559 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, possiblyExternalFormat, tryToReduceUnicode, hasLengthByte, hasNullByte, noCopy, contentsDeallocator, 0);
1560 }
1561
1562
1563
1564 CFStringRef CFStringCreateWithPascalString(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding) {
1565 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1566 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, false, ALLOCATORSFREEFUNC, 0);
1567 }
1568
1569
1570 CFStringRef CFStringCreateWithCString(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding) {
1571 CFIndex len = strlen(cStr);
1572 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, false, ALLOCATORSFREEFUNC, 0);
1573 }
1574
1575 CFStringRef CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1576 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1577 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, true, contentsDeallocator, 0);
1578 }
1579
1580
1581 CFStringRef CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1582 CFIndex len = strlen(cStr);
1583 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, true, contentsDeallocator, 0);
1584 }
1585
1586
1587 CFStringRef CFStringCreateWithCharacters(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars) {
1588 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1589 }
1590
1591
1592 CFStringRef CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars, CFAllocatorRef contentsDeallocator) {
1593 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, false, false, false, true, contentsDeallocator, 0);
1594 }
1595
1596
1597 CFStringRef CFStringCreateWithBytes(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat) {
1598 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1599 }
1600
1601 CFStringRef _CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1602 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1603 }
1604
1605 CFStringRef CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1606 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1607 }
1608
1609 CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1610 return _CFStringCreateWithFormatAndArgumentsAux2(alloc, NULL, NULL, formatOptions, format, arguments);
1611 }
1612
1613 CFStringRef _CFStringCreateWithFormatAndArgumentsAux2(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, const void *), CFStringRef (*contextDescFunc)(void *, const void *, const void *, bool , bool *), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1614 CFStringRef str;
1615 CFMutableStringRef outputString = CFStringCreateMutable(kCFAllocatorSystemDefault, 0); //should use alloc if no copy/release
1616 __CFStrSetDesiredCapacity(outputString, 120); // Given this will be tightened later, choosing a larger working string is fine
1617 __CFStringAppendFormatCore(outputString, copyDescFunc, contextDescFunc, formatOptions, NULL, format, 0, NULL, 0, arguments);
1618 // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1619 // (However, this does make the string inline, and cause the supplied allocator to be used...)
1620 str = (CFStringRef)CFStringCreateCopy(alloc, outputString);
1621 CFRelease(outputString);
1622 return str;
1623 }
1624
1625 CFStringRef _CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1626 return _CFStringCreateWithFormatAndArgumentsAux2(alloc, copyDescFunc, NULL, formatOptions, format, arguments);
1627 }
1628
1629 CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) {
1630 CFStringRef result;
1631 va_list argList;
1632
1633 va_start(argList, format);
1634 result = CFStringCreateWithFormatAndArguments(alloc, formatOptions, format, argList);
1635 va_end(argList);
1636
1637 return result;
1638 }
1639
1640 CFStringRef CFStringCreateWithSubstring(CFAllocatorRef alloc, CFStringRef str, CFRange range) {
1641 // CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringRef , (NSString *)str, _createSubstringWithRange:NSMakeRange(range.location, range.length));
1642
1643 __CFAssertIsString(str);
1644 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1645
1646 if ((range.location == 0) && (range.length == __CFStrLength(str))) { /* The substring is the whole string... */
1647 return (CFStringRef)CFStringCreateCopy(alloc, str);
1648 } else if (__CFStrIsEightBit(str)) {
1649 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1650 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location + __CFStrSkipAnyLengthByte(str), range.length, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1651 } else {
1652 const UniChar *contents = (UniChar *)__CFStrContents(str);
1653 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location, range.length * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1654 }
1655 }
1656
1657 CFStringRef CFStringCreateCopy(CFAllocatorRef alloc, CFStringRef str) {
1658 // CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringRef, (NSString *)str, copy);
1659
1660 __CFAssertIsString(str);
1661 if (!__CFStrIsMutable((CFStringRef)str) && // If the string is not mutable
1662 ((alloc ? alloc : __CFGetDefaultAllocator()) == __CFGetAllocator(str)) && // and it has the same allocator as the one we're using
1663 (__CFStrIsInline((CFStringRef)str) || __CFStrFreeContentsWhenDone((CFStringRef)str) || __CFStrIsConstant((CFStringRef)str))) { // and the characters are inline, or are owned by the string, or the string is constant
1664 if (!(kCFUseCollectableAllocator && (0))) CFRetain(str); // Then just retain instead of making a true copy
1665 return str;
1666 }
1667 if (__CFStrIsEightBit((CFStringRef)str)) {
1668 const uint8_t *contents = (const uint8_t *)__CFStrContents((CFStringRef)str);
1669 return __CFStringCreateImmutableFunnel3(alloc, contents + __CFStrSkipAnyLengthByte((CFStringRef)str), __CFStrLength2((CFStringRef)str, contents), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1670 } else {
1671 const UniChar *contents = (const UniChar *)__CFStrContents((CFStringRef)str);
1672 return __CFStringCreateImmutableFunnel3(alloc, contents, __CFStrLength2((CFStringRef)str, contents) * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1673 }
1674 }
1675
1676
1677
1678 /*** Constant string stuff... ***/
1679
1680 /* Table which holds constant strings created with CFSTR, when the -fconstant-cfstrings option is not used. These dynamically created constant strings are stored in constantStringTable, which starts out NULL and is created on first use by __CFStringMakeConstantString(). The keys are 8-bit C strings, hashed and compared by content (__cStrHash / __cStrEqual); a key is either the 8-bit contents of the stored CFString itself or a separately allocated copy of the C string passed in. The values are the CFStrings created for them. _CFSTRLock protects this table.
1681 */
1682 static CFMutableDictionaryRef constantStringTable = NULL;
1683 static CFLock_t _CFSTRLock = CFLockInit;
1684
1685 static CFStringRef __cStrCopyDescription(const void *ptr) {
1686 return CFStringCreateWithCStringNoCopy(kCFAllocatorSystemDefault, (const char *)ptr, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull);
1687 }
1688
1689 static Boolean __cStrEqual(const void *ptr1, const void *ptr2) {
1690 return (strcmp((const char *)ptr1, (const char *)ptr2) == 0);
1691 }
1692
1693 static CFHashCode __cStrHash(const void *ptr) {
1694 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1695 const char *cStr = (const char *)ptr;
1696 CFIndex len = strlen(cStr);
1697 CFHashCode result = 0;
1698 if (len <= 4) { // All chars
1699 unsigned cnt = len;
1700 while (cnt--) result += (result << 8) + *cStr++;
1701 } else { // First and last 2 chars
1702 result += (result << 8) + cStr[0];
1703 result += (result << 8) + cStr[1];
1704 result += (result << 8) + cStr[len-2];
1705 result += (result << 8) + cStr[len-1];
1706 }
1707 result += (result << (len & 31));
1708 return result;
1709 }
1710
1711
1712 CFStringRef __CFStringMakeConstantString(const char *cStr) {
1713 CFStringRef result;
1714 #if defined(DEBUG)
1715 // StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging
1716 if ('\0' == *cStr) return kCFEmptyString;
1717 #endif
1718 if (constantStringTable == NULL) {
1719 CFDictionaryKeyCallBacks constantStringCallBacks = {0, NULL, NULL, __cStrCopyDescription, __cStrEqual, __cStrHash};
1720 CFDictionaryValueCallBacks constantStringValueCallBacks = kCFTypeDictionaryValueCallBacks;
1721 constantStringValueCallBacks.equal = NULL; // So that we only find strings that are ==
1722 CFMutableDictionaryRef table = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, &constantStringCallBacks, &constantStringValueCallBacks);
1723 _CFDictionarySetCapacity(table, 2500); // avoid lots of rehashing
1724 __CFLock(&_CFSTRLock);
1725 if (constantStringTable == NULL) constantStringTable = table;
1726 __CFUnlock(&_CFSTRLock);
1727 if (constantStringTable != table) CFRelease(table);
1728 }
1729
1730 __CFLock(&_CFSTRLock);
1731 if ((result = (CFStringRef)CFDictionaryGetValue(constantStringTable, cStr))) {
1732 __CFUnlock(&_CFSTRLock);
1733 } else {
1734 __CFUnlock(&_CFSTRLock);
1735
1736 {
1737 char *key = NULL;
1738 Boolean isASCII = true;
1739 // Given this code path is rarer these days, OK to do this extra work to verify the strings
1740 const char *tmp = cStr;
1741 while (*tmp) {
1742 if (*(tmp++) & 0x80) {
1743 isASCII = false;
1744 break;
1745 }
1746 }
1747 if (!isASCII) {
1748 CFMutableStringRef ms = CFStringCreateMutable(kCFAllocatorSystemDefault, 0);
1749 tmp = cStr;
1750 while (*tmp) {
1751 CFStringAppendFormat(ms, NULL, (*tmp & 0x80) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp);
1752 tmp++;
1753 }
1754 CFLog(kCFLogLevelWarning, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms);
1755 CFRelease(ms);
1756 }
1757 // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility
1758 result = CFStringCreateWithCString(kCFAllocatorSystemDefault, cStr, kCFStringEncodingMacRoman);
1759 if (result == NULL) {
1760 CFLog(__kCFLogAssertion, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1761 HALT;
1762 }
1763 Boolean isTaggedPointerString = CF_IS_OBJC(__kCFStringTypeID, result);
1764
1765 if (!isTaggedPointerString) {
1766 if (__CFOASafe) __CFSetLastAllocationEventName((void *)result, "CFString (CFSTR)");
1767 if (__CFStrIsEightBit(result)) key = (char *)__CFStrContents(result) + __CFStrSkipAnyLengthByte(result);
1768 }
1769 if (!key) { // Either the string is not 8-bit or it's a tagged pointer string
1770 CFIndex keySize = strlen(cStr) + 1;
1771 key = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, keySize, 0);
1772 if (__CFOASafe) __CFSetLastAllocationEventName((void *)key, "CFString (CFSTR key)");
1773 strlcpy(key, cStr, keySize); // !!! We will leak this, if the string is removed from the table (or table is freed)
1774 }
1775
1776 {
1777 CFStringRef resultToBeReleased = result;
1778 CFIndex count;
1779 __CFLock(&_CFSTRLock);
1780 count = CFDictionaryGetCount(constantStringTable);
1781 CFDictionaryAddValue(constantStringTable, key, result);
1782 if (CFDictionaryGetCount(constantStringTable) == count) { // add did nothing, someone already put it there
1783 result = (CFStringRef)CFDictionaryGetValue(constantStringTable, key);
1784 } else if (!isTaggedPointerString) {
1785 #if __LP64__
1786 ((struct __CFString *)result)->base._rc = 0;
1787 #else
1788 ((struct __CFString *)result)->base._cfinfo[CF_RC_BITS] = 0;
1789 #endif
1790 }
1791 __CFUnlock(&_CFSTRLock);
1792 // This either eliminates the extra retain on the freshly created string, or frees it, if it was actually not inserted into the table
1793 CFRelease(resultToBeReleased);
1794 }
1795 }
1796 }
1797 return result;
1798 }
1799
#if defined(DEBUG)
/* Debug-only check: reports whether `str` is one of the values stored in constantStringTable.
   Returns false when the table has not been created yet; the lookup itself runs under _CFSTRLock.
*/
static Boolean __CFStrIsConstantString(CFStringRef str) {
    if (!constantStringTable) return false;

    __CFLock(&_CFSTRLock);
    Boolean isInTable = CFDictionaryContainsValue(constantStringTable, str);
    __CFUnlock(&_CFSTRLock);
    return isInTable;
}
#endif
1811
1812
#if DEPLOYMENT_TARGET_WINDOWS
/* In case the library is unloaded, release the store for the constant string table and reset the pointer.
   In DEBUG builds the release is bracketed by setting and clearing __CFConstantStringTableBeingFreed.
*/
void __CFStringCleanup (void) {
    if (constantStringTable == NULL) return;

#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = true;
#endif
    CFRelease(constantStringTable);
#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = false;
#endif
    constantStringTable = NULL;
}
#endif
1828
1829
1830 // Can pass in NSString as replacement string
1831 // Call with numRanges > 0, and incrementing ranges
1832
1833 static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) {
1834 int cnt;
1835 CFStringRef copy = NULL;
1836 if (replacement == str) copy = replacement = CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1837 CFIndex replacementLength = CFStringGetLength(replacement);
1838
1839 __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1840
1841 if (__CFStrIsUnicode(str)) {
1842 UniChar *contents = (UniChar *)__CFStrContents(str);
1843 UniChar *firstReplacement = contents + ranges[0].location;
1844 // Extract the replacementString into the first location, then copy from there
1845 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement);
1846 for (cnt = 1; cnt < numRanges; cnt++) {
1847 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1848 contents += replacementLength - ranges[cnt - 1].length;
1849 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar));
1850 }
1851 } else {
1852 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1853 uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str);
1854 // Extract the replacementString into the first location, then copy from there
1855 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL);
1856 contents += __CFStrSkipAnyLengthByte(str); // Now contents will simply track the location to insert next string into
1857 for (cnt = 1; cnt < numRanges; cnt++) {
1858 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1859 contents += replacementLength - ranges[cnt - 1].length;
1860 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength);
1861 }
1862 }
1863 if (copy) CFRelease(copy);
1864 }
1865
1866 // Can pass in NSString as replacement string
1867
1868 CF_INLINE void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
1869 CFStringRef copy = NULL;
1870 if (replacement == str) copy = replacement = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1871 CFIndex replacementLength = CFStringGetLength(replacement);
1872
1873 __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1874
1875 if (__CFStrIsUnicode(str)) {
1876 UniChar *contents = (UniChar *)__CFStrContents(str);
1877 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location);
1878 } else {
1879 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1880 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL);
1881 }
1882
1883 if (copy) CFRelease(copy);
1884 }
1885
1886 /* Desired capacity used when the client does not provide a minimum capacity: __CFStringCreateMutableFunnel() passes this value to __CFStrSetDesiredCapacity() when its maxLength argument is 0.
1887 */
1888 #define DEFAULTMINCAPACITY 32
1889
1890 CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) {
1891 CFMutableStringRef str;
1892 if ((0)) additionalInfoBits |= __kCFHasContentsAllocator;
1893 Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false;
1894
1895 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1896
1897 // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
1898 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(struct __notInlineMutable) - (hasExternalContentsAllocator ? 0 : sizeof(CFAllocatorRef)), NULL);
1899 if (str) {
1900 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)");
1901
1902 __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits);
1903 str->variants.notInlineMutable.buffer = NULL;
1904 __CFStrSetExplicitLength(str, 0);
1905 str->variants.notInlineMutable.hasGap = str->variants.notInlineMutable.isFixedCapacity = str->variants.notInlineMutable.isExternalMutable = str->variants.notInlineMutable.capacityProvidedExternally = 0;
1906 if (maxLength != 0) __CFStrSetIsFixed(str);
1907 __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength);
1908 __CFStrSetCapacity(str, 0);
1909 if (__CFStrHasContentsAllocator(str)) {
1910 // contents allocator starts out as the string's own allocator
1911 __CFStrSetContentsAllocator(str, alloc);
1912 }
1913 }
1914 return str;
1915 }
1916
1917 CFMutableStringRef CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc, UniChar *chars, CFIndex numChars, CFIndex capacity, CFAllocatorRef externalCharactersAllocator) {
1918 CFOptionFlags contentsAllocationBits = externalCharactersAllocator ? ((externalCharactersAllocator == kCFAllocatorNull) ? __kCFNotInlineContentsNoFree : __kCFHasContentsAllocator) : __kCFNotInlineContentsDefaultFree;
1919 CFMutableStringRef string = __CFStringCreateMutableFunnel(alloc, 0, contentsAllocationBits | __kCFIsUnicode);
1920 if (string) {
1921 __CFStrSetIsExternalMutable(string);
1922 if (__CFStrHasContentsAllocator(string)) {
1923 CFAllocatorRef allocator = __CFStrContentsAllocator((CFMutableStringRef)string);
1924 if (!(0 || 0)) CFRelease(allocator);
1925 __CFStrSetContentsAllocator(string, externalCharactersAllocator);
1926 }
1927 CFStringSetExternalCharactersNoCopy(string, chars, numChars, capacity);
1928 }
1929 return string;
1930 }
1931
1932 CFMutableStringRef CFStringCreateMutable(CFAllocatorRef alloc, CFIndex maxLength) {
1933 return __CFStringCreateMutableFunnel(alloc, maxLength, __kCFNotInlineContentsDefaultFree);
1934 }
1935
1936 CFMutableStringRef CFStringCreateMutableCopy(CFAllocatorRef alloc, CFIndex maxLength, CFStringRef string) {
1937 CFMutableStringRef newString;
1938
1939 // CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFMutableStringRef, (NSString *)string, mutableCopy);
1940
1941 __CFAssertIsString(string);
1942
1943 newString = CFStringCreateMutable(alloc, maxLength);
1944 __CFStringReplace(newString, CFRangeMake(0, 0), string);
1945
1946 return newString;
1947 }
1948
1949
1950 CF_PRIVATE void _CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex len) {
1951 __CFAssertIsStringAndMutable(str);
1952 __CFStrSetDesiredCapacity(str, len);
1953 }
1954
1955
1956 /* This one is for CF
1957 */
1958 CFIndex CFStringGetLength(CFStringRef str) {
1959 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFIndex, (NSString *)str, length);
1960
1961 __CFAssertIsString(str);
1962 return __CFStrLength(str);
1963 }
1964
1965 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1966 */
1967 CFIndex _CFStringGetLength2(CFStringRef str) {
1968 return __CFStrLength(str);
1969 }
1970
1971
1972 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere.
1973 */
1974 CF_INLINE UniChar __CFStringGetCharacterAtIndexGuts(CFStringRef str, CFIndex idx, const uint8_t *contents) {
1975 if (__CFStrIsEightBit(str)) {
1976 contents += __CFStrSkipAnyLengthByte(str);
1977 #if defined(DEBUG)
1978 if (!__CFCharToUniCharFunc && (contents[idx] >= 128)) {
1979 // Can't do log here, as it might be too early
1980 fprintf(stderr, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
1981 }
1982 #endif
1983 return __CFCharToUniCharTable[contents[idx]];
1984 }
1985
1986 return ((UniChar *)contents)[idx];
1987 }
1988
1989 /* This one is for the CF API
1990 */
1991 UniChar CFStringGetCharacterAtIndex(CFStringRef str, CFIndex idx) {
1992 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, UniChar, (NSString *)str, characterAtIndex:(NSUInteger)idx);
1993
1994 __CFAssertIsString(str);
1995 __CFAssertIndexIsInStringBounds(str, idx);
1996 return __CFStringGetCharacterAtIndexGuts(str, idx, (const uint8_t *)__CFStrContents(str));
1997 }
1998
1999 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
2000 */
2001 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str, CFIndex idx, UniChar *ch) {
2002 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
2003 if (idx >= __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
2004 *ch = __CFStringGetCharacterAtIndexGuts(str, idx, contents);
2005 return _CFStringErrNone;
2006 }
2007
2008
2009 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
2010 */
2011 CF_INLINE void __CFStringGetCharactersGuts(CFStringRef str, CFRange range, UniChar *buffer, const uint8_t *contents) {
2012 if (__CFStrIsEightBit(str)) {
2013 __CFStrConvertBytesToUnicode(((uint8_t *)contents) + (range.location + __CFStrSkipAnyLengthByte(str)), buffer, range.length);
2014 } else {
2015 const UniChar *uContents = ((UniChar *)contents) + range.location;
2016 memmove(buffer, uContents, range.length * sizeof(UniChar));
2017 }
2018 }
2019
2020 /* This one is for the CF API
2021 */
2022 void CFStringGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
2023 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)str, getCharacters:(unichar *)buffer range:NSMakeRange(range.location, range.length));
2024
2025 __CFAssertIsString(str);
2026 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
2027 __CFStringGetCharactersGuts(str, range, buffer, (const uint8_t *)__CFStrContents(str));
2028 }
2029
2030 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
2031 */
2032 int _CFStringCheckAndGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
2033 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
2034 if (range.location + range.length > __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
2035 __CFStringGetCharactersGuts(str, range, buffer, contents);
2036 return _CFStringErrNone;
2037 }
2038
2039
2040 CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) {
2041
2042 __CFAssertIsNotNegative(maxBufLen);
2043
2044 {
2045 __CFAssertIsString(str);
2046 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
2047
2048 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2049 const unsigned char *contents = (const unsigned char *)__CFStrContents(str);
2050 CFIndex cLength = range.length;
2051
2052 if (buffer) {
2053 if (cLength > maxBufLen) cLength = maxBufLen;
2054 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength);
2055 }
2056 if (usedBufLen) *usedBufLen = cLength;
2057
2058 return cLength;
2059 }
2060 }
2061
2062 return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen);
2063 }
2064
2065
2066 ConstStringPtr CFStringGetPascalStringPtr (CFStringRef str, CFStringEncoding encoding) {
2067
2068 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
2069 __CFAssertIsString(str);
2070 if (__CFStrHasLengthByte(str) && __CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII
2071 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
2072 if (__CFStrHasExplicitLength(str) && (__CFStrLength2(str, contents) != (SInt32)(*contents))) return NULL; // Invalid length byte
2073 return (ConstStringPtr)contents;
2074 }
2075 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
2076 }
2077 return NULL;
2078 }
2079
2080
2081 const char * CFStringGetCStringPtr(CFStringRef str, CFStringEncoding encoding) {
2082
2083 if (encoding != __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII != __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding))) return NULL;
2084 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
2085
2086 if (str == NULL) return NULL; // Should really just crash, but for compatibility... see <rdar://problem/12340248>
2087
2088 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, const char *, (NSString *)str, _fastCStringContents:true);
2089
2090 __CFAssertIsString(str);
2091
2092 if (__CFStrHasNullByte(str)) {
2093 // Note: this is called a lot, 27000 times to open a small xcode project with one file open.
2094 // Of these uses about 1500 are for cStrings/utf8strings.
2095 #if 0
2096 // Only sometimes when the stars are aligned will this call return a gc pointer
2097 // under GC we can only really return a pointer to the start of a GC buffer for cString use
2098 // (Is there a simpler way to ask if contents isGC?)
2099 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
2100 if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
2101 if (__CFStrSkipAnyLengthByte(str) != 0 || !__CFStrIsMutable(str)) {
2102 static int counter = 0;
2103 printf("CFString %dth unsafe safe string %s\n", ++counter, __CFStrContents(str) + __CFStrSkipAnyLengthByte(str));
2104 return NULL;
2105 }
2106 }
2107 #endif
2108 return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str);
2109 } else {
2110 return NULL;
2111 }
2112 }
2113
2114
2115 const UniChar *CFStringGetCharactersPtr(CFStringRef str) {
2116
2117 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, const UniChar *, (NSString *)str, _fastCharacterContents);
2118
2119 __CFAssertIsString(str);
2120 if (__CFStrIsUnicode(str)) return (const UniChar *)__CFStrContents(str);
2121 return NULL;
2122 }
2123
2124
2125 Boolean CFStringGetPascalString(CFStringRef str, Str255 buffer, CFIndex bufferSize, CFStringEncoding encoding) {
2126 CFIndex length;
2127 CFIndex usedLen;
2128
2129 __CFAssertIsNotNegative(bufferSize);
2130 if (bufferSize < 1) return false;
2131
2132 if (CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
2133 length = CFStringGetLength(str);
2134 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
2135 } else {
2136 const uint8_t *contents;
2137
2138 __CFAssertIsString(str);
2139
2140 contents = (const uint8_t *)__CFStrContents(str);
2141 length = __CFStrLength2(str, contents);
2142
2143 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
2144
2145 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2146 if (length >= bufferSize) return false;
2147 memmove((void*)(1 + (const char*)buffer), (__CFStrSkipAnyLengthByte(str) + contents), length);
2148 *buffer = (unsigned char)length;
2149 return true;
2150 }
2151 }
2152
2153 if (__CFStringEncodeByteStream(str, 0, length, false, encoding, false, (UInt8 *)(1 + (uint8_t *)buffer), bufferSize - 1, &usedLen) != length) {
2154
2155 #if defined(DEBUG)
2156 if (bufferSize > 0) {
2157 strlcpy((char *)buffer + 1, CONVERSIONFAILURESTR, bufferSize - 1);
2158 buffer[0] = (unsigned char)((CFIndex)sizeof(CONVERSIONFAILURESTR) < (bufferSize - 1) ? (CFIndex)sizeof(CONVERSIONFAILURESTR) : (bufferSize - 1));
2159 }
2160 #else
2161 if (bufferSize > 0) buffer[0] = 0;
2162 #endif
2163 return false;
2164 }
2165 *buffer = (unsigned char)usedLen;
2166 return true;
2167 }
2168
2169 Boolean CFStringGetCString(CFStringRef str, char *buffer, CFIndex bufferSize, CFStringEncoding encoding) {
2170 const uint8_t *contents;
2171 CFIndex len;
2172
2173 __CFAssertIsNotNegative(bufferSize);
2174 if (bufferSize < 1) return false;
2175
2176 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, Boolean, (NSString *)str, _getCString:buffer maxLength:(NSUInteger)bufferSize - 1 encoding:encoding);
2177
2178 __CFAssertIsString(str);
2179
2180 contents = (const uint8_t *)__CFStrContents(str);
2181 len = __CFStrLength2(str, contents);
2182
2183 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2184 if (len >= bufferSize) return false;
2185 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str), len);
2186 buffer[len] = 0;
2187 return true;
2188 } else {
2189 CFIndex usedLen;
2190
2191 if (__CFStringEncodeByteStream(str, 0, len, false, encoding, false, (unsigned char*) buffer, bufferSize - 1, &usedLen) == len) {
2192 buffer[usedLen] = '\0';
2193 return true;
2194 } else {
2195 #if defined(DEBUG)
2196 strlcpy(buffer, CONVERSIONFAILURESTR, bufferSize);
2197 #else
2198 if (bufferSize > 0) buffer[0] = 0;
2199 #endif
2200 return false;
2201 }
2202 }
2203 }
2204
2205 extern Boolean __CFLocaleGetNullLocale(struct __CFLocale *locale);
2206 extern void __CFLocaleSetNullLocale(struct __CFLocale *locale);
2207
2208 static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale, bool collatorOnly) {
2209 CFStringRef localeID;
2210 const char *langID = NULL;
2211 static const void *lastLocale = NULL;
2212 static const char *lastLangID = NULL;
2213 static CFLock_t lock = CFLockInit;
2214
2215 if (__CFLocaleGetNullLocale((struct __CFLocale *)locale)) return NULL;
2216
2217 __CFLock(&lock);
2218 if ((NULL != lastLocale) && (lastLocale == locale)) {
2219 __CFUnlock(&lock);
2220 return lastLangID;
2221 }
2222 __CFUnlock(&lock);
2223
2224 localeID = (CFStringRef)CFLocaleGetValue(locale, __kCFLocaleCollatorID);
2225 CFIndex length = CFStringGetLength(localeID);
2226
2227 if (!collatorOnly) {
2228 if ((length < 2) || ((4 == length) && CFEqual(localeID, CFSTR("root")))) {
2229 localeID = (CFStringRef)CFLocaleGetIdentifier(locale);
2230 length = CFStringGetLength(localeID);
2231 }
2232 }
2233
2234 if (length > 1) {
2235 uint8_t buffer[2];
2236 const uint8_t *contents = (const uint8_t *)CFStringGetCStringPtr(localeID, kCFStringEncodingUTF8);
2237 if (!contents) {
2238 if (2 == CFStringGetBytes(localeID, CFRangeMake(0,2), kCFStringEncodingUTF8, 0, false, buffer, sizeof(buffer), NULL)) contents = buffer;
2239 }
2240 if (contents) {
2241 const char *string = (const char *)contents;
2242 if (!strncmp(string, "az", 2)) { // Azerbaijani
2243 langID = "az";
2244 } else if (!strncmp(string, "lt", 2)) { // Lithuanian
2245 langID = "lt";
2246 } else if (!strncmp(string, "tr", 2)) { // Turkish
2247 langID = "tr";
2248 } else if (!strncmp(string, "nl", 2)) { // Dutch
2249 langID = "nl";
2250 } else if (!strncmp(string, "el", 2)) { // Greek
2251 langID = "el";
2252 }
2253 }
2254 }
2255
2256 if (langID == NULL) __CFLocaleSetNullLocale((struct __CFLocale *)locale);
2257
2258 __CFLock(&lock);
2259 lastLocale = locale;
2260 lastLangID = langID;
2261 __CFUnlock(&lock);
2262
2263 return langID;
2264 }
2265
2266 CF_INLINE bool _CFCanUseLocale(CFLocaleRef locale) {
2267 if (locale) {
2268 return true;
2269 }
2270 return false;
2271 }
2272
2273 #define MAX_CASE_MAPPING_BUF (8)
2274 #define ZERO_WIDTH_JOINER (0x200D)
2275 #define COMBINING_GRAPHEME_JOINER (0x034F)
2276 // Hangul ranges
2277 #define HANGUL_CHOSEONG_START (0x1100)
2278 #define HANGUL_CHOSEONG_END (0x115F)
2279 #define HANGUL_JUNGSEONG_START (0x1160)
2280 #define HANGUL_JUNGSEONG_END (0x11A2)
2281 #define HANGUL_JONGSEONG_START (0x11A8)
2282 #define HANGUL_JONGSEONG_END (0x11F9)
2283
2284 #define HANGUL_SYLLABLE_START (0xAC00)
2285 #define HANGUL_SYLLABLE_END (0xD7AF)
2286
2287
// Folds the character cluster beginning with `character` (located at `index` in `buffer`) according to `flags`
// and writes the result to outCharacters as UTF-32.
//   character        first character of the cluster; 0 makes the function return 0 immediately
//   buffer, index    inline buffer and position used to look at the characters that follow
//   flags            only kCFCompareCaseInsensitive, kCFCompareWidthInsensitive, kCFCompareDiacriticInsensitive and kCFCompareNonliteral are examined
//   langCode         optional language code; "tr" and "az" enable the special handling of 'I' followed by U+0307
//   consumedLength   if non-NULL and something was filled, receives the number of UTF-16 units consumed from buffer
// Returns the length of characters filled into outCharacters. If no change, returns 0. maxBufferLength should be at least 8
static CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength) {
    CFIndex filledLength = 0, currentIndex = index;

    if (0 != character) {
        UTF16Char lowSurrogate;
        CFIndex planeNo = (character >> 16);
        bool isTurkikCapitalI = false;
        // Plane-0 bitmaps are looked up once and kept in function-level statics
        static const uint8_t *decompBMP = NULL;
        static const uint8_t *graphemeBMP = NULL;

        if (NULL == decompBMP) {
            decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
            graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
        }

        // Step past the incoming character: two UTF-16 units if it lies above U+FFFF, one otherwise
        currentIndex += ((character > 0xFFFF) ? 2 : 1);

        // 'I' is excluded from the ASCII shortcut whenever a langCode is present so that it reaches the "tr"/"az" check in the else branch
        if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII
            if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) {
                character += ('a' - 'A');
                *outCharacters = character;
                filledLength = 1;
            }
        } else {
            // do width-insensitive mapping
            if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) {
                (void)CFUniCharCompatibilityDecompose(&character, 1, 1);
                *outCharacters = character;
                filledLength = 1;
            }

            // map surrogates
            if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) {
                character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
                ++currentIndex;
                planeNo = (character >> 16);
            }

            // decompose
            if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
                if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) {
                    UTF32Char original = character;

                    filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength);
                    character = *outCharacters;

                    if ((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) {
                        filledLength = 1; // reset if Roman, Greek, Cyrillic
                    } else if (0 == (flags & kCFCompareNonliteral)) {
                        // Decomposition is not wanted for this character after all: restore it and report nothing filled
                        character = original;
                        filledLength = 0;
                    }
                }
            }

            // fold case
            if (flags & kCFCompareCaseInsensitive) {
                const uint8_t *nonBaseBitmap;
                // When set, grapheme-extend characters produced by case folding are dropped in the fill loop below
                bool filterNonBase = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? true : false);
                static const uint8_t *lowerBMP = NULL;
                static const uint8_t *caseFoldBMP = NULL;

                if (NULL == lowerBMP) {
                    lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0);
                    caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0);
                }

                if ((NULL != langCode) && ('I' == character) && ((0 == strcmp((const char *)langCode, "tr")) || (0 == strcmp((const char *)langCode, "az")))) { // do Turkik special-casing
                    if (filledLength > 1) {
                        // 'I' came out of a decomposition: if U+0307 follows it in the output, remove it and emit 'i'
                        if (0x0307 == outCharacters[1]) {
                            if (--filledLength > 1) memmove((outCharacters + 1), (outCharacters + 2), sizeof(UTF32Char) * (filledLength - 1));
                            character = *outCharacters = 'i';
                            isTurkikCapitalI = true;
                        }
                    } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) {
                        // 'I' directly followed by U+0307 in the source: consume both and emit 'i'
                        character = *outCharacters = 'i';
                        filledLength = 1;
                        ++currentIndex;
                        isTurkikCapitalI = true;
                    }
                }
                if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) {
                    UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF];
                    const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit;
                    UTF32Char *outCharactersP = outCharacters;
                    uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode);

                    bufferLimit = bufferP + bufferLength;

                    if (filledLength > 0) --filledLength; // decrement filledLength (will add back later)

                    // make space for casefold characters
                    if ((filledLength > 0) && (bufferLength > 1)) {
                        CFIndex totalScalerLength = 0;

                        // Count the case-folded result in UTF-32 characters (a surrogate pair counts once)
                        while (bufferP < bufferLimit) {
                            if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP;
                            ++totalScalerLength;
                        }
                        // Shift what follows the first output character so the folded characters fit in front of it
                        memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char));
                        bufferP = caseFoldBuffer;
                    }

                    // fill
                    while (bufferP < bufferLimit) {
                        character = *(bufferP++);
                        if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) {
                            character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++));
                            nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
                        } else {
                            nonBaseBitmap = graphemeBMP;
                        }

                        if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
                            *(outCharactersP++) = character;
                            ++filledLength;
                        }
                    }
                }
            }
        }

        // collect following combining marks
        if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
            const uint8_t *nonBaseBitmap;
            const uint8_t *decompBitmap;
            // doFill == false: following grapheme-extend characters are consumed from the buffer but not written to outCharacters
            bool doFill = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? false : true);

            if (0 == filledLength) {
                *outCharacters = character; // filledLength will be updated below on demand

                if (doFill) { // check if really needs to fill
                    UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);

                    if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
                        nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate);
                        nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (nonBaseCharacter >> 16));
                        decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16));
                    } else {
                        nonBaseBitmap = graphemeBMP;
                        decompBitmap = decompBMP;
                    }

                    if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) {
                        filledLength = 1; // For the base character

                        if ((0 == (flags & kCFCompareDiacriticInsensitive)) || (nonBaseCharacter > 0x050F)) {
                            if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) {
                                filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength);
                            } else {
                                outCharacters[filledLength++] = nonBaseCharacter;
                            }
                        }
                        // nonBaseBitmap == graphemeBMP means the character was taken from a single UTF-16 unit; otherwise it was a surrogate pair
                        currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
                    } else {
                        doFill = false;
                    }
                }
            }

            while (filledLength < maxBufferLength) { // do the rest
                character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);

                if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
                    character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
                    nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
                    decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16));
                } else {
                    nonBaseBitmap = graphemeBMP;
                    decompBitmap = decompBMP;
                }
                if (isTurkikCapitalI) {
                    // First pass after the 'I' + U+0307 handling: only clear the flag; currentIndex is left unchanged
                    isTurkikCapitalI = false;
                } else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
                    if (doFill) {
                        if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) {
                            CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength);

                            if (0 == currentLength) break; // didn't fit

                            filledLength += currentLength;
                        } else {
                            outCharacters[filledLength++] = character;
                        }
                    } else if (0 == filledLength) {
                        filledLength = 1; // For the base character
                    }
                    currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
                } else {
                    break;
                }
            }

            if (filledLength > 1) {
                UTF32Char *sortCharactersLimit = outCharacters + filledLength;
                UTF32Char *sortCharacters = sortCharactersLimit - 1;

                // Walk back from the end over the trailing run of grapheme-extend characters
                while ((outCharacters < sortCharacters) && CFUniCharIsMemberOfBitmap(*sortCharacters, ((*sortCharacters < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (*sortCharacters >> 16))))) --sortCharacters;

                if ((sortCharactersLimit - sortCharacters) > 1) CFUniCharPrioritySort(sortCharacters, (sortCharactersLimit - sortCharacters)); // priority sort
            }
        }
    }

    // consumedLength is reported only when something was actually filled
    if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index);

    return filledLength;
}
2497
2498 static bool __CFStringFillCharacterSetInlineBuffer(CFCharacterSetInlineBuffer *buffer, CFStringCompareFlags compareOptions) {
2499 if (0 != (compareOptions & kCFCompareIgnoreNonAlphanumeric)) {
2500 static CFCharacterSetRef nonAlnumChars = NULL;
2501
2502 if (NULL == nonAlnumChars) {
2503 CFMutableCharacterSetRef cset = CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault, CFCharacterSetGetPredefined(kCFCharacterSetAlphaNumeric));
2504 CFCharacterSetInvert(cset);
2505 if (!OSAtomicCompareAndSwapPtrBarrier(NULL, cset, (void **)&nonAlnumChars)) CFRelease(cset);
2506 }
2507
2508 CFCharacterSetInitInlineBuffer(nonAlnumChars, buffer);
2509
2510 return true;
2511 }
2512
2513 return false;
2514 }
2515
2516 #define kCFStringStackBufferLength (__kCFStringInlineBufferLength)
2517
2518 CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions, CFLocaleRef locale) {
2519 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2520 UTF32Char strBuf1[kCFStringStackBufferLength];
2521 UTF32Char strBuf2[kCFStringStackBufferLength];
2522 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2523 UTF32Char str1Char, str2Char;
2524 CFIndex str1UsedLen, str2UsedLen;
2525 CFIndex str1Index = 0, str2Index = 0, strBuf1Index = 0, strBuf2Index = 0, strBuf1Len = 0, strBuf2Len = 0;
2526 CFIndex str1LocalizedIndex = 0, str2LocalizedIndex = 0;
2527 CFIndex forcedIndex1 = 0, forcedIndex2 = 0;
2528 CFIndex str2Len = CFStringGetLength(string2);
2529 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2530 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
2531 bool equalityOptions = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive)) ? true : false);
2532 bool numerically = ((compareOptions & kCFCompareNumerically) ? true : false);
2533 bool forceOrdering = ((compareOptions & kCFCompareForcedOrdering) ? true : false);
2534 const uint8_t *langCode;
2535 CFComparisonResult compareResult = kCFCompareEqualTo;
2536 UTF16Char otherChar;
2537 Boolean freeLocale = false;
2538 CFCharacterSetInlineBuffer *ignoredChars = NULL;
2539 CFCharacterSetInlineBuffer csetBuffer;
2540 bool numericEquivalence = false;
2541
2542 if ((compareOptions & kCFCompareLocalized) && (NULL == locale)) {
2543 locale = CFLocaleCopyCurrent();
2544 freeLocale = true;
2545 }
2546
2547 langCode = ((NULL == locale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale, true));
2548
2549 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
2550 ignoredChars = &csetBuffer;
2551 equalityOptions = true;
2552 }
2553
2554 if ((NULL == locale) && (NULL == ignoredChars) && !numerically) { // could do binary comp (be careful when adding new flags)
2555 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2556 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2557 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(string2, eightBitEncoding);
2558 CFIndex factor = sizeof(uint8_t);
2559
2560 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2561 compareOptions &= ~kCFCompareNonliteral; // remove non-literal
2562
2563 if ((kCFStringEncodingASCII == eightBitEncoding) && (false == forceOrdering)) {
2564 if (caseInsensitive) {
2565 int cmpResult = strncasecmp_l((const char *)str1Bytes + rangeToCompare.location, (const char *)str2Bytes, __CFMin(rangeToCompare.length, str2Len), NULL);
2566
2567 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2568
2569 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2570 }
2571 } else if (caseInsensitive || diacriticsInsensitive) {
2572 CFIndex limitLength = __CFMin(rangeToCompare.length, str2Len);
2573
2574 str1Bytes += rangeToCompare.location;
2575
2576 while (str1Index < limitLength) {
2577 str1Char = str1Bytes[str1Index];
2578 str2Char = str2Bytes[str1Index];
2579
2580 if (str1Char != str2Char) {
2581 if ((str1Char < 0x80) && (str2Char < 0x80)) {
2582 if (forceOrdering && (kCFCompareEqualTo == compareResult) && (str1Char != str2Char)) compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2583 if (caseInsensitive) {
2584 if ((str1Char >= 'A') && (str1Char <= 'Z')) str1Char += ('a' - 'A');
2585 if ((str2Char >= 'A') && (str2Char <= 'Z')) str2Char += ('a' - 'A');
2586 }
2587
2588 if (str1Char != str2Char) return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2589 } else {
2590 str1Bytes = NULL;
2591 break;
2592 }
2593 }
2594 ++str1Index;
2595 }
2596
2597 str2Index = str1Index;
2598
2599 if (str1Index == limitLength) {
2600 int cmpResult = rangeToCompare.length - str2Len;
2601
2602 return ((0 == cmpResult) ? compareResult : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2603 }
2604 }
2605 } else if (!equalityOptions && (NULL == str1Bytes) && (NULL == str2Bytes)) {
2606 str1Bytes = (const uint8_t *)CFStringGetCharactersPtr(string);
2607 str2Bytes = (const uint8_t *)CFStringGetCharactersPtr(string2);
2608 factor = sizeof(UTF16Char);
2609 #if __LITTLE_ENDIAN__
2610 if ((NULL != str1Bytes) && (NULL != str2Bytes)) { // we cannot use memcmp
2611 const UTF16Char *str1 = ((const UTF16Char *)str1Bytes) + rangeToCompare.location;
2612 const UTF16Char *str1Limit = str1 + __CFMin(rangeToCompare.length, str2Len);
2613 const UTF16Char *str2 = (const UTF16Char *)str2Bytes;
2614 CFIndex cmpResult = 0;
2615
2616 while ((0 == cmpResult) && (str1 < str1Limit)) cmpResult = (CFIndex)*(str1++) - (CFIndex)*(str2++);
2617
2618 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2619
2620 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2621 }
2622 #endif /* __LITTLE_ENDIAN__ */
2623 }
2624 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2625 int cmpResult = memcmp(str1Bytes + (rangeToCompare.location * factor), str2Bytes, __CFMin(rangeToCompare.length, str2Len) * factor);
2626
2627 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2628
2629 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2630 }
2631 }
2632
2633 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2634
2635 CFStringInitInlineBuffer(string, &inlineBuf1, rangeToCompare);
2636 CFStringInitInlineBuffer(string2, &inlineBuf2, CFRangeMake(0, str2Len));
2637
2638 if (NULL != locale) {
2639 str1LocalizedIndex = str1Index;
2640 str2LocalizedIndex = str2Index;
2641
2642 // We temporarily disable kCFCompareDiacriticInsensitive for SL <rdar://problem/6767096>. Should be revisited in NMOS <rdar://problem/7003830>
2643 if (forceOrdering) {
2644 diacriticsInsensitive = false;
2645 compareOptions &= ~kCFCompareDiacriticInsensitive;
2646 }
2647 }
2648 while ((str1Index < rangeToCompare.length) && (str2Index < str2Len)) {
2649 if (strBuf1Len == 0) {
2650 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2651 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str1Char += ('a' - 'A');
2652 str1UsedLen = 1;
2653 } else {
2654 str1Char = strBuf1[strBuf1Index++];
2655 }
2656 if (strBuf2Len == 0) {
2657 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2658 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str2Char += ('a' - 'A');
2659 str2UsedLen = 1;
2660 } else {
2661 str2Char = strBuf2[strBuf2Index++];
2662 }
2663
2664 if (numerically && ((0 == strBuf1Len) && (str1Char <= '9') && (str1Char >= '0')) && ((0 == strBuf2Len) && (str2Char <= '9') && (str2Char >= '0'))) { // If both are not ASCII digits, then don't do numerical comparison here
2665 uint64_t intValue1 = 0, intValue2 = 0; // !!! Doesn't work if numbers are > max uint64_t
2666 CFIndex str1NumRangeIndex = str1Index;
2667 CFIndex str2NumRangeIndex = str2Index;
2668
2669 do {
2670 intValue1 = (intValue1 * 10) + (str1Char - '0');
2671 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, ++str1Index);
2672 } while ((str1Char <= '9') && (str1Char >= '0'));
2673
2674 do {
2675 intValue2 = intValue2 * 10 + (str2Char - '0');
2676 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, ++str2Index);
2677 } while ((str2Char <= '9') && (str2Char >= '0'));
2678
2679 if (intValue1 == intValue2) {
2680 if (forceOrdering && (kCFCompareEqualTo == compareResult) && ((str1Index - str1NumRangeIndex) != (str2Index - str2NumRangeIndex))) {
2681 compareResult = (((str1Index - str1NumRangeIndex) < (str2Index - str2NumRangeIndex)) ? kCFCompareLessThan : kCFCompareGreaterThan);
2682 numericEquivalence = true;
2683 forcedIndex1 = str1NumRangeIndex;
2684 forcedIndex2 = str2NumRangeIndex;
2685 }
2686
2687 continue;
2688 } else if (intValue1 < intValue2) {
2689 if (freeLocale && locale) {
2690 CFRelease(locale);
2691 }
2692 return kCFCompareLessThan;
2693 } else {
2694 if (freeLocale && locale) {
2695 CFRelease(locale);
2696 }
2697 return kCFCompareGreaterThan;
2698 }
2699 }
2700
2701 if (str1Char != str2Char) {
2702 if (!equalityOptions) {
2703 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale));
2704 if (freeLocale && locale) {
2705 CFRelease(locale);
2706 }
2707 return compareResult;
2708 }
2709
2710 if (forceOrdering && (kCFCompareEqualTo == compareResult)) {
2711 compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2712 forcedIndex1 = str1LocalizedIndex;
2713 forcedIndex2 = str2LocalizedIndex;
2714 }
2715
2716 if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars)) {
2717 if (NULL != locale) {
2718 compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale);
2719 if (freeLocale && locale) {
2720 CFRelease(locale);
2721 }
2722 return compareResult;
2723 } else if (!caseInsensitive) {
2724 if (freeLocale && locale) {
2725 CFRelease(locale);
2726 }
2727 return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2728 }
2729 }
2730
2731 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2732 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2733 str1UsedLen = 2;
2734 }
2735
2736 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
2737 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2738 str2UsedLen = 2;
2739 }
2740
2741 if (NULL != ignoredChars) {
2742 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
2743 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2744 if (strBuf1Len == 0) str1Index += str1UsedLen;
2745 if (strBuf2Len > 0) --strBuf2Index;
2746 continue;
2747 }
2748 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
2749 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2750 if (strBuf2Len == 0) str2Index += str2UsedLen;
2751 if (strBuf1Len > 0) -- strBuf1Index;
2752 continue;
2753 }
2754 }
2755
2756 if (diacriticsInsensitive && (str1Index > 0)) {
2757 bool str1Skip = false;
2758 bool str2Skip = false;
2759
2760 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
2761 str1Char = str2Char;
2762 str1Skip = true;
2763 }
2764 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
2765 str2Char = str1Char;
2766 str2Skip = true;
2767 }
2768
2769 if (str1Skip != str2Skip) {
2770 if (str1Skip) str2Index -= str2UsedLen;
2771 if (str2Skip) str1Index -= str1UsedLen;
2772 }
2773 }
2774
2775 if (str1Char != str2Char) {
2776 if (0 == strBuf1Len) {
2777 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
2778 if (strBuf1Len > 0) {
2779 str1Char = *strBuf1;
2780 strBuf1Index = 1;
2781 }
2782 }
2783
2784 if ((0 == strBuf1Len) && (0 < strBuf2Len)) {
2785 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2786 if (freeLocale && locale) {
2787 CFRelease(locale);
2788 }
2789 return compareResult;
2790 }
2791
2792 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
2793 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
2794 if (strBuf2Len > 0) {
2795 str2Char = *strBuf2;
2796 strBuf2Index = 1;
2797 }
2798 if ((0 == strBuf2Len) || (str1Char != str2Char)) {
2799 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2800 if (freeLocale && locale) {
2801 CFRelease(locale);
2802 }
2803 return compareResult;
2804 }
2805 }
2806 }
2807
2808 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
2809 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2810 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
2811 ++strBuf1Index; ++strBuf2Index;
2812 }
2813 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2814 CFComparisonResult res = ((NULL == locale) ? ((strBuf1[strBuf1Index] < strBuf2[strBuf2Index]) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2815 if (freeLocale && locale) {
2816 CFRelease(locale);
2817 }
2818 return res;
2819 }
2820 }
2821 }
2822
2823 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2824 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2825
2826 if (strBuf1Len == 0) str1Index += str1UsedLen;
2827 if (strBuf2Len == 0) str2Index += str2UsedLen;
2828 if ((strBuf1Len == 0) && (strBuf2Len == 0)) {
2829 str1LocalizedIndex = str1Index;
2830 str2LocalizedIndex = str2Index;
2831 }
2832 }
2833
2834 if (diacriticsInsensitive || (NULL != ignoredChars)) {
2835 while (str1Index < rangeToCompare.length) {
2836 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2837 if ((str1Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
2838
2839 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2840
2841 if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char))) break;
2842
2843 str1Index += ((str1Char < 0x10000) ? 1 : 2);
2844 }
2845
2846 while (str2Index < str2Len) {
2847 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2848 if ((str2Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
2849
2850 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2851
2852 if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char))) break;
2853
2854 str2Index += ((str2Char < 0x10000) ? 1 : 2);
2855 }
2856 }
2857 // Need to recalc localized result here for forced ordering, ICU cannot do numericEquivalence
2858 if (!numericEquivalence && (NULL != locale) && (kCFCompareEqualTo != compareResult) && (str1Index == rangeToCompare.length) && (str2Index == str2Len)) compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(forcedIndex1, rangeToCompare.length - forcedIndex1), &inlineBuf2, CFRangeMake(forcedIndex2, str2Len - forcedIndex2), compareOptions, locale);
2859
2860 if (freeLocale && locale) {
2861 CFRelease(locale);
2862 }
2863
2864 return ((str1Index < rangeToCompare.length) ? kCFCompareGreaterThan : ((str2Index < str2Len) ? kCFCompareLessThan : compareResult));
2865 }
2866
2867
2868 CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions) { return CFStringCompareWithOptionsAndLocale(string, string2, rangeToCompare, compareOptions, NULL); }
2869
2870 CFComparisonResult CFStringCompare(CFStringRef string, CFStringRef str2, CFStringCompareFlags options) {
2871 return CFStringCompareWithOptions(string, str2, CFRangeMake(0, CFStringGetLength(string)), options);
2872 }
2873
2874 Boolean CFStringFindWithOptionsAndLocale(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFLocaleRef locale, CFRange *result) {
2875 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2876 CFIndex findStrLen = CFStringGetLength(stringToFind);
2877 Boolean didFind = false;
2878 bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive)) ? true : false);
2879 CFCharacterSetInlineBuffer *ignoredChars = NULL;
2880 CFCharacterSetInlineBuffer csetBuffer;
2881
2882 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
2883 ignoredChars = &csetBuffer;
2884 lengthVariants = true;
2885 }
2886
2887 if ((findStrLen > 0) && (rangeToSearch.length > 0) && ((findStrLen <= rangeToSearch.length) || lengthVariants)) {
2888 UTF32Char strBuf1[kCFStringStackBufferLength];
2889 UTF32Char strBuf2[kCFStringStackBufferLength];
2890 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2891 UTF32Char str1Char = 0, str2Char = 0;
2892 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2893 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2894 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(stringToFind, eightBitEncoding);
2895 const UTF32Char *characters, *charactersLimit;
2896 const uint8_t *langCode = NULL;
2897 CFIndex fromLoc, toLoc;
2898 CFIndex str1Index, str2Index;
2899 CFIndex strBuf1Len, strBuf2Len;
2900 CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length);
2901 bool equalityOptions = ((lengthVariants || (compareOptions & kCFCompareWidthInsensitive)) ? true : false);
2902 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2903 bool forwardAnchor = ((kCFCompareAnchored == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
2904 bool backwardAnchor = (((kCFCompareBackwards|kCFCompareAnchored) == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
2905 int8_t delta;
2906
2907 if (NULL == locale) {
2908 if (compareOptions & kCFCompareLocalized) {
2909 CFLocaleRef currentLocale = CFLocaleCopyCurrent();
2910 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(currentLocale, true);
2911 CFRelease(currentLocale);
2912 }
2913 } else {
2914 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale, true);
2915 }
2916
2917 CFStringInitInlineBuffer(string, &inlineBuf1, CFRangeMake(0, rangeToSearch.location + rangeToSearch.length));
2918 CFStringInitInlineBuffer(stringToFind, &inlineBuf2, CFRangeMake(0, findStrLen));
2919
2920 if (compareOptions & kCFCompareBackwards) {
2921 fromLoc = rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen);
2922 toLoc = (((compareOptions & kCFCompareAnchored) && !lengthVariants) ? fromLoc : rangeToSearch.location);
2923 } else {
2924 fromLoc = rangeToSearch.location;
2925 toLoc = ((compareOptions & kCFCompareAnchored) ? fromLoc : rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen));
2926 }
2927
2928 delta = ((fromLoc <= toLoc) ? 1 : -1);
2929
2930 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2931 uint8_t str1Byte, str2Byte;
2932
2933 while (1) {
2934 str1Index = fromLoc;
2935 str2Index = 0;
2936
2937 while ((str1Index < maxStr1Index) && (str2Index < findStrLen)) {
2938 str1Byte = str1Bytes[str1Index];
2939 str2Byte = str2Bytes[str2Index];
2940
2941 if (str1Byte != str2Byte) {
2942 if (equalityOptions) {
2943 if ((str1Byte < 0x80) && ((NULL == langCode) || ('I' != str1Byte))) {
2944 if (caseInsensitive && (str1Byte >= 'A') && (str1Byte <= 'Z')) str1Byte += ('a' - 'A');
2945 *strBuf1 = str1Byte;
2946 strBuf1Len = 1;
2947 } else {
2948 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2949 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2950 if (1 > strBuf1Len) {
2951 *strBuf1 = str1Char;
2952 strBuf1Len = 1;
2953 }
2954 }
2955
2956 if ((NULL != ignoredChars) && (forwardAnchor || (str1Index != fromLoc)) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str1Byte < 0x80) ? str1Byte : str1Char))) {
2957 ++str1Index;
2958 continue;
2959 }
2960
2961 if ((str2Byte < 0x80) && ((NULL == langCode) || ('I' != str2Byte))) {
2962 if (caseInsensitive && (str2Byte >= 'A') && (str2Byte <= 'Z')) str2Byte += ('a' - 'A');
2963 *strBuf2 = str2Byte;
2964 strBuf2Len = 1;
2965 } else {
2966 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2967 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2968 if (1 > strBuf2Len) {
2969 *strBuf2 = str2Char;
2970 strBuf2Len = 1;
2971 }
2972 }
2973
2974 if ((NULL != ignoredChars) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str2Byte < 0x80) ? str2Byte : str2Char))) {
2975 ++str2Index;
2976 continue;
2977 }
2978
2979 if ((1 == strBuf1Len) && (1 == strBuf2Len)) { // normal case
2980 if (*strBuf1 != *strBuf2) break;
2981 } else {
2982 CFIndex delta;
2983
2984 if (!caseInsensitive && (strBuf1Len != strBuf2Len)) break;
2985 if (memcmp(strBuf1, strBuf2, sizeof(UTF32Char) * __CFMin(strBuf1Len, strBuf2Len))) break;
2986
2987 if (strBuf1Len < strBuf2Len) {
2988 delta = strBuf2Len - strBuf1Len;
2989
2990 if ((str1Index + strBuf1Len + delta) > maxStr1Index) break;
2991
2992 characters = &(strBuf2[strBuf1Len]);
2993 charactersLimit = characters + delta;
2994
2995 while (characters < charactersLimit) {
2996 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1), &inlineBuf1, str1Index + 1, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2997 if ((strBuf1Len > 0) || (*characters != *strBuf1)) break;
2998 ++characters; ++str1Index;
2999 }
3000 if (characters < charactersLimit) break;
3001 } else if (strBuf2Len < strBuf1Len) {
3002 delta = strBuf1Len - strBuf2Len;
3003
3004 if ((str2Index + strBuf2Len + delta) > findStrLen) break;
3005
3006 characters = &(strBuf1[strBuf2Len]);
3007 charactersLimit = characters + delta;
3008
3009 while (characters < charactersLimit) {
3010 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str1Index + 1), &inlineBuf2, str2Index + 1, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
3011 if ((strBuf2Len > 0) || (*characters != *strBuf2)) break;
3012 ++characters; ++str2Index;
3013 }
3014 if (characters < charactersLimit) break;
3015 }
3016 }
3017 } else {
3018 break;
3019 }
3020 }
3021 ++str1Index; ++str2Index;
3022 }
3023
3024 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
3025 while (str2Index < findStrLen) {
3026 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
3027
3028 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
3029 ++str2Index;
3030 }
3031 }
3032
3033 if (str2Index == findStrLen) {
3034 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
3035 while (str1Index < maxStr1Index) {
3036 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3037
3038 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
3039 ++str1Index;
3040 }
3041 }
3042
3043 if (!backwardAnchor || (str1Index == maxStr1Index)) {
3044 didFind = true;
3045 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
3046 }
3047 break;
3048 }
3049
3050 if (fromLoc == toLoc) break;
3051 fromLoc += delta;
3052 }
3053 } else if (equalityOptions) {
3054 UTF16Char otherChar;
3055 CFIndex str1UsedLen, str2UsedLen, strBuf1Index = 0, strBuf2Index = 0;
3056 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
3057 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
3058 const uint8_t *combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3059
3060 while (1) {
3061 str1Index = fromLoc;
3062 str2Index = 0;
3063
3064 strBuf1Len = strBuf2Len = 0;
3065
3066 while (str2Index < findStrLen) {
3067 if (strBuf1Len == 0) {
3068 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3069 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A');
3070 str1UsedLen = 1;
3071 } else {
3072 str1Char = strBuf1[strBuf1Index++];
3073 }
3074 if (strBuf2Len == 0) {
3075 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
3076 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A');
3077 str2UsedLen = 1;
3078 } else {
3079 str2Char = strBuf2[strBuf2Index++];
3080 }
3081
3082 if (str1Char != str2Char) {
3083 if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars) && ((NULL == langCode) || !caseInsensitive)) break;
3084
3085 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3086 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3087 str1UsedLen = 2;
3088 }
3089
3090 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
3091 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
3092 str2UsedLen = 2;
3093 }
3094
3095 if (NULL != ignoredChars) {
3096 if ((forwardAnchor || (str1Index != fromLoc)) && (str1Index < maxStr1Index) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
3097 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
3098 if (strBuf1Len == 0) str1Index += str1UsedLen;
3099 if (strBuf2Len > 0) --strBuf2Index;
3100 continue;
3101 }
3102 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
3103 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
3104 if (strBuf2Len == 0) str2Index += str2UsedLen;
3105 if (strBuf1Len > 0) -- strBuf1Index;
3106 continue;
3107 }
3108 }
3109
3110 if (diacriticsInsensitive && (str1Index > fromLoc)) {
3111 bool str1Skip = false;
3112 bool str2Skip = false;
3113
3114 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
3115 str1Char = str2Char;
3116 str1Skip = true;
3117 }
3118 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
3119 str2Char = str1Char;
3120 str2Skip = true;
3121 }
3122
3123 if (str1Skip != str2Skip) {
3124 if (str1Skip) str2Index -= str2UsedLen;
3125 if (str2Skip) str1Index -= str1UsedLen;
3126 }
3127 }
3128
3129 if (str1Char != str2Char) {
3130 if (0 == strBuf1Len) {
3131 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
3132 if (strBuf1Len > 0) {
3133 str1Char = *strBuf1;
3134 strBuf1Index = 1;
3135 }
3136 }
3137
3138 if ((0 == strBuf1Len) && (0 < strBuf2Len)) break;
3139
3140 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
3141 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
3142 if ((0 == strBuf2Len) || (str1Char != *strBuf2)) break;
3143 strBuf2Index = 1;
3144 }
3145 }
3146
3147 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
3148 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
3149 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
3150 ++strBuf1Index; ++strBuf2Index;
3151 }
3152 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) break;
3153 }
3154 }
3155
3156 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
3157 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
3158
3159 if (strBuf1Len == 0) str1Index += str1UsedLen;
3160 if (strBuf2Len == 0) str2Index += str2UsedLen;
3161 }
3162
3163 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
3164 while (str2Index < findStrLen) {
3165 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
3166 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
3167 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
3168 }
3169 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
3170 str2Index += ((str2Char < 0x10000) ? 1 : 2);
3171 }
3172 }
3173
3174 if (str2Index == findStrLen) {
3175 bool match = true;
3176
3177 if (strBuf1Len > 0) {
3178 match = false;
3179
3180 if (diacriticsInsensitive && (strBuf1[0] < 0x0510)) {
3181 while (strBuf1Index < strBuf1Len) {
3182 if (!CFUniCharIsMemberOfBitmap(strBuf1[strBuf1Index], ((strBuf1[strBuf1Index] < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (strBuf1[strBuf1Index] >> 16))))) break;
3183 ++strBuf1Index;
3184 }
3185
3186 if (strBuf1Index == strBuf1Len) {
3187 str1Index += str1UsedLen;
3188 match = true;
3189 }
3190 }
3191 }
3192
3193 if (match && (compareOptions & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) && (str1Index < maxStr1Index)) {
3194 const uint8_t *nonBaseBitmap;
3195
3196 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3197
3198 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3199 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3200 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16));
3201 } else {
3202 nonBaseBitmap = graphemeBMP;
3203 }
3204
3205 if (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBitmap)) {
3206 if (diacriticsInsensitive) {
3207 if (str1Char < 0x10000) {
3208 CFIndex index = str1Index;
3209
3210 do {
3211 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, --index);
3212 } while (CFUniCharIsMemberOfBitmap(str1Char, graphemeBMP), (rangeToSearch.location < index));
3213
3214 if (str1Char < 0x0510) {
3215 while (++str1Index < maxStr1Index) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), graphemeBMP)) break;
3216 }
3217 }
3218 } else {
3219 match = false;
3220 }
3221 } else if (!diacriticsInsensitive) {
3222 otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index - 1);
3223
3224 // this is assuming viramas are only in BMP ???
3225 if ((str1Char == COMBINING_GRAPHEME_JOINER) || (otherChar == COMBINING_GRAPHEME_JOINER) || (otherChar == ZERO_WIDTH_JOINER) || ((otherChar >= HANGUL_CHOSEONG_START) && (otherChar <= HANGUL_JONGSEONG_END)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar, combClassBMP) == 9)) {
3226 CFRange clusterRange = CFStringGetRangeOfCharacterClusterAtIndex(string, str1Index - 1, kCFStringGraphemeCluster);
3227
3228 if (str1Index < (clusterRange.location + clusterRange.length)) match = false;
3229 }
3230 }
3231 }
3232
3233 if (match) {
3234 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
3235 while (str1Index < maxStr1Index) {
3236 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3237 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3238 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3239 }
3240 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
3241 str1Index += ((str1Char < 0x10000) ? 1 : 2);
3242 }
3243 }
3244
3245 if (!backwardAnchor || (str1Index == maxStr1Index)) {
3246 didFind = true;
3247 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
3248 }
3249 break;
3250 }
3251 }
3252
3253 if (fromLoc == toLoc) break;
3254 fromLoc += delta;
3255 }
3256 } else {
3257 while (1) {
3258 str1Index = fromLoc;
3259 str2Index = 0;
3260
3261 while (str2Index < findStrLen) {
3262 if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index)) break;
3263
3264 ++str1Index; ++str2Index;
3265 }
3266
3267 if (str2Index == findStrLen) {
3268 didFind = true;
3269 if (NULL != result) *result = CFRangeMake(fromLoc, findStrLen);
3270 break;
3271 }
3272
3273 if (fromLoc == toLoc) break;
3274 fromLoc += delta;
3275 }
3276 }
3277 }
3278
3279 return didFind;
3280 }
3281
3282
3283 Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFRange *result) { return CFStringFindWithOptionsAndLocale(string, stringToFind, rangeToSearch, compareOptions, NULL, result); }
3284
3285 // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
3286
3287 static const void *__rangeRetain(CFAllocatorRef allocator, const void *ptr) {
3288 CFRetain(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
3289 return ptr;
3290 }
3291
3292 static void __rangeRelease(CFAllocatorRef allocator, const void *ptr) {
3293 CFRelease(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
3294 }
3295
3296 static CFStringRef __rangeCopyDescription(const void *ptr) {
3297 CFRange range = *(CFRange *)ptr;
3298 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("{%ld, %ld}"), (long)range.location, (long)range.length);
3299 }
3300
3301 static Boolean __rangeEqual(const void *ptr1, const void *ptr2) {
3302 CFRange range1 = *(CFRange *)ptr1;
3303 CFRange range2 = *(CFRange *)ptr2;
3304 return (range1.location == range2.location) && (range1.length == range2.length);
3305 }
3306
3307
3308 CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
3309 CFRange foundRange;
3310 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
3311 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
3312 CFMutableDataRef rangeStorage = NULL; // Basically an array of CFRange, CFDataRef (packed)
3313 uint8_t *rangeStorageBytes = NULL;
3314 CFIndex foundCount = 0;
3315 CFIndex capacity = 0; // Number of CFRange, CFDataRef element slots in rangeStorage
3316
3317 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3318
3319 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
3320 // Determine the next range
3321 if (backwards) {
3322 rangeToSearch.length = foundRange.location - rangeToSearch.location;
3323 } else {
3324 rangeToSearch.location = foundRange.location + foundRange.length;
3325 rangeToSearch.length = endIndex - rangeToSearch.location;
3326 }
3327
3328 // If necessary, grow the data and squirrel away the found range
3329 if (foundCount >= capacity) {
3330 if (rangeStorage == NULL) rangeStorage = CFDataCreateMutable(alloc, 0);
3331 capacity = (capacity + 4) * 2;
3332 CFDataSetLength(rangeStorage, capacity * (sizeof(CFRange) + sizeof(CFDataRef)));
3333 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage) + foundCount * (sizeof(CFRange) + sizeof(CFDataRef));
3334 }
3335 memmove(rangeStorageBytes, &foundRange, sizeof(CFRange)); // The range
3336 memmove(rangeStorageBytes + sizeof(CFRange), &rangeStorage, sizeof(CFDataRef)); // The data
3337 rangeStorageBytes += (sizeof(CFRange) + sizeof(CFDataRef));
3338 foundCount++;
3339 }
3340
3341 if (foundCount > 0) {
3342 CFIndex cnt;
3343 CFMutableArrayRef array;
3344 const CFArrayCallBacks callbacks = {0, __rangeRetain, __rangeRelease, __rangeCopyDescription, __rangeEqual};
3345
3346 CFDataSetLength(rangeStorage, foundCount * (sizeof(CFRange) + sizeof(CFDataRef))); // Tighten storage up
3347 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage);
3348
3349 array = CFArrayCreateMutable(alloc, foundCount * sizeof(CFRange *), &callbacks);
3350 for (cnt = 0; cnt < foundCount; cnt++) {
3351 // Each element points to the appropriate CFRange in the CFData
3352 CFArrayAppendValue(array, rangeStorageBytes + cnt * (sizeof(CFRange) + sizeof(CFDataRef)));
3353 }
3354 CFRelease(rangeStorage); // We want the data to go away when all CFRanges inside it are released...
3355 return array;
3356 } else {
3357 return NULL;
3358 }
3359 }
3360
3361
3362 CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFStringCompareFlags compareOptions) {
3363 CFRange foundRange;
3364
3365 if (CFStringFindWithOptions(string, stringToFind, CFRangeMake(0, CFStringGetLength(string)), compareOptions, &foundRange)) {
3366 return foundRange;
3367 } else {
3368 return CFRangeMake(kCFNotFound, 0);
3369 }
3370 }
3371
3372 Boolean CFStringHasPrefix(CFStringRef string, CFStringRef prefix) {
3373 return CFStringFindWithOptions(string, prefix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored, NULL);
3374 }
3375
3376 Boolean CFStringHasSuffix(CFStringRef string, CFStringRef suffix) {
3377 return CFStringFindWithOptions(string, suffix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored|kCFCompareBackwards, NULL);
3378 }
3379
3380 #define MAX_TRANSCODING_LENGTH 4
3381
3382 #define HANGUL_JONGSEONG_COUNT (28)
3383
3384 CF_INLINE bool _CFStringIsHangulLVT(UTF32Char character) {
3385 return (((character - HANGUL_SYLLABLE_START) % HANGUL_JONGSEONG_COUNT) ? true : false);
3386 }
3387
/* 16-entry table of lengths; how it is indexed is not visible in this part of
   the file (presumably by a transcoding-hint value — confirm against its users). */
static uint8_t __CFTranscodingHintLength[] = {
    2, 3, 4, 4,
    4, 4, 4, 2,
    2, 2, 2, 4,
    0, 0, 0, 0
};
3391
/* Hangul state constants (values spelled out; numbering is unchanged). */
enum {
    kCFStringHangulStateL     = 0,
    kCFStringHangulStateV     = 1,
    kCFStringHangulStateT     = 2,
    kCFStringHangulStateLV    = 3,
    kCFStringHangulStateLVT   = 4,
    kCFStringHangulStateBreak = 5
};
3400
3401 static const CFCharacterSetInlineBuffer *__CFStringGetFitzpatrickModifierBaseCharacterSet(void) {
3402 static CFCharacterSetInlineBuffer buffer;
3403 static dispatch_once_t initOnce;
3404 dispatch_once(&initOnce, ^{ // based on UTR#51 1.0 (draft 7) for Unicode 8.0
3405 /*
3406 U+261D WHITE UP POINTING INDEX
3407 U+2639 WHITE FROWNING FACE…U+263A WHITE SMILING FACE
3408 U+270A RAISED FIST…U+270D WRITING HAND
3409 U+1F385 FATHER CHRISTMAS
3410 U+1F3C2 SNOWBOARDER…U+1F3C4 SURFER
3411 U+1F3C7 HORSE RACING
3412 U+1F3CA SWIMMER
3413 U+1F442 EAR…U+1F443 NOSE
3414 U+1F446 WHITE UP POINTING BACKHAND INDEX…U+1F450 OPEN HANDS SIGN
3415 U+1F466 BOY…U+1F469 WOMAN
3416 U+1F46E POLICE OFFICER…U+1F478 PRINCESS
3417 U+1F47C BABY ANGEL
3418 U+1F47F IMP
3419 U+1F481 INFORMATION DESK PERSON…U+1F482 GUARDSMAN
3420 U+1F483 DANCER
3421 U+1F485 NAIL POLISH
3422 U+1F486 FACE MASSAGE…U+1F487 HAIRCUT
3423 U+1F4AA FLEXED BICEPS
3424 U+1F590 RAISED HAND WITH FINGERS SPLAYED
3425 U+1F595 REVERSED HAND WITH MIDDLE FINGER EXTENDED…U+1F596 RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS
3426 U+1F600 GRINNING FACE…U+1F637 FACE WITH MEDICAL MASK
3427 U+1F641 SLIGHTLY FROWNING FACE…U+1F642 SLIGHTLY SMILING FACE
3428 U+1F645 FACE WITH NO GOOD GESTURE…U+1F647 PERSON BOWING DEEPLY
3429 U+1F64B HAPPY PERSON RAISING ONE HAND
3430 U+1F64C PERSON RAISING BOTH HANDS IN CELEBRATION
3431 U+1F64D PERSON FROWNING…U+1F64E PERSON WITH POUTING FACE
3432 U+1F64F PERSON WITH FOLDED HANDS
3433 U+1F6A3 ROWBOAT
3434 U+1F6B4 BICYCLIST…U+1F6B6 PEDESTRIAN
3435 U+1F6C0 BATH
3436 */
3437 CFMutableCharacterSetRef cset = CFCharacterSetCreateMutable(NULL);
3438 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x261D, 1)); // WHITE UP POINTING INDEX
3439 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x2639, 2)); // WHITE FROWNING FACE ~ WHITE SMILING FACE
3440 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x270A, 4)); // RAISED FIST ~ WRITING HAND
3441 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F385, 1)); // FATHER CHRISTMAS
3442 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F3C2, 3)); // SNOWBOARDER ~ SURFER
3443 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F3C7, 1)); // HORSE RACING
3444 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F3CA, 1)); // SWIMMER
3445 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F442, 2)); // EAR ~ NOSE
3446 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F446, 0x1F451 - 0x1F446)); // WHITE UP POINTING BACKHAND INDEX ~ OPEN HANDS SIGN
3447 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F466, 4)); // BOY ~ WOMAN
3448 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F46E, 0x1F479 - 0x1F46E)); // POLICE OFFICER ~ PRINCESS
3449 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F47C, 1)); // BABY ANGEL
3450 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F47F, 1)); // IMP
3451 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F481, 3)); // INFORMATION DESK PERSON ~ DANCER
3452 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F485, 3)); // NAIL POLISH ~ HAIRCUT
3453 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F4AA, 1)); // FLEXED BICEPS
3454 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F590, 1)); // RAISED HAND WITH FINGERS SPLAYED
3455 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F595, 2)); // REVERSED HAND WITH MIDDLE FINGER EXTENDED ~ RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS
3456 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F600, 0x1F638 - 0x1F600)); // GRINNING FACE ~ FACE WITH MEDICAL MASK
3457 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F641, 2)); // SLIGHTLY FROWNING FACE ~ SLIGHTLY SMILING FACE
3458
3459 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F645, 3)); // FACE WITH NO GOOD GESTURE ~ PERSON BOWING DEEPLY
3460 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F64B, 0x1F650 - 0x1F64B)); // HAPPY PERSON RAISING ONE HAND ~ PERSON WITH FOLDED HANDS
3461 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F6A3, 1)); // ROWBOAT
3462 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F6B4, 0x1F6B7 - 0x1F6B4)); // BICYCLIST ~ PEDESTRIAN
3463 CFCharacterSetAddCharactersInRange(cset, CFRangeMake(0x1F6C0, 1)); // BATH
3464 CFCharacterSetCompact(cset);
3465 CFCharacterSetInitInlineBuffer(cset, &buffer);
3466 });
3467
3468 return (const CFCharacterSetInlineBuffer *)&buffer;
3469 }
3470
3471 static inline bool __CFStringIsFitzpatrickModifiers(UTF32Char character) { return ((character >= 0x1F3FB) && (character <= 0x1F3FF) ? true : false); }
3472 static inline bool __CFStringIsBaseForFitzpatrickModifiers(UTF32Char character) {
3473 if (((character >= 0x2600) && (character < 0x27C0)) || ((character >= 0x1F300) && (character < 0x1F700))) { // Misc symbols, dingbats, & emoticons
3474 return (CFCharacterSetInlineBufferIsLongCharacterMember(__CFStringGetFitzpatrickModifierBaseCharacterSet(), character) ? true : false);
3475 }
3476
3477 return false;
3478 }
3479
3480 static inline bool __CFStringIsFamilySequenceBaseCharacterHigh(UTF16Char character) { return (character == 0xD83D) ? true : false; }
3481 static inline bool __CFStringIsFamilySequenceBaseCharacterLow(UTF16Char character) { return (((character >= 0xDC66) && (character <= 0xDC69)) || (character == 0xDC8B) ? true : false); }
3482 static inline bool __CFStringIsFamilySequenceCluster(CFStringInlineBuffer *buffer, CFRange range) {
3483 UTF16Char character = CFStringGetCharacterFromInlineBuffer(buffer, range.location);
3484
3485 if (character == 0x2764) { // HEART
3486 return true;
3487 } else if (range.length > 1) {
3488 if (__CFStringIsFamilySequenceBaseCharacterHigh(character) && __CFStringIsFamilySequenceBaseCharacterLow(CFStringGetCharacterFromInlineBuffer(buffer, range.location + 1))) return true;
3489 }
3490 return false;
3491 }
3492
// Computes the composed character sequence that contains the UTF-16 unit at index `start`.
//
//   buffer    - inline buffer over the string being examined
//   start     - index (relative to the buffer) of the unit to start from
//   type      - requested cluster type; kCFStringBackwardDeletionCluster disables combining for
//               characters in the 0x0530...0x194F range
//   bmpBitmap - membership bitmap used for BMP characters
//   csetType  - character set id used to fetch the bitmap of another plane when a surrogate pair is met
//
// Returns the range {start, end - start} after extending `start` backward and `end` forward.
// NOTE(review): the code reads indexes such as start - 1 and end + 1 without bounds checks, and the
// forward loops stop when the fetched character is 0; this presumably relies on
// CFStringGetCharacterFromInlineBuffer() returning 0 for out-of-range indexes - confirm.
3493 static CFRange _CFStringInlineBufferGetComposedRange(CFStringInlineBuffer *buffer, CFIndex start, CFStringCharacterClusterType type, const uint8_t *bmpBitmap, CFIndex csetType) {
3494 CFIndex end = start + 1;
3495 const uint8_t *bitmap = bmpBitmap;
3496 UTF32Char character;
3497 UTF16Char otherSurrogate;
3498 uint8_t step;
3499
3500 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3501
3502 // We don't combine characters in Armenian ~ Limbu range for backward deletion
3503 if ((type != kCFStringBackwardDeletionCluster) || (character < 0x0530) || (character > 0x194F)) {
3504 // Check if the current is surrogate
3505 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start + 1)))) {
// Starting on a high surrogate: take in its low surrogate and switch to that plane's bitmap
3506 ++end;
3507 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3508 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3509 }
3510
3511 // Extend backward
// Each pass decides whether `character` (at `start`) attaches to what precedes it; if so, step back one unit.
3512 while (start > 0) {
3513 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3514
3515 if (character < 0x10000) { // the first round could be already be non-BMP
3516 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)))) {
// Sitting on the low half of a pair: combine with the preceding high half and move `start` onto it
3517 character = CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate, character);
3518 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3519 if (--start == 0) break; // starting with non-BMP combining mark
3520 } else {
3521 bitmap = bmpBitmap;
3522 }
3523 }
3524
3525 if (__CFStringIsFitzpatrickModifiers(character) && (start > 0)) {
// A skin tone modifier only attaches when the preceding character (possibly a surrogate pair) is a valid base
3526 UTF32Char baseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, start - 1);
3527
3528 if (CFUniCharIsSurrogateLowCharacter(baseCharacter) && ((start - 1) > 0)) {
3529 UTF16Char otherCharacter = CFStringGetCharacterFromInlineBuffer(buffer, start - 2);
3530
3531 if (CFUniCharIsSurrogateHighCharacter(otherCharacter)) baseCharacter = CFUniCharGetLongCharacterForSurrogatePair(otherCharacter, baseCharacter);
3532 }
3533
3534 if (!__CFStringIsBaseForFitzpatrickModifiers(baseCharacter)) break;
3535 } else {
// Stop unless the character is in the bitmap, is 0xFF9E / 0xFF9F (halfwidth katakana sound marks), or lies in 0xF870...0xF87F
3536 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3537 }
3538
3539 --start;
3540
3541 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3542 }
3543 }
3544
3545 // Hangul
// Conjoining jamo and precomposed syllables are grouped with a small state machine (see the kCFStringHangulState* enum)
3546 if (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END))) {
3547 uint8_t state;
3548 uint8_t initialState;
3549
// Classify the starting character by its code point range
3550 if (character < HANGUL_JUNGSEONG_START) {
3551 state = kCFStringHangulStateL;
3552 } else if (character < HANGUL_JONGSEONG_START) {
3553 state = kCFStringHangulStateV;
3554 } else if (character < HANGUL_SYLLABLE_START) {
3555 state = kCFStringHangulStateT;
3556 } else {
3557 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3558 }
// Remembered so the forward pass can restart from the same state after the backward pass changed `state`
3559 initialState = state;
3560
3561 // Extend backward
// Continues while the preceding character is a jamo or a precomposed syllable
3562 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)) >= HANGUL_CHOSEONG_START) && (character <= HANGUL_SYLLABLE_END) && ((character <= HANGUL_JONGSEONG_END) || (character >= HANGUL_SYLLABLE_START))) {
3563 switch (state) {
3564 case kCFStringHangulStateV:
// Before a V: L and LV syllables join; another V leaves the state unchanged; anything above the V range breaks
3565 if (character <= HANGUL_CHOSEONG_END) {
3566 state = kCFStringHangulStateL;
3567 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END) && !_CFStringIsHangulLVT(character)) {
3568 state = kCFStringHangulStateLV;
3569 } else if (character > HANGUL_JUNGSEONG_END) {
3570 state = kCFStringHangulStateBreak;
3571 }
3572 break;
3573
3574 case kCFStringHangulStateT:
// Before a T: V and any precomposed syllable join; another T leaves the state unchanged; an L breaks
3575 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JUNGSEONG_END)) {
3576 state = kCFStringHangulStateV;
3577 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)) {
3578 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3579 } else if (character < HANGUL_JUNGSEONG_START) {
3580 state = kCFStringHangulStateBreak;
3581 }
3582 break;
3583
3584 default:
// Before L / LV / LVT: only another leading jamo may join
3585 state = ((character < HANGUL_JUNGSEONG_START) ? kCFStringHangulStateL : kCFStringHangulStateBreak);
3586 break;
3587 }
3588
3589 if (state == kCFStringHangulStateBreak) break;
3590 --start;
3591 }
3592
3593 // Extend forward
3594 state = initialState;
3595 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) && (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)))) {
3596 switch (state) {
3597 case kCFStringHangulStateLV:
3598 case kCFStringHangulStateV:
// After LV / V: a following V or T jamo joins, anything else breaks
3599 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) {
3600 state = ((character < HANGUL_JONGSEONG_START) ? kCFStringHangulStateV : kCFStringHangulStateT);
3601 } else {
3602 state = kCFStringHangulStateBreak;
3603 }
3604 break;
3605
3606 case kCFStringHangulStateLVT:
3607 case kCFStringHangulStateT:
// After LVT / T: only further T jamo join
3608 state = (((character >= HANGUL_JONGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) ? kCFStringHangulStateT : kCFStringHangulStateBreak);
3609 break;
3610
3611 default:
// After L: another L, a V, or a precomposed syllable joins; a T breaks
3612 if (character < HANGUL_JUNGSEONG_START) {
3613 state = kCFStringHangulStateL;
3614 } else if (character < HANGUL_JONGSEONG_START) {
3615 state = kCFStringHangulStateV;
3616 } else if (character >= HANGUL_SYLLABLE_START) {
3617 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3618 } else {
3619 state = kCFStringHangulStateBreak;
3620 }
3621 break;
3622 }
3623
3624 if (state == kCFStringHangulStateBreak) break;
3625 ++end;
3626 }
3627 }
3628
// NOTE(review): `character` here is whatever was read last; when the Hangul branch ran, its loops have overwritten it.
3629 bool prevIsFitzpatrickBase = __CFStringIsBaseForFitzpatrickModifiers(character);
3630
3631 // Extend forward
// Absorb following characters that attach to the cluster; a fetched value of 0 ends the loop
3632 while ((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) {
3633 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3634
3635 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, end + 1)))) {
3636 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3637 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
// A surrogate pair occupies two UTF-16 units
3638 step = 2;
3639 } else {
3640 bitmap = bmpBitmap;
3641 step = 1;
3642 }
3643
// Keep going only for a skin tone modifier right after a valid base, a bitmap member, 0xFF9E / 0xFF9F, or 0xF870...0xF87F
3644 if ((!prevIsFitzpatrickBase || !__CFStringIsFitzpatrickModifiers(character)) && !CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3645
3646 prevIsFitzpatrickBase = __CFStringIsBaseForFitzpatrickModifiers(character);
3647
3648 end += step;
3649 }
3650
3651 return CFRangeMake(start, end - start);
3652 }
3653
3654 CF_INLINE bool _CFStringIsVirama(UTF32Char character, const uint8_t *combClassBMP) {
3655 return ((character == COMBINING_GRAPHEME_JOINER) || (CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)((character < 0x10000) ? combClassBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (character >> 16)))) == 9) ? true : false);
3656 }
3657
3658 CFRange CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string, CFIndex charIndex, CFStringCharacterClusterType type) {
3659 CFRange range;
3660 CFIndex currentIndex;
3661 CFIndex length = CFStringGetLength(string);
3662 CFIndex csetType = ((kCFStringGraphemeCluster == type) ? kCFUniCharGraphemeExtendCharacterSet : kCFUniCharNonBaseCharacterSet);
3663 CFStringInlineBuffer stringBuffer;
3664 const uint8_t *bmpBitmap;
3665 const uint8_t *letterBMP;
3666 static const uint8_t *combClassBMP = NULL;
3667 UTF32Char character;
3668 UTF16Char otherSurrogate;
3669
3670 if (charIndex >= length) return CFRangeMake(kCFNotFound, 0);
3671
3672 /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
3673 */
3674 if (!CF_IS_OBJC(__kCFStringTypeID, string) && __CFStrIsEightBit(string)) return CFRangeMake(charIndex, 1);
3675
3676 bmpBitmap = CFUniCharGetBitmapPtrForPlane(csetType, 0);
3677 letterBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, 0);
3678 if (NULL == combClassBMP) combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3679
3680 CFStringInitInlineBuffer(string, &stringBuffer, CFRangeMake(0, length));
3681
3682 // Get composed character sequence first
3683 range = _CFStringInlineBufferGetComposedRange(&stringBuffer, charIndex, type, bmpBitmap, csetType);
3684
3685 // Do grapheme joiners
3686 if (type < kCFStringCursorMovementCluster) {
3687 const uint8_t *letter = letterBMP;
3688
3689 // Check to see if we have a letter at the beginning of initial cluster
3690 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location);
3691
3692 if ((range.length > 1) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location + 1)))) {
3693 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3694 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3695 }
3696
3697 if ((character == ZERO_WIDTH_JOINER) || CFUniCharIsMemberOfBitmap(character, letter)) {
3698 CFRange otherRange;
3699
3700 // Check if preceded by grapheme joiners (U034F and viramas)
3701 otherRange.location = currentIndex = range.location;
3702
3703 while (currentIndex > 1) {
3704 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex);
3705
3706 // ??? We're assuming viramas only in BMP
3707 if ((_CFStringIsVirama(character, combClassBMP) || ((character == ZERO_WIDTH_JOINER) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex), combClassBMP))) && (currentIndex > 0)) {
3708 --currentIndex;
3709 } else {
3710 break;
3711 }
3712
3713 currentIndex = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType).location;
3714
3715 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3716
3717 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1)))) {
3718 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3719 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3720 --currentIndex;
3721 } else {
3722 letter = letterBMP;
3723 }
3724
3725 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3726 range.location = currentIndex;
3727 }
3728
3729 range.length += otherRange.location - range.location;
3730
3731 // Check if followed by grapheme joiners
3732 if ((range.length > 1) && ((range.location + range.length) < length)) {
3733 otherRange = range;
3734 currentIndex = otherRange.location + otherRange.length;
3735
3736 do {
3737 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1);
3738
3739 // ??? We're assuming viramas only in BMP
3740 if ((character != ZERO_WIDTH_JOINER) && !_CFStringIsVirama(character, combClassBMP)) break;
3741
3742 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3743
3744 if (character == ZERO_WIDTH_JOINER) character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, ++currentIndex);
3745
3746 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex + 1)))) {
3747 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3748 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3749 } else {
3750 letter = letterBMP;
3751 }
3752
3753 // We only conjoin letters
3754 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3755 otherRange = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType);
3756 currentIndex = otherRange.location + otherRange.length;
3757 } while ((otherRange.location + otherRange.length) < length);
3758 range.length = currentIndex - range.location;
3759 }
3760 }
3761 }
3762
3763 // Check if we're part of prefix transcoding hints
3764 CFIndex otherIndex;
3765
3766 currentIndex = (range.location + range.length) - (MAX_TRANSCODING_LENGTH + 1);
3767 if (currentIndex < 0) currentIndex = 0;
3768
3769 while (currentIndex <= range.location) {
3770 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3771
3772 if ((character & 0x1FFFF0) == 0xF860) { // transcoding hint
3773 otherIndex = currentIndex + __CFTranscodingHintLength[(character - 0xF860)] + 1;
3774 if (otherIndex >= (range.location + range.length)) {
3775 if (otherIndex <= length) {
3776 range.location = currentIndex;
3777 range.length = otherIndex - currentIndex;
3778 }
3779 break;
3780 }
3781 }
3782 ++currentIndex;
3783 }
3784
3785 // Family face sequence
3786 CFRange aCluster;
3787
3788 if (range.location > 1) { // there are more than 2 chars
3789 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location);
3790
3791 if (__CFStringIsFamilySequenceCluster(&stringBuffer, range) || (character == ZERO_WIDTH_JOINER)) { // extend backward
3792 currentIndex = (character == ZERO_WIDTH_JOINER) ? range.location + 1 : range.location;
3793
3794 while ((currentIndex > 1) && (ZERO_WIDTH_JOINER == CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1))) {
3795 aCluster = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex - 2, type, bmpBitmap, csetType);
3796
3797 if (__CFStringIsFamilySequenceCluster(&stringBuffer, aCluster) && (aCluster.location < range.location)) {
3798 currentIndex = aCluster.location;
3799 } else {
3800 break;
3801 }
3802 }
3803
3804 if (currentIndex < range.location) {
3805 range.length += range.location - currentIndex;
3806 range.location = currentIndex;
3807 }
3808 }
3809 }
3810
3811 // Extend forward
3812 if (range.location + range.length < length) {
3813 currentIndex = range.location + range.length - 1;
3814 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3815
3816 if ((ZERO_WIDTH_JOINER == character) || __CFStringIsFamilySequenceCluster(&stringBuffer, _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType))) {
3817
3818 if (ZERO_WIDTH_JOINER != character) ++currentIndex; // move to the end of cluster
3819
3820 while (((currentIndex + 1) < length) && (ZERO_WIDTH_JOINER == CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex))) {
3821 aCluster = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex + 1, type, bmpBitmap, csetType);
3822 if (__CFStringIsFamilySequenceCluster(&stringBuffer, aCluster)) {
3823 currentIndex = aCluster.location + aCluster.length;
3824 if ((aCluster.length > 1) && (ZERO_WIDTH_JOINER == CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1))) --currentIndex;
3825 } else {
3826 break;
3827 }
3828 }
3829 if (currentIndex > (range.location + range.length)) range.length = currentIndex - range.location;
3830 }
3831 }
3832 return range;
3833 }
3834
3835 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
3836 return CFStringGetRangeOfCharacterClusterAtIndex(theString, theIndex, kCFStringComposedCharacterCluster);
3837 }
3838
3839 /*!
3840 @function CFStringFindCharacterFromSet
3841 Query the range of characters contained in the specified character set.
3842 @param theString The CFString which is to be searched. If this
3843 parameter is not a valid CFString, the behavior is
3844 undefined.
3845 @param theSet The CFCharacterSet against which the membership
3846 of characters is checked. If this parameter is not a valid
3847 CFCharacterSet, the behavior is undefined.
3848 @param range The range of characters within the string to search. If
3849 the range location or end point (defined by the location
3850 plus length minus 1) are outside the index space of the
3851 string (0 to N-1 inclusive, where N is the length of the
3852 string), the behavior is undefined. If the range length is
3853 negative, the behavior is undefined. The range may be empty
3854 (length 0), in which case no search is performed.
3855 @param searchOptions The bitwise-or'ed option flags to control
3856 the search behavior. The supported options are
3857 kCFCompareBackwards and kCFCompareAnchored.
3858 If other option flags are specified, the behavior
3859 is undefined.
3860 @param result The pointer to a CFRange supplied by the caller in
3861 which the search result is stored. If a pointer to an invalid
3862 memory is specified, the behavior is undefined.
3863 @result true, if at least a character which is a member of the character
3864 set is found and result is filled, otherwise, false.
3865 */
// Inclusive bounds of the UTF-16 surrogate code unit range (high and low surrogates together);
// CFStringFindCharacterFromSet() uses them to detect units that must be examined as a pair.
3866 #define SURROGATE_START 0xD800
3867 #define SURROGATE_END 0xDFFF
3868
3869 CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFStringCompareFlags searchOptions, CFRange *result) {
3870 CFStringInlineBuffer stringBuffer;
3871 CFCharacterSetInlineBuffer csetBuffer;
3872 UniChar ch;
3873 CFIndex step;
3874 CFIndex fromLoc, toLoc, cnt; // fromLoc and toLoc are inclusive
3875 Boolean found = false;
3876 Boolean done = false;
3877
3878 //#warning FIX ME !! Should support kCFCompareNonliteral
3879
3880 if ((rangeToSearch.location + rangeToSearch.length > CFStringGetLength(theString)) || (rangeToSearch.length == 0)) return false;
3881
3882 if (searchOptions & kCFCompareBackwards) {
3883 fromLoc = rangeToSearch.location + rangeToSearch.length - 1;
3884 toLoc = rangeToSearch.location;
3885 } else {
3886 fromLoc = rangeToSearch.location;
3887 toLoc = rangeToSearch.location + rangeToSearch.length - 1;
3888 }
3889 if (searchOptions & kCFCompareAnchored) {
3890 toLoc = fromLoc;
3891 }
3892
3893 step = (fromLoc <= toLoc) ? 1 : -1;
3894 cnt = fromLoc;
3895
3896 CFStringInitInlineBuffer(theString, &stringBuffer, rangeToSearch);
3897 CFCharacterSetInitInlineBuffer(theSet, &csetBuffer);
3898
3899 do {
3900 ch = CFStringGetCharacterFromInlineBuffer(&stringBuffer, cnt - rangeToSearch.location);
3901 if ((ch >= SURROGATE_START) && (ch <= SURROGATE_END)) {
3902 int otherCharIndex = cnt + step;
3903
3904 if (((step < 0) && (otherCharIndex < toLoc)) || ((step > 0) && (otherCharIndex > toLoc))) {
3905 done = true;
3906 } else {
3907 UniChar highChar;
3908 UniChar lowChar = CFStringGetCharacterFromInlineBuffer(&stringBuffer, otherCharIndex - rangeToSearch.location);
3909
3910 if (cnt < otherCharIndex) {
3911 highChar = ch;
3912 } else {
3913 highChar = lowChar;
3914 lowChar = ch;
3915 }
3916
3917 if (CFUniCharIsSurrogateHighCharacter(highChar) && CFUniCharIsSurrogateLowCharacter(lowChar) && CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, CFUniCharGetLongCharacterForSurrogatePair(highChar, lowChar))) {
3918 if (result) *result = CFRangeMake((cnt < otherCharIndex ? cnt : otherCharIndex), 2);
3919 return true;
3920 } else if (otherCharIndex == toLoc) {
3921 done = true;
3922 } else {
3923 cnt = otherCharIndex + step;
3924 }
3925 }
3926 } else if (CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, ch)) {
3927 done = found = true;
3928 } else if (cnt == toLoc) {
3929 done = true;
3930 } else {
3931 cnt += step;
3932 }
3933 } while (!done);
3934
3935 if (found && result) *result = CFRangeMake(cnt, 1);
3936 return found;
3937 }
3938
3939 /* Line range code */
3940
// Characters recognized by isALineSeparatorTypeCharacter().
// NextLine and LineSeparator only count when its includeLineEndings argument is true.
3941 #define CarriageReturn '\r' /* 0x0d */
3942 #define NewLine '\n' /* 0x0a */
3943 #define NextLine 0x0085
3944 #define LineSeparator 0x2028
3945 #define ParaSeparator 0x2029
3946
3947 CF_INLINE Boolean isALineSeparatorTypeCharacter(UniChar ch, Boolean includeLineEndings) {
3948 if (ch > CarriageReturn && ch < NextLine) return false; /* Quick test to cover most chars */
3949 return (ch == NewLine || ch == CarriageReturn || ch == ParaSeparator || (includeLineEndings && (ch == NextLine || ch == LineSeparator))) ? true : false;
3950 }
3951
// Shared implementation behind CFStringGetLineBounds() and CFStringGetParagraphBounds().
//
//   string             - a native CFString (asserted)
//   range              - range whose enclosing line(s) are wanted; asserted to be in bounds
//   lineBeginIndex     - optional out: index of the first character of the line containing range.location
//   lineEndIndex       - optional out: index just past the terminator of the line containing the end of the range
//   contentsEndIndex   - optional out: index of that terminator, i.e. the end of the line's contents
//   includeLineEndings - forwarded to isALineSeparatorTypeCharacter(); when true NextLine and
//                        LineSeparator also terminate a line
//
// Any of the three out-pointers may be NULL; work for outputs nobody asked for is skipped.
3952 static void __CFStringGetLineOrParagraphBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex, Boolean includeLineEndings) {
3953 CFIndex len;
3954 CFStringInlineBuffer buf;
3955 UniChar ch;
3956
3957 __CFAssertIsString(string);
3958 __CFAssertRangeIsInStringBounds(string, range.location, range.length);
3959
3960 len = __CFStrLength(string);
3961
3962 if (lineBeginIndex) {
3963 CFIndex start;
3964 if (range.location == 0) {
3965 start = 0;
3966 } else {
3967 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3968 CFIndex buf_idx = range.location;
3969
3970 /* Take care of the special case where start happens to fall right between \r and \n */
3971 ch = CFStringGetCharacterFromInlineBuffer(&buf, buf_idx);
3972 buf_idx--;
3973 if ((ch == NewLine) && (CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) == CarriageReturn)) {
// Step over the \r as well so the \r\n pair is not mistaken for the end of the previous line
3974 buf_idx--;
3975 }
// Walk backward until a separator is found (the line starts right after it) or the string start is passed
3976 while (1) {
3977 if (buf_idx < 0) {
3978 start = 0;
3979 break;
3980 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx), includeLineEndings)) {
3981 start = buf_idx + 1;
3982 break;
3983 } else {
3984 buf_idx--;
3985 }
3986 }
3987 }
3988 *lineBeginIndex = start;
3989 }
3990
3991 /* Now find the ending point */
3992 if (lineEndIndex || contentsEndIndex) {
3993 CFIndex endOfContents, lineSeparatorLength = 1; /* 1 by default */
3994 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3995 CFIndex buf_idx = range.location + range.length - (range.length ? 1 : 0);
3996 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
// NOTE(review): __CFStringGetCharacterFromInlineBufferAux is defined elsewhere in the file; its out-of-range behavior is not visible here - confirm.
3997 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3998 if (ch == NewLine) {
3999 endOfContents = buf_idx;
4000 buf_idx--;
4001 if (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == CarriageReturn) {
// The \n is the second half of a \r\n pair: the contents end at the \r and the terminator is two characters long
4002 lineSeparatorLength = 2;
4003 endOfContents--;
4004 }
4005 } else {
// Scan forward for the first separator, or stop at the end of the string
4006 while (1) {
4007 if (isALineSeparatorTypeCharacter(ch, includeLineEndings)) {
4008 endOfContents = buf_idx; /* This is actually end of contentsRange */
4009 buf_idx++; /* OK for this to go past the end */
4010 if ((ch == CarriageReturn) && (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == NewLine)) {
4011 lineSeparatorLength = 2;
4012 }
4013 break;
4014 } else if (buf_idx >= len) {
// Reached the end of the string without a terminator
4015 endOfContents = len;
4016 lineSeparatorLength = 0;
4017 break;
4018 } else {
4019 buf_idx++;
4020 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
4021 }
4022 }
4023 }
4024 if (contentsEndIndex) *contentsEndIndex = endOfContents;
4025 if (lineEndIndex) *lineEndIndex = endOfContents + lineSeparatorLength;
4026 }
4027 }
4028
// Computes line bounds for `range` of `string`; each of the three out-pointers is passed straight through.
// Calls __CFStringGetLineOrParagraphBounds() with includeLineEndings = true.
4029 void CFStringGetLineBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex) {
// NOTE(review): CF_OBJC_FUNCDISPATCHV is defined elsewhere; presumably it forwards to the NSString method and returns early when `string` is an Objective-C object - confirm.
4030 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)string, getLineStart:(NSUInteger *)lineBeginIndex end:(NSUInteger *)lineEndIndex contentsEnd:(NSUInteger *)contentsEndIndex forRange:NSMakeRange(range.location, range.length));
4031 __CFStringGetLineOrParagraphBounds(string, range, lineBeginIndex, lineEndIndex, contentsEndIndex, true);
4032 }
4033
// Computes paragraph bounds for `range` of `string`; each of the three out-pointers is passed straight through.
// Calls __CFStringGetLineOrParagraphBounds() with includeLineEndings = false.
4034 void CFStringGetParagraphBounds(CFStringRef string, CFRange range, CFIndex *parBeginIndex, CFIndex *parEndIndex, CFIndex *contentsEndIndex) {
// NOTE(review): CF_OBJC_FUNCDISPATCHV is defined elsewhere; presumably it forwards to the NSString method and returns early when `string` is an Objective-C object - confirm.
4035 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSString *)string, getParagraphStart:(NSUInteger *)parBeginIndex end:(NSUInteger *)parEndIndex contentsEnd:(NSUInteger *)contentsEndIndex forRange:NSMakeRange(range.location, range.length));
4036 __CFStringGetLineOrParagraphBounds(string, range, parBeginIndex, parEndIndex, contentsEndIndex, false);
4037 }
4038
4039
4040 CFStringRef CFStringCreateByCombiningStrings(CFAllocatorRef alloc, CFArrayRef array, CFStringRef separatorString) {
4041 CFIndex numChars;
4042 CFIndex separatorNumByte;
4043 CFIndex stringCount = CFArrayGetCount(array);
4044 Boolean isSepCFString = !CF_IS_OBJC(__kCFStringTypeID, separatorString);
4045 Boolean canBeEightbit = isSepCFString && __CFStrIsEightBit(separatorString);
4046 CFIndex idx;
4047 CFStringRef otherString;
4048 void *buffer;
4049 uint8_t *bufPtr;
4050 const void *separatorContents = NULL;
4051
4052 if (stringCount == 0) {
4053 return CFStringCreateWithCharacters(alloc, NULL, 0);
4054 } else if (stringCount == 1) {
4055 return (CFStringRef)CFStringCreateCopy(alloc, (CFStringRef)CFArrayGetValueAtIndex(array, 0));
4056 }
4057
4058 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
4059
4060 numChars = CFStringGetLength(separatorString) * (stringCount - 1);
4061 for (idx = 0; idx < stringCount; idx++) {
4062 otherString = (CFStringRef)CFArrayGetValueAtIndex(array, idx);
4063 numChars += CFStringGetLength(otherString);
4064 // canBeEightbit is already false if the separator is an NSString...
4065 if (CF_IS_OBJC(__kCFStringTypeID, otherString) || ! __CFStrIsEightBit(otherString)) canBeEightbit = false;
4066 }
4067
4068 buffer = (uint8_t *)CFAllocatorAllocate(alloc, canBeEightbit ? ((numChars + 1) * sizeof(uint8_t)) : (numChars * sizeof(UniChar)), 0);
4069 bufPtr = (uint8_t *)buffer;
4070 if (__CFOASafe) __CFSetLastAllocationEventName(buffer, "CFString (store)");
4071 separatorNumByte = CFStringGetLength(separatorString) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
4072
4073 for (idx = 0; idx < stringCount; idx++) {
4074 if (idx) { // add separator here unless first string
4075 if (separatorContents) {
4076 memmove(bufPtr, separatorContents, separatorNumByte);
4077 } else {
4078 if (!isSepCFString) { // NSString
4079 CFStringGetCharacters(separatorString, CFRangeMake(0, CFStringGetLength(separatorString)), (UniChar *)bufPtr);
4080 } else if (canBeEightbit) {
4081 memmove(bufPtr, (const uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), separatorNumByte);
4082 } else {
4083 __CFStrConvertBytesToUnicode((uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), (UniChar *)bufPtr, __CFStrLength(separatorString));
4084 }
4085 separatorContents = bufPtr;
4086 }
4087 bufPtr += separatorNumByte;
4088 }
4089
4090 otherString = (CFStringRef )CFArrayGetValueAtIndex(array, idx);
4091 if (CF_IS_OBJC(__kCFStringTypeID, otherString)) {
4092 CFIndex otherLength = CFStringGetLength(otherString);
4093 CFStringGetCharacters(otherString, CFRangeMake(0, otherLength), (UniChar *)bufPtr);
4094 bufPtr += otherLength * sizeof(UniChar);
4095 } else {
4096 const uint8_t * otherContents = (const uint8_t *)__CFStrContents(otherString);
4097 CFIndex otherNumByte = __CFStrLength2(otherString, otherContents) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
4098
4099 if (canBeEightbit || __CFStrIsUnicode(otherString)) {
4100 memmove(bufPtr, otherContents + __CFStrSkipAnyLengthByte(otherString), otherNumByte);
4101 } else {
4102 __CFStrConvertBytesToUnicode(otherContents + __CFStrSkipAnyLengthByte(otherString), (UniChar *)bufPtr, __CFStrLength2(otherString, otherContents));
4103 }
4104 bufPtr += otherNumByte;
4105 }
4106 }
4107 if (canBeEightbit) *bufPtr = 0; // NULL byte;
4108
4109 return canBeEightbit ?
4110 CFStringCreateWithCStringNoCopy(alloc, (const char*)buffer, __CFStringGetEightBitStringEncoding(), alloc) :
4111 CFStringCreateWithCharactersNoCopy(alloc, (UniChar *)buffer, numChars, alloc);
4112 }
4113
4114
4115 CFArrayRef CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc, CFStringRef string, CFStringRef separatorString) {
4116 CFArrayRef separatorRanges;
4117 CFIndex length = CFStringGetLength(string);
4118 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
4119 if (!(separatorRanges = CFStringCreateArrayWithFindResults(alloc, string, separatorString, CFRangeMake(0, length), 0))) {
4120 return CFArrayCreate(alloc, (const void **)&string, 1, & kCFTypeArrayCallBacks);
4121 } else {
4122 CFIndex idx;
4123 CFIndex count = CFArrayGetCount(separatorRanges);
4124 CFIndex startIndex = 0;
4125 CFIndex numChars;
4126 CFMutableArrayRef array = CFArrayCreateMutable(alloc, count + 2, & kCFTypeArrayCallBacks);
4127 const CFRange *currentRange;
4128 CFStringRef substring;
4129
4130 for (idx = 0;idx < count;idx++) {
4131 currentRange = (const CFRange *)CFArrayGetValueAtIndex(separatorRanges, idx);
4132 numChars = currentRange->location - startIndex;
4133 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, numChars));
4134 CFArrayAppendValue(array, substring);
4135 CFRelease(substring);
4136 startIndex = currentRange->location + currentRange->length;
4137 }
4138 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, length - startIndex));
4139 CFArrayAppendValue(array, substring);
4140 CFRelease(substring);
4141
4142 CFRelease(separatorRanges);
4143
4144 return array;
4145 }
4146 }
4147
4148 CFStringRef CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc, CFDataRef data, CFStringEncoding encoding) {
4149 return CFStringCreateWithBytes(alloc, CFDataGetBytePtr(data), CFDataGetLength(data), encoding, true);
4150 }
4151
4152
4153 CFDataRef CFStringCreateExternalRepresentation(CFAllocatorRef alloc, CFStringRef string, CFStringEncoding encoding, uint8_t lossByte) {
4154 CFIndex length;
4155 CFIndex guessedByteLength;
4156 uint8_t *bytes;
4157 CFIndex usedLength;
4158 SInt32 result;
4159
4160 if (CF_IS_OBJC(__kCFStringTypeID, string)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
4161 length = CFStringGetLength(string);
4162 } else {
4163 __CFAssertIsString(string);
4164 length = __CFStrLength(string);
4165 if (__CFStrIsEightBit(string) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
4166 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
4167 }
4168 }
4169
4170 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
4171
4172 if (((encoding & 0x0FFF) == kCFStringEncodingUnicode) && ((encoding == kCFStringEncodingUnicode) || ((encoding > kCFStringEncodingUTF8) && (encoding <= kCFStringEncodingUTF32LE)))) {
4173 guessedByteLength = (length + 1) * ((((encoding >> 26) & 2) == 0) ? sizeof(UTF16Char) : sizeof(UTF32Char)); // UTF32 format has the bit set
4174 } else if (((guessedByteLength = CFStringGetMaximumSizeForEncoding(length, encoding)) > length) && !CF_IS_OBJC(__kCFStringTypeID, string)) { // Multi byte encoding
4175 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
4176 if (__CFStrIsUnicode(string)) {
4177 CFIndex aLength = CFStringEncodingByteLengthForCharacters(encoding, kCFStringEncodingPrependBOM, __CFStrContents(string), __CFStrLength(string));
4178 if (aLength > 0) guessedByteLength = aLength;
4179 } else {
4180 #endif
4181 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, NULL, LONG_MAX, &guessedByteLength);
4182 // if result == length, we always succeed
4183 // otherwise, if result == 0, we fail
4184 // otherwise, if there was a lossByte but still result != length, we fail
4185 if ((result != length) && (!result || !lossByte)) return NULL;
4186 if (guessedByteLength == length && __CFStrIsEightBit(string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // It's all ASCII !!
4187 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
4188 }
4189 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
4190 }
4191 #endif
4192 }
4193 bytes = (uint8_t *)CFAllocatorAllocate(alloc, guessedByteLength, 0);
4194 if (__CFOASafe) __CFSetLastAllocationEventName(bytes, "CFData (store)");
4195
4196 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, bytes, guessedByteLength, &usedLength);
4197
4198 if ((result != length) && (!result || !lossByte)) { // see comment above about what this means
4199 CFAllocatorDeallocate(alloc, bytes);
4200 return NULL;
4201 }
4202
4203 return CFDataCreateWithBytesNoCopy(alloc, (uint8_t *)bytes, usedLength, alloc);
4204 }
4205
4206
4207 CFStringEncoding CFStringGetSmallestEncoding(CFStringRef str) {
4208 CFIndex len;
4209 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringEncoding, (NSString *)str, _smallestEncodingInCFStringEncoding);
4210 __CFAssertIsString(str);
4211
4212 if (__CFStrIsEightBit(str)) return __CFStringGetEightBitStringEncoding();
4213 len = __CFStrLength(str);
4214 if (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetEightBitStringEncoding(), 0, NULL, LONG_MAX, NULL) == len) return __CFStringGetEightBitStringEncoding();
4215 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetSystemEncoding(), 0, NULL, LONG_MAX, NULL) == len)) return __CFStringGetSystemEncoding();
4216 return kCFStringEncodingUnicode; /* ??? */
4217 }
4218
4219
4220 CFStringEncoding CFStringGetFastestEncoding(CFStringRef str) {
4221 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFStringEncoding, (NSString *)str, _fastestEncodingInCFStringEncoding);
4222 __CFAssertIsString(str);
4223 return __CFStrIsEightBit(str) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode; /* ??? */
4224 }
4225
4226
4227 SInt32 CFStringGetIntValue(CFStringRef str) {
4228 Boolean success;
4229 SInt32 result;
4230 SInt32 idx = 0;
4231 CFStringInlineBuffer buf;
4232 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
4233 success = __CFStringScanInteger(&buf, NULL, &idx, false, &result);
4234 return success ? result : 0;
4235 }
4236
4237
4238 double CFStringGetDoubleValue(CFStringRef str) {
4239 Boolean success;
4240 double result;
4241 SInt32 idx = 0;
4242 CFStringInlineBuffer buf;
4243 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
4244 success = __CFStringScanDouble(&buf, NULL, &idx, &result);
4245 return success ? result : 0.0;
4246 }
4247
4248
4249 /*** Mutable functions... ***/
4250
4251 void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string, UniChar *chars, CFIndex length, CFIndex capacity) {
4252 __CFAssertIsNotNegative(length);
4253 __CFAssertIsStringAndExternalMutable(string);
4254 CFAssert4((length <= capacity) && ((capacity == 0) || ((capacity > 0) && chars)), __kCFLogAssertion, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__, chars, length, capacity);
4255 __CFStrSetContentPtr(string, chars);
4256 __CFStrSetExplicitLength(string, length);
4257 __CFStrSetCapacity(string, capacity * sizeof(UniChar));
4258 __CFStrSetCapacityProvidedExternally(string);
4259 }
4260
4261
4262
4263 void CFStringInsert(CFMutableStringRef str, CFIndex idx, CFStringRef insertedStr) {
4264 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, insertString:(NSString *)insertedStr atIndex:(NSUInteger)idx);
4265 __CFAssertIsStringAndMutable(str);
4266 CFAssert3(idx >= 0 && idx <= __CFStrLength(str), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(str));
4267 __CFStringReplace(str, CFRangeMake(idx, 0), insertedStr);
4268 }
4269
4270
4271 void CFStringDelete(CFMutableStringRef str, CFRange range) {
4272 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, deleteCharactersInRange:NSMakeRange(range.location, range.length));
4273 __CFAssertIsStringAndMutable(str);
4274 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4275 __CFStringChangeSize(str, range, 0, false);
4276 }
4277
4278
4279 void CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
4280 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, replaceCharactersInRange:NSMakeRange(range.location, range.length) withString:(NSString *)replacement);
4281 __CFAssertIsStringAndMutable(str);
4282 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4283 __CFStringReplace(str, range, replacement);
4284 }
4285
4286
4287 void CFStringReplaceAll(CFMutableStringRef str, CFStringRef replacement) {
4288 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, setString:(NSString *)replacement);
4289 __CFAssertIsStringAndMutable(str);
4290 __CFStringReplace(str, CFRangeMake(0, __CFStrLength(str)), replacement);
4291 }
4292
4293
4294 void CFStringAppend(CFMutableStringRef str, CFStringRef appended) {
4295 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendString:(NSString *)appended);
4296 __CFAssertIsStringAndMutable(str);
4297 __CFStringReplace(str, CFRangeMake(__CFStrLength(str), 0), appended);
4298 }
4299
4300
4301 void CFStringAppendCharacters(CFMutableStringRef str, const UniChar *chars, CFIndex appendedLength) {
4302 CFIndex strLength, idx;
4303
4304 __CFAssertIsNotNegative(appendedLength);
4305
4306 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendCharacters:chars length:(NSUInteger)appendedLength);
4307
4308 __CFAssertIsStringAndMutable(str);
4309
4310 strLength = __CFStrLength(str);
4311 if (__CFStrIsUnicode(str)) {
4312 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, true);
4313 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4314 } else {
4315 uint8_t *contents;
4316 bool isASCII = true;
4317 for (idx = 0; isASCII && idx < appendedLength; idx++) isASCII = (chars[idx] < 0x80);
4318 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, !isASCII);
4319 if (!isASCII) {
4320 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4321 } else {
4322 contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4323 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4324 }
4325 }
4326 }
4327
4328
4329 void __CFStringAppendBytes(CFMutableStringRef str, const char *cStr, CFIndex appendedLength, CFStringEncoding encoding) {
4330 Boolean appendedIsUnicode = false;
4331 Boolean freeCStrWhenDone = false;
4332 Boolean demoteAppendedUnicode = false;
4333 CFVarWidthCharBuffer vBuf;
4334
4335 __CFAssertIsNotNegative(appendedLength);
4336
4337 if (encoding == kCFStringEncodingASCII || encoding == __CFStringGetEightBitStringEncoding()) {
4338 // appendedLength now denotes length in UniChars
4339 } else if (encoding == kCFStringEncodingUnicode) {
4340 UniChar *chars = (UniChar *)cStr;
4341 CFIndex idx, length = appendedLength / sizeof(UniChar);
4342 bool isASCII = true;
4343 for (idx = 0; isASCII && idx < length; idx++) isASCII = (chars[idx] < 0x80);
4344 if (!isASCII) {
4345 appendedIsUnicode = true;
4346 } else {
4347 demoteAppendedUnicode = true;
4348 }
4349 appendedLength = length;
4350 } else {
4351 Boolean usingPassedInMemory = false;
4352
4353 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
4354 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
4355
4356 if (!__CFStringDecodeByteStream3((const uint8_t *)cStr, appendedLength, encoding, __CFStrIsUnicode(str), &vBuf, &usingPassedInMemory, 0)) {
4357 CFAssert1(0, __kCFLogAssertion, "Supplied bytes could not be converted specified encoding %d", encoding);
4358 return;
4359 }
4360
4361 // If not ASCII, appendedLength now denotes length in UniChars
4362 appendedLength = vBuf.numChars;
4363 appendedIsUnicode = !vBuf.isASCII;
4364 cStr = (const char *)vBuf.chars.ascii;
4365 freeCStrWhenDone = !usingPassedInMemory && vBuf.shouldFreeChars;
4366 }
4367
4368 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
4369 if (!appendedIsUnicode && !demoteAppendedUnicode) {
4370 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, _cfAppendCString:(const unsigned char *)cStr length:(NSInteger)appendedLength);
4371 } else {
4372 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)str, appendCharacters:(const unichar *)cStr length:(NSUInteger)appendedLength);
4373 }
4374 } else {
4375 CFIndex strLength;
4376 __CFAssertIsStringAndMutable(str);
4377 strLength = __CFStrLength(str);
4378
4379 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, appendedIsUnicode || __CFStrIsUnicode(str));
4380
4381 if (__CFStrIsUnicode(str)) {
4382 UniChar *contents = (UniChar *)__CFStrContents(str);
4383 if (appendedIsUnicode) {
4384 memmove(contents + strLength, cStr, appendedLength * sizeof(UniChar));
4385 } else {
4386 __CFStrConvertBytesToUnicode((const uint8_t *)cStr, contents + strLength, appendedLength);
4387 }
4388 } else {
4389 if (demoteAppendedUnicode) {
4390 UniChar *chars = (UniChar *)cStr;
4391 CFIndex idx;
4392 uint8_t *contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4393 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4394 } else {
4395 uint8_t *contents = (uint8_t *)__CFStrContents(str);
4396 memmove(contents + strLength + __CFStrSkipAnyLengthByte(str), cStr, appendedLength);
4397 }
4398 }
4399 }
4400
4401 if (freeCStrWhenDone) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr);
4402 }
4403
4404 void CFStringAppendPascalString(CFMutableStringRef str, ConstStringPtr pStr, CFStringEncoding encoding) {
4405 __CFStringAppendBytes(str, (const char *)(pStr + 1), (CFIndex)*pStr, encoding);
4406 }
4407
4408 void CFStringAppendCString(CFMutableStringRef str, const char *cStr, CFStringEncoding encoding) {
4409 __CFStringAppendBytes(str, cStr, strlen(cStr), encoding);
4410 }
4411
4412
4413 void CFStringAppendFormat(CFMutableStringRef str, CFDictionaryRef formatOptions, CFStringRef format, ...) {
4414 va_list argList;
4415
4416 va_start(argList, format);
4417 CFStringAppendFormatAndArguments(str, formatOptions, format, argList);
4418 va_end(argList);
4419 }
4420
4421
4422 CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
4423 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, CFIndex, (NSMutableString *)string, replaceOccurrencesOfString:(NSString *)stringToFind withString:(NSString *)replacementString options:(NSStringCompareOptions)compareOptions range:NSMakeRange(rangeToSearch.location, rangeToSearch.length));
4424 CFRange foundRange;
4425 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
4426 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
4427 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
4428 CFRange rangeBuffer[MAX_RANGES_ON_STACK]; // Used to avoid allocating memory
4429 CFRange *ranges = rangeBuffer;
4430 CFIndex foundCount = 0;
4431 CFIndex capacity = MAX_RANGES_ON_STACK;
4432
4433 __CFAssertIsStringAndMutable(string);
4434 __CFAssertRangeIsInStringBounds(string, rangeToSearch.location, rangeToSearch.length);
4435
4436 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
4437 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
4438 // Determine the next range
4439 if (backwards) {
4440 rangeToSearch.length = foundRange.location - rangeToSearch.location;
4441 } else {
4442 rangeToSearch.location = foundRange.location + foundRange.length;
4443 rangeToSearch.length = endIndex - rangeToSearch.location;
4444 }
4445
4446 // If necessary, grow the array
4447 if (foundCount >= capacity) {
4448 bool firstAlloc = (ranges == rangeBuffer) ? true : false;
4449 capacity = (capacity + 4) * 2;
4450 // Note that reallocate with NULL previous pointer is same as allocate
4451 ranges = (CFRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, firstAlloc ? NULL : ranges, capacity * sizeof(CFRange), 0);
4452 if (firstAlloc) memmove(ranges, rangeBuffer, MAX_RANGES_ON_STACK * sizeof(CFRange));
4453 }
4454 ranges[foundCount] = foundRange;
4455 foundCount++;
4456 }
4457
4458 if (foundCount > 0) {
4459 if (backwards) { // Reorder the ranges to be incrementing (better to do this here, then to check other places)
4460 int head = 0;
4461 int tail = foundCount - 1;
4462 while (head < tail) {
4463 CFRange temp = ranges[head];
4464 ranges[head] = ranges[tail];
4465 ranges[tail] = temp;
4466 head++;
4467 tail--;
4468 }
4469 }
4470 __CFStringReplaceMultiple(string, ranges, foundCount, replacementString);
4471 if (ranges != rangeBuffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, ranges);
4472 }
4473
4474 return foundCount;
4475 }
4476
4477
4478 // This function is here for NSString purposes
4479 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
4480
4481 int __CFStringCheckAndReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
4482 if (!__CFStrIsMutable(str)) return _CFStringErrNotMutable; // These three ifs are always here, for NSString usage
4483 if (!replacement && __CFStringNoteErrors()) return _CFStringErrNilArg;
4484 // This attempts to catch bad ranges including those described in 3375535 (-1,1)
4485 unsigned long endOfRange = (unsigned long)(range.location) + (unsigned long)(range.length); // NSRange uses unsigned quantities, hence the casting
4486 if (((endOfRange > (unsigned long)__CFStrLength(str)) || (endOfRange < (unsigned long)(range.location))) && __CFStringNoteErrors()) return _CFStringErrBounds;
4487
4488 __CFAssertIsStringAndMutable(str);
4489 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4490 __CFStringReplace(str, range, replacement);
4491 return _CFStringErrNone;
4492 }
4493
4494 // This function determines whether errors which would cause string exceptions should
4495 // be ignored or not
4496
4497 Boolean __CFStringNoteErrors(void) {
4498 return true;
4499 }
4500
4501
4502
4503 void CFStringPad(CFMutableStringRef string, CFStringRef padString, CFIndex length, CFIndex indexIntoPad) {
4504 CFIndex originalLength;
4505
4506 __CFAssertIsNotNegative(length);
4507 __CFAssertIsNotNegative(indexIntoPad);
4508
4509 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfPad:padString length:(uint32_t)length padIndex:(uint32_t)indexIntoPad);
4510
4511 __CFAssertIsStringAndMutable(string);
4512
4513 originalLength = __CFStrLength(string);
4514 if (length < originalLength) {
4515 __CFStringChangeSize(string, CFRangeMake(length, originalLength - length), 0, false);
4516 } else if (originalLength < length) {
4517 uint8_t *contents;
4518 Boolean isUnicode;
4519 CFIndex charSize;
4520 CFIndex padStringLength;
4521 CFIndex padLength;
4522 CFIndex padRemaining = length - originalLength;
4523
4524 if (CF_IS_OBJC(__kCFStringTypeID, padString)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
4525 padStringLength = CFStringGetLength(padString);
4526 isUnicode = true; /* !!! Bad for now */
4527 } else {
4528 __CFAssertIsString(padString);
4529 padStringLength = __CFStrLength(padString);
4530 isUnicode = __CFStrIsUnicode(string) || __CFStrIsUnicode(padString);
4531 }
4532
4533 charSize = isUnicode ? sizeof(UniChar) : sizeof(uint8_t);
4534
4535 __CFStringChangeSize(string, CFRangeMake(originalLength, 0), padRemaining, isUnicode);
4536
4537 contents = (uint8_t *)__CFStrContents(string) + charSize * originalLength + __CFStrSkipAnyLengthByte(string);
4538 padLength = padStringLength - indexIntoPad;
4539 padLength = padRemaining < padLength ? padRemaining : padLength;
4540
4541 while (padRemaining > 0) {
4542 if (isUnicode) {
4543 CFStringGetCharacters(padString, CFRangeMake(indexIntoPad, padLength), (UniChar *)contents);
4544 } else {
4545 CFStringGetBytes(padString, CFRangeMake(indexIntoPad, padLength), __CFStringGetEightBitStringEncoding(), 0, false, contents, padRemaining * charSize, NULL);
4546 }
4547 contents += padLength * charSize;
4548 padRemaining -= padLength;
4549 indexIntoPad = 0;
4550 padLength = padRemaining < padLength ? padRemaining : padStringLength;
4551 }
4552 }
4553 }
4554
4555 void CFStringTrim(CFMutableStringRef string, CFStringRef trimString) {
4556 CFRange range;
4557 CFIndex newStartIndex;
4558 CFIndex length;
4559
4560 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfTrim:trimString);
4561
4562 __CFAssertIsStringAndMutable(string);
4563 __CFAssertIsString(trimString);
4564
4565 newStartIndex = 0;
4566 length = __CFStrLength(string);
4567
4568 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length - newStartIndex), kCFCompareAnchored, &range)) {
4569 newStartIndex = range.location + range.length;
4570 }
4571
4572 if (newStartIndex < length) {
4573 CFIndex charSize = __CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t);
4574 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4575
4576 length -= newStartIndex;
4577 if (CFStringGetLength(trimString) < length) {
4578 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length), kCFCompareAnchored|kCFCompareBackwards, &range)) {
4579 length = range.location - newStartIndex;
4580 }
4581 }
4582 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4583 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4584 } else { // Only trimString in string, trim all
4585 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4586 }
4587 }
4588
4589 void CFStringTrimWhitespace(CFMutableStringRef string) {
4590 CFIndex newStartIndex;
4591 CFIndex length;
4592 CFStringInlineBuffer buffer;
4593
4594 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfTrimWS);
4595
4596 __CFAssertIsStringAndMutable(string);
4597
4598 newStartIndex = 0;
4599 length = __CFStrLength(string);
4600
4601 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
4602 CFIndex buffer_idx = 0;
4603
4604 while (buffer_idx < length && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4605 buffer_idx++;
4606 newStartIndex = buffer_idx;
4607
4608 if (newStartIndex < length) {
4609 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4610 CFIndex charSize = (__CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t));
4611
4612 buffer_idx = length - 1;
4613 while (0 <= buffer_idx && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4614 buffer_idx--;
4615 length = buffer_idx - newStartIndex + 1;
4616
4617 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4618 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4619 } else { // Whitespace only string
4620 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4621 }
4622 }
4623
4624 void CFStringLowercase(CFMutableStringRef string, CFLocaleRef locale) {
4625 CFIndex currentIndex = 0;
4626 CFIndex length;
4627 const uint8_t *langCode;
4628 Boolean isEightBit = __CFStrIsEightBit(string);
4629
4630 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfLowercase:(const void *)locale);
4631
4632 __CFAssertIsStringAndMutable(string);
4633
4634 length = __CFStrLength(string);
4635
4636 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale, false) : NULL);
4637
4638 if (!langCode && isEightBit) {
4639 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4640 for (;currentIndex < length;currentIndex++) {
4641 if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4642 contents[currentIndex] += 'a' - 'A';
4643 } else if (contents[currentIndex] > 127) {
4644 break;
4645 }
4646 }
4647 }
4648
4649 if (currentIndex < length) {
4650 UTF16Char *contents;
4651 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4652 CFIndex mappedLength;
4653 UTF32Char currentChar;
4654 UInt32 flags = 0;
4655
4656 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4657
4658 contents = (UniChar *)__CFStrContents(string);
4659
4660 for (;currentIndex < length;currentIndex++) {
4661
4662 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4663 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4664 } else {
4665 currentChar = contents[currentIndex];
4666 }
4667 flags = ((langCode || (currentChar == 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToLowercase, langCode, flags) : 0);
4668
4669 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, flags, langCode);
4670 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4671
4672 if (currentChar > 0xFFFF) { // Non-BMP char
4673 switch (mappedLength) {
4674 case 0:
4675 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4676 contents = (UniChar *)__CFStrContents(string);
4677 length -= 2;
4678 break;
4679
4680 case 1:
4681 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4682 contents = (UniChar *)__CFStrContents(string);
4683 --length;
4684 break;
4685
4686 case 2:
4687 contents[++currentIndex] = mappedCharacters[1];
4688 break;
4689
4690 default:
4691 --mappedLength; // Skip the current char
4692 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4693 contents = (UniChar *)__CFStrContents(string);
4694 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4695 length += (mappedLength - 1);
4696 currentIndex += mappedLength;
4697 break;
4698 }
4699 } else if (mappedLength == 0) {
4700 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4701 contents = (UniChar *)__CFStrContents(string);
4702 --length;
4703 } else if (mappedLength > 1) {
4704 --mappedLength; // Skip the current char
4705 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4706 contents = (UniChar *)__CFStrContents(string);
4707 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4708 length += mappedLength;
4709 currentIndex += mappedLength;
4710 }
4711 }
4712 }
4713 }
4714
4715 void CFStringUppercase(CFMutableStringRef string, CFLocaleRef locale) {
4716 CFIndex currentIndex = 0;
4717 CFIndex length;
4718 const uint8_t *langCode;
4719 Boolean isEightBit = __CFStrIsEightBit(string);
4720
4721 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfUppercase:(const void *)locale);
4722
4723 __CFAssertIsStringAndMutable(string);
4724
4725 length = __CFStrLength(string);
4726
4727 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale, false) : NULL);
4728
4729 if (!langCode && isEightBit) {
4730 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4731 for (;currentIndex < length;currentIndex++) {
4732 if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4733 contents[currentIndex] -= 'a' - 'A';
4734 } else if (contents[currentIndex] > 127) {
4735 break;
4736 }
4737 }
4738 }
4739
4740 if (currentIndex < length) {
4741 UniChar *contents;
4742 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4743 CFIndex mappedLength;
4744 UTF32Char currentChar;
4745 UInt32 flags = 0;
4746
4747 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4748
4749 contents = (UniChar *)__CFStrContents(string);
4750
4751 for (;currentIndex < length;currentIndex++) {
4752 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4753 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4754 } else {
4755 currentChar = contents[currentIndex];
4756 }
4757
4758 flags = (langCode ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToUppercase, langCode, flags) : 0);
4759
4760 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToUppercase, flags, langCode);
4761 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4762
4763 if (currentChar > 0xFFFF) { // Non-BMP char
4764 switch (mappedLength) {
4765 case 0:
4766 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4767 contents = (UniChar *)__CFStrContents(string);
4768 length -= 2;
4769 break;
4770
4771 case 1:
4772 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4773 contents = (UniChar *)__CFStrContents(string);
4774 --length;
4775 break;
4776
4777 case 2:
4778 contents[++currentIndex] = mappedCharacters[1];
4779 break;
4780
4781 default:
4782 --mappedLength; // Skip the current char
4783 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4784 contents = (UniChar *)__CFStrContents(string);
4785 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4786 length += (mappedLength - 1);
4787 currentIndex += mappedLength;
4788 break;
4789 }
4790 } else if (mappedLength == 0) {
4791 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4792 contents = (UniChar *)__CFStrContents(string);
4793 --length;
4794 } else if (mappedLength > 1) {
4795 --mappedLength; // Skip the current char
4796 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4797 contents = (UniChar *)__CFStrContents(string);
4798 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4799 length += mappedLength;
4800 currentIndex += mappedLength;
4801 }
4802 }
4803 }
4804 }
4805
4806
4807 void CFStringCapitalize(CFMutableStringRef string, CFLocaleRef locale) {
4808 CFIndex currentIndex = 0;
4809 CFIndex length;
4810 const uint8_t *langCode;
4811 Boolean isEightBit = __CFStrIsEightBit(string);
4812 Boolean isLastCased = false;
4813 const uint8_t *caseIgnorableForBMP;
4814
4815 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfCapitalize:(const void *)locale);
4816
4817 __CFAssertIsStringAndMutable(string);
4818
4819 length = __CFStrLength(string);
4820
4821 caseIgnorableForBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet, 0);
4822
4823 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale, false) : NULL);
4824
4825 if (!langCode && isEightBit) {
4826 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4827 for (;currentIndex < length;currentIndex++) {
4828 if (contents[currentIndex] > 127) {
4829 break;
4830 } else if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4831 contents[currentIndex] += (isLastCased ? 'a' - 'A' : 0);
4832 isLastCased = true;
4833 } else if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4834 contents[currentIndex] -= (!isLastCased ? 'a' - 'A' : 0);
4835 isLastCased = true;
4836 } else if (!CFUniCharIsMemberOfBitmap(contents[currentIndex], caseIgnorableForBMP)) {
4837 isLastCased = false;
4838 }
4839 }
4840 }
4841
4842 if (currentIndex < length) {
4843 UniChar *contents;
4844 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4845 CFIndex mappedLength;
4846 UTF32Char currentChar;
4847 UInt32 flags = 0;
4848
4849 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4850
4851 contents = (UniChar *)__CFStrContents(string);
4852
4853 for (;currentIndex < length;currentIndex++) {
4854 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4855 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4856 } else {
4857 currentChar = contents[currentIndex];
4858 }
4859 flags = ((langCode || ((currentChar == 0x03A3) && isLastCased)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), langCode, flags) : 0);
4860
4861 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), flags, langCode);
4862 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4863
4864 if (currentChar > 0xFFFF) { // Non-BMP char
4865 switch (mappedLength) {
4866 case 0:
4867 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4868 contents = (UniChar *)__CFStrContents(string);
4869 length -= 2;
4870 break;
4871
4872 case 1:
4873 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4874 contents = (UniChar *)__CFStrContents(string);
4875 --length;
4876 break;
4877
4878 case 2:
4879 contents[++currentIndex] = mappedCharacters[1];
4880 break;
4881
4882 default:
4883 --mappedLength; // Skip the current char
4884 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4885 contents = (UniChar *)__CFStrContents(string);
4886 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4887 length += (mappedLength - 1);
4888 currentIndex += mappedLength;
4889 break;
4890 }
4891 } else if (mappedLength == 0) {
4892 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4893 contents = (UniChar *)__CFStrContents(string);
4894 --length;
4895 } else if (mappedLength > 1) {
4896 --mappedLength; // Skip the current char
4897 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4898 contents = (UniChar *)__CFStrContents(string);
4899 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4900 length += mappedLength;
4901 currentIndex += mappedLength;
4902 }
4903
4904 if (!((currentChar > 0xFFFF) ? CFUniCharIsMemberOf(currentChar, kCFUniCharCaseIgnorableCharacterSet) : CFUniCharIsMemberOfBitmap(currentChar, caseIgnorableForBMP))) { // We have non-caseignorable here
4905 isLastCased = ((CFUniCharIsMemberOf(currentChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(currentChar, kCFUniCharLowercaseLetterCharacterSet)) ? true : false);
4906 }
4907 }
4908 }
4909 }
4910
4911
/* Element count of the on-stack UTF32Char scratch buffer that CFStringNormalize() starts with; the same value is the step by which that buffer is grown. */
4912 #define MAX_DECOMP_BUF 64
4913
/* Constants for arithmetic Hangul syllable composition.  CFStringNormalize() composes a syllable as
   (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE,
   where each index is a jamo code point minus the matching *BASE value. */
4914 #define HANGUL_SBASE 0xAC00 /* code point of the first precomposed syllable */
4915 #define HANGUL_LBASE 0x1100 /* first leading consonant (L) jamo */
4916 #define HANGUL_VBASE 0x1161 /* first vowel (V) jamo */
4917 #define HANGUL_TBASE 0x11A7 /* one below the first trailing consonant (T) jamo, so index 0 stands for "no trailing consonant" */
4918 #define HANGUL_SCOUNT 11172 /* number of precomposed syllables */
4919 #define HANGUL_LCOUNT 19 /* number of L jamo that take part in composition */
4920 #define HANGUL_VCOUNT 21 /* number of V jamo that take part in composition */
4921 #define HANGUL_TCOUNT 28 /* number of T slots, counting the empty slot 0 */
4922 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT) /* syllables per leading consonant */
4923
4924 CF_INLINE uint32_t __CFGetUTF16Length(const UTF32Char *characters, uint32_t utf32Length) {
4925 const UTF32Char *limit = characters + utf32Length;
4926 uint32_t length = 0;
4927
4928 while (characters < limit) length += (*(characters++) > 0xFFFF ? 2 : 1);
4929
4930 return length;
4931 }
4932
4933 CF_INLINE void __CFFillInUTF16(const UTF32Char *characters, UTF16Char *dst, uint32_t utf32Length) {
4934 const UTF32Char *limit = characters + utf32Length;
4935 UTF32Char currentChar;
4936
4937 while (characters < limit) {
4938 currentChar = *(characters++);
4939 if (currentChar > 0xFFFF) {
4940 currentChar -= 0x10000;
4941 *(dst++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
4942 *(dst++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
4943 } else {
4944 *(dst++) = currentChar;
4945 }
4946 }
4947 }
4948
4949 void CFStringNormalize(CFMutableStringRef string, CFStringNormalizationForm theForm) {
4950 CFIndex currentIndex = 0;
4951 CFIndex length;
4952 bool needToReorder = true;
4953
4954 CF_OBJC_FUNCDISPATCHV(__kCFStringTypeID, void, (NSMutableString *)string, _cfNormalize:theForm);
4955
4956 __CFAssertIsStringAndMutable(string);
4957
4958 length = __CFStrLength(string);
4959
4960 if (__CFStrIsEightBit(string)) {
4961 uint8_t *contents;
4962
4963 if (theForm == kCFStringNormalizationFormC) return; // 8bit form has no decomposition
4964
4965 contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4966
4967 for (;currentIndex < length;currentIndex++) {
4968 if (contents[currentIndex] > 127) {
4969 __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); // need to do harm way
4970 needToReorder = false;
4971 break;
4972 }
4973 }
4974 }
4975
4976 if (currentIndex < length) {
4977 UTF16Char *limit = (UTF16Char *)__CFStrContents(string) + length;
4978 UTF16Char *contents = (UTF16Char *)__CFStrContents(string) + currentIndex;
4979 UTF32Char buffer[MAX_DECOMP_BUF];
4980 UTF32Char *mappedCharacters = buffer;
4981 CFIndex allocatedLength = MAX_DECOMP_BUF;
4982 CFIndex mappedLength;
4983 CFIndex currentLength;
4984 UTF32Char currentChar;
4985 const uint8_t *decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
4986 const uint8_t *nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
4987 const uint8_t *combiningBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
4988
4989 while (contents < limit) {
4990 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4991 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4992 currentLength = 2;
4993 contents += 2;
4994 } else {
4995 currentChar = *(contents++);
4996 currentLength = 1;
4997 }
4998
4999 mappedLength = 0;
5000
5001 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16)))) && (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, ((currentChar < 0x10000) ? combiningBMP : (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16)))))) {
5002 if ((theForm & kCFStringNormalizationFormC) == 0 || currentChar < HANGUL_SBASE || currentChar > (HANGUL_SBASE + HANGUL_SCOUNT)) { // We don't have to decompose Hangul Syllables if we're precomposing again
5003 mappedLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters, MAX_DECOMP_BUF);
5004 }
5005 }
5006
5007 if ((needToReorder || (theForm & kCFStringNormalizationFormC)) && ((contents < limit) || (mappedLength == 0))) {
5008 if (mappedLength > 0) {
5009 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
5010 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
5011 } else {
5012 currentChar = *contents;
5013 }
5014 }
5015
5016 if (0 != CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) {
5017 uint32_t decompLength;
5018
5019 if (mappedLength == 0) {
5020 contents -= (currentChar & 0xFFFF0000 ? 2 : 1);
5021 if (currentIndex > 0) {
5022 if (CFUniCharIsSurrogateLowCharacter(*(contents - 1)) && (currentIndex > 1) && CFUniCharIsSurrogateHighCharacter(*(contents - 2))) {
5023 *mappedCharacters = CFUniCharGetLongCharacterForSurrogatePair(*(contents - 2), *(contents - 1));
5024 currentIndex -= 2;
5025 currentLength += 2;
5026 } else {
5027 *mappedCharacters = *(contents - 1);
5028 --currentIndex;
5029 ++currentLength;
5030 }
5031 mappedLength = 1;
5032 }
5033 } else {
5034 currentLength += (currentChar & 0xFFFF0000 ? 2 : 1);
5035 }
5036 contents += (currentChar & 0xFFFF0000 ? 2 : 1);
5037
5038 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
5039 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
5040 mappedLength += decompLength;
5041 } else {
5042 mappedCharacters[mappedLength++] = currentChar;
5043 }
5044
5045 while (contents < limit) {
5046 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
5047 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
5048 } else {
5049 currentChar = *contents;
5050 }
5051 if (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) break;
5052 if (currentChar & 0xFFFF0000) {
5053 contents += 2;
5054 currentLength += 2;
5055 } else {
5056 ++contents;
5057 ++currentLength;
5058 }
5059 if (mappedLength == allocatedLength) {
5060 allocatedLength += MAX_DECOMP_BUF;
5061 if (mappedCharacters == buffer) {
5062 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
5063 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
5064 } else {
5065 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
5066 }
5067 }
5068 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
5069 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
5070 mappedLength += decompLength;
5071 } else {
5072 mappedCharacters[mappedLength++] = currentChar;
5073 }
5074 }
5075 }
5076 if (needToReorder && mappedLength > 1) CFUniCharPrioritySort(mappedCharacters, mappedLength);
5077 }
5078
5079 if (theForm & kCFStringNormalizationFormKD) {
5080 CFIndex newLength = 0;
5081
5082 if (mappedLength == 0 && CFUniCharIsMemberOf(currentChar, kCFUniCharCompatibilityDecomposableCharacterSet)) {
5083 mappedCharacters[mappedLength++] = currentChar;
5084 }
5085 while (newLength < mappedLength) {
5086 newLength = CFUniCharCompatibilityDecompose(mappedCharacters, mappedLength, allocatedLength);
5087 if (newLength == 0) {
5088 allocatedLength += MAX_DECOMP_BUF;
5089 if (mappedCharacters == buffer) {
5090 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
5091 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
5092 } else {
5093 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
5094 }
5095 }
5096 }
5097 mappedLength = newLength;
5098 }
5099
5100 if (theForm & kCFStringNormalizationFormC) {
5101 UTF32Char nextChar;
5102
5103 if (mappedLength > 1) {
5104 CFIndex consumedLength = 1;
5105 UTF32Char *currentBase = mappedCharacters;
5106 uint8_t currentClass, lastClass = 0;
5107 bool didCombine = false;
5108
5109 currentChar = *mappedCharacters;
5110
5111 while (consumedLength < mappedLength) {
5112 nextChar = mappedCharacters[consumedLength];
5113 currentClass = CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)((nextChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))));
5114
5115 if (theForm & kCFStringNormalizationFormKD) {
5116 if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) {
5117 SInt8 lIndex = currentChar - HANGUL_LBASE;
5118
5119 if ((0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
5120 SInt16 vIndex = nextChar - HANGUL_VBASE;
5121
5122 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
5123 SInt16 tIndex = 0;
5124 CFIndex usedLength = mappedLength;
5125
5126 mappedCharacters[consumedLength++] = 0xFFFD;
5127
5128 if (consumedLength < mappedLength) {
5129 tIndex = mappedCharacters[consumedLength] - HANGUL_TBASE;
5130 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
5131 tIndex = 0;
5132 } else {
5133 mappedCharacters[consumedLength++] = 0xFFFD;
5134 }
5135 }
5136 *currentBase = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
5137
5138 while (--usedLength > 0) {
5139 if (mappedCharacters[usedLength] == 0xFFFD) {
5140 --mappedLength;
5141 --consumedLength;
5142 memmove(mappedCharacters + usedLength, mappedCharacters + usedLength + 1, (mappedLength - usedLength) * sizeof(UTF32Char));
5143 }
5144 }
5145 currentBase = mappedCharacters + consumedLength;
5146 currentChar = *currentBase;
5147 ++consumedLength;
5148
5149 continue;
5150 }
5151 }
5152 }
5153 if (!CFUniCharIsMemberOfBitmap(nextChar, ((nextChar < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))))) {
5154 *currentBase = currentChar;
5155 currentBase = mappedCharacters + consumedLength;
5156 currentChar = nextChar;
5157 ++consumedLength;
5158 continue;
5159 }
5160 }
5161
5162 if ((lastClass == 0) || (currentClass > lastClass)) {
5163 nextChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
5164 if (nextChar == 0xFFFD) {
5165 lastClass = currentClass;
5166 } else {
5167 mappedCharacters[consumedLength] = 0xFFFD;
5168 didCombine = true;
5169 currentChar = nextChar;
5170 }
5171 }
5172 ++consumedLength;
5173 }
5174
5175 *currentBase = currentChar;
5176 if (didCombine) {
5177 consumedLength = mappedLength;
5178 while (--consumedLength > 0) {
5179 if (mappedCharacters[consumedLength] == 0xFFFD) {
5180 --mappedLength;
5181 memmove(mappedCharacters + consumedLength, mappedCharacters + consumedLength + 1, (mappedLength - consumedLength) * sizeof(UTF32Char));
5182 }
5183 }
5184 }
5185 } else if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo
5186 SInt8 lIndex = currentChar - HANGUL_LBASE;
5187
5188 if ((contents < limit) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
5189 SInt16 vIndex = *contents - HANGUL_VBASE;
5190
5191 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
5192 SInt16 tIndex = 0;
5193
5194 ++contents; ++currentLength;
5195
5196 if (contents < limit) {
5197 tIndex = *contents - HANGUL_TBASE;
5198 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
5199 tIndex = 0;
5200 } else {
5201 ++contents; ++currentLength;
5202 }
5203 }
5204 *mappedCharacters = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
5205 mappedLength = 1;
5206 }
5207 }
5208 } else { // collect class 0 non-base characters
5209 while (contents < limit) {
5210 nextChar = *contents;
5211 if (CFUniCharIsSurrogateHighCharacter(nextChar) && ((contents + 1) < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
5212 nextChar = CFUniCharGetLongCharacterForSurrogatePair(nextChar, *(contents + 1));
5213 if (!CFUniCharIsMemberOfBitmap(nextChar, (const uint8_t *)CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))))) break;
5214 } else {
5215 if (!CFUniCharIsMemberOfBitmap(nextChar, nonBaseBMP) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, combiningBMP))) break;
5216 }
5217 currentChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
5218 if (0xFFFD == currentChar) break;
5219
5220 if (nextChar < 0x10000) {
5221 ++contents; ++currentLength;
5222 } else {
5223 contents += 2;
5224 currentLength += 2;
5225 }
5226
5227 *mappedCharacters = currentChar;
5228 mappedLength = 1;
5229 }
5230 }
5231 }
5232
5233 if (mappedLength > 0) {
5234 CFIndex utf16Length = __CFGetUTF16Length(mappedCharacters, mappedLength);
5235
5236 if (utf16Length != currentLength) {
5237 __CFStringChangeSize(string, CFRangeMake(currentIndex, currentLength), utf16Length, true);
5238 currentLength = utf16Length;
5239 }
5240 contents = (UTF16Char *)__CFStrContents(string);
5241 limit = contents + __CFStrLength(string);
5242 contents += currentIndex;
5243 __CFFillInUTF16(mappedCharacters, contents, mappedLength);
5244 contents += utf16Length;
5245 }
5246 currentIndex += currentLength;
5247 }
5248
5249 if (mappedCharacters != buffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, mappedCharacters);
5250 }
5251 }
5252
5253 void CFStringFold(CFMutableStringRef theString, CFStringCompareFlags theFlags, CFLocaleRef locale) {
5254 CFStringInlineBuffer stringBuffer;
5255 CFIndex length = CFStringGetLength(theString);
5256 CFIndex currentIndex = 0;
5257 CFIndex bufferLength = 0;
5258 UTF32Char buffer[kCFStringStackBufferLength];
5259 const uint8_t *cString;
5260 const uint8_t *langCode;
5261 CFStringEncoding eightBitEncoding;
5262 bool caseInsensitive = ((theFlags & kCFCompareCaseInsensitive) ? true : false);
5263 bool isObjc = CF_IS_OBJC(__kCFStringTypeID, theString);
5264 CFLocaleRef theLocale = locale;
5265
5266 if ((theFlags & kCFCompareLocalized) && (NULL == locale)) {
5267 theLocale = CFLocaleCopyCurrent();
5268 }
5269
5270 theFlags &= (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive);
5271
5272 if ((0 == theFlags) || (0 == length)) goto bail; // nothing to do
5273
5274 langCode = ((NULL == theLocale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(theLocale, true));
5275
5276 eightBitEncoding = __CFStringGetEightBitStringEncoding();
5277 cString = (const uint8_t *)CFStringGetCStringPtr(theString, eightBitEncoding);
5278
5279 if ((NULL != cString) && !caseInsensitive && (kCFStringEncodingASCII == eightBitEncoding)) goto bail; // All ASCII
5280
5281 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5282
5283 if ((NULL != cString) && (theFlags & (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive))) {
5284 const uint8_t *cStringPtr = cString;
5285 const uint8_t *cStringLimit = cString + length;
5286 uint8_t *cStringContents = (isObjc ? NULL : (uint8_t *)__CFStrContents(theString) + __CFStrSkipAnyLengthByte(theString));
5287
5288 while (cStringPtr < cStringLimit) {
5289 if ((*cStringPtr < 0x80) && (NULL == langCode)) {
5290 if (caseInsensitive && (*cStringPtr >= 'A') && (*cStringPtr <= 'Z')) {
5291 if (NULL == cStringContents) {
5292 break;
5293 } else {
5294 cStringContents[cStringPtr - cString] += ('a' - 'A');
5295 }
5296 }
5297 } else {
5298 if ((bufferLength = __CFStringFoldCharacterClusterAtIndex((UTF32Char)__CFCharToUniCharTable[*cStringPtr], &stringBuffer, cStringPtr - cString, theFlags, langCode, buffer, kCFStringStackBufferLength, NULL)) > 0) {
5299 if ((*buffer > 0x7F) || (bufferLength > 1) || (NULL == cStringContents)) break;
5300 cStringContents[cStringPtr - cString] = *buffer;
5301 }
5302 }
5303 ++cStringPtr;
5304 }
5305
5306 currentIndex = cStringPtr - cString;
5307 }
5308
5309 if (currentIndex < length) {
5310 UTF16Char *contents;
5311
5312 if (isObjc) {
5313 CFMutableStringRef cfString;
5314 CFRange range = CFRangeMake(currentIndex, length - currentIndex);
5315
5316 contents = (UTF16Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * range.length, 0);
5317
5318 CFStringGetCharacters(theString, range, contents);
5319
5320 cfString = CFStringCreateMutableWithExternalCharactersNoCopy(kCFAllocatorSystemDefault, contents, range.length, range.length, NULL);
5321
5322 CFStringFold(cfString, theFlags, theLocale);
5323
5324 CFStringReplace(theString, range, cfString);
5325
5326 CFRelease(cfString);
5327 } else {
5328 const UTF32Char *characters;
5329 const UTF32Char *charactersLimit;
5330 UTF32Char character;
5331 CFIndex consumedLength;
5332
5333 contents = NULL;
5334
5335 if (bufferLength > 0) {
5336 __CFStringChangeSize(theString, CFRangeMake(currentIndex + 1, 0), bufferLength - 1, true);
5337 length = __CFStrLength(theString);
5338 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5339
5340 contents = (UTF16Char *)__CFStrContents(theString) + currentIndex;
5341 characters = buffer;
5342 charactersLimit = characters + bufferLength;
5343 while (characters < charactersLimit) *(contents++) = (UTF16Char)*(characters++);
5344 ++currentIndex;
5345 }
5346
5347 while (currentIndex < length) {
5348 character = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex);
5349
5350 consumedLength = 0;
5351
5352 if ((NULL == langCode) && (character < 0x80) && (0 == (theFlags & kCFCompareDiacriticInsensitive))) {
5353 if (caseInsensitive && (character >= 'A') && (character <= 'Z')) {
5354 consumedLength = 1;
5355 bufferLength = 1;
5356 *buffer = character + ('a' - 'A');
5357 }
5358 } else {
5359 if (CFUniCharIsSurrogateHighCharacter(character) && ((currentIndex + 1) < length)) {
5360 UTF16Char lowSurrogate = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex + 1);
5361 if (CFUniCharIsSurrogateLowCharacter(lowSurrogate)) character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
5362 }
5363
5364 bufferLength = __CFStringFoldCharacterClusterAtIndex(character, &stringBuffer, currentIndex, theFlags, langCode, buffer, kCFStringStackBufferLength, &consumedLength);
5365 }
5366
5367 if (consumedLength > 0) {
5368 CFIndex utf16Length = bufferLength;
5369
5370 characters = buffer;
5371 charactersLimit = characters + bufferLength;
5372
5373 while (characters < charactersLimit) if (*(characters++) > 0xFFFF) ++utf16Length; // Extend bufferLength to the UTF-16 length
5374
5375 if ((utf16Length != consumedLength) || __CFStrIsEightBit(theString)) {
5376 CFRange range;
5377 CFIndex insertLength;
5378
5379 if (consumedLength < utf16Length) { // Need to expand
5380 range = CFRangeMake(currentIndex + consumedLength, 0);
5381 insertLength = utf16Length - consumedLength;
5382 } else {
5383 range = CFRangeMake(currentIndex + utf16Length, consumedLength - utf16Length);
5384 insertLength = 0;
5385 }
5386 __CFStringChangeSize(theString, range, insertLength, true);
5387 length = __CFStrLength(theString);
5388 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5389 }
5390
5391 (void)CFUniCharFromUTF32(buffer, bufferLength, (UTF16Char *)__CFStrContents(theString) + currentIndex, true, __CF_BIG_ENDIAN__);
5392
5393 currentIndex += utf16Length;
5394 } else {
5395 ++currentIndex;
5396 }
5397 }
5398 }
5399 }
5400
5401 bail:
5402 if (NULL == locale && theLocale) {
5403 CFRelease(theLocale);
5404 }
5405 }
5406
/* Bit flags describing one format specifier.  They are tested against CFFormatSpec.flags (for example, __CFStringFormatLocalizedNumber() checks kCFStringFormatPlusFlag there). */
5407 enum {
5408 kCFStringFormatZeroFlag = (1 << 0), // if not, padding is space char
5409 kCFStringFormatMinusFlag = (1 << 1), // if not, no flag implied
5410 kCFStringFormatPlusFlag = (1 << 2), // if not, no flag implied, overrides space
5411 kCFStringFormatSpaceFlag = (1 << 3), // if not, no flag implied
5412 kCFStringFormatExternalSpecFlag = (1 << 4), // using config dict
5413 kCFStringFormatLocalizable = (1 << 5) // explicitly mark the specs we can localize
5414 };
5415
/* Description of a single format specifier.  Only the fields marked "used by" are grounded in code visible near this definition; the remaining notes are assumptions to be confirmed against the format-string parser. */
5416 typedef struct {
5417 int16_t size; // presumably one of the CFFormatSize* codes -- TODO confirm
5418 int16_t type; // one of the CFFormat*Type codes; used by __CFStringFormatLocalizedNumber() to tell CFFormatLongType from CFFormatDoubleType
5419 SInt32 loc; // presumably the offset of this spec in the format string -- TODO confirm
5420 SInt32 len; // presumably the length of this spec in the format string -- TODO confirm
5421 SInt32 widthArg; // NOTE(review): looks like the field width or its argument slot -- confirm
5422 SInt32 precArg; // NOTE(review): looks like the precision or its argument slot -- confirm
5423 uint32_t flags; // bitwise OR of kCFStringFormat*Flag values
5424 int8_t mainArgNum; // index into the CFPrintValue array of the value to format
5425 int8_t precArgNum; // presumably the CFPrintValue index supplying the precision -- TODO confirm
5426 int8_t widthArgNum; // presumably the CFPrintValue index supplying the width -- TODO confirm
5427 int8_t configDictIndex; // NOTE(review): likely tied to kCFStringFormatExternalSpecFlag ("using config dict") -- confirm
5428 int8_t numericFormatStyle; // Only set for localizable numeric quantities
5429 } CFFormatSpec;
5430
/* One argument value to be formatted, tagged with its kind and size. */
5431 typedef struct {
5432 int16_t type; // a CFFormat*Type code; callers switch on CFFormatLongType / CFFormatDoubleType to pick the union member
5433 int16_t size; // a CFFormatSize* code; CFFormatSize16 marks a value stored in longDoubleValue
5434 union {
5435 int64_t int64Value; // read when type is CFFormatLongType
5436 double doubleValue; // read when type is CFFormatDoubleType (and size is not CFFormatSize16)
5437 #if LONG_DOUBLE_SUPPORT
5438 long double longDoubleValue; // read when type is CFFormatDoubleType and size is CFFormatSize16
5439 #endif
5440 void *pointerValue; // presumably used for the pointer/object/string kinds -- TODO confirm
5441 } value;
5442 } CFPrintValue;
5443
/* Size codes for format arguments.  These are ordinal codes, not byte counts (CFFormatSize4 is 3, CFFormatSize8 is 4, ...); the names suggest the byte width each code stands for.  The "long" and "pointer" aliases resolve to the 8-byte code on LP64 builds and to the 4-byte code otherwise. */
5444 enum {
5445 CFFormatDefaultSize = 0,
5446 CFFormatSize1 = 1,
5447 CFFormatSize2 = 2,
5448 CFFormatSize4 = 3,
5449 CFFormatSize8 = 4,
5450 CFFormatSize16 = 5, // compared against CFPrintValue.size to detect long double values
5451 #if __LP64__
5452 CFFormatSizeLong = CFFormatSize8,
5453 CFFormatSizePointer = CFFormatSize8
5454 #else
5455 CFFormatSizeLong = CFFormatSize4,
5456 CFFormatSizePointer = CFFormatSize4
5457 #endif
5458 };
5459
/* Values for CFFormatSpec.numericFormatStyle.  __CFStringFormatLocalizedNumber() switches on them to pick a cached number formatter: Unsigned and Decimal share the decimal formatter, Scientific uses the scientific one, and DecimalOrScientific uses the formatter reserved for %g. */
5460 enum {
5461 CFFormatStyleDecimal = (1 << 0),
5462 CFFormatStyleScientific = (1 << 1),
5463 CFFormatStyleDecimalOrScientific = CFFormatStyleDecimal|CFFormatStyleScientific, // both bits set
5464 CFFormatStyleUnsigned = (1 << 2)
5465 };
5466
/* Kind codes stored in CFFormatSpec.type and CFPrintValue.type.  Numbering starts at 32; the reason is not visible here. */
5467 enum {
5468 CFFormatLiteralType = 32,
5469 CFFormatLongType = 33, // integer value, held in CFPrintValue.value.int64Value
5470 CFFormatDoubleType = 34, // floating-point value, held in doubleValue or longDoubleValue
5471 CFFormatPointerType = 35,
5472 CFFormatObjectType = 36, /* handled specially */ /* ??? not used anymore, can be removed? */
5473 CFFormatCFType = 37, /* handled specially */
5474 CFFormatUnicharsType = 38, /* handled specially */
5475 CFFormatCharsType = 39, /* handled specially */
5476 CFFormatPascalCharsType = 40, /* handled specially */
5477 CFFormatSingleUnicharType = 41, /* handled specially */
5478 CFFormatDummyPointerType = 42 /* special case for %n */
5479 };
5480
5481 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
5482 /* Only come in here if spec->type is CFFormatLongType or CFFormatDoubleType. Pass in 0 for width or precision if not specified. Returns false if couldn't do the format (with the assumption the caller falls back to unlocalized).
5483 */
5484 static Boolean __CFStringFormatLocalizedNumber(CFMutableStringRef output, CFLocaleRef locale, const CFPrintValue *values, const CFFormatSpec *spec, SInt32 width, SInt32 precision, Boolean hasPrecision) {
5485 static CFLock_t formatterLock = CFLockInit;
5486 // These formatters are recached if the locale argument is different
5487 static CFNumberFormatterRef decimalFormatter = NULL;
5488 static CFNumberFormatterRef scientificFormatter = NULL;
5489 static CFNumberFormatterRef gFormatter = NULL; // for %g
5490 static SInt32 groupingSize = 0;
5491 static SInt32 secondaryGroupingSize = 0;
5492
5493 // !!! This code should be removed before shipping
5494 static char disableLocalizedFormatting = -1;
5495 if (disableLocalizedFormatting == -1) disableLocalizedFormatting = (getenv("CFStringDisableLocalizedNumberFormatting") != NULL) ? 1 : 0;
5496 if (disableLocalizedFormatting) return false;
5497
5498 CFNumberFormatterRef formatter;
5499
5500 __CFLock(&formatterLock); // We use the formatter from one thread at one time; if this proves to be a bottleneck we need to get fancier
5501
5502 switch (spec->numericFormatStyle) {
5503 case CFFormatStyleUnsigned:
5504 case CFFormatStyleDecimal:
5505 if (!decimalFormatter || !CFEqual(CFNumberFormatterGetLocale(decimalFormatter), locale)) { // cache or recache if the locale is different
5506 if (decimalFormatter) CFRelease(decimalFormatter);
5507 decimalFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterDecimalStyle); // since this is shared, remember to reset all its properties!
5508 }
5509 formatter = decimalFormatter;
5510 break;
5511 case CFFormatStyleScientific:
5512 if (!scientificFormatter || !CFEqual(CFNumberFormatterGetLocale(scientificFormatter), locale)) { // cache or recache if the locale is different
5513 if (scientificFormatter) CFRelease(scientificFormatter);
5514 scientificFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterScientificStyle);
5515 CFStringRef pattern = CFSTR("#E+00"); // the default pattern does not have the sign if the exponent is positive and it is single digit
5516 CFNumberFormatterSetFormat(scientificFormatter, pattern);
5517 CFNumberFormatterSetProperty(scientificFormatter, kCFNumberFormatterUseSignificantDigitsKey, kCFBooleanTrue);
5518 }
5519 formatter = scientificFormatter;
5520 break;
5521 case CFFormatStyleDecimalOrScientific:
5522 if (!gFormatter || !CFEqual(CFNumberFormatterGetLocale(gFormatter), locale)) { // cache or recache if the locale is different
5523 if (gFormatter) CFRelease(gFormatter);
5524 gFormatter = CFNumberFormatterCreate(NULL, locale, kCFNumberFormatterDecimalStyle);
5525 // when we update the locale in gFormatter, we also need to update the two grouping sizes
5526 CFNumberRef num = (CFNumberRef) CFNumberFormatterCopyProperty(gFormatter, kCFNumberFormatterGroupingSizeKey);
5527 CFNumberGetValue(num, kCFNumberSInt32Type, &groupingSize);
5528 CFRelease(num);
5529 num = (CFNumberRef) CFNumberFormatterCopyProperty(gFormatter, kCFNumberFormatterSecondaryGroupingSizeKey);
5530 CFNumberGetValue(num, kCFNumberSInt32Type, &secondaryGroupingSize);
5531 CFRelease(num);
5532 }
5533 formatter = gFormatter;
5534 break;
5535 }
5536
5537 SInt32 prec = hasPrecision ? precision : ((spec->type == CFFormatLongType) ? 0 : 6); // default precision of printf is 6
5538
5539 // pattern must be set before setting width and padding
5540 // otherwise, the pattern will take over those settings
5541 if (spec->numericFormatStyle == CFFormatStyleDecimalOrScientific) {
5542 if (prec == 0) prec = 1; // at least one sig fig
5543 CFMutableStringRef pattern = CFStringCreateMutable(NULL, 0);
5544 // use significant digits pattern
5545 CFStringAppendCString(pattern, "@", kCFStringEncodingASCII);
5546 CFStringPad(pattern, CFSTR("#"), prec, 0);
5547 double targetValue = values[spec->mainArgNum].value.doubleValue;
5548 #if LONG_DOUBLE_SUPPORT
5549 if (CFFormatSize16 == values[spec->mainArgNum].size) {
5550 targetValue = values[spec->mainArgNum].value.longDoubleValue; // losing precision
5551 }
5552 #endif
5553 double max = pow(10.0, (double)prec); // if the value requires more digits than the number of sig figs, we need to use scientific format
5554 double min = 0.0001; // if the value is less than 10E-4, scientific format is the shorter form
5555 if (((targetValue > 0 && (targetValue > max || targetValue < min)) || (targetValue < 0 && (targetValue < -max || targetValue > -min)))){
5556 CFStringAppendCString(pattern, "E+00", kCFStringEncodingASCII);
5557 } else if (prec > groupingSize && groupingSize != 0) {
5558 CFStringInsert(pattern, prec-groupingSize, CFSTR(",")); // if we are not using scientific format, we need to set the pattern to use grouping separator
5559 if (secondaryGroupingSize != 0 && prec > (groupingSize + secondaryGroupingSize)) CFStringInsert(pattern, prec-groupingSize-secondaryGroupingSize, CFSTR(","));
5560 }
5561 CFNumberFormatterSetFormat(formatter, pattern);
5562 CFRelease(pattern);
5563 }
5564
5565 // clear the padding, we will add it later if we need it
5566 const SInt32 z = 0;
5567 CFNumberRef zero = CFNumberCreate(NULL, kCFNumberSInt32Type, &z);
5568 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterFormatWidthKey, zero);
5569
5570 CFNumberRef tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec);
5571 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMaxFractionDigitsKey, tmp);
5572 if (spec->type == CFFormatDoubleType) {
5573 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinFractionDigitsKey, tmp);
5574 } else {
5575 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinFractionDigitsKey, zero);
5576 }
5577 CFRelease(tmp);
5578 CFRelease(zero);
5579
5580 Boolean isNegative = false;
5581 switch (values[spec->mainArgNum].type) {
5582 case CFFormatLongType:
5583 if (values[spec->mainArgNum].value.int64Value < 0) isNegative = true;
5584 break;
5585 case CFFormatDoubleType:
5586 #if LONG_DOUBLE_SUPPORT
5587 if ((CFFormatSize16 == values[spec->mainArgNum].size) && (values[spec->mainArgNum].value.longDoubleValue < 0)) isNegative = true;
5588 else
5589 #endif
5590 if (values[spec->mainArgNum].value.doubleValue < 0) isNegative = true;
5591 break;
5592 }
5593
5594 CFStringRef pattern = CFNumberFormatterGetFormat(formatter);
5595 if ((spec->flags & kCFStringFormatPlusFlag) && !isNegative) {
5596 if (CFStringGetCharacterAtIndex(pattern, 0) != '+') {
5597 CFMutableStringRef newPattern = CFStringCreateMutableCopy(NULL, 0, CFSTR("+"));
5598 CFStringAppend(newPattern, pattern);
5599 CFNumberFormatterSetFormat(formatter, newPattern);
5600 CFRelease(newPattern);
5601 }
5602 } else {
5603 if (CFStringGetCharacterAtIndex(pattern, 0) == '+') {
5604 CFStringRef newPattern = CFStringCreateWithSubstring(NULL, pattern, CFRangeMake(1, CFStringGetLength(pattern)-1));
5605 CFNumberFormatterSetFormat(formatter, newPattern);
5606 CFRelease(newPattern);
5607 }
5608 }
5609
5610 // width == 0 seems to be CFNumberFormatter's default setting
5611 if (hasPrecision && spec->type == CFFormatLongType) { // if we have precision and %d or %u, we pad 0 according to precision first
5612 tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec);
5613 } else {
5614 tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &width);
5615 }
5616 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterFormatWidthKey, tmp);
5617 CFRelease(tmp);
5618
5619 // ??? use the right zero here for Arabic
5620 Boolean padZero = spec->flags & kCFStringFormatZeroFlag;
5621 if (hasPrecision && spec->type == CFFormatLongType) { // if we have precision and %d or %u, we pad 0
5622 padZero = true;
5623 }
5624 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterPaddingCharacterKey, padZero ? CFSTR("0") : CFSTR(" "));
5625
5626
5627 // Left (default) or right padding
5628 SInt32 p = (spec->flags & kCFStringFormatMinusFlag) ? kCFNumberFormatterPadAfterSuffix : (padZero ? kCFNumberFormatterPadAfterPrefix : kCFNumberFormatterPadBeforePrefix);
5629 if (hasPrecision && spec->type == CFFormatLongType) {
5630 SInt32 tmpP = kCFNumberFormatterPadAfterPrefix;
5631 tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &tmpP);
5632 } else {
5633 tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &p);
5634 }
5635 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterPaddingPositionKey, tmp);
5636 CFRelease(tmp);
5637
5638 if (spec->numericFormatStyle == CFFormatStyleScientific) {
5639 prec++; // for %e, precision+1 is the number of sig fig
5640 tmp = CFNumberCreate(NULL, kCFNumberSInt32Type, &prec);
5641 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMinSignificantDigitsKey, tmp);
5642 CFNumberFormatterSetProperty(formatter, kCFNumberFormatterMaxSignificantDigitsKey, tmp);
5643 CFRelease(tmp);
5644 }
5645
5646 CFStringRef localizedNumberString = NULL;
5647 switch (spec->type) {
5648 case CFFormatLongType:
5649 // ??? Need to do unsigned
5650 localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberSInt64Type, &(values[spec->mainArgNum].value.int64Value));
5651 break;
5652 case CFFormatDoubleType: {
5653 #if LONG_DOUBLE_SUPPORT
5654 if (CFFormatSize16 == values[spec->mainArgNum].size) {
5655 double doubleValue = values[spec->mainArgNum].value.longDoubleValue; // losing precision
5656 localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberDoubleType, &doubleValue);
5657 } else
5658 #endif
5659 {
5660 localizedNumberString = CFNumberFormatterCreateStringWithValue(NULL, formatter, kCFNumberDoubleType, &(values[spec->mainArgNum].value.doubleValue));
5661 }
5662 break;
5663 }
5664 }
5665 __CFUnlock(&formatterLock);
5666
5667 if (localizedNumberString) {
5668 // we need to pad space if we have %d or %u
5669 if (spec->type == CFFormatLongType && hasPrecision && CFStringGetLength(localizedNumberString) < width) {
5670 CFMutableStringRef finalStr = NULL;
5671 if (p == kCFNumberFormatterPadAfterSuffix) {
5672 finalStr = CFStringCreateMutableCopy(NULL, 0, localizedNumberString);
5673 CFStringPad(finalStr, CFSTR(" "), width, 0);
5674 } else {
5675 finalStr = CFStringCreateMutable(NULL, 0);
5676 CFStringPad(finalStr, CFSTR(" "), width - CFStringGetLength(localizedNumberString), 0);
5677 CFStringAppend(finalStr, localizedNumberString);
5678 }
5679 CFRelease(localizedNumberString);
5680 localizedNumberString = finalStr;
5681 }
5682 CFStringAppend(output, localizedNumberString);
5683 CFRelease(localizedNumberString);
5684 return true;
5685 }
5686 return false;
5687 }
5688 #endif
5689
5690 CF_INLINE void __CFParseFormatSpec(const UniChar *uformat, const uint8_t *cformat, SInt32 *fmtIdx, SInt32 fmtLen, CFFormatSpec *spec, CFStringRef *configKeyPointer) {
5691 Boolean seenDot = false;
5692 Boolean seenSharp = false;
5693 CFIndex keyIndex = kCFNotFound;
5694
5695 for (;;) {
5696 UniChar ch;
5697 if (fmtLen <= *fmtIdx) return; /* no type */
5698 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5699
5700 if (keyIndex >= 0) {
5701 if ((ch < '0') || ((ch > '9') && (ch < 'A')) || ((ch > 'Z') && (ch < 'a') && (ch != '_')) || (ch > 'z')) {
5702 if (ch == '@') { // found the key
5703 CFIndex length = (*fmtIdx) - 1 - keyIndex;
5704
5705 spec->flags |= kCFStringFormatExternalSpecFlag;
5706 spec->type = CFFormatCFType;
5707 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5708
5709 if ((NULL != configKeyPointer) && (length > 0)) {
5710 if (cformat) {
5711 *configKeyPointer = CFStringCreateWithBytes(NULL, cformat + keyIndex, length, __CFStringGetEightBitStringEncoding(), FALSE);
5712 } else {
5713 *configKeyPointer = CFStringCreateWithCharactersNoCopy(NULL, uformat + keyIndex, length, kCFAllocatorNull);
5714 }
5715 }
5716 return;
5717 }
5718 keyIndex = kCFNotFound;
5719 }
5720 continue;
5721 }
5722
5723 reswtch:switch (ch) {
5724 case '#': // ignored for now
5725 seenSharp = true;
5726 break;
5727 case 0x20:
5728 if (!(spec->flags & kCFStringFormatPlusFlag)) spec->flags |= kCFStringFormatSpaceFlag;
5729 break;
5730 case '-':
5731 spec->flags |= kCFStringFormatMinusFlag;
5732 spec->flags &= ~kCFStringFormatZeroFlag; // remove zero flag
5733 break;
5734 case '+':
5735 spec->flags |= kCFStringFormatPlusFlag;
5736 spec->flags &= ~kCFStringFormatSpaceFlag; // remove space flag
5737 break;
5738 case '0':
5739 if (seenDot) { // after we see '.' and then we see '0', it is 0 precision. We should not see '.' after '0' if '0' is the zero padding flag
5740 spec->precArg = 0;
5741 break;
5742 }
5743 if (!(spec->flags & kCFStringFormatMinusFlag)) spec->flags |= kCFStringFormatZeroFlag;
5744 break;
5745 case 'h':
5746 if (*fmtIdx < fmtLen) {
5747 // fetch next character, don't increment fmtIdx
5748 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
5749 if ('h' == ch) { // 'hh' for char, like 'c'
5750 (*fmtIdx)++;
5751 spec->size = CFFormatSize1;
5752 break;
5753 }
5754 }
5755 spec->size = CFFormatSize2;
5756 break;
5757 case 'l':
5758 if (*fmtIdx < fmtLen) {
5759 // fetch next character, don't increment fmtIdx
5760 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
5761 if ('l' == ch) { // 'll' for long long, like 'q'
5762 (*fmtIdx)++;
5763 spec->size = CFFormatSize8;
5764 break;
5765 }
5766 }
5767 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5768 break;
5769 #if LONG_DOUBLE_SUPPORT
5770 case 'L':
5771 spec->size = CFFormatSize16;
5772 break;
5773 #endif
5774 case 'q':
5775 spec->size = CFFormatSize8;
5776 break;
5777 case 't': case 'z':
5778 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5779 break;
5780 case 'j':
5781 spec->size = CFFormatSize8;
5782 break;
5783 case 'c':
5784 spec->type = CFFormatLongType;
5785 spec->size = CFFormatSize1;
5786 return;
5787 case 'D': case 'd': case 'i': case 'U': case 'u':
5788 // we can localize all but octal or hex
5789 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionMountainLion)) spec->flags |= kCFStringFormatLocalizable;
5790 spec->numericFormatStyle = CFFormatStyleDecimal;
5791 if (ch == 'u' || ch == 'U') spec->numericFormatStyle = CFFormatStyleUnsigned;
5792 // fall thru
5793 case 'O': case 'o': case 'x': case 'X':
5794 spec->type = CFFormatLongType;
5795 // Seems like if spec->size == 0, we should spec->size = CFFormatSize4. However, 0 is handled correctly.
5796 return;
5797 case 'f': case 'F': case 'g': case 'G': case 'e': case 'E': {
5798 // we can localize all but hex float output
5799 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionMountainLion)) spec->flags |= kCFStringFormatLocalizable;
5800 char lch = (ch >= 'A' && ch <= 'Z') ? (ch - 'A' + 'a') : ch;
5801 spec->numericFormatStyle = ((lch == 'e' || lch == 'g') ? CFFormatStyleScientific : 0) | ((lch == 'f' || lch == 'g') ? CFFormatStyleDecimal : 0);
5802 if (seenDot && spec->precArg == -1 && spec->precArgNum == -1) { // for the cases that we have '.' but no precision followed, not even '*'
5803 spec->precArg = 0;
5804 }
5805 }
5806 // fall thru
5807 case 'a': case 'A':
5808 spec->type = CFFormatDoubleType;
5809 if (spec->size != CFFormatSize16) spec->size = CFFormatSize8;
5810 return;
5811 case 'n': /* %n is not handled correctly; for Leopard or newer apps, we disable it further */
5812 spec->type = 1 ? CFFormatDummyPointerType : CFFormatPointerType;
5813 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5814 return;
5815 case 'p':
5816 spec->type = CFFormatPointerType;
5817 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5818 return;
5819 case 's':
5820 spec->type = CFFormatCharsType;
5821 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5822 return;
5823 case 'S':
5824 spec->type = CFFormatUnicharsType;
5825 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5826 return;
5827 case 'C':
5828 spec->type = CFFormatSingleUnicharType;
5829 spec->size = CFFormatSize2;
5830 return;
5831 case 'P':
5832 spec->type = CFFormatPascalCharsType;
5833 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5834 return;
5835 case '@':
5836 if (seenSharp) {
5837 seenSharp = false;
5838 keyIndex = *fmtIdx;
5839 break;
5840 } else {
5841 spec->type = CFFormatCFType;
5842 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5843 return;
5844 }
5845 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
5846 int64_t number = 0;
5847 do {
5848 number = 10 * number + (ch - '0');
5849 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5850 } while ((UInt32)(ch - '0') <= 9);
5851 if ('$' == ch) {
5852 if (-2 == spec->precArgNum) {
5853 spec->precArgNum = (int8_t)number - 1; // Arg numbers start from 1
5854 } else if (-2 == spec->widthArgNum) {
5855 spec->widthArgNum = (int8_t)number - 1; // Arg numbers start from 1
5856 } else {
5857 spec->mainArgNum = (int8_t)number - 1; // Arg numbers start from 1
5858 }
5859 break;
5860 } else if (seenDot) { /* else it's either precision or width */
5861 spec->precArg = (SInt32)number;
5862 } else {
5863 spec->widthArg = (SInt32)number;
5864 }
5865 goto reswtch;
5866 }
5867 case '*':
5868 spec->widthArgNum = -2;
5869 break;
5870 case '.':
5871 seenDot = true;
5872 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5873 if ('*' == ch) {
5874 spec->precArgNum = -2;
5875 break;
5876 }
5877 goto reswtch;
5878 default:
5879 spec->type = CFFormatLiteralType;
5880 return;
5881 }
5882 }
5883 }
5884
5885 /* ??? %s depends on handling of encodings by __CFStringAppendBytes
5886 */
5887 void CFStringAppendFormatAndArguments(CFMutableStringRef outputString, CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
5888 __CFStringAppendFormatCore(outputString, NULL, NULL, formatOptions, NULL, formatString, 0, NULL, 0, args);
5889 }
5890
// Length of the buffer to call sprintf() with
#define BUFFER_LEN 512

/* SNPRINTF(TYPE, WHAT) converts WHAT to TYPE and prints it into `buffer` using `formatBuffer`.
   The macro is not self-contained: it expects `buffer`, `formatBuffer`, `width`, `precision`,
   `specs` and `curSpec` to be visible at the point of use. `width` and/or `precision` are
   passed as extra arguments ahead of the value only when the corresponding
   specs[curSpec].widthArgNum / precArgNum is not -1.
   The expansion is a braced block, so it is used without a trailing semicolon.
   The first variant prints through snprintf_l() limited to BUFFER_LEN-1 bytes; the fallback
   variant uses plain sprintf() and therefore relies on `buffer` being large enough.
*/
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI
#define SNPRINTF(TYPE, WHAT) { \
    TYPE value = (TYPE) WHAT; \
    if (-1 == specs[curSpec].widthArgNum && -1 == specs[curSpec].precArgNum) { \
        snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, value); \
    } else if (-1 == specs[curSpec].widthArgNum) { \
        snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, precision, value); \
    } else if (-1 == specs[curSpec].precArgNum) { \
        snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, width, value); \
    } else { \
        snprintf_l(buffer, BUFFER_LEN-1, NULL, formatBuffer, width, precision, value); \
    }}
#else
#define SNPRINTF(TYPE, WHAT) { \
    TYPE value = (TYPE) WHAT; \
    if (-1 == specs[curSpec].widthArgNum && -1 == specs[curSpec].precArgNum) { \
        sprintf(buffer, formatBuffer, value); \
    } else if (-1 == specs[curSpec].widthArgNum) { \
        sprintf(buffer, formatBuffer, precision, value); \
    } else if (-1 == specs[curSpec].precArgNum) { \
        sprintf(buffer, formatBuffer, width, value); \
    } else { \
        sprintf(buffer, formatBuffer, width, precision, value); \
    }}
#endif
5927
5928 void _CFStringAppendFormatAndArgumentsAux2(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFStringRef (*contextDescFunc)(void *, const void *, const void *, bool, bool *), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
5929 __CFStringAppendFormatCore(outputString, copyDescFunc, contextDescFunc, formatOptions, NULL, formatString, 0, NULL, 0, args);
5930 }
5931
5932 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
5933 _CFStringAppendFormatAndArgumentsAux2(outputString, copyDescFunc, NULL, formatOptions, formatString, args);
5934 }
5935
5936 static void __CFStringAppendFormatCore(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFStringRef (*contextDescFunc)(void *, const void *, const void *, bool, bool *), CFDictionaryRef formatOptions, CFDictionaryRef stringsDictConfig, CFStringRef formatString, CFIndex initialArgPosition, const void *origValues, CFIndex originalValuesSize, va_list args) {
5937 SInt32 numSpecs, sizeSpecs, sizeArgNum, formatIdx, curSpec, argNum;
5938 CFIndex formatLen;
5939 #define FORMAT_BUFFER_LEN 400
5940 const uint8_t *cformat = NULL;
5941 const UniChar *uformat = NULL;
5942 UniChar *formatChars = NULL;
5943 UniChar localFormatBuffer[FORMAT_BUFFER_LEN];
5944
5945 #define VPRINTF_BUFFER_LEN 61
5946 CFFormatSpec localSpecsBuffer[VPRINTF_BUFFER_LEN];
5947 CFFormatSpec *specs;
5948 CFPrintValue localValuesBuffer[VPRINTF_BUFFER_LEN];
5949 CFPrintValue *values;
5950 const CFPrintValue *originalValues = (const CFPrintValue *)origValues;
5951 CFDictionaryRef localConfigs[VPRINTF_BUFFER_LEN];
5952 CFDictionaryRef *configs;
5953 CFIndex numConfigs;
5954 CFAllocatorRef tmpAlloc = NULL;
5955 intmax_t dummyLocation; // A place for %n to do its thing in; should be the widest possible int value
5956
5957 numSpecs = 0;
5958 sizeSpecs = 0;
5959 sizeArgNum = 0;
5960 numConfigs = 0;
5961 specs = NULL;
5962 values = NULL;
5963 configs = NULL;
5964
5965
5966 formatLen = CFStringGetLength(formatString);
5967 if (!CF_IS_OBJC(__kCFStringTypeID, formatString)) {
5968 __CFAssertIsString(formatString);
5969 if (!__CFStrIsUnicode(formatString)) {
5970 cformat = (const uint8_t *)__CFStrContents(formatString);
5971 if (cformat) cformat += __CFStrSkipAnyLengthByte(formatString);
5972 } else {
5973 uformat = (const UniChar *)__CFStrContents(formatString);
5974 }
5975 }
5976 if (!cformat && !uformat) {
5977 formatChars = (formatLen > FORMAT_BUFFER_LEN) ? (UniChar *)CFAllocatorAllocate(tmpAlloc = __CFGetDefaultAllocator(), formatLen * sizeof(UniChar), 0) : localFormatBuffer;
5978 if (formatChars != localFormatBuffer && __CFOASafe) __CFSetLastAllocationEventName(formatChars, "CFString (temp)");
5979 CFStringGetCharacters(formatString, CFRangeMake(0, formatLen), formatChars);
5980 uformat = formatChars;
5981 }
5982
5983 /* Compute an upper bound for the number of format specifications */
5984 if (cformat) {
5985 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == cformat[formatIdx]) sizeSpecs++;
5986 } else {
5987 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == uformat[formatIdx]) sizeSpecs++;
5988 }
5989 tmpAlloc = __CFGetDefaultAllocator();
5990 specs = ((2 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? (CFFormatSpec *)CFAllocatorAllocate(tmpAlloc, (2 * sizeSpecs + 1) * sizeof(CFFormatSpec), 0) : localSpecsBuffer;
5991 if (specs != localSpecsBuffer && __CFOASafe) __CFSetLastAllocationEventName(specs, "CFString (temp)");
5992
5993 configs = ((sizeSpecs < VPRINTF_BUFFER_LEN) ? localConfigs : (CFDictionaryRef *)CFAllocatorAllocate(tmpAlloc, sizeof(CFStringRef) * sizeSpecs, 0));
5994
5995 /* Collect format specification information from the format string */
5996 for (curSpec = 0, formatIdx = 0; formatIdx < formatLen; curSpec++) {
5997 SInt32 newFmtIdx;
5998 specs[curSpec].loc = formatIdx;
5999 specs[curSpec].len = 0;
6000 specs[curSpec].size = 0;
6001 specs[curSpec].type = 0;
6002 specs[curSpec].flags = 0;
6003 specs[curSpec].widthArg = -1;
6004 specs[curSpec].precArg = -1;
6005 specs[curSpec].mainArgNum = -1;
6006 specs[curSpec].precArgNum = -1;
6007 specs[curSpec].widthArgNum = -1;
6008 specs[curSpec].configDictIndex = -1;
6009 if (cformat) {
6010 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != cformat[newFmtIdx]; newFmtIdx++);
6011 } else {
6012 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != uformat[newFmtIdx]; newFmtIdx++);
6013 }
6014 if (newFmtIdx != formatIdx) { /* Literal chunk */
6015 specs[curSpec].type = CFFormatLiteralType;
6016 specs[curSpec].len = newFmtIdx - formatIdx;
6017 } else {
6018 CFStringRef configKey = NULL;
6019 newFmtIdx++; /* Skip % */
6020 __CFParseFormatSpec(uformat, cformat, &newFmtIdx, formatLen, &(specs[curSpec]), &configKey);
6021 if (CFFormatLiteralType == specs[curSpec].type) {
6022 specs[curSpec].loc = formatIdx + 1;
6023 specs[curSpec].len = 1;
6024 } else {
6025 specs[curSpec].len = newFmtIdx - formatIdx;
6026 }
6027 }
6028 formatIdx = newFmtIdx;
6029
6030 // fprintf(stderr, "specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
6031
6032 }
6033 numSpecs = curSpec;
6034
6035 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
6036 sizeArgNum = ((NULL == originalValues) ? (3 * sizeSpecs + 1) : originalValuesSize);
6037
6038 values = (sizeArgNum > VPRINTF_BUFFER_LEN) ? (CFPrintValue *)CFAllocatorAllocate(tmpAlloc, sizeArgNum * sizeof(CFPrintValue), 0) : localValuesBuffer;
6039 if (values != localValuesBuffer && __CFOASafe) __CFSetLastAllocationEventName(values, "CFString (temp)");
6040 memset(values, 0, sizeArgNum * sizeof(CFPrintValue));
6041
6042 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
6043 // va_copy is a C99 extension. No support on Windows
6044 va_list copiedArgs;
6045 if (numConfigs > 0) va_copy(copiedArgs, args); // we need to preserve the original state for passing down
6046 #endif
6047
6048 /* Compute values array */
6049 argNum = initialArgPosition;
6050 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
6051 SInt32 newMaxArgNum;
6052 if (0 == specs[curSpec].type) continue;
6053 if (CFFormatLiteralType == specs[curSpec].type) continue;
6054 newMaxArgNum = sizeArgNum;
6055 if (newMaxArgNum < specs[curSpec].mainArgNum) {
6056 newMaxArgNum = specs[curSpec].mainArgNum;
6057 }
6058 if (newMaxArgNum < specs[curSpec].precArgNum) {
6059 newMaxArgNum = specs[curSpec].precArgNum;
6060 }
6061 if (newMaxArgNum < specs[curSpec].widthArgNum) {
6062 newMaxArgNum = specs[curSpec].widthArgNum;
6063 }
6064 if (sizeArgNum < newMaxArgNum) {
6065 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
6066 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
6067 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
6068 return; // more args than we expected!
6069 }
6070 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
6071 if (-2 == specs[curSpec].widthArgNum) {
6072 specs[curSpec].widthArgNum = argNum++;
6073 }
6074 if (-2 == specs[curSpec].precArgNum) {
6075 specs[curSpec].precArgNum = argNum++;
6076 }
6077 if (-1 == specs[curSpec].mainArgNum) {
6078 specs[curSpec].mainArgNum = argNum++;
6079 }
6080
6081 values[specs[curSpec].mainArgNum].size = specs[curSpec].size;
6082 values[specs[curSpec].mainArgNum].type = specs[curSpec].type;
6083
6084
6085 if (-1 != specs[curSpec].widthArgNum) {
6086 values[specs[curSpec].widthArgNum].size = 0;
6087 values[specs[curSpec].widthArgNum].type = CFFormatLongType;
6088 }
6089 if (-1 != specs[curSpec].precArgNum) {
6090 values[specs[curSpec].precArgNum].size = 0;
6091 values[specs[curSpec].precArgNum].type = CFFormatLongType;
6092 }
6093 }
6094
6095 /* Collect the arguments in correct type from vararg list */
6096 for (argNum = 0; argNum < sizeArgNum; argNum++) {
6097 if ((NULL != originalValues) && (0 == values[argNum].type)) values[argNum] = originalValues[argNum];
6098 switch (values[argNum].type) {
6099 case 0:
6100 case CFFormatLiteralType:
6101 break;
6102 case CFFormatLongType:
6103 case CFFormatSingleUnicharType:
6104 if (CFFormatSize1 == values[argNum].size) {
6105 values[argNum].value.int64Value = (int64_t)(int8_t)va_arg(args, int);
6106 } else if (CFFormatSize2 == values[argNum].size) {
6107 values[argNum].value.int64Value = (int64_t)(int16_t)va_arg(args, int);
6108 } else if (CFFormatSize4 == values[argNum].size) {
6109 values[argNum].value.int64Value = (int64_t)va_arg(args, int32_t);
6110 } else if (CFFormatSize8 == values[argNum].size) {
6111 values[argNum].value.int64Value = (int64_t)va_arg(args, int64_t);
6112 } else {
6113 values[argNum].value.int64Value = (int64_t)va_arg(args, int);
6114 }
6115 break;
6116 case CFFormatDoubleType:
6117 #if LONG_DOUBLE_SUPPORT
6118 if (CFFormatSize16 == values[argNum].size) {
6119 values[argNum].value.longDoubleValue = va_arg(args, long double);
6120 } else
6121 #endif
6122 {
6123 values[argNum].value.doubleValue = va_arg(args, double);
6124 }
6125 break;
6126 case CFFormatPointerType:
6127 case CFFormatObjectType:
6128 case CFFormatCFType:
6129 case CFFormatUnicharsType:
6130 case CFFormatCharsType:
6131 case CFFormatPascalCharsType:
6132 values[argNum].value.pointerValue = va_arg(args, void *);
6133 break;
6134 case CFFormatDummyPointerType:
6135 (void)va_arg(args, void *); // Skip the provided argument
6136 values[argNum].value.pointerValue = &dummyLocation;
6137 break;
6138 }
6139 }
6140 va_end(args);
6141
6142 /* Format the pieces together */
6143
6144 if (NULL == originalValues) {
6145 originalValues = values;
6146 originalValuesSize = sizeArgNum;
6147 }
6148
6149 SInt32 numSpecsContext = 0;
6150 CFFormatSpec *specsContext = (CFFormatSpec *)calloc(numSpecs, sizeof(CFFormatSpec));
6151 static const CFStringRef replacement = CFSTR("%@NSCONTEXT");
6152
6153 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
6154 SInt32 width = 0, precision = 0;
6155 UniChar *up, ch;
6156 Boolean hasWidth = false, hasPrecision = false;
6157
6158 // widthArgNum and widthArg are never set at the same time; same for precArg*
6159 if (-1 != specs[curSpec].widthArgNum) {
6160 width = (SInt32)values[specs[curSpec].widthArgNum].value.int64Value;
6161 hasWidth = true;
6162 }
6163 if (-1 != specs[curSpec].precArgNum) {
6164 precision = (SInt32)values[specs[curSpec].precArgNum].value.int64Value;
6165 hasPrecision = true;
6166 }
6167 if (-1 != specs[curSpec].widthArg) {
6168 width = specs[curSpec].widthArg;
6169 hasWidth = true;
6170 }
6171 if (-1 != specs[curSpec].precArg) {
6172 precision = specs[curSpec].precArg;
6173 hasPrecision = true;
6174 }
6175
6176 switch (specs[curSpec].type) {
6177 case CFFormatLongType:
6178 case CFFormatDoubleType:
6179 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
6180 if (formatOptions && (specs[curSpec].flags & kCFStringFormatLocalizable) && (CFGetTypeID(formatOptions) == CFLocaleGetTypeID())) { // We have a locale, so we do localized formatting
6181 if (__CFStringFormatLocalizedNumber(outputString, (CFLocaleRef)formatOptions, values, &specs[curSpec], width, precision, hasPrecision)) break;
6182 }
6183 /* Otherwise fall-thru to the next case! */
6184 #endif
6185 case CFFormatPointerType: {
6186 char formatBuffer[128];
6187 #if defined(__GNUC__)
6188 char buffer[BUFFER_LEN + width + precision];
6189 #else
6190 char stackBuffer[BUFFER_LEN];
6191 char *dynamicBuffer = NULL;
6192 char *buffer = stackBuffer;
6193 if (256+width+precision > BUFFER_LEN) {
6194 dynamicBuffer = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, 256+width+precision, 0);
6195 buffer = dynamicBuffer;
6196 }
6197 #endif
6198 SInt32 cidx, idx, loc;
6199 Boolean appended = false;
6200 loc = specs[curSpec].loc;
6201 // In preparation to call snprintf(), copy the format string out
6202 if (cformat) {
6203 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
6204 if ('$' == cformat[loc + cidx]) {
6205 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
6206 } else {
6207 formatBuffer[idx] = cformat[loc + cidx];
6208 }
6209 }
6210 } else {
6211 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
6212 if ('$' == uformat[loc + cidx]) {
6213 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
6214 } else {
6215 formatBuffer[idx] = (int8_t)uformat[loc + cidx];
6216 }
6217 }
6218 }
6219 formatBuffer[idx] = '\0';
6220 // Should modify format buffer here if necessary; for example, to translate %qd to
6221 // the equivalent, on architectures which do not have %q.
6222 buffer[sizeof(buffer) - 1] = '\0';
6223 switch (specs[curSpec].type) {
6224 case CFFormatLongType:
6225 if (CFFormatSize8 == specs[curSpec].size) {
6226 SNPRINTF(int64_t, values[specs[curSpec].mainArgNum].value.int64Value)
6227 } else {
6228 SNPRINTF(SInt32, values[specs[curSpec].mainArgNum].value.int64Value)
6229 }
6230 break;
6231 case CFFormatPointerType:
6232 case CFFormatDummyPointerType:
6233 SNPRINTF(void *, values[specs[curSpec].mainArgNum].value.pointerValue)
6234 break;
6235
6236 case CFFormatDoubleType:
6237 #if LONG_DOUBLE_SUPPORT
6238 if (CFFormatSize16 == specs[curSpec].size) {
6239 SNPRINTF(long double, values[specs[curSpec].mainArgNum].value.longDoubleValue)
6240 } else
6241 #endif
6242 {
6243 SNPRINTF(double, values[specs[curSpec].mainArgNum].value.doubleValue)
6244 }
6245 // See if we need to localize the decimal point
6246 if (formatOptions) { // We have localization info
6247 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
6248 CFStringRef decimalSeparator = (CFGetTypeID(formatOptions) == CFLocaleGetTypeID()) ? (CFStringRef)CFLocaleGetValue((CFLocaleRef)formatOptions, kCFLocaleDecimalSeparatorKey) : (CFStringRef)CFDictionaryGetValue(formatOptions, CFSTR("NSDecimalSeparator"));
6249 #else
6250 CFStringRef decimalSeparator = CFSTR(".");
6251 #endif
6252 if (decimalSeparator != NULL) { // We have a decimal separator in there
6253 CFIndex decimalPointLoc = 0;
6254 while (buffer[decimalPointLoc] != 0 && buffer[decimalPointLoc] != '.') decimalPointLoc++;
6255 if (buffer[decimalPointLoc] == '.') { // And we have a decimal point in the formatted string
6256 buffer[decimalPointLoc] = 0;
6257 CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
6258 CFStringAppend(outputString, decimalSeparator);
6259 CFStringAppendCString(outputString, (const char *)(buffer + decimalPointLoc + 1), __CFStringGetEightBitStringEncoding());
6260 appended = true;
6261 }
6262 }
6263 }
6264 break;
6265 }
6266 if (!appended) CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
6267 #if !defined(__GNUC__)
6268 if (dynamicBuffer) {
6269 CFAllocatorDeallocate(kCFAllocatorSystemDefault, dynamicBuffer);
6270 }
6271 #endif
6272 }
6273 break;
6274 case CFFormatLiteralType:
6275 if (cformat) {
6276 __CFStringAppendBytes(outputString, (const char *)(cformat+specs[curSpec].loc), specs[curSpec].len, __CFStringGetEightBitStringEncoding());
6277 } else {
6278 CFStringAppendCharacters(outputString, uformat+specs[curSpec].loc, specs[curSpec].len);
6279 }
6280 break;
6281 case CFFormatPascalCharsType:
6282 case CFFormatCharsType:
6283 if (values[specs[curSpec].mainArgNum].value.pointerValue == NULL) {
6284 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
6285 } else {
6286 int len;
6287 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue;
6288 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
6289 len = ((unsigned char *)str)[0];
6290 str++;
6291 if (hasPrecision && precision < len) len = precision;
6292 } else { // C-string case
6293 if (!hasPrecision) { // No precision, so rely on the terminating null character
6294 len = strlen(str);
6295 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
6296 const char *terminatingNull = (const char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
6297 if (terminatingNull) { // There was a null in the first precision characters
6298 len = terminatingNull - str;
6299 } else {
6300 len = precision;
6301 }
6302 }
6303 }
6304 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6305 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6306 // to ignore those flags (and, say, never pad with '0' instead of space).
6307 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6308 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
6309 if (hasWidth && width > len) {
6310 int w = width - len; // We need this many spaces; do it ten at a time
6311 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6312 }
6313 } else {
6314 if (hasWidth && width > len) {
6315 int w = width - len; // We need this many spaces; do it ten at a time
6316 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6317 }
6318 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
6319 }
6320 }
6321 break;
6322 case CFFormatSingleUnicharType:
6323 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value;
6324 CFStringAppendCharacters(outputString, &ch, 1);
6325 break;
6326 case CFFormatUnicharsType:
6327 //??? need to handle width, precision, and padding arguments
6328 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue;
6329 if (NULL == up) {
6330 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
6331 } else {
6332 int len;
6333 for (len = 0; 0 != up[len]; len++);
6334 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6335 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6336 // to ignore those flags (and, say, never pad with '0' instead of space).
6337 if (hasPrecision && precision < len) len = precision;
6338 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6339 CFStringAppendCharacters(outputString, up, len);
6340 if (hasWidth && width > len) {
6341 int w = width - len; // We need this many spaces; do it ten at a time
6342 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6343 }
6344 } else {
6345 if (hasWidth && width > len) {
6346 int w = width - len; // We need this many spaces; do it ten at a time
6347 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6348 }
6349 CFStringAppendCharacters(outputString, up, len);
6350 }
6351 }
6352 break;
6353 case CFFormatCFType:
6354 case CFFormatObjectType:
6355 if (specs[curSpec].configDictIndex != -1) { // config dict
6356 CFTypeRef object = NULL;
6357 switch (values[specs[curSpec].mainArgNum].type) {
6358 case CFFormatLongType:
6359 object = CFNumberCreate(tmpAlloc, kCFNumberSInt64Type, &(values[specs[curSpec].mainArgNum].value.int64Value));
6360 break;
6361
6362 case CFFormatDoubleType:
6363 #if LONG_DOUBLE_SUPPORT
6364 if (CFFormatSize16 == values[specs[curSpec].mainArgNum].size) {
6365 double aValue = values[specs[curSpec].mainArgNum].value.longDoubleValue; // losing precision
6366
6367 object = CFNumberCreate(tmpAlloc, kCFNumberDoubleType, &aValue);
6368 } else
6369 #endif
6370 {
6371 object = CFNumberCreate(tmpAlloc, kCFNumberDoubleType, &(values[specs[curSpec].mainArgNum].value.doubleValue));
6372 }
6373 break;
6374
6375 case CFFormatPointerType:
6376 object = CFNumberCreate(tmpAlloc, kCFNumberCFIndexType, &(values[specs[curSpec].mainArgNum].value.pointerValue));
6377 break;
6378
6379 case CFFormatPascalCharsType:
6380 case CFFormatCharsType:
6381 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
6382 CFMutableStringRef aString = CFStringCreateMutable(tmpAlloc, 0);
6383 int len;
6384 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue;
6385 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
6386 len = ((unsigned char *)str)[0];
6387 str++;
6388 if (hasPrecision && precision < len) len = precision;
6389 } else { // C-string case
6390 if (!hasPrecision) { // No precision, so rely on the terminating null character
6391 len = strlen(str);
6392 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
6393 const char *terminatingNull = (const char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
6394 if (terminatingNull) { // There was a null in the first precision characters
6395 len = terminatingNull - str;
6396 } else {
6397 len = precision;
6398 }
6399 }
6400 }
6401 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6402 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6403 // to ignore those flags (and, say, never pad with '0' instead of space).
6404 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6405 __CFStringAppendBytes(aString, str, len, __CFStringGetSystemEncoding());
6406 if (hasWidth && width > len) {
6407 int w = width - len; // We need this many spaces; do it ten at a time
6408 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6409 }
6410 } else {
6411 if (hasWidth && width > len) {
6412 int w = width - len; // We need this many spaces; do it ten at a time
6413 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6414 }
6415 __CFStringAppendBytes(aString, str, len, __CFStringGetSystemEncoding());
6416 }
6417
6418 object = aString;
6419 }
6420 break;
6421
6422 case CFFormatSingleUnicharType:
6423 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value;
6424 object = CFStringCreateWithCharactersNoCopy(tmpAlloc, &ch, 1, kCFAllocatorNull);
6425 break;
6426
6427 case CFFormatUnicharsType:
6428 //??? need to handle width, precision, and padding arguments
6429 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue;
6430 if (NULL != up) {
6431 CFMutableStringRef aString = CFStringCreateMutable(tmpAlloc, 0);
6432 int len;
6433 for (len = 0; 0 != up[len]; len++);
6434 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
6435 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
6436 // to ignore those flags (and, say, never pad with '0' instead of space).
6437 if (hasPrecision && precision < len) len = precision;
6438 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
6439 CFStringAppendCharacters(aString, up, len);
6440 if (hasWidth && width > len) {
6441 int w = width - len; // We need this many spaces; do it ten at a time
6442 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6443 }
6444 } else {
6445 if (hasWidth && width > len) {
6446 int w = width - len; // We need this many spaces; do it ten at a time
6447 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
6448 }
6449 CFStringAppendCharacters(aString, up, len);
6450 }
6451 object = aString;
6452 }
6453 break;
6454
6455 case CFFormatCFType:
6456 case CFFormatObjectType:
6457 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) object = CFRetain(values[specs[curSpec].mainArgNum].value.pointerValue);
6458 break;
6459 }
6460
6461 if (NULL != object) CFRelease(object);
6462
6463 } else if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
6464 CFStringRef str = NULL;
6465 if (contextDescFunc) {
6466 bool found = NO;
6467 str = contextDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatString, replacement, NO, &found);
6468 if (found) {
6469 str = CFRetain(replacement);
6470 specsContext[numSpecsContext] = specs[curSpec];
6471 numSpecsContext++;
6472 }
6473 }
6474 if (!str) {
6475 if (copyDescFunc) {
6476 str = copyDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
6477 } else {
6478 str = __CFCopyFormattingDescription(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
6479 if (NULL == str) {
6480 str = CFCopyDescription(values[specs[curSpec].mainArgNum].value.pointerValue);
6481 }
6482 }
6483 }
6484 if (str) {
6485 CFStringAppend(outputString, str);
6486 CFRelease(str);
6487 } else {
6488 CFStringAppendCString(outputString, "(null description)", kCFStringEncodingASCII);
6489 }
6490 } else {
6491 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
6492 }
6493 break;
6494 }
6495 }
6496
6497 for (SInt32 i = 0; i < numSpecsContext; i++) {
6498 CFRange range = CFStringFind(outputString, replacement, 0);
6499 CFStringRef str = contextDescFunc(values[specsContext[i].mainArgNum].value.pointerValue, outputString, replacement, true, NULL);
6500 if (str) {
6501 CFStringReplace(outputString, range, str);
6502 CFRelease(str);
6503 }
6504 }
6505
6506 free(specsContext);
6507
6508 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
6509 // va_copy is a C99 extension. No support on Windows
6510 if (numConfigs > 0) va_end(copiedArgs);
6511 #endif
6512 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
6513 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
6514 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
6515 if (configs != localConfigs) CFAllocatorDeallocate(tmpAlloc, configs);
6516 }
6517
6518 #undef SNPRINTF
6519
6520 void CFShowStr(CFStringRef str) {
6521 CFAllocatorRef alloc;
6522
6523 if (!str) {
6524 fprintf(stdout, "(null)\n");
6525 return;
6526 }
6527
6528 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
6529 fprintf(stdout, "This is an NSString, not CFString\n");
6530 return;
6531 }
6532
6533 alloc = CFGetAllocator(str);
6534
6535 fprintf(stdout, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str), __CFStrIsEightBit(str));
6536 fprintf(stdout, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
6537 __CFStrHasLengthByte(str), __CFStrHasNullByte(str), __CFStrIsInline(str));
6538
6539 fprintf(stdout, "Allocator ");
6540 if (alloc != kCFAllocatorSystemDefault) {
6541 fprintf(stdout, "%p\n", (void *)alloc);
6542 } else {
6543 fprintf(stdout, "SystemDefault\n");
6544 }
6545 fprintf(stdout, "Mutable %d\n", __CFStrIsMutable(str));
6546 if (!__CFStrIsMutable(str) && __CFStrHasContentsDeallocator(str)) {
6547 if (__CFStrContentsDeallocator(str)) fprintf(stdout, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str));
6548 else fprintf(stdout, "ContentsDeallocatorFunc None\n");
6549 } else if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str)) {
6550 fprintf(stdout, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef)str));
6551 }
6552
6553 if (__CFStrIsMutable(str)) {
6554 fprintf(stdout, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str), __CFStrIsFixed(str) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str));
6555 }
6556 fprintf(stdout, "Contents %p\n", (void *)__CFStrContents(str));
6557 }
6558
6559
6560
6561 #undef HANGUL_SBASE
6562 #undef HANGUL_LBASE
6563 #undef HANGUL_VBASE
6564 #undef HANGUL_TBASE
6565 #undef HANGUL_SCOUNT
6566 #undef HANGUL_LCOUNT
6567 #undef HANGUL_VCOUNT
6568 #undef HANGUL_TCOUNT
6569 #undef HANGUL_NCOUNT
6570