]> git.saurik.com Git - apple/cf.git/blob - CFString.c
CF-635.19.tar.gz
[apple/cf.git] / CFString.c
1 /*
2 * Copyright (c) 2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFString.c
25 Copyright (c) 1998-2011, Apple Inc. All rights reserved.
26 Responsibility: Ali Ozer
27
28 !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined.
29 */
30
31 #include <CoreFoundation/CFBase.h>
32 #include <CoreFoundation/CFString.h>
33 #include <CoreFoundation/CFDictionary.h>
34 #include <CoreFoundation/CFStringEncodingConverterExt.h>
35 #include <CoreFoundation/CFUniChar.h>
36 #include <CoreFoundation/CFUnicodeDecomposition.h>
37 #include <CoreFoundation/CFUnicodePrecomposition.h>
38 #include <CoreFoundation/CFPriv.h>
39 #include <CoreFoundation/CFNumber.h>
40 #include "CFInternal.h"
41 #include "CFLocaleInternal.h"
42 #include <stdarg.h>
43 #include <stdio.h>
44 #include <string.h>
45 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
46 #include <unistd.h>
47 #endif
48
49 #if defined(__GNUC__)
50 #define LONG_DOUBLE_SUPPORT 1
51 #else
52 #define LONG_DOUBLE_SUPPORT 0
53 #endif
54
55
56
57 #define USE_STRING_ROM 0
58
59
60 #ifndef INSTRUMENT_SHARED_STRINGS
61 #define INSTRUMENT_SHARED_STRINGS 0
62 #endif
63
64 __private_extern__ const CFStringRef __kCFLocaleCollatorID;
65
66 #if INSTRUMENT_SHARED_STRINGS
67 #include <sys/stat.h> /* for umask() */
68
69 static void __CFRecordStringAllocationEvent(const char *encoding, const char *bytes, CFIndex byteCount) {
70 static CFSpinLock_t lock = CFSpinLockInit;
71
72 if (memchr(bytes, '\n', byteCount)) return; //never record string allocation events for strings with newlines, because those confuse our parser and because they'll never go into the ROM
73
74 __CFSpinLock(&lock);
75 static int fd;
76 if (! fd) {
77 extern char **_NSGetProgname(void);
78 const char *name = *_NSGetProgname();
79 if (! name) name = "UNKNOWN";
80 umask(0);
81 char path[1024];
82 snprintf(path, sizeof(path), "/tmp/CFSharedStringInstrumentation_%s_%d.txt", name, getpid());
83 fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0666);
84 if (fd <= 0) {
85 int error = errno;
86 const char *errString = strerror(error);
87 fprintf(stderr, "open() failed with error %d (%s)\n", error, errString);
88 }
89 }
90 if (fd > 0) {
91 char *buffer = NULL;
92 char formatString[256];
93 snprintf(formatString, sizeof(formatString), "%%-8d\t%%-16s\t%%.%lds\n", byteCount);
94 int resultCount = asprintf(&buffer, formatString, getpid(), encoding, bytes);
95 if (buffer && resultCount > 0) write(fd, buffer, resultCount);
96 else puts("Couldn't record allocation event");
97 free(buffer);
98 }
99 __CFSpinUnlock(&lock);
100 }
101 #endif //INSTRUMENT_SHARED_STRINGS
102
103
104
// Function-pointer type for a per-character 8-bit to UniChar conversion callback: takes flags and one
// 8-bit character, writes through unicodeChar and returns a Boolean. CFStringGetSystemEncoding() casts a
// converter's toUnicode member to this type before handing it to __CFSetCharToUniCharFunc().
105 typedef Boolean (*UNI_CHAR_FUNC)(UInt32 flags, UInt8 ch, UniChar *unicodeChar);
106
107 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
108 extern size_t malloc_good_size(size_t size);
109 #endif
110 extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars);
111
112 static void __CFStringAppendFormatCore(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef formatString, CFIndex initialArgPosition, const void *origValues, CFIndex originalValuesSize, va_list args);
113
114 #if defined(DEBUG)
115
116 // We put this into C & Pascal strings if we can't convert
117 #define CONVERSIONFAILURESTR "CFString conversion failed"
118
119 // We set this to true when purging the constant string table, so CFStringDeallocate doesn't assert
120 static Boolean __CFConstantStringTableBeingFreed = false;
121
122 #endif
123
124
125
126 // This section is for CFString compatibility and other behaviors...
127
128 static CFOptionFlags _CFStringCompatibilityMask = 0;
129
130 void _CFStringSetCompatibility(CFOptionFlags mask) {
131 _CFStringCompatibilityMask |= mask;
132 }
133
134 CF_INLINE Boolean __CFStringGetCompatibility(CFOptionFlags mask) {
135 return (_CFStringCompatibilityMask & mask) == mask;
136 }
137
138
139
140 // Two constant strings used by CFString; these are initialized in CFStringInitialize
141 CONST_STRING_DECL(kCFEmptyString, "")
142
143 // This is separate for C++
/* Per-string state for mutable CFStrings. It is one member of the `variants` union in struct __CFString,
   and the mutable-only accessors in this file read and write it through str->variants.notInlineMutable.
*/
144 struct __notInlineMutable {
145 void *buffer; // Contents buffer; first member, like the buffer of the other non-inline variants
146 CFIndex length; // Explicit length of the string
147 CFIndex capacity; // Capacity in bytes
148 unsigned int hasGap:1; // Currently unused
149 unsigned int isFixedCapacity:1; // Read by __CFStrIsFixed(), set by __CFStrSetIsFixed()
150 unsigned int isExternalMutable:1; // Read by __CFStrIsExternalMutable(), set by __CFStrSetIsExternalMutable()
151 unsigned int capacityProvidedExternally:1; // When set, capacity is only changed if the string must grow beyond it
152 #if __LP64__
153 unsigned long desiredCapacity:60; // Client-requested capacity, in characters
154 #else
155 unsigned long desiredCapacity:28; // Client-requested capacity, in characters
156 #endif
157 CFAllocatorRef contentsAllocator; // Optional
158 }; // The only mutable variant for CFString
159
160
161 /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
162 */
/* In-memory representation of a CFString: the runtime base followed by one of several variant layouts.
   Which variant is live is decided by the info bits tested by __CFStrIsInline(), __CFStrHasExplicitLength()
   and __CFStrIsMutable().
*/
163 struct __CFString {
164 CFRuntimeBase base; // Holds the _cfinfo bytes that the flag predicates in this file inspect
165 union { // In many cases the allocated structs are smaller than these
166 struct __inline1 {
167 CFIndex length; // Explicit length of an inline string
168 } inline1; // Bytes follow the length
169 struct __notInlineImmutable1 {
170 void *buffer; // Note that the buffer is in the same place for all non-inline variants of CFString
171 CFIndex length; // Explicit length
172 CFAllocatorRef contentsDeallocator; // Optional; just the dealloc func is used
173 } notInlineImmutable1; // This is the usual not-inline immutable CFString
174 struct __notInlineImmutable2 {
175 void *buffer; // Contents, starting with the length byte
176 CFAllocatorRef contentsDeallocator; // Optional; just the dealloc func is used
177 } notInlineImmutable2; // This is the not-inline immutable CFString when length is stored with the contents (first byte)
178 struct __notInlineMutable notInlineMutable; // Layout used by mutable strings
179 } variants;
180 };
181
182 /*
183 I = is immutable
184 E = not inline contents
185 U = is Unicode
186 N = has NULL byte
187 L = has length byte
188 D = explicit deallocator for contents (for mutable objects, allocator)
189 C = length field is CFIndex (rather than UInt32); only meaningful for 64-bit, really
190 if needed this bit (valuable real-estate) can be given up for another bit elsewhere, since this info is needed just for 64-bit
191
192 Also need (only for mutable)
193 F = is fixed
194 G = has gap
195 Cap, DesCap = capacity
196
197 B7 B6 B5 B4 B3 B2 B1 B0
198          U  N  L  C  I      (i.e. U = B4, N = B3, L = B2, C = B1, I = B0)
199
200 B6 B5
201 0 0 inline contents
202 0 1 E (freed with default allocator)
203 1 0 E (not freed)
204 1 1 E D
205
206 !!! Note: Constant CFStrings use the bit patterns:
207 C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable)
208 D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable)
209 The bit usages should not be modified in a way that would affect these bit patterns.
210 */
211
/* Masks and values for the info byte (str->base._cfinfo[CF_INFO_BITS]) of a CFString.
   Each *Mask constant selects a field; the matching constant without "Mask" is the value that field is compared against.
   Bits 0x60 form a two-bit "contents" field; the remaining flags are single bits.
*/
212 enum {
213 __kCFFreeContentsWhenDoneMask = 0x020, // Low bit of the contents field
214 __kCFFreeContentsWhenDone = 0x020,
215 __kCFContentsMask = 0x060, // Whole two-bit contents field
216 __kCFHasInlineContents = 0x000, // Contents field is zero: contents are stored inline
217 __kCFNotInlineContentsNoFree = 0x040, // Don't free
218 __kCFNotInlineContentsDefaultFree = 0x020, // Use allocator's free function
219 __kCFNotInlineContentsCustomFree = 0x060, // Use a specially provided free function
220 __kCFHasContentsAllocatorMask = 0x060,
221 __kCFHasContentsAllocator = 0x060, // (For mutable strings) use a specially provided allocator
222 __kCFHasContentsDeallocatorMask = 0x060, // Same bit pattern as the contents-allocator pair above
223 __kCFHasContentsDeallocator = 0x060,
224 __kCFIsMutableMask = 0x01, // Set for mutable strings
225 __kCFIsMutable = 0x01,
226 __kCFIsUnicodeMask = 0x10, // Set when the string is Unicode; clear means eight-bit (see __CFStrIsEightBit)
227 __kCFIsUnicode = 0x10,
228 __kCFHasNullByteMask = 0x08,
229 __kCFHasNullByte = 0x08,
230 __kCFHasLengthByteMask = 0x04, // Set when the contents begin with a length byte
231 __kCFHasLengthByte = 0x04,
232 // !!! Bit 0x02 has been freed up
233 };
234
235
236 // !!! Assumptions:
237 // Mutable strings are not inline
238 // Compile-time constant strings are not inline
239 // Mutable strings always have explicit length (but they might also have length byte and null byte)
240 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
241 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
242
243 /* The following set of functions and macros need to be updated on change to the bit configuration
244 */
245 CF_INLINE Boolean __CFStrIsMutable(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsMutableMask) == __kCFIsMutable;}
246 CF_INLINE Boolean __CFStrIsInline(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFContentsMask) == __kCFHasInlineContents;}
247 CF_INLINE Boolean __CFStrFreeContentsWhenDone(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFFreeContentsWhenDoneMask) == __kCFFreeContentsWhenDone;}
248 CF_INLINE Boolean __CFStrHasContentsDeallocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsDeallocatorMask) == __kCFHasContentsDeallocator;}
249 CF_INLINE Boolean __CFStrIsUnicode(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) == __kCFIsUnicode;}
250 CF_INLINE Boolean __CFStrIsEightBit(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) != __kCFIsUnicode;}
251 CF_INLINE Boolean __CFStrHasNullByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasNullByteMask) == __kCFHasNullByte;}
252 CF_INLINE Boolean __CFStrHasLengthByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte;}
253 CF_INLINE Boolean __CFStrHasExplicitLength(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & (__kCFIsMutableMask | __kCFHasLengthByteMask)) != __kCFHasLengthByte;} // Has explicit length if (1) mutable or (2) not mutable and no length byte
254 CF_INLINE Boolean __CFStrIsConstant(CFStringRef str) {
255 #if __LP64__
256 return str->base._rc == 0;
257 #else
258 return (str->base._cfinfo[CF_RC_BITS]) == 0;
259 #endif
260 }
261
262 CF_INLINE SInt32 __CFStrSkipAnyLengthByte(CFStringRef str) {return ((str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents
263
264 /* Returns ptr to the buffer (which might include the length byte)
265 */
266 CF_INLINE const void *__CFStrContents(CFStringRef str) {
267 if (__CFStrIsInline(str)) {
268 return (const void *)(((uintptr_t)&(str->variants)) + (__CFStrHasExplicitLength(str) ? sizeof(CFIndex) : 0));
269 } else { // Not inline; pointer is always word 2
270 return str->variants.notInlineImmutable1.buffer;
271 }
272 }
273
274 static CFAllocatorRef *__CFStrContentsDeallocatorPtr(CFStringRef str) {
275 return __CFStrHasExplicitLength(str) ? &(((CFMutableStringRef)str)->variants.notInlineImmutable1.contentsDeallocator) : &(((CFMutableStringRef)str)->variants.notInlineImmutable2.contentsDeallocator); }
276
277 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
278 CF_INLINE CFAllocatorRef __CFStrContentsDeallocator(CFStringRef str) {
279 return *__CFStrContentsDeallocatorPtr(str);
280 }
281
282 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
283 CF_INLINE void __CFStrSetContentsDeallocator(CFStringRef str, CFAllocatorRef allocator) {
284 allocator = kCFUseCollectableAllocator ? allocator : _CFConvertAllocatorToNonGCRefZeroEquivalent(allocator);
285 if (!(kCFAllocatorSystemDefaultGCRefZero == allocator || kCFAllocatorDefaultGCRefZero == allocator)) CFRetain(allocator);
286 *__CFStrContentsDeallocatorPtr(str) = allocator;
287 }
288
289 static CFAllocatorRef *__CFStrContentsAllocatorPtr(CFStringRef str) {
290 CFAssert(!__CFStrIsInline(str), __kCFLogAssertion, "Asking for contents allocator of inline string");
291 CFAssert(__CFStrIsMutable(str), __kCFLogAssertion, "Asking for contents allocator of an immutable string");
292 return (CFAllocatorRef *)&(str->variants.notInlineMutable.contentsAllocator);
293 }
294
295 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
296 CF_INLINE CFAllocatorRef __CFStrContentsAllocator(CFMutableStringRef str) {
297 return *(__CFStrContentsAllocatorPtr(str));
298 }
299
300 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
301 CF_INLINE void __CFStrSetContentsAllocator(CFMutableStringRef str, CFAllocatorRef allocator) {
302 allocator = kCFUseCollectableAllocator ? allocator : _CFConvertAllocatorToNonGCRefZeroEquivalent(allocator);
303 if (!(kCFAllocatorSystemDefaultGCRefZero == allocator || kCFAllocatorDefaultGCRefZero == allocator)) CFRetain(allocator);
304 *(__CFStrContentsAllocatorPtr(str)) = allocator;
305 }
306
307 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
308 */
309 CF_INLINE CFIndex __CFStrLength(CFStringRef str) {
310 if (__CFStrHasExplicitLength(str)) {
311 if (__CFStrIsInline(str)) {
312 return str->variants.inline1.length;
313 } else {
314 return str->variants.notInlineImmutable1.length;
315 }
316 } else {
317 return (CFIndex)(*((uint8_t *)__CFStrContents(str)));
318 }
319 }
320
321 CF_INLINE CFIndex __CFStrLength2(CFStringRef str, const void *buffer) {
322 if (__CFStrHasExplicitLength(str)) {
323 if (__CFStrIsInline(str)) {
324 return str->variants.inline1.length;
325 } else {
326 return str->variants.notInlineImmutable1.length;
327 }
328 } else {
329 return (CFIndex)(*((uint8_t *)buffer));
330 }
331 }
332
333
334 Boolean __CFStringIsEightBit(CFStringRef str) {
335 return __CFStrIsEightBit(str);
336 }
337
338 /* Sets the content pointer for immutable or mutable strings.
339 */
340 CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p) {
341 // XXX_PCB catch all writes for mutable string case.
342 __CFAssignWithWriteBarrier((void **)&((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p);
343 }
344 CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v) {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._cfinfo[CF_INFO_BITS], 6, 0, v);}
345
346 CF_INLINE void __CFStrSetExplicitLength(CFStringRef str, CFIndex v) {
347 if (__CFStrIsInline(str)) {
348 ((CFMutableStringRef)str)->variants.inline1.length = v;
349 } else {
350 ((CFMutableStringRef)str)->variants.notInlineImmutable1.length = v;
351 }
352 }
353
354 CF_INLINE void __CFStrSetUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= __kCFIsUnicode;}
355 CF_INLINE void __CFStrClearUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~__kCFIsUnicode;}
356 CF_INLINE void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= (__kCFHasLengthByte | __kCFHasNullByte);}
357 CF_INLINE void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~(__kCFHasLengthByte | __kCFHasNullByte);}
358
359
360 // Assumption: The following set of inlines (using str->variants.notInlineMutable) are called with mutable strings only
361 CF_INLINE Boolean __CFStrIsFixed(CFStringRef str) {return str->variants.notInlineMutable.isFixedCapacity;}
362 CF_INLINE Boolean __CFStrIsExternalMutable(CFStringRef str) {return str->variants.notInlineMutable.isExternalMutable;}
363 CF_INLINE Boolean __CFStrHasContentsAllocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsAllocatorMask) == __kCFHasContentsAllocator;}
364 CF_INLINE void __CFStrSetIsFixed(CFMutableStringRef str) {str->variants.notInlineMutable.isFixedCapacity = 1;}
365 CF_INLINE void __CFStrSetIsExternalMutable(CFMutableStringRef str) {str->variants.notInlineMutable.isExternalMutable = 1;}
366 CF_INLINE void __CFStrSetHasGap(CFMutableStringRef str) {str->variants.notInlineMutable.hasGap = 1;}
367
368 // If capacity is provided externally, we only change it when we need to grow beyond it
369 CF_INLINE Boolean __CFStrCapacityProvidedExternally(CFStringRef str) {return str->variants.notInlineMutable.capacityProvidedExternally;}
370 CF_INLINE void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 1;}
371 CF_INLINE void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 0;}
372
373 // "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
374 CF_INLINE CFIndex __CFStrCapacity(CFStringRef str) {return str->variants.notInlineMutable.capacity;}
375 CF_INLINE void __CFStrSetCapacity(CFMutableStringRef str, CFIndex cap) {str->variants.notInlineMutable.capacity = cap;}
376
377 // "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
378 CF_INLINE CFIndex __CFStrDesiredCapacity(CFStringRef str) {return str->variants.notInlineMutable.desiredCapacity;}
379 CF_INLINE void __CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex size) {str->variants.notInlineMutable.desiredCapacity = size;}
380
381
382 static void *__CFStrAllocateMutableContents(CFMutableStringRef str, CFIndex size) {
383 void *ptr;
384 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
385 ptr = CFAllocatorAllocate(alloc, size, 0);
386 if (__CFOASafe) __CFSetLastAllocationEventName(ptr, "CFString (store)");
387 return ptr;
388 }
389
390 static void __CFStrDeallocateMutableContents(CFMutableStringRef str, void *buffer) {
391 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
392 if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str) && _CFAllocatorIsGCRefZero(alloc)) {
393 // do nothing
394 } else if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
395 // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle.
396 auto_zone_release(objc_collectableZone(), buffer);
397 } else {
398 CFAllocatorDeallocate(alloc, buffer);
399 }
400 }
401
402
403
404
405 /* CFString specific init flags
406 Note that you cannot count on the external buffer not being copied.
407 Also, if you specify an external buffer, you should not change it behind the CFString's back.
408 */
/* Flag values passed when initializing a CFString. Note that kCFStringNoCopyNoFreeProvidedContents (0x30000)
   has both the kCFStringPascal (0x10000) and kCFStringNoCopyProvidedContents (0x20000) bits set, so these
   constants cannot be tested with a single-bit mask — NOTE(review): confirm how callers distinguish them.
*/
409 enum {
410 __kCFThinUnicodeIfPossible = 0x1000000, /* See if the Unicode contents can be thinned down to 8-bit */
411 kCFStringPascal = 0x10000, /* Indicating that the string data has a Pascal string structure (length byte at start) */
412 kCFStringNoCopyProvidedContents = 0x20000, /* Don't copy the provided string contents if possible; free it when no longer needed */
413 kCFStringNoCopyNoFreeProvidedContents = 0x30000 /* Don't copy the provided string contents if possible; don't free it when no longer needed */
414 };
415
416 /* System Encoding.
417 */
// Lazily computed encoding caches. kCFStringEncodingInvalidId means "not determined yet".
418 static CFStringEncoding __CFDefaultSystemEncoding = kCFStringEncodingInvalidId; // Filled in by CFStringGetSystemEncoding()
419 static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId; // Filled in by CFStringFileSystemEncoding()
420 CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId; // Filled in by __CFStringComputeEightBitStringEncoding(); not static
421
422
423 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX
424 #define __defaultEncoding kCFStringEncodingMacRoman
425 #elif DEPLOYMENT_TARGET_WINDOWS
426 #define __defaultEncoding kCFStringEncodingWindowsLatin1
427 #else
428 #warning This value must match __CFGetConverter condition in CFStringEncodingConverter.c
429 #define __defaultEncoding kCFStringEncodingISOLatin1
430 #endif
431
432 CFStringEncoding CFStringGetSystemEncoding(void) {
433 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) {
434 __CFDefaultSystemEncoding = __defaultEncoding;
435 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding);
436 __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? (UNI_CHAR_FUNC)converter->toUnicode : NULL);
437 }
438 return __CFDefaultSystemEncoding;
439 }
440
441 // Fast version for internal use
442
443 CF_INLINE CFStringEncoding __CFStringGetSystemEncoding(void) {
444 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) (void)CFStringGetSystemEncoding();
445 return __CFDefaultSystemEncoding;
446 }
447
448 CFStringEncoding CFStringFileSystemEncoding(void) {
449 if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) {
450 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
451 __CFDefaultFileSystemEncoding = kCFStringEncodingUTF8;
452 #else
453 __CFDefaultFileSystemEncoding = CFStringGetSystemEncoding();
454 #endif
455 }
456
457 return __CFDefaultFileSystemEncoding;
458 }
459
460 /* ??? Is returning length when no other answer is available the right thing?
461 !!! All of the (length > (LONG_MAX / N)) type checks are to avoid wrap-around and eventual malloc overflow in the client
462 */
463 CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) {
464 if (encoding == kCFStringEncodingUTF8) {
465 return (length > (LONG_MAX / 3)) ? kCFNotFound : (length * 3);
466 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32
467 return (length > (LONG_MAX / sizeof(UTF32Char))) ? kCFNotFound : (length * sizeof(UTF32Char));
468 } else {
469 encoding &= 0xFFF; // Mask off non-base part
470 }
471 switch (encoding) {
472 case kCFStringEncodingUnicode:
473 return (length > (LONG_MAX / sizeof(UniChar))) ? kCFNotFound : (length * sizeof(UniChar));
474
475 case kCFStringEncodingNonLossyASCII:
476 return (length > (LONG_MAX / 6)) ? kCFNotFound : (length * 6); // 1 Unichar can expand to 6 bytes
477
478 case kCFStringEncodingMacRoman:
479 case kCFStringEncodingWindowsLatin1:
480 case kCFStringEncodingISOLatin1:
481 case kCFStringEncodingNextStepLatin:
482 case kCFStringEncodingASCII:
483 return length / sizeof(uint8_t);
484
485 default:
486 return length / sizeof(uint8_t);
487 }
488 }
489
490
491 /* Returns whether the indicated encoding can be stored in 8-bit chars
492 */
493 CF_INLINE Boolean __CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding) {
494 switch (encoding & 0xFFF) { // just use encoding base
495 case kCFStringEncodingInvalidId:
496 case kCFStringEncodingUnicode:
497 case kCFStringEncodingNonLossyASCII:
498 return false;
499
500 case kCFStringEncodingMacRoman:
501 case kCFStringEncodingWindowsLatin1:
502 case kCFStringEncodingISOLatin1:
503 case kCFStringEncodingNextStepLatin:
504 case kCFStringEncodingASCII:
505 return true;
506
507 default: return false;
508 }
509 }
510
511 /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
512 ??? Perhaps only ASCII fits the bill due to Unicode decomposition.
513 */
514 CFStringEncoding __CFStringComputeEightBitStringEncoding(void) {
515 if (__CFDefaultEightBitStringEncoding == kCFStringEncodingInvalidId) {
516 CFStringEncoding systemEncoding = CFStringGetSystemEncoding();
517 if (systemEncoding == kCFStringEncodingInvalidId) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
518 return kCFStringEncodingASCII;
519 } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding)) {
520 __CFDefaultEightBitStringEncoding = systemEncoding;
521 } else {
522 __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII;
523 }
524 }
525
526 return __CFDefaultEightBitStringEncoding;
527 }
528
529 /* Returns whether the provided bytes can be stored in ASCII
530 */
531 CF_INLINE Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
532 #if __LP64__
533 /* Go by 8s in 64 bit */
534 while (len >= 8) {
535 uint64_t val = *(const uint64_t *)bytes;
536 if (val & 0x8080808080808080ULL) return false;
537 bytes += 8;
538 len -= 8;
539 }
540 #endif
541 /* Go by 4s */
542 while (len >= 4) {
543 uint32_t val = *(const uint32_t *)bytes;
544 if (val & 0x80808080U) return false;
545 bytes += 4;
546 len -= 4;
547 }
548 /* Handle the rest one byte at a time */
549 while (len--) {
550 if (*bytes++ & 0x80) return false;
551 }
552
553 return true;
554 }
555
556 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
557 */
558 CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) {
559 // If the encoding is the same as the 8-bit CFString encoding, we can just use the bytes as-is.
560 // One exception is ASCII, which unfortunately needs to mean ISOLatin1 for compatibility reasons <rdar://problem/5458321>.
561 if (encoding == __CFStringGetEightBitStringEncoding() && encoding != kCFStringEncodingASCII) return true;
562 if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true;
563 return false;
564 }
565
566
567 /* Returns whether a length byte can be tacked on to a string of the indicated length.
568 */
569 CF_INLINE Boolean __CFCanUseLengthByte(CFIndex len) {
570 #define __kCFMaxPascalStrLen 255
571 return (len <= __kCFMaxPascalStrLen) ? true : false;
572 }
573
574 /* Various string assertions
575 */
/* Each macro validates its argument(s) through the CFAssert family or __CFGenericValidateType.
   Macro arguments are parenthesized wherever they appear inside a condition, so that an expression
   argument (e.g. `a ? b : c`) cannot change the meaning of the check.
*/
#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && ((idx) + (count)) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsNotNegative(idx) CFAssert2((idx) >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
583
584
585 /* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity. This function will return -1 if the new capacity is just too big (> LONG_MAX).
586 Additional complications are applied in the following order:
587 - desiredCapacity, which is the minimum (except initially things can be at zero)
588 - rounding up to factor of 8
589 - compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256
590 - we need to make sure GROWFACTOR computation doesn't suffer from overflow issues on 32-bit, hence the casting to unsigned. Normally for required capacity of C bytes, the allocated space is (3C+1)/2. If C > ULONG_MAX/3, we instead simply return LONG_MAX
591 */
/* SHRINKFACTOR(c): half of c; capacity is shrunk when the requirement drops below this.
   GROWFACTOR(c):   (3c + 1) / 2, the capacity to allocate when extra room is wanted for a requirement of c bytes.
                    The non-LP64 form guards the multiplication against overflow.
   The argument is parenthesized everywhere so that expression arguments such as `a + b` expand correctly.
*/
#define SHRINKFACTOR(c) ((c) / 2)

#if __LP64__
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
#else
#define GROWFACTOR(c) (((c) >= (ULONG_MAX / 3UL)) ? __CFMax(LONG_MAX - 4095, (c)) : (((unsigned long)(c) * 3 + 1) / 2))
#endif
599
/* Decides what the byte capacity of str's backing store should be in order to hold reqCapacity bytes.
     str            - mutable string; its fixed / capacity-provided-externally flags, desired capacity and contents allocator are consulted
     reqCapacity    - number of bytes required
     capacity       - current capacity in bytes
     leaveExtraRoom - when true a changed capacity is computed with GROWFACTOR(reqCapacity); when false it is exactly reqCapacity (before the adjustments below)
     charSize       - multiplier that converts the desired capacity (kept in characters) into bytes
   Returns `capacity` unchanged when no resize is called for, otherwise the new capacity, or -1 when the
   required or resulting capacity exceeds LONG_MAX.
   NOTE(review): `capacity` and `desiredCapacity` are signed (CFIndex) but are compared against unsigned long
   values below, so they are converted to unsigned for those comparisons — presumably neither is ever negative; confirm.
*/
600 CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, unsigned long reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) {
601 if (capacity != 0 || reqCapacity != 0) { /* If initially zero, and space not needed, leave it at that... */
602 if ((capacity < reqCapacity) || /* We definitely need the room... */
603 (!__CFStrCapacityProvidedExternally(str) && /* Assuming we control the capacity... */
604 ((reqCapacity < SHRINKFACTOR(capacity)) || /* ...we have too much room! */
605 (!leaveExtraRoom && (reqCapacity < capacity))))) { /* ...we need to eliminate the extra space... */
606 if (reqCapacity > LONG_MAX) return -1; /* Too big any way you cut it */
607 unsigned long newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity; /* Grow by 3/2 if extra room is desired */
608 CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize; /* Desired capacity is stored in characters; convert to bytes */
609 if (newCapacity < desiredCapacity) { /* If less than desired, bump up to desired */
610 newCapacity = desiredCapacity;
611 } else if (__CFStrIsFixed(str)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
612 newCapacity = __CFMax(desiredCapacity, reqCapacity); /* !!! So, fixed is not really fixed, but "tight" */
613 }
614 if (__CFStrHasContentsAllocator(str)) { /* Also apply any preferred size from the allocator */
615 newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0);
616 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
617 } else {
618 newCapacity = malloc_good_size(newCapacity); /* No contents allocator: ask malloc_good_size() for the size to use (these targets only) */
619 #endif
620 }
621 return (newCapacity > LONG_MAX) ? -1 : (CFIndex)newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
622 }
623 }
624 return capacity; /* No resize needed */
625 }
626
627
628 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
629 numBlocks is current total number of blocks within buffer.
630 blockSize is the size of each block in bytes
631 ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
632 insertLength is the final spacing between the remaining blocks
633
634 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO
635 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
636 if insertLength = 0, result = A B D G H
637
638 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
639 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
640
641 */
/* One block of buffer data that rearrangeBlocks() still has to move; these records are what the push()/pop() stack stores. */
642 typedef struct _CFStringDeferredRange {
643 CFIndex beginning; // Byte offset in the buffer where the block to keep starts
644 CFIndex length; // Size of that block in bytes
645 CFIndex shift; // Signed byte distance to move the block by (added to `beginning` to get the destination)
646 } CFStringDeferredRange;
647
/* Growable stack of CFStringDeferredRange records, manipulated by push() and pop().
   `stack` can start out pointing at caller-provided storage; push() moves it to allocator-owned memory on the first overflow. */
648 typedef struct _CFStringStackInfo {
649 CFIndex capacity; // Capacity (if capacity == count, need to realloc to add another)
650 CFIndex count; // Number of elements actually stored
651 CFStringDeferredRange *stack; // Element storage; index count - 1 is the top of the stack
652 Boolean hasMalloced; // Indicates "stack" is allocated and needs to be deallocated when done
653 char _padding[3];
654 } CFStringStackInfo;
655
656 CF_INLINE void pop (CFStringStackInfo *si, CFStringDeferredRange *topRange) {
657 si->count = si->count - 1;
658 *topRange = si->stack[si->count];
659 }
660
661 CF_INLINE void push (CFStringStackInfo *si, const CFStringDeferredRange *newRange) {
662 if (si->count == si->capacity) {
663 // increase size of the stack
664 si->capacity = (si->capacity + 4) * 2;
665 if (si->hasMalloced) {
666 si->stack = (CFStringDeferredRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, si->stack, si->capacity * sizeof(CFStringDeferredRange), 0);
667 } else {
668 CFStringDeferredRange *newStack = (CFStringDeferredRange *)CFAllocatorAllocate(kCFAllocatorSystemDefault, si->capacity * sizeof(CFStringDeferredRange), 0);
669 memmove(newStack, si->stack, si->count * sizeof(CFStringDeferredRange));
670 si->stack = newStack;
671 si->hasMalloced = true;
672 }
673 }
674 si->stack[si->count] = *newRange;
675 si->count = si->count + 1;
676 }
677
678 static void rearrangeBlocks(
679 uint8_t *buffer,
680 CFIndex numBlocks,
681 CFIndex blockSize,
682 const CFRange *ranges,
683 CFIndex numRanges,
684 CFIndex insertLength) {
685
686 #define origStackSize 10
687 CFStringDeferredRange origStack[origStackSize];
688 CFStringStackInfo si = {origStackSize, 0, origStack, false, {0, 0, 0}};
689 CFStringDeferredRange currentNonRange = {0, 0, 0};
690 CFIndex currentRange = 0;
691 CFIndex amountShifted = 0;
692
693 // must have at least 1 range left.
694
695 while (currentRange < numRanges) {
696 currentNonRange.beginning = (ranges[currentRange].location + ranges[currentRange].length) * blockSize;
697 if ((numRanges - currentRange) == 1) {
698 // at the end.
699 currentNonRange.length = numBlocks * blockSize - currentNonRange.beginning;
700 if (currentNonRange.length == 0) break;
701 } else {
702 currentNonRange.length = (ranges[currentRange + 1].location * blockSize) - currentNonRange.beginning;
703 }
704 currentNonRange.shift = amountShifted + (insertLength * blockSize) - (ranges[currentRange].length * blockSize);
705 amountShifted = currentNonRange.shift;
706 if (amountShifted <= 0) {
707 // process current item and rest of stack
708 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
709 while (si.count > 0) {
710 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
711 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
712 }
713 } else {
714 // add currentNonRange to stack.
715 push (&si, &currentNonRange);
716 }
717 currentRange++;
718 }
719
720 // no more ranges. if anything is on the stack, process.
721
722 while (si.count > 0) {
723 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
724 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
725 }
726 if (si.hasMalloced) CFAllocatorDeallocate (kCFAllocatorSystemDefault, si.stack);
727 }
728
729 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
730 */
731 static void copyBlocks(
732 const uint8_t *srcBuffer,
733 uint8_t *dstBuffer,
734 CFIndex srcLength,
735 Boolean srcIsUnicode,
736 Boolean dstIsUnicode,
737 const CFRange *ranges,
738 CFIndex numRanges,
739 CFIndex insertLength) {
740
741 CFIndex srcLocationInBytes = 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
742 CFIndex dstLocationInBytes = 0; // ditto
743 CFIndex srcBlockSize = srcIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
744 CFIndex insertLengthInBytes = insertLength * (dstIsUnicode ? sizeof(UniChar) : sizeof(uint8_t));
745 CFIndex rangeIndex = 0;
746 CFIndex srcToDstMultiplier = (srcIsUnicode == dstIsUnicode) ? 1 : (sizeof(UniChar) / sizeof(uint8_t));
747
748 // Loop over the ranges, copying the range to be preserved (right before each range)
749 while (rangeIndex < numRanges) {
750 CFIndex srcLengthInBytes = ranges[rangeIndex].location * srcBlockSize - srcLocationInBytes; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
751 if (srcLengthInBytes > 0) {
752 if (srcIsUnicode == dstIsUnicode) {
753 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLengthInBytes);
754 } else {
755 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLengthInBytes);
756 }
757 }
758 srcLocationInBytes += srcLengthInBytes + ranges[rangeIndex].length * srcBlockSize; // Skip over the just-copied and to-be-deleted stuff
759 dstLocationInBytes += srcLengthInBytes * srcToDstMultiplier + insertLengthInBytes;
760 rangeIndex++;
761 }
762
763 // Do last range (the one beyond last range)
764 if (srcLocationInBytes < srcLength * srcBlockSize) {
765 if (srcIsUnicode == dstIsUnicode) {
766 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLength * srcBlockSize - srcLocationInBytes);
767 } else {
768 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLength * srcBlockSize - srcLocationInBytes);
769 }
770 }
771 }
772
773 /* Call the callback; if it doesn't exist or returns false, then log
774 */
775 static void __CFStringHandleOutOfMemory(CFTypeRef obj) {
776 CFStringRef msg = CFSTR("Out of memory. We suggest restarting the application. If you have an unsaved document, create a backup copy in Finder, then try to save.");
777 {
778 CFLog(kCFLogLevelCritical, CFSTR("%@"), msg);
779 }
780 }
781
782 /* Reallocates the backing store of the string to accommodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)
783 */
/* Resizes the backing store of a mutable string, dropping the given character ranges and leaving a gap of insertLength characters in place of each one.
   str              the mutable string to modify
   deleteRanges     numDeleteRanges character ranges to drop; they are handed unchanged to rearrangeBlocks()/copyBlocks()
   numDeleteRanges  number of entries in deleteRanges
   insertLength     number of characters of room to leave at each dropped range; this function only moves the preserved characters, it does not fill the gaps
   makeUnicode      if true, the resulting store uses UniChar even if the current one is 8-bit
   NOTE(review): the "Does not return" remarks below assume __CFStringHandleOutOfMemory() terminates; there is no recovery path in this function if it returns.
*/
784 static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) {
785 const uint8_t *curContents = (uint8_t *)__CFStrContents(str);
786 CFIndex curLength = curContents ? __CFStrLength2(str, curContents) : 0;
787 unsigned long newLength; // We use unsigned to better keep track of overflow
788
789 // Compute new length of the string: current length, plus one gap per range, minus everything dropped
790 if (numDeleteRanges == 1) {
791 newLength = curLength + insertLength - deleteRanges[0].length;
792 } else {
793 CFIndex cnt;
794 newLength = curLength + insertLength * numDeleteRanges;
795 for (cnt = 0; cnt < numDeleteRanges; cnt++) newLength -= deleteRanges[cnt].length;
796 }
797
798 __CFAssertIfFixedLengthIsOK(str, newLength);
799
800 if (newLength == 0) {
801 // A somewhat optimized code-path for this special case, with the following implicit values:
802 // newIsUnicode = false
803 // useLengthAndNullBytes = false
804 // newCharSize = sizeof(uint8_t)
805 // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
806 // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
807 CFIndex curCapacity = __CFStrCapacity(str);
808 CFIndex newCapacity = __CFStrNewCapacity(str, 0, curCapacity, true, sizeof(uint8_t));
809 if (newCapacity != curCapacity) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
810 if (curContents) __CFStrDeallocateMutableContents(str, (uint8_t *)curContents);
811 __CFStrSetContentPtr(str, NULL);
812 __CFStrSetCapacity(str, 0);
813 __CFStrClearCapacityProvidedExternally(str);
814 __CFStrClearHasLengthAndNullBytes(str);
815 if (!__CFStrIsExternalMutable(str)) __CFStrClearUnicode(str); // External mutable implies Unicode
816 } else {
// Capacity is unchanged, so the existing buffer is kept and just reset to an empty 8-bit string
817 if (!__CFStrIsExternalMutable(str)) {
818 __CFStrClearUnicode(str);
819 if (curCapacity >= (int)(sizeof(uint8_t) * 2)) { // If there's room
820 __CFStrSetHasLengthAndNullBytes(str);
821 ((uint8_t *)curContents)[0] = ((uint8_t *)curContents)[1] = 0;
822 } else {
823 __CFStrClearHasLengthAndNullBytes(str);
824 }
825 }
826 }
827 __CFStrSetExplicitLength(str, 0);
828 } else { /* This else-clause assumes newLength > 0 */
829 Boolean oldIsUnicode = __CFStrIsUnicode(str);
830 Boolean newIsUnicode = makeUnicode || (oldIsUnicode /* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str);
831 CFIndex newCharSize = newIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
832 Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */;
833 CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0; /* 2 extra bytes to keep the length byte & null... */
834 CFIndex curCapacity = __CFStrCapacity(str);
// Guard against newLength * newCharSize + numExtraBytes exceeding LONG_MAX before it is computed
835 if (newLength > (LONG_MAX - numExtraBytes) / newCharSize) __CFStringHandleOutOfMemory(str); // Does not return
836 CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize);
837 if (newCapacity == -1) __CFStringHandleOutOfMemory(str); // Does not return
838 Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
839 uint8_t *newContents;
840 if (allocNewBuffer) {
841 newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
842 if (!newContents) { // Try allocating without extra room
843 newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, false, newCharSize);
844 // Since we checked for this above, it shouldn't be the case here, but just in case
845 if (newCapacity == -1) __CFStringHandleOutOfMemory(str); // Does not return
846 newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
847 if (!newContents) __CFStringHandleOutOfMemory(str); // Does not return
848 }
849 } else {
850 newContents = (uint8_t *)curContents;
851 }
852
853 Boolean hasLengthAndNullBytes = __CFStrHasLengthByte(str);
854
855 CFAssert1(hasLengthAndNullBytes == __CFStrHasNullByte(str), __kCFLogAssertion, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__);
856
// Step both pointers past their length byte (where present) so the block movers see character data only; this is undone further down
857 if (hasLengthAndNullBytes) curContents++;
858 if (useLengthAndNullBytes) newContents++;
859
860 if (curContents) {
861 if (oldIsUnicode == newIsUnicode) {
862 if (newContents == curContents) {
863 rearrangeBlocks(newContents, curLength, newCharSize, deleteRanges, numDeleteRanges, insertLength);
864 } else {
865 copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
866 }
867 } else if (newIsUnicode) { /* this implies we have a new buffer */
868 copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
869 }
870 if (hasLengthAndNullBytes) curContents--; /* Undo the damage from above */
871 if (allocNewBuffer && __CFStrFreeContentsWhenDone(str)) __CFStrDeallocateMutableContents(str, (void *)curContents);
872 }
873
// Bring the string's flags (and, for 8-bit stores, the length and null bytes) in line with the new store
874 if (!newIsUnicode) {
875 if (useLengthAndNullBytes) {
876 newContents[newLength] = 0; /* Always have null byte, if not unicode */
877 newContents--; /* Undo the damage from above */
878 newContents[0] = __CFCanUseLengthByte(newLength) ? (uint8_t)newLength : 0;
879 if (!hasLengthAndNullBytes) __CFStrSetHasLengthAndNullBytes(str);
880 } else {
881 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
882 }
883 if (oldIsUnicode) __CFStrClearUnicode(str);
884 } else { // New is unicode...
885 if (!oldIsUnicode) __CFStrSetUnicode(str);
886 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
887 }
888 __CFStrSetExplicitLength(str, newLength);
889
// Capacity and content pointer only need updating when the buffer was actually replaced
890 if (allocNewBuffer) {
891 __CFStrSetCapacity(str, newCapacity);
892 __CFStrClearCapacityProvidedExternally(str);
893 __CFStrSetContentPtr(str, newContents);
894 }
895 }
896 }
897
898 /* Same as above, but takes one range (very common case)
899 */
900 CF_INLINE void __CFStringChangeSize(CFMutableStringRef str, CFRange range, CFIndex insertLength, Boolean makeUnicode) {
901 __CFStringChangeSizeMultiple(str, &range, 1, insertLength, makeUnicode);
902 }
903
904
905 #if defined(DEBUG)
906 static Boolean __CFStrIsConstantString(CFStringRef str);
907 #endif
908
909 static void __CFStringDeallocate(CFTypeRef cf) {
910 CFStringRef str = (CFStringRef)cf;
911
912 // If in DEBUG mode, check to see if the string a CFSTR, and complain.
913 CFAssert1(__CFConstantStringTableBeingFreed || !__CFStrIsConstantString((CFStringRef)cf), __kCFLogAssertion, "Tried to deallocate CFSTR(\"%@\")", str);
914
915 if (!__CFStrIsInline(str)) {
916 uint8_t *contents;
917 Boolean isMutable = __CFStrIsMutable(str);
918 if (__CFStrFreeContentsWhenDone(str) && (contents = (uint8_t *)__CFStrContents(str))) {
919 if (isMutable) {
920 __CFStrDeallocateMutableContents((CFMutableStringRef)str, contents);
921 } else {
922 if (__CFStrHasContentsDeallocator(str)) {
923 CFAllocatorRef allocator = __CFStrContentsDeallocator(str);
924 CFAllocatorDeallocate(allocator, contents);
925 if (!(kCFAllocatorSystemDefaultGCRefZero == allocator || kCFAllocatorDefaultGCRefZero == allocator)) CFRelease(allocator);
926 } else {
927 CFAllocatorRef alloc = __CFGetAllocator(str);
928 CFAllocatorDeallocate(alloc, contents);
929 }
930 }
931 }
932 if (isMutable && __CFStrHasContentsAllocator(str)) {
933 CFAllocatorRef allocator = __CFStrContentsAllocator((CFMutableStringRef)str);
934 if (!(kCFAllocatorSystemDefaultGCRefZero == allocator || kCFAllocatorDefaultGCRefZero == allocator)) CFRelease(allocator);
935 }
936 }
937 }
938
939 static Boolean __CFStringEqual(CFTypeRef cf1, CFTypeRef cf2) {
940 CFStringRef str1 = (CFStringRef)cf1;
941 CFStringRef str2 = (CFStringRef)cf2;
942 const uint8_t *contents1;
943 const uint8_t *contents2;
944 CFIndex len1;
945
946 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
947 /* !!! We do not need == test, as the CFBase runtime assures this */
948
949 contents1 = (uint8_t *)__CFStrContents(str1);
950 contents2 = (uint8_t *)__CFStrContents(str2);
951 len1 = __CFStrLength2(str1, contents1);
952
953 if (len1 != __CFStrLength2(str2, contents2)) return false;
954
955 contents1 += __CFStrSkipAnyLengthByte(str1);
956 contents2 += __CFStrSkipAnyLengthByte(str2);
957
958 if (__CFStrIsEightBit(str1) && __CFStrIsEightBit(str2)) {
959 return memcmp((const char *)contents1, (const char *)contents2, len1) ? false : true;
960 } else if (__CFStrIsEightBit(str1)) { /* One string has Unicode contents */
961 CFStringInlineBuffer buf;
962 CFIndex buf_idx = 0;
963
964 CFStringInitInlineBuffer(str1, &buf, CFRangeMake(0, len1));
965 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
966 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents2)[buf_idx]) return false;
967 }
968 } else if (__CFStrIsEightBit(str2)) { /* One string has Unicode contents */
969 CFStringInlineBuffer buf;
970 CFIndex buf_idx = 0;
971
972 CFStringInitInlineBuffer(str2, &buf, CFRangeMake(0, len1));
973 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
974 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents1)[buf_idx]) return false;
975 }
976 } else { /* Both strings have Unicode contents */
977 CFIndex idx;
978 for (idx = 0; idx < len1; idx++) {
979 if (((UniChar *)contents1)[idx] != ((UniChar *)contents2)[idx]) return false;
980 }
981 }
982 return true;
983 }
984
985
986 /* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
987 If the length is less than or equal to 96, then the hash function is simply the
988 following (n is the nth UniChar character, starting from 0):
989
990 hash(-1) = length
991 hash(n) = hash(n-1) * 257 + unichar(n);
992 Hash = hash(length-1) + (hash(length-1) << (length & 31))
993
994 If the length is greater than 96, then the above algorithm applies to
995 characters 0..31, (length/2)-16..(length/2)+15, and length-32..length-1, inclusive;
996 thus the first, middle, and last 32 characters.
997
998 Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401; 67503105 is 257^4 - 256^4
999 If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.
1000 !!! We haven't updated for LP64 yet
1001
1002 NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.
1003
1004 Hash function was changed between Panther and Tiger, and Tiger and Leopard.
1005 */
/* Strings of up to this many characters are hashed in full; the hash functions below sample longer strings in three 32-character windows. */
1006 #define HashEverythingLimit 96
1007
/* Folds four consecutive characters into the hash and advances "pointer" by 4.
   Requirements at the expansion site: a variable named "result" holding the running hash must be in scope, and "pointer" must be a modifiable lvalue.
   accessStart and accessEnd are token fragments pasted around the indices 0..3 to form the element access, e.g. passing "p[" and "]" yields p[0] .. p[3].
   The multipliers are the powers of 257 described in the comment above (the first one reduced modulo 2^32).
*/
1008 #define HashNextFourUniChars(accessStart, accessEnd, pointer) \
1009 {result = result * 67503105 + (accessStart 0 accessEnd) * 16974593 + (accessStart 1 accessEnd) * 66049 + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;}
1010
/* Single-character form of the above: folds one character into "result" and advances "pointer" by 1. */
1011 #define HashNextUniChar(accessStart, accessEnd, pointer) \
1012 {result = result * 257 + (accessStart 0 accessEnd); pointer++;}
1013
1014
1015 /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing.
1016 */
1017 CF_INLINE CFHashCode __CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) {
1018 CFHashCode result = actualLen;
1019 if (len <= HashEverythingLimit) {
1020 const UniChar *end4 = uContents + (len & ~3);
1021 const UniChar *end = uContents + len;
1022 while (uContents < end4) HashNextFourUniChars(uContents[, ], uContents); // First count in fours
1023 while (uContents < end) HashNextUniChar(uContents[, ], uContents); // Then for the last <4 chars, count in ones...
1024 } else {
1025 const UniChar *contents, *end;
1026 contents = uContents;
1027 end = contents + 32;
1028 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1029 contents = uContents + (len >> 1) - 16;
1030 end = contents + 32;
1031 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1032 end = uContents + len;
1033 contents = end - 32;
1034 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1035 }
1036 return result + (result << (actualLen & 31));
1037 }
1038
1039 /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check.
1040 */
1041 CF_INLINE CFHashCode __CFStrHashEightBit(const uint8_t *cContents, CFIndex len) {
1042 #if defined(DEBUG)
1043 if (!__CFCharToUniCharFunc) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
1044 CFIndex cnt;
1045 Boolean err = false;
1046 if (len <= HashEverythingLimit) {
1047 for (cnt = 0; cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1048 } else {
1049 for (cnt = 0; cnt < 32; cnt++) if (cContents[cnt] >= 128) err = true;
1050 for (cnt = (len >> 1) - 16; cnt < (len >> 1) + 16; cnt++) if (cContents[cnt] >= 128) err = true;
1051 for (cnt = (len - 32); cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1052 }
1053 if (err) {
1054 // Can't do log here, as it might be too early
1055 fprintf(stderr, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
1056 }
1057 }
1058 #endif
1059 CFHashCode result = len;
1060 if (len <= HashEverythingLimit) {
1061 const uint8_t *end4 = cContents + (len & ~3);
1062 const uint8_t *end = cContents + len;
1063 while (cContents < end4) HashNextFourUniChars(__CFCharToUniCharTable[cContents[, ]], cContents); // First count in fours
1064 while (cContents < end) HashNextUniChar(__CFCharToUniCharTable[cContents[, ]], cContents); // Then for the last <4 chars, count in ones...
1065 } else {
1066 const uint8_t *contents, *end;
1067 contents = cContents;
1068 end = contents + 32;
1069 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1070 contents = cContents + (len >> 1) - 16;
1071 end = contents + 32;
1072 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1073 end = cContents + len;
1074 contents = end - 32;
1075 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1076 }
1077 return result + (result << (len & 31));
1078 }
1079
1080 CFHashCode CFStringHashISOLatin1CString(const uint8_t *bytes, CFIndex len) {
1081 CFHashCode result = len;
1082 if (len <= HashEverythingLimit) {
1083 const uint8_t *end4 = bytes + (len & ~3);
1084 const uint8_t *end = bytes + len;
1085 while (bytes < end4) HashNextFourUniChars(bytes[, ], bytes); // First count in fours
1086 while (bytes < end) HashNextUniChar(bytes[, ], bytes); // Then for the last <4 chars, count in ones...
1087 } else {
1088 const uint8_t *contents, *end;
1089 contents = bytes;
1090 end = contents + 32;
1091 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1092 contents = bytes + (len >> 1) - 16;
1093 end = contents + 32;
1094 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1095 end = bytes + len;
1096 contents = end - 32;
1097 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1098 }
1099 return result + (result << (len & 31));
1100 }
1101
1102 CFHashCode CFStringHashCString(const uint8_t *bytes, CFIndex len) {
1103 return __CFStrHashEightBit(bytes, len);
1104 }
1105
1106 CFHashCode CFStringHashCharacters(const UniChar *characters, CFIndex len) {
1107 return __CFStrHashCharacters(characters, len, len);
1108 }
1109
1110 /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash.
1111 */
1112 CFHashCode CFStringHashNSString(CFStringRef str) {
1113 UniChar buffer[HashEverythingLimit];
1114 CFIndex bufLen; // Number of characters in the buffer for hashing
1115 CFIndex len = 0; // Actual length of the string
1116
1117 CF_OBJC_CALL0(CFIndex, len, str, "length");
1118 if (len <= HashEverythingLimit) {
1119 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, len));
1120 bufLen = len;
1121 } else {
1122 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, 32));
1123 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer+32, CFRangeMake((len >> 1) - 16, 32));
1124 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer+64, CFRangeMake(len - 32, 32));
1125 bufLen = HashEverythingLimit;
1126 }
1127 return __CFStrHashCharacters(buffer, bufLen, len);
1128 }
1129
1130 CFHashCode __CFStringHash(CFTypeRef cf) {
1131 /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
1132 CFStringRef str = (CFStringRef)cf;
1133 const uint8_t *contents = (uint8_t *)__CFStrContents(str);
1134 CFIndex len = __CFStrLength2(str, contents);
1135
1136 if (__CFStrIsEightBit(str)) {
1137 contents += __CFStrSkipAnyLengthByte(str);
1138 return __CFStrHashEightBit(contents, len);
1139 } else {
1140 return __CFStrHashCharacters((const UniChar *)contents, len, len);
1141 }
1142 }
1143
1144
1145 static CFStringRef __CFStringCopyDescription(CFTypeRef cf) {
1146 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf, __CFGetAllocator(cf), cf);
1147 }
1148
1149 static CFStringRef __CFStringCopyFormattingDescription(CFTypeRef cf, CFDictionaryRef formatOptions) {
1150 return (CFStringRef)CFStringCreateCopy(__CFGetAllocator(cf), (CFStringRef)cf);
1151 }
1152
/* Type ID of CFString; holds the "not a type" placeholder until __CFStringInitialize() registers the class. */
1153 static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID;
1154
/* Function pointer type used to cast CFStringCreateCopy for the class table below. */
1155 typedef CFTypeRef (*CF_STRING_CREATE_COPY)(CFAllocatorRef alloc, CFTypeRef theString);
1156
/* Runtime class record for CFString, registered by __CFStringInitialize().
   The initializer is positional. NOTE(review): the slot labels below are inferred from the names of the values; confirm against the CFRuntimeClass declaration in CFRuntime.h.
*/
1157 static const CFRuntimeClass __CFStringClass = {
1158 _kCFRuntimeScannedObject, // version / flags
1159 "CFString", // class name
1160 NULL, // init
1161 (CF_STRING_CREATE_COPY)CFStringCreateCopy, // copy
1162 __CFStringDeallocate, // finalize
1163 __CFStringEqual, // equal
1164 __CFStringHash, // hash
1165 __CFStringCopyFormattingDescription, // formatting description
1166 __CFStringCopyDescription // (debug) description
1167 };
1168
1169 __private_extern__ void __CFStringInitialize(void) {
1170 __kCFStringTypeID = _CFRuntimeRegisterClass(&__CFStringClass);
1171 }
1172
1173 CFTypeID CFStringGetTypeID(void) {
1174 return __kCFStringTypeID;
1175 }
1176
1177
1178 static Boolean CFStrIsUnicode(CFStringRef str) {
1179 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, Boolean, str, "_encodingCantBeStoredInEightBitCFString");
1180 return __CFStrIsUnicode(str);
1181 }
1182
1183
1184
/* Sentinel value for a contentsDeallocator argument (not a real allocator): it asks that the contents be freed with the string's own allocator. __CFStringCreateImmutableFunnel3() replaces it with "alloc" on entry. */
1185 #define ALLOCATORSFREEFUNC ((CFAllocatorRef)-1)
1186
1187 /* contentsDeallocator indicates how to free the data if it's noCopy == true:
1188 kCFAllocatorNull: don't free
1189 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
1190 NULL: default allocator
1191 otherwise it's the allocator that should be used (it will be explicitly stored)
1192 if noCopy == false, then freeFunc should be ALLOCATORSFREEFUNC
1193 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
1194 possiblyExternalFormat indicates that the bytes might have BOM and be swapped
1195 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
1196 numBytes contains the actual number of bytes in "bytes", including Length byte,
1197 BUT not the NULL byte at the end
1198 bytes should not contain BOM characters
1199 !!! Various flags should be combined to reduce number of arguments, if possible
1200 */
1201 __private_extern__ CFStringRef __CFStringCreateImmutableFunnel3(
1202 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1203 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1204 CFAllocatorRef contentsDeallocator, UInt32 converterFlags) {
1205
1206 CFMutableStringRef str;
1207 CFVarWidthCharBuffer vBuf;
1208 CFIndex size;
1209 Boolean useLengthByte = false;
1210 Boolean useNullByte = false;
1211 Boolean useInlineData = false;
1212
1213 #if INSTRUMENT_SHARED_STRINGS
1214 const char *recordedEncoding;
1215 char encodingBuffer[128];
1216 if (encoding == kCFStringEncodingUnicode) recordedEncoding = "Unicode";
1217 else if (encoding == kCFStringEncodingASCII) recordedEncoding = "ASCII";
1218 else if (encoding == kCFStringEncodingUTF8) recordedEncoding = "UTF8";
1219 else if (encoding == kCFStringEncodingMacRoman) recordedEncoding = "MacRoman";
1220 else {
1221 snprintf(encodingBuffer, sizeof(encodingBuffer), "0x%lX", (unsigned long)encoding);
1222 recordedEncoding = encodingBuffer;
1223 }
1224 #endif
1225
1226 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1227
1228 if (contentsDeallocator == ALLOCATORSFREEFUNC) {
1229 contentsDeallocator = alloc;
1230 } else if (contentsDeallocator == NULL) {
1231 contentsDeallocator = __CFGetDefaultAllocator();
1232 }
1233
1234 if ((NULL != kCFEmptyString) && (numBytes == 0) && _CFAllocatorIsSystemDefault(alloc)) { // If we are using the system default allocator, and the string is empty, then use the empty string!
1235 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
1236 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1237 }
1238 return (CFStringRef)CFRetain(kCFEmptyString); // Quick exit; won't catch all empty strings, but most
1239 }
1240
1241 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1242
1243 vBuf.shouldFreeChars = false; // We use this to remember to free the buffer possibly allocated by decode
1244
1245 // Record whether we're starting out with an ASCII-superset string, because we need to know this later for the string ROM; this may get changed later if we successfully convert down from Unicode. We only record this once because __CFCanUseEightBitCFStringForBytes() can be expensive.
1246 Boolean stringSupportsEightBitCFRepresentation = encoding != kCFStringEncodingUnicode && __CFCanUseEightBitCFStringForBytes((const uint8_t *)bytes, numBytes, encoding);
1247
1248 // We may also change noCopy within this function if we have to decode the string into an external buffer. We do not want to avoid the use of the string ROM merely because we tried to be efficient and reuse the decoded buffer for the CFString's external storage. Therefore, we use this variable to track whether we actually can ignore the noCopy flag (which may or may not be set anyways).
1249 Boolean stringROMShouldIgnoreNoCopy = false;
1250
1251 // First check to see if the data needs to be converted...
1252 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
1253
1254 if ((encoding == kCFStringEncodingUnicode && possiblyExternalFormat) || encoding != kCFStringEncodingUnicode && ! stringSupportsEightBitCFRepresentation) {
1255 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0);
1256 CFIndex realNumBytes = numBytes - (hasLengthByte ? 1 : 0);
1257 Boolean usingPassedInMemory = false;
1258
1259 vBuf.allocator = kCFAllocatorSystemDefault; // We don't want to use client's allocator for temp stuff
1260 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
1261
1262 if (!__CFStringDecodeByteStream3((const uint8_t *)realBytes, realNumBytes, encoding, false, &vBuf, &usingPassedInMemory, converterFlags)) {
1263 // Note that if the string can't be created, we don't free the buffer, even if there is a contents deallocator. This is on purpose.
1264 return NULL;
1265 }
1266
1267 encoding = vBuf.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUnicode;
1268
1269 // Update our flag according to whether the decoded buffer is ASCII
1270 stringSupportsEightBitCFRepresentation = vBuf.isASCII;
1271
1272 if (!usingPassedInMemory) {
1273
1274 // Because __CFStringDecodeByteStream3() allocated our buffer, it's OK for us to free it if we can get the string from the ROM.
1275 stringROMShouldIgnoreNoCopy = true;
1276
1277 // Make the parameters fit the new situation
1278 numBytes = vBuf.isASCII ? vBuf.numChars : (vBuf.numChars * sizeof(UniChar));
1279 hasLengthByte = hasNullByte = false;
1280
1281 // Get rid of the original buffer if its not being used
1282 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1283 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1284 }
1285 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1286
1287 // See if we can reuse any storage the decode func might have allocated
1288 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1289
1290 if (vBuf.shouldFreeChars && (alloc == vBuf.allocator) && encoding == kCFStringEncodingUnicode) {
1291 vBuf.shouldFreeChars = false; // Transferring ownership to the CFString
1292 bytes = CFAllocatorReallocate(vBuf.allocator, (void *)vBuf.chars.unicode, numBytes, 0); // Tighten up the storage
1293 noCopy = true;
1294 #if INSTRUMENT_SHARED_STRINGS
1295 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-NoCopy";
1296 else recordedEncoding = "ForeignUnicode-NoCopy";
1297 #endif
1298 } else {
1299 #if INSTRUMENT_SHARED_STRINGS
1300 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-Copy";
1301 else recordedEncoding = "ForeignUnicode-Copy";
1302 #endif
1303 bytes = vBuf.chars.unicode;
1304 noCopy = false; // Can't do noCopy anymore
1305 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1306 }
1307
1308 }
1309
1310 // At this point, all necessary input arguments have been changed to reflect the new state
1311
1312 } else if (encoding == kCFStringEncodingUnicode && tryToReduceUnicode) { // Check to see if we can reduce Unicode to ASCII
1313 CFIndex cnt;
1314 CFIndex len = numBytes / sizeof(UniChar);
1315 Boolean allASCII = true;
1316
1317 for (cnt = 0; cnt < len; cnt++) if (((const UniChar *)bytes)[cnt] > 127) {
1318 allASCII = false;
1319 break;
1320 }
1321
1322 if (allASCII) { // Yes we can!
1323 uint8_t *ptr, *mem;
1324 Boolean newHasLengthByte = __CFCanUseLengthByte(len);
1325 numBytes = (len + 1 + (newHasLengthByte ? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte
1326 // See if we can use that temporary local buffer in vBuf...
1327 if (numBytes >= __kCFVarWidthLocalBufferSize) {
1328 mem = ptr = (uint8_t *)CFAllocatorAllocate(alloc, numBytes, 0);
1329 if (__CFOASafe) __CFSetLastAllocationEventName(mem, "CFString (store)");
1330 } else {
1331 mem = ptr = (uint8_t *)(vBuf.localBuffer);
1332 }
1333 if (mem) { // If we can't allocate memory for some reason, use what we had (that is, as if we didn't have all ASCII)
1334 // Copy the Unicode bytes into the new ASCII buffer
1335 hasLengthByte = newHasLengthByte;
1336 hasNullByte = true;
1337 if (hasLengthByte) *ptr++ = (uint8_t)len;
1338 for (cnt = 0; cnt < len; cnt++) ptr[cnt] = (uint8_t)(((const UniChar *)bytes)[cnt]);
1339 ptr[len] = 0;
1340 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1341 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1342 }
1343 // Now make everything look like we had an ASCII buffer to start with
1344 bytes = mem;
1345 encoding = kCFStringEncodingASCII;
1346 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1347 noCopy = (numBytes >= __kCFVarWidthLocalBufferSize); // If we had to allocate it, make sure it's kept around
1348 numBytes--; // Should not contain the NULL byte at end...
1349 stringSupportsEightBitCFRepresentation = true; // We're ASCII now!
1350 stringROMShouldIgnoreNoCopy = true; // We allocated this buffer, so we should feel free to get rid of it if we can use the string ROM
1351 #if INSTRUMENT_SHARED_STRINGS
1352 recordedEncoding = "U->A";
1353 #endif
1354 }
1355 }
1356
1357 // At this point, all necessary input arguments have been changed to reflect the new state
1358 }
1359
1360 // Now determine the necessary size
1361 #if INSTRUMENT_SHARED_STRINGS || USE_STRING_ROM
1362 Boolean stringSupportsROM = stringSupportsEightBitCFRepresentation;
1363 #endif
1364
1365 #if INSTRUMENT_SHARED_STRINGS
1366 if (stringSupportsROM) {
1367 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0);
1368 CFIndex realNumBytes = numBytes - !! hasLengthByte;
1369 __CFRecordStringAllocationEvent(recordedEncoding, realBytes, realNumBytes);
1370 }
1371 #endif
1372
1373 CFStringRef romResult = NULL;
1374
1375 #if USE_STRING_ROM
1376
1377 if (stringSupportsROM) {
1378 // Disable the string ROM if necessary
1379 static char sDisableStringROM = -1;
1380 if (sDisableStringROM == -1) sDisableStringROM = !! __CFgetenv("CFStringDisableROM");
1381
1382 if (sDisableStringROM == 0) romResult = _CFSearchStringROM(bytes + !! hasLengthByte, numBytes - !! hasLengthByte);
1383 }
1384 /* if we get a result from our ROM, and noCopy is set, then deallocate the buffer immediately */
1385 if (romResult) {
1386 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1387 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1388 }
1389
1390 /* these don't get used again, but clear them for consistency */
1391 noCopy = false;
1392 bytes = NULL;
1393
1394 /* set our result to the ROM result which is not really mutable, of course, but that's OK because we don't try to modify it. */
1395 str = (CFMutableStringRef)romResult;
1396 }
1397 #endif
1398
1399 if (! romResult) {
1400 // Now determine the necessary size
1401
1402 if (noCopy) {
1403
1404 size = sizeof(void *); // Pointer to the buffer
1405 // special GCRefZero allocator usage always needs saving
1406 if (_CFAllocatorIsGCRefZero(contentsDeallocator) || (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull)) {
1407 size += sizeof(void *); // The contentsDeallocator
1408 }
1409 if (!hasLengthByte) size += sizeof(CFIndex); // Explicit length
1410 useLengthByte = hasLengthByte;
1411 useNullByte = hasNullByte;
1412
1413 } else { // Inline data; reserve space for it
1414
1415 useInlineData = true;
1416 size = numBytes;
1417
1418 if (hasLengthByte || (encoding != kCFStringEncodingUnicode && __CFCanUseLengthByte(numBytes))) {
1419 useLengthByte = true;
1420 if (!hasLengthByte) size += 1;
1421 } else {
1422 size += sizeof(CFIndex); // Explicit length
1423 }
1424 if (hasNullByte || encoding != kCFStringEncodingUnicode) {
1425 useNullByte = true;
1426 size += 1;
1427 }
1428 }
1429
1430 #ifdef STRING_SIZE_STATS
1431 // Dump alloced CFString size info every so often
1432 static int cnt = 0;
1433 static unsigned sizes[256] = {0};
1434 int allocedSize = size + sizeof(CFRuntimeBase);
1435 if (allocedSize < 255) sizes[allocedSize]++; else sizes[255]++;
1436 if ((++cnt % 1000) == 0) {
1437 printf ("\nTotal: %d\n", cnt);
1438 int i; for (i = 0; i < 256; i++) printf("%03d: %5d%s", i, sizes[i], ((i % 8) == 7) ? "\n" : " ");
1439 }
1440 #endif
1441
1442 // Finally, allocate!
1443
1444 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, size, NULL);
1445 if (str) {
1446 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (immutable)");
1447
1448 CFOptionFlags allocBits = _CFAllocatorIsGCRefZero(contentsDeallocator) ? __kCFHasContentsDeallocator : (contentsDeallocator == alloc ? __kCFNotInlineContentsDefaultFree : (contentsDeallocator == kCFAllocatorNull ? __kCFNotInlineContentsNoFree : __kCFNotInlineContentsCustomFree));
1449 __CFStrSetInfoBits(str,
1450 (useInlineData ? __kCFHasInlineContents : allocBits) |
1451 ((encoding == kCFStringEncodingUnicode) ? __kCFIsUnicode : 0) |
1452 (useNullByte ? __kCFHasNullByte : 0) |
1453 (useLengthByte ? __kCFHasLengthByte : 0));
1454
1455 if (!useLengthByte) {
1456 CFIndex length = numBytes - (hasLengthByte ? 1 : 0);
1457 if (encoding == kCFStringEncodingUnicode) length /= sizeof(UniChar);
1458 __CFStrSetExplicitLength(str, length);
1459 }
1460
1461 if (useInlineData) {
1462 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1463 if (useLengthByte && !hasLengthByte) *contents++ = (uint8_t)numBytes;
1464 memmove(contents, bytes, numBytes);
1465 if (useNullByte) contents[numBytes] = 0;
1466 } else {
1467 __CFStrSetContentPtr(str, bytes);
1468 if (__CFStrHasContentsDeallocator(str)) __CFStrSetContentsDeallocator(str, contentsDeallocator);
1469 }
1470 } else {
1471 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1472 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1473 }
1474 }
1475 }
1476 if (vBuf.shouldFreeChars) CFAllocatorDeallocate(vBuf.allocator, (void *)bytes);
1477
1478 return str;
1479 }
1480
1481 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1482 */
1483 CFStringRef __CFStringCreateImmutableFunnel2(
1484 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1485 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1486 CFAllocatorRef contentsDeallocator) {
1487 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, possiblyExternalFormat, tryToReduceUnicode, hasLengthByte, hasNullByte, noCopy, contentsDeallocator, 0);
1488 }
1489
1490
1491
1492 CFStringRef CFStringCreateWithPascalString(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding) {
1493 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1494 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, false, ALLOCATORSFREEFUNC, 0);
1495 }
1496
1497
1498 CFStringRef CFStringCreateWithCString(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding) {
1499 CFIndex len = strlen(cStr);
1500 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, false, ALLOCATORSFREEFUNC, 0);
1501 }
1502
1503 CFStringRef CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1504 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1505 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, true, contentsDeallocator, 0);
1506 }
1507
1508
1509 CFStringRef CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1510 CFIndex len = strlen(cStr);
1511 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, true, contentsDeallocator, 0);
1512 }
1513
1514
1515 CFStringRef CFStringCreateWithCharacters(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars) {
1516 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1517 }
1518
1519
1520 CFStringRef CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars, CFAllocatorRef contentsDeallocator) {
1521 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, false, false, false, true, contentsDeallocator, 0);
1522 }
1523
1524
1525 CFStringRef CFStringCreateWithBytes(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat) {
1526 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1527 }
1528
1529 CFStringRef _CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1530 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1531 }
1532
1533 CFStringRef CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1534 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1535 }
1536
1537 CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1538 return _CFStringCreateWithFormatAndArgumentsAux(alloc, NULL, formatOptions, format, arguments);
1539 }
1540
1541 CFStringRef _CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1542 CFStringRef str;
1543 CFMutableStringRef outputString = CFStringCreateMutable(kCFAllocatorSystemDefault, 0); //should use alloc if no copy/release
1544 __CFStrSetDesiredCapacity(outputString, 120); // Given this will be tightened later, choosing a larger working string is fine
1545 __CFStringAppendFormatCore(outputString, copyDescFunc, formatOptions, format, 0, NULL, 0, arguments);
1546 // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1547 // (However, this does make the string inline, and cause the supplied allocator to be used...)
1548 str = (CFStringRef)CFStringCreateCopy(alloc, outputString);
1549 CFRelease(outputString);
1550 return str;
1551 }
1552
1553 CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) {
1554 CFStringRef result;
1555 va_list argList;
1556
1557 va_start(argList, format);
1558 result = CFStringCreateWithFormatAndArguments(alloc, formatOptions, format, argList);
1559 va_end(argList);
1560
1561 return result;
1562 }
1563
1564 CFStringRef CFStringCreateWithSubstring(CFAllocatorRef alloc, CFStringRef str, CFRange range) {
1565 // CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, CFStringRef , str, "_createSubstringWithRange:", CFRangeMake(range.location, range.length));
1566
1567 __CFAssertIsString(str);
1568 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1569
1570 if ((range.location == 0) && (range.length == __CFStrLength(str))) { /* The substring is the whole string... */
1571 return (CFStringRef)CFStringCreateCopy(alloc, str);
1572 } else if (__CFStrIsEightBit(str)) {
1573 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1574 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location + __CFStrSkipAnyLengthByte(str), range.length, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1575 } else {
1576 const UniChar *contents = (UniChar *)__CFStrContents(str);
1577 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location, range.length * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1578 }
1579 }
1580
1581 CFStringRef CFStringCreateCopy(CFAllocatorRef alloc, CFStringRef str) {
1582 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringRef, str, "copy");
1583
1584 __CFAssertIsString(str);
1585 if (!__CFStrIsMutable((CFStringRef)str) && // If the string is not mutable
1586 ((alloc ? _CFConvertAllocatorToNonGCRefZeroEquivalent(alloc) : __CFGetDefaultAllocator()) == __CFGetAllocator(str)) && // and it has the same allocator as the one we're using
1587 (__CFStrIsInline((CFStringRef)str) || __CFStrFreeContentsWhenDone((CFStringRef)str) || __CFStrIsConstant((CFStringRef)str))) { // and the characters are inline, or are owned by the string, or the string is constant
1588 if (!(kCFUseCollectableAllocator && _CFAllocatorIsGCRefZero(alloc))) CFRetain(str); // Then just retain instead of making a true copy
1589 return str;
1590 }
1591 if (__CFStrIsEightBit((CFStringRef)str)) {
1592 const uint8_t *contents = (const uint8_t *)__CFStrContents((CFStringRef)str);
1593 return __CFStringCreateImmutableFunnel3(alloc, contents + __CFStrSkipAnyLengthByte((CFStringRef)str), __CFStrLength2((CFStringRef)str, contents), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1594 } else {
1595 const UniChar *contents = (const UniChar *)__CFStrContents((CFStringRef)str);
1596 return __CFStringCreateImmutableFunnel3(alloc, contents, __CFStrLength2((CFStringRef)str, contents) * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1597 }
1598 }
1599
1600
1601
1602 /*** Constant string stuff... ***/
1603
1604 /* Table which holds constant strings created with CFSTR, when -fconstant-cfstrings option is not used. These dynamically created constant strings are stored in constantStringTable. The keys are the 8-bit constant C-strings from the compiler; the values are the CFStrings created for them. _CFSTRLock protects this table.
1605 */
1606 static CFMutableDictionaryRef constantStringTable = NULL;
1607 static CFSpinLock_t _CFSTRLock = CFSpinLockInit;
1608
1609 static CFStringRef __cStrCopyDescription(const void *ptr) {
1610 return CFStringCreateWithCStringNoCopy(kCFAllocatorSystemDefault, (const char *)ptr, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull);
1611 }
1612
1613 static Boolean __cStrEqual(const void *ptr1, const void *ptr2) {
1614 return (strcmp((const char *)ptr1, (const char *)ptr2) == 0);
1615 }
1616
1617 static CFHashCode __cStrHash(const void *ptr) {
1618 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1619 const char *cStr = (const char *)ptr;
1620 CFIndex len = strlen(cStr);
1621 CFHashCode result = 0;
1622 if (len <= 4) { // All chars
1623 unsigned cnt = len;
1624 while (cnt--) result += (result << 8) + *cStr++;
1625 } else { // First and last 2 chars
1626 result += (result << 8) + cStr[0];
1627 result += (result << 8) + cStr[1];
1628 result += (result << 8) + cStr[len-2];
1629 result += (result << 8) + cStr[len-1];
1630 }
1631 result += (result << (len & 31));
1632 return result;
1633 }
1634
1635
1636 CFStringRef __CFStringMakeConstantString(const char *cStr) {
1637 CFStringRef result;
1638 #if defined(DEBUG)
1639 // StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging
1640 if ('\0' == *cStr) return kCFEmptyString;
1641 #endif
1642 if (constantStringTable == NULL) {
1643 CFDictionaryKeyCallBacks constantStringCallBacks = {0, NULL, NULL, __cStrCopyDescription, __cStrEqual, __cStrHash};
1644 CFDictionaryValueCallBacks constantStringValueCallBacks = kCFTypeDictionaryValueCallBacks;
1645 constantStringValueCallBacks.equal = NULL; // So that we only find strings that are ==
1646 CFMutableDictionaryRef table = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, &constantStringCallBacks, &constantStringValueCallBacks);
1647 _CFDictionarySetCapacity(table, 2500); // avoid lots of rehashing
1648 __CFSpinLock(&_CFSTRLock);
1649 if (constantStringTable == NULL) constantStringTable = table;
1650 __CFSpinUnlock(&_CFSTRLock);
1651 if (constantStringTable != table) CFRelease(table);
1652 }
1653
1654 __CFSpinLock(&_CFSTRLock);
1655 if ((result = (CFStringRef)CFDictionaryGetValue(constantStringTable, cStr))) {
1656 __CFSpinUnlock(&_CFSTRLock);
1657 } else {
1658 __CFSpinUnlock(&_CFSTRLock);
1659
1660 {
1661 char *key;
1662 Boolean isASCII = true;
1663 // Given this code path is rarer these days, OK to do this extra work to verify the strings
1664 const char *tmp = cStr;
1665 while (*tmp) {
1666 if (*(tmp++) & 0x80) {
1667 isASCII = false;
1668 break;
1669 }
1670 }
1671 if (!isASCII) {
1672 CFMutableStringRef ms = CFStringCreateMutable(kCFAllocatorSystemDefault, 0);
1673 tmp = cStr;
1674 while (*tmp) {
1675 CFStringAppendFormat(ms, NULL, (*tmp & 0x80) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp);
1676 tmp++;
1677 }
1678 CFLog(kCFLogLevelWarning, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms);
1679 CFRelease(ms);
1680 }
1681 // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility
1682 result = CFStringCreateWithCString(kCFAllocatorSystemDefault, cStr, kCFStringEncodingMacRoman);
1683 if (result == NULL) {
1684 CFLog(__kCFLogAssertion, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1685 HALT;
1686 }
1687 if (__CFOASafe) __CFSetLastAllocationEventName((void *)result, "CFString (CFSTR)");
1688 if (__CFStrIsEightBit(result)) {
1689 key = (char *)__CFStrContents(result) + __CFStrSkipAnyLengthByte(result);
1690 } else { // For some reason the string is not 8-bit!
1691 key = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, strlen(cStr) + 1, 0);
1692 if (__CFOASafe) __CFSetLastAllocationEventName((void *)key, "CFString (CFSTR key)");
1693 strlcpy(key, cStr, strlen(cStr) + 1); // !!! We will leak this, if the string is removed from the table (or table is freed)
1694 }
1695
1696 {
1697 CFStringRef resultToBeReleased = result;
1698 CFIndex count;
1699 __CFSpinLock(&_CFSTRLock);
1700 count = CFDictionaryGetCount(constantStringTable);
1701 CFDictionaryAddValue(constantStringTable, key, result);
1702 if (CFDictionaryGetCount(constantStringTable) == count) { // add did nothing, someone already put it there
1703 result = (CFStringRef)CFDictionaryGetValue(constantStringTable, key);
1704 } else {
1705 #if __LP64__
1706 ((struct __CFString *)result)->base._rc = 0;
1707 #else
1708 ((struct __CFString *)result)->base._cfinfo[CF_RC_BITS] = 0;
1709 #endif
1710 }
1711 __CFSpinUnlock(&_CFSTRLock);
1712 // This either eliminates the extra retain on the freshly created string, or frees it, if it was actually not inserted into the table
1713 CFRelease(resultToBeReleased);
1714 }
1715 }
1716 }
1717 return result;
1718 }
1719
#if defined(DEBUG)
/* Debug-only: reports whether str is one of the values stored in constantStringTable.
   Returns false when the table has not been created. The lookup is done under _CFSTRLock.
*/
static Boolean __CFStrIsConstantString(CFStringRef str) {
    if (!constantStringTable) return false;

    __CFSpinLock(&_CFSTRLock);
    Boolean isInTable = CFDictionaryContainsValue(constantStringTable, str);
    __CFSpinUnlock(&_CFSTRLock);
    return isInTable;
}
#endif
1731
1732
#if DEPLOYMENT_TARGET_WINDOWS
/* In case the library is unloaded, releases the store for the constant string table and resets the
   table pointer to NULL. In DEBUG builds __CFConstantStringTableBeingFreed is set for the duration of the release.
*/
void __CFStringCleanup (void) {
    if (constantStringTable == NULL) return;

#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = true;
#endif
    CFRelease(constantStringTable);
#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = false;
#endif
    constantStringTable = NULL;
}
#endif
1748
1749
1750 // Can pass in NSString as replacement string
1751 // Call with numRanges > 0, and incrementing ranges
1752
1753 static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) {
1754 int cnt;
1755 CFStringRef copy = NULL;
1756 if (replacement == str) copy = replacement = CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1757 CFIndex replacementLength = CFStringGetLength(replacement);
1758
1759 __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1760
1761 if (__CFStrIsUnicode(str)) {
1762 UniChar *contents = (UniChar *)__CFStrContents(str);
1763 UniChar *firstReplacement = contents + ranges[0].location;
1764 // Extract the replacementString into the first location, then copy from there
1765 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement);
1766 for (cnt = 1; cnt < numRanges; cnt++) {
1767 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1768 contents += replacementLength - ranges[cnt - 1].length;
1769 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar));
1770 }
1771 } else {
1772 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1773 uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str);
1774 // Extract the replacementString into the first location, then copy from there
1775 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL);
1776 contents += __CFStrSkipAnyLengthByte(str); // Now contents will simply track the location to insert next string into
1777 for (cnt = 1; cnt < numRanges; cnt++) {
1778 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1779 contents += replacementLength - ranges[cnt - 1].length;
1780 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength);
1781 }
1782 }
1783 if (copy) CFRelease(copy);
1784 }
1785
1786 // Can pass in NSString as replacement string
1787
1788 CF_INLINE void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
1789 CFStringRef copy = NULL;
1790 if (replacement == str) copy = replacement = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1791 CFIndex replacementLength = CFStringGetLength(replacement);
1792
1793 __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1794
1795 if (__CFStrIsUnicode(str)) {
1796 UniChar *contents = (UniChar *)__CFStrContents(str);
1797 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location);
1798 } else {
1799 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1800 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL);
1801 }
1802
1803 if (copy) CFRelease(copy);
1804 }
1805
/* Desired capacity used when the client does not provide a minimum capacity (maxLength of 0)
*/
#define DEFAULTMINCAPACITY (32)
1809
1810 CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) {
1811 CFMutableStringRef str;
1812 if (_CFAllocatorIsGCRefZero(alloc)) additionalInfoBits |= __kCFHasContentsAllocator;
1813 Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false;
1814
1815 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1816
1817 // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
1818 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(struct __notInlineMutable) - (hasExternalContentsAllocator ? 0 : sizeof(CFAllocatorRef)), NULL);
1819 if (str) {
1820 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)");
1821
1822 __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits);
1823 str->variants.notInlineMutable.buffer = NULL;
1824 __CFStrSetExplicitLength(str, 0);
1825 str->variants.notInlineMutable.hasGap = str->variants.notInlineMutable.isFixedCapacity = str->variants.notInlineMutable.isExternalMutable = str->variants.notInlineMutable.capacityProvidedExternally = 0;
1826 if (maxLength != 0) __CFStrSetIsFixed(str);
1827 __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength);
1828 __CFStrSetCapacity(str, 0);
1829 if (__CFStrHasContentsAllocator(str)) {
1830 // contents allocator starts out as the string's own allocator
1831 __CFStrSetContentsAllocator(str, alloc);
1832 }
1833 }
1834 return str;
1835 }
1836
1837 CFMutableStringRef CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc, UniChar *chars, CFIndex numChars, CFIndex capacity, CFAllocatorRef externalCharactersAllocator) {
1838 CFOptionFlags contentsAllocationBits = externalCharactersAllocator ? ((externalCharactersAllocator == kCFAllocatorNull) ? __kCFNotInlineContentsNoFree : __kCFHasContentsAllocator) : __kCFNotInlineContentsDefaultFree;
1839 CFMutableStringRef string = __CFStringCreateMutableFunnel(alloc, 0, contentsAllocationBits | __kCFIsUnicode);
1840 if (string) {
1841 __CFStrSetIsExternalMutable(string);
1842 if (__CFStrHasContentsAllocator(string)) {
1843 CFAllocatorRef allocator = __CFStrContentsAllocator((CFMutableStringRef)string);
1844 if (!(kCFAllocatorSystemDefaultGCRefZero == allocator || kCFAllocatorDefaultGCRefZero == allocator)) CFRelease(allocator);
1845 __CFStrSetContentsAllocator(string, externalCharactersAllocator);
1846 }
1847 CFStringSetExternalCharactersNoCopy(string, chars, numChars, capacity);
1848 }
1849 return string;
1850 }
1851
1852 CFMutableStringRef CFStringCreateMutable(CFAllocatorRef alloc, CFIndex maxLength) {
1853 return __CFStringCreateMutableFunnel(alloc, maxLength, __kCFNotInlineContentsDefaultFree);
1854 }
1855
1856 CFMutableStringRef CFStringCreateMutableCopy(CFAllocatorRef alloc, CFIndex maxLength, CFStringRef string) {
1857 CFMutableStringRef newString;
1858
1859 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFMutableStringRef, string, "mutableCopy");
1860
1861 __CFAssertIsString(string);
1862
1863 newString = CFStringCreateMutable(alloc, maxLength);
1864 __CFStringReplace(newString, CFRangeMake(0, 0), string);
1865
1866 return newString;
1867 }
1868
1869
1870 __private_extern__ void _CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex len) {
1871 __CFAssertIsStringAndMutable(str);
1872 __CFStrSetDesiredCapacity(str, len);
1873 }
1874
1875
1876 /* This one is for CF
1877 */
1878 CFIndex CFStringGetLength(CFStringRef str) {
1879 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFIndex, str, "length");
1880
1881 __CFAssertIsString(str);
1882 return __CFStrLength(str);
1883 }
1884
1885 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1886 */
1887 CFIndex _CFStringGetLength2(CFStringRef str) {
1888 return __CFStrLength(str);
1889 }
1890
1891
1892 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere.
1893 */
1894 CF_INLINE UniChar __CFStringGetCharacterAtIndexGuts(CFStringRef str, CFIndex idx, const uint8_t *contents) {
1895 if (__CFStrIsEightBit(str)) {
1896 contents += __CFStrSkipAnyLengthByte(str);
1897 #if defined(DEBUG)
1898 if (!__CFCharToUniCharFunc && (contents[idx] >= 128)) {
1899 // Can't do log here, as it might be too early
1900 fprintf(stderr, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
1901 }
1902 #endif
1903 return __CFCharToUniCharTable[contents[idx]];
1904 }
1905
1906 return ((UniChar *)contents)[idx];
1907 }
1908
1909 /* This one is for the CF API
1910 */
1911 UniChar CFStringGetCharacterAtIndex(CFStringRef str, CFIndex idx) {
1912 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, UniChar, str, "characterAtIndex:", idx);
1913
1914 __CFAssertIsString(str);
1915 __CFAssertIndexIsInStringBounds(str, idx);
1916 return __CFStringGetCharacterAtIndexGuts(str, idx, (const uint8_t *)__CFStrContents(str));
1917 }
1918
1919 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1920 */
1921 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str, CFIndex idx, UniChar *ch) {
1922 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1923 if (idx >= __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1924 *ch = __CFStringGetCharacterAtIndexGuts(str, idx, contents);
1925 return _CFStringErrNone;
1926 }
1927
1928
1929 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
1930 */
1931 CF_INLINE void __CFStringGetCharactersGuts(CFStringRef str, CFRange range, UniChar *buffer, const uint8_t *contents) {
1932 if (__CFStrIsEightBit(str)) {
1933 __CFStrConvertBytesToUnicode(((uint8_t *)contents) + (range.location + __CFStrSkipAnyLengthByte(str)), buffer, range.length);
1934 } else {
1935 const UniChar *uContents = ((UniChar *)contents) + range.location;
1936 memmove(buffer, uContents, range.length * sizeof(UniChar));
1937 }
1938 }
1939
1940 /* This one is for the CF API
1941 */
1942 void CFStringGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1943 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "getCharacters:range:", buffer, range);
1944
1945 __CFAssertIsString(str);
1946 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1947 __CFStringGetCharactersGuts(str, range, buffer, (const uint8_t *)__CFStrContents(str));
1948 }
1949
1950 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1951 */
1952 int _CFStringCheckAndGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1953 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1954 if (range.location + range.length > __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1955 __CFStringGetCharactersGuts(str, range, buffer, contents);
1956 return _CFStringErrNone;
1957 }
1958
1959
1960 CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) {
1961
1962 /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */
1963 __CFAssertIsNotNegative(maxBufLen);
1964
1965 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { // If we can grope the ivars, let's do it...
1966 __CFAssertIsString(str);
1967 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1968
1969 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
1970 const unsigned char *contents = (const unsigned char *)__CFStrContents(str);
1971 CFIndex cLength = range.length;
1972
1973 if (buffer) {
1974 if (cLength > maxBufLen) cLength = maxBufLen;
1975 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength);
1976 }
1977 if (usedBufLen) *usedBufLen = cLength;
1978
1979 return cLength;
1980 }
1981 }
1982
1983 return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen);
1984 }
1985
1986
1987 ConstStringPtr CFStringGetPascalStringPtr (CFStringRef str, CFStringEncoding encoding) {
1988
1989 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1990 __CFAssertIsString(str);
1991 if (__CFStrHasLengthByte(str) && __CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII
1992 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1993 if (__CFStrHasExplicitLength(str) && (__CFStrLength2(str, contents) != (SInt32)(*contents))) return NULL; // Invalid length byte
1994 return (ConstStringPtr)contents;
1995 }
1996 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1997 }
1998 return NULL;
1999 }
2000
2001
2002 const char * CFStringGetCStringPtr(CFStringRef str, CFStringEncoding encoding) {
2003
2004 if (encoding != __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII != __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding))) return NULL;
2005 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
2006
2007 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, const char *, str, "_fastCStringContents:", true);
2008
2009 __CFAssertIsString(str);
2010
2011 if (__CFStrHasNullByte(str)) {
2012 // Note: this is called a lot, 27000 times to open a small xcode project with one file open.
2013 // Of these uses about 1500 are for cStrings/utf8strings.
2014 #if 0
2015 // Only sometimes when the stars are aligned will this call return a gc pointer
2016 // under GC we can only really return a pointer to the start of a GC buffer for cString use
2017 // (Is there a simpler way to ask if contents isGC?)
2018 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
2019 if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
2020 if (__CFStrSkipAnyLengthByte(str) != 0 || !__CFStrIsMutable(str)) {
2021 static int counter = 0;
2022 printf("CFString %dth unsafe safe string %s\n", ++counter, __CFStrContents(str) + __CFStrSkipAnyLengthByte(str));
2023 return NULL;
2024 }
2025 }
2026 #endif
2027 return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str);
2028 } else {
2029 return NULL;
2030 }
2031 }
2032
2033
2034 const UniChar *CFStringGetCharactersPtr(CFStringRef str) {
2035
2036 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, const UniChar *, str, "_fastCharacterContents");
2037
2038 __CFAssertIsString(str);
2039 if (__CFStrIsUnicode(str)) return (const UniChar *)__CFStrContents(str);
2040 return NULL;
2041 }
2042
2043
2044 Boolean CFStringGetPascalString(CFStringRef str, Str255 buffer, CFIndex bufferSize, CFStringEncoding encoding) {
2045 CFIndex length;
2046 CFIndex usedLen;
2047
2048 __CFAssertIsNotNegative(bufferSize);
2049 if (bufferSize < 1) return false;
2050
2051 if (CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
2052 length = CFStringGetLength(str);
2053 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
2054 } else {
2055 const uint8_t *contents;
2056
2057 __CFAssertIsString(str);
2058
2059 contents = (const uint8_t *)__CFStrContents(str);
2060 length = __CFStrLength2(str, contents);
2061
2062 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
2063
2064 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2065 if (length >= bufferSize) return false;
2066 memmove((void*)(1 + (const char*)buffer), (__CFStrSkipAnyLengthByte(str) + contents), length);
2067 *buffer = (unsigned char)length;
2068 return true;
2069 }
2070 }
2071
2072 if (__CFStringEncodeByteStream(str, 0, length, false, encoding, false, (UInt8 *)(1 + (uint8_t *)buffer), bufferSize - 1, &usedLen) != length) {
2073
2074 #if defined(DEBUG)
2075 if (bufferSize > 0) {
2076 strlcpy((char *)buffer + 1, CONVERSIONFAILURESTR, bufferSize - 1);
2077 buffer[0] = (unsigned char)((CFIndex)sizeof(CONVERSIONFAILURESTR) < (bufferSize - 1) ? (CFIndex)sizeof(CONVERSIONFAILURESTR) : (bufferSize - 1));
2078 }
2079 #else
2080 if (bufferSize > 0) buffer[0] = 0;
2081 #endif
2082 return false;
2083 }
2084 *buffer = (unsigned char)usedLen;
2085 return true;
2086 }
2087
2088 Boolean CFStringGetCString(CFStringRef str, char *buffer, CFIndex bufferSize, CFStringEncoding encoding) {
2089 const uint8_t *contents;
2090 CFIndex len;
2091
2092 __CFAssertIsNotNegative(bufferSize);
2093 if (bufferSize < 1) return false;
2094
2095 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, Boolean, str, "_getCString:maxLength:encoding:", buffer, bufferSize - 1, encoding);
2096
2097 __CFAssertIsString(str);
2098
2099 contents = (const uint8_t *)__CFStrContents(str);
2100 len = __CFStrLength2(str, contents);
2101
2102 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2103 if (len >= bufferSize) return false;
2104 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str), len);
2105 buffer[len] = 0;
2106 return true;
2107 } else {
2108 CFIndex usedLen;
2109
2110 if (__CFStringEncodeByteStream(str, 0, len, false, encoding, false, (unsigned char*) buffer, bufferSize - 1, &usedLen) == len) {
2111 buffer[usedLen] = '\0';
2112 return true;
2113 } else {
2114 #if defined(DEBUG)
2115 strlcpy(buffer, CONVERSIONFAILURESTR, bufferSize);
2116 #else
2117 if (bufferSize > 0) buffer[0] = 0;
2118 #endif
2119 return false;
2120 }
2121 }
2122 }
2123
2124 extern Boolean __CFLocaleGetNullLocale(struct __CFLocale *locale);
2125 extern void __CFLocaleSetNullLocale(struct __CFLocale *locale);
2126
2127 static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale) {
2128 CFStringRef collatorID;
2129 const char *langID = NULL;
2130 static const void *lastLocale = NULL;
2131 static const char *lastLangID = NULL;
2132 static CFSpinLock_t lock = CFSpinLockInit;
2133
2134 if (__CFLocaleGetNullLocale((struct __CFLocale *)locale)) return NULL;
2135
2136 __CFSpinLock(&lock);
2137 if ((NULL != lastLocale) && (lastLocale == locale)) {
2138 __CFSpinUnlock(&lock);
2139 return lastLangID;
2140 }
2141 __CFSpinUnlock(&lock);
2142
2143 collatorID = (CFStringRef)CFLocaleGetValue(locale, __kCFLocaleCollatorID);
2144
2145 // This is somewhat depending on CFLocale implementation always creating CFString for locale identifer ???
2146 if (__CFStrLength(collatorID) > 1) {
2147 const void *contents = __CFStrContents(collatorID);
2148 const char *string;
2149 char buffer[2];
2150
2151 if (__CFStrIsEightBit(collatorID)) {
2152 string = ((const char *)contents) + __CFStrSkipAnyLengthByte(collatorID);
2153 } else {
2154 const UTF16Char *characters = (const UTF16Char *)contents;
2155
2156 buffer[0] = (char)*(characters++);
2157 buffer[1] = (char)*characters;
2158 string = buffer;
2159 }
2160
2161 if (!strncmp(string, "az", 2)) { // Azerbaijani
2162 langID = "az";
2163 } else if (!strncmp(string, "lt", 2)) { // Lithuanian
2164 langID = "lt";
2165 } else if (!strncmp(string, "tr", 2)) { // Turkish
2166 langID = "tr";
2167 } else if (!strncmp(string, "nl", 2)) { // Dutch
2168 langID = "nl";
2169 }
2170 }
2171
2172
2173 if (langID == NULL) __CFLocaleSetNullLocale((struct __CFLocale *)locale);
2174
2175 __CFSpinLock(&lock);
2176 lastLocale = locale;
2177 lastLangID = langID;
2178 __CFSpinUnlock(&lock);
2179
2180 return langID;
2181 }
2182
2183 CF_INLINE bool _CFCanUseLocale(CFLocaleRef locale) {
2184 if (locale) {
2185 return true;
2186 }
2187 return false;
2188 }
2189
// Capacity, in UTF16Chars, of the temporary case-folding buffer used by __CFStringFoldCharacterClusterAtIndex
2190 #define MAX_CASE_MAPPING_BUF (8)
2191 #define ZERO_WIDTH_JOINER (0x200D)
2192 #define COMBINING_GRAPHEME_JOINER (0x034F)
2193 // Hangul ranges
2194 #define HANGUL_CHOSEONG_START (0x1100)
2195 #define HANGUL_CHOSEONG_END (0x115F)
2196 #define HANGUL_JUNGSEONG_START (0x1160)
2197 #define HANGUL_JUNGSEONG_END (0x11A2)
2198 #define HANGUL_JONGSEONG_START (0x11A8)
2199 #define HANGUL_JONGSEONG_END (0x11F9)
2200
2201 #define HANGUL_SYLLABLE_START (0xAC00)
2202 #define HANGUL_SYLLABLE_END (0xD7AF)
2203
2204
/* Folds the character cluster starting at `index` of `buffer` according to the compare `flags`.

   character        the character found at `index`, supplied by the caller; 0 makes the function return 0 at once
   buffer, index    inline buffer and position from which the following code units are read
   flags            only kCFCompareCaseInsensitive, kCFCompareWidthInsensitive, kCFCompareDiacriticInsensitive
                    and kCFCompareNonliteral are examined here
   langCode         optional language code; "tr" and "az" enable the capital I + U+0307 special case, and any
                    non-NULL value keeps 'I' off the ASCII fast path
   outCharacters    destination for the folded UTF32Chars
   maxBufferLength  capacity of outCharacters in UTF32Chars
   consumedLength   if non-NULL, receives the number of code units consumed from `buffer`; it is written only
                    when the return value is greater than 0
*/
2205 // Returns the length of characters filled into outCharacters. If no change, returns 0. maxBufferLength should be at least 8
2206 static CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength) {
2207 CFIndex filledLength = 0, currentIndex = index;
2208
2209 if (0 != character) {
2210 UTF16Char lowSurrogate;
2211 CFIndex planeNo = (character >> 16);
2212 bool isTurkikCapitalI = false;
// Plane 0 bitmaps are looked up once and kept in function statics
2213 static const uint8_t *decompBMP = NULL;
2214 static const uint8_t *graphemeBMP = NULL;
2215
2216 if (NULL == decompBMP) {
2217 decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
2218 graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2219 }
2220
// The code unit at `index` is always counted as consumed
2221 ++currentIndex;
2222
// Fast path; 'I' is excluded whenever a langCode is supplied so that the special-casing further down can run
2223 if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII
2224 if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) {
2225 character += ('a' - 'A');
2226 *outCharacters = character;
2227 filledLength = 1;
2228 }
2229 } else {
2230 // do width-insensitive mapping
2231 if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) {
2232 (void)CFUniCharCompatibilityDecompose(&character, 1, 1);
2233 *outCharacters = character;
2234 filledLength = 1;
2235 }
2236
2237 // map surrogates
2238 if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) {
2239 character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
2240 ++currentIndex;
2241 planeNo = (character >> 16);
2242 }
2243
2244 // decompose
2245 if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
2246 if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) {
2247 UTF32Char original = character;
2248
2249 filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength);
2250 character = *outCharacters;
2251
2252 if ((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) {
// Only the first decomposed character is kept
2253 filledLength = 1; // reset if Roman, Greek, Cyrillic
2254 } else if (0 == (flags & kCFCompareNonliteral)) {
// kCFCompareNonliteral is not set: discard the decomposition and restore the input character
2255 character = original;
2256 filledLength = 0;
2257 }
2258 }
2259 }
2260
2261 // fold case
2262 if (flags & kCFCompareCaseInsensitive) {
2263 const uint8_t *nonBaseBitmap;
2264 bool filterNonBase = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? true : false);
2265 static const uint8_t *lowerBMP = NULL;
2266 static const uint8_t *caseFoldBMP = NULL;
2267
2268 if (NULL == lowerBMP) {
2269 lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0);
2270 caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0);
2271 }
2272
2273 if ((NULL != langCode) && ('I' == character) && ((0 == strcmp((const char *)langCode, "tr")) || (0 == strcmp((const char *)langCode, "az")))) { // do Turkik special-casing
2274 if (filledLength > 1) {
2275 if (0x0307 == outCharacters[1]) {
// Remove the U+0307 at outCharacters[1], moving any remaining characters down, and map to 'i'
2276 if (--filledLength > 1) memmove((outCharacters + 1), (outCharacters + 2), sizeof(UTF32Char) * (filledLength - 1));
2277 character = *outCharacters = 'i';
2278 isTurkikCapitalI = true;
2279 }
2280 } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) {
// The U+0307 is the next code unit of the source: map to 'i' and consume it
2281 character = *outCharacters = 'i';
2282 filledLength = 1;
2283 ++currentIndex;
2284 isTurkikCapitalI = true;
2285 }
2286 }
2287 if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) {
2288 UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF];
2289 const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit;
2290 UTF32Char *outCharactersP = outCharacters;
2291 uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode);
2292
2293 bufferLimit = bufferP + bufferLength;
2294
2295 if (filledLength > 0) --filledLength; // decrement filledLength (will add back later)
2296
2297 // make space for casefold characters
2298 if ((filledLength > 0) && (bufferLength > 1)) {
2299 CFIndex totalScalerLength = 0;
2300
// Count the case-fold result in scalars: a high/low surrogate pair counts once
2301 while (bufferP < bufferLimit) {
2302 if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP;
2303 ++totalScalerLength;
2304 }
2305 memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char));
2306 bufferP = caseFoldBuffer;
2307 }
2308
2309 // fill
2310 while (bufferP < bufferLimit) {
2311 character = *(bufferP++);
2312 if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) {
2313 character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++));
2314 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
2315 } else {
2316 nonBaseBitmap = graphemeBMP;
2317 }
2318
// With filterNonBase set, grapheme-extend characters produced by the case mapping are dropped
2319 if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2320 *(outCharactersP++) = character;
2321 ++filledLength;
2322 }
2323 }
2324 }
2325 }
2326 }
2327
2328 // collect following combining marks
2329 if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
2330 const uint8_t *nonBaseBitmap;
2331 const uint8_t *decompBitmap;
// doFill is false when the following marks are to be consumed without being written to outCharacters
2332 bool doFill = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? false : true);
2333
2334 if (0 == filledLength) {
2335 *outCharacters = character; // filledLength will be updated below on demand
2336
2337 if (doFill) { // check if really needs to fill
2338 UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2339
2340 if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2341 nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate);
2342 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (nonBaseCharacter >> 16));
2343 decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16));
2344 } else {
2345 nonBaseBitmap = graphemeBMP;
2346 decompBitmap = decompBMP;
2347 }
2348
2349 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) {
2350 filledLength = 1; // For the base character
2351
2352 if ((0 == (flags & kCFCompareDiacriticInsensitive)) || (nonBaseCharacter > 0x050F)) {
2353 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) {
2354 filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2355 } else {
2356 outCharacters[filledLength++] = nonBaseCharacter;
2357 }
2358 }
// nonBaseBitmap equals graphemeBMP only on the non-surrogate path: advance by one code unit there, by two for a surrogate pair
2359 currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
2360 } else {
2361 doFill = false;
2362 }
2363 }
2364 }
2365
2366 while (filledLength < maxBufferLength) { // do the rest
2367 character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2368
2369 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2370 character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
2371 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
2372 decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16));
2373 } else {
2374 nonBaseBitmap = graphemeBMP;
2375 decompBitmap = decompBMP;
2376 }
// After the special case above, the first pass only clears the flag; currentIndex is not advanced, so the same character is examined again on the next pass
2377 if (isTurkikCapitalI) {
2378 isTurkikCapitalI = false;
2379 } else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2380 if (doFill) {
2381 if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) {
2382 CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2383
2384 if (0 == currentLength) break; // didn't fit
2385
2386 filledLength += currentLength;
2387 } else {
2388 outCharacters[filledLength++] = character;
2389 }
2390 } else if (0 == filledLength) {
2391 filledLength = 1; // For the base character
2392 }
2393 currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
2394 } else {
2395 break;
2396 }
2397 }
2398
2399 if (filledLength > 1) {
2400 UTF32Char *sortCharactersLimit = outCharacters + filledLength;
2401 UTF32Char *sortCharacters = sortCharactersLimit - 1;
2402
// Walk back from the end over grapheme-extend characters, stopping at the first non-member or at outCharacters
2403 while ((outCharacters < sortCharacters) && CFUniCharIsMemberOfBitmap(*sortCharacters, ((*sortCharacters < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (*sortCharacters >> 16))))) --sortCharacters;
2404
// NOTE(review): CFUniCharPrioritySort presumably orders by combining class - confirm against its definition
2405 if ((sortCharactersLimit - sortCharacters) > 1) CFUniCharPrioritySort(sortCharacters, (sortCharactersLimit - sortCharacters)); // priority sort
2406 }
2407 }
2408 }
2409
// *consumedLength is left untouched when nothing was written to outCharacters
2410 if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index);
2411
2412 return filledLength;
2413 }
2414
2415 static bool __CFStringFillCharacterSetInlineBuffer(CFCharacterSetInlineBuffer *buffer, CFStringCompareFlags compareOptions) {
2416 if (0 != (compareOptions & kCFCompareIgnoreNonAlphanumeric)) {
2417 static CFCharacterSetRef nonAlnumChars = NULL;
2418
2419 if (NULL == nonAlnumChars) {
2420 CFMutableCharacterSetRef cset = CFCharacterSetCreateMutableCopy(kCFAllocatorSystemDefault, CFCharacterSetGetPredefined(kCFCharacterSetAlphaNumeric));
2421 CFCharacterSetInvert(cset);
2422 if (!OSAtomicCompareAndSwapPtrBarrier(NULL, cset, (void **)&nonAlnumChars)) CFRelease(cset);
2423 }
2424
2425 CFCharacterSetInitInlineBuffer(nonAlnumChars, buffer);
2426
2427 return true;
2428 }
2429
2430 return false;
2431 }
2432
2433 #define kCFStringStackBufferLength (__kCFStringInlineBufferLength)
2434
2435 CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions, CFLocaleRef locale) {
2436 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2437 UTF32Char strBuf1[kCFStringStackBufferLength];
2438 UTF32Char strBuf2[kCFStringStackBufferLength];
2439 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2440 UTF32Char str1Char, str2Char;
2441 CFIndex str1UsedLen, str2UsedLen;
2442 CFIndex str1Index = 0, str2Index = 0, strBuf1Index = 0, strBuf2Index = 0, strBuf1Len = 0, strBuf2Len = 0;
2443 CFIndex str1LocalizedIndex = 0, str2LocalizedIndex = 0;
2444 CFIndex forcedIndex1 = 0, forcedIndex2 = 0;
2445 CFIndex str2Len = CFStringGetLength(string2);
2446 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2447 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
2448 bool equalityOptions = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive)) ? true : false);
2449 bool numerically = ((compareOptions & kCFCompareNumerically) ? true : false);
2450 bool forceOrdering = ((compareOptions & kCFCompareForcedOrdering) ? true : false);
2451 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2452 const uint8_t *langCode;
2453 CFComparisonResult compareResult = kCFCompareEqualTo;
2454 UTF16Char otherChar;
2455 Boolean freeLocale = false;
2456 CFCharacterSetInlineBuffer *ignoredChars = NULL;
2457 CFCharacterSetInlineBuffer csetBuffer;
2458 bool numericEquivalence = false;
2459
2460 if ((compareOptions & kCFCompareLocalized) && (NULL == locale)) {
2461 locale = CFLocaleCopyCurrent();
2462 freeLocale = true;
2463 }
2464
2465 langCode = ((NULL == locale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale));
2466
2467 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
2468 ignoredChars = &csetBuffer;
2469 equalityOptions = true;
2470 }
2471
2472 if ((NULL == locale) && (NULL == ignoredChars) && !numerically) { // could do binary comp (be careful when adding new flags)
2473 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2474 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2475 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(string2, eightBitEncoding);
2476 CFIndex factor = sizeof(uint8_t);
2477
2478 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2479 compareOptions &= ~kCFCompareNonliteral; // remove non-literal
2480
2481 if ((kCFStringEncodingASCII == eightBitEncoding) && (false == forceOrdering)) {
2482 if (caseInsensitive) {
2483 int cmpResult = strncasecmp_l((const char *)str1Bytes + rangeToCompare.location, (const char *)str2Bytes, __CFMin(rangeToCompare.length, str2Len), NULL);
2484
2485 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2486
2487 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2488 }
2489 } else if (caseInsensitive || diacriticsInsensitive) {
2490 CFIndex limitLength = __CFMin(rangeToCompare.length, str2Len);
2491
2492 str1Bytes += rangeToCompare.location;
2493
2494 while (str1Index < limitLength) {
2495 str1Char = str1Bytes[str1Index];
2496 str2Char = str2Bytes[str1Index];
2497
2498 if (str1Char != str2Char) {
2499 if ((str1Char < 0x80) && (str2Char < 0x80)) {
2500 if (forceOrdering && (kCFCompareEqualTo == compareResult) && (str1Char != str2Char)) compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2501 if (caseInsensitive) {
2502 if ((str1Char >= 'A') && (str1Char <= 'Z')) str1Char += ('a' - 'A');
2503 if ((str2Char >= 'A') && (str2Char <= 'Z')) str2Char += ('a' - 'A');
2504 }
2505
2506 if (str1Char != str2Char) return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2507 } else {
2508 str1Bytes = NULL;
2509 break;
2510 }
2511 }
2512 ++str1Index;
2513 }
2514
2515 str2Index = str1Index;
2516
2517 if (str1Index == limitLength) {
2518 int cmpResult = rangeToCompare.length - str2Len;
2519
2520 return ((0 == cmpResult) ? compareResult : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2521 }
2522 }
2523 } else if (!equalityOptions && (NULL == str1Bytes) && (NULL == str2Bytes)) {
2524 str1Bytes = (const uint8_t *)CFStringGetCharactersPtr(string);
2525 str2Bytes = (const uint8_t *)CFStringGetCharactersPtr(string2);
2526 factor = sizeof(UTF16Char);
2527 #if __LITTLE_ENDIAN__
2528 if ((NULL != str1Bytes) && (NULL != str2Bytes)) { // we cannot use memcmp
2529 const UTF16Char *str1 = ((const UTF16Char *)str1Bytes) + rangeToCompare.location;
2530 const UTF16Char *str1Limit = str1 + __CFMin(rangeToCompare.length, str2Len);
2531 const UTF16Char *str2 = (const UTF16Char *)str2Bytes;
2532 CFIndex cmpResult = 0;
2533
2534 while ((0 == cmpResult) && (str1 < str1Limit)) cmpResult = (CFIndex)*(str1++) - (CFIndex)*(str2++);
2535
2536 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2537
2538 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2539 }
2540 #endif /* __LITTLE_ENDIAN__ */
2541 }
2542 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2543 int cmpResult = memcmp(str1Bytes + (rangeToCompare.location * factor), str2Bytes, __CFMin(rangeToCompare.length, str2Len) * factor);
2544
2545 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2546
2547 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2548 }
2549 }
2550
2551 CFStringInitInlineBuffer(string, &inlineBuf1, rangeToCompare);
2552 CFStringInitInlineBuffer(string2, &inlineBuf2, CFRangeMake(0, str2Len));
2553
2554 if (NULL != locale) {
2555 str1LocalizedIndex = str1Index;
2556 str2LocalizedIndex = str2Index;
2557
2558 // We temporarily disable kCFCompareDiacriticInsensitive for SL <rdar://problem/6767096>. Should be revisited in NMOS <rdar://problem/7003830>
2559 if (forceOrdering) {
2560 diacriticsInsensitive = false;
2561 compareOptions &= ~kCFCompareDiacriticInsensitive;
2562 }
2563 }
2564 while ((str1Index < rangeToCompare.length) && (str2Index < str2Len)) {
2565 if (strBuf1Len == 0) {
2566 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2567 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str1Char += ('a' - 'A');
2568 str1UsedLen = 1;
2569 } else {
2570 str1Char = strBuf1[strBuf1Index++];
2571 }
2572 if (strBuf2Len == 0) {
2573 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2574 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str2Char += ('a' - 'A');
2575 str2UsedLen = 1;
2576 } else {
2577 str2Char = strBuf2[strBuf2Index++];
2578 }
2579
2580 if (numerically && ((0 == strBuf1Len) && (str1Char <= '9') && (str1Char >= '0')) && ((0 == strBuf2Len) && (str2Char <= '9') && (str2Char >= '0'))) { // If both are not ASCII digits, then don't do numerical comparison here
2581 uint64_t intValue1 = 0, intValue2 = 0; // !!! Doesn't work if numbers are > max uint64_t
2582 CFIndex str1NumRangeIndex = str1Index;
2583 CFIndex str2NumRangeIndex = str2Index;
2584
2585 do {
2586 intValue1 = (intValue1 * 10) + (str1Char - '0');
2587 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, ++str1Index);
2588 } while ((str1Char <= '9') && (str1Char >= '0'));
2589
2590 do {
2591 intValue2 = intValue2 * 10 + (str2Char - '0');
2592 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, ++str2Index);
2593 } while ((str2Char <= '9') && (str2Char >= '0'));
2594
2595 if (intValue1 == intValue2) {
2596 if (forceOrdering && (kCFCompareEqualTo == compareResult) && ((str1Index - str1NumRangeIndex) != (str2Index - str2NumRangeIndex))) {
2597 compareResult = (((str1Index - str1NumRangeIndex) < (str2Index - str2NumRangeIndex)) ? kCFCompareLessThan : kCFCompareGreaterThan);
2598 numericEquivalence = true;
2599 forcedIndex1 = str1NumRangeIndex;
2600 forcedIndex2 = str2NumRangeIndex;
2601 }
2602
2603 continue;
2604 } else if (intValue1 < intValue2) {
2605 if (freeLocale && locale) {
2606 CFRelease(locale);
2607 }
2608 return kCFCompareLessThan;
2609 } else {
2610 if (freeLocale && locale) {
2611 CFRelease(locale);
2612 }
2613 return kCFCompareGreaterThan;
2614 }
2615 }
2616
2617 if (str1Char != str2Char) {
2618 if (!equalityOptions) {
2619 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale));
2620 if (freeLocale && locale) {
2621 CFRelease(locale);
2622 }
2623 return compareResult;
2624 }
2625
2626 if (forceOrdering && (kCFCompareEqualTo == compareResult)) {
2627 compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2628 forcedIndex1 = str1LocalizedIndex;
2629 forcedIndex2 = str2LocalizedIndex;
2630 }
2631
2632 if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars)) {
2633 if (NULL != locale) {
2634 compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale);
2635 if (freeLocale && locale) {
2636 CFRelease(locale);
2637 }
2638 return compareResult;
2639 } else if (!caseInsensitive) {
2640 if (freeLocale && locale) {
2641 CFRelease(locale);
2642 }
2643 return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2644 }
2645 }
2646
2647 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2648 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2649 str1UsedLen = 2;
2650 }
2651
2652 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
2653 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2654 str2UsedLen = 2;
2655 }
2656
2657 if (NULL != ignoredChars) {
2658 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
2659 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2660 if (strBuf1Len == 0) str1Index += str1UsedLen;
2661 if (strBuf2Len > 0) --strBuf2Index;
2662 continue;
2663 }
2664 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
2665 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2666 if (strBuf2Len == 0) str2Index += str2UsedLen;
2667 if (strBuf1Len > 0) -- strBuf1Index;
2668 continue;
2669 }
2670 }
2671
2672 if (diacriticsInsensitive && (str1Index > 0)) {
2673 bool str1Skip = false;
2674 bool str2Skip = false;
2675
2676 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
2677 str1Char = str2Char;
2678 str1Skip = true;
2679 }
2680 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
2681 str2Char = str1Char;
2682 str2Skip = true;
2683 }
2684
2685 if (str1Skip != str2Skip) {
2686 if (str1Skip) str2Index -= str2UsedLen;
2687 if (str2Skip) str1Index -= str1UsedLen;
2688 }
2689 }
2690
2691 if (str1Char != str2Char) {
2692 if (0 == strBuf1Len) {
2693 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
2694 if (strBuf1Len > 0) {
2695 str1Char = *strBuf1;
2696 strBuf1Index = 1;
2697 }
2698 }
2699
2700 if ((0 == strBuf1Len) && (0 < strBuf2Len)) {
2701 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2702 if (freeLocale && locale) {
2703 CFRelease(locale);
2704 }
2705 return compareResult;
2706 }
2707
2708 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
2709 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
2710 if (strBuf2Len > 0) {
2711 str2Char = *strBuf2;
2712 strBuf2Index = 1;
2713 }
2714 if ((0 == strBuf2Len) || (str1Char != str2Char)) {
2715 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2716 if (freeLocale && locale) {
2717 CFRelease(locale);
2718 }
2719 return compareResult;
2720 }
2721 }
2722 }
2723
2724 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
2725 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2726 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
2727 ++strBuf1Index; ++strBuf2Index;
2728 }
2729 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2730 CFComparisonResult res = ((NULL == locale) ? ((strBuf1[strBuf1Index] < strBuf2[strBuf2Index]) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2731 if (freeLocale && locale) {
2732 CFRelease(locale);
2733 }
2734 return res;
2735 }
2736 }
2737 }
2738
2739 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2740 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2741
2742 if (strBuf1Len == 0) str1Index += str1UsedLen;
2743 if (strBuf2Len == 0) str2Index += str2UsedLen;
2744 if ((strBuf1Len == 0) && (strBuf2Len == 0)) {
2745 str1LocalizedIndex = str1Index;
2746 str2LocalizedIndex = str2Index;
2747 }
2748 }
2749
2750 if (diacriticsInsensitive || (NULL != ignoredChars)) {
2751 while (str1Index < rangeToCompare.length) {
2752 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2753 if ((str1Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
2754
2755 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2756
2757 if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char))) break;
2758
2759 str1Index += ((str1Char < 0x10000) ? 1 : 2);
2760 }
2761
2762 while (str2Index < str2Len) {
2763 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2764 if ((str2Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
2765
2766 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2767
2768 if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char))) break;
2769
2770 str2Index += ((str2Char < 0x10000) ? 1 : 2);
2771 }
2772 }
2773 // Need to recalc localized result here for forced ordering, ICU cannot do numericEquivalence
2774 if (!numericEquivalence && (NULL != locale) && (kCFCompareEqualTo != compareResult) && (str1Index == rangeToCompare.length) && (str2Index == str2Len)) compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(forcedIndex1, rangeToCompare.length - forcedIndex1), &inlineBuf2, CFRangeMake(forcedIndex2, str2Len - forcedIndex2), compareOptions, locale);
2775
2776 if (freeLocale && locale) {
2777 CFRelease(locale);
2778 }
2779
2780 return ((str1Index < rangeToCompare.length) ? kCFCompareGreaterThan : ((str2Index < str2Len) ? kCFCompareLessThan : compareResult));
2781 }
2782
2783
2784 CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions) { return CFStringCompareWithOptionsAndLocale(string, string2, rangeToCompare, compareOptions, NULL); }
2785
2786 CFComparisonResult CFStringCompare(CFStringRef string, CFStringRef str2, CFOptionFlags options) {
2787 return CFStringCompareWithOptions(string, str2, CFRangeMake(0, CFStringGetLength(string)), options);
2788 }
2789
2790 Boolean CFStringFindWithOptionsAndLocale(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFLocaleRef locale, CFRange *result) {
2791 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2792 CFIndex findStrLen = CFStringGetLength(stringToFind);
2793 Boolean didFind = false;
2794 bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive)) ? true : false);
2795 CFCharacterSetInlineBuffer *ignoredChars = NULL;
2796 CFCharacterSetInlineBuffer csetBuffer;
2797
2798 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
2799 ignoredChars = &csetBuffer;
2800 lengthVariants = true;
2801 }
2802
2803 if ((findStrLen > 0) && (rangeToSearch.length > 0) && ((findStrLen <= rangeToSearch.length) || lengthVariants)) {
2804 UTF32Char strBuf1[kCFStringStackBufferLength];
2805 UTF32Char strBuf2[kCFStringStackBufferLength];
2806 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2807 UTF32Char str1Char = 0, str2Char = 0;
2808 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2809 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2810 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(stringToFind, eightBitEncoding);
2811 const UTF32Char *characters, *charactersLimit;
2812 const uint8_t *langCode = NULL;
2813 CFIndex fromLoc, toLoc;
2814 CFIndex str1Index, str2Index;
2815 CFIndex strBuf1Len, strBuf2Len;
2816 CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length);
2817 bool equalityOptions = ((lengthVariants || (compareOptions & kCFCompareWidthInsensitive)) ? true : false);
2818 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2819 bool forwardAnchor = ((kCFCompareAnchored == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
2820 bool backwardAnchor = (((kCFCompareBackwards|kCFCompareAnchored) == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
2821 int8_t delta;
2822
2823 if (NULL == locale) {
2824 if (compareOptions & kCFCompareLocalized) {
2825 CFLocaleRef currentLocale = CFLocaleCopyCurrent();
2826 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(currentLocale);
2827 CFRelease(currentLocale);
2828 }
2829 } else {
2830 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale);
2831 }
2832
2833 CFStringInitInlineBuffer(string, &inlineBuf1, CFRangeMake(0, rangeToSearch.location + rangeToSearch.length));
2834 CFStringInitInlineBuffer(stringToFind, &inlineBuf2, CFRangeMake(0, findStrLen));
2835
2836 if (compareOptions & kCFCompareBackwards) {
2837 fromLoc = rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen);
2838 toLoc = (((compareOptions & kCFCompareAnchored) && !lengthVariants) ? fromLoc : rangeToSearch.location);
2839 } else {
2840 fromLoc = rangeToSearch.location;
2841 toLoc = ((compareOptions & kCFCompareAnchored) ? fromLoc : rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen));
2842 }
2843
2844 delta = ((fromLoc <= toLoc) ? 1 : -1);
2845
2846 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2847 uint8_t str1Byte, str2Byte;
2848
2849 while (1) {
2850 str1Index = fromLoc;
2851 str2Index = 0;
2852
2853 while ((str1Index < maxStr1Index) && (str2Index < findStrLen)) {
2854 str1Byte = str1Bytes[str1Index];
2855 str2Byte = str2Bytes[str2Index];
2856
2857 if (str1Byte != str2Byte) {
2858 if (equalityOptions) {
2859 if ((str1Byte < 0x80) && ((NULL == langCode) || ('I' != str1Byte))) {
2860 if (caseInsensitive && (str1Byte >= 'A') && (str1Byte <= 'Z')) str1Byte += ('a' - 'A');
2861 *strBuf1 = str1Byte;
2862 strBuf1Len = 1;
2863 } else {
2864 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2865 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2866 if (1 > strBuf1Len) {
2867 *strBuf1 = str1Char;
2868 strBuf1Len = 1;
2869 }
2870 }
2871
2872 if ((NULL != ignoredChars) && (forwardAnchor || (str1Index != fromLoc)) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str1Byte < 0x80) ? str1Byte : str1Char))) {
2873 ++str1Index;
2874 continue;
2875 }
2876
2877 if ((str2Byte < 0x80) && ((NULL == langCode) || ('I' != str2Byte))) {
2878 if (caseInsensitive && (str2Byte >= 'A') && (str2Byte <= 'Z')) str2Byte += ('a' - 'A');
2879 *strBuf2 = str2Byte;
2880 strBuf2Len = 1;
2881 } else {
2882 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2883 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2884 if (1 > strBuf2Len) {
2885 *strBuf2 = str2Char;
2886 strBuf2Len = 1;
2887 }
2888 }
2889
2890 if ((NULL != ignoredChars) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str2Byte < 0x80) ? str2Byte : str2Char))) {
2891 ++str2Index;
2892 continue;
2893 }
2894
2895 if ((1 == strBuf1Len) && (1 == strBuf2Len)) { // normal case
2896 if (*strBuf1 != *strBuf2) break;
2897 } else {
2898 CFIndex delta;
2899
2900 if (!caseInsensitive && (strBuf1Len != strBuf2Len)) break;
2901 if (memcmp(strBuf1, strBuf2, sizeof(UTF32Char) * __CFMin(strBuf1Len, strBuf2Len))) break;
2902
2903 if (strBuf1Len < strBuf2Len) {
2904 delta = strBuf2Len - strBuf1Len;
2905
2906 if ((str1Index + strBuf1Len + delta) > maxStr1Index) break;
2907
2908 characters = &(strBuf2[strBuf1Len]);
2909 charactersLimit = characters + delta;
2910
2911 while (characters < charactersLimit) {
2912 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1), &inlineBuf1, str1Index + 1, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2913 if ((strBuf1Len > 0) || (*characters != *strBuf1)) break;
2914 ++characters; ++str1Index;
2915 }
2916 if (characters < charactersLimit) break;
2917 } else if (strBuf2Len < strBuf1Len) {
2918 delta = strBuf1Len - strBuf2Len;
2919
2920 if ((str2Index + strBuf2Len + delta) > findStrLen) break;
2921
2922 characters = &(strBuf1[strBuf2Len]);
2923 charactersLimit = characters + delta;
2924
2925 while (characters < charactersLimit) {
2926 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str1Index + 1), &inlineBuf2, str2Index + 1, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2927 if ((strBuf2Len > 0) || (*characters != *strBuf2)) break;
2928 ++characters; ++str2Index;
2929 }
2930 if (characters < charactersLimit) break;
2931 }
2932 }
2933 } else {
2934 break;
2935 }
2936 }
2937 ++str1Index; ++str2Index;
2938 }
2939
2940 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
2941 while (str2Index < findStrLen) {
2942 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2943
2944 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
2945 ++str2Index;
2946 }
2947 }
2948
2949 if (str2Index == findStrLen) {
2950 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
2951 while (str1Index < maxStr1Index) {
2952 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2953
2954 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
2955 ++str1Index;
2956 }
2957 }
2958
2959 if (!backwardAnchor || (str1Index == maxStr1Index)) {
2960 didFind = true;
2961 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
2962 }
2963 break;
2964 }
2965
2966 if (fromLoc == toLoc) break;
2967 fromLoc += delta;
2968 }
2969 } else if (equalityOptions) {
2970 UTF16Char otherChar;
2971 CFIndex str1UsedLen, str2UsedLen, strBuf1Index = 0, strBuf2Index = 0;
2972 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
2973 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2974 const uint8_t *combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
2975
2976 while (1) {
2977 str1Index = fromLoc;
2978 str2Index = 0;
2979
2980 strBuf1Len = strBuf2Len = 0;
2981
2982 while (str2Index < findStrLen) {
2983 if (strBuf1Len == 0) {
2984 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2985 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A');
2986 str1UsedLen = 1;
2987 } else {
2988 str1Char = strBuf1[strBuf1Index++];
2989 }
2990 if (strBuf2Len == 0) {
2991 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2992 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A');
2993 str2UsedLen = 1;
2994 } else {
2995 str2Char = strBuf2[strBuf2Index++];
2996 }
2997
2998 if (str1Char != str2Char) {
2999 if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars) && ((NULL == langCode) || !caseInsensitive)) break;
3000
3001 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3002 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3003 str1UsedLen = 2;
3004 }
3005
3006 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
3007 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
3008 str2UsedLen = 2;
3009 }
3010
3011 if (NULL != ignoredChars) {
3012 if ((forwardAnchor || (str1Index != fromLoc)) && (str1Index < maxStr1Index) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
3013 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
3014 if (strBuf1Len == 0) str1Index += str1UsedLen;
3015 if (strBuf2Len > 0) --strBuf2Index;
3016 continue;
3017 }
3018 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
3019 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
3020 if (strBuf2Len == 0) str2Index += str2UsedLen;
3021 if (strBuf1Len > 0) -- strBuf1Index;
3022 continue;
3023 }
3024 }
3025
3026 if (diacriticsInsensitive && (str1Index > fromLoc)) {
3027 bool str1Skip = false;
3028 bool str2Skip = false;
3029
3030 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
3031 str1Char = str2Char;
3032 str1Skip = true;
3033 }
3034 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
3035 str2Char = str1Char;
3036 str2Skip = true;
3037 }
3038
3039 if (str1Skip != str2Skip) {
3040 if (str1Skip) str2Index -= str2UsedLen;
3041 if (str2Skip) str1Index -= str1UsedLen;
3042 }
3043 }
3044
3045 if (str1Char != str2Char) {
3046 if (0 == strBuf1Len) {
3047 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
3048 if (strBuf1Len > 0) {
3049 str1Char = *strBuf1;
3050 strBuf1Index = 1;
3051 }
3052 }
3053
3054 if ((0 == strBuf1Len) && (0 < strBuf2Len)) break;
3055
3056 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
3057 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
3058 if ((0 == strBuf2Len) || (str1Char != *strBuf2)) break;
3059 strBuf2Index = 1;
3060 }
3061 }
3062
3063 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
3064 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
3065 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
3066 ++strBuf1Index; ++strBuf2Index;
3067 }
3068 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) break;
3069 }
3070 }
3071
3072 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
3073 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
3074
3075 if (strBuf1Len == 0) str1Index += str1UsedLen;
3076 if (strBuf2Len == 0) str2Index += str2UsedLen;
3077 }
3078
3079 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
3080 while (str2Index < findStrLen) {
3081 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
3082 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
3083 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
3084 }
3085 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
3086 str2Index += ((str2Char < 0x10000) ? 1 : 2);
3087 }
3088 }
3089
3090 if (str2Index == findStrLen) {
3091 bool match = true;
3092
3093 if (strBuf1Len > 0) {
3094 match = false;
3095
3096 if (diacriticsInsensitive && (strBuf1[0] < 0x0510)) {
3097 while (strBuf1Index < strBuf1Len) {
3098 if (!CFUniCharIsMemberOfBitmap(strBuf1[strBuf1Index], ((strBuf1[strBuf1Index] < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (strBuf1[strBuf1Index] >> 16))))) break;
3099 ++strBuf1Index;
3100 }
3101
3102 if (strBuf1Index == strBuf1Len) {
3103 str1Index += str1UsedLen;
3104 match = true;
3105 }
3106 }
3107 }
3108
3109 if (match && (compareOptions & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) && (str1Index < maxStr1Index)) {
3110 const uint8_t *nonBaseBitmap;
3111
3112 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3113
3114 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3115 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3116 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16));
3117 } else {
3118 nonBaseBitmap = graphemeBMP;
3119 }
3120
3121 if (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBitmap)) {
3122 if (diacriticsInsensitive) {
3123 if (str1Char < 0x10000) {
3124 CFIndex index = str1Index;
3125
3126 do {
3127 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, --index);
3128 } while (CFUniCharIsMemberOfBitmap(str1Char, graphemeBMP), (rangeToSearch.location < index));
3129
3130 if (str1Char < 0x0510) {
3131 while (++str1Index < maxStr1Index) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), graphemeBMP)) break;
3132 }
3133 }
3134 } else {
3135 match = false;
3136 }
3137 } else if (!diacriticsInsensitive) {
3138 otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index - 1);
3139
3140 // this is assuming viramas are only in BMP ???
3141 if ((str1Char == COMBINING_GRAPHEME_JOINER) || (otherChar == COMBINING_GRAPHEME_JOINER) || (otherChar == ZERO_WIDTH_JOINER) || ((otherChar >= HANGUL_CHOSEONG_START) && (otherChar <= HANGUL_JONGSEONG_END)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar, combClassBMP) == 9)) {
3142 CFRange clusterRange = CFStringGetRangeOfCharacterClusterAtIndex(string, str1Index - 1, kCFStringGraphemeCluster);
3143
3144 if (str1Index < (clusterRange.location + clusterRange.length)) match = false;
3145 }
3146 }
3147 }
3148
3149 if (match) {
3150 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
3151 while (str1Index < maxStr1Index) {
3152 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3153 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3154 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3155 }
3156 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
3157 str1Index += ((str1Char < 0x10000) ? 1 : 2);
3158 }
3159 }
3160
3161 if (!backwardAnchor || (str1Index == maxStr1Index)) {
3162 didFind = true;
3163 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
3164 }
3165 break;
3166 }
3167 }
3168
3169 if (fromLoc == toLoc) break;
3170 fromLoc += delta;
3171 }
3172 } else {
3173 while (1) {
3174 str1Index = fromLoc;
3175 str2Index = 0;
3176
3177 while (str2Index < findStrLen) {
3178 if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index)) break;
3179
3180 ++str1Index; ++str2Index;
3181 }
3182
3183 if (str2Index == findStrLen) {
3184 didFind = true;
3185 if (NULL != result) *result = CFRangeMake(fromLoc, findStrLen);
3186 break;
3187 }
3188
3189 if (fromLoc == toLoc) break;
3190 fromLoc += delta;
3191 }
3192 }
3193 }
3194
3195 return didFind;
3196 }
3197
3198
3199 Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFRange *result) { return CFStringFindWithOptionsAndLocale(string, stringToFind, rangeToSearch, compareOptions, NULL, result); }
3200
3201 // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
3202
3203 static const void *__rangeRetain(CFAllocatorRef allocator, const void *ptr) {
3204 CFRetain(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
3205 return ptr;
3206 }
3207
3208 static void __rangeRelease(CFAllocatorRef allocator, const void *ptr) {
3209 CFRelease(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
3210 }
3211
3212 static CFStringRef __rangeCopyDescription(const void *ptr) {
3213 CFRange range = *(CFRange *)ptr;
3214 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("{%d, %d}"), range.location, range.length);
3215 }
3216
3217 static Boolean __rangeEqual(const void *ptr1, const void *ptr2) {
3218 CFRange range1 = *(CFRange *)ptr1;
3219 CFRange range2 = *(CFRange *)ptr2;
3220 return (range1.location == range2.location) && (range1.length == range2.length);
3221 }
3222
3223
3224 CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
3225 CFRange foundRange;
3226 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
3227 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
3228 CFMutableDataRef rangeStorage = NULL; // Basically an array of CFRange, CFDataRef (packed)
3229 uint8_t *rangeStorageBytes = NULL;
3230 CFIndex foundCount = 0;
3231 CFIndex capacity = 0; // Number of CFRange, CFDataRef element slots in rangeStorage
3232
3233 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3234
3235 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
3236 // Determine the next range
3237 if (backwards) {
3238 rangeToSearch.length = foundRange.location - rangeToSearch.location;
3239 } else {
3240 rangeToSearch.location = foundRange.location + foundRange.length;
3241 rangeToSearch.length = endIndex - rangeToSearch.location;
3242 }
3243
3244 // If necessary, grow the data and squirrel away the found range
3245 if (foundCount >= capacity) {
3246 // Note that rangeStorage is not allowed to be allocated from one of the GCRefZero allocators
3247 if (rangeStorage == NULL) rangeStorage = CFDataCreateMutable(_CFConvertAllocatorToNonGCRefZeroEquivalent(alloc), 0);
3248 capacity = (capacity + 4) * 2;
3249 CFDataSetLength(rangeStorage, capacity * (sizeof(CFRange) + sizeof(CFDataRef)));
3250 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage) + foundCount * (sizeof(CFRange) + sizeof(CFDataRef));
3251 }
3252 memmove(rangeStorageBytes, &foundRange, sizeof(CFRange)); // The range
3253 memmove(rangeStorageBytes + sizeof(CFRange), &rangeStorage, sizeof(CFDataRef)); // The data
3254 rangeStorageBytes += (sizeof(CFRange) + sizeof(CFDataRef));
3255 foundCount++;
3256 }
3257
3258 if (foundCount > 0) {
3259 CFIndex cnt;
3260 CFMutableArrayRef array;
3261 const CFArrayCallBacks callbacks = {0, __rangeRetain, __rangeRelease, __rangeCopyDescription, __rangeEqual};
3262
3263 CFDataSetLength(rangeStorage, foundCount * (sizeof(CFRange) + sizeof(CFDataRef))); // Tighten storage up
3264 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage);
3265
3266 array = CFArrayCreateMutable(alloc, foundCount * sizeof(CFRange *), &callbacks);
3267 for (cnt = 0; cnt < foundCount; cnt++) {
3268 // Each element points to the appropriate CFRange in the CFData
3269 CFArrayAppendValue(array, rangeStorageBytes + cnt * (sizeof(CFRange) + sizeof(CFDataRef)));
3270 }
3271 CFRelease(rangeStorage); // We want the data to go away when all CFRanges inside it are released...
3272 return array;
3273 } else {
3274 return NULL;
3275 }
3276 }
3277
3278
3279 CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFStringCompareFlags compareOptions) {
3280 CFRange foundRange;
3281
3282 if (CFStringFindWithOptions(string, stringToFind, CFRangeMake(0, CFStringGetLength(string)), compareOptions, &foundRange)) {
3283 return foundRange;
3284 } else {
3285 return CFRangeMake(kCFNotFound, 0);
3286 }
3287 }
3288
3289 Boolean CFStringHasPrefix(CFStringRef string, CFStringRef prefix) {
3290 return CFStringFindWithOptions(string, prefix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored, NULL);
3291 }
3292
3293 Boolean CFStringHasSuffix(CFStringRef string, CFStringRef suffix) {
3294 return CFStringFindWithOptions(string, suffix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored|kCFCompareBackwards, NULL);
3295 }
3296
// NOTE(review): no use of MAX_TRANSCODING_LENGTH is visible in this part of the file; confirm its meaning at its point of use.
3297 #define MAX_TRANSCODING_LENGTH 4
3298
// Modulus that _CFStringIsHangulLVT() applies to a character's offset from HANGUL_SYLLABLE_START.
3299 #define HANGUL_JONGSEONG_COUNT (28)
3300
3301 CF_INLINE bool _CFStringIsHangulLVT(UTF32Char character) {
3302 return (((character - HANGUL_SYLLABLE_START) % HANGUL_JONGSEONG_COUNT) ? true : false);
3303 }
3304
// 16-entry table of small length values.
// NOTE(review): the lookup site is outside this part of the file; presumably indexed by a 4-bit transcoding hint value — confirm.
3305 static uint8_t __CFTranscodingHintLength[] = {
3306 2, 3, 4, 4, 4, 4, 4, 2, 2, 2, 2, 4, 0, 0, 0, 0
3307 };
3308
// States of the Hangul scanner in _CFStringInlineBufferGetComposedRange().
// The state records the kind of the Hangul character seen last:
//   L   - character below HANGUL_JUNGSEONG_START
//   V   - character below HANGUL_JONGSEONG_START
//   T   - character below HANGUL_SYLLABLE_START
//   LV  - syllable for which _CFStringIsHangulLVT() is false
//   LVT - syllable for which _CFStringIsHangulLVT() is true
//   Break - the next character does not continue the sequence; scanning stops
3309 enum {
3310 kCFStringHangulStateL,
3311 kCFStringHangulStateV,
3312 kCFStringHangulStateT,
3313 kCFStringHangulStateLV,
3314 kCFStringHangulStateLVT,
3315 kCFStringHangulStateBreak
3316 };
3317
// Computes the range of the composed character cluster that contains index 'start'.
//
//   buffer     inline buffer the characters are read from
//   start      index of a character inside the cluster; the result is grown around it
//   type       cluster type; kCFStringBackwardDeletionCluster disables combining for
//              characters in 0x0530..0x194F
//   bmpBitmap  membership bitmap consulted for characters below 0x10000
//   csetType   character set id used to fetch the bitmap for characters at or above 0x10000
//
// Returns {start, end - start} after 'start' was moved backward over combining members
// (and a preceding Hangul sequence) and 'end' forward over a following Hangul sequence
// and combining members. Besides bitmap members, 0xFF9E, 0xFF9F and 0xF870..0xF87F are
// also treated as combining.
3318 static CFRange _CFStringInlineBufferGetComposedRange(CFStringInlineBuffer *buffer, CFIndex start, CFStringCharacterClusterType type, const uint8_t *bmpBitmap, CFIndex csetType) {
3319 CFIndex end = start + 1;
3320 const uint8_t *bitmap = bmpBitmap;
3321 UTF32Char character;
3322 UTF16Char otherSurrogate;
3323 uint8_t step;
3324
3325 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3326
3327 // We don't combine characters in Armenian ~ Limbu range for backward deletion
3328 if ((type != kCFStringBackwardDeletionCluster) || (character < 0x0530) || (character > 0x194F)) {
3329 // Check if the current is surrogate
3330 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start + 1)))) {
3331 ++end;
3332 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3333 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3334 }
3335
3336 // Extend backward
// Moves 'start' back while the character at 'start' is a combining member; stops at the first non-member (the base) or at index 0.
3337 while (start > 0) {
3338 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3339
3340 if (character < 0x10000) { // the first round could be already be non-BMP
// A low surrogate preceded by a high surrogate: step onto the high half and test the combined character.
3341 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)))) {
3342 character = CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate, character);
3343 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3344 if (--start == 0) break; // starting with non-BMP combining mark
3345 } else {
3346 bitmap = bmpBitmap;
3347 }
3348 }
3349
3350 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3351
3352 --start;
3353
3354 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3355 }
3356 }
3357
3358 // Hangul
// 'character' is now the character at the (possibly moved) 'start'. If it is a jamo or a
// precomposed syllable, grow the range over the adjacent jamo/syllables forming one sequence.
3359 if (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END))) {
3360 uint8_t state;
3361 uint8_t initialState;
3362
// Classify the starting character into one of the kCFStringHangulState* values.
3363 if (character < HANGUL_JUNGSEONG_START) {
3364 state = kCFStringHangulStateL;
3365 } else if (character < HANGUL_JONGSEONG_START) {
3366 state = kCFStringHangulStateV;
3367 } else if (character < HANGUL_SYLLABLE_START) {
3368 state = kCFStringHangulStateT;
3369 } else {
3370 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3371 }
3372 initialState = state;
3373
3374 // Extend backward
// 'state' describes the character at 'start'; each round decides whether the preceding Hangul character may come before it.
3375 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)) >= HANGUL_CHOSEONG_START) && (character <= HANGUL_SYLLABLE_END) && ((character <= HANGUL_JONGSEONG_END) || (character >= HANGUL_SYLLABLE_START))) {
3376 switch (state) {
3377 case kCFStringHangulStateV:
3378 if (character <= HANGUL_CHOSEONG_END) {
3379 state = kCFStringHangulStateL;
3380 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END) && !_CFStringIsHangulLVT(character)) {
3381 state = kCFStringHangulStateLV;
3382 } else if (character > HANGUL_JUNGSEONG_END) {
3383 state = kCFStringHangulStateBreak;
3384 }
3385 break;
3386
3387 case kCFStringHangulStateT:
3388 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JUNGSEONG_END)) {
3389 state = kCFStringHangulStateV;
3390 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)) {
3391 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3392 } else if (character < HANGUL_JUNGSEONG_START) {
3393 state = kCFStringHangulStateBreak;
3394 }
3395 break;
3396
// Any other state (L, LV, LVT): only a character below HANGUL_JUNGSEONG_START may precede.
3397 default:
3398 state = ((character < HANGUL_JUNGSEONG_START) ? kCFStringHangulStateL : kCFStringHangulStateBreak);
3399 break;
3400 }
3401
3402 if (state == kCFStringHangulStateBreak) break;
3403 --start;
3404 }
3405
3406 // Extend forward
// Restart from the state of the original character; the loop ends when the buffer returns 0 or a non-Hangul character.
3407 state = initialState;
3408 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) && (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)))) {
3409 switch (state) {
3410 case kCFStringHangulStateLV:
3411 case kCFStringHangulStateV:
3412 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) {
3413 state = ((character < HANGUL_JONGSEONG_START) ? kCFStringHangulStateV : kCFStringHangulStateT);
3414 } else {
3415 state = kCFStringHangulStateBreak;
3416 }
3417 break;
3418
3419 case kCFStringHangulStateLVT:
3420 case kCFStringHangulStateT:
3421 state = (((character >= HANGUL_JONGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) ? kCFStringHangulStateT : kCFStringHangulStateBreak);
3422 break;
3423
// State L: any jamo below HANGUL_JONGSEONG_START or any precomposed syllable may follow.
3424 default:
3425 if (character < HANGUL_JUNGSEONG_START) {
3426 state = kCFStringHangulStateL;
3427 } else if (character < HANGUL_JONGSEONG_START) {
3428 state = kCFStringHangulStateV;
3429 } else if (character >= HANGUL_SYLLABLE_START) {
3430 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3431 } else {
3432 state = kCFStringHangulStateBreak;
3433 }
3434 break;
3435 }
3436
3437 if (state == kCFStringHangulStateBreak) break;
3438 ++end;
3439 }
3440 }
3441
3442 // Extend forward
// Moves 'end' over following combining members; 'step' is 2 for a surrogate pair and 1 otherwise.
3443 while ((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) {
3444 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3445
3446 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, end + 1)))) {
3447 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3448 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3449 step = 2;
3450 } else {
3451 bitmap = bmpBitmap;
3452 step = 1;
3453 }
3454
3455 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3456
3457 end += step;
3458 }
3459
3460 return CFRangeMake(start, end - start);
3461 }
3462
3463 CF_INLINE bool _CFStringIsVirama(UTF32Char character, const uint8_t *combClassBMP) {
3464 return ((character == COMBINING_GRAPHEME_JOINER) || (CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)((character < 0x10000) ? combClassBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (character >> 16)))) == 9) ? true : false);
3465 }
3466
3467 CFRange CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string, CFIndex charIndex, CFStringCharacterClusterType type) {
3468 CFRange range;
3469 CFIndex currentIndex;
3470 CFIndex length = CFStringGetLength(string);
3471 CFIndex csetType = ((kCFStringGraphemeCluster == type) ? kCFUniCharGraphemeExtendCharacterSet : kCFUniCharNonBaseCharacterSet);
3472 CFStringInlineBuffer stringBuffer;
3473 const uint8_t *bmpBitmap;
3474 const uint8_t *letterBMP;
3475 static const uint8_t *combClassBMP = NULL;
3476 UTF32Char character;
3477 UTF16Char otherSurrogate;
3478
3479 if (charIndex >= length) return CFRangeMake(kCFNotFound, 0);
3480
3481 /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
3482 */
3483 if (!CF_IS_OBJC(__kCFStringTypeID, string) && __CFStrIsEightBit(string)) return CFRangeMake(charIndex, 1);
3484
3485 bmpBitmap = CFUniCharGetBitmapPtrForPlane(csetType, 0);
3486 letterBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, 0);
3487 if (NULL == combClassBMP) combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3488
3489 CFStringInitInlineBuffer(string, &stringBuffer, CFRangeMake(0, length));
3490
3491 // Get composed character sequence first
3492 range = _CFStringInlineBufferGetComposedRange(&stringBuffer, charIndex, type, bmpBitmap, csetType);
3493
3494 // Do grapheme joiners
3495 if (type < kCFStringCursorMovementCluster) {
3496 const uint8_t *letter = letterBMP;
3497
3498 // Check to see if we have a letter at the beginning of initial cluster
3499 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location);
3500
3501 if ((range.length > 1) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location + 1)))) {
3502 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3503 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3504 }
3505
3506 if ((character == ZERO_WIDTH_JOINER) || CFUniCharIsMemberOfBitmap(character, letter)) {
3507 CFRange otherRange;
3508
3509 // Check if preceded by grapheme joiners (U034F and viramas)
3510 otherRange.location = currentIndex = range.location;
3511
3512 while (currentIndex > 1) {
3513 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex);
3514
3515 // ??? We're assuming viramas only in BMP
3516 if ((_CFStringIsVirama(character, combClassBMP) || ((character == ZERO_WIDTH_JOINER) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex), combClassBMP))) && (currentIndex > 0)) {
3517 --currentIndex;
3518 } else {
3519 break;
3520 }
3521
3522 currentIndex = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType).location;
3523
3524 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3525
3526 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1)))) {
3527 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3528 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3529 --currentIndex;
3530 } else {
3531 letter = letterBMP;
3532 }
3533
3534 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3535 range.location = currentIndex;
3536 }
3537
3538 range.length += otherRange.location - range.location;
3539
3540 // Check if followed by grapheme joiners
3541 if ((range.length > 1) && ((range.location + range.length) < length)) {
3542 otherRange = range;
3543 currentIndex = otherRange.location + otherRange.length;
3544
3545 do {
3546 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1);
3547
3548 // ??? We're assuming viramas only in BMP
3549 if ((character != ZERO_WIDTH_JOINER) && !_CFStringIsVirama(character, combClassBMP)) break;
3550
3551 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3552
3553 if (character == ZERO_WIDTH_JOINER) character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, ++currentIndex);
3554
3555 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex + 1)))) {
3556 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3557 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3558 } else {
3559 letter = letterBMP;
3560 }
3561
3562 // We only conjoin letters
3563 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3564 otherRange = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType);
3565 currentIndex = otherRange.location + otherRange.length;
3566 } while ((otherRange.location + otherRange.length) < length);
3567 range.length = currentIndex - range.location;
3568 }
3569 }
3570 }
3571
3572 // Check if we're part of prefix transcoding hints
3573 CFIndex otherIndex;
3574
3575 currentIndex = (range.location + range.length) - (MAX_TRANSCODING_LENGTH + 1);
3576 if (currentIndex < 0) currentIndex = 0;
3577
3578 while (currentIndex <= range.location) {
3579 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3580
3581 if ((character & 0x1FFFF0) == 0xF860) { // transcoding hint
3582 otherIndex = currentIndex + __CFTranscodingHintLength[(character - 0xF860)] + 1;
3583 if (otherIndex >= (range.location + range.length)) {
3584 if (otherIndex <= length) {
3585 range.location = currentIndex;
3586 range.length = otherIndex - currentIndex;
3587 }
3588 break;
3589 }
3590 }
3591 ++currentIndex;
3592 }
3593
3594 return range;
3595 }
3596
3597 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
3598 return CFStringGetRangeOfCharacterClusterAtIndex(theString, theIndex, kCFStringComposedCharacterCluster);
3599 }
3600
3601 /*!
3602 @function CFStringFindCharacterFromSet
3603 Query the range of characters contained in the specified character set.
3604 @param theString The CFString which is to be searched. If this
3605 parameter is not a valid CFString, the behavior is
3606 undefined.
3607 @param theSet The CFCharacterSet against which the membership
3608 of characters is checked. If this parameter is not a valid
3609 CFCharacterSet, the behavior is undefined.
3610 @param range The range of characters within the string to search. If
3611 the range location or end point (defined by the location
3612 plus length minus 1) are outside the index space of the
3613 string (0 to N-1 inclusive, where N is the length of the
3614 string), the behavior is undefined. If the range length is
3615 negative, the behavior is undefined. The range may be empty
3616 (length 0), in which case no search is performed.
3617 @param searchOptions The bitwise-or'ed option flags to control
3618 the search behavior. The supported options are
3619 kCFCompareBackwards andkCFCompareAnchored.
3620 If other option flags are specified, the behavior
3621 is undefined.
3622 @param result The pointer to a CFRange supplied by the caller in
3623 which the search result is stored. If a pointer to an invalid
3624 memory is specified, the behavior is undefined.
3625 @result true, if at least a character which is a member of the character
3626 set is found and result is filled, otherwise, false.
3627 */
3628 #define SURROGATE_START 0xD800
3629 #define SURROGATE_END 0xDFFF
3630
3631 CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFStringCompareFlags searchOptions, CFRange *result) {
3632 CFStringInlineBuffer stringBuffer;
3633 CFCharacterSetInlineBuffer csetBuffer;
3634 UniChar ch;
3635 CFIndex step;
3636 CFIndex fromLoc, toLoc, cnt; // fromLoc and toLoc are inclusive
3637 Boolean found = false;
3638 Boolean done = false;
3639
3640 //#warning FIX ME !! Should support kCFCompareNonliteral
3641
3642 if ((rangeToSearch.location + rangeToSearch.length > CFStringGetLength(theString)) || (rangeToSearch.length == 0)) return false;
3643
3644 if (searchOptions & kCFCompareBackwards) {
3645 fromLoc = rangeToSearch.location + rangeToSearch.length - 1;
3646 toLoc = rangeToSearch.location;
3647 } else {
3648 fromLoc = rangeToSearch.location;
3649 toLoc = rangeToSearch.location + rangeToSearch.length - 1;
3650 }
3651 if (searchOptions & kCFCompareAnchored) {
3652 toLoc = fromLoc;
3653 }
3654
3655 step = (fromLoc <= toLoc) ? 1 : -1;
3656 cnt = fromLoc;
3657
3658 CFStringInitInlineBuffer(theString, &stringBuffer, rangeToSearch);
3659 CFCharacterSetInitInlineBuffer(theSet, &csetBuffer);
3660
3661 do {
3662 ch = CFStringGetCharacterFromInlineBuffer(&stringBuffer, cnt - rangeToSearch.location);
3663 if ((ch >= SURROGATE_START) && (ch <= SURROGATE_END)) {
3664 int otherCharIndex = cnt + step;
3665
3666 if (((step < 0) && (otherCharIndex < toLoc)) || ((step > 0) && (otherCharIndex > toLoc))) {
3667 done = true;
3668 } else {
3669 UniChar highChar;
3670 UniChar lowChar = CFStringGetCharacterFromInlineBuffer(&stringBuffer, otherCharIndex - rangeToSearch.location);
3671
3672 if (cnt < otherCharIndex) {
3673 highChar = ch;
3674 } else {
3675 highChar = lowChar;
3676 lowChar = ch;
3677 }
3678
3679 if (CFUniCharIsSurrogateHighCharacter(highChar) && CFUniCharIsSurrogateLowCharacter(lowChar) && CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, CFUniCharGetLongCharacterForSurrogatePair(highChar, lowChar))) {
3680 if (result) *result = CFRangeMake((cnt < otherCharIndex ? cnt : otherCharIndex), 2);
3681 return true;
3682 } else if (otherCharIndex == toLoc) {
3683 done = true;
3684 } else {
3685 cnt = otherCharIndex + step;
3686 }
3687 }
3688 } else if (CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, ch)) {
3689 done = found = true;
3690 } else if (cnt == toLoc) {
3691 done = true;
3692 } else {
3693 cnt += step;
3694 }
3695 } while (!done);
3696
3697 if (found && result) *result = CFRangeMake(cnt, 1);
3698 return found;
3699 }
3700
3701 /* Line range code */
3702
3703 #define CarriageReturn '\r' /* 0x0d */
3704 #define NewLine '\n' /* 0x0a */
3705 #define NextLine 0x0085
3706 #define LineSeparator 0x2028
3707 #define ParaSeparator 0x2029
3708
3709 CF_INLINE Boolean isALineSeparatorTypeCharacter(UniChar ch, Boolean includeLineEndings) {
3710 if (ch > CarriageReturn && ch < NextLine) return false; /* Quick test to cover most chars */
3711 return (ch == NewLine || ch == CarriageReturn || ch == ParaSeparator || (includeLineEndings && (ch == NextLine || ch == LineSeparator))) ? true : false;
3712 }
3713
3714 static void __CFStringGetLineOrParagraphBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex, Boolean includeLineEndings) {
3715 CFIndex len;
3716 CFStringInlineBuffer buf;
3717 UniChar ch;
3718
3719 __CFAssertIsString(string);
3720 __CFAssertRangeIsInStringBounds(string, range.location, range.length);
3721
3722 len = __CFStrLength(string);
3723
3724 if (lineBeginIndex) {
3725 CFIndex start;
3726 if (range.location == 0) {
3727 start = 0;
3728 } else {
3729 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3730 CFIndex buf_idx = range.location;
3731
3732 /* Take care of the special case where start happens to fall right between \r and \n */
3733 ch = CFStringGetCharacterFromInlineBuffer(&buf, buf_idx);
3734 buf_idx--;
3735 if ((ch == NewLine) && (CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) == CarriageReturn)) {
3736 buf_idx--;
3737 }
3738 while (1) {
3739 if (buf_idx < 0) {
3740 start = 0;
3741 break;
3742 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx), includeLineEndings)) {
3743 start = buf_idx + 1;
3744 break;
3745 } else {
3746 buf_idx--;
3747 }
3748 }
3749 }
3750 *lineBeginIndex = start;
3751 }
3752
3753 /* Now find the ending point */
3754 if (lineEndIndex || contentsEndIndex) {
3755 CFIndex endOfContents, lineSeparatorLength = 1; /* 1 by default */
3756 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3757 CFIndex buf_idx = range.location + range.length - (range.length ? 1 : 0);
3758 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
3759 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3760 if (ch == NewLine) {
3761 endOfContents = buf_idx;
3762 buf_idx--;
3763 if (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == CarriageReturn) {
3764 lineSeparatorLength = 2;
3765 endOfContents--;
3766 }
3767 } else {
3768 while (1) {
3769 if (isALineSeparatorTypeCharacter(ch, includeLineEndings)) {
3770 endOfContents = buf_idx; /* This is actually end of contentsRange */
3771 buf_idx++; /* OK for this to go past the end */
3772 if ((ch == CarriageReturn) && (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == NewLine)) {
3773 lineSeparatorLength = 2;
3774 }
3775 break;
3776 } else if (buf_idx >= len) {
3777 endOfContents = len;
3778 lineSeparatorLength = 0;
3779 break;
3780 } else {
3781 buf_idx++;
3782 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3783 }
3784 }
3785 }
3786 if (contentsEndIndex) *contentsEndIndex = endOfContents;
3787 if (lineEndIndex) *lineEndIndex = endOfContents + lineSeparatorLength;
3788 }
3789 }
3790
/* Computes line bounds for `range` by calling __CFStringGetLineOrParagraphBounds() with
   includeLineEndings == true, so NextLine (0x0085) and LineSeparator (0x2028) are treated as
   terminators in addition to NewLine, CarriageReturn and ParaSeparator.
   Any of lineBeginIndex, lineEndIndex and contentsEndIndex may be NULL to skip that result.
   NOTE(review): CF_OBJC_FUNCDISPATCH4 is defined outside this section; presumably it forwards to
   the named Objective-C method and returns when `string` is not a native CFString - confirm.
*/
void CFStringGetLineBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex) {
    CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID, void, string, "getLineStart:end:contentsEnd:forRange:", lineBeginIndex, lineEndIndex, contentsEndIndex, range);
    __CFStringGetLineOrParagraphBounds(string, range, lineBeginIndex, lineEndIndex, contentsEndIndex, true);
}
3795
/* Computes paragraph bounds for `range` by calling __CFStringGetLineOrParagraphBounds() with
   includeLineEndings == false, so only NewLine, CarriageReturn and ParaSeparator terminate a
   paragraph (NextLine and LineSeparator do not).
   Any of parBeginIndex, parEndIndex and contentsEndIndex may be NULL to skip that result.
   NOTE(review): CF_OBJC_FUNCDISPATCH4 is defined outside this section; presumably it forwards to
   the named Objective-C method and returns when `string` is not a native CFString - confirm.
*/
void CFStringGetParagraphBounds(CFStringRef string, CFRange range, CFIndex *parBeginIndex, CFIndex *parEndIndex, CFIndex *contentsEndIndex) {
    CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID, void, string, "getParagraphStart:end:contentsEnd:forRange:", parBeginIndex, parEndIndex, contentsEndIndex, range);
    __CFStringGetLineOrParagraphBounds(string, range, parBeginIndex, parEndIndex, contentsEndIndex, false);
}
3800
3801
3802 CFStringRef CFStringCreateByCombiningStrings(CFAllocatorRef alloc, CFArrayRef array, CFStringRef separatorString) {
3803 CFIndex numChars;
3804 CFIndex separatorNumByte;
3805 CFIndex stringCount = CFArrayGetCount(array);
3806 Boolean isSepCFString = !CF_IS_OBJC(__kCFStringTypeID, separatorString);
3807 Boolean canBeEightbit = isSepCFString && __CFStrIsEightBit(separatorString);
3808 CFIndex idx;
3809 CFStringRef otherString;
3810 void *buffer;
3811 uint8_t *bufPtr;
3812 const void *separatorContents = NULL;
3813
3814 if (stringCount == 0) {
3815 return CFStringCreateWithCharacters(alloc, NULL, 0);
3816 } else if (stringCount == 1) {
3817 return (CFStringRef)CFStringCreateCopy(alloc, (CFStringRef)CFArrayGetValueAtIndex(array, 0));
3818 }
3819
3820 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3821
3822 numChars = CFStringGetLength(separatorString) * (stringCount - 1);
3823 for (idx = 0; idx < stringCount; idx++) {
3824 otherString = (CFStringRef)CFArrayGetValueAtIndex(array, idx);
3825 numChars += CFStringGetLength(otherString);
3826 // canBeEightbit is already false if the separator is an NSString...
3827 if (CF_IS_OBJC(__kCFStringTypeID, otherString) || ! __CFStrIsEightBit(otherString)) canBeEightbit = false;
3828 }
3829
3830 buffer = (uint8_t *)CFAllocatorAllocate(alloc, canBeEightbit ? ((numChars + 1) * sizeof(uint8_t)) : (numChars * sizeof(UniChar)), 0);
3831 bufPtr = (uint8_t *)buffer;
3832 if (__CFOASafe) __CFSetLastAllocationEventName(buffer, "CFString (store)");
3833 separatorNumByte = CFStringGetLength(separatorString) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3834
3835 for (idx = 0; idx < stringCount; idx++) {
3836 if (idx) { // add separator here unless first string
3837 if (separatorContents) {
3838 memmove(bufPtr, separatorContents, separatorNumByte);
3839 } else {
3840 if (!isSepCFString) { // NSString
3841 CFStringGetCharacters(separatorString, CFRangeMake(0, CFStringGetLength(separatorString)), (UniChar *)bufPtr);
3842 } else if (canBeEightbit) {
3843 memmove(bufPtr, (const uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), separatorNumByte);
3844 } else {
3845 __CFStrConvertBytesToUnicode((uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), (UniChar *)bufPtr, __CFStrLength(separatorString));
3846 }
3847 separatorContents = bufPtr;
3848 }
3849 bufPtr += separatorNumByte;
3850 }
3851
3852 otherString = (CFStringRef )CFArrayGetValueAtIndex(array, idx);
3853 if (CF_IS_OBJC(__kCFStringTypeID, otherString)) {
3854 CFIndex otherLength = CFStringGetLength(otherString);
3855 CFStringGetCharacters(otherString, CFRangeMake(0, otherLength), (UniChar *)bufPtr);
3856 bufPtr += otherLength * sizeof(UniChar);
3857 } else {
3858 const uint8_t * otherContents = (const uint8_t *)__CFStrContents(otherString);
3859 CFIndex otherNumByte = __CFStrLength2(otherString, otherContents) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3860
3861 if (canBeEightbit || __CFStrIsUnicode(otherString)) {
3862 memmove(bufPtr, otherContents + __CFStrSkipAnyLengthByte(otherString), otherNumByte);
3863 } else {
3864 __CFStrConvertBytesToUnicode(otherContents + __CFStrSkipAnyLengthByte(otherString), (UniChar *)bufPtr, __CFStrLength2(otherString, otherContents));
3865 }
3866 bufPtr += otherNumByte;
3867 }
3868 }
3869 if (canBeEightbit) *bufPtr = 0; // NULL byte;
3870
3871 return canBeEightbit ?
3872 CFStringCreateWithCStringNoCopy(alloc, (const char*)buffer, __CFStringGetEightBitStringEncoding(), alloc) :
3873 CFStringCreateWithCharactersNoCopy(alloc, (UniChar *)buffer, numChars, alloc);
3874 }
3875
3876
3877 CFArrayRef CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc, CFStringRef string, CFStringRef separatorString) {
3878 CFArrayRef separatorRanges;
3879 CFIndex length = CFStringGetLength(string);
3880 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
3881 if (!(separatorRanges = CFStringCreateArrayWithFindResults(alloc, string, separatorString, CFRangeMake(0, length), 0))) {
3882 return CFArrayCreate(alloc, (const void **)&string, 1, & kCFTypeArrayCallBacks);
3883 } else {
3884 CFIndex idx;
3885 CFIndex count = CFArrayGetCount(separatorRanges);
3886 CFIndex startIndex = 0;
3887 CFIndex numChars;
3888 CFMutableArrayRef array = CFArrayCreateMutable(alloc, count + 2, & kCFTypeArrayCallBacks);
3889 const CFRange *currentRange;
3890 CFStringRef substring;
3891
3892 for (idx = 0;idx < count;idx++) {
3893 currentRange = (const CFRange *)CFArrayGetValueAtIndex(separatorRanges, idx);
3894 numChars = currentRange->location - startIndex;
3895 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, numChars));
3896 CFArrayAppendValue(array, substring);
3897 if (!_CFAllocatorIsGCRefZero(alloc)) CFRelease(substring);
3898 startIndex = currentRange->location + currentRange->length;
3899 }
3900 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, length - startIndex));
3901 CFArrayAppendValue(array, substring);
3902 if (!_CFAllocatorIsGCRefZero(alloc)) CFRelease(substring);
3903
3904 if (!_CFAllocatorIsGCRefZero(alloc)) CFRelease(separatorRanges);
3905
3906 return array;
3907 }
3908 }
3909
3910 CFStringRef CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc, CFDataRef data, CFStringEncoding encoding) {
3911 return CFStringCreateWithBytes(alloc, CFDataGetBytePtr(data), CFDataGetLength(data), encoding, true);
3912 }
3913
3914
/* Encodes `string` in `encoding` and returns the bytes as a CFData, or NULL if the conversion fails.

   lossByte: when non-zero, a partial conversion (result != length but result != 0) is tolerated;
   when zero, anything short of a full conversion is a failure (see the conditions below).

   Outline:
     1. A native eight-bit string whose storage encoding already satisfies the request is copied as is.
     2. Otherwise a byte length is estimated (exactly, where a dry run is performed).
     3. A buffer of that size is allocated from `alloc`, the string is encoded into it, and the
        buffer is wrapped in a CFData without copying, with `alloc` passed as the final argument.
*/
CFDataRef CFStringCreateExternalRepresentation(CFAllocatorRef alloc, CFStringRef string, CFStringEncoding encoding, uint8_t lossByte) {
    CFIndex length;
    CFIndex guessedByteLength;
    uint8_t *bytes;
    CFIndex usedLength;
    SInt32 result;

    if (CF_IS_OBJC(__kCFStringTypeID, string)) {	/* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
        length = CFStringGetLength(string);
    } else {
        __CFAssertIsString(string);
        length = __CFStrLength(string);
        if (__CFStrIsEightBit(string) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) {	// Requested encoding is equal to the encoding in string
            return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
        }
    }

    if (alloc == NULL) alloc = __CFGetDefaultAllocator();

    // Unicode-family encodings: fixed-width estimate, with one extra code unit of room (presumably for a BOM - confirm)
    if (((encoding & 0x0FFF) == kCFStringEncodingUnicode) && ((encoding == kCFStringEncodingUnicode) || ((encoding > kCFStringEncodingUTF8) && (encoding <= kCFStringEncodingUTF32LE)))) {
        guessedByteLength = (length + 1) * ((((encoding >> 26)  & 2) == 0) ? sizeof(UTF16Char) : sizeof(UTF32Char)); // UTF32 format has the bit set
    } else if (((guessedByteLength = CFStringGetMaximumSizeForEncoding(length, encoding)) > length) && !CF_IS_OBJC(__kCFStringTypeID, string)) { // Multi byte encoding
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
        if (__CFStrIsUnicode(string)) {
            // Ask the converter for the byte length; keep the maximum-size guess if it reports nothing useful
            CFIndex aLength = CFStringEncodingByteLengthForCharacters(encoding, kCFStringEncodingPrependBOM, __CFStrContents(string), __CFStrLength(string));
            if (aLength > 0) guessedByteLength = aLength;
        } else {
#endif
        // Dry run with a NULL buffer: only the required byte count (guessedByteLength) is produced
        result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, NULL, LONG_MAX, &guessedByteLength);
        // if result == length, we always succeed
        //   otherwise, if result == 0, we fail
        //   otherwise, if no lossByte was supplied (and result != length), we fail
        if ((result != length) && (!result || !lossByte)) return NULL;
        if (guessedByteLength == length && __CFStrIsEightBit(string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // It's all ASCII !!
            return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
        }
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
        }
#endif
    }
    bytes = (uint8_t *)CFAllocatorAllocate(alloc, guessedByteLength, 0);
    if (__CFOASafe) __CFSetLastAllocationEventName(bytes, "CFData (store)");

    // Real conversion into the freshly allocated buffer; usedLength receives the byte count actually written
    result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, bytes, guessedByteLength, &usedLength);

    if ((result != length) && (!result || !lossByte)) {        // see comment above about what this means
        CFAllocatorDeallocate(alloc, bytes);
        return NULL;
    }

    return CFDataCreateWithBytesNoCopy(alloc, (uint8_t *)bytes, usedLength, alloc);
}
3967
3968
3969 CFStringEncoding CFStringGetSmallestEncoding(CFStringRef str) {
3970 CFIndex len;
3971 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_smallestEncodingInCFStringEncoding");
3972 __CFAssertIsString(str);
3973
3974 if (__CFStrIsEightBit(str)) return __CFStringGetEightBitStringEncoding();
3975 len = __CFStrLength(str);
3976 if (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetEightBitStringEncoding(), 0, NULL, LONG_MAX, NULL) == len) return __CFStringGetEightBitStringEncoding();
3977 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetSystemEncoding(), 0, NULL, LONG_MAX, NULL) == len)) return __CFStringGetSystemEncoding();
3978 return kCFStringEncodingUnicode; /* ??? */
3979 }
3980
3981
3982 CFStringEncoding CFStringGetFastestEncoding(CFStringRef str) {
3983 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_fastestEncodingInCFStringEncoding");
3984 __CFAssertIsString(str);
3985 return __CFStrIsEightBit(str) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode; /* ??? */
3986 }
3987
3988
3989 SInt32 CFStringGetIntValue(CFStringRef str) {
3990 Boolean success;
3991 SInt32 result;
3992 SInt32 idx = 0;
3993 CFStringInlineBuffer buf;
3994 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
3995 success = __CFStringScanInteger(&buf, NULL, &idx, false, &result);
3996 return success ? result : 0;
3997 }
3998
3999
4000 double CFStringGetDoubleValue(CFStringRef str) {
4001 Boolean success;
4002 double result;
4003 SInt32 idx = 0;
4004 CFStringInlineBuffer buf;
4005 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
4006 success = __CFStringScanDouble(&buf, NULL, &idx, &result);
4007 return success ? result : 0.0;
4008 }
4009
4010
4011 /*** Mutable functions... ***/
4012
/* Points an externally-backed mutable string at a caller-supplied UniChar buffer.
   chars    - the buffer to adopt; must be non-NULL whenever capacity > 0 (asserted)
   length   - number of characters currently valid in the buffer; must be >= 0 and <= capacity (asserted)
   capacity - buffer size in UniChars; stored internally in bytes (capacity * sizeof(UniChar))
   The assertion macros are defined elsewhere; presumably they are active only in debug builds - confirm.
*/
void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string, UniChar *chars, CFIndex length, CFIndex capacity) {
    __CFAssertIsNotNegative(length);
    __CFAssertIsStringAndExternalMutable(string);
    CFAssert4((length <= capacity) && ((capacity == 0) || ((capacity > 0) && chars)), __kCFLogAssertion, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__, chars, length, capacity);
    // Install the new backing store, then record its length and capacity
    __CFStrSetContentPtr(string, chars);
    __CFStrSetExplicitLength(string, length);
    __CFStrSetCapacity(string, capacity * sizeof(UniChar));
    // Mark the capacity as coming from the caller rather than from the string's own allocation
    __CFStrSetCapacityProvidedExternally(string);
}
4022
4023
4024
4025 void CFStringInsert(CFMutableStringRef str, CFIndex idx, CFStringRef insertedStr) {
4026 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "insertString:atIndex:", insertedStr, idx);
4027 __CFAssertIsStringAndMutable(str);
4028 CFAssert3(idx >= 0 && idx <= __CFStrLength(str), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(str));
4029 __CFStringReplace(str, CFRangeMake(idx, 0), insertedStr);
4030 }
4031
4032
/* Removes the characters in `range` from str.
   The range is asserted to lie within the string; the deletion itself is a size change of that
   range to zero characters via __CFStringChangeSize().
   NOTE(review): CF_OBJC_FUNCDISPATCH1 is defined outside this section; presumably it forwards to
   "deleteCharactersInRange:" and returns when str is not a native CFString - confirm.
*/
void CFStringDelete(CFMutableStringRef str, CFRange range) {
    CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "deleteCharactersInRange:", range);
    __CFAssertIsStringAndMutable(str);
    __CFAssertRangeIsInStringBounds(str, range.location, range.length);
    __CFStringChangeSize(str, range, 0, false);
}
4039
4040
/* Replaces the characters of str in `range` with the contents of `replacement`.
   The range is asserted to lie within the string before __CFStringReplace() does the work.
   NOTE(review): CF_OBJC_FUNCDISPATCH2 is defined outside this section; presumably it forwards to
   "replaceCharactersInRange:withString:" and returns when str is not a native CFString - confirm.
*/
void CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
    CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "replaceCharactersInRange:withString:", range, replacement);
    __CFAssertIsStringAndMutable(str);
    __CFAssertRangeIsInStringBounds(str, range.location, range.length);
    __CFStringReplace(str, range, replacement);
}
4047
4048
4049 void CFStringReplaceAll(CFMutableStringRef str, CFStringRef replacement) {
4050 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "setString:", replacement);
4051 __CFAssertIsStringAndMutable(str);
4052 __CFStringReplace(str, CFRangeMake(0, __CFStrLength(str)), replacement);
4053 }
4054
4055
4056 void CFStringAppend(CFMutableStringRef str, CFStringRef appended) {
4057 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "appendString:", appended);
4058 __CFAssertIsStringAndMutable(str);
4059 __CFStringReplace(str, CFRangeMake(__CFStrLength(str), 0), appended);
4060 }
4061
4062
4063 void CFStringAppendCharacters(CFMutableStringRef str, const UniChar *chars, CFIndex appendedLength) {
4064 CFIndex strLength, idx;
4065
4066 __CFAssertIsNotNegative(appendedLength);
4067
4068 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", chars, appendedLength);
4069
4070 __CFAssertIsStringAndMutable(str);
4071
4072 strLength = __CFStrLength(str);
4073 if (__CFStrIsUnicode(str)) {
4074 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, true);
4075 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4076 } else {
4077 uint8_t *contents;
4078 bool isASCII = true;
4079 for (idx = 0; isASCII && idx < appendedLength; idx++) isASCII = (chars[idx] < 0x80);
4080 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, !isASCII);
4081 if (!isASCII) {
4082 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4083 } else {
4084 contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4085 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4086 }
4087 }
4088 }
4089
4090
4091 void __CFStringAppendBytes(CFMutableStringRef str, const char *cStr, CFIndex appendedLength, CFStringEncoding encoding) {
4092 Boolean appendedIsUnicode = false;
4093 Boolean freeCStrWhenDone = false;
4094 Boolean demoteAppendedUnicode = false;
4095 CFVarWidthCharBuffer vBuf;
4096
4097 __CFAssertIsNotNegative(appendedLength);
4098
4099 if (encoding == kCFStringEncodingASCII || encoding == __CFStringGetEightBitStringEncoding()) {
4100 // appendedLength now denotes length in UniChars
4101 } else if (encoding == kCFStringEncodingUnicode) {
4102 UniChar *chars = (UniChar *)cStr;
4103 CFIndex idx, length = appendedLength / sizeof(UniChar);
4104 bool isASCII = true;
4105 for (idx = 0; isASCII && idx < length; idx++) isASCII = (chars[idx] < 0x80);
4106 if (!isASCII) {
4107 appendedIsUnicode = true;
4108 } else {
4109 demoteAppendedUnicode = true;
4110 }
4111 appendedLength = length;
4112 } else {
4113 Boolean usingPassedInMemory = false;
4114
4115 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
4116 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
4117
4118 if (!__CFStringDecodeByteStream3((const uint8_t *)cStr, appendedLength, encoding, __CFStrIsUnicode(str), &vBuf, &usingPassedInMemory, 0)) {
4119 CFAssert1(0, __kCFLogAssertion, "Supplied bytes could not be converted specified encoding %d", encoding);
4120 return;
4121 }
4122
4123 // If not ASCII, appendedLength now denotes length in UniChars
4124 appendedLength = vBuf.numChars;
4125 appendedIsUnicode = !vBuf.isASCII;
4126 cStr = (const char *)vBuf.chars.ascii;
4127 freeCStrWhenDone = !usingPassedInMemory && vBuf.shouldFreeChars;
4128 }
4129
4130 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
4131 if (!appendedIsUnicode && !demoteAppendedUnicode) {
4132 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "_cfAppendCString:length:", cStr, appendedLength);
4133 } else {
4134 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", cStr, appendedLength);
4135 }
4136 } else {
4137 CFIndex strLength;
4138 __CFAssertIsStringAndMutable(str);
4139 strLength = __CFStrLength(str);
4140
4141 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, appendedIsUnicode || __CFStrIsUnicode(str));
4142
4143 if (__CFStrIsUnicode(str)) {
4144 UniChar *contents = (UniChar *)__CFStrContents(str);
4145 if (appendedIsUnicode) {
4146 memmove(contents + strLength, cStr, appendedLength * sizeof(UniChar));
4147 } else {
4148 __CFStrConvertBytesToUnicode((const uint8_t *)cStr, contents + strLength, appendedLength);
4149 }
4150 } else {
4151 if (demoteAppendedUnicode) {
4152 UniChar *chars = (UniChar *)cStr;
4153 CFIndex idx;
4154 uint8_t *contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4155 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4156 } else {
4157 uint8_t *contents = (uint8_t *)__CFStrContents(str);
4158 memmove(contents + strLength + __CFStrSkipAnyLengthByte(str), cStr, appendedLength);
4159 }
4160 }
4161 }
4162
4163 if (freeCStrWhenDone) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr);
4164 }
4165
4166 void CFStringAppendPascalString(CFMutableStringRef str, ConstStringPtr pStr, CFStringEncoding encoding) {
4167 __CFStringAppendBytes(str, (const char *)(pStr + 1), (CFIndex)*pStr, encoding);
4168 }
4169
4170 void CFStringAppendCString(CFMutableStringRef str, const char *cStr, CFStringEncoding encoding) {
4171 __CFStringAppendBytes(str, cStr, strlen(cStr), encoding);
4172 }
4173
4174
4175 void CFStringAppendFormat(CFMutableStringRef str, CFDictionaryRef formatOptions, CFStringRef format, ...) {
4176 va_list argList;
4177
4178 va_start(argList, format);
4179 CFStringAppendFormatAndArguments(str, formatOptions, format, argList);
4180 va_end(argList);
4181 }
4182
4183
4184 CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
4185 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID, CFIndex, string, "replaceOccurrencesOfString:withString:options:range:", stringToFind, replacementString, compareOptions, rangeToSearch);
4186 CFRange foundRange;
4187 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
4188 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
4189 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
4190 CFRange rangeBuffer[MAX_RANGES_ON_STACK]; // Used to avoid allocating memory
4191 CFRange *ranges = rangeBuffer;
4192 CFIndex foundCount = 0;
4193 CFIndex capacity = MAX_RANGES_ON_STACK;
4194
4195 __CFAssertIsStringAndMutable(string);
4196 __CFAssertRangeIsInStringBounds(string, rangeToSearch.location, rangeToSearch.length);
4197
4198 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
4199 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
4200 // Determine the next range
4201 if (backwards) {
4202 rangeToSearch.length = foundRange.location - rangeToSearch.location;
4203 } else {
4204 rangeToSearch.location = foundRange.location + foundRange.length;
4205 rangeToSearch.length = endIndex - rangeToSearch.location;
4206 }
4207
4208 // If necessary, grow the array
4209 if (foundCount >= capacity) {
4210 bool firstAlloc = (ranges == rangeBuffer) ? true : false;
4211 capacity = (capacity + 4) * 2;
4212 // Note that reallocate with NULL previous pointer is same as allocate
4213 ranges = (CFRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, firstAlloc ? NULL : ranges, capacity * sizeof(CFRange), 0);
4214 if (firstAlloc) memmove(ranges, rangeBuffer, MAX_RANGES_ON_STACK * sizeof(CFRange));
4215 }
4216 ranges[foundCount] = foundRange;
4217 foundCount++;
4218 }
4219
4220 if (foundCount > 0) {
4221 if (backwards) { // Reorder the ranges to be incrementing (better to do this here, then to check other places)
4222 int head = 0;
4223 int tail = foundCount - 1;
4224 while (head < tail) {
4225 CFRange temp = ranges[head];
4226 ranges[head] = ranges[tail];
4227 ranges[tail] = temp;
4228 head++;
4229 tail--;
4230 }
4231 }
4232 __CFStringReplaceMultiple(string, ranges, foundCount, replacementString);
4233 if (ranges != rangeBuffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, ranges);
4234 }
4235
4236 return foundCount;
4237 }
4238
4239
4240 // This function is here for NSString purposes
4241 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
4242
4243 int __CFStringCheckAndReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
4244 if (!__CFStrIsMutable(str)) return _CFStringErrNotMutable; // These three ifs are always here, for NSString usage
4245 if (!replacement && __CFStringNoteErrors()) return _CFStringErrNilArg;
4246 // This attempts to catch bad ranges including those described in 3375535 (-1,1)
4247 unsigned long endOfRange = (unsigned long)(range.location) + (unsigned long)(range.length); // NSRange uses unsigned quantities, hence the casting
4248 if (((endOfRange > (unsigned long)__CFStrLength(str)) || (endOfRange < (unsigned long)(range.location))) && __CFStringNoteErrors()) return _CFStringErrBounds;
4249
4250 __CFAssertIsStringAndMutable(str);
4251 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4252 __CFStringReplace(str, range, replacement);
4253 return _CFStringErrNone;
4254 }
4255
4256 // This function determines whether errors which would cause string exceptions should
4257 // be ignored or not
4258
4259 Boolean __CFStringNoteErrors(void) {
4260 return true;
4261 }
4262
4263
4264
4265 void CFStringPad(CFMutableStringRef string, CFStringRef padString, CFIndex length, CFIndex indexIntoPad) {
4266 CFIndex originalLength;
4267
4268 __CFAssertIsNotNegative(length);
4269 __CFAssertIsNotNegative(indexIntoPad);
4270
4271 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, void, string, "_cfPad:length:padIndex:", padString, length, indexIntoPad);
4272
4273 __CFAssertIsStringAndMutable(string);
4274
4275 originalLength = __CFStrLength(string);
4276 if (length < originalLength) {
4277 __CFStringChangeSize(string, CFRangeMake(length, originalLength - length), 0, false);
4278 } else if (originalLength < length) {
4279 uint8_t *contents;
4280 Boolean isUnicode;
4281 CFIndex charSize;
4282 CFIndex padStringLength;
4283 CFIndex padLength;
4284 CFIndex padRemaining = length - originalLength;
4285
4286 if (CF_IS_OBJC(__kCFStringTypeID, padString)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
4287 padStringLength = CFStringGetLength(padString);
4288 isUnicode = true; /* !!! Bad for now */
4289 } else {
4290 __CFAssertIsString(padString);
4291 padStringLength = __CFStrLength(padString);
4292 isUnicode = __CFStrIsUnicode(string) || __CFStrIsUnicode(padString);
4293 }
4294
4295 charSize = isUnicode ? sizeof(UniChar) : sizeof(uint8_t);
4296
4297 __CFStringChangeSize(string, CFRangeMake(originalLength, 0), padRemaining, isUnicode);
4298
4299 contents = (uint8_t *)__CFStrContents(string) + charSize * originalLength + __CFStrSkipAnyLengthByte(string);
4300 padLength = padStringLength - indexIntoPad;
4301 padLength = padRemaining < padLength ? padRemaining : padLength;
4302
4303 while (padRemaining > 0) {
4304 if (isUnicode) {
4305 CFStringGetCharacters(padString, CFRangeMake(indexIntoPad, padLength), (UniChar *)contents);
4306 } else {
4307 CFStringGetBytes(padString, CFRangeMake(indexIntoPad, padLength), __CFStringGetEightBitStringEncoding(), 0, false, contents, padRemaining * charSize, NULL);
4308 }
4309 contents += padLength * charSize;
4310 padRemaining -= padLength;
4311 indexIntoPad = 0;
4312 padLength = padRemaining < padLength ? padRemaining : padStringLength;
4313 }
4314 }
4315 }
4316
4317 void CFStringTrim(CFMutableStringRef string, CFStringRef trimString) {
4318 CFRange range;
4319 CFIndex newStartIndex;
4320 CFIndex length;
4321
4322 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfTrim:", trimString);
4323
4324 __CFAssertIsStringAndMutable(string);
4325 __CFAssertIsString(trimString);
4326
4327 newStartIndex = 0;
4328 length = __CFStrLength(string);
4329
4330 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length - newStartIndex), kCFCompareAnchored, &range)) {
4331 newStartIndex = range.location + range.length;
4332 }
4333
4334 if (newStartIndex < length) {
4335 CFIndex charSize = __CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t);
4336 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4337
4338 length -= newStartIndex;
4339 if (__CFStrLength(trimString) < length) {
4340 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length), kCFCompareAnchored|kCFCompareBackwards, &range)) {
4341 length = range.location - newStartIndex;
4342 }
4343 }
4344 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4345 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4346 } else { // Only trimString in string, trim all
4347 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4348 }
4349 }
4350
4351 void CFStringTrimWhitespace(CFMutableStringRef string) {
4352 CFIndex newStartIndex;
4353 CFIndex length;
4354 CFStringInlineBuffer buffer;
4355
4356 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, void, string, "_cfTrimWS");
4357
4358 __CFAssertIsStringAndMutable(string);
4359
4360 newStartIndex = 0;
4361 length = __CFStrLength(string);
4362
4363 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
4364 CFIndex buffer_idx = 0;
4365
4366 while (buffer_idx < length && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4367 buffer_idx++;
4368 newStartIndex = buffer_idx;
4369
4370 if (newStartIndex < length) {
4371 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4372 CFIndex charSize = (__CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t));
4373
4374 buffer_idx = length - 1;
4375 while (0 <= buffer_idx && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4376 buffer_idx--;
4377 length = buffer_idx - newStartIndex + 1;
4378
4379 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4380 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4381 } else { // Whitespace only string
4382 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4383 }
4384 }
4385
4386 void CFStringLowercase(CFMutableStringRef string, CFLocaleRef locale) {
4387 CFIndex currentIndex = 0;
4388 CFIndex length;
4389 const uint8_t *langCode;
4390 Boolean isEightBit = __CFStrIsEightBit(string);
4391
4392 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfLowercase:", locale);
4393
4394 __CFAssertIsStringAndMutable(string);
4395
4396 length = __CFStrLength(string);
4397
4398 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4399
4400 if (!langCode && isEightBit) {
4401 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4402 for (;currentIndex < length;currentIndex++) {
4403 if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4404 contents[currentIndex] += 'a' - 'A';
4405 } else if (contents[currentIndex] > 127) {
4406 break;
4407 }
4408 }
4409 }
4410
4411 if (currentIndex < length) {
4412 UTF16Char *contents;
4413 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4414 CFIndex mappedLength;
4415 UTF32Char currentChar;
4416 UInt32 flags = 0;
4417
4418 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4419
4420 contents = (UniChar *)__CFStrContents(string);
4421
4422 for (;currentIndex < length;currentIndex++) {
4423
4424 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4425 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4426 } else {
4427 currentChar = contents[currentIndex];
4428 }
4429 flags = ((langCode || (currentChar == 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToLowercase, langCode, flags) : 0);
4430
4431 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, flags, langCode);
4432 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4433
4434 if (currentChar > 0xFFFF) { // Non-BMP char
4435 switch (mappedLength) {
4436 case 0:
4437 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4438 contents = (UniChar *)__CFStrContents(string);
4439 length -= 2;
4440 break;
4441
4442 case 1:
4443 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4444 contents = (UniChar *)__CFStrContents(string);
4445 --length;
4446 break;
4447
4448 case 2:
4449 contents[++currentIndex] = mappedCharacters[1];
4450 break;
4451
4452 default:
4453 --mappedLength; // Skip the current char
4454 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4455 contents = (UniChar *)__CFStrContents(string);
4456 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4457 length += (mappedLength - 1);
4458 currentIndex += mappedLength;
4459 break;
4460 }
4461 } else if (mappedLength == 0) {
4462 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4463 contents = (UniChar *)__CFStrContents(string);
4464 --length;
4465 } else if (mappedLength > 1) {
4466 --mappedLength; // Skip the current char
4467 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4468 contents = (UniChar *)__CFStrContents(string);
4469 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4470 length += mappedLength;
4471 currentIndex += mappedLength;
4472 }
4473 }
4474 }
4475 }
4476
4477 void CFStringUppercase(CFMutableStringRef string, CFLocaleRef locale) {
4478 CFIndex currentIndex = 0;
4479 CFIndex length;
4480 const uint8_t *langCode;
4481 Boolean isEightBit = __CFStrIsEightBit(string);
4482
4483 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfUppercase:", locale);
4484
4485 __CFAssertIsStringAndMutable(string);
4486
4487 length = __CFStrLength(string);
4488
4489 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4490
4491 if (!langCode && isEightBit) {
4492 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4493 for (;currentIndex < length;currentIndex++) {
4494 if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4495 contents[currentIndex] -= 'a' - 'A';
4496 } else if (contents[currentIndex] > 127) {
4497 break;
4498 }
4499 }
4500 }
4501
4502 if (currentIndex < length) {
4503 UniChar *contents;
4504 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4505 CFIndex mappedLength;
4506 UTF32Char currentChar;
4507 UInt32 flags = 0;
4508
4509 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4510
4511 contents = (UniChar *)__CFStrContents(string);
4512
4513 for (;currentIndex < length;currentIndex++) {
4514 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4515 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4516 } else {
4517 currentChar = contents[currentIndex];
4518 }
4519
4520 flags = (langCode ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToUppercase, langCode, flags) : 0);
4521
4522 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToUppercase, flags, langCode);
4523 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4524
4525 if (currentChar > 0xFFFF) { // Non-BMP char
4526 switch (mappedLength) {
4527 case 0:
4528 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4529 contents = (UniChar *)__CFStrContents(string);
4530 length -= 2;
4531 break;
4532
4533 case 1:
4534 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4535 contents = (UniChar *)__CFStrContents(string);
4536 --length;
4537 break;
4538
4539 case 2:
4540 contents[++currentIndex] = mappedCharacters[1];
4541 break;
4542
4543 default:
4544 --mappedLength; // Skip the current char
4545 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4546 contents = (UniChar *)__CFStrContents(string);
4547 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4548 length += (mappedLength - 1);
4549 currentIndex += mappedLength;
4550 break;
4551 }
4552 } else if (mappedLength == 0) {
4553 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4554 contents = (UniChar *)__CFStrContents(string);
4555 --length;
4556 } else if (mappedLength > 1) {
4557 --mappedLength; // Skip the current char
4558 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4559 contents = (UniChar *)__CFStrContents(string);
4560 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4561 length += mappedLength;
4562 currentIndex += mappedLength;
4563 }
4564 }
4565 }
4566 }
4567
4568
4569 void CFStringCapitalize(CFMutableStringRef string, CFLocaleRef locale) {
4570 CFIndex currentIndex = 0;
4571 CFIndex length;
4572 const uint8_t *langCode;
4573 Boolean isEightBit = __CFStrIsEightBit(string);
4574 Boolean isLastCased = false;
4575 const uint8_t *caseIgnorableForBMP;
4576
4577 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfCapitalize:", locale);
4578
4579 __CFAssertIsStringAndMutable(string);
4580
4581 length = __CFStrLength(string);
4582
4583 caseIgnorableForBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet, 0);
4584
4585 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4586
4587 if (!langCode && isEightBit) {
4588 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4589 for (;currentIndex < length;currentIndex++) {
4590 if (contents[currentIndex] > 127) {
4591 break;
4592 } else if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4593 contents[currentIndex] += (isLastCased ? 'a' - 'A' : 0);
4594 isLastCased = true;
4595 } else if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4596 contents[currentIndex] -= (!isLastCased ? 'a' - 'A' : 0);
4597 isLastCased = true;
4598 } else if (!CFUniCharIsMemberOfBitmap(contents[currentIndex], caseIgnorableForBMP)) {
4599 isLastCased = false;
4600 }
4601 }
4602 }
4603
4604 if (currentIndex < length) {
4605 UniChar *contents;
4606 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4607 CFIndex mappedLength;
4608 UTF32Char currentChar;
4609 UInt32 flags = 0;
4610
4611 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4612
4613 contents = (UniChar *)__CFStrContents(string);
4614
4615 for (;currentIndex < length;currentIndex++) {
4616 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4617 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4618 } else {
4619 currentChar = contents[currentIndex];
4620 }
4621 flags = ((langCode || ((currentChar == 0x03A3) && isLastCased)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), langCode, flags) : 0);
4622
4623 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), flags, langCode);
4624 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4625
4626 if (currentChar > 0xFFFF) { // Non-BMP char
4627 switch (mappedLength) {
4628 case 0:
4629 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4630 contents = (UniChar *)__CFStrContents(string);
4631 length -= 2;
4632 break;
4633
4634 case 1:
4635 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4636 contents = (UniChar *)__CFStrContents(string);
4637 --length;
4638 break;
4639
4640 case 2:
4641 contents[++currentIndex] = mappedCharacters[1];
4642 break;
4643
4644 default:
4645 --mappedLength; // Skip the current char
4646 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4647 contents = (UniChar *)__CFStrContents(string);
4648 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4649 length += (mappedLength - 1);
4650 currentIndex += mappedLength;
4651 break;
4652 }
4653 } else if (mappedLength == 0) {
4654 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4655 contents = (UniChar *)__CFStrContents(string);
4656 --length;
4657 } else if (mappedLength > 1) {
4658 --mappedLength; // Skip the current char
4659 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4660 contents = (UniChar *)__CFStrContents(string);
4661 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4662 length += mappedLength;
4663 currentIndex += mappedLength;
4664 }
4665
4666 if (!((currentChar > 0xFFFF) ? CFUniCharIsMemberOf(currentChar, kCFUniCharCaseIgnorableCharacterSet) : CFUniCharIsMemberOfBitmap(currentChar, caseIgnorableForBMP))) { // We have non-caseignorable here
4667 isLastCased = ((CFUniCharIsMemberOf(currentChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(currentChar, kCFUniCharLowercaseLetterCharacterSet)) ? true : false);
4668 }
4669 }
4670 }
4671 }
4672
4673
/* Number of UTF-32 slots in the on-stack buffer CFStringNormalize uses for mapped
   characters; also the amount by which that buffer grows once it has moved to the heap. */
4674 #define MAX_DECOMP_BUF 64
4675
/* Constants used by CFStringNormalize to build a precomposed Hangul syllable as
   (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE.
   NOTE(review): names and values appear to match the SBase/LBase/VBase/TBase and
   SCount/LCount/VCount/TCount/NCount constants of the Unicode Standard's Hangul syllable
   composition algorithm -- confirm against the standard. */
4676 #define HANGUL_SBASE 0xAC00
4677 #define HANGUL_LBASE 0x1100
4678 #define HANGUL_VBASE 0x1161
4679 #define HANGUL_TBASE 0x11A7
4680 #define HANGUL_SCOUNT 11172
4681 #define HANGUL_LCOUNT 19
4682 #define HANGUL_VCOUNT 21
4683 #define HANGUL_TCOUNT 28
4684 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
4685
4686 CF_INLINE uint32_t __CFGetUTF16Length(const UTF32Char *characters, uint32_t utf32Length) {
4687 const UTF32Char *limit = characters + utf32Length;
4688 uint32_t length = 0;
4689
4690 while (characters < limit) length += (*(characters++) > 0xFFFF ? 2 : 1);
4691
4692 return length;
4693 }
4694
4695 CF_INLINE void __CFFillInUTF16(const UTF32Char *characters, UTF16Char *dst, uint32_t utf32Length) {
4696 const UTF32Char *limit = characters + utf32Length;
4697 UTF32Char currentChar;
4698
4699 while (characters < limit) {
4700 currentChar = *(characters++);
4701 if (currentChar > 0xFFFF) {
4702 currentChar -= 0x10000;
4703 *(dst++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
4704 *(dst++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
4705 } else {
4706 *(dst++) = currentChar;
4707 }
4708 }
4709 }
4710
4711 void CFStringNormalize(CFMutableStringRef string, CFStringNormalizationForm theForm) {
4712 CFIndex currentIndex = 0;
4713 CFIndex length;
4714 bool needToReorder = true;
4715
4716 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfNormalize:", theForm);
4717
4718 __CFAssertIsStringAndMutable(string);
4719
4720 length = __CFStrLength(string);
4721
4722 if (__CFStrIsEightBit(string)) {
4723 uint8_t *contents;
4724
4725 if (theForm == kCFStringNormalizationFormC) return; // 8bit form has no decomposition
4726
4727 contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4728
4729 for (;currentIndex < length;currentIndex++) {
4730 if (contents[currentIndex] > 127) {
4731 __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); // need to do harm way
4732 needToReorder = false;
4733 break;
4734 }
4735 }
4736 }
4737
4738 if (currentIndex < length) {
4739 UTF16Char *limit = (UTF16Char *)__CFStrContents(string) + length;
4740 UTF16Char *contents = (UTF16Char *)__CFStrContents(string) + currentIndex;
4741 UTF32Char buffer[MAX_DECOMP_BUF];
4742 UTF32Char *mappedCharacters = buffer;
4743 CFIndex allocatedLength = MAX_DECOMP_BUF;
4744 CFIndex mappedLength;
4745 CFIndex currentLength;
4746 UTF32Char currentChar;
4747 const uint8_t *decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
4748 const uint8_t *nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
4749 const uint8_t *combiningBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
4750
4751 while (contents < limit) {
4752 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4753 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4754 currentLength = 2;
4755 contents += 2;
4756 } else {
4757 currentChar = *(contents++);
4758 currentLength = 1;
4759 }
4760
4761 mappedLength = 0;
4762
4763 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16)))) && (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, ((currentChar < 0x10000) ? combiningBMP : (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16)))))) {
4764 if ((theForm & kCFStringNormalizationFormC) == 0 || currentChar < HANGUL_SBASE || currentChar > (HANGUL_SBASE + HANGUL_SCOUNT)) { // We don't have to decompose Hangul Syllables if we're precomposing again
4765 mappedLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters, MAX_DECOMP_BUF);
4766 }
4767 }
4768
4769 if ((needToReorder || (theForm & kCFStringNormalizationFormC)) && ((contents < limit) || (mappedLength == 0))) {
4770 if (mappedLength > 0) {
4771 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4772 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4773 } else {
4774 currentChar = *contents;
4775 }
4776 }
4777
4778 if (0 != CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) {
4779 uint32_t decompLength;
4780
4781 if (mappedLength == 0) {
4782 contents -= (currentChar & 0xFFFF0000 ? 2 : 1);
4783 if (currentIndex > 0) {
4784 if (CFUniCharIsSurrogateLowCharacter(*(contents - 1)) && (currentIndex > 1) && CFUniCharIsSurrogateHighCharacter(*(contents - 2))) {
4785 *mappedCharacters = CFUniCharGetLongCharacterForSurrogatePair(*(contents - 2), *(contents - 1));
4786 currentIndex -= 2;
4787 currentLength += 2;
4788 } else {
4789 *mappedCharacters = *(contents - 1);
4790 --currentIndex;
4791 ++currentLength;
4792 }
4793 mappedLength = 1;
4794 }
4795 } else {
4796 currentLength += (currentChar & 0xFFFF0000 ? 2 : 1);
4797 }
4798 contents += (currentChar & 0xFFFF0000 ? 2 : 1);
4799
4800 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
4801 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4802 mappedLength += decompLength;
4803 } else {
4804 mappedCharacters[mappedLength++] = currentChar;
4805 }
4806
4807 while (contents < limit) {
4808 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4809 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4810 } else {
4811 currentChar = *contents;
4812 }
4813 if (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) break;
4814 if (currentChar & 0xFFFF0000) {
4815 contents += 2;
4816 currentLength += 2;
4817 } else {
4818 ++contents;
4819 ++currentLength;
4820 }
4821 if (mappedLength == allocatedLength) {
4822 allocatedLength += MAX_DECOMP_BUF;
4823 if (mappedCharacters == buffer) {
4824 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
4825 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4826 } else {
4827 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4828 }
4829 }
4830 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
4831 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4832 mappedLength += decompLength;
4833 } else {
4834 mappedCharacters[mappedLength++] = currentChar;
4835 }
4836 }
4837 }
4838 if (needToReorder && mappedLength > 1) CFUniCharPrioritySort(mappedCharacters, mappedLength);
4839 }
4840
4841 if (theForm & kCFStringNormalizationFormKD) {
4842 CFIndex newLength = 0;
4843
4844 if (mappedLength == 0 && CFUniCharIsMemberOf(currentChar, kCFUniCharCompatibilityDecomposableCharacterSet)) {
4845 mappedCharacters[mappedLength++] = currentChar;
4846 }
4847 while (newLength < mappedLength) {
4848 newLength = CFUniCharCompatibilityDecompose(mappedCharacters, mappedLength, allocatedLength);
4849 if (newLength == 0) {
4850 allocatedLength += MAX_DECOMP_BUF;
4851 if (mappedCharacters == buffer) {
4852 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
4853 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4854 } else {
4855 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4856 }
4857 }
4858 }
4859 mappedLength = newLength;
4860 }
4861
4862 if (theForm & kCFStringNormalizationFormC) {
4863 UTF32Char nextChar;
4864
4865 if (mappedLength > 1) {
4866 CFIndex consumedLength = 1;
4867 UTF32Char *currentBase = mappedCharacters;
4868 uint8_t currentClass, lastClass = 0;
4869 bool didCombine = false;
4870
4871 currentChar = *mappedCharacters;
4872
4873 while (consumedLength < mappedLength) {
4874 nextChar = mappedCharacters[consumedLength];
4875 currentClass = CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)((nextChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))));
4876
4877 if (theForm & kCFStringNormalizationFormKD) {
4878 if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) {
4879 SInt8 lIndex = currentChar - HANGUL_LBASE;
4880
4881 if ((0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4882 SInt16 vIndex = nextChar - HANGUL_VBASE;
4883
4884 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4885 SInt16 tIndex = 0;
4886 CFIndex usedLength = mappedLength;
4887
4888 mappedCharacters[consumedLength++] = 0xFFFD;
4889
4890 if (consumedLength < mappedLength) {
4891 tIndex = mappedCharacters[consumedLength] - HANGUL_TBASE;
4892 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4893 tIndex = 0;
4894 } else {
4895 mappedCharacters[consumedLength++] = 0xFFFD;
4896 }
4897 }
4898 *currentBase = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4899
4900 while (--usedLength > 0) {
4901 if (mappedCharacters[usedLength] == 0xFFFD) {
4902 --mappedLength;
4903 --consumedLength;
4904 memmove(mappedCharacters + usedLength, mappedCharacters + usedLength + 1, (mappedLength - usedLength) * sizeof(UTF32Char));
4905 }
4906 }
4907 currentBase = mappedCharacters + consumedLength;
4908 currentChar = *currentBase;
4909 ++consumedLength;
4910
4911 continue;
4912 }
4913 }
4914 }
4915 if (!CFUniCharIsMemberOfBitmap(nextChar, ((nextChar < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))))) {
4916 *currentBase = currentChar;
4917 currentBase = mappedCharacters + consumedLength;
4918 currentChar = nextChar;
4919 ++consumedLength;
4920 continue;
4921 }
4922 }
4923
4924 if ((lastClass == 0) || (currentClass > lastClass)) {
4925 nextChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
4926 if (nextChar == 0xFFFD) {
4927 lastClass = currentClass;
4928 } else {
4929 mappedCharacters[consumedLength] = 0xFFFD;
4930 didCombine = true;
4931 currentChar = nextChar;
4932 }
4933 }
4934 ++consumedLength;
4935 }
4936
4937 *currentBase = currentChar;
4938 if (didCombine) {
4939 consumedLength = mappedLength;
4940 while (--consumedLength > 0) {
4941 if (mappedCharacters[consumedLength] == 0xFFFD) {
4942 --mappedLength;
4943 memmove(mappedCharacters + consumedLength, mappedCharacters + consumedLength + 1, (mappedLength - consumedLength) * sizeof(UTF32Char));
4944 }
4945 }
4946 }
4947 } else if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo
4948 SInt8 lIndex = currentChar - HANGUL_LBASE;
4949
4950 if ((contents < limit) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4951 SInt16 vIndex = *contents - HANGUL_VBASE;
4952
4953 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4954 SInt16 tIndex = 0;
4955
4956 ++contents; ++currentLength;
4957
4958 if (contents < limit) {
4959 tIndex = *contents - HANGUL_TBASE;
4960 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4961 tIndex = 0;
4962 } else {
4963 ++contents; ++currentLength;
4964 }
4965 }
4966 *mappedCharacters = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4967 mappedLength = 1;
4968 }
4969 }
4970 } else { // collect class 0 non-base characters
4971 while (contents < limit) {
4972 nextChar = *contents;
4973 if (CFUniCharIsSurrogateHighCharacter(nextChar) && ((contents + 1) < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4974 nextChar = CFUniCharGetLongCharacterForSurrogatePair(nextChar, *(contents + 1));
4975 if (!CFUniCharIsMemberOfBitmap(nextChar, (const uint8_t *)CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))))) break;
4976 } else {
4977 if (!CFUniCharIsMemberOfBitmap(nextChar, nonBaseBMP) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, combiningBMP))) break;
4978 }
4979 currentChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
4980 if (0xFFFD == currentChar) break;
4981
4982 if (nextChar < 0x10000) {
4983 ++contents; ++currentLength;
4984 } else {
4985 contents += 2;
4986 currentLength += 2;
4987 }
4988
4989 *mappedCharacters = currentChar;
4990 mappedLength = 1;
4991 }
4992 }
4993 }
4994
4995 if (mappedLength > 0) {
4996 CFIndex utf16Length = __CFGetUTF16Length(mappedCharacters, mappedLength);
4997
4998 if (utf16Length != currentLength) {
4999 __CFStringChangeSize(string, CFRangeMake(currentIndex, currentLength), utf16Length, true);
5000 currentLength = utf16Length;
5001 }
5002 contents = (UTF16Char *)__CFStrContents(string);
5003 limit = contents + __CFStrLength(string);
5004 contents += currentIndex;
5005 __CFFillInUTF16(mappedCharacters, contents, mappedLength);
5006 contents += utf16Length;
5007 }
5008 currentIndex += currentLength;
5009 }
5010
5011 if (mappedCharacters != buffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, mappedCharacters);
5012 }
5013 }
5014
/* Folds theString in place according to theFlags.
   Only kCFCompareCaseInsensitive, kCFCompareDiacriticInsensitive and kCFCompareWidthInsensitive are acted on; all other
   bits are masked off. kCFCompareLocalized is consulted once, before masking: when it is set and locale is NULL, the
   current locale is copied, used, and released again at bail.
   The work proceeds in up to three stages:
     1. an in-place pass over 8-bit contents, for as long as each folded character is a single ASCII character;
     2. for strings that are not native CF objects, a copy of the remaining range is folded recursively and put back
        with CFStringReplace;
     3. otherwise a general loop that folds cluster by cluster and grows or shrinks the string as needed.
*/
5015 void CFStringFold(CFMutableStringRef theString, CFStringCompareFlags theFlags, CFLocaleRef locale) {
5016 CFStringInlineBuffer stringBuffer;
5017 CFIndex length = CFStringGetLength(theString);
5018 CFIndex currentIndex = 0; // index of the first character that has not been folded yet
5019 CFIndex bufferLength = 0; // number of UTF32 characters of fold output currently held in buffer
5020 UTF32Char buffer[kCFStringStackBufferLength];
5021 const uint8_t *cString;
5022 const uint8_t *langCode;
5023 CFStringEncoding eightBitEncoding;
5024 bool caseInsensitive = ((theFlags & kCFCompareCaseInsensitive) ? true : false);
5025 bool isObjc = CF_IS_OBJC(__kCFStringTypeID, theString);
5026 CFLocaleRef theLocale = locale;
5027
// A locale copied here is owned by this function; it is released at bail.
5028 if ((theFlags & kCFCompareLocalized) && (NULL == locale)) {
5029 theLocale = CFLocaleCopyCurrent();
5030 }
5031
// From here on only the three folding options remain in theFlags.
5032 theFlags &= (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive);
5033
5034 if ((0 == theFlags) || (0 == length)) goto bail; // nothing to do
5035
5036 langCode = ((NULL == theLocale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(theLocale));
5037
5038 eightBitEncoding = __CFStringGetEightBitStringEncoding();
5039 cString = (const uint8_t *)CFStringGetCStringPtr(theString, eightBitEncoding);
5040
5041 if ((NULL != cString) && !caseInsensitive && (kCFStringEncodingASCII == eightBitEncoding)) goto bail; // All ASCII
5042
5043 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5044
// Stage 1: 8-bit contents with case or diacritic folding requested. Bytes are rewritten in place until a character
// is met whose folded form cannot be stored as one ASCII byte, or until in-place writing is not possible.
5045 if ((NULL != cString) && (theFlags & (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive))) {
5046 const uint8_t *cStringPtr = cString;
5047 const uint8_t *cStringLimit = cString + length;
// Writable byte storage for native CF strings; NULL for non-CF strings, which are never written in place here.
5048 uint8_t *cStringContents = (isObjc ? NULL : (uint8_t *)__CFStrContents(theString) + __CFStrSkipAnyLengthByte(theString));
5049
5050 while (cStringPtr < cStringLimit) {
5051 if ((*cStringPtr < 0x80) && (NULL == langCode)) {
// ASCII byte and no language code: only 'A'..'Z' ever changes.
5052 if (caseInsensitive && (*cStringPtr >= 'A') && (*cStringPtr <= 'Z')) {
5053 if (NULL == cStringContents) {
5054 break;
5055 } else {
5056 cStringContents[cStringPtr - cString] += ('a' - 'A');
5057 }
5058 }
5059 } else {
// Non-ASCII byte, or a language code is in effect: map the byte through __CFCharToUniCharTable and fold the cluster.
// A positive result is taken to be the number of characters placed in buffer — NOTE(review): helper is defined outside this view; confirm.
5060 if ((bufferLength = __CFStringFoldCharacterClusterAtIndex((UTF32Char)__CFCharToUniCharTable[*cStringPtr], &stringBuffer, cStringPtr - cString, theFlags, langCode, buffer, kCFStringStackBufferLength, NULL)) > 0) {
// Leave the loop with bufferLength > 0 when the result is not a single ASCII character or cannot be written in place;
// stages 2/3 pick it up from there.
5061 if ((*buffer > 0x7F) || (bufferLength > 1) || (NULL == cStringContents)) break;
5062 cStringContents[cStringPtr - cString] = *buffer;
5063 }
5064 }
5065 ++cStringPtr;
5066 }
5067
5068 currentIndex = cStringPtr - cString;
5069 }
5070
5071 if (currentIndex < length) {
5072 UTF16Char *contents;
5073
// Stage 2: not a native CF string. Copy the unprocessed tail into a temporary mutable CF string that wraps the
// allocated characters, fold that recursively, then replace the tail of theString with the result.
5074 if (isObjc) {
5075 CFMutableStringRef cfString;
5076 CFRange range = CFRangeMake(currentIndex, length - currentIndex);
5077
5078 contents = (UTF16Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * range.length, 0);
5079
5080 CFStringGetCharacters(theString, range, contents);
5081
// NOTE(review): contents is not freed explicitly in this function; presumably the NoCopy string takes ownership of it — confirm.
5082 cfString = CFStringCreateMutableWithExternalCharactersNoCopy(kCFAllocatorSystemDefault, contents, range.length, range.length, NULL);
5083
5084 CFStringFold(cfString, theFlags, theLocale);
5085
5086 CFStringReplace(theString, range, cfString);
5087
5088 CFRelease(cfString);
5089 } else {
// Stage 3: native CF string, general path.
5090 const UTF32Char *characters;
5091 const UTF32Char *charactersLimit;
5092 UTF32Char character;
5093 CFIndex consumedLength;
5094
5095 contents = NULL;
5096
// A fold result is still pending from stage 1: make room for its extra characters after currentIndex, refresh
// length and the inline buffer (the string was resized), then store the result starting at currentIndex.
5097 if (bufferLength > 0) {
5098 __CFStringChangeSize(theString, CFRangeMake(currentIndex + 1, 0), bufferLength - 1, true);
5099 length = __CFStrLength(theString);
5100 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5101
5102 contents = (UTF16Char *)__CFStrContents(theString) + currentIndex;
5103 characters = buffer;
5104 charactersLimit = characters + bufferLength;
// NOTE(review): each UTF32 value is narrowed to UTF16Char by a plain cast; presumably values above 0xFFFF cannot occur here — confirm.
5105 while (characters < charactersLimit) *(contents++) = (UTF16Char)*(characters++);
5106 ++currentIndex;
5107 }
5108
5109 while (currentIndex < length) {
5110 character = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex);
5111
// consumedLength stays 0 when the character at currentIndex is left unchanged.
5112 consumedLength = 0;
5113
5114 if ((NULL == langCode) && (character < 0x80) && (0 == (theFlags & kCFCompareDiacriticInsensitive))) {
// ASCII shortcut: only an uppercase letter under case folding produces output.
5115 if (caseInsensitive && (character >= 'A') && (character <= 'Z')) {
5116 consumedLength = 1;
5117 bufferLength = 1;
5118 *buffer = character + ('a' - 'A');
5119 }
5120 } else {
// Combine a high/low surrogate pair into one UTF32 character before folding.
5121 if (CFUniCharIsSurrogateHighCharacter(character) && ((currentIndex + 1) < length)) {
5122 UTF16Char lowSurrogate = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex + 1);
5123 if (CFUniCharIsSurrogateLowCharacter(lowSurrogate)) character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
5124 }
5125
5126 bufferLength = __CFStringFoldCharacterClusterAtIndex(character, &stringBuffer, currentIndex, theFlags, langCode, buffer, kCFStringStackBufferLength, &consumedLength);
5127 }
5128
5129 if (consumedLength > 0) {
5130 CFIndex utf16Length = bufferLength;
5131
5132 characters = buffer;
5133 charactersLimit = characters + bufferLength;
5134
5135 while (characters < charactersLimit) if (*(characters++) > 0xFFFF) ++utf16Length; // Extend bufferLength to the UTF-16 length
5136
// Resize when the replacement differs in UTF-16 length from what it replaces, and also whenever the string is
// still 8-bit (the final `true` argument presumably requests Unicode storage — confirm against __CFStringChangeSize).
5137 if ((utf16Length != consumedLength) || __CFStrIsEightBit(theString)) {
5138 CFRange range;
5139 CFIndex insertLength;
5140
5141 if (consumedLength < utf16Length) { // Need to expand
5142 range = CFRangeMake(currentIndex + consumedLength, 0);
5143 insertLength = utf16Length - consumedLength;
5144 } else {
5145 range = CFRangeMake(currentIndex + utf16Length, consumedLength - utf16Length);
5146 insertLength = 0;
5147 }
5148 __CFStringChangeSize(theString, range, insertLength, true);
5149 length = __CFStrLength(theString);
5150 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5151 }
5152
// Write the folded characters as UTF-16 over the (resized) range starting at currentIndex.
5153 (void)CFUniCharFromUTF32(buffer, bufferLength, (UTF16Char *)__CFStrContents(theString) + currentIndex, true, __CF_BIG_ENDIAN__);
5154
5155 currentIndex += utf16Length;
5156 } else {
5157 ++currentIndex;
5158 }
5159 }
5160 }
5161 }
5162
// Release the locale only if it was copied by this function (the caller passed NULL).
5163 bail:
5164 if (NULL == locale && theLocale) {
5165 CFRelease(theLocale);
5166 }
5167 }
5168
/* Flag bits stored in CFFormatSpec.flags. __CFParseFormatSpec sets and clears them while it scans a spec. */
5169 enum {
5170 kCFStringFormatZeroFlag = (1 << 0), // if not, padding is space char
5171 kCFStringFormatMinusFlag = (1 << 1), // if not, no flag implied
5172 kCFStringFormatPlusFlag = (1 << 2), // if not, no flag implied, overrides space
5173 kCFStringFormatSpaceFlag = (1 << 3), // if not, no flag implied
5174 kCFStringFormatExternalSpecFlag = (1 << 4) // using config dict
5175 };
5176
/* One piece of a parsed format string: either a run of literal characters or a single '%' conversion specification. */
5177 typedef struct {
5178 int16_t size; // one of the CFFormatSize* codes; 0 (CFFormatDefaultSize) when no length modifier was seen
5179 int16_t type; // one of the CFFormat*Type codes; 0 when no conversion character was found
5180 SInt32 loc; // index in the format string where this piece starts
5181 SInt32 len; // number of format characters this piece covers
5182 SInt32 widthArg; // width written literally in the format; -1 if none
5183 SInt32 precArg; // precision written literally in the format; -1 if none
5184 uint32_t flags; // combination of kCFStringFormat*Flag bits
5185 int8_t mainArgNum; // index of the value argument; -1 until assigned
5186 int8_t precArgNum; // index of the precision argument; -1 if none, -2 while a '*' precision awaits an index
5187 int8_t widthArgNum; // index of the width argument; -1 if none, -2 while a '*' width awaits an index
5188 int8_t configDictIndex; // initialized to -1 by the format core; NOTE(review): no other use is visible in this part of the file
5189 } CFFormatSpec;
5190
/* One argument collected from the va_list (or supplied by the caller), tagged with how it has to be read. */
5191 typedef struct {
5192 int16_t type; // one of the CFFormat*Type codes; 0 if no spec refers to this argument
5193 int16_t size; // one of the CFFormatSize* codes
5194 union {
5195 int64_t int64Value; // integer and single-character arguments, widened to 64 bits
5196 double doubleValue; // floating-point arguments
5197 #if LONG_DOUBLE_SUPPORT
5198 long double longDoubleValue; // floating-point arguments whose size is CFFormatSize16
5199 #endif
5200 void *pointerValue; // pointer, object and string arguments
5201 } value;
5202 } CFPrintValue;
5203
/* Size codes for CFFormatSpec.size and CFPrintValue.size.
   These are codes, not byte counts: CFFormatSize4 is 3, CFFormatSize8 is 4 and CFFormatSize16 is 5.
   CFFormatSizeLong and CFFormatSizePointer are aliases that depend on __LP64__.
*/
5204 enum {
5205 CFFormatDefaultSize = 0,
5206 CFFormatSize1 = 1,
5207 CFFormatSize2 = 2,
5208 CFFormatSize4 = 3,
5209 CFFormatSize8 = 4,
5210 CFFormatSize16 = 5,
5211 #if __LP64__
5212 CFFormatSizeLong = CFFormatSize8,
5213 CFFormatSizePointer = CFFormatSize8
5214 #else
5215 CFFormatSizeLong = CFFormatSize4,
5216 CFFormatSizePointer = CFFormatSize4
5217 #endif
5218 };
5219
5220
5221
/* Type codes for CFFormatSpec.type and CFPrintValue.type. The value 0 is not listed; it is used for "no type". */
5222 enum {
5223 CFFormatLiteralType = 32, // literal text, or a '%' followed by a character that is not a recognized conversion
5224 CFFormatLongType = 33, // integer conversions, and %c
5225 CFFormatDoubleType = 34, // floating-point conversions
5226 CFFormatPointerType = 35, // %p
5227 CFFormatObjectType = 36, /* handled specially */ /* ??? not used anymore, can be removed? */
5228 CFFormatCFType = 37, /* handled specially */
5229 CFFormatUnicharsType = 38, /* handled specially */
5230 CFFormatCharsType = 39, /* handled specially */
5231 CFFormatPascalCharsType = 40, /* handled specially */
5232 CFFormatSingleUnicharType = 41, /* handled specially */
5233 CFFormatDummyPointerType = 42 /* special case for %n */
5234 };
5235
5236 CF_INLINE void __CFParseFormatSpec(const UniChar *uformat, const uint8_t *cformat, SInt32 *fmtIdx, SInt32 fmtLen, CFFormatSpec *spec, CFStringRef *configKeyPointer) {
5237 Boolean seenDot = false;
5238 Boolean seenSharp = false;
5239 CFIndex keyIndex = kCFNotFound;
5240
5241 for (;;) {
5242 UniChar ch;
5243 if (fmtLen <= *fmtIdx) return; /* no type */
5244 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5245
5246 if (keyIndex >= 0) {
5247 if ((ch < '0') || ((ch > '9') && (ch < 'A')) || ((ch > 'Z') && (ch < 'a') && (ch != '_')) || (ch > 'z')) {
5248 if (ch == '@') { // found the key
5249 CFIndex length = (*fmtIdx) - 1 - keyIndex;
5250
5251 spec->flags |= kCFStringFormatExternalSpecFlag;
5252 spec->type = CFFormatCFType;
5253 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5254
5255 if ((NULL != configKeyPointer) && (length > 0)) {
5256 if (cformat) {
5257 *configKeyPointer = CFStringCreateWithBytes(NULL, cformat + keyIndex, length, __CFStringGetEightBitStringEncoding(), FALSE);
5258 } else {
5259 *configKeyPointer = CFStringCreateWithCharactersNoCopy(NULL, uformat + keyIndex, length, kCFAllocatorNull);
5260 }
5261 }
5262 return;
5263 }
5264 keyIndex = kCFNotFound;
5265 }
5266 continue;
5267 }
5268
5269 reswtch:switch (ch) {
5270 case '#': // ignored for now
5271 seenSharp = true;
5272 break;
5273 case 0x20:
5274 if (!(spec->flags & kCFStringFormatPlusFlag)) spec->flags |= kCFStringFormatSpaceFlag;
5275 break;
5276 case '-':
5277 spec->flags |= kCFStringFormatMinusFlag;
5278 spec->flags &= ~kCFStringFormatZeroFlag; // remove zero flag
5279 break;
5280 case '+':
5281 spec->flags |= kCFStringFormatPlusFlag;
5282 spec->flags &= ~kCFStringFormatSpaceFlag; // remove space flag
5283 break;
5284 case '0':
5285 if (!(spec->flags & kCFStringFormatMinusFlag)) spec->flags |= kCFStringFormatZeroFlag;
5286 break;
5287 case 'h':
5288 if (*fmtIdx < fmtLen) {
5289 // fetch next character, don't increment fmtIdx
5290 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
5291 if ('h' == ch) { // 'hh' for char, like 'c'
5292 (*fmtIdx)++;
5293 spec->size = CFFormatSize1;
5294 break;
5295 }
5296 }
5297 spec->size = CFFormatSize2;
5298 break;
5299 case 'l':
5300 if (*fmtIdx < fmtLen) {
5301 // fetch next character, don't increment fmtIdx
5302 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
5303 if ('l' == ch) { // 'll' for long long, like 'q'
5304 (*fmtIdx)++;
5305 spec->size = CFFormatSize8;
5306 break;
5307 }
5308 }
5309 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5310 break;
5311 #if LONG_DOUBLE_SUPPORT
5312 case 'L':
5313 spec->size = CFFormatSize16;
5314 break;
5315 #endif
5316 case 'q':
5317 spec->size = CFFormatSize8;
5318 break;
5319 case 't': case 'z':
5320 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5321 break;
5322 case 'j':
5323 spec->size = CFFormatSize8;
5324 break;
5325 case 'c':
5326 spec->type = CFFormatLongType;
5327 spec->size = CFFormatSize1;
5328 return;
5329 case 'O': case 'o': case 'D': case 'd': case 'i': case 'U': case 'u': case 'x': case 'X':
5330 spec->type = CFFormatLongType;
5331 // Seems like if spec->size == 0, we should spec->size = CFFormatSize4. However, 0 is handled correctly.
5332 return;
5333 case 'a': case 'A': case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
5334 spec->type = CFFormatDoubleType;
5335 if (spec->size != CFFormatSize16) spec->size = CFFormatSize8;
5336 return;
5337 case 'n': /* %n is not handled correctly; for Leopard or newer apps, we disable it further */
5338 spec->type = _CFExecutableLinkedOnOrAfter(CFSystemVersionLeopard) ? CFFormatDummyPointerType : CFFormatPointerType;
5339 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5340 return;
5341 case 'p':
5342 spec->type = CFFormatPointerType;
5343 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5344 return;
5345 case 's':
5346 spec->type = CFFormatCharsType;
5347 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5348 return;
5349 case 'S':
5350 spec->type = CFFormatUnicharsType;
5351 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5352 return;
5353 case 'C':
5354 spec->type = CFFormatSingleUnicharType;
5355 spec->size = CFFormatSize2;
5356 return;
5357 case 'P':
5358 spec->type = CFFormatPascalCharsType;
5359 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5360 return;
5361 case '@':
5362 if (seenSharp) {
5363 seenSharp = false;
5364 keyIndex = *fmtIdx;
5365 break;
5366 } else {
5367 spec->type = CFFormatCFType;
5368 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5369 return;
5370 }
5371 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
5372 int64_t number = 0;
5373 do {
5374 number = 10 * number + (ch - '0');
5375 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5376 } while ((UInt32)(ch - '0') <= 9);
5377 if ('$' == ch) {
5378 if (-2 == spec->precArgNum) {
5379 spec->precArgNum = (int8_t)number - 1; // Arg numbers start from 1
5380 } else if (-2 == spec->widthArgNum) {
5381 spec->widthArgNum = (int8_t)number - 1; // Arg numbers start from 1
5382 } else {
5383 spec->mainArgNum = (int8_t)number - 1; // Arg numbers start from 1
5384 }
5385 break;
5386 } else if (seenDot) { /* else it's either precision or width */
5387 spec->precArg = (SInt32)number;
5388 } else {
5389 spec->widthArg = (SInt32)number;
5390 }
5391 goto reswtch;
5392 }
5393 case '*':
5394 spec->widthArgNum = -2;
5395 break;
5396 case '.':
5397 seenDot = true;
5398 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5399 if ('*' == ch) {
5400 spec->precArgNum = -2;
5401 break;
5402 }
5403 goto reswtch;
5404 default:
5405 spec->type = CFFormatLiteralType;
5406 return;
5407 }
5408 }
5409 }
5410
5411 /* Appends formatString, formatted with args, to outputString. This is a plain forward to __CFStringAppendFormatCore
   with no description callback, an initial argument position of 0 and no pre-collected argument values.
   ??? It ignores the formatOptions argument.
   NOTE(review): formatOptions is passed through to the core unchanged here, so the remark above may be stale — confirm.
5412 ??? %s depends on handling of encodings by __CFStringAppendBytes
5413 */
5414 void CFStringAppendFormatAndArguments(CFMutableStringRef outputString, CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
5415 __CFStringAppendFormatCore(outputString, NULL, formatOptions, formatString, 0, NULL, 0, args);
5416 }
5417
/* SNPRINTF(TYPE, WHAT) formats WHAT, converted to TYPE, into the local `buffer` using the printf format held in
   `formatBuffer`. It expands to a brace-enclosed block and is written without a trailing semicolon at its use sites.
   These names must be in scope where it is expanded: specs, curSpec, buffer, formatBuffer, width, precision.
   width and/or precision are passed as extra arguments only when the spec takes them from the argument list
   (widthArgNum / precArgNum other than -1).
*/
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
#define SNPRINTF(TYPE, WHAT) {				\
    TYPE value = (TYPE) WHAT;				\
    if (-1 != specs[curSpec].widthArgNum) {		\
	if (-1 != specs[curSpec].precArgNum) {		\
	    snprintf_l(buffer, 255, NULL, formatBuffer, width, precision, value); \
	} else {					\
	    snprintf_l(buffer, 255, NULL, formatBuffer, width, value); \
	}						\
    } else {						\
	if (-1 != specs[curSpec].precArgNum) {		\
	    snprintf_l(buffer, 255, NULL, formatBuffer, precision, value); \
	} else {					\
	    snprintf_l(buffer, 255, NULL, formatBuffer, value);	\
	}						\
    }}
#else
/* This branch used to call sprintf(), which writes without any limit: for example "%f" applied to 1e300 produces 308
   characters and overruns a 256-byte buffer. The output is now bounded with snprintf() (declared in <stdio.h>).
   The bound is the size the expansion site in __CFStringAppendFormatCore gives `buffer`, 256 + width + precision, so
   output that used to fit is still produced in full. If that sum is not positive, the 255 used by the branch above
   applies instead.
   NOTE(review): assumes every expansion site sizes `buffer` that way — confirm for any use outside the visible part.
*/
#define SNPRINTF(TYPE, WHAT) {				\
    TYPE value = (TYPE) WHAT;				\
    const size_t bufferCapacity = ((256 + width + precision) > 0) ? (size_t)(256 + width + precision) : (size_t)255; \
    if (-1 != specs[curSpec].widthArgNum) {		\
	if (-1 != specs[curSpec].precArgNum) {		\
	    snprintf(buffer, bufferCapacity, formatBuffer, width, precision, value); \
	} else {					\
	    snprintf(buffer, bufferCapacity, formatBuffer, width, value); \
	}						\
    } else {						\
	if (-1 != specs[curSpec].precArgNum) {		\
	    snprintf(buffer, bufferCapacity, formatBuffer, precision, value); \
	} else {					\
	    snprintf(buffer, bufferCapacity, formatBuffer, value); \
	}						\
    }}
#endif
5451
/* Same forward to __CFStringAppendFormatCore as CFStringAppendFormatAndArguments, except that copyDescFunc is handed to the core instead of NULL. */
5452 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) { __CFStringAppendFormatCore(outputString, copyDescFunc, formatOptions, formatString, 0, NULL, 0, args); }
5453
5454 static void __CFStringAppendFormatCore(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef formatString, CFIndex initialArgPosition, const void *origValues, CFIndex originalValuesSize, va_list args) {
5455 SInt32 numSpecs, sizeSpecs, sizeArgNum, formatIdx, curSpec, argNum;
5456 CFIndex formatLen;
5457 #define FORMAT_BUFFER_LEN 400
5458 const uint8_t *cformat = NULL;
5459 const UniChar *uformat = NULL;
5460 UniChar *formatChars = NULL;
5461 UniChar localFormatBuffer[FORMAT_BUFFER_LEN];
5462
5463 #define VPRINTF_BUFFER_LEN 61
5464 CFFormatSpec localSpecsBuffer[VPRINTF_BUFFER_LEN];
5465 CFFormatSpec *specs;
5466 CFPrintValue localValuesBuffer[VPRINTF_BUFFER_LEN];
5467 CFPrintValue *values;
5468 const CFPrintValue *originalValues = (const CFPrintValue *)origValues;
5469 CFDictionaryRef localConfigs[VPRINTF_BUFFER_LEN];
5470 CFDictionaryRef *configs;
5471 CFIndex numConfigs;
5472 CFAllocatorRef tmpAlloc = NULL;
5473 intmax_t dummyLocation; // A place for %n to do its thing in; should be the widest possible int value
5474 va_list copiedArgs;
5475
5476 numSpecs = 0;
5477 sizeSpecs = 0;
5478 sizeArgNum = 0;
5479 numConfigs = 0;
5480 specs = NULL;
5481 values = NULL;
5482 configs = NULL;
5483
5484
5485 formatLen = CFStringGetLength(formatString);
5486 if (!CF_IS_OBJC(__kCFStringTypeID, formatString)) {
5487 __CFAssertIsString(formatString);
5488 if (!__CFStrIsUnicode(formatString)) {
5489 cformat = (const uint8_t *)__CFStrContents(formatString);
5490 if (cformat) cformat += __CFStrSkipAnyLengthByte(formatString);
5491 } else {
5492 uformat = (const UniChar *)__CFStrContents(formatString);
5493 }
5494 }
5495 if (!cformat && !uformat) {
5496 formatChars = (formatLen > FORMAT_BUFFER_LEN) ? (UniChar *)CFAllocatorAllocate(tmpAlloc = __CFGetDefaultAllocator(), formatLen * sizeof(UniChar), 0) : localFormatBuffer;
5497 if (formatChars != localFormatBuffer && __CFOASafe) __CFSetLastAllocationEventName(formatChars, "CFString (temp)");
5498 CFStringGetCharacters(formatString, CFRangeMake(0, formatLen), formatChars);
5499 uformat = formatChars;
5500 }
5501
5502 /* Compute an upper bound for the number of format specifications */
5503 if (cformat) {
5504 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == cformat[formatIdx]) sizeSpecs++;
5505 } else {
5506 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == uformat[formatIdx]) sizeSpecs++;
5507 }
5508 tmpAlloc = __CFGetDefaultAllocator();
5509 specs = ((2 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? (CFFormatSpec *)CFAllocatorAllocate(tmpAlloc, (2 * sizeSpecs + 1) * sizeof(CFFormatSpec), 0) : localSpecsBuffer;
5510 if (specs != localSpecsBuffer && __CFOASafe) __CFSetLastAllocationEventName(specs, "CFString (temp)");
5511
5512 configs = ((sizeSpecs < VPRINTF_BUFFER_LEN) ? localConfigs : (CFDictionaryRef *)CFAllocatorAllocate(tmpAlloc, sizeof(CFStringRef) * sizeSpecs, 0));
5513
5514 /* Collect format specification information from the format string */
5515 for (curSpec = 0, formatIdx = 0; formatIdx < formatLen; curSpec++) {
5516 SInt32 newFmtIdx;
5517 specs[curSpec].loc = formatIdx;
5518 specs[curSpec].len = 0;
5519 specs[curSpec].size = 0;
5520 specs[curSpec].type = 0;
5521 specs[curSpec].flags = 0;
5522 specs[curSpec].widthArg = -1;
5523 specs[curSpec].precArg = -1;
5524 specs[curSpec].mainArgNum = -1;
5525 specs[curSpec].precArgNum = -1;
5526 specs[curSpec].widthArgNum = -1;
5527 specs[curSpec].configDictIndex = -1;
5528 if (cformat) {
5529 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != cformat[newFmtIdx]; newFmtIdx++);
5530 } else {
5531 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != uformat[newFmtIdx]; newFmtIdx++);
5532 }
5533 if (newFmtIdx != formatIdx) { /* Literal chunk */
5534 specs[curSpec].type = CFFormatLiteralType;
5535 specs[curSpec].len = newFmtIdx - formatIdx;
5536 } else {
5537 CFStringRef configKey = NULL;
5538 newFmtIdx++; /* Skip % */
5539 __CFParseFormatSpec(uformat, cformat, &newFmtIdx, formatLen, &(specs[curSpec]), &configKey);
5540 if (CFFormatLiteralType == specs[curSpec].type) {
5541 specs[curSpec].loc = formatIdx + 1;
5542 specs[curSpec].len = 1;
5543 } else {
5544 specs[curSpec].len = newFmtIdx - formatIdx;
5545 }
5546 }
5547 formatIdx = newFmtIdx;
5548
5549 // fprintf(stderr, "specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
5550
5551 }
5552 numSpecs = curSpec;
5553
5554 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
5555 sizeArgNum = ((NULL == originalValues) ? (3 * sizeSpecs + 1) : originalValuesSize);
5556
5557 values = (sizeArgNum > VPRINTF_BUFFER_LEN) ? (CFPrintValue *)CFAllocatorAllocate(tmpAlloc, sizeArgNum * sizeof(CFPrintValue), 0) : localValuesBuffer;
5558 if (values != localValuesBuffer && __CFOASafe) __CFSetLastAllocationEventName(values, "CFString (temp)");
5559 memset(values, 0, sizeArgNum * sizeof(CFPrintValue));
5560
5561 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
5562 // va_copy is a C99 extension. No support on Windows
5563 if (numConfigs > 0) va_copy(copiedArgs, args); // we need to preserve the original state for passing down
5564 #endif /* DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD */
5565
5566 /* Compute values array */
5567 argNum = initialArgPosition;
5568 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
5569 SInt32 newMaxArgNum;
5570 if (0 == specs[curSpec].type) continue;
5571 if (CFFormatLiteralType == specs[curSpec].type) continue;
5572 newMaxArgNum = sizeArgNum;
5573 if (newMaxArgNum < specs[curSpec].mainArgNum) {
5574 newMaxArgNum = specs[curSpec].mainArgNum;
5575 }
5576 if (newMaxArgNum < specs[curSpec].precArgNum) {
5577 newMaxArgNum = specs[curSpec].precArgNum;
5578 }
5579 if (newMaxArgNum < specs[curSpec].widthArgNum) {
5580 newMaxArgNum = specs[curSpec].widthArgNum;
5581 }
5582 if (sizeArgNum < newMaxArgNum) {
5583 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
5584 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
5585 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
5586 return; // more args than we expected!
5587 }
5588 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
5589 if (-2 == specs[curSpec].widthArgNum) {
5590 specs[curSpec].widthArgNum = argNum++;
5591 }
5592 if (-2 == specs[curSpec].precArgNum) {
5593 specs[curSpec].precArgNum = argNum++;
5594 }
5595 if (-1 == specs[curSpec].mainArgNum) {
5596 specs[curSpec].mainArgNum = argNum++;
5597 }
5598
5599 values[specs[curSpec].mainArgNum].size = specs[curSpec].size;
5600 values[specs[curSpec].mainArgNum].type = specs[curSpec].type;
5601
5602
5603 if (-1 != specs[curSpec].widthArgNum) {
5604 values[specs[curSpec].widthArgNum].size = 0;
5605 values[specs[curSpec].widthArgNum].type = CFFormatLongType;
5606 }
5607 if (-1 != specs[curSpec].precArgNum) {
5608 values[specs[curSpec].precArgNum].size = 0;
5609 values[specs[curSpec].precArgNum].type = CFFormatLongType;
5610 }
5611 }
5612
5613 /* Collect the arguments in correct type from vararg list */
5614 for (argNum = 0; argNum < sizeArgNum; argNum++) {
5615 if ((NULL != originalValues) && (0 == values[argNum].type)) values[argNum] = originalValues[argNum];
5616 switch (values[argNum].type) {
5617 case 0:
5618 case CFFormatLiteralType:
5619 break;
5620 case CFFormatLongType:
5621 case CFFormatSingleUnicharType:
5622 if (CFFormatSize1 == values[argNum].size) {
5623 values[argNum].value.int64Value = (int64_t)(int8_t)va_arg(args, int);
5624 } else if (CFFormatSize2 == values[argNum].size) {
5625 values[argNum].value.int64Value = (int64_t)(int16_t)va_arg(args, int);
5626 } else if (CFFormatSize4 == values[argNum].size) {
5627 values[argNum].value.int64Value = (int64_t)va_arg(args, int32_t);
5628 } else if (CFFormatSize8 == values[argNum].size) {
5629 values[argNum].value.int64Value = (int64_t)va_arg(args, int64_t);
5630 } else {
5631 values[argNum].value.int64Value = (int64_t)va_arg(args, int);
5632 }
5633 break;
5634 case CFFormatDoubleType:
5635 #if LONG_DOUBLE_SUPPORT
5636 if (CFFormatSize16 == values[argNum].size) {
5637 values[argNum].value.longDoubleValue = va_arg(args, long double);
5638 } else
5639 #endif
5640 {
5641 values[argNum].value.doubleValue = va_arg(args, double);
5642 }
5643 break;
5644 case CFFormatPointerType:
5645 case CFFormatObjectType:
5646 case CFFormatCFType:
5647 case CFFormatUnicharsType:
5648 case CFFormatCharsType:
5649 case CFFormatPascalCharsType:
5650 values[argNum].value.pointerValue = va_arg(args, void *);
5651 break;
5652 case CFFormatDummyPointerType:
5653 (void)va_arg(args, void *); // Skip the provided argument
5654 values[argNum].value.pointerValue = &dummyLocation;
5655 break;
5656 }
5657 }
5658 va_end(args);
5659
5660 /* Format the pieces together */
5661
5662 if (NULL == originalValues) {
5663 originalValues = values;
5664 originalValuesSize = sizeArgNum;
5665 }
5666
5667 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
5668 SInt32 width = 0, precision = 0;
5669 UniChar *up, ch;
5670 Boolean hasWidth = false, hasPrecision = false;
5671
5672 // widthArgNum and widthArg are never set at the same time; same for precArg*
5673 if (-1 != specs[curSpec].widthArgNum) {
5674 width = (SInt32)values[specs[curSpec].widthArgNum].value.int64Value;
5675 hasWidth = true;
5676 }
5677 if (-1 != specs[curSpec].precArgNum) {
5678 precision = (SInt32)values[specs[curSpec].precArgNum].value.int64Value;
5679 hasPrecision = true;
5680 }
5681 if (-1 != specs[curSpec].widthArg) {
5682 width = specs[curSpec].widthArg;
5683 hasWidth = true;
5684 }
5685 if (-1 != specs[curSpec].precArg) {
5686 precision = specs[curSpec].precArg;
5687 hasPrecision = true;
5688 }
5689
5690 switch (specs[curSpec].type) {
5691 case CFFormatLongType:
5692 case CFFormatDoubleType:
5693 case CFFormatPointerType: {
5694 char formatBuffer[128];
5695 #if defined(__GNUC__)
5696 char buffer[256 + width + precision];
5697 #else
5698 char stackBuffer[512];
5699 char *dynamicBuffer = NULL;
5700 char *buffer = stackBuffer;
5701 if (256+width+precision > 512) {
5702 dynamicBuffer = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, 256+width+precision, 0);
5703 buffer = dynamicBuffer;
5704 }
5705 #endif
5706 SInt32 cidx, idx, loc;
5707 Boolean appended = false;
5708 loc = specs[curSpec].loc;
5709 // In preparation to call snprintf(), copy the format string out
5710 if (cformat) {
5711 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
5712 if ('$' == cformat[loc + cidx]) {
5713 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
5714 } else {
5715 formatBuffer[idx] = cformat[loc + cidx];
5716 }
5717 }
5718 } else {
5719 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
5720 if ('$' == uformat[loc + cidx]) {
5721 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
5722 } else {
5723 formatBuffer[idx] = (int8_t)uformat[loc + cidx];
5724 }
5725 }
5726 }
5727 formatBuffer[idx] = '\0';
5728 // Should modify format buffer here if necessary; for example, to translate %qd to
5729 // the equivalent, on architectures which do not have %q.
5730 buffer[sizeof(buffer) - 1] = '\0';
5731 switch (specs[curSpec].type) {
5732 case CFFormatLongType:
5733 if (CFFormatSize8 == specs[curSpec].size) {
5734 SNPRINTF(int64_t, values[specs[curSpec].mainArgNum].value.int64Value)
5735 } else {
5736 SNPRINTF(SInt32, values[specs[curSpec].mainArgNum].value.int64Value)
5737 }
5738 break;
5739 case CFFormatPointerType:
5740 case CFFormatDummyPointerType:
5741 SNPRINTF(void *, values[specs[curSpec].mainArgNum].value.pointerValue)
5742 break;
5743
5744 case CFFormatDoubleType:
5745 #if LONG_DOUBLE_SUPPORT
5746 if (CFFormatSize16 == specs[curSpec].size) {
5747 SNPRINTF(long double, values[specs[curSpec].mainArgNum].value.longDoubleValue)
5748 } else
5749 #endif
5750 {
5751 SNPRINTF(double, values[specs[curSpec].mainArgNum].value.doubleValue)
5752 }
5753 // See if we need to localize the decimal point
5754 if (formatOptions) { // We have localization info
5755 CFStringRef decimalSeparator = (CFGetTypeID(formatOptions) == CFLocaleGetTypeID()) ? (CFStringRef)CFLocaleGetValue((CFLocaleRef)formatOptions, kCFLocaleDecimalSeparatorKey) : (CFStringRef)CFDictionaryGetValue(formatOptions, CFSTR("NSDecimalSeparator"));
5756 if (decimalSeparator != NULL) { // We have a decimal separator in there
5757 CFIndex decimalPointLoc = 0;
5758 while (buffer[decimalPointLoc] != 0 && buffer[decimalPointLoc] != '.') decimalPointLoc++;
5759 if (buffer[decimalPointLoc] == '.') { // And we have a decimal point in the formatted string
5760 buffer[decimalPointLoc] = 0;
5761 CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
5762 CFStringAppend(outputString, decimalSeparator);
5763 CFStringAppendCString(outputString, (const char *)(buffer + decimalPointLoc + 1), __CFStringGetEightBitStringEncoding());
5764 appended = true;
5765 }
5766 }
5767 }
5768 break;
5769 }
5770 if (!appended) CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
5771 #if !defined(__GNUC__)
5772 if (dynamicBuffer) {
5773 CFAllocatorDeallocate(kCFAllocatorSystemDefault, dynamicBuffer);
5774 }
5775 #endif
5776 }
5777 break;
5778 case CFFormatLiteralType:
5779 if (cformat) {
5780 __CFStringAppendBytes(outputString, (const char *)(cformat+specs[curSpec].loc), specs[curSpec].len, __CFStringGetEightBitStringEncoding());
5781 } else {
5782 CFStringAppendCharacters(outputString, uformat+specs[curSpec].loc, specs[curSpec].len);
5783 }
5784 break;
5785 case CFFormatPascalCharsType:
5786 case CFFormatCharsType:
5787 if (values[specs[curSpec].mainArgNum].value.pointerValue == NULL) {
5788 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5789 } else {
5790 int len;
5791 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue;
5792 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
5793 len = ((unsigned char *)str)[0];
5794 str++;
5795 if (hasPrecision && precision < len) len = precision;
5796 } else { // C-string case
5797 if (!hasPrecision) { // No precision, so rely on the terminating null character
5798 len = strlen(str);
5799 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
5800 const char *terminatingNull = (const char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
5801 if (terminatingNull) { // There was a null in the first precision characters
5802 len = terminatingNull - str;
5803 } else {
5804 len = precision;
5805 }
5806 }
5807 }
5808 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5809 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5810 // to ignore those flags (and, say, never pad with '0' instead of space).
5811 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
5812 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
5813 if (hasWidth && width > len) {
5814 int w = width - len; // We need this many spaces; do it ten at a time
5815 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5816 }
5817 } else {
5818 if (hasWidth && width > len) {
5819 int w = width - len; // We need this many spaces; do it ten at a time
5820 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5821 }
5822 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
5823 }
5824 }
5825 break;
5826 case CFFormatSingleUnicharType:
5827 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value;
5828 CFStringAppendCharacters(outputString, &ch, 1);
5829 break;
5830 case CFFormatUnicharsType:
5831 //??? need to handle width, precision, and padding arguments
5832 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue;
5833 if (NULL == up) {
5834 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5835 } else {
5836 int len;
5837 for (len = 0; 0 != up[len]; len++);
5838 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5839 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5840 // to ignore those flags (and, say, never pad with '0' instead of space).
5841 if (hasPrecision && precision < len) len = precision;
5842 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
5843 CFStringAppendCharacters(outputString, up, len);
5844 if (hasWidth && width > len) {
5845 int w = width - len; // We need this many spaces; do it ten at a time
5846 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5847 }
5848 } else {
5849 if (hasWidth && width > len) {
5850 int w = width - len; // We need this many spaces; do it ten at a time
5851 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5852 }
5853 CFStringAppendCharacters(outputString, up, len);
5854 }
5855 }
5856 break;
5857 case CFFormatCFType:
5858 case CFFormatObjectType:
5859 if (specs[curSpec].configDictIndex != -1) { // config dict
5860 CFTypeRef object = NULL;
5861 CFStringRef innerFormat = NULL;
5862
5863 switch (values[specs[curSpec].mainArgNum].type) {
5864 case CFFormatLongType:
5865 object = CFNumberCreate(tmpAlloc, kCFNumberSInt64Type, &(values[specs[curSpec].mainArgNum].value.int64Value));
5866 break;
5867
5868 case CFFormatDoubleType:
5869 #if LONG_DOUBLE_SUPPORT
5870 if (CFFormatSize16 == values[specs[curSpec].mainArgNum].size) {
5871 double aValue = values[specs[curSpec].mainArgNum].value.longDoubleValue; // losing precision
5872
5873 object = CFNumberCreate(tmpAlloc, kCFNumberDoubleType, &aValue);
5874 } else
5875 #endif
5876 {
5877 object = CFNumberCreate(tmpAlloc, kCFNumberDoubleType, &(values[specs[curSpec].mainArgNum].value.doubleValue));
5878 }
5879 break;
5880
5881 case CFFormatPointerType:
5882 object = CFNumberCreate(tmpAlloc, kCFNumberCFIndexType, &(values[specs[curSpec].mainArgNum].value.pointerValue));
5883 break;
5884
5885 case CFFormatPascalCharsType:
5886 case CFFormatCharsType:
5887 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
5888 CFMutableStringRef aString = CFStringCreateMutable(tmpAlloc, 0);
5889 int len;
5890 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue;
5891 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
5892 len = ((unsigned char *)str)[0];
5893 str++;
5894 if (hasPrecision && precision < len) len = precision;
5895 } else { // C-string case
5896 if (!hasPrecision) { // No precision, so rely on the terminating null character
5897 len = strlen(str);
5898 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
5899 const char *terminatingNull = (const char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
5900 if (terminatingNull) { // There was a null in the first precision characters
5901 len = terminatingNull - str;
5902 } else {
5903 len = precision;
5904 }
5905 }
5906 }
5907 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5908 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5909 // to ignore those flags (and, say, never pad with '0' instead of space).
5910 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
5911 __CFStringAppendBytes(aString, str, len, __CFStringGetSystemEncoding());
5912 if (hasWidth && width > len) {
5913 int w = width - len; // We need this many spaces; do it ten at a time
5914 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5915 }
5916 } else {
5917 if (hasWidth && width > len) {
5918 int w = width - len; // We need this many spaces; do it ten at a time
5919 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5920 }
5921 __CFStringAppendBytes(aString, str, len, __CFStringGetSystemEncoding());
5922 }
5923
5924 object = aString;
5925 }
5926 break;
5927
5928 case CFFormatSingleUnicharType:
5929 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value;
5930 object = CFStringCreateWithCharactersNoCopy(tmpAlloc, &ch, 1, kCFAllocatorNull);
5931 break;
5932
5933 case CFFormatUnicharsType:
5934 //??? need to handle width, precision, and padding arguments
5935 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue;
5936 if (NULL != up) {
5937 CFMutableStringRef aString = CFStringCreateMutable(tmpAlloc, 0);
5938 int len;
5939 for (len = 0; 0 != up[len]; len++);
5940 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5941 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5942 // to ignore those flags (and, say, never pad with '0' instead of space).
5943 if (hasPrecision && precision < len) len = precision;
5944 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
5945 CFStringAppendCharacters(aString, up, len);
5946 if (hasWidth && width > len) {
5947 int w = width - len; // We need this many spaces; do it ten at a time
5948 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5949 }
5950 } else {
5951 if (hasWidth && width > len) {
5952 int w = width - len; // We need this many spaces; do it ten at a time
5953 do {__CFStringAppendBytes(aString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5954 }
5955 CFStringAppendCharacters(aString, up, len);
5956 }
5957 object = aString;
5958 }
5959 break;
5960
5961 case CFFormatCFType:
5962 case CFFormatObjectType:
5963 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) object = CFRetain(values[specs[curSpec].mainArgNum].value.pointerValue);
5964 break;
5965 }
5966
5967 if (NULL != object) CFRelease(object);
5968
5969 } else if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
5970 CFStringRef str = NULL;
5971 if (copyDescFunc) {
5972 str = copyDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
5973 } else {
5974 str = __CFCopyFormattingDescription(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
5975 if (NULL == str) {
5976 str = CFCopyDescription(values[specs[curSpec].mainArgNum].value.pointerValue);
5977 }
5978 }
5979 if (str) {
5980 CFStringAppend(outputString, str);
5981 CFRelease(str);
5982 } else {
5983 CFStringAppendCString(outputString, "(null description)", kCFStringEncodingASCII);
5984 }
5985 } else {
5986 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5987 }
5988 break;
5989 }
5990 }
5991
5992 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
5993 // va_copy is a C99 extension. No support on Windows
5994 if (numConfigs > 0) va_end(copiedArgs);
5995 #endif /* DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD */
5996 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
5997 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
5998 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
5999 if (configs != localConfigs) CFAllocatorDeallocate(tmpAlloc, configs);
6000 }
6001
6002 #undef SNPRINTF
6003
6004 void CFShowStr(CFStringRef str) {
6005 CFAllocatorRef alloc;
6006
6007 if (!str) {
6008 fprintf(stdout, "(null)\n");
6009 return;
6010 }
6011
6012 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
6013 fprintf(stdout, "This is an NSString, not CFString\n");
6014 return;
6015 }
6016
6017 alloc = CFGetAllocator(str);
6018
6019 fprintf(stdout, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str), __CFStrIsEightBit(str));
6020 fprintf(stdout, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
6021 __CFStrHasLengthByte(str), __CFStrHasNullByte(str), __CFStrIsInline(str));
6022
6023 fprintf(stdout, "Allocator ");
6024 if (alloc != kCFAllocatorSystemDefault) {
6025 fprintf(stdout, "%p\n", (void *)alloc);
6026 } else {
6027 fprintf(stdout, "SystemDefault\n");
6028 }
6029 fprintf(stdout, "Mutable %d\n", __CFStrIsMutable(str));
6030 if (!__CFStrIsMutable(str) && __CFStrHasContentsDeallocator(str)) {
6031 if (__CFStrContentsDeallocator(str)) fprintf(stdout, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str));
6032 else fprintf(stdout, "ContentsDeallocatorFunc None\n");
6033 } else if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str)) {
6034 fprintf(stdout, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef)str));
6035 }
6036
6037 if (__CFStrIsMutable(str)) {
6038 fprintf(stdout, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str), __CFStrIsFixed(str) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str));
6039 }
6040 fprintf(stdout, "Contents %p\n", (void *)__CFStrContents(str));
6041 }
6042
6043
6044
6045 #undef HANGUL_SBASE
6046 #undef HANGUL_LBASE
6047 #undef HANGUL_VBASE
6048 #undef HANGUL_TBASE
6049 #undef HANGUL_SCOUNT
6050 #undef HANGUL_LCOUNT
6051 #undef HANGUL_VCOUNT
6052 #undef HANGUL_TCOUNT
6053 #undef HANGUL_NCOUNT
6054