]> git.saurik.com Git - apple/cf.git/blob - CFString.c
CF-550.13.tar.gz
[apple/cf.git] / CFString.c
1 /*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFString.c
25 Copyright (c) 1998-2009, Apple Inc. All rights reserved.
26 Responsibility: Ali Ozer
27
28 !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined.
29 */
30
31 #include <CoreFoundation/CFBase.h>
32 #include <CoreFoundation/CFString.h>
33 #include <CoreFoundation/CFDictionary.h>
34 #include <CoreFoundation/CFStringEncodingConverterExt.h>
35 #include <CoreFoundation/CFUniChar.h>
36 #include <CoreFoundation/CFUnicodeDecomposition.h>
37 #include <CoreFoundation/CFUnicodePrecomposition.h>
38 #include <CoreFoundation/CFPriv.h>
39 #include "CFInternal.h"
40 #include "CFLocaleInternal.h"
41 #include <stdarg.h>
42 #include <stdio.h>
43 #include <string.h>
44 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
45 #include <unistd.h>
46 #endif
47 #if DEPLOYMENT_TARGET_WINDOWS
48 #define strncasecmp_l(a, b, c, d) _strnicmp(a, b, c)
49 #endif
50
/* LONG_DOUBLE_SUPPORT is 1 when the compiler defines __GNUC__, 0 otherwise. */
#ifdef __GNUC__
#   define LONG_DOUBLE_SUPPORT 1
#else
#   define LONG_DOUBLE_SUPPORT 0
#endif

/* Always 0 in this file. */
#define USE_STRING_ROM 0

/* Defaults to 0; a build may predefine INSTRUMENT_SHARED_STRINGS to override. */
#if !defined(INSTRUMENT_SHARED_STRINGS)
#   define INSTRUMENT_SHARED_STRINGS 0
#endif
65
66 __private_extern__ const CFStringRef __kCFLocaleCollatorID;
67
68 #if INSTRUMENT_SHARED_STRINGS
69 #include <sys/stat.h> /* for umask() */
70
71 static void __CFRecordStringAllocationEvent(const char *encoding, const char *bytes, CFIndex byteCount) {
72 static CFSpinLock_t lock = CFSpinLockInit;
73
74 if (memchr(bytes, '\n', byteCount)) return; //never record string allocation events for strings with newlines, because those confuse our parser and because they'll never go into the ROM
75
76 __CFSpinLock(&lock);
77 static int fd;
78 if (! fd) {
79 extern char **_NSGetProgname(void);
80 const char *name = *_NSGetProgname();
81 if (! name) name = "UNKNOWN";
82 umask(0);
83 char path[1024];
84 snprintf(path, sizeof(path), "/tmp/CFSharedStringInstrumentation_%s_%d.txt", name, getpid());
85 fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0666);
86 if (fd <= 0) {
87 int error = errno;
88 const char *errString = strerror(error);
89 fprintf(stderr, "open() failed with error %d (%s)\n", error, errString);
90 }
91 }
92 if (fd > 0) {
93 char *buffer = NULL;
94 char formatString[256];
95 snprintf(formatString, sizeof(formatString), "%%-8d\t%%-16s\t%%.%lds\n", byteCount);
96 int resultCount = asprintf(&buffer, formatString, getpid(), encoding, bytes);
97 if (buffer && resultCount > 0) write(fd, buffer, resultCount);
98 else puts("Couldn't record allocation event");
99 free(buffer);
100 }
101 __CFSpinUnlock(&lock);
102 }
103 #endif //INSTRUMENT_SHARED_STRINGS
104
105
106
107 typedef Boolean (*UNI_CHAR_FUNC)(UInt32 flags, UInt8 ch, UniChar *unicodeChar);
108
109 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
110 extern size_t malloc_good_size(size_t size);
111 #endif
112 extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars);
113
#ifdef DEBUG

/* Text placed into C & Pascal strings when a conversion cannot be performed */
#define CONVERSIONFAILURESTR "CFString conversion failed"

/* Set to true while the constant string table is being purged, so that CFStringDeallocate doesn't assert */
static Boolean __CFConstantStringTableBeingFreed = false;

#endif /* DEBUG */
123
124
125
126 // This section is for CFString compatibility and other behaviors...
127
128 static CFOptionFlags _CFStringCompatibilityMask = 0;
129
130 #define Bug2967272 1
131
132 void _CFStringSetCompatibility(CFOptionFlags mask) {
133 _CFStringCompatibilityMask |= mask;
134 }
135
136 CF_INLINE Boolean __CFStringGetCompatibility(CFOptionFlags mask) {
137 return (_CFStringCompatibilityMask & mask) == mask;
138 }
139
140
141
142 // Two constant strings used by CFString; these are initialized in CFStringInitialize
143 CONST_STRING_DECL(kCFEmptyString, "")
144
145 // This is separate for C++
146 struct __notInlineMutable {
147 void *buffer;
148 CFIndex length;
149 CFIndex capacity; // Capacity in bytes
150 unsigned int hasGap:1; // Currently unused
151 unsigned int isFixedCapacity:1;
152 unsigned int isExternalMutable:1;
153 unsigned int capacityProvidedExternally:1;
154 #if __LP64__
155 unsigned long desiredCapacity:60;
156 #else
157 unsigned long desiredCapacity:28;
158 #endif
159 CFAllocatorRef contentsAllocator; // Optional
160 }; // The only mutable variant for CFString
161
162
163 /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
164 */
165 struct __CFString {
166 CFRuntimeBase base;
167 union { // In many cases the allocated structs are smaller than these
168 struct __inline1 {
169 CFIndex length;
170 } inline1; // Bytes follow the length
171 struct __notInlineImmutable1 {
172 void *buffer; // Note that the buffer is in the same place for all non-inline variants of CFString
173 CFIndex length;
174 CFAllocatorRef contentsDeallocator; // Optional; just the dealloc func is used
175 } notInlineImmutable1; // This is the usual not-inline immutable CFString
176 struct __notInlineImmutable2 {
177 void *buffer;
178 CFAllocatorRef contentsDeallocator; // Optional; just the dealloc func is used
179 } notInlineImmutable2; // This is the not-inline immutable CFString when length is stored with the contents (first byte)
180 struct __notInlineMutable notInlineMutable;
181 } variants;
182 };
183
184 /*
185 I = is immutable
186 E = not inline contents
187 U = is Unicode
188 N = has NULL byte
189 L = has length byte
190 D = explicit deallocator for contents (for mutable objects, allocator)
191 C = length field is CFIndex (rather than UInt32); only meaningful for 64-bit, really
192 if needed this bit (valuable real-estate) can be given up for another bit elsewhere, since this info is needed just for 64-bit
193
194 Also need (only for mutable)
195 F = is fixed
196 G = has gap
197 Cap, DesCap = capacity
198
199 B7 B6 B5 B4 B3 B2 B1 B0
200          U  N  L  C  I
201
202 B6 B5
203  0  0   inline contents
204  0  1   E (freed with default allocator)
205  1  0   E (not freed)
206  1  1   E D
207
208 !!! Note: Constant CFStrings use the bit patterns:
209 C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable)
210 D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable)
211 The bit usages should not be modified in a way that would affect these bit patterns.
212 */
213
/* Masks and values for the string's info bits. Each "...Mask" constant selects the bits that the
   values listed after it are compared against.
*/
enum {
    /* Should the contents be freed along with the string? */
    __kCFFreeContentsWhenDoneMask       = 0x020,
    __kCFFreeContentsWhenDone           = 0x020,

    /* Where the contents live, and how they are released */
    __kCFContentsMask                   = 0x060,
    __kCFHasInlineContents              = 0x000,
    __kCFNotInlineContentsNoFree        = 0x040,    // Don't free
    __kCFNotInlineContentsDefaultFree   = 0x020,    // Use allocator's free function
    __kCFNotInlineContentsCustomFree    = 0x060,    // Use a specially provided free function

    /* (For mutable strings) use a specially provided allocator */
    __kCFHasContentsAllocatorMask       = 0x060,
    __kCFHasContentsAllocator           = 0x060,

    __kCFHasContentsDeallocatorMask     = 0x060,
    __kCFHasContentsDeallocator         = 0x060,

    __kCFIsMutableMask                  = 0x01,
    __kCFIsMutable                      = 0x01,

    __kCFIsUnicodeMask                  = 0x10,
    __kCFIsUnicode                      = 0x10,

    __kCFHasNullByteMask                = 0x08,
    __kCFHasNullByte                    = 0x08,

    __kCFHasLengthByteMask              = 0x04,
    __kCFHasLengthByte                  = 0x04,

    // !!! Bit 0x02 has been freed up
};
236
237
238 // !!! Assumptions:
239 // Mutable strings are not inline
240 // Compile-time constant strings are not inline
241 // Mutable strings always have explicit length (but they might also have length byte and null byte)
242 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
243 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
244
245 /* The following set of functions and macros need to be updated on change to the bit configuration
246 */
247 CF_INLINE Boolean __CFStrIsMutable(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsMutableMask) == __kCFIsMutable;}
248 CF_INLINE Boolean __CFStrIsInline(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFContentsMask) == __kCFHasInlineContents;}
249 CF_INLINE Boolean __CFStrFreeContentsWhenDone(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFFreeContentsWhenDoneMask) == __kCFFreeContentsWhenDone;}
250 CF_INLINE Boolean __CFStrHasContentsDeallocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsDeallocatorMask) == __kCFHasContentsDeallocator;}
251 CF_INLINE Boolean __CFStrIsUnicode(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) == __kCFIsUnicode;}
252 CF_INLINE Boolean __CFStrIsEightBit(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) != __kCFIsUnicode;}
253 CF_INLINE Boolean __CFStrHasNullByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasNullByteMask) == __kCFHasNullByte;}
254 CF_INLINE Boolean __CFStrHasLengthByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte;}
255 CF_INLINE Boolean __CFStrHasExplicitLength(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & (__kCFIsMutableMask | __kCFHasLengthByteMask)) != __kCFHasLengthByte;} // Has explicit length if (1) mutable or (2) not mutable and no length byte
256 CF_INLINE Boolean __CFStrIsConstant(CFStringRef str) {
257 #if __LP64__
258 return str->base._rc == 0;
259 #else
260 return (str->base._cfinfo[CF_RC_BITS]) == 0;
261 #endif
262 }
263
264 CF_INLINE SInt32 __CFStrSkipAnyLengthByte(CFStringRef str) {return ((str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents
265
266 /* Returns ptr to the buffer (which might include the length byte)
267 */
268 CF_INLINE const void *__CFStrContents(CFStringRef str) {
269 if (__CFStrIsInline(str)) {
270 return (const void *)(((uintptr_t)&(str->variants)) + (__CFStrHasExplicitLength(str) ? sizeof(CFIndex) : 0));
271 } else { // Not inline; pointer is always word 2
272 return str->variants.notInlineImmutable1.buffer;
273 }
274 }
275
276 static CFAllocatorRef *__CFStrContentsDeallocatorPtr(CFStringRef str) {
277 return __CFStrHasExplicitLength(str) ? &(((CFMutableStringRef)str)->variants.notInlineImmutable1.contentsDeallocator) : &(((CFMutableStringRef)str)->variants.notInlineImmutable2.contentsDeallocator); }
278
279 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
280 CF_INLINE CFAllocatorRef __CFStrContentsDeallocator(CFStringRef str) {
281 return *__CFStrContentsDeallocatorPtr(str);
282 }
283
284 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
285 CF_INLINE void __CFStrSetContentsDeallocator(CFStringRef str, CFAllocatorRef contentsAllocator) {
286 *__CFStrContentsDeallocatorPtr(str) = contentsAllocator;
287 }
288
289 static CFAllocatorRef *__CFStrContentsAllocatorPtr(CFStringRef str) {
290 CFAssert(!__CFStrIsInline(str), __kCFLogAssertion, "Asking for contents allocator of inline string");
291 CFAssert(__CFStrIsMutable(str), __kCFLogAssertion, "Asking for contents allocator of an immutable string");
292 return (CFAllocatorRef *)&(str->variants.notInlineMutable.contentsAllocator);
293 }
294
295 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
296 CF_INLINE CFAllocatorRef __CFStrContentsAllocator(CFMutableStringRef str) {
297 return *(__CFStrContentsAllocatorPtr(str));
298 }
299
300 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
301 CF_INLINE void __CFStrSetContentsAllocator(CFMutableStringRef str, CFAllocatorRef alloc) {
302 *(__CFStrContentsAllocatorPtr(str)) = alloc;
303 }
304
305 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
306 */
307 CF_INLINE CFIndex __CFStrLength(CFStringRef str) {
308 if (__CFStrHasExplicitLength(str)) {
309 if (__CFStrIsInline(str)) {
310 return str->variants.inline1.length;
311 } else {
312 return str->variants.notInlineImmutable1.length;
313 }
314 } else {
315 return (CFIndex)(*((uint8_t *)__CFStrContents(str)));
316 }
317 }
318
319 CF_INLINE CFIndex __CFStrLength2(CFStringRef str, const void *buffer) {
320 if (__CFStrHasExplicitLength(str)) {
321 if (__CFStrIsInline(str)) {
322 return str->variants.inline1.length;
323 } else {
324 return str->variants.notInlineImmutable1.length;
325 }
326 } else {
327 return (CFIndex)(*((uint8_t *)buffer));
328 }
329 }
330
331
332 Boolean __CFStringIsEightBit(CFStringRef str) {
333 return __CFStrIsEightBit(str);
334 }
335
336 /* Sets the content pointer for immutable or mutable strings.
337 */
338 CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p) {
339 // XXX_PCB catch all writes for mutable string case.
340 __CFAssignWithWriteBarrier((void **)&((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p);
341 }
342 CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v) {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._cfinfo[CF_INFO_BITS], 6, 0, v);}
343
344 CF_INLINE void __CFStrSetExplicitLength(CFStringRef str, CFIndex v) {
345 if (__CFStrIsInline(str)) {
346 ((CFMutableStringRef)str)->variants.inline1.length = v;
347 } else {
348 ((CFMutableStringRef)str)->variants.notInlineImmutable1.length = v;
349 }
350 }
351
352 CF_INLINE void __CFStrSetUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= __kCFIsUnicode;}
353 CF_INLINE void __CFStrClearUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~__kCFIsUnicode;}
354 CF_INLINE void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= (__kCFHasLengthByte | __kCFHasNullByte);}
355 CF_INLINE void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~(__kCFHasLengthByte | __kCFHasNullByte);}
356
357
358 // Assumption: The following set of inlines (using str->variants.notInlineMutable) are called with mutable strings only
359 CF_INLINE Boolean __CFStrIsFixed(CFStringRef str) {return str->variants.notInlineMutable.isFixedCapacity;}
360 CF_INLINE Boolean __CFStrIsExternalMutable(CFStringRef str) {return str->variants.notInlineMutable.isExternalMutable;}
361 CF_INLINE Boolean __CFStrHasContentsAllocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsAllocatorMask) == __kCFHasContentsAllocator;}
362 CF_INLINE void __CFStrSetIsFixed(CFMutableStringRef str) {str->variants.notInlineMutable.isFixedCapacity = 1;}
363 CF_INLINE void __CFStrSetIsExternalMutable(CFMutableStringRef str) {str->variants.notInlineMutable.isExternalMutable = 1;}
364 CF_INLINE void __CFStrSetHasGap(CFMutableStringRef str) {str->variants.notInlineMutable.hasGap = 1;}
365
366 // If capacity is provided externally, we only change it when we need to grow beyond it
367 CF_INLINE Boolean __CFStrCapacityProvidedExternally(CFStringRef str) {return str->variants.notInlineMutable.capacityProvidedExternally;}
368 CF_INLINE void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 1;}
369 CF_INLINE void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 0;}
370
371 // "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
372 CF_INLINE CFIndex __CFStrCapacity(CFStringRef str) {return str->variants.notInlineMutable.capacity;}
373 CF_INLINE void __CFStrSetCapacity(CFMutableStringRef str, CFIndex cap) {str->variants.notInlineMutable.capacity = cap;}
374
375 // "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
376 CF_INLINE CFIndex __CFStrDesiredCapacity(CFStringRef str) {return str->variants.notInlineMutable.desiredCapacity;}
377 CF_INLINE void __CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex size) {str->variants.notInlineMutable.desiredCapacity = size;}
378
379
380 static void *__CFStrAllocateMutableContents(CFMutableStringRef str, CFIndex size) {
381 void *ptr;
382 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
383 ptr = CFAllocatorAllocate(alloc, size, 0);
384 if (__CFOASafe) __CFSetLastAllocationEventName(ptr, "CFString (store)");
385 return ptr;
386 }
387
388 static void __CFStrDeallocateMutableContents(CFMutableStringRef str, void *buffer) {
389 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
390 if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
391 // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle.
392 auto_zone_release(auto_zone(), buffer);
393 } else {
394 CFAllocatorDeallocate(alloc, buffer);
395 }
396 }
397
398
399
400
401 /* CFString specific init flags
402 Note that you cannot count on the external buffer not being copied.
403 Also, if you specify an external buffer, you should not change it behind the CFString's back.
404 */
enum {
    /* See if the Unicode contents can be thinned down to 8-bit */
    __kCFThinUnicodeIfPossible              = 0x1000000,
    /* Indicating that the string data has a Pascal string structure (length byte at start) */
    kCFStringPascal                         = 0x10000,
    /* Don't copy the provided string contents if possible; free it when no longer needed */
    kCFStringNoCopyProvidedContents         = 0x20000,
    /* Don't copy the provided string contents if possible; don't free it when no longer needed */
    kCFStringNoCopyNoFreeProvidedContents   = 0x30000
};
411
412 /* System Encoding.
413 */
414 static CFStringEncoding __CFDefaultSystemEncoding = kCFStringEncodingInvalidId;
415 static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId;
416 CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId;
417
418
419 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
420 #define __defaultEncoding kCFStringEncodingMacRoman
421 #elif DEPLOYMENT_TARGET_WINDOWS
422 #define __defaultEncoding kCFStringEncodingWindowsLatin1
423 #else
424 #warning This value must match __CFGetConverter condition in CFStringEncodingConverter.c
425 #define __defaultEncoding kCFStringEncodingISOLatin1
426 #endif
427
428 CFStringEncoding CFStringGetSystemEncoding(void) {
429 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) {
430 __CFDefaultSystemEncoding = __defaultEncoding;
431 const CFStringEncodingConverter *converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding);
432 __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? (UNI_CHAR_FUNC)converter->toUnicode : NULL);
433 }
434 return __CFDefaultSystemEncoding;
435 }
436
437 // Fast version for internal use
438
439 CF_INLINE CFStringEncoding __CFStringGetSystemEncoding(void) {
440 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) (void)CFStringGetSystemEncoding();
441 return __CFDefaultSystemEncoding;
442 }
443
444 CFStringEncoding CFStringFileSystemEncoding(void) {
445 if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) {
446 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
447 __CFDefaultFileSystemEncoding = kCFStringEncodingUTF8;
448 #else
449 __CFDefaultFileSystemEncoding = CFStringGetSystemEncoding();
450 #endif
451 }
452
453 return __CFDefaultFileSystemEncoding;
454 }
455
456 /* ??? Is returning length when no other answer is available the right thing?
457 !!! All of the (length > (LONG_MAX / N)) type checks are to avoid wrap-around and eventual malloc overflow in the client
458 */
459 CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) {
460 if (encoding == kCFStringEncodingUTF8) {
461 if (_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther)) { // 1 Unichar can expand to 3 bytes; we return 6 for older apps for compatibility
462 return (length > (LONG_MAX / 3)) ? kCFNotFound : (length * 3);
463 } else {
464 return (length > (LONG_MAX / 6)) ? kCFNotFound : (length * 6);
465 }
466 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32
467 return (length > (LONG_MAX / sizeof(UTF32Char))) ? kCFNotFound : (length * sizeof(UTF32Char));
468 } else {
469 encoding &= 0xFFF; // Mask off non-base part
470 }
471 switch (encoding) {
472 case kCFStringEncodingUnicode:
473 return (length > (LONG_MAX / sizeof(UniChar))) ? kCFNotFound : (length * sizeof(UniChar));
474
475 case kCFStringEncodingNonLossyASCII:
476 return (length > (LONG_MAX / 6)) ? kCFNotFound : (length * 6); // 1 Unichar can expand to 6 bytes
477
478 case kCFStringEncodingMacRoman:
479 case kCFStringEncodingWindowsLatin1:
480 case kCFStringEncodingISOLatin1:
481 case kCFStringEncodingNextStepLatin:
482 case kCFStringEncodingASCII:
483 return length / sizeof(uint8_t);
484
485 default:
486 return length / sizeof(uint8_t);
487 }
488 }
489
490
491 /* Returns whether the indicated encoding can be stored in 8-bit chars
492 */
493 CF_INLINE Boolean __CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding) {
494 switch (encoding & 0xFFF) { // just use encoding base
495 case kCFStringEncodingInvalidId:
496 case kCFStringEncodingUnicode:
497 case kCFStringEncodingNonLossyASCII:
498 return false;
499
500 case kCFStringEncodingMacRoman:
501 case kCFStringEncodingWindowsLatin1:
502 case kCFStringEncodingISOLatin1:
503 case kCFStringEncodingNextStepLatin:
504 case kCFStringEncodingASCII:
505 return true;
506
507 default: return false;
508 }
509 }
510
511 /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
512 ??? Perhaps only ASCII fits the bill due to Unicode decomposition.
513 */
514 CFStringEncoding __CFStringComputeEightBitStringEncoding(void) {
515 if (__CFDefaultEightBitStringEncoding == kCFStringEncodingInvalidId) {
516 CFStringEncoding systemEncoding = CFStringGetSystemEncoding();
517 if (systemEncoding == kCFStringEncodingInvalidId) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
518 return kCFStringEncodingASCII;
519 } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding)) {
520 __CFDefaultEightBitStringEncoding = systemEncoding;
521 } else {
522 __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII;
523 }
524 }
525
526 return __CFDefaultEightBitStringEncoding;
527 }
528
529 /* Returns whether the provided bytes can be stored in ASCII
530 */
531 CF_INLINE Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
532 while (len--) if ((uint8_t)(*bytes++) >= 128) return false;
533 return true;
534 }
535
536 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
537 */
538 CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) {
539 // If the encoding is the same as the 8-bit CFString encoding, we can just use the bytes as-is.
540 // One exception is ASCII, which unfortunately needs to mean ISOLatin1 for compatibility reasons <rdar://problem/5458321>.
541 if (encoding == __CFStringGetEightBitStringEncoding() && encoding != kCFStringEncodingASCII) return true;
542 if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true;
543 return false;
544 }
545
546
547 /* Returns whether a length byte can be tacked on to a string of the indicated length.
548 */
549 CF_INLINE Boolean __CFCanUseLengthByte(CFIndex len) {
550 #define __kCFMaxPascalStrLen 255
551 return (len <= __kCFMaxPascalStrLen) ? true : false;
552 }
553
554 /* Various string assertions
555 */
/* Macro arguments are parenthesized wherever they take part in an expression, so that an argument
   such as "a ? b : c" or "x | y" is tested as a whole rather than being split by operator precedence.
   The two brace-wrapped macros expand to a compound statement; use them as a statement of their own.
*/
#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && ((idx) + (count)) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsNotNegative(idx) CFAssert2((idx) >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
563
564
565 /* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity. This function will return -1 if the new capacity is just too big (> LONG_MAX).
566 Additional complications are applied in the following order:
567 - desiredCapacity, which is the minimum (except initially things can be at zero)
568 - rounding up to factor of 8
569 - compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256
570 - we need to make sure GROWFACTOR computation doesn't suffer from overflow issues on 32-bit, hence the casting to unsigned. Normally for required capacity of C bytes, the allocated space is (3C+1)/2. If C > ULONG_MAX/3, we instead simply return LONG_MAX
571 */
/* SHRINKFACTOR(c) is half of c. GROWFACTOR(c) is (3c+1)/2; the 32-bit variant avoids overflow by
   returning the larger of LONG_MAX - 4095 and c once c reaches ULONG_MAX / 3.
   The argument is parenthesized at every use so that expressions such as "a + b" scale as a whole.
*/
#define SHRINKFACTOR(c) ((c) / 2)

#if __LP64__
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
#else
#define GROWFACTOR(c) (((c) >= (ULONG_MAX / 3UL)) ? __CFMax(LONG_MAX - 4095, (c)) : (((unsigned long)(c) * 3 + 1) / 2))
#endif
579
580 CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, unsigned long reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) {
581 if (capacity != 0 || reqCapacity != 0) { /* If initially zero, and space not needed, leave it at that... */
582 if ((capacity < reqCapacity) || /* We definitely need the room... */
583 (!__CFStrCapacityProvidedExternally(str) && /* Assuming we control the capacity... */
584 ((reqCapacity < SHRINKFACTOR(capacity)) || /* ...we have too much room! */
585 (!leaveExtraRoom && (reqCapacity < capacity))))) { /* ...we need to eliminate the extra space... */
586 if (reqCapacity > LONG_MAX) return -1; /* Too big any way you cut it */
587 unsigned long newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity; /* Grow by 3/2 if extra room is desired */
588 CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize;
589 if (newCapacity < desiredCapacity) { /* If less than desired, bump up to desired */
590 newCapacity = desiredCapacity;
591 } else if (__CFStrIsFixed(str)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
592 newCapacity = __CFMax(desiredCapacity, reqCapacity); /* !!! So, fixed is not really fixed, but "tight" */
593 }
594 if (__CFStrHasContentsAllocator(str)) { /* Also apply any preferred size from the allocator */
595 newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0);
596 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
597 } else {
598 newCapacity = malloc_good_size(newCapacity);
599 #endif
600 }
601 return (newCapacity > LONG_MAX) ? -1 : (CFIndex)newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
602 }
603 }
604 return capacity;
605 }
606
607
608 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
609 numBlocks is current total number of blocks within buffer.
610 blockSize is the size of each block in bytes
611 ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
612 insertLength is the final spacing between the remaining blocks
613
614 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO
615 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
616 if insertLength = 0, result = A B D G H
617
618 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
619 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
620
621 */
622 typedef struct _CFStringDeferredRange {
623 CFIndex beginning;
624 CFIndex length;
625 CFIndex shift;
626 } CFStringDeferredRange;
627
628 typedef struct _CFStringStackInfo {
629 CFIndex capacity; // Capacity (if capacity == count, need to realloc to add another)
630 CFIndex count; // Number of elements actually stored
631 CFStringDeferredRange *stack;
632 Boolean hasMalloced; // Indicates "stack" is allocated and needs to be deallocated when done
633 char _padding[3];
634 } CFStringStackInfo;
635
636 CF_INLINE void pop (CFStringStackInfo *si, CFStringDeferredRange *topRange) {
637 si->count = si->count - 1;
638 *topRange = si->stack[si->count];
639 }
640
641 CF_INLINE void push (CFStringStackInfo *si, const CFStringDeferredRange *newRange) {
642 if (si->count == si->capacity) {
643 // increase size of the stack
644 si->capacity = (si->capacity + 4) * 2;
645 if (si->hasMalloced) {
646 si->stack = (CFStringDeferredRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, si->stack, si->capacity * sizeof(CFStringDeferredRange), 0);
647 } else {
648 CFStringDeferredRange *newStack = (CFStringDeferredRange *)CFAllocatorAllocate(kCFAllocatorSystemDefault, si->capacity * sizeof(CFStringDeferredRange), 0);
649 memmove(newStack, si->stack, si->count * sizeof(CFStringDeferredRange));
650 si->stack = newStack;
651 si->hasMalloced = true;
652 }
653 }
654 si->stack[si->count] = *newRange;
655 si->count = si->count + 1;
656 }
657
658 static void rearrangeBlocks(
659 uint8_t *buffer,
660 CFIndex numBlocks,
661 CFIndex blockSize,
662 const CFRange *ranges,
663 CFIndex numRanges,
664 CFIndex insertLength) {
665
666 #define origStackSize 10
667 CFStringDeferredRange origStack[origStackSize];
668 CFStringStackInfo si = {origStackSize, 0, origStack, false, {0, 0, 0}};
669 CFStringDeferredRange currentNonRange = {0, 0, 0};
670 CFIndex currentRange = 0;
671 CFIndex amountShifted = 0;
672
673 // must have at least 1 range left.
674
675 while (currentRange < numRanges) {
676 currentNonRange.beginning = (ranges[currentRange].location + ranges[currentRange].length) * blockSize;
677 if ((numRanges - currentRange) == 1) {
678 // at the end.
679 currentNonRange.length = numBlocks * blockSize - currentNonRange.beginning;
680 if (currentNonRange.length == 0) break;
681 } else {
682 currentNonRange.length = (ranges[currentRange + 1].location * blockSize) - currentNonRange.beginning;
683 }
684 currentNonRange.shift = amountShifted + (insertLength * blockSize) - (ranges[currentRange].length * blockSize);
685 amountShifted = currentNonRange.shift;
686 if (amountShifted <= 0) {
687 // process current item and rest of stack
688 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
689 while (si.count > 0) {
690 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
691 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
692 }
693 } else {
694 // add currentNonRange to stack.
695 push (&si, &currentNonRange);
696 }
697 currentRange++;
698 }
699
700 // no more ranges. if anything is on the stack, process.
701
702 while (si.count > 0) {
703 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
704 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
705 }
706 if (si.hasMalloced) CFAllocatorDeallocate (kCFAllocatorSystemDefault, si.stack);
707 }
708
709 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
710 */
711 static void copyBlocks(
712 const uint8_t *srcBuffer,
713 uint8_t *dstBuffer,
714 CFIndex srcLength,
715 Boolean srcIsUnicode,
716 Boolean dstIsUnicode,
717 const CFRange *ranges,
718 CFIndex numRanges,
719 CFIndex insertLength) {
720
721 CFIndex srcLocationInBytes = 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
722 CFIndex dstLocationInBytes = 0; // ditto
723 CFIndex srcBlockSize = srcIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
724 CFIndex insertLengthInBytes = insertLength * (dstIsUnicode ? sizeof(UniChar) : sizeof(uint8_t));
725 CFIndex rangeIndex = 0;
726 CFIndex srcToDstMultiplier = (srcIsUnicode == dstIsUnicode) ? 1 : (sizeof(UniChar) / sizeof(uint8_t));
727
728 // Loop over the ranges, copying the range to be preserved (right before each range)
729 while (rangeIndex < numRanges) {
730 CFIndex srcLengthInBytes = ranges[rangeIndex].location * srcBlockSize - srcLocationInBytes; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
731 if (srcLengthInBytes > 0) {
732 if (srcIsUnicode == dstIsUnicode) {
733 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLengthInBytes);
734 } else {
735 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLengthInBytes);
736 }
737 }
738 srcLocationInBytes += srcLengthInBytes + ranges[rangeIndex].length * srcBlockSize; // Skip over the just-copied and to-be-deleted stuff
739 dstLocationInBytes += srcLengthInBytes * srcToDstMultiplier + insertLengthInBytes;
740 rangeIndex++;
741 }
742
743 // Do last range (the one beyond last range)
744 if (srcLocationInBytes < srcLength * srcBlockSize) {
745 if (srcIsUnicode == dstIsUnicode) {
746 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLength * srcBlockSize - srcLocationInBytes);
747 } else {
748 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLength * srcBlockSize - srcLocationInBytes);
749 }
750 }
751 }
752
753 /* Call the callback; if it doesn't exist or returns false, then log
754 */
755 static void __CFStringHandleOutOfMemory(CFTypeRef obj) {
756 CFStringRef msg = CFSTR("Out of memory. We suggest restarting the application. If you have an unsaved document, create a backup copy in Finder, then try to save.");
757 {
758 CFLog(kCFLogLevelCritical, CFSTR("%@"), msg);
759 }
760 }
761
/* Reallocates the backing store of the string to accommodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)

   str              the mutable string whose storage is adjusted
   deleteRanges     ranges (in characters) whose contents are dropped
   numDeleteRanges  number of entries in deleteRanges
   insertLength     number of characters of space reserved in place of EACH delete range
   makeUnicode      if true, the resulting storage is Unicode even if the current storage is 8-bit

   The reserved space is not filled in here; the existing characters are only moved (rearrangeBlocks) or copied (copyBlocks) around it.
*/
static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) {
    const uint8_t *curContents = (uint8_t *)__CFStrContents(str);
    CFIndex curLength = curContents ? __CFStrLength2(str, curContents) : 0;
    unsigned long newLength;	// We use unsigned to better keep track of overflow

    // Compute new length of the string
    if (numDeleteRanges == 1) {
        newLength = curLength + insertLength - deleteRanges[0].length;
    } else {
        CFIndex cnt;
        newLength = curLength + insertLength * numDeleteRanges;
        for (cnt = 0; cnt < numDeleteRanges; cnt++) newLength -= deleteRanges[cnt].length;
    }

    __CFAssertIfFixedLengthIsOK(str, newLength);

    if (newLength == 0) {
        // A somewhat optimized code-path for this special case, with the following implicit values:
        //   newIsUnicode = false
        //   useLengthAndNullBytes = false
        //   newCharSize = sizeof(uint8_t)
        // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
        // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
        CFIndex curCapacity = __CFStrCapacity(str);
        CFIndex newCapacity = __CFStrNewCapacity(str, 0, curCapacity, true, sizeof(uint8_t));
        if (newCapacity != curCapacity) {	// If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
            if (curContents) __CFStrDeallocateMutableContents(str, (uint8_t *)curContents);
            __CFStrSetContentPtr(str, NULL);
            __CFStrSetCapacity(str, 0);
            __CFStrClearCapacityProvidedExternally(str);
            __CFStrClearHasLengthAndNullBytes(str);
            if (!__CFStrIsExternalMutable(str)) __CFStrClearUnicode(str);	// External mutable implies Unicode
        } else {
            // Keeping the existing buffer: just reset it to look like an empty 8-bit string
            if (!__CFStrIsExternalMutable(str)) {
                __CFStrClearUnicode(str);
                if (curCapacity >= (int)(sizeof(uint8_t) * 2)) {	// If there's room
                    __CFStrSetHasLengthAndNullBytes(str);
                    ((uint8_t *)curContents)[0] = ((uint8_t *)curContents)[1] = 0;	// Length byte of 0 followed by the null byte
                } else {
                    __CFStrClearHasLengthAndNullBytes(str);
                }
            }
        }
        __CFStrSetExplicitLength(str, 0);
    } else {	/* This else-clause assumes newLength > 0 */
        Boolean oldIsUnicode = __CFStrIsUnicode(str);
        Boolean newIsUnicode = makeUnicode || (oldIsUnicode /* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str);
        CFIndex newCharSize = newIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
        Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */;
        CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0;	/* 2 extra bytes to keep the length byte & null... */
        CFIndex curCapacity = __CFStrCapacity(str);
        // Guard against newLength * newCharSize + numExtraBytes exceeding LONG_MAX
        if (newLength > (LONG_MAX - numExtraBytes) / newCharSize) __CFStringHandleOutOfMemory(str);	// Does not return
        CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize);
        if (newCapacity == -1) __CFStringHandleOutOfMemory(str);	// Does not return
        Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode);	/* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
        uint8_t *newContents;
        if (allocNewBuffer) {
            newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
            if (!newContents) {	// Try allocating without extra room
                newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, false, newCharSize);
                // Since we checked for this above, it shouldn't be the case here, but just in case
                if (newCapacity == -1) __CFStringHandleOutOfMemory(str);	// Does not return
                newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
                if (!newContents) __CFStringHandleOutOfMemory(str);	// Does not return
            }
        } else {
            newContents = (uint8_t *)curContents;
        }

        Boolean hasLengthAndNullBytes = __CFStrHasLengthByte(str);

        CFAssert1(hasLengthAndNullBytes == __CFStrHasNullByte(str), __kCFLogAssertion, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__);

        // Temporarily step both pointers past their length byte so they address the first character; this is undone further down
        if (hasLengthAndNullBytes) curContents++;
        if (useLengthAndNullBytes) newContents++;

        if (curContents) {
            if (oldIsUnicode == newIsUnicode) {
                if (newContents == curContents) {
                    // Same buffer, same character width: shuffle the characters in place
                    rearrangeBlocks(newContents, curLength, newCharSize, deleteRanges, numDeleteRanges, insertLength);
                } else {
                    copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
                }
            } else if (newIsUnicode) {	/* this implies we have a new buffer */
                copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
            }
            if (hasLengthAndNullBytes) curContents--;	/* Undo the damage from above */
            if (allocNewBuffer && __CFStrFreeContentsWhenDone(str)) __CFStrDeallocateMutableContents(str, (void *)curContents);
        }

        // Bring the string's flags (and, for 8-bit storage, the length and null bytes) in line with the new storage
        if (!newIsUnicode) {
            if (useLengthAndNullBytes) {
                newContents[newLength] = 0;	/* Always have null byte, if not unicode */
                newContents--;	/* Undo the damage from above */
                newContents[0] = __CFCanUseLengthByte(newLength) ? (uint8_t)newLength : 0;
                if (!hasLengthAndNullBytes) __CFStrSetHasLengthAndNullBytes(str);
            } else {
                if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
            }
            if (oldIsUnicode) __CFStrClearUnicode(str);
        } else {	// New is unicode...
            if (!oldIsUnicode) __CFStrSetUnicode(str);
            if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
        }
        __CFStrSetExplicitLength(str, newLength);

        if (allocNewBuffer) {
            __CFStrSetCapacity(str, newCapacity);
            __CFStrClearCapacityProvidedExternally(str);
            __CFStrSetContentPtr(str, newContents);
        }
    }
}
877
878 /* Same as above, but takes one range (very common case)
879 */
880 CF_INLINE void __CFStringChangeSize(CFMutableStringRef str, CFRange range, CFIndex insertLength, Boolean makeUnicode) {
881 __CFStringChangeSizeMultiple(str, &range, 1, insertLength, makeUnicode);
882 }
883
884
885 #if defined(DEBUG)
886 static Boolean __CFStrIsConstantString(CFStringRef str);
887 #endif
888
889 static void __CFStringDeallocate(CFTypeRef cf) {
890 CFStringRef str = (CFStringRef)cf;
891
892 // If in DEBUG mode, check to see if the string a CFSTR, and complain.
893 CFAssert1(__CFConstantStringTableBeingFreed || !__CFStrIsConstantString((CFStringRef)cf), __kCFLogAssertion, "Tried to deallocate CFSTR(\"%@\")", str);
894
895 if (!__CFStrIsInline(str)) {
896 uint8_t *contents;
897 Boolean isMutable = __CFStrIsMutable(str);
898 if (__CFStrFreeContentsWhenDone(str) && (contents = (uint8_t *)__CFStrContents(str))) {
899 if (isMutable) {
900 __CFStrDeallocateMutableContents((CFMutableStringRef)str, contents);
901 } else {
902 if (__CFStrHasContentsDeallocator(str)) {
903 CFAllocatorRef contentsDeallocator = __CFStrContentsDeallocator(str);
904 CFAllocatorDeallocate(contentsDeallocator, contents);
905 CFRelease(contentsDeallocator);
906 } else {
907 CFAllocatorRef alloc = __CFGetAllocator(str);
908 CFAllocatorDeallocate(alloc, contents);
909 }
910 }
911 }
912 if (isMutable && __CFStrHasContentsAllocator(str)) CFRelease(__CFStrContentsAllocator((CFMutableStringRef)str));
913 }
914 }
915
916 static Boolean __CFStringEqual(CFTypeRef cf1, CFTypeRef cf2) {
917 CFStringRef str1 = (CFStringRef)cf1;
918 CFStringRef str2 = (CFStringRef)cf2;
919 const uint8_t *contents1;
920 const uint8_t *contents2;
921 CFIndex len1;
922
923 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
924 /* !!! We do not need == test, as the CFBase runtime assures this */
925
926 contents1 = (uint8_t *)__CFStrContents(str1);
927 contents2 = (uint8_t *)__CFStrContents(str2);
928 len1 = __CFStrLength2(str1, contents1);
929
930 if (len1 != __CFStrLength2(str2, contents2)) return false;
931
932 contents1 += __CFStrSkipAnyLengthByte(str1);
933 contents2 += __CFStrSkipAnyLengthByte(str2);
934
935 if (__CFStrIsEightBit(str1) && __CFStrIsEightBit(str2)) {
936 return memcmp((const char *)contents1, (const char *)contents2, len1) ? false : true;
937 } else if (__CFStrIsEightBit(str1)) { /* One string has Unicode contents */
938 CFStringInlineBuffer buf;
939 CFIndex buf_idx = 0;
940
941 CFStringInitInlineBuffer(str1, &buf, CFRangeMake(0, len1));
942 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
943 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents2)[buf_idx]) return false;
944 }
945 } else if (__CFStrIsEightBit(str2)) { /* One string has Unicode contents */
946 CFStringInlineBuffer buf;
947 CFIndex buf_idx = 0;
948
949 CFStringInitInlineBuffer(str2, &buf, CFRangeMake(0, len1));
950 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
951 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents1)[buf_idx]) return false;
952 }
953 } else { /* Both strings have Unicode contents */
954 CFIndex idx;
955 for (idx = 0; idx < len1; idx++) {
956 if (((UniChar *)contents1)[idx] != ((UniChar *)contents2)[idx]) return false;
957 }
958 }
959 return true;
960 }
961
962
963 /* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
964 If the length is less than or equal to 96, then the hash function is simply the
965 following (n is the nth UniChar character, starting from 0):
966
967 hash(-1) = length
968 hash(n) = hash(n-1) * 257 + unichar(n);
        Hash = hash(length-1) + (hash(length-1) << (length & 31))
970
971 If the length is greater than 96, then the above algorithm applies to
972 characters 0..31, (length/2)-16..(length/2)+15, and length-32..length-1, inclusive;
973 thus the first, middle, and last 32 characters.
974
975 Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401; 67503105 is 257^4 - 256^4
976 If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.
977 !!! We haven't updated for LP64 yet
978
979 NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.
980
981 Hash function was changed between Panther and Tiger, and Tiger and Leopard.
982 */
/* Strings with at most this many characters are hashed in full; longer ones are hashed from three 32-character samples (start, middle, end). */
#define HashEverythingLimit 96

/* Folds the next four characters into the variable "result" (which must exist in the enclosing scope) and advances "pointer" by 4.
   accessStart and accessEnd are token fragments wrapped around the index to form each element access; for example, passing (p[, ]) produces p[0] .. p[3].
   The multipliers are the powers of 257 listed in the comment above (67503105 being 257^4 - 256^4).
   The expansion is a bare { } block, so it is meant to be used as the body of a loop, as the functions below do.
*/
#define HashNextFourUniChars(accessStart, accessEnd, pointer) \
    {result = result * 67503105 + (accessStart 0 accessEnd) * 16974593 + (accessStart 1 accessEnd) * 66049 + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;}

/* Single-character counterpart of HashNextFourUniChars: folds one character into "result" and advances "pointer" by 1. */
#define HashNextUniChar(accessStart, accessEnd, pointer) \
    {result = result * 257 + (accessStart 0 accessEnd); pointer++;}
990
991
992 /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing.
993 */
994 CF_INLINE CFHashCode __CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) {
995 CFHashCode result = actualLen;
996 if (len <= HashEverythingLimit) {
997 const UniChar *end4 = uContents + (len & ~3);
998 const UniChar *end = uContents + len;
999 while (uContents < end4) HashNextFourUniChars(uContents[, ], uContents); // First count in fours
1000 while (uContents < end) HashNextUniChar(uContents[, ], uContents); // Then for the last <4 chars, count in ones...
1001 } else {
1002 const UniChar *contents, *end;
1003 contents = uContents;
1004 end = contents + 32;
1005 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1006 contents = uContents + (len >> 1) - 16;
1007 end = contents + 32;
1008 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1009 end = uContents + len;
1010 contents = end - 32;
1011 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1012 }
1013 return result + (result << (actualLen & 31));
1014 }
1015
1016 /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check.
1017 */
1018 CF_INLINE CFHashCode __CFStrHashEightBit(const uint8_t *cContents, CFIndex len) {
1019 #if defined(DEBUG)
1020 if (!__CFCharToUniCharFunc) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
1021 CFIndex cnt;
1022 Boolean err = false;
1023 if (len <= HashEverythingLimit) {
1024 for (cnt = 0; cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1025 } else {
1026 for (cnt = 0; cnt < 32; cnt++) if (cContents[cnt] >= 128) err = true;
1027 for (cnt = (len >> 1) - 16; cnt < (len >> 1) + 16; cnt++) if (cContents[cnt] >= 128) err = true;
1028 for (cnt = (len - 32); cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1029 }
1030 if (err) {
1031 // Can't do log here, as it might be too early
1032 fprintf(stderr, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
1033 }
1034 }
1035 #endif
1036 CFHashCode result = len;
1037 if (len <= HashEverythingLimit) {
1038 const uint8_t *end4 = cContents + (len & ~3);
1039 const uint8_t *end = cContents + len;
1040 while (cContents < end4) HashNextFourUniChars(__CFCharToUniCharTable[cContents[, ]], cContents); // First count in fours
1041 while (cContents < end) HashNextUniChar(__CFCharToUniCharTable[cContents[, ]], cContents); // Then for the last <4 chars, count in ones...
1042 } else {
1043 const uint8_t *contents, *end;
1044 contents = cContents;
1045 end = contents + 32;
1046 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1047 contents = cContents + (len >> 1) - 16;
1048 end = contents + 32;
1049 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1050 end = cContents + len;
1051 contents = end - 32;
1052 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1053 }
1054 return result + (result << (len & 31));
1055 }
1056
1057 CFHashCode CFStringHashISOLatin1CString(const uint8_t *bytes, CFIndex len) {
1058 CFHashCode result = len;
1059 if (len <= HashEverythingLimit) {
1060 const uint8_t *end4 = bytes + (len & ~3);
1061 const uint8_t *end = bytes + len;
1062 while (bytes < end4) HashNextFourUniChars(bytes[, ], bytes); // First count in fours
1063 while (bytes < end) HashNextUniChar(bytes[, ], bytes); // Then for the last <4 chars, count in ones...
1064 } else {
1065 const uint8_t *contents, *end;
1066 contents = bytes;
1067 end = contents + 32;
1068 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1069 contents = bytes + (len >> 1) - 16;
1070 end = contents + 32;
1071 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1072 end = bytes + len;
1073 contents = end - 32;
1074 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1075 }
1076 return result + (result << (len & 31));
1077 }
1078
1079 CFHashCode CFStringHashCString(const uint8_t *bytes, CFIndex len) {
1080 return __CFStrHashEightBit(bytes, len);
1081 }
1082
1083 CFHashCode CFStringHashCharacters(const UniChar *characters, CFIndex len) {
1084 return __CFStrHashCharacters(characters, len, len);
1085 }
1086
1087 /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash.
1088 */
1089 CFHashCode CFStringHashNSString(CFStringRef str) {
1090 UniChar buffer[HashEverythingLimit];
1091 CFIndex bufLen; // Number of characters in the buffer for hashing
1092 CFIndex len = 0; // Actual length of the string
1093
1094 CF_OBJC_CALL0(CFIndex, len, str, "length");
1095 if (len <= HashEverythingLimit) {
1096 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, len));
1097 bufLen = len;
1098 } else {
1099 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, 32));
1100 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer+32, CFRangeMake((len >> 1) - 16, 32));
1101 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer+64, CFRangeMake(len - 32, 32));
1102 bufLen = HashEverythingLimit;
1103 }
1104 return __CFStrHashCharacters(buffer, bufLen, len);
1105 }
1106
1107 CFHashCode __CFStringHash(CFTypeRef cf) {
1108 /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
1109 CFStringRef str = (CFStringRef)cf;
1110 const uint8_t *contents = (uint8_t *)__CFStrContents(str);
1111 CFIndex len = __CFStrLength2(str, contents);
1112
1113 if (__CFStrIsEightBit(str)) {
1114 contents += __CFStrSkipAnyLengthByte(str);
1115 return __CFStrHashEightBit(contents, len);
1116 } else {
1117 return __CFStrHashCharacters((const UniChar *)contents, len, len);
1118 }
1119 }
1120
1121
1122 static CFStringRef __CFStringCopyDescription(CFTypeRef cf) {
1123 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf, __CFGetAllocator(cf), cf);
1124 }
1125
1126 static CFStringRef __CFStringCopyFormattingDescription(CFTypeRef cf, CFDictionaryRef formatOptions) {
1127 return (CFStringRef)CFStringCreateCopy(__CFGetAllocator(cf), (CFStringRef)cf);
1128 }
1129
// Type ID of CFString. Holds _kCFRuntimeNotATypeID until __CFStringInitialize() stores the value returned by _CFRuntimeRegisterClass().
static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID;

// Function pointer type used to cast CFStringCreateCopy into the form stored in the class record below
typedef CFTypeRef (*CF_STRING_CREATE_COPY)(CFAllocatorRef alloc, CFTypeRef theString);

// Runtime class record for CFString; registered by __CFStringInitialize().
// NOTE(review): the entries are positional. The unlabeled slots presumably follow the field order of CFRuntimeClass as declared in CFRuntime.h -- confirm there before reordering or inserting anything.
static const CFRuntimeClass __CFStringClass = {
    0,
    "CFString",
    NULL,      // init
    (CF_STRING_CREATE_COPY)CFStringCreateCopy,
    __CFStringDeallocate,
    __CFStringEqual,
    __CFStringHash,
    __CFStringCopyFormattingDescription,
    __CFStringCopyDescription
};
1145
1146 __private_extern__ void __CFStringInitialize(void) {
1147 __kCFStringTypeID = _CFRuntimeRegisterClass(&__CFStringClass);
1148 }
1149
1150 CFTypeID CFStringGetTypeID(void) {
1151 return __kCFStringTypeID;
1152 }
1153
1154
1155 static Boolean CFStrIsUnicode(CFStringRef str) {
1156 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, Boolean, str, "_encodingCantBeStoredInEightBitCFString");
1157 return __CFStrIsUnicode(str);
1158 }
1159
1160
1161
1162 #define ALLOCATORSFREEFUNC ((CFAllocatorRef)-1)
1163
1164 /* contentsDeallocator indicates how to free the data if it's noCopy == true:
1165 kCFAllocatorNull: don't free
1166 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
1167 NULL: default allocator
1168 otherwise it's the allocator that should be used (it will be explicitly stored)
   if noCopy == false, then contentsDeallocator should be ALLOCATORSFREEFUNC
1170 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
1171 possiblyExternalFormat indicates that the bytes might have BOM and be swapped
1172 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
1173 numBytes contains the actual number of bytes in "bytes", including Length byte,
1174 BUT not the NULL byte at the end
1175 bytes should not contain BOM characters
1176 !!! Various flags should be combined to reduce number of arguments, if possible
1177 */
1178 __private_extern__ CFStringRef __CFStringCreateImmutableFunnel3(
1179 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1180 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1181 CFAllocatorRef contentsDeallocator, UInt32 converterFlags) {
1182
1183 CFMutableStringRef str;
1184 CFVarWidthCharBuffer vBuf;
1185 CFIndex size;
1186 Boolean useLengthByte = false;
1187 Boolean useNullByte = false;
1188 Boolean useInlineData = false;
1189
1190 #if INSTRUMENT_SHARED_STRINGS
1191 const char *recordedEncoding;
1192 char encodingBuffer[128];
1193 if (encoding == kCFStringEncodingUnicode) recordedEncoding = "Unicode";
1194 else if (encoding == kCFStringEncodingASCII) recordedEncoding = "ASCII";
1195 else if (encoding == kCFStringEncodingUTF8) recordedEncoding = "UTF8";
1196 else if (encoding == kCFStringEncodingMacRoman) recordedEncoding = "MacRoman";
1197 else {
1198 snprintf(encodingBuffer, sizeof(encodingBuffer), "0x%lX", (unsigned long)encoding);
1199 recordedEncoding = encodingBuffer;
1200 }
1201 #endif
1202
1203 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1204
1205 if (contentsDeallocator == ALLOCATORSFREEFUNC) {
1206 contentsDeallocator = alloc;
1207 } else if (contentsDeallocator == NULL) {
1208 contentsDeallocator = __CFGetDefaultAllocator();
1209 }
1210
1211 if ((NULL != kCFEmptyString) && (numBytes == 0) && (alloc == kCFAllocatorSystemDefault)) { // If we are using the system default allocator, and the string is empty, then use the empty string!
1212 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
1213 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1214 }
1215 return (CFStringRef)CFRetain(kCFEmptyString); // Quick exit; won't catch all empty strings, but most
1216 }
1217
1218 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1219
1220 vBuf.shouldFreeChars = false; // We use this to remember to free the buffer possibly allocated by decode
1221
1222 // Record whether we're starting out with an ASCII-superset string, because we need to know this later for the string ROM; this may get changed later if we successfully convert down from Unicode. We only record this once because __CFCanUseEightBitCFStringForBytes() can be expensive.
1223 Boolean stringSupportsEightBitCFRepresentation = encoding != kCFStringEncodingUnicode && __CFCanUseEightBitCFStringForBytes((const uint8_t *)bytes, numBytes, encoding);
1224
1225 // We may also change noCopy within this function if we have to decode the string into an external buffer. We do not want to avoid the use of the string ROM merely because we tried to be efficient and reuse the decoded buffer for the CFString's external storage. Therefore, we use this variable to track whether we actually can ignore the noCopy flag (which may or may not be set anyways).
1226 Boolean stringROMShouldIgnoreNoCopy = false;
1227
1228 // First check to see if the data needs to be converted...
1229 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
1230
1231 if ((encoding == kCFStringEncodingUnicode && possiblyExternalFormat) || encoding != kCFStringEncodingUnicode && ! stringSupportsEightBitCFRepresentation) {
1232 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0);
1233 CFIndex realNumBytes = numBytes - (hasLengthByte ? 1 : 0);
1234 Boolean usingPassedInMemory = false;
1235
1236 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
1237 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
1238
1239 if (!__CFStringDecodeByteStream3((const uint8_t *)realBytes, realNumBytes, encoding, false, &vBuf, &usingPassedInMemory, converterFlags)) {
1240 // Note that if the string can't be created, we don't free the buffer, even if there is a contents deallocator. This is on purpose.
1241 return NULL;
1242 }
1243
1244 encoding = vBuf.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUnicode;
1245
1246 // Update our flag according to whether the decoded buffer is ASCII
1247 stringSupportsEightBitCFRepresentation = vBuf.isASCII;
1248
1249 if (!usingPassedInMemory) {
1250
1251 // Because __CFStringDecodeByteStream3() allocated our buffer, it's OK for us to free it if we can get the string from the ROM.
1252 stringROMShouldIgnoreNoCopy = true;
1253
1254 // Make the parameters fit the new situation
1255 numBytes = vBuf.isASCII ? vBuf.numChars : (vBuf.numChars * sizeof(UniChar));
1256 hasLengthByte = hasNullByte = false;
1257
1258 // Get rid of the original buffer if its not being used
1259 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1260 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1261 }
1262 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1263
1264 // See if we can reuse any storage the decode func might have allocated
1265 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1266
1267 if (vBuf.shouldFreeChars && (alloc == vBuf.allocator) && encoding == kCFStringEncodingUnicode) {
1268 vBuf.shouldFreeChars = false; // Transferring ownership to the CFString
1269 bytes = CFAllocatorReallocate(vBuf.allocator, (void *)vBuf.chars.unicode, numBytes, 0); // Tighten up the storage
1270 noCopy = true;
1271 #if INSTRUMENT_SHARED_STRINGS
1272 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-NoCopy";
1273 else recordedEncoding = "ForeignUnicode-NoCopy";
1274 #endif
1275 } else {
1276 #if INSTRUMENT_SHARED_STRINGS
1277 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-Copy";
1278 else recordedEncoding = "ForeignUnicode-Copy";
1279 #endif
1280 bytes = vBuf.chars.unicode;
1281 noCopy = false; // Can't do noCopy anymore
1282 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1283 }
1284
1285 }
1286
1287 // At this point, all necessary input arguments have been changed to reflect the new state
1288
1289 } else if (encoding == kCFStringEncodingUnicode && tryToReduceUnicode) { // Check to see if we can reduce Unicode to ASCII
1290 CFIndex cnt;
1291 CFIndex len = numBytes / sizeof(UniChar);
1292 Boolean allASCII = true;
1293
1294 for (cnt = 0; cnt < len; cnt++) if (((const UniChar *)bytes)[cnt] > 127) {
1295 allASCII = false;
1296 break;
1297 }
1298
1299 if (allASCII) { // Yes we can!
1300 uint8_t *ptr, *mem;
1301 Boolean newHasLengthByte = __CFCanUseLengthByte(len);
1302 numBytes = (len + 1 + (newHasLengthByte ? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte
1303 // See if we can use that temporary local buffer in vBuf...
1304 if (numBytes >= __kCFVarWidthLocalBufferSize) {
1305 mem = ptr = (uint8_t *)CFAllocatorAllocate(alloc, numBytes, 0);
1306 if (__CFOASafe) __CFSetLastAllocationEventName(mem, "CFString (store)");
1307 } else {
1308 mem = ptr = (uint8_t *)(vBuf.localBuffer);
1309 }
1310 if (mem) { // If we can't allocate memory for some reason, use what we had (that is, as if we didn't have all ASCII)
1311 // Copy the Unicode bytes into the new ASCII buffer
1312 hasLengthByte = newHasLengthByte;
1313 hasNullByte = true;
1314 if (hasLengthByte) *ptr++ = (uint8_t)len;
1315 for (cnt = 0; cnt < len; cnt++) ptr[cnt] = (uint8_t)(((const UniChar *)bytes)[cnt]);
1316 ptr[len] = 0;
1317 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1318 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1319 }
1320 // Now make everything look like we had an ASCII buffer to start with
1321 bytes = mem;
1322 encoding = kCFStringEncodingASCII;
1323 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1324 noCopy = (numBytes >= __kCFVarWidthLocalBufferSize); // If we had to allocate it, make sure it's kept around
1325 numBytes--; // Should not contain the NULL byte at end...
1326 stringSupportsEightBitCFRepresentation = true; // We're ASCII now!
1327 stringROMShouldIgnoreNoCopy = true; // We allocated this buffer, so we should feel free to get rid of it if we can use the string ROM
1328 #if INSTRUMENT_SHARED_STRINGS
1329 recordedEncoding = "U->A";
1330 #endif
1331 }
1332 }
1333
1334 // At this point, all necessary input arguments have been changed to reflect the new state
1335 }
1336
1337 // Now determine the necessary size
1338 #if INSTRUMENT_SHARED_STRINGS || USE_STRING_ROM
1339 Boolean stringSupportsROM = stringSupportsEightBitCFRepresentation;
1340 #endif
1341
1342 #if INSTRUMENT_SHARED_STRINGS
1343 if (stringSupportsROM) {
1344 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0);
1345 CFIndex realNumBytes = numBytes - !! hasLengthByte;
1346 __CFRecordStringAllocationEvent(recordedEncoding, realBytes, realNumBytes);
1347 }
1348 #endif
1349
1350 CFStringRef romResult = NULL;
1351
1352 #if USE_STRING_ROM
1353
1354 if (stringSupportsROM) {
1355 // Disable the string ROM if necessary
1356 static char sDisableStringROM = -1;
1357 if (sDisableStringROM == -1) sDisableStringROM = !! __CFgetenv("CFStringDisableROM");
1358
1359 if (sDisableStringROM == 0) romResult = _CFSearchStringROM(bytes + !! hasLengthByte, numBytes - !! hasLengthByte);
1360 }
1361 /* if we get a result from our ROM, and noCopy is set, then deallocate the buffer immediately */
1362 if (romResult) {
1363 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1364 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1365 }
1366
1367 /* these don't get used again, but clear them for consistency */
1368 noCopy = false;
1369 bytes = NULL;
1370
1371 /* set our result to the ROM result which is not really mutable, of course, but that's OK because we don't try to modify it. */
1372 str = (CFMutableStringRef)romResult;
1373 }
1374 #endif
1375
1376 if (! romResult) {
1377 // Now determine the necessary size
1378
1379 if (noCopy) {
1380
1381 size = sizeof(void *); // Pointer to the buffer
1382 if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) {
1383 size += sizeof(void *); // The contentsDeallocator
1384 }
1385 if (!hasLengthByte) size += sizeof(CFIndex); // Explicit length
1386 useLengthByte = hasLengthByte;
1387 useNullByte = hasNullByte;
1388
1389 } else { // Inline data; reserve space for it
1390
1391 useInlineData = true;
1392 size = numBytes;
1393
1394 if (hasLengthByte || (encoding != kCFStringEncodingUnicode && __CFCanUseLengthByte(numBytes))) {
1395 useLengthByte = true;
1396 if (!hasLengthByte) size += 1;
1397 } else {
1398 size += sizeof(CFIndex); // Explicit length
1399 }
1400 if (hasNullByte || encoding != kCFStringEncodingUnicode) {
1401 useNullByte = true;
1402 size += 1;
1403 }
1404 }
1405
1406 #ifdef STRING_SIZE_STATS
1407 // Dump alloced CFString size info every so often
1408 static int cnt = 0;
1409 static unsigned sizes[256] = {0};
1410 int allocedSize = size + sizeof(CFRuntimeBase);
1411 if (allocedSize < 255) sizes[allocedSize]++; else sizes[255]++;
1412 if ((++cnt % 1000) == 0) {
1413 printf ("\nTotal: %d\n", cnt);
1414 int i; for (i = 0; i < 256; i++) printf("%03d: %5d%s", i, sizes[i], ((i % 8) == 7) ? "\n" : " ");
1415 }
1416 #endif
1417
1418 // Finally, allocate!
1419
1420 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, size, NULL);
1421 if (str) {
1422 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (immutable)");
1423
1424 __CFStrSetInfoBits(str,
1425 (useInlineData ? __kCFHasInlineContents : (contentsDeallocator == alloc ? __kCFNotInlineContentsDefaultFree : (contentsDeallocator == kCFAllocatorNull ? __kCFNotInlineContentsNoFree : __kCFNotInlineContentsCustomFree))) |
1426 ((encoding == kCFStringEncodingUnicode) ? __kCFIsUnicode : 0) |
1427 (useNullByte ? __kCFHasNullByte : 0) |
1428 (useLengthByte ? __kCFHasLengthByte : 0));
1429
1430 if (!useLengthByte) {
1431 CFIndex length = numBytes - (hasLengthByte ? 1 : 0);
1432 if (encoding == kCFStringEncodingUnicode) length /= sizeof(UniChar);
1433 __CFStrSetExplicitLength(str, length);
1434 }
1435
1436 if (useInlineData) {
1437 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1438 if (useLengthByte && !hasLengthByte) *contents++ = (uint8_t)numBytes;
1439 memmove(contents, bytes, numBytes);
1440 if (useNullByte) contents[numBytes] = 0;
1441 } else {
1442 __CFStrSetContentPtr(str, bytes);
1443 if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) __CFStrSetContentsDeallocator(str, (CFAllocatorRef)CFRetain(contentsDeallocator));
1444 }
1445 } else {
1446 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1447 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1448 }
1449 }
1450 }
1451 if (vBuf.shouldFreeChars) CFAllocatorDeallocate(vBuf.allocator, (void *)bytes);
1452
1453 return str;
1454 }
1455
1456 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1457 */
1458 CFStringRef __CFStringCreateImmutableFunnel2(
1459 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1460 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1461 CFAllocatorRef contentsDeallocator) {
1462 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, possiblyExternalFormat, tryToReduceUnicode, hasLengthByte, hasNullByte, noCopy, contentsDeallocator, 0);
1463 }
1464
1465
1466
1467 CFStringRef CFStringCreateWithPascalString(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding) {
1468 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1469 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, false, ALLOCATORSFREEFUNC, 0);
1470 }
1471
1472
1473 CFStringRef CFStringCreateWithCString(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding) {
1474 CFIndex len = strlen(cStr);
1475 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, false, ALLOCATORSFREEFUNC, 0);
1476 }
1477
1478 CFStringRef CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1479 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1480 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, true, contentsDeallocator, 0);
1481 }
1482
1483
1484 CFStringRef CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1485 CFIndex len = strlen(cStr);
1486 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, true, contentsDeallocator, 0);
1487 }
1488
1489
1490 CFStringRef CFStringCreateWithCharacters(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars) {
1491 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1492 }
1493
1494
1495 CFStringRef CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars, CFAllocatorRef contentsDeallocator) {
1496 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, false, false, false, true, contentsDeallocator, 0);
1497 }
1498
1499
1500 CFStringRef CFStringCreateWithBytes(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat) {
1501 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1502 }
1503
1504 CFStringRef _CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1505 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1506 }
1507
1508 CFStringRef CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1509 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1510 }
1511
1512 CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1513 return _CFStringCreateWithFormatAndArgumentsAux(alloc, NULL, formatOptions, format, arguments);
1514 }
1515
1516 CFStringRef _CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1517 CFStringRef str;
1518 CFMutableStringRef outputString = CFStringCreateMutable(__CFGetDefaultAllocator(), 0); //should use alloc if no copy/release
1519 __CFStrSetDesiredCapacity(outputString, 120); // Given this will be tightened later, choosing a larger working string is fine
1520 _CFStringAppendFormatAndArgumentsAux(outputString, copyDescFunc, formatOptions, format, arguments);
1521 // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1522 // (However, this does make the string inline, and cause the supplied allocator to be used...)
1523 str = (CFStringRef)CFStringCreateCopy(alloc, outputString);
1524 CFRelease(outputString);
1525 return str;
1526 }
1527
1528 CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) {
1529 CFStringRef result;
1530 va_list argList;
1531
1532 va_start(argList, format);
1533 result = CFStringCreateWithFormatAndArguments(alloc, formatOptions, format, argList);
1534 va_end(argList);
1535
1536 return result;
1537 }
1538
1539 CFStringRef CFStringCreateWithSubstring(CFAllocatorRef alloc, CFStringRef str, CFRange range) {
1540 // CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, CFStringRef , str, "_createSubstringWithRange:", CFRangeMake(range.location, range.length));
1541
1542 __CFAssertIsString(str);
1543 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1544
1545 if ((range.location == 0) && (range.length == __CFStrLength(str))) { /* The substring is the whole string... */
1546 return (CFStringRef)CFStringCreateCopy(alloc, str);
1547 } else if (__CFStrIsEightBit(str)) {
1548 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1549 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location + __CFStrSkipAnyLengthByte(str), range.length, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1550 } else {
1551 const UniChar *contents = (UniChar *)__CFStrContents(str);
1552 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location, range.length * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1553 }
1554 }
1555
1556 CFStringRef CFStringCreateCopy(CFAllocatorRef alloc, CFStringRef str) {
1557 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringRef, str, "copy");
1558
1559 __CFAssertIsString(str);
1560 if (!__CFStrIsMutable((CFStringRef)str) && // If the string is not mutable
1561 ((alloc ? alloc : __CFGetDefaultAllocator()) == __CFGetAllocator(str)) && // and it has the same allocator as the one we're using
1562 (__CFStrIsInline((CFStringRef)str) || __CFStrFreeContentsWhenDone((CFStringRef)str) || __CFStrIsConstant((CFStringRef)str))) { // and the characters are inline, or are owned by the string, or the string is constant
1563 CFRetain(str); // Then just retain instead of making a true copy
1564 return str;
1565 }
1566 if (__CFStrIsEightBit((CFStringRef)str)) {
1567 const uint8_t *contents = (const uint8_t *)__CFStrContents((CFStringRef)str);
1568 return __CFStringCreateImmutableFunnel3(alloc, contents + __CFStrSkipAnyLengthByte((CFStringRef)str), __CFStrLength2((CFStringRef)str, contents), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1569 } else {
1570 const UniChar *contents = (const UniChar *)__CFStrContents((CFStringRef)str);
1571 return __CFStringCreateImmutableFunnel3(alloc, contents, __CFStrLength2((CFStringRef)str, contents) * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1572 }
1573 }
1574
1575
1576
1577 /*** Constant string stuff... ***/
1578
1579 /* Table which holds constant strings created with CFSTR, when -fconstant-cfstrings option is not used. These dynamically created constant strings are stored in constantStringTable. The keys are the 8-bit constant C-strings from the compiler; the values are the CFStrings created for them. _CFSTRLock protects this table.
1580 */
1581 static CFMutableDictionaryRef constantStringTable = NULL;
1582 static CFSpinLock_t _CFSTRLock = CFSpinLockInit;
1583
1584 static CFStringRef __cStrCopyDescription(const void *ptr) {
1585 return CFStringCreateWithCStringNoCopy(kCFAllocatorSystemDefault, (const char *)ptr, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull);
1586 }
1587
1588 static Boolean __cStrEqual(const void *ptr1, const void *ptr2) {
1589 return (strcmp((const char *)ptr1, (const char *)ptr2) == 0);
1590 }
1591
1592 static CFHashCode __cStrHash(const void *ptr) {
1593 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1594 const char *cStr = (const char *)ptr;
1595 CFIndex len = strlen(cStr);
1596 CFHashCode result = 0;
1597 if (len <= 4) { // All chars
1598 unsigned cnt = len;
1599 while (cnt--) result += (result << 8) + *cStr++;
1600 } else { // First and last 2 chars
1601 result += (result << 8) + cStr[0];
1602 result += (result << 8) + cStr[1];
1603 result += (result << 8) + cStr[len-2];
1604 result += (result << 8) + cStr[len-1];
1605 }
1606 result += (result << (len & 31));
1607 return result;
1608 }
1609
1610
1611 CFStringRef __CFStringMakeConstantString(const char *cStr) {
1612 CFStringRef result;
1613 #if defined(DEBUG)
1614 // StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging
1615 if ('\0' == *cStr) return kCFEmptyString;
1616 #endif
1617 if (constantStringTable == NULL) {
1618 CFDictionaryKeyCallBacks constantStringCallBacks = {0, NULL, NULL, __cStrCopyDescription, __cStrEqual, __cStrHash};
1619 CFDictionaryValueCallBacks constantStringValueCallBacks = kCFTypeDictionaryValueCallBacks;
1620 constantStringValueCallBacks.equal = NULL; // So that we only find strings that are ==
1621 CFMutableDictionaryRef table = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, &constantStringCallBacks, &constantStringValueCallBacks);
1622 _CFDictionarySetCapacity(table, 2500); // avoid lots of rehashing
1623 __CFSpinLock(&_CFSTRLock);
1624 if (constantStringTable == NULL) constantStringTable = table;
1625 __CFSpinUnlock(&_CFSTRLock);
1626 if (constantStringTable != table) CFRelease(table);
1627 }
1628
1629 __CFSpinLock(&_CFSTRLock);
1630 if ((result = (CFStringRef)CFDictionaryGetValue(constantStringTable, cStr))) {
1631 __CFSpinUnlock(&_CFSTRLock);
1632 } else {
1633 __CFSpinUnlock(&_CFSTRLock);
1634
1635 {
1636 char *key;
1637 Boolean isASCII = true;
1638 // Given this code path is rarer these days, OK to do this extra work to verify the strings
1639 const char *tmp = cStr;
1640 while (*tmp) {
1641 if (*(tmp++) & 0x80) {
1642 isASCII = false;
1643 break;
1644 }
1645 }
1646 if (!isASCII) {
1647 CFMutableStringRef ms = CFStringCreateMutable(kCFAllocatorSystemDefault, 0);
1648 tmp = cStr;
1649 while (*tmp) {
1650 CFStringAppendFormat(ms, NULL, (*tmp & 0x80) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp);
1651 tmp++;
1652 }
1653 CFLog(kCFLogLevelWarning, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms);
1654 CFRelease(ms);
1655 }
1656 // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility
1657 result = CFStringCreateWithCString(kCFAllocatorSystemDefault, cStr, kCFStringEncodingMacRoman);
1658 if (result == NULL) {
1659 CFLog(__kCFLogAssertion, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1660 HALT;
1661 }
1662 if (__CFOASafe) __CFSetLastAllocationEventName((void *)result, "CFString (CFSTR)");
1663 if (__CFStrIsEightBit(result)) {
1664 key = (char *)__CFStrContents(result) + __CFStrSkipAnyLengthByte(result);
1665 } else { // For some reason the string is not 8-bit!
1666 key = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, strlen(cStr) + 1, 0);
1667 if (__CFOASafe) __CFSetLastAllocationEventName((void *)key, "CFString (CFSTR key)");
1668 strlcpy(key, cStr, strlen(cStr) + 1); // !!! We will leak this, if the string is removed from the table (or table is freed)
1669 }
1670
1671 {
1672 CFStringRef resultToBeReleased = result;
1673 CFIndex count;
1674 __CFSpinLock(&_CFSTRLock);
1675 count = CFDictionaryGetCount(constantStringTable);
1676 CFDictionaryAddValue(constantStringTable, key, result);
1677 if (CFDictionaryGetCount(constantStringTable) == count) { // add did nothing, someone already put it there
1678 result = (CFStringRef)CFDictionaryGetValue(constantStringTable, key);
1679 } else {
1680 #if __LP64__
1681 ((struct __CFString *)result)->base._rc = 0;
1682 #else
1683 ((struct __CFString *)result)->base._cfinfo[CF_RC_BITS] = 0;
1684 #endif
1685 }
1686 __CFSpinUnlock(&_CFSTRLock);
1687 // This either eliminates the extra retain on the freshly created string, or frees it, if it was actually not inserted into the table
1688 CFRelease(resultToBeReleased);
1689 }
1690 }
1691 }
1692 return result;
1693 }
1694
#if defined(DEBUG)
/* Debug-only: reports whether str is one of the values in constantStringTable.
   Returns false when the table has not been created; otherwise searches it under _CFSTRLock.
*/
static Boolean __CFStrIsConstantString(CFStringRef str) {
    Boolean isInTable;
    if (!constantStringTable) return false;
    __CFSpinLock(&_CFSTRLock);
    isInTable = CFDictionaryContainsValue(constantStringTable, str);
    __CFSpinUnlock(&_CFSTRLock);
    return isInTable;
}
#endif
1706
1707
#if DEPLOYMENT_TARGET_WINDOWS
/* In case the library is unloaded, release the store for the constant string table.
   In DEBUG builds __CFConstantStringTableBeingFreed is set for the duration of the release.
*/
void __CFStringCleanup (void) {
    if (constantStringTable == NULL) return;
#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = true;
#endif
    CFRelease(constantStringTable);
#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = false;
#endif
    constantStringTable = NULL;
}
#endif
1723
1724
1725 // Can pass in NSString as replacement string
1726 // Call with numRanges > 0, and incrementing ranges
1727
1728 static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) {
1729 int cnt;
1730 CFStringRef copy = NULL;
1731 if (replacement == str) copy = replacement = CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1732 CFIndex replacementLength = CFStringGetLength(replacement);
1733
1734 __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1735
1736 if (__CFStrIsUnicode(str)) {
1737 UniChar *contents = (UniChar *)__CFStrContents(str);
1738 UniChar *firstReplacement = contents + ranges[0].location;
1739 // Extract the replacementString into the first location, then copy from there
1740 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement);
1741 for (cnt = 1; cnt < numRanges; cnt++) {
1742 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1743 contents += replacementLength - ranges[cnt - 1].length;
1744 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar));
1745 }
1746 } else {
1747 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1748 uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str);
1749 // Extract the replacementString into the first location, then copy from there
1750 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL);
1751 contents += __CFStrSkipAnyLengthByte(str); // Now contents will simply track the location to insert next string into
1752 for (cnt = 1; cnt < numRanges; cnt++) {
1753 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1754 contents += replacementLength - ranges[cnt - 1].length;
1755 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength);
1756 }
1757 }
1758 if (copy) CFRelease(copy);
1759 }
1760
1761 // Can pass in NSString as replacement string
1762
1763 CF_INLINE void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
1764 CFStringRef copy = NULL;
1765 if (replacement == str) copy = replacement = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1766 CFIndex replacementLength = CFStringGetLength(replacement);
1767
1768 __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1769
1770 if (__CFStrIsUnicode(str)) {
1771 UniChar *contents = (UniChar *)__CFStrContents(str);
1772 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location);
1773 } else {
1774 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1775 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL);
1776 }
1777
1778 if (copy) CFRelease(copy);
1779 }
1780
/* Desired capacity given to a new mutable string when the client passes a maxLength of 0,
   that is, when the client does not provide a capacity of its own.
*/
#define DEFAULTMINCAPACITY 32
1784
1785 CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) {
1786 CFMutableStringRef str;
1787 Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false;
1788
1789 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1790
1791 // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
1792 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(struct __notInlineMutable) - (hasExternalContentsAllocator ? 0 : sizeof(CFAllocatorRef)), NULL);
1793 if (str) {
1794 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)");
1795
1796 __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits);
1797 str->variants.notInlineMutable.buffer = NULL;
1798 __CFStrSetExplicitLength(str, 0);
1799 str->variants.notInlineMutable.hasGap = str->variants.notInlineMutable.isFixedCapacity = str->variants.notInlineMutable.isExternalMutable = str->variants.notInlineMutable.capacityProvidedExternally = 0;
1800 if (maxLength != 0) __CFStrSetIsFixed(str);
1801 __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength);
1802 __CFStrSetCapacity(str, 0);
1803 }
1804 return str;
1805 }
1806
1807 CFMutableStringRef CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc, UniChar *chars, CFIndex numChars, CFIndex capacity, CFAllocatorRef externalCharactersAllocator) {
1808 CFOptionFlags contentsAllocationBits = externalCharactersAllocator ? ((externalCharactersAllocator == kCFAllocatorNull) ? __kCFNotInlineContentsNoFree : __kCFHasContentsAllocator) : __kCFNotInlineContentsDefaultFree;
1809 CFMutableStringRef string = __CFStringCreateMutableFunnel(alloc, 0, contentsAllocationBits | __kCFIsUnicode);
1810 if (string) {
1811 __CFStrSetIsExternalMutable(string);
1812 if (contentsAllocationBits == __kCFHasContentsAllocator) __CFStrSetContentsAllocator(string, (CFAllocatorRef)CFRetain(externalCharactersAllocator));
1813 CFStringSetExternalCharactersNoCopy(string, chars, numChars, capacity);
1814 }
1815 return string;
1816 }
1817
1818 CFMutableStringRef CFStringCreateMutable(CFAllocatorRef alloc, CFIndex maxLength) {
1819 return __CFStringCreateMutableFunnel(alloc, maxLength, __kCFNotInlineContentsDefaultFree);
1820 }
1821
1822 CFMutableStringRef CFStringCreateMutableCopy(CFAllocatorRef alloc, CFIndex maxLength, CFStringRef string) {
1823 CFMutableStringRef newString;
1824
1825 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFMutableStringRef, string, "mutableCopy");
1826
1827 __CFAssertIsString(string);
1828
1829 newString = CFStringCreateMutable(alloc, maxLength);
1830 __CFStringReplace(newString, CFRangeMake(0, 0), string);
1831
1832 return newString;
1833 }
1834
1835
1836 __private_extern__ void _CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex len) {
1837 __CFAssertIsStringAndMutable(str);
1838 __CFStrSetDesiredCapacity(str, len);
1839 }
1840
1841
1842 /* This one is for CF
1843 */
1844 CFIndex CFStringGetLength(CFStringRef str) {
1845 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFIndex, str, "length");
1846
1847 __CFAssertIsString(str);
1848 return __CFStrLength(str);
1849 }
1850
1851 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1852 */
1853 CFIndex _CFStringGetLength2(CFStringRef str) {
1854 return __CFStrLength(str);
1855 }
1856
1857
1858 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere.
1859 */
1860 CF_INLINE UniChar __CFStringGetCharacterAtIndexGuts(CFStringRef str, CFIndex idx, const uint8_t *contents) {
1861 if (__CFStrIsEightBit(str)) {
1862 contents += __CFStrSkipAnyLengthByte(str);
1863 #if defined(DEBUG)
1864 if (!__CFCharToUniCharFunc && (contents[idx] >= 128)) {
1865 // Can't do log here, as it might be too early
1866 fprintf(stderr, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
1867 }
1868 #endif
1869 return __CFCharToUniCharTable[contents[idx]];
1870 }
1871
1872 return ((UniChar *)contents)[idx];
1873 }
1874
1875 /* This one is for the CF API
1876 */
1877 UniChar CFStringGetCharacterAtIndex(CFStringRef str, CFIndex idx) {
1878 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, UniChar, str, "characterAtIndex:", idx);
1879
1880 __CFAssertIsString(str);
1881 __CFAssertIndexIsInStringBounds(str, idx);
1882 return __CFStringGetCharacterAtIndexGuts(str, idx, (const uint8_t *)__CFStrContents(str));
1883 }
1884
1885 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1886 */
1887 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str, CFIndex idx, UniChar *ch) {
1888 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1889 if (idx >= __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1890 *ch = __CFStringGetCharacterAtIndexGuts(str, idx, contents);
1891 return _CFStringErrNone;
1892 }
1893
1894
1895 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
1896 */
1897 CF_INLINE void __CFStringGetCharactersGuts(CFStringRef str, CFRange range, UniChar *buffer, const uint8_t *contents) {
1898 if (__CFStrIsEightBit(str)) {
1899 __CFStrConvertBytesToUnicode(((uint8_t *)contents) + (range.location + __CFStrSkipAnyLengthByte(str)), buffer, range.length);
1900 } else {
1901 const UniChar *uContents = ((UniChar *)contents) + range.location;
1902 memmove(buffer, uContents, range.length * sizeof(UniChar));
1903 }
1904 }
1905
1906 /* This one is for the CF API
1907 */
1908 void CFStringGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1909 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "getCharacters:range:", buffer, CFRangeMake(range.location, range.length));
1910
1911 __CFAssertIsString(str);
1912 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1913 __CFStringGetCharactersGuts(str, range, buffer, (const uint8_t *)__CFStrContents(str));
1914 }
1915
1916 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1917 */
1918 int _CFStringCheckAndGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1919 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1920 if (range.location + range.length > __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1921 __CFStringGetCharactersGuts(str, range, buffer, contents);
1922 return _CFStringErrNone;
1923 }
1924
1925
1926 CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) {
1927
1928 /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */
1929 __CFAssertIsNotNegative(maxBufLen);
1930
1931 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { // If we can grope the ivars, let's do it...
1932 __CFAssertIsString(str);
1933 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1934
1935 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
1936 const unsigned char *contents = (const unsigned char *)__CFStrContents(str);
1937 CFIndex cLength = range.length;
1938
1939 if (buffer) {
1940 if (cLength > maxBufLen) cLength = maxBufLen;
1941 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength);
1942 }
1943 if (usedBufLen) *usedBufLen = cLength;
1944
1945 return cLength;
1946 }
1947 }
1948
1949 return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen);
1950 }
1951
1952
1953 ConstStringPtr CFStringGetPascalStringPtr (CFStringRef str, CFStringEncoding encoding) {
1954
1955 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1956 __CFAssertIsString(str);
1957 if (__CFStrHasLengthByte(str) && __CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII
1958 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1959 if (__CFStrHasExplicitLength(str) && (__CFStrLength2(str, contents) != (SInt32)(*contents))) return NULL; // Invalid length byte
1960 return (ConstStringPtr)contents;
1961 }
1962 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1963 }
1964 return NULL;
1965 }
1966
1967
1968 const char * CFStringGetCStringPtr(CFStringRef str, CFStringEncoding encoding) {
1969
1970 if (encoding != __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII != __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding))) return NULL;
1971 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1972
1973 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, const char *, str, "_fastCStringContents:", true);
1974
1975 __CFAssertIsString(str);
1976
1977 if (__CFStrHasNullByte(str)) {
1978 // Note: this is called a lot, 27000 times to open a small xcode project with one file open.
1979 // Of these uses about 1500 are for cStrings/utf8strings.
1980 #if 0
1981 // Only sometimes when the stars are aligned will this call return a gc pointer
1982 // under GC we can only really return a pointer to the start of a GC buffer for cString use
1983 // (Is there a simpler way to ask if contents isGC?)
1984 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
1985 if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
1986 if (__CFStrSkipAnyLengthByte(str) != 0 || !__CFStrIsMutable(str)) {
1987 static int counter = 0;
1988 printf("CFString %dth unsafe safe string %s\n", ++counter, __CFStrContents(str) + __CFStrSkipAnyLengthByte(str));
1989 return NULL;
1990 }
1991 }
1992 #endif
1993 return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str);
1994 } else {
1995 return NULL;
1996 }
1997 }
1998
1999
2000 const UniChar *CFStringGetCharactersPtr(CFStringRef str) {
2001
2002 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, const UniChar *, str, "_fastCharacterContents");
2003
2004 __CFAssertIsString(str);
2005 if (__CFStrIsUnicode(str)) return (const UniChar *)__CFStrContents(str);
2006 return NULL;
2007 }
2008
2009
2010 Boolean CFStringGetPascalString(CFStringRef str, Str255 buffer, CFIndex bufferSize, CFStringEncoding encoding) {
2011 CFIndex length;
2012 CFIndex usedLen;
2013
2014 __CFAssertIsNotNegative(bufferSize);
2015 if (bufferSize < 1) return false;
2016
2017 if (CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
2018 length = CFStringGetLength(str);
2019 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
2020 } else {
2021 const uint8_t *contents;
2022
2023 __CFAssertIsString(str);
2024
2025 contents = (const uint8_t *)__CFStrContents(str);
2026 length = __CFStrLength2(str, contents);
2027
2028 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
2029
2030 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2031 if (length >= bufferSize) return false;
2032 memmove((void*)(1 + (const char*)buffer), (__CFStrSkipAnyLengthByte(str) + contents), length);
2033 *buffer = (unsigned char)length;
2034 return true;
2035 }
2036 }
2037
2038 if (__CFStringEncodeByteStream(str, 0, length, false, encoding, false, (UInt8 *)(1 + (uint8_t *)buffer), bufferSize - 1, &usedLen) != length) {
2039
2040 #if defined(DEBUG)
2041 if (bufferSize > 0) {
2042 strlcpy((char *)buffer + 1, CONVERSIONFAILURESTR, bufferSize - 1);
2043 buffer[0] = (unsigned char)((CFIndex)sizeof(CONVERSIONFAILURESTR) < (bufferSize - 1) ? (CFIndex)sizeof(CONVERSIONFAILURESTR) : (bufferSize - 1));
2044 }
2045 #else
2046 if (bufferSize > 0) buffer[0] = 0;
2047 #endif
2048 return false;
2049 }
2050 *buffer = (unsigned char)usedLen;
2051 return true;
2052 }
2053
2054 Boolean CFStringGetCString(CFStringRef str, char *buffer, CFIndex bufferSize, CFStringEncoding encoding) {
2055 const uint8_t *contents;
2056 CFIndex len;
2057
2058 __CFAssertIsNotNegative(bufferSize);
2059 if (bufferSize < 1) return false;
2060
2061 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, Boolean, str, "_getCString:maxLength:encoding:", buffer, bufferSize - 1, encoding);
2062
2063 __CFAssertIsString(str);
2064
2065 contents = (const uint8_t *)__CFStrContents(str);
2066 len = __CFStrLength2(str, contents);
2067
2068 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2069 if (len >= bufferSize) return false;
2070 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str), len);
2071 buffer[len] = 0;
2072 return true;
2073 } else {
2074 CFIndex usedLen;
2075
2076 if (__CFStringEncodeByteStream(str, 0, len, false, encoding, false, (unsigned char*) buffer, bufferSize - 1, &usedLen) == len) {
2077 buffer[usedLen] = '\0';
2078 return true;
2079 } else {
2080 #if defined(DEBUG)
2081 strlcpy(buffer, CONVERSIONFAILURESTR, bufferSize);
2082 #else
2083 if (bufferSize > 0) buffer[0] = 0;
2084 #endif
2085 return false;
2086 }
2087 }
2088 }
2089
2090 extern Boolean __CFLocaleGetNullLocale(struct __CFLocale *locale);
2091 extern void __CFLocaleSetNullLocale(struct __CFLocale *locale);
2092
2093 static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale) {
2094 CFStringRef collatorID;
2095 const char *langID = NULL;
2096 static const void *lastLocale = NULL;
2097 static const char *lastLangID = NULL;
2098 static CFSpinLock_t lock = CFSpinLockInit;
2099
2100 if (__CFLocaleGetNullLocale((struct __CFLocale *)locale)) return NULL;
2101
2102 __CFSpinLock(&lock);
2103 if ((NULL != lastLocale) && (lastLocale == locale)) {
2104 __CFSpinUnlock(&lock);
2105 return lastLangID;
2106 }
2107 __CFSpinUnlock(&lock);
2108
2109 collatorID = (CFStringRef)CFLocaleGetValue(locale, __kCFLocaleCollatorID);
2110
2111 // This is somewhat depending on CFLocale implementation always creating CFString for locale identifer ???
2112 if (__CFStrLength(collatorID) > 1) {
2113 const void *contents = __CFStrContents(collatorID);
2114 const char *string;
2115 char buffer[2];
2116
2117 if (__CFStrIsEightBit(collatorID)) {
2118 string = ((const char *)contents) + __CFStrSkipAnyLengthByte(collatorID);
2119 } else {
2120 const UTF16Char *characters = (const UTF16Char *)contents;
2121
2122 buffer[0] = (char)*(characters++);
2123 buffer[1] = (char)*characters;
2124 string = buffer;
2125 }
2126
2127 if (!strncmp(string, "az", 2)) { // Azerbaijani
2128 langID = "az";
2129 } else if (!strncmp(string, "lt", 2)) { // Lithuanian
2130 langID = "lt";
2131 } else if (!strncmp(string, "tr", 2)) { // Turkish
2132 langID = "tr";
2133 }
2134 }
2135
2136
2137 if (langID == NULL) __CFLocaleSetNullLocale((struct __CFLocale *)locale);
2138
2139 __CFSpinLock(&lock);
2140 lastLocale = locale;
2141 lastLangID = langID;
2142 __CFSpinUnlock(&lock);
2143
2144 return langID;
2145 }
2146
2147 static int8_t __CFCheckLocaleCFType = -1;
2148
2149 CF_INLINE bool _CFCanUseLocale(CFLocaleRef locale) {
2150 if (locale) {
2151 if (__CFCheckLocaleCFType < 0) __CFCheckLocaleCFType = !_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther);
2152 if (!__CFCheckLocaleCFType || (CFGetTypeID(locale) == CFLocaleGetTypeID())) return true;
2153 }
2154 return false;
2155 }
2156
// Capacity, in UTF16Char units, of the scratch buffer handed to CFUniCharMapCaseTo when case folding a character.
2157 #define MAX_CASE_MAPPING_BUF (8)
// U+200D ZERO WIDTH JOINER
2158 #define ZERO_WIDTH_JOINER (0x200D)
// U+034F COMBINING GRAPHEME JOINER
2159 #define COMBINING_GRAPHEME_JOINER (0x034F)
2160 // Hangul ranges (inclusive code point bounds)
// Leading consonants (choseong)
2161 #define HANGUL_CHOSEONG_START (0x1100)
2162 #define HANGUL_CHOSEONG_END (0x115F)
// Vowels (jungseong)
2163 #define HANGUL_JUNGSEONG_START (0x1160)
2164 #define HANGUL_JUNGSEONG_END (0x11A2)
// Trailing consonants (jongseong)
2165 #define HANGUL_JONGSEONG_START (0x11A8)
2166 #define HANGUL_JONGSEONG_END (0x11F9)
2167
// Precomposed Hangul syllables
2168 #define HANGUL_SYLLABLE_START (0xAC00)
2169 #define HANGUL_SYLLABLE_END (0xD7AF)
2170
2171
// Folds the character cluster that starts at `index` according to the comparison `flags` and writes the folded form, as UTF-32, into outCharacters.
//
//   character        the character already fetched from `index` by the caller; 0 means "nothing to fold"
//   buffer           inline buffer used to read the characters that follow `index`
//   index            position of `character` within buffer
//   flags            kCFCompare* options; this function tests kCFCompareCaseInsensitive, kCFCompareWidthInsensitive,
//                    kCFCompareDiacriticInsensitive and kCFCompareNonliteral
//   langCode         optional language code (may be NULL); "tr" and "az" enable the special handling of 'I'
//   outCharacters    destination for the folded characters
//   maxBufferLength  capacity of outCharacters, in UTF32Char units
//   consumedLength   optional; receives the number of source positions consumed, but only when the result is non-empty
//
2172 // Returns the number of characters filled into outCharacters. If no change, returns 0. maxBufferLength should be at least 8
2173 static CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength) {
2174 CFIndex filledLength = 0, currentIndex = index;
2175
// A zero character skips the whole body, so the function returns 0 and leaves consumedLength untouched.
2176 if (0 != character) {
2177 UTF16Char lowSurrogate;
2178 CFIndex planeNo = (character >> 16);
2179 bool isTurkikCapitalI = false;
// Plane-0 bitmaps are looked up on first use and kept in function-level statics.
2180 static const uint8_t *decompBMP = NULL;
2181 static const uint8_t *graphemeBMP = NULL;
2182
2183 if (NULL == decompBMP) {
2184 decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
2185 graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2186 }
2187
// Step past the position of `character` itself.
2188 ++currentIndex;
2189
// ASCII fast path: only A-Z changes, and only for case-insensitive folding. 'I' is excluded when a language code is given so the Turkic handling below can see it.
2190 if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII
2191 if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) {
2192 character += ('a' - 'A');
2193 *outCharacters = character;
2194 filledLength = 1;
2195 }
2196 } else {
2197 // do width-insensitive mapping
2198 if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) {
2199 (void)CFUniCharCompatibilityDecompose(&character, 1, 1);
2200 *outCharacters = character;
2201 filledLength = 1;
2202 }
2203
2204 // map surrogates
2205 if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) {
2206 character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
2207 ++currentIndex;
2208 planeNo = (character >> 16);
2209 }
2210
2211 // decompose
2212 if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
2213 if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) {
2214 UTF32Char original = character;
2215
2216 filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength);
2217 character = *outCharacters;
2218
// Diacritic-insensitive with a base below 0x0510: keep only the base character. Otherwise, without kCFCompareNonliteral, the decomposition is discarded again.
2219 if ((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) {
2220 filledLength = 1; // reset if Roman, Greek, Cyrillic
2221 } else if (0 == (flags & kCFCompareNonliteral)) {
2222 character = original;
2223 filledLength = 0;
2224 }
2225 }
2226 }
2227
2228 // fold case
2229 if (flags & kCFCompareCaseInsensitive) {
2230 const uint8_t *nonBaseBitmap;
// When set, grapheme-extend characters produced by the case folding itself are not stored.
2231 bool filterNonBase = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? true : false);
2232 static const uint8_t *lowerBMP = NULL;
2233 static const uint8_t *caseFoldBMP = NULL;
2234
2235 if (NULL == lowerBMP) {
2236 lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0);
2237 caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0);
2238 }
2239
// For "tr"/"az": an 'I' followed by U+0307 (either in the decomposition output or as the next character in the buffer) becomes 'i' and the U+0307 is dropped.
2240 if ((NULL != langCode) && ('I' == character) && ((0 == strcmp((const char *)langCode, "tr")) || (0 == strcmp((const char *)langCode, "az")))) { // do Turkik special-casing
2241 if (filledLength > 1) {
2242 if (0x0307 == outCharacters[1]) {
2243 if (--filledLength > 1) memmove((outCharacters + 1), (outCharacters + 2), sizeof(UTF32Char) * (filledLength - 1));
2244 character = *outCharacters = 'i';
2245 isTurkikCapitalI = true;
2246 }
2247 } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) {
2248 character = *outCharacters = 'i';
2249 filledLength = 1;
2250 ++currentIndex;
2251 isTurkikCapitalI = true;
2252 }
2253 }
2254 if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) {
2255 UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF];
2256 const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit;
2257 UTF32Char *outCharactersP = outCharacters;
2258 uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode);
2259
2260 bufferLimit = bufferP + bufferLength;
2261
2262 if (filledLength > 0) --filledLength; // decrement filledLength (will add back later)
2263
2264 // make space for casefold characters
// The folded result is counted in scalar values (a surrogate pair counts once) and the remaining decomposed characters are shifted to follow it.
2265 if ((filledLength > 0) && (bufferLength > 1)) {
2266 CFIndex totalScalerLength = 0;
2267
2268 while (bufferP < bufferLimit) {
2269 if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP;
2270 ++totalScalerLength;
2271 }
2272 memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char));
2273 bufferP = caseFoldBuffer;
2274 }
2275
2276 // fill
2277 while (bufferP < bufferLimit) {
2278 character = *(bufferP++);
2279 if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) {
2280 character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++));
2281 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
2282 } else {
2283 nonBaseBitmap = graphemeBMP;
2284 }
2285
2286 if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2287 *(outCharactersP++) = character;
2288 ++filledLength;
2289 }
2290 }
2291 }
2292 }
2293 }
2294
2295 // collect following combining marks
2296 if (flags & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) {
2297 const uint8_t *nonBaseBitmap;
2298 const uint8_t *decompBitmap;
// doFill is false when diacritics are ignored for a base below 0x0510; following grapheme-extend characters are then consumed without being stored.
2299 bool doFill = (((flags & kCFCompareDiacriticInsensitive) && (character < 0x0510)) ? false : true);
2300
2301 if (0 == filledLength) {
2302 *outCharacters = character; // filledLength will be updated below on demand
2303
2304 if (doFill) { // check if really needs to fill
2305 UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2306
2307 if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2308 nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate);
2309 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (nonBaseCharacter >> 16));
2310 decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16));
2311 } else {
2312 nonBaseBitmap = graphemeBMP;
2313 decompBitmap = decompBMP;
2314 }
2315
2316 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) {
2317 filledLength = 1; // For the base character
2318
2319 if ((0 == (flags & kCFCompareDiacriticInsensitive)) || (nonBaseCharacter > 0x050F)) {
2320 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) {
2321 filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2322 } else {
2323 outCharacters[filledLength++] = nonBaseCharacter;
2324 }
2325 }
// Advance by one position for a BMP character, two for a surrogate pair (the bitmap pointer tells which case applied).
2326 currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
2327 } else {
2328 doFill = false;
2329 }
2330 }
2331 }
2332
2333 while (filledLength < maxBufferLength) { // do the rest
2334 character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2335
2336 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2337 character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
2338 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
2339 decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16));
2340 } else {
2341 nonBaseBitmap = graphemeBMP;
2342 decompBitmap = decompBMP;
2343 }
// NOTE(review): after Turkic special-casing the first pass only clears the flag and the same position is read again on the next pass; the intent is not evident from this code alone.
2344 if (isTurkikCapitalI) {
2345 isTurkikCapitalI = false;
2346 } else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2347 if (doFill) {
2348 if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) {
2349 CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2350
2351 if (0 == currentLength) break; // didn't fit
2352
2353 filledLength += currentLength;
2354 } else {
2355 outCharacters[filledLength++] = character;
2356 }
2357 } else if (0 == filledLength) {
2358 filledLength = 1; // For the base character
2359 }
2360 currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
2361 } else {
2362 break;
2363 }
2364 }
2365
// Walk back from the end over trailing grapheme-extend characters, then priority-sort that trailing run together with the character the walk stopped on.
2366 if (filledLength > 1) {
2367 UTF32Char *sortCharactersLimit = outCharacters + filledLength;
2368 UTF32Char *sortCharacters = sortCharactersLimit - 1;
2369
2370 while ((outCharacters < sortCharacters) && CFUniCharIsMemberOfBitmap(*sortCharacters, ((*sortCharacters < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (*sortCharacters >> 16))))) --sortCharacters;
2371
2372 if ((sortCharactersLimit - sortCharacters) > 1) CFUniCharPrioritySort(sortCharacters, (sortCharactersLimit - sortCharacters)); // priority sort
2373 }
2374 }
2375 }
2376
// consumedLength is written only when something was produced.
2377 if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index);
2378
2379 return filledLength;
2380 }
2381
2382 static bool __CFStringFillCharacterSetInlineBuffer(CFCharacterSetInlineBuffer *buffer, CFStringCompareFlags compareOptions) {
2383 if (0 != (compareOptions & kCFCompareIgnoreNonAlphanumeric)) {
2384 static CFCharacterSetRef nonAlnumChars = NULL;
2385
2386 if (NULL == nonAlnumChars) {
2387 CFMutableCharacterSetRef cset = CFCharacterSetCreateMutableCopy(NULL, CFCharacterSetGetPredefined(kCFCharacterSetAlphaNumeric));
2388 CFCharacterSetInvert(cset);
2389 if (!OSAtomicCompareAndSwapPtrBarrier(NULL, cset, (void **)&nonAlnumChars)) CFRelease(cset);
2390 }
2391
2392 CFCharacterSetInitInlineBuffer(nonAlnumChars, buffer);
2393
2394 return true;
2395 }
2396
2397 return false;
2398 }
2399
2400 #define kCFStringStackBufferLength (__kCFStringInlineBufferLength)
2401
2402 CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions, CFLocaleRef locale) {
2403 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2404 UTF32Char strBuf1[kCFStringStackBufferLength];
2405 UTF32Char strBuf2[kCFStringStackBufferLength];
2406 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2407 UTF32Char str1Char, str2Char;
2408 CFIndex str1UsedLen, str2UsedLen;
2409 CFIndex str1Index = 0, str2Index = 0, strBuf1Index = 0, strBuf2Index = 0, strBuf1Len = 0, strBuf2Len = 0;
2410 CFIndex str1LocalizedIndex = 0, str2LocalizedIndex = 0;
2411 CFIndex forcedIndex1 = 0, forcedIndex2 = 0;
2412 CFIndex str2Len = CFStringGetLength(string2);
2413 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2414 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
2415 bool equalityOptions = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive)) ? true : false);
2416 bool numerically = ((compareOptions & kCFCompareNumerically) ? true : false);
2417 bool forceOrdering = ((compareOptions & kCFCompareForcedOrdering) ? true : false);
2418 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2419 const uint8_t *langCode;
2420 CFComparisonResult compareResult = kCFCompareEqualTo;
2421 UTF16Char otherChar;
2422 Boolean freeLocale = false;
2423 CFCharacterSetInlineBuffer *ignoredChars = NULL;
2424 CFCharacterSetInlineBuffer csetBuffer;
2425
2426 if ((compareOptions & kCFCompareLocalized) && (NULL == locale)) {
2427 locale = CFLocaleCopyCurrent();
2428 freeLocale = true;
2429 }
2430
2431 langCode = ((NULL == locale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale));
2432
2433 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
2434 ignoredChars = &csetBuffer;
2435 equalityOptions = true;
2436 }
2437
2438 if ((NULL == locale) && (NULL == ignoredChars) && !numerically) { // could do binary comp (be careful when adding new flags)
2439 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2440 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2441 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(string2, eightBitEncoding);
2442 CFIndex factor = sizeof(uint8_t);
2443
2444 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2445 compareOptions &= ~kCFCompareNonliteral; // remove non-literal
2446
2447 if ((kCFStringEncodingASCII == eightBitEncoding) && (false == forceOrdering)) {
2448 if (caseInsensitive) {
2449 int cmpResult = strncasecmp_l((const char *)str1Bytes + rangeToCompare.location, (const char *)str2Bytes, __CFMin(rangeToCompare.length, str2Len), NULL);
2450
2451 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2452
2453 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2454 }
2455 } else if (caseInsensitive || diacriticsInsensitive) {
2456 CFIndex limitLength = __CFMin(rangeToCompare.length, str2Len);
2457
2458 str1Bytes += rangeToCompare.location;
2459
2460 while (str1Index < limitLength) {
2461 str1Char = str1Bytes[str1Index];
2462 str2Char = str2Bytes[str1Index];
2463
2464 if (str1Char != str2Char) {
2465 if ((str1Char < 0x80) && (str2Char < 0x80)) {
2466 if (forceOrdering && (kCFCompareEqualTo == compareResult) && (str1Char != str2Char)) compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2467 if ((str1Char >= 'A') && (str1Char <= 'Z')) str1Char += ('a' - 'A');
2468 if ((str2Char >= 'A') && (str2Char <= 'Z')) str2Char += ('a' - 'A');
2469
2470 if (str1Char != str2Char) return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2471 } else {
2472 str1Bytes = NULL;
2473 break;
2474 }
2475 }
2476 ++str1Index;
2477 }
2478
2479 str2Index = str1Index;
2480
2481 if (str1Index == limitLength) {
2482 int cmpResult = rangeToCompare.length - str2Len;
2483
2484 return ((0 == cmpResult) ? compareResult : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2485 }
2486 }
2487 } else if (!equalityOptions && (NULL == str1Bytes) && (NULL == str2Bytes)) {
2488 str1Bytes = (const uint8_t *)CFStringGetCharactersPtr(string);
2489 str2Bytes = (const uint8_t *)CFStringGetCharactersPtr(string2);
2490 factor = sizeof(UTF16Char);
2491 #if __LITTLE_ENDIAN__
2492 if ((NULL != str1Bytes) && (NULL != str2Bytes)) { // we cannot use memcmp
2493 const UTF16Char *str1 = ((const UTF16Char *)str1Bytes) + rangeToCompare.location;
2494 const UTF16Char *str1Limit = str1 + __CFMin(rangeToCompare.length, str2Len);
2495 const UTF16Char *str2 = (const UTF16Char *)str2Bytes;
2496 CFIndex cmpResult = 0;
2497
2498 while ((0 == cmpResult) && (str1 < str1Limit)) cmpResult = (CFIndex)*(str1++) - (CFIndex)*(str2++);
2499
2500 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2501
2502 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2503 }
2504 #endif /* __LITTLE_ENDIAN__ */
2505 }
2506 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2507 int cmpResult = memcmp(str1Bytes + (rangeToCompare.location * factor), str2Bytes, __CFMin(rangeToCompare.length, str2Len) * factor);
2508
2509 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2510
2511 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2512 }
2513 }
2514
2515 CFStringInitInlineBuffer(string, &inlineBuf1, rangeToCompare);
2516 CFStringInitInlineBuffer(string2, &inlineBuf2, CFRangeMake(0, str2Len));
2517
2518 if (NULL != locale) {
2519 str1LocalizedIndex = str1Index;
2520 str2LocalizedIndex = str2Index;
2521
2522 // We temporarily disable kCFCompareDiacriticInsensitive for SL <rdar://problem/6767096>. Should be revisited in NMOS <rdar://problem/7003830>
2523 if (forceOrdering) {
2524 diacriticsInsensitive = false;
2525 compareOptions &= ~kCFCompareDiacriticInsensitive;
2526 }
2527 }
2528 while ((str1Index < rangeToCompare.length) && (str2Index < str2Len)) {
2529 if (strBuf1Len == 0) {
2530 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2531 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str1Char += ('a' - 'A');
2532 str1UsedLen = 1;
2533 } else {
2534 str1Char = strBuf1[strBuf1Index++];
2535 }
2536 if (strBuf2Len == 0) {
2537 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2538 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I')) && ((false == forceOrdering) || (kCFCompareEqualTo != compareResult))) str2Char += ('a' - 'A');
2539 str2UsedLen = 1;
2540 } else {
2541 str2Char = strBuf2[strBuf2Index++];
2542 }
2543
2544 if (numerically && ((0 == strBuf1Len) && (str1Char <= '9') && (str1Char >= '0')) && ((0 == strBuf2Len) && (str2Char <= '9') && (str2Char >= '0'))) { // If both are not ASCII digits, then don't do numerical comparison here
2545 uint64_t intValue1 = 0, intValue2 = 0; // !!! Doesn't work if numbers are > max uint64_t
2546
2547 if (forceOrdering && (kCFCompareEqualTo == compareResult) && (str1Char != str2Char)) {
2548 compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2549 forcedIndex1 = str1Index;
2550 forcedIndex2 = str2Index;
2551 }
2552
2553 do {
2554 intValue1 = (intValue1 * 10) + (str1Char - '0');
2555 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, ++str1Index);
2556 } while ((str1Char <= '9') && (str1Char >= '0'));
2557
2558 do {
2559 intValue2 = intValue2 * 10 + (str2Char - '0');
2560 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, ++str2Index);
2561 } while ((str2Char <= '9') && (str2Char >= '0'));
2562
2563 if (intValue1 == intValue2) {
2564 continue;
2565 } else if (intValue1 < intValue2) {
2566 if (freeLocale && locale) {
2567 CFRelease(locale);
2568 }
2569 return kCFCompareLessThan;
2570 } else {
2571 if (freeLocale && locale) {
2572 CFRelease(locale);
2573 }
2574 return kCFCompareGreaterThan;
2575 }
2576 }
2577
2578 if (str1Char != str2Char) {
2579 if (!equalityOptions) {
2580 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale));
2581 if (freeLocale && locale) {
2582 CFRelease(locale);
2583 }
2584 return compareResult;
2585 }
2586
2587 if (forceOrdering && (kCFCompareEqualTo == compareResult)) {
2588 compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2589 forcedIndex1 = str1LocalizedIndex;
2590 forcedIndex2 = str2LocalizedIndex;
2591 }
2592
2593 if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars)) {
2594 if (NULL != locale) {
2595 compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1Index, rangeToCompare.length - str1Index), &inlineBuf2, CFRangeMake(str2Index, str2Len - str2Index), compareOptions, locale);
2596 if (freeLocale && locale) {
2597 CFRelease(locale);
2598 }
2599 return compareResult;
2600 } else if (!caseInsensitive) {
2601 if (freeLocale && locale) {
2602 CFRelease(locale);
2603 }
2604 return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2605 }
2606 }
2607
2608 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2609 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2610 str1UsedLen = 2;
2611 }
2612
2613 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
2614 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2615 str2UsedLen = 2;
2616 }
2617
2618 if (NULL != ignoredChars) {
2619 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
2620 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2621 if (strBuf1Len == 0) str1Index += str1UsedLen;
2622 if (strBuf2Len > 0) --strBuf2Index;
2623 continue;
2624 }
2625 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
2626 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2627 if (strBuf2Len == 0) str2Index += str2UsedLen;
2628 if (strBuf1Len > 0) -- strBuf1Index;
2629 continue;
2630 }
2631 }
2632
2633 if (diacriticsInsensitive && (str1Index > 0)) {
2634 bool str1Skip = false;
2635 bool str2Skip = false;
2636
2637 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
2638 str1Char = str2Char;
2639 str1Skip = true;
2640 }
2641 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
2642 str2Char = str1Char;
2643 str2Skip = true;
2644 }
2645
2646 if (str1Skip != str2Skip) {
2647 if (str1Skip) str2Index -= str2UsedLen;
2648 if (str2Skip) str1Index -= str1UsedLen;
2649 }
2650 }
2651
2652 if (str1Char != str2Char) {
2653 if (0 == strBuf1Len) {
2654 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
2655 if (strBuf1Len > 0) {
2656 str1Char = *strBuf1;
2657 strBuf1Index = 1;
2658 }
2659 }
2660
2661 if ((0 == strBuf1Len) && (0 < strBuf2Len)) {
2662 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2663 if (freeLocale && locale) {
2664 CFRelease(locale);
2665 }
2666 return compareResult;
2667 }
2668
2669 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
2670 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
2671 if (strBuf2Len > 0) {
2672 str2Char = *strBuf2;
2673 strBuf2Index = 1;
2674 }
2675 if ((0 == strBuf2Len) || (str1Char != str2Char)) {
2676 compareResult = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2677 if (freeLocale && locale) {
2678 CFRelease(locale);
2679 }
2680 return compareResult;
2681 }
2682 }
2683 }
2684
2685 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
2686 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2687 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
2688 ++strBuf1Index; ++strBuf2Index;
2689 }
2690 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2691 CFComparisonResult res = ((NULL == locale) ? ((strBuf1[strBuf1Index] < strBuf2[strBuf2Index]) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(str1LocalizedIndex, rangeToCompare.length - str1LocalizedIndex), &inlineBuf2, CFRangeMake(str2LocalizedIndex, str2Len - str2LocalizedIndex), compareOptions, locale));
2692 if (freeLocale && locale) {
2693 CFRelease(locale);
2694 }
2695 return res;
2696 }
2697 }
2698 }
2699
2700 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2701 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2702
2703 if (strBuf1Len == 0) str1Index += str1UsedLen;
2704 if (strBuf2Len == 0) str2Index += str2UsedLen;
2705 if ((strBuf1Len == 0) && (strBuf2Len == 0)) {
2706 str1LocalizedIndex = str1Index;
2707 str2LocalizedIndex = str2Index;
2708 }
2709 }
2710
2711 if (diacriticsInsensitive || (NULL != ignoredChars)) {
2712 while (str1Index < rangeToCompare.length) {
2713 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2714 if ((str1Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
2715
2716 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2717
2718 if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char))) break;
2719
2720 str1Index += ((str1Char < 0x10000) ? 1 : 2);
2721 }
2722
2723 while (str2Index < str2Len) {
2724 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2725 if ((str2Char < 0x80) && (NULL == ignoredChars)) break; // found ASCII
2726
2727 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2728
2729 if ((!diacriticsInsensitive || !CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) && ((NULL == ignoredChars) || !CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char))) break;
2730
2731 str2Index += ((str2Char < 0x10000) ? 1 : 2);
2732 }
2733 }
2734 // Need to recalc localized result here for forced ordering
2735 if ((NULL != locale) && (kCFCompareEqualTo != compareResult) && (str1Index == rangeToCompare.length) && (str2Index == str2Len)) compareResult = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(forcedIndex1, rangeToCompare.length - forcedIndex1), &inlineBuf2, CFRangeMake(forcedIndex2, str2Len - forcedIndex2), compareOptions, locale);
2736
2737 if (freeLocale && locale) {
2738 CFRelease(locale);
2739 }
2740
2741 return ((str1Index < rangeToCompare.length) ? kCFCompareGreaterThan : ((str2Index < str2Len) ? kCFCompareLessThan : compareResult));
2742 }
2743
2744
2745 CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFStringCompareFlags compareOptions) { return CFStringCompareWithOptionsAndLocale(string, string2, rangeToCompare, compareOptions, NULL); }
2746
2747 CFComparisonResult CFStringCompare(CFStringRef string, CFStringRef str2, CFOptionFlags options) {
2748 return CFStringCompareWithOptions(string, str2, CFRangeMake(0, CFStringGetLength(string)), options);
2749 }
2750
2751 Boolean CFStringFindWithOptionsAndLocale(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFLocaleRef locale, CFRange *result) {
2752 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2753 CFIndex findStrLen = CFStringGetLength(stringToFind);
2754 Boolean didFind = false;
2755 bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticInsensitive)) ? true : false);
2756 CFCharacterSetInlineBuffer *ignoredChars = NULL;
2757 CFCharacterSetInlineBuffer csetBuffer;
2758
2759 if (__CFStringFillCharacterSetInlineBuffer(&csetBuffer, compareOptions)) {
2760 ignoredChars = &csetBuffer;
2761 lengthVariants = true;
2762 }
2763
2764 if ((findStrLen > 0) && (rangeToSearch.length > 0) && ((findStrLen <= rangeToSearch.length) || lengthVariants)) {
2765 UTF32Char strBuf1[kCFStringStackBufferLength];
2766 UTF32Char strBuf2[kCFStringStackBufferLength];
2767 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2768 UTF32Char str1Char = 0, str2Char = 0;
2769 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2770 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2771 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(stringToFind, eightBitEncoding);
2772 const UTF32Char *characters, *charactersLimit;
2773 const uint8_t *langCode = NULL;
2774 CFIndex fromLoc, toLoc;
2775 CFIndex str1Index, str2Index;
2776 CFIndex strBuf1Len, strBuf2Len;
2777 CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length);
2778 bool equalityOptions = ((lengthVariants || (compareOptions & kCFCompareWidthInsensitive)) ? true : false);
2779 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2780 bool forwardAnchor = ((kCFCompareAnchored == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
2781 bool backwardAnchor = (((kCFCompareBackwards|kCFCompareAnchored) == (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) ? true : false);
2782 int8_t delta;
2783
2784 if (NULL == locale) {
2785 if (compareOptions & kCFCompareLocalized) {
2786 CFLocaleRef currentLocale = CFLocaleCopyCurrent();
2787 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(currentLocale);
2788 CFRelease(currentLocale);
2789 }
2790 } else {
2791 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale);
2792 }
2793
2794 CFStringInitInlineBuffer(string, &inlineBuf1, CFRangeMake(0, rangeToSearch.location + rangeToSearch.length));
2795 CFStringInitInlineBuffer(stringToFind, &inlineBuf2, CFRangeMake(0, findStrLen));
2796
2797 if (compareOptions & kCFCompareBackwards) {
2798 fromLoc = rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen);
2799 toLoc = (((compareOptions & kCFCompareAnchored) && !lengthVariants) ? fromLoc : rangeToSearch.location);
2800 } else {
2801 fromLoc = rangeToSearch.location;
2802 toLoc = ((compareOptions & kCFCompareAnchored) ? fromLoc : rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen));
2803 }
2804
2805 delta = ((fromLoc <= toLoc) ? 1 : -1);
2806
2807 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2808 uint8_t str1Byte, str2Byte;
2809
2810 while (1) {
2811 str1Index = fromLoc;
2812 str2Index = 0;
2813
2814 while ((str1Index < maxStr1Index) && (str2Index < findStrLen)) {
2815 str1Byte = str1Bytes[str1Index];
2816 str2Byte = str2Bytes[str2Index];
2817
2818 if (str1Byte != str2Byte) {
2819 if (equalityOptions) {
2820 if ((str1Byte < 0x80) && ((NULL == langCode) || ('I' != str1Byte))) {
2821 if (caseInsensitive && (str1Byte >= 'A') && (str1Byte <= 'Z')) str1Byte += ('a' - 'A');
2822 *strBuf1 = str1Byte;
2823 strBuf1Len = 1;
2824 } else {
2825 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2826 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2827 if (1 > strBuf1Len) {
2828 *strBuf1 = str1Char;
2829 strBuf1Len = 1;
2830 }
2831 }
2832
2833 if ((NULL != ignoredChars) && (forwardAnchor || (str1Index != fromLoc)) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str1Byte < 0x80) ? str1Byte : str1Char))) {
2834 ++str1Index;
2835 continue;
2836 }
2837
2838 if ((str2Byte < 0x80) && ((NULL == langCode) || ('I' != str2Byte))) {
2839 if (caseInsensitive && (str2Byte >= 'A') && (str2Byte <= 'Z')) str2Byte += ('a' - 'A');
2840 *strBuf2 = str2Byte;
2841 strBuf2Len = 1;
2842 } else {
2843 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2844 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2845 if (1 > strBuf2Len) {
2846 *strBuf2 = str2Char;
2847 strBuf2Len = 1;
2848 }
2849 }
2850
2851 if ((NULL != ignoredChars) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, ((str2Byte < 0x80) ? str2Byte : str2Char))) {
2852 ++str2Index;
2853 continue;
2854 }
2855
2856 if ((1 == strBuf1Len) && (1 == strBuf2Len)) { // normal case
2857 if (*strBuf1 != *strBuf2) break;
2858 } else {
2859 CFIndex delta;
2860
2861 if (!caseInsensitive && (strBuf1Len != strBuf2Len)) break;
2862 if (memcmp(strBuf1, strBuf2, sizeof(UTF32Char) * __CFMin(strBuf1Len, strBuf2Len))) break;
2863
2864 if (strBuf1Len < strBuf2Len) {
2865 delta = strBuf2Len - strBuf1Len;
2866
2867 if ((str1Index + strBuf1Len + delta) > maxStr1Index) break;
2868
2869 characters = &(strBuf2[strBuf1Len]);
2870 charactersLimit = characters + delta;
2871
2872 while (characters < charactersLimit) {
2873 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1), &inlineBuf1, str1Index + 1, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2874 if ((strBuf1Len > 0) || (*characters != *strBuf1)) break;
2875 ++characters; ++str1Index;
2876 }
2877 if (characters < charactersLimit) break;
2878 } else if (strBuf2Len < strBuf1Len) {
2879 delta = strBuf1Len - strBuf2Len;
2880
2881 if ((str2Index + strBuf2Len + delta) > findStrLen) break;
2882
2883 characters = &(strBuf1[strBuf2Len]);
2884 charactersLimit = characters + delta;
2885
2886 while (characters < charactersLimit) {
2887 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str1Index + 1), &inlineBuf2, str2Index + 1, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2888 if ((strBuf2Len > 0) || (*characters != *strBuf2)) break;
2889 ++characters; ++str2Index;
2890 }
2891 if (characters < charactersLimit) break;
2892 }
2893 }
2894 } else {
2895 break;
2896 }
2897 }
2898 ++str1Index; ++str2Index;
2899 }
2900
2901 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
2902 while (str2Index < findStrLen) {
2903 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2904
2905 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
2906 ++str2Index;
2907 }
2908 }
2909
2910 if (str2Index == findStrLen) {
2911 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
2912 while (str1Index < maxStr1Index) {
2913 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2914
2915 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
2916 ++str1Index;
2917 }
2918 }
2919
2920 if (!backwardAnchor || (str1Index == maxStr1Index)) {
2921 didFind = true;
2922 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
2923 }
2924 break;
2925 }
2926
2927 if (fromLoc == toLoc) break;
2928 fromLoc += delta;
2929 }
2930 } else if (equalityOptions) {
2931 UTF16Char otherChar;
2932 CFIndex str1UsedLen, str2UsedLen, strBuf1Index = 0, strBuf2Index = 0;
2933 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticInsensitive) ? true : false);
2934 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2935 const uint8_t *combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
2936
2937 while (1) {
2938 str1Index = fromLoc;
2939 str2Index = 0;
2940
2941 strBuf1Len = strBuf2Len = 0;
2942
2943 while (str2Index < findStrLen) {
2944 if (strBuf1Len == 0) {
2945 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2946 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A');
2947 str1UsedLen = 1;
2948 } else {
2949 str1Char = strBuf1[strBuf1Index++];
2950 }
2951 if (strBuf2Len == 0) {
2952 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2953 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A');
2954 str2UsedLen = 1;
2955 } else {
2956 str2Char = strBuf2[strBuf2Index++];
2957 }
2958
2959 if (str1Char != str2Char) {
2960 if ((str1Char < 0x80) && (str2Char < 0x80) && (NULL == ignoredChars) && ((NULL == langCode) || !caseInsensitive)) break;
2961
2962 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2963 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2964 str1UsedLen = 2;
2965 }
2966
2967 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
2968 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2969 str2UsedLen = 2;
2970 }
2971
2972 if (NULL != ignoredChars) {
2973 if ((forwardAnchor || (str1Index != fromLoc)) && (str1Index < maxStr1Index) && CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) {
2974 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2975 if (strBuf1Len == 0) str1Index += str1UsedLen;
2976 if (strBuf2Len > 0) --strBuf2Index;
2977 continue;
2978 }
2979 if (CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) {
2980 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2981 if (strBuf2Len == 0) str2Index += str2UsedLen;
2982 if (strBuf1Len > 0) -- strBuf1Index;
2983 continue;
2984 }
2985 }
2986
2987 if (diacriticsInsensitive && (str1Index > fromLoc)) {
2988 bool str1Skip = false;
2989 bool str2Skip = false;
2990
2991 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
2992 str1Char = str2Char;
2993 str1Skip = true;
2994 }
2995 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
2996 str2Char = str1Char;
2997 str2Skip = true;
2998 }
2999
3000 if (str1Skip != str2Skip) {
3001 if (str1Skip) str2Index -= str2UsedLen;
3002 if (str2Skip) str1Index -= str1UsedLen;
3003 }
3004 }
3005
3006 if (str1Char != str2Char) {
3007 if (0 == strBuf1Len) {
3008 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
3009 if (strBuf1Len > 0) {
3010 str1Char = *strBuf1;
3011 strBuf1Index = 1;
3012 }
3013 }
3014
3015 if ((0 == strBuf1Len) && (0 < strBuf2Len)) break;
3016
3017 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
3018 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
3019 if ((0 == strBuf2Len) || (str1Char != *strBuf2)) break;
3020 strBuf2Index = 1;
3021 }
3022 }
3023
3024 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
3025 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
3026 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
3027 ++strBuf1Index; ++strBuf2Index;
3028 }
3029 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) break;
3030 }
3031 }
3032
3033 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
3034 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
3035
3036 if (strBuf1Len == 0) str1Index += str1UsedLen;
3037 if (strBuf2Len == 0) str2Index += str2UsedLen;
3038 }
3039
3040 if ((NULL != ignoredChars) && (str1Index == maxStr1Index) && (str2Index < findStrLen)) { // Process the stringToFind tail
3041 while (str2Index < findStrLen) {
3042 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
3043 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
3044 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
3045 }
3046 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str2Char)) break;
3047 str2Index += ((str2Char < 0x10000) ? 1 : 2);
3048 }
3049 }
3050
3051 if (str2Index == findStrLen) {
3052 bool match = true;
3053
3054 if (strBuf1Len > 0) {
3055 match = false;
3056
3057 if ((compareOptions & kCFCompareDiacriticInsensitive) && (strBuf1[0] < 0x0510)) {
3058 while (strBuf1Index < strBuf1Len) {
3059 if (!CFUniCharIsMemberOfBitmap(strBuf1[strBuf1Index], ((strBuf1[strBuf1Index] < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (strBuf1[strBuf1Index] >> 16))))) break;
3060 ++strBuf1Index;
3061 }
3062
3063 if (strBuf1Index == strBuf1Len) {
3064 str1Index += str1UsedLen;
3065 match = true;
3066 }
3067 }
3068 }
3069
3070 if (match && (compareOptions & (kCFCompareDiacriticInsensitive|kCFCompareNonliteral)) && (str1Index < maxStr1Index)) {
3071 const uint8_t *nonBaseBitmap;
3072
3073 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3074
3075 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3076 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3077 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16));
3078 } else {
3079 nonBaseBitmap = graphemeBMP;
3080 }
3081
3082 if (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBitmap)) {
3083 if (diacriticsInsensitive) {
3084 if (str1Char < 0x10000) {
3085 CFIndex index = str1Index;
3086
3087 do {
3088 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, --index);
3089 } while (CFUniCharIsMemberOfBitmap(str1Char, graphemeBMP), (rangeToSearch.location < index));
3090
3091 if (str1Char < 0x0510) {
3092 while (++str1Index < maxStr1Index) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), graphemeBMP)) break;
3093 }
3094 }
3095 } else {
3096 match = false;
3097 }
3098 } else if (!diacriticsInsensitive) {
3099 otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index - 1);
3100
3101 // this is assuming viramas are only in BMP ???
3102 if ((str1Char == COMBINING_GRAPHEME_JOINER) || (otherChar == COMBINING_GRAPHEME_JOINER) || (otherChar == ZERO_WIDTH_JOINER) || ((otherChar >= HANGUL_CHOSEONG_START) && (otherChar <= HANGUL_JONGSEONG_END)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar, combClassBMP) == 9)) {
3103 CFRange clusterRange = CFStringGetRangeOfCharacterClusterAtIndex(string, str1Index - 1, kCFStringGraphemeCluster);
3104
3105 if (str1Index < (clusterRange.location + clusterRange.length)) match = false;
3106 }
3107 }
3108 }
3109
3110 if (match) {
3111 if ((NULL != ignoredChars) && backwardAnchor && (str1Index < maxStr1Index)) { // Process the anchor tail
3112 while (str1Index < maxStr1Index) {
3113 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
3114 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
3115 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
3116 }
3117 if (!CFCharacterSetInlineBufferIsLongCharacterMember(ignoredChars, str1Char)) break;
3118 str1Index += ((str1Char < 0x10000) ? 1 : 2);
3119 }
3120 }
3121
3122 if (!backwardAnchor || (str1Index == maxStr1Index)) {
3123 didFind = true;
3124 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
3125 }
3126 break;
3127 }
3128 }
3129
3130 if (fromLoc == toLoc) break;
3131 fromLoc += delta;
3132 }
3133 } else {
3134 while (1) {
3135 str1Index = fromLoc;
3136 str2Index = 0;
3137
3138 while (str2Index < findStrLen) {
3139 if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index)) break;
3140
3141 ++str1Index; ++str2Index;
3142 }
3143
3144 if (str2Index == findStrLen) {
3145 didFind = true;
3146 if (NULL != result) *result = CFRangeMake(fromLoc, findStrLen);
3147 break;
3148 }
3149
3150 if (fromLoc == toLoc) break;
3151 fromLoc += delta;
3152 }
3153 }
3154 }
3155
3156 return didFind;
3157 }
3158
3159
3160 Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions, CFRange *result) { return CFStringFindWithOptionsAndLocale(string, stringToFind, rangeToSearch, compareOptions, NULL, result); }
3161
3162 // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
3163
3164 static const void *__rangeRetain(CFAllocatorRef allocator, const void *ptr) {
3165 CFRetain(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
3166 return ptr;
3167 }
3168
3169 static void __rangeRelease(CFAllocatorRef allocator, const void *ptr) {
3170 CFRelease(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
3171 }
3172
3173 static CFStringRef __rangeCopyDescription(const void *ptr) {
3174 CFRange range = *(CFRange *)ptr;
3175 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("{%d, %d}"), range.location, range.length);
3176 }
3177
3178 static Boolean __rangeEqual(const void *ptr1, const void *ptr2) {
3179 CFRange range1 = *(CFRange *)ptr1;
3180 CFRange range2 = *(CFRange *)ptr2;
3181 return (range1.location == range2.location) && (range1.length == range2.length);
3182 }
3183
3184
3185 CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
3186 CFRange foundRange;
3187 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
3188 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
3189 CFMutableDataRef rangeStorage = NULL; // Basically an array of CFRange, CFDataRef (packed)
3190 uint8_t *rangeStorageBytes = NULL;
3191 CFIndex foundCount = 0;
3192 CFIndex capacity = 0; // Number of CFRange, CFDataRef element slots in rangeStorage
3193
3194 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3195
3196 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
3197 // Determine the next range
3198 if (backwards) {
3199 rangeToSearch.length = foundRange.location - rangeToSearch.location;
3200 } else {
3201 rangeToSearch.location = foundRange.location + foundRange.length;
3202 rangeToSearch.length = endIndex - rangeToSearch.location;
3203 }
3204
3205 // If necessary, grow the data and squirrel away the found range
3206 if (foundCount >= capacity) {
3207 if (rangeStorage == NULL) rangeStorage = CFDataCreateMutable(alloc, 0);
3208 capacity = (capacity + 4) * 2;
3209 CFDataSetLength(rangeStorage, capacity * (sizeof(CFRange) + sizeof(CFDataRef)));
3210 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage) + foundCount * (sizeof(CFRange) + sizeof(CFDataRef));
3211 }
3212 memmove(rangeStorageBytes, &foundRange, sizeof(CFRange)); // The range
3213 memmove(rangeStorageBytes + sizeof(CFRange), &rangeStorage, sizeof(CFDataRef)); // The data
3214 rangeStorageBytes += (sizeof(CFRange) + sizeof(CFDataRef));
3215 foundCount++;
3216 }
3217
3218 if (foundCount > 0) {
3219 CFIndex cnt;
3220 CFMutableArrayRef array;
3221 const CFArrayCallBacks callbacks = {0, __rangeRetain, __rangeRelease, __rangeCopyDescription, __rangeEqual};
3222
3223 CFDataSetLength(rangeStorage, foundCount * (sizeof(CFRange) + sizeof(CFDataRef))); // Tighten storage up
3224 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage);
3225
3226 array = CFArrayCreateMutable(alloc, foundCount * sizeof(CFRange *), &callbacks);
3227 for (cnt = 0; cnt < foundCount; cnt++) {
3228 // Each element points to the appropriate CFRange in the CFData
3229 CFArrayAppendValue(array, rangeStorageBytes + cnt * (sizeof(CFRange) + sizeof(CFDataRef)));
3230 }
3231 CFRelease(rangeStorage); // We want the data to go away when all CFRanges inside it are released...
3232 return array;
3233 } else {
3234 return NULL;
3235 }
3236 }
3237
3238
3239 CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFStringCompareFlags compareOptions) {
3240 CFRange foundRange;
3241
3242 if (CFStringFindWithOptions(string, stringToFind, CFRangeMake(0, CFStringGetLength(string)), compareOptions, &foundRange)) {
3243 return foundRange;
3244 } else {
3245 return CFRangeMake(kCFNotFound, 0);
3246 }
3247 }
3248
3249 Boolean CFStringHasPrefix(CFStringRef string, CFStringRef prefix) {
3250 return CFStringFindWithOptions(string, prefix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored, NULL);
3251 }
3252
3253 Boolean CFStringHasSuffix(CFStringRef string, CFStringRef suffix) {
3254 return CFStringFindWithOptions(string, suffix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored|kCFCompareBackwards, NULL);
3255 }
3256
/* Not referenced in the nearby code; presumably a bound used by the transcoding
   helpers further down -- confirm against its users. */
#define MAX_TRANSCODING_LENGTH (4)

/* Modulus applied by _CFStringIsHangulLVT() to a syllable's offset from
   HANGUL_SYLLABLE_START. */
#define HANGUL_JONGSEONG_COUNT (28)
3260
3261 CF_INLINE bool _CFStringIsHangulLVT(UTF32Char character) {
3262 return (((character - HANGUL_SYLLABLE_START) % HANGUL_JONGSEONG_COUNT) ? true : false);
3263 }
3264
/* 16-entry lookup table of transcoding hint lengths. The index meaning is not
   visible in the nearby code -- confirm against the code that reads it. */
static uint8_t __CFTranscodingHintLength[16] = {
    2, 3, 4, 4,
    4, 4, 4, 2,
    2, 2, 2, 4,
    0, 0, 0, 0
};
3268
/* States of the Hangul syllable scanner in _CFStringInlineBufferGetComposedRange().
   kCFStringHangulStateBreak stops the scan. */
enum {
    kCFStringHangulStateL     = 0,
    kCFStringHangulStateV     = 1,
    kCFStringHangulStateT     = 2,
    kCFStringHangulStateLV    = 3,
    kCFStringHangulStateLVT   = 4,
    kCFStringHangulStateBreak = 5
};
3277
3278 static CFRange _CFStringInlineBufferGetComposedRange(CFStringInlineBuffer *buffer, CFIndex start, CFStringCharacterClusterType type, const uint8_t *bmpBitmap, CFIndex csetType) {
3279 CFIndex end = start + 1;
3280 const uint8_t *bitmap = bmpBitmap;
3281 UTF32Char character;
3282 UTF16Char otherSurrogate;
3283 uint8_t step;
3284
3285 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3286
3287 // We don't combine characters in Armenian ~ Limbu range for backward deletion
3288 if ((type != kCFStringBackwardDeletionCluster) || (character < 0x0530) || (character > 0x194F)) {
3289 // Check if the current is surrogate
3290 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start + 1)))) {
3291 ++end;
3292 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3293 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3294 }
3295
3296 // Extend backward
3297 while (start > 0) {
3298 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3299
3300 if (character < 0x10000) { // the first round could be already be non-BMP
3301 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)))) {
3302 character = CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate, character);
3303 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3304 --start;
3305 } else {
3306 bitmap = bmpBitmap;
3307 }
3308 }
3309
3310 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3311
3312 --start;
3313
3314 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3315 }
3316 }
3317
3318 // Hangul
3319 if (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END))) {
3320 uint8_t state;
3321 uint8_t initialState;
3322
3323 if (character < HANGUL_JUNGSEONG_START) {
3324 state = kCFStringHangulStateL;
3325 } else if (character < HANGUL_JONGSEONG_START) {
3326 state = kCFStringHangulStateV;
3327 } else if (character < HANGUL_SYLLABLE_START) {
3328 state = kCFStringHangulStateT;
3329 } else {
3330 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3331 }
3332 initialState = state;
3333
3334 // Extend backward
3335 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)) >= HANGUL_CHOSEONG_START) && (character <= HANGUL_SYLLABLE_END) && ((character <= HANGUL_JONGSEONG_END) || (character >= HANGUL_SYLLABLE_START))) {
3336 switch (state) {
3337 case kCFStringHangulStateV:
3338 if (character <= HANGUL_CHOSEONG_END) {
3339 state = kCFStringHangulStateL;
3340 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END) && !_CFStringIsHangulLVT(character)) {
3341 state = kCFStringHangulStateLV;
3342 } else if (character > HANGUL_JUNGSEONG_END) {
3343 state = kCFStringHangulStateBreak;
3344 }
3345 break;
3346
3347 case kCFStringHangulStateT:
3348 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JUNGSEONG_END)) {
3349 state = kCFStringHangulStateV;
3350 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)) {
3351 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3352 } else if (character < HANGUL_JUNGSEONG_START) {
3353 state = kCFStringHangulStateBreak;
3354 }
3355 break;
3356
3357 default:
3358 state = ((character < HANGUL_JUNGSEONG_START) ? kCFStringHangulStateL : kCFStringHangulStateBreak);
3359 break;
3360 }
3361
3362 if (state == kCFStringHangulStateBreak) break;
3363 --start;
3364 }
3365
3366 // Extend forward
3367 state = initialState;
3368 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) && (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)))) {
3369 switch (state) {
3370 case kCFStringHangulStateLV:
3371 case kCFStringHangulStateV:
3372 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) {
3373 state = ((character < HANGUL_JONGSEONG_START) ? kCFStringHangulStateV : kCFStringHangulStateT);
3374 } else {
3375 state = kCFStringHangulStateBreak;
3376 }
3377 break;
3378
3379 case kCFStringHangulStateLVT:
3380 case kCFStringHangulStateT:
3381 state = (((character >= HANGUL_JONGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) ? kCFStringHangulStateT : kCFStringHangulStateBreak);
3382 break;
3383
3384 default:
3385 if (character < HANGUL_JUNGSEONG_START) {
3386 state = kCFStringHangulStateL;
3387 } else if (character < HANGUL_JONGSEONG_START) {
3388 state = kCFStringHangulStateV;
3389 } else if (character >= HANGUL_SYLLABLE_START) {
3390 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3391 } else {
3392 state = kCFStringHangulStateBreak;
3393 }
3394 break;
3395 }
3396
3397 if (state == kCFStringHangulStateBreak) break;
3398 ++end;
3399 }
3400 }
3401
3402 // Extend forward
3403 while ((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) {
3404 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3405
3406 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, end + 1)))) {
3407 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3408 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3409 step = 2;
3410 } else {
3411 bitmap = bmpBitmap;
3412 step = 1;
3413 }
3414
3415 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3416
3417 end += step;
3418 }
3419
3420 return CFRangeMake(start, end - start);
3421 }
3422
3423 CF_INLINE bool _CFStringIsVirama(UTF32Char character, const uint8_t *combClassBMP) {
3424 return ((character == COMBINING_GRAPHEME_JOINER) || (CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)((character < 0x10000) ? combClassBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (character >> 16)))) == 9) ? true : false);
3425 }
3426
3427 CFRange CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string, CFIndex charIndex, CFStringCharacterClusterType type) {
3428 CFRange range;
3429 CFIndex currentIndex;
3430 CFIndex length = CFStringGetLength(string);
3431 CFIndex csetType = ((kCFStringGraphemeCluster == type) ? kCFUniCharGraphemeExtendCharacterSet : kCFUniCharNonBaseCharacterSet);
3432 CFStringInlineBuffer stringBuffer;
3433 const uint8_t *bmpBitmap;
3434 const uint8_t *letterBMP;
3435 const uint8_t *combClassBMP;
3436 UTF32Char character;
3437 UTF16Char otherSurrogate;
3438
3439 if (charIndex >= length) return CFRangeMake(kCFNotFound, 0);
3440
3441 /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
3442 */
3443 if (!CF_IS_OBJC(__kCFStringTypeID, string) && __CFStrIsEightBit(string)) return CFRangeMake(charIndex, 1);
3444
3445 bmpBitmap = CFUniCharGetBitmapPtrForPlane(csetType, 0);
3446 letterBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, 0);
3447 combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3448
3449 CFStringInitInlineBuffer(string, &stringBuffer, CFRangeMake(0, length));
3450
3451 // Get composed character sequence first
3452 range = _CFStringInlineBufferGetComposedRange(&stringBuffer, charIndex, type, bmpBitmap, csetType);
3453
3454 // Do grapheme joiners
3455 if (type < kCFStringCursorMovementCluster) {
3456 const uint8_t *letter = letterBMP;
3457
3458 // Check to see if we have a letter at the beginning of initial cluster
3459 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location);
3460
3461 if ((range.length > 1) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location + 1)))) {
3462 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3463 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3464 }
3465
3466 if ((character == ZERO_WIDTH_JOINER) || CFUniCharIsMemberOfBitmap(character, letter)) {
3467 CFRange otherRange;
3468
3469 // Check if preceded by grapheme joiners (U034F and viramas)
3470 otherRange.location = currentIndex = range.location;
3471
3472 while (currentIndex > 1) {
3473 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex);
3474
3475 // ??? We're assuming viramas only in BMP
3476 if ((_CFStringIsVirama(character, combClassBMP) || ((character == ZERO_WIDTH_JOINER) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex), combClassBMP))) && (currentIndex > 0)) {
3477 --currentIndex;
3478 } else {
3479 break;
3480 }
3481
3482 currentIndex = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType).location;
3483
3484 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3485
3486 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1)))) {
3487 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3488 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3489 --currentIndex;
3490 } else {
3491 letter = letterBMP;
3492 }
3493
3494 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3495 range.location = currentIndex;
3496 }
3497
3498 range.length += otherRange.location - range.location;
3499
3500 // Check if followed by grapheme joiners
3501 if ((range.length > 1) && ((range.location + range.length) < length)) {
3502 otherRange = range;
3503 currentIndex = otherRange.location + otherRange.length;
3504
3505 do {
3506 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1);
3507
3508 // ??? We're assuming viramas only in BMP
3509 if ((character != ZERO_WIDTH_JOINER) && !_CFStringIsVirama(character, combClassBMP)) break;
3510
3511 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3512
3513 if (character == ZERO_WIDTH_JOINER) character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, ++currentIndex);
3514
3515 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex + 1)))) {
3516 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3517 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3518 } else {
3519 letter = letterBMP;
3520 }
3521
3522 // We only conjoin letters
3523 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3524 otherRange = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType);
3525 currentIndex = otherRange.location + otherRange.length;
3526 } while ((otherRange.location + otherRange.length) < length);
3527 range.length = currentIndex - range.location;
3528 }
3529 }
3530 }
3531
3532 // Check if we're part of prefix transcoding hints
3533 CFIndex otherIndex;
3534
3535 currentIndex = (range.location + range.length) - (MAX_TRANSCODING_LENGTH + 1);
3536 if (currentIndex < 0) currentIndex = 0;
3537
3538 while (currentIndex <= range.location) {
3539 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3540
3541 if ((character & 0x1FFFF0) == 0xF860) { // transcoding hint
3542 otherIndex = currentIndex + __CFTranscodingHintLength[(character - 0xF860)] + 1;
3543 if (otherIndex >= (range.location + range.length)) {
3544 if (otherIndex <= length) {
3545 range.location = currentIndex;
3546 range.length = otherIndex - currentIndex;
3547 }
3548 break;
3549 }
3550 }
3551 ++currentIndex;
3552 }
3553
3554 return range;
3555 }
3556
3557 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
3558 return CFStringGetRangeOfCharacterClusterAtIndex(theString, theIndex, kCFStringComposedCharacterCluster);
3559 }
3560
3561 /*!
3562 @function CFStringFindCharacterFromSet
3563 Query the range of characters contained in the specified character set.
3564 @param theString The CFString which is to be searched. If this
3565 parameter is not a valid CFString, the behavior is
3566 undefined.
3567 @param theSet The CFCharacterSet against which the membership
3568 of characters is checked. If this parameter is not a valid
3569 CFCharacterSet, the behavior is undefined.
3570 @param range The range of characters within the string to search. If
3571 the range location or end point (defined by the location
3572 plus length minus 1) are outside the index space of the
3573 string (0 to N-1 inclusive, where N is the length of the
3574 string), the behavior is undefined. If the range length is
3575 negative, the behavior is undefined. The range may be empty
3576 (length 0), in which case no search is performed.
3577 @param searchOptions The bitwise-or'ed option flags to control
3578 the search behavior. The supported options are
3579 kCFCompareBackwards and kCFCompareAnchored.
3580 If other option flags are specified, the behavior
3581 is undefined.
3582 @param result The pointer to a CFRange supplied by the caller in
3583 which the search result is stored. If a pointer to an invalid
3584 memory is specified, the behavior is undefined.
3585 @result true, if at least a character which is a member of the character
3586 set is found and result is filled, otherwise, false.
3587 */
3588 #define SURROGATE_START 0xD800
3589 #define SURROGATE_END 0xDFFF
3590
3591 CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFStringCompareFlags searchOptions, CFRange *result) {
3592 CFStringInlineBuffer stringBuffer;
3593 CFCharacterSetInlineBuffer csetBuffer;
3594 UniChar ch;
3595 CFIndex step;
3596 CFIndex fromLoc, toLoc, cnt; // fromLoc and toLoc are inclusive
3597 Boolean found = false;
3598 Boolean done = false;
3599
3600 //#warning FIX ME !! Should support kCFCompareNonliteral
3601
3602 if ((rangeToSearch.location + rangeToSearch.length > CFStringGetLength(theString)) || (rangeToSearch.length == 0)) return false;
3603
3604 if (searchOptions & kCFCompareBackwards) {
3605 fromLoc = rangeToSearch.location + rangeToSearch.length - 1;
3606 toLoc = rangeToSearch.location;
3607 } else {
3608 fromLoc = rangeToSearch.location;
3609 toLoc = rangeToSearch.location + rangeToSearch.length - 1;
3610 }
3611 if (searchOptions & kCFCompareAnchored) {
3612 toLoc = fromLoc;
3613 }
3614
3615 step = (fromLoc <= toLoc) ? 1 : -1;
3616 cnt = fromLoc;
3617
3618 CFStringInitInlineBuffer(theString, &stringBuffer, rangeToSearch);
3619 CFCharacterSetInitInlineBuffer(theSet, &csetBuffer);
3620
3621 do {
3622 ch = CFStringGetCharacterFromInlineBuffer(&stringBuffer, cnt - rangeToSearch.location);
3623 if ((ch >= SURROGATE_START) && (ch <= SURROGATE_END)) {
3624 int otherCharIndex = cnt + step;
3625
3626 if (((step < 0) && (otherCharIndex < toLoc)) || ((step > 0) && (otherCharIndex > toLoc))) {
3627 done = true;
3628 } else {
3629 UniChar highChar;
3630 UniChar lowChar = CFStringGetCharacterFromInlineBuffer(&stringBuffer, otherCharIndex - rangeToSearch.location);
3631
3632 if (cnt < otherCharIndex) {
3633 highChar = ch;
3634 } else {
3635 highChar = lowChar;
3636 lowChar = ch;
3637 }
3638
3639 if (CFUniCharIsSurrogateHighCharacter(highChar) && CFUniCharIsSurrogateLowCharacter(lowChar) && CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, CFUniCharGetLongCharacterForSurrogatePair(highChar, lowChar))) {
3640 if (result) *result = CFRangeMake((cnt < otherCharIndex ? cnt : otherCharIndex), 2);
3641 return true;
3642 } else if (otherCharIndex == toLoc) {
3643 done = true;
3644 } else {
3645 cnt = otherCharIndex + step;
3646 }
3647 }
3648 } else if (CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, ch)) {
3649 done = found = true;
3650 } else if (cnt == toLoc) {
3651 done = true;
3652 } else {
3653 cnt += step;
3654 }
3655 } while (!done);
3656
3657 if (found && result) *result = CFRangeMake(cnt, 1);
3658 return found;
3659 }
3660
3661 /* Line range code */
3662
3663 #define CarriageReturn '\r' /* 0x0d */
3664 #define NewLine '\n' /* 0x0a */
3665 #define NextLine 0x0085
3666 #define LineSeparator 0x2028
3667 #define ParaSeparator 0x2029
3668
3669 CF_INLINE Boolean isALineSeparatorTypeCharacter(UniChar ch, Boolean includeLineEndings) {
3670 if (ch > CarriageReturn && ch < NextLine) return false; /* Quick test to cover most chars */
3671 return (ch == NewLine || ch == CarriageReturn || ch == ParaSeparator || (includeLineEndings && (ch == NextLine || ch == LineSeparator))) ? true : false;
3672 }
3673
3674 static void __CFStringGetLineOrParagraphBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex, Boolean includeLineEndings) {
3675 CFIndex len;
3676 CFStringInlineBuffer buf;
3677 UniChar ch;
3678
3679 __CFAssertIsString(string);
3680 __CFAssertRangeIsInStringBounds(string, range.location, range.length);
3681
3682 len = __CFStrLength(string);
3683
3684 if (lineBeginIndex) {
3685 CFIndex start;
3686 if (range.location == 0) {
3687 start = 0;
3688 } else {
3689 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3690 CFIndex buf_idx = range.location;
3691
3692 /* Take care of the special case where start happens to fall right between \r and \n */
3693 ch = CFStringGetCharacterFromInlineBuffer(&buf, buf_idx);
3694 buf_idx--;
3695 if ((ch == NewLine) && (CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) == CarriageReturn)) {
3696 buf_idx--;
3697 }
3698 while (1) {
3699 if (buf_idx < 0) {
3700 start = 0;
3701 break;
3702 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx), includeLineEndings)) {
3703 start = buf_idx + 1;
3704 break;
3705 } else {
3706 buf_idx--;
3707 }
3708 }
3709 }
3710 *lineBeginIndex = start;
3711 }
3712
3713 /* Now find the ending point */
3714 if (lineEndIndex || contentsEndIndex) {
3715 CFIndex endOfContents, lineSeparatorLength = 1; /* 1 by default */
3716 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3717 CFIndex buf_idx = range.location + range.length - (range.length ? 1 : 0);
3718 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
3719 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3720 if (ch == NewLine) {
3721 endOfContents = buf_idx;
3722 buf_idx--;
3723 if (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == CarriageReturn) {
3724 lineSeparatorLength = 2;
3725 endOfContents--;
3726 }
3727 } else {
3728 while (1) {
3729 if (isALineSeparatorTypeCharacter(ch, includeLineEndings)) {
3730 endOfContents = buf_idx; /* This is actually end of contentsRange */
3731 buf_idx++; /* OK for this to go past the end */
3732 if ((ch == CarriageReturn) && (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == NewLine)) {
3733 lineSeparatorLength = 2;
3734 }
3735 break;
3736 } else if (buf_idx >= len) {
3737 endOfContents = len;
3738 lineSeparatorLength = 0;
3739 break;
3740 } else {
3741 buf_idx++;
3742 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3743 }
3744 }
3745 }
3746 if (contentsEndIndex) *contentsEndIndex = endOfContents;
3747 if (lineEndIndex) *lineEndIndex = endOfContents + lineSeparatorLength;
3748 }
3749 }
3750
3751 void CFStringGetLineBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex) {
3752 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID, void, string, "getLineStart:end:contentsEnd:forRange:", lineBeginIndex, lineEndIndex, contentsEndIndex, CFRangeMake(range.location, range.length));
3753 __CFStringGetLineOrParagraphBounds(string, range, lineBeginIndex, lineEndIndex, contentsEndIndex, true);
3754 }
3755
3756 void CFStringGetParagraphBounds(CFStringRef string, CFRange range, CFIndex *parBeginIndex, CFIndex *parEndIndex, CFIndex *contentsEndIndex) {
3757 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID, void, string, "getParagraphStart:end:contentsEnd:forRange:", parBeginIndex, parEndIndex, contentsEndIndex, CFRangeMake(range.location, range.length));
3758 __CFStringGetLineOrParagraphBounds(string, range, parBeginIndex, parEndIndex, contentsEndIndex, false);
3759 }
3760
3761
3762 CFStringRef CFStringCreateByCombiningStrings(CFAllocatorRef alloc, CFArrayRef array, CFStringRef separatorString) {
3763 CFIndex numChars;
3764 CFIndex separatorNumByte;
3765 CFIndex stringCount = CFArrayGetCount(array);
3766 Boolean isSepCFString = !CF_IS_OBJC(__kCFStringTypeID, separatorString);
3767 Boolean canBeEightbit = isSepCFString && __CFStrIsEightBit(separatorString);
3768 CFIndex idx;
3769 CFStringRef otherString;
3770 void *buffer;
3771 uint8_t *bufPtr;
3772 const void *separatorContents = NULL;
3773
3774 if (stringCount == 0) {
3775 return CFStringCreateWithCharacters(alloc, NULL, 0);
3776 } else if (stringCount == 1) {
3777 return (CFStringRef)CFStringCreateCopy(alloc, (CFStringRef)CFArrayGetValueAtIndex(array, 0));
3778 }
3779
3780 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3781
3782 numChars = CFStringGetLength(separatorString) * (stringCount - 1);
3783 for (idx = 0; idx < stringCount; idx++) {
3784 otherString = (CFStringRef)CFArrayGetValueAtIndex(array, idx);
3785 numChars += CFStringGetLength(otherString);
3786 // canBeEightbit is already false if the separator is an NSString...
3787 if (!CF_IS_OBJC(__kCFStringTypeID, otherString) && __CFStrIsUnicode(otherString)) canBeEightbit = false;
3788 }
3789
3790 buffer = (uint8_t *)CFAllocatorAllocate(alloc, canBeEightbit ? ((numChars + 1) * sizeof(uint8_t)) : (numChars * sizeof(UniChar)), 0);
3791 bufPtr = (uint8_t *)buffer;
3792 if (__CFOASafe) __CFSetLastAllocationEventName(buffer, "CFString (store)");
3793 separatorNumByte = CFStringGetLength(separatorString) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3794
3795 for (idx = 0; idx < stringCount; idx++) {
3796 if (idx) { // add separator here unless first string
3797 if (separatorContents) {
3798 memmove(bufPtr, separatorContents, separatorNumByte);
3799 } else {
3800 if (!isSepCFString) { // NSString
3801 CFStringGetCharacters(separatorString, CFRangeMake(0, CFStringGetLength(separatorString)), (UniChar *)bufPtr);
3802 } else if (canBeEightbit || __CFStrIsUnicode(separatorString)) {
3803 memmove(bufPtr, (const uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), separatorNumByte);
3804 } else {
3805 __CFStrConvertBytesToUnicode((uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), (UniChar *)bufPtr, __CFStrLength(separatorString));
3806 }
3807 separatorContents = bufPtr;
3808 }
3809 bufPtr += separatorNumByte;
3810 }
3811
3812 otherString = (CFStringRef )CFArrayGetValueAtIndex(array, idx);
3813 if (CF_IS_OBJC(__kCFStringTypeID, otherString)) {
3814 CFIndex otherLength = CFStringGetLength(otherString);
3815 CFStringGetCharacters(otherString, CFRangeMake(0, otherLength), (UniChar *)bufPtr);
3816 bufPtr += otherLength * sizeof(UniChar);
3817 } else {
3818 const uint8_t * otherContents = (const uint8_t *)__CFStrContents(otherString);
3819 CFIndex otherNumByte = __CFStrLength2(otherString, otherContents) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3820
3821 if (canBeEightbit || __CFStrIsUnicode(otherString)) {
3822 memmove(bufPtr, otherContents + __CFStrSkipAnyLengthByte(otherString), otherNumByte);
3823 } else {
3824 __CFStrConvertBytesToUnicode(otherContents + __CFStrSkipAnyLengthByte(otherString), (UniChar *)bufPtr, __CFStrLength2(otherString, otherContents));
3825 }
3826 bufPtr += otherNumByte;
3827 }
3828 }
3829 if (canBeEightbit) *bufPtr = 0; // NULL byte;
3830
3831 return canBeEightbit ?
3832 CFStringCreateWithCStringNoCopy(alloc, (const char*)buffer, __CFStringGetEightBitStringEncoding(), alloc) :
3833 CFStringCreateWithCharactersNoCopy(alloc, (UniChar *)buffer, numChars, alloc);
3834 }
3835
3836
3837 CFArrayRef CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc, CFStringRef string, CFStringRef separatorString) {
3838 CFArrayRef separatorRanges;
3839 CFIndex length = CFStringGetLength(string);
3840 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
3841 if (!(separatorRanges = CFStringCreateArrayWithFindResults(alloc, string, separatorString, CFRangeMake(0, length), 0))) {
3842 return CFArrayCreate(alloc, (const void **)&string, 1, & kCFTypeArrayCallBacks);
3843 } else {
3844 CFIndex idx;
3845 CFIndex count = CFArrayGetCount(separatorRanges);
3846 CFIndex startIndex = 0;
3847 CFIndex numChars;
3848 CFMutableArrayRef array = CFArrayCreateMutable(alloc, count + 2, & kCFTypeArrayCallBacks);
3849 const CFRange *currentRange;
3850 CFStringRef substring;
3851
3852 for (idx = 0;idx < count;idx++) {
3853 currentRange = (const CFRange *)CFArrayGetValueAtIndex(separatorRanges, idx);
3854 numChars = currentRange->location - startIndex;
3855 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, numChars));
3856 CFArrayAppendValue(array, substring);
3857 CFRelease(substring);
3858 startIndex = currentRange->location + currentRange->length;
3859 }
3860 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, length - startIndex));
3861 CFArrayAppendValue(array, substring);
3862 CFRelease(substring);
3863
3864 CFRelease(separatorRanges);
3865
3866 return array;
3867 }
3868 }
3869
3870 CFStringRef CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc, CFDataRef data, CFStringEncoding encoding) {
3871 return CFStringCreateWithBytes(alloc, CFDataGetBytePtr(data), CFDataGetLength(data), encoding, true);
3872 }
3873
3874
/* Encodes the whole of `string` in `encoding` and returns the bytes as a new CFData.
   lossByte is forwarded to __CFStringEncodeByteStream; a partial conversion is accepted
   only when lossByte is non-zero and at least one character was converted.
   Returns NULL when the conversion fails.
*/
3875 CFDataRef CFStringCreateExternalRepresentation(CFAllocatorRef alloc, CFStringRef string, CFStringEncoding encoding, uint8_t lossByte) {
3876 CFIndex length;
3877 CFIndex guessedByteLength;
3878 uint8_t *bytes;
3879 CFIndex usedLength;
/* NOTE(review): the encoder's result is compared against the CFIndex length below; SInt32 could truncate it for very long strings - confirm the return type of __CFStringEncodeByteStream */
3880 SInt32 result;
3881
3882 if (CF_IS_OBJC(__kCFStringTypeID, string)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3883 length = CFStringGetLength(string);
3884 } else {
3885 __CFAssertIsString(string);
3886 length = __CFStrLength(string);
/* Fast path: the eight-bit storage already is the requested representation, so its bytes are copied as they are */
3887 if (__CFStrIsEightBit(string) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
3888 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
3889 }
3890 }
3891
3892 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3893
/* Size the output buffer. Unicode-family encodings: one code unit per character plus one extra unit (presumably room for a BOM - confirm) */
3894 if (((encoding & 0x0FFF) == kCFStringEncodingUnicode) && ((encoding == kCFStringEncodingUnicode) || ((encoding > kCFStringEncodingUTF8) && (encoding <= kCFStringEncodingUTF32LE)))) {
3895 guessedByteLength = (length + 1) * ((((encoding >> 26) & 2) == 0) ? sizeof(UTF16Char) : sizeof(UTF32Char)); // UTF32 format has the bit set
/* Otherwise start from the maximum size for the encoding; for non-bridged strings in a multi-byte encoding the estimate is refined below */
3896 } else if (((guessedByteLength = CFStringGetMaximumSizeForEncoding(length, encoding)) > length) && !CF_IS_OBJC(__kCFStringTypeID, string)) { // Multi byte encoding
/* On these targets Unicode-backed strings take the byte count from the converter; the else branch (and, on other targets, every string) does a measuring pass instead */
3897 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
3898 if (__CFStrIsUnicode(string)) {
3899 CFIndex aLength = CFStringEncodingByteLengthForCharacters(encoding, kCFStringEncodingPrependBOM, __CFStrContents(string), __CFStrLength(string));
3900 if (aLength > 0) guessedByteLength = aLength;
3901 } else {
3902 #endif
/* Measuring pass: a NULL buffer means nothing is written; only the required byte count comes back in guessedByteLength */
3903 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, NULL, LONG_MAX, &guessedByteLength);
3904 // if result == length, we always succeed
3905 // otherwise, if result == 0, we fail
3906 // otherwise, if there was a lossByte but still result != length, we fail
3907 if ((result != length) && (!result || !lossByte)) return NULL;
3908 if (guessedByteLength == length && __CFStrIsEightBit(string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // It's all ASCII !!
3909 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
3910 }
3911 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
3912 }
3913 #endif
3914 }
3915 bytes = (uint8_t *)CFAllocatorAllocate(alloc, guessedByteLength, 0);
3916 if (__CFOASafe) __CFSetLastAllocationEventName(bytes, "CFData (store)");
3917
/* Real pass: encode into the buffer; usedLength receives the number of bytes produced */
3918 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, bytes, guessedByteLength, &usedLength);
3919
3920 if ((result != length) && (!result || !lossByte)) { // see comment above about what this means
3921 CFAllocatorDeallocate(alloc, bytes);
3922 return NULL;
3923 }
3924
/* The buffer is handed to the data object without copying; alloc is passed as the allocator that will free it */
3925 return CFDataCreateWithBytesNoCopy(alloc, (uint8_t *)bytes, usedLength, alloc);
3926 }
3927
3928
3929 CFStringEncoding CFStringGetSmallestEncoding(CFStringRef str) {
3930 CFIndex len;
3931 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_smallestEncodingInCFStringEncoding");
3932 __CFAssertIsString(str);
3933
3934 if (__CFStrIsEightBit(str)) return __CFStringGetEightBitStringEncoding();
3935 len = __CFStrLength(str);
3936 if (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetEightBitStringEncoding(), 0, NULL, LONG_MAX, NULL) == len) return __CFStringGetEightBitStringEncoding();
3937 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetSystemEncoding(), 0, NULL, LONG_MAX, NULL) == len)) return __CFStringGetSystemEncoding();
3938 return kCFStringEncodingUnicode; /* ??? */
3939 }
3940
3941
3942 CFStringEncoding CFStringGetFastestEncoding(CFStringRef str) {
3943 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_fastestEncodingInCFStringEncoding");
3944 __CFAssertIsString(str);
3945 return __CFStrIsEightBit(str) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode; /* ??? */
3946 }
3947
3948
3949 SInt32 CFStringGetIntValue(CFStringRef str) {
3950 Boolean success;
3951 SInt32 result;
3952 SInt32 idx = 0;
3953 CFStringInlineBuffer buf;
3954 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
3955 success = __CFStringScanInteger(&buf, NULL, &idx, false, &result);
3956 return success ? result : 0;
3957 }
3958
3959
3960 double CFStringGetDoubleValue(CFStringRef str) {
3961 Boolean success;
3962 double result;
3963 SInt32 idx = 0;
3964 CFStringInlineBuffer buf;
3965 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
3966 success = __CFStringScanDouble(&buf, NULL, &idx, &result);
3967 return success ? result : 0.0;
3968 }
3969
3970
3971 /*** Mutable functions... ***/
3972
3973 void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string, UniChar *chars, CFIndex length, CFIndex capacity) {
3974 __CFAssertIsNotNegative(length);
3975 __CFAssertIsStringAndExternalMutable(string);
3976 CFAssert4((length <= capacity) && ((capacity == 0) || ((capacity > 0) && chars)), __kCFLogAssertion, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__, chars, length, capacity);
3977 __CFStrSetContentPtr(string, chars);
3978 __CFStrSetExplicitLength(string, length);
3979 __CFStrSetCapacity(string, capacity * sizeof(UniChar));
3980 __CFStrSetCapacityProvidedExternally(string);
3981 }
3982
3983
3984
3985 void CFStringInsert(CFMutableStringRef str, CFIndex idx, CFStringRef insertedStr) {
3986 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "insertString:atIndex:", insertedStr, idx);
3987 __CFAssertIsStringAndMutable(str);
3988 CFAssert3(idx >= 0 && idx <= __CFStrLength(str), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(str));
3989 __CFStringReplace(str, CFRangeMake(idx, 0), insertedStr);
3990 }
3991
3992
3993 void CFStringDelete(CFMutableStringRef str, CFRange range) {
3994 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "deleteCharactersInRange:", range);
3995 __CFAssertIsStringAndMutable(str);
3996 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
3997 __CFStringChangeSize(str, range, 0, false);
3998 }
3999
4000
4001 void CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
4002 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "replaceCharactersInRange:withString:", range, replacement);
4003 __CFAssertIsStringAndMutable(str);
4004 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4005 __CFStringReplace(str, range, replacement);
4006 }
4007
4008
4009 void CFStringReplaceAll(CFMutableStringRef str, CFStringRef replacement) {
4010 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "setString:", replacement);
4011 __CFAssertIsStringAndMutable(str);
4012 __CFStringReplace(str, CFRangeMake(0, __CFStrLength(str)), replacement);
4013 }
4014
4015
4016 void CFStringAppend(CFMutableStringRef str, CFStringRef appended) {
4017 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "appendString:", appended);
4018 __CFAssertIsStringAndMutable(str);
4019 __CFStringReplace(str, CFRangeMake(__CFStrLength(str), 0), appended);
4020 }
4021
4022
4023 void CFStringAppendCharacters(CFMutableStringRef str, const UniChar *chars, CFIndex appendedLength) {
4024 CFIndex strLength, idx;
4025
4026 __CFAssertIsNotNegative(appendedLength);
4027
4028 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", chars, appendedLength);
4029
4030 __CFAssertIsStringAndMutable(str);
4031
4032 strLength = __CFStrLength(str);
4033 if (__CFStringGetCompatibility(Bug2967272) || __CFStrIsUnicode(str)) {
4034 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, true);
4035 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4036 } else {
4037 uint8_t *contents;
4038 bool isASCII = true;
4039 for (idx = 0; isASCII && idx < appendedLength; idx++) isASCII = (chars[idx] < 0x80);
4040 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, !isASCII);
4041 if (!isASCII) {
4042 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4043 } else {
4044 contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4045 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4046 }
4047 }
4048 }
4049
4050
4051 static void __CFStringAppendBytes(CFMutableStringRef str, const char *cStr, CFIndex appendedLength, CFStringEncoding encoding) {
4052 Boolean appendedIsUnicode = false;
4053 Boolean freeCStrWhenDone = false;
4054 Boolean demoteAppendedUnicode = false;
4055 CFVarWidthCharBuffer vBuf;
4056
4057 __CFAssertIsNotNegative(appendedLength);
4058
4059 if (encoding == kCFStringEncodingASCII || encoding == __CFStringGetEightBitStringEncoding()) {
4060 // appendedLength now denotes length in UniChars
4061 } else if (encoding == kCFStringEncodingUnicode) {
4062 UniChar *chars = (UniChar *)cStr;
4063 CFIndex idx, length = appendedLength / sizeof(UniChar);
4064 bool isASCII = true;
4065 for (idx = 0; isASCII && idx < length; idx++) isASCII = (chars[idx] < 0x80);
4066 if (!isASCII) {
4067 appendedIsUnicode = true;
4068 } else {
4069 demoteAppendedUnicode = true;
4070 }
4071 appendedLength = length;
4072 } else {
4073 Boolean usingPassedInMemory = false;
4074
4075 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
4076 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
4077
4078 if (!__CFStringDecodeByteStream3((const uint8_t *)cStr, appendedLength, encoding, __CFStrIsUnicode(str), &vBuf, &usingPassedInMemory, 0)) {
4079 CFAssert1(0, __kCFLogAssertion, "Supplied bytes could not be converted specified encoding %d", encoding);
4080 return;
4081 }
4082
4083 // If not ASCII, appendedLength now denotes length in UniChars
4084 appendedLength = vBuf.numChars;
4085 appendedIsUnicode = !vBuf.isASCII;
4086 cStr = (const char *)vBuf.chars.ascii;
4087 freeCStrWhenDone = !usingPassedInMemory && vBuf.shouldFreeChars;
4088 }
4089
4090 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
4091 if (!appendedIsUnicode && !demoteAppendedUnicode) {
4092 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "_cfAppendCString:length:", cStr, appendedLength);
4093 } else {
4094 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", cStr, appendedLength);
4095 }
4096 } else {
4097 CFIndex strLength;
4098 __CFAssertIsStringAndMutable(str);
4099 strLength = __CFStrLength(str);
4100
4101 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, appendedIsUnicode || __CFStrIsUnicode(str));
4102
4103 if (__CFStrIsUnicode(str)) {
4104 UniChar *contents = (UniChar *)__CFStrContents(str);
4105 if (appendedIsUnicode) {
4106 memmove(contents + strLength, cStr, appendedLength * sizeof(UniChar));
4107 } else {
4108 __CFStrConvertBytesToUnicode((const uint8_t *)cStr, contents + strLength, appendedLength);
4109 }
4110 } else {
4111 if (demoteAppendedUnicode) {
4112 UniChar *chars = (UniChar *)cStr;
4113 CFIndex idx;
4114 uint8_t *contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4115 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4116 } else {
4117 uint8_t *contents = (uint8_t *)__CFStrContents(str);
4118 memmove(contents + strLength + __CFStrSkipAnyLengthByte(str), cStr, appendedLength);
4119 }
4120 }
4121 }
4122
4123 if (freeCStrWhenDone) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr);
4124 }
4125
4126 void CFStringAppendPascalString(CFMutableStringRef str, ConstStringPtr pStr, CFStringEncoding encoding) {
4127 __CFStringAppendBytes(str, (const char *)(pStr + 1), (CFIndex)*pStr, encoding);
4128 }
4129
4130 void CFStringAppendCString(CFMutableStringRef str, const char *cStr, CFStringEncoding encoding) {
4131 __CFStringAppendBytes(str, cStr, strlen(cStr), encoding);
4132 }
4133
4134
4135 void CFStringAppendFormat(CFMutableStringRef str, CFDictionaryRef formatOptions, CFStringRef format, ...) {
4136 va_list argList;
4137
4138 va_start(argList, format);
4139 CFStringAppendFormatAndArguments(str, formatOptions, format, argList);
4140 va_end(argList);
4141 }
4142
4143
4144 CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFStringCompareFlags compareOptions) {
4145 CFRange foundRange;
4146 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
4147 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
4148 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
4149 CFRange rangeBuffer[MAX_RANGES_ON_STACK]; // Used to avoid allocating memory
4150 CFRange *ranges = rangeBuffer;
4151 CFIndex foundCount = 0;
4152 CFIndex capacity = MAX_RANGES_ON_STACK;
4153
4154 __CFAssertIsStringAndMutable(string);
4155 __CFAssertRangeIsInStringBounds(string, rangeToSearch.location, rangeToSearch.length);
4156
4157 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
4158 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
4159 // Determine the next range
4160 if (backwards) {
4161 rangeToSearch.length = foundRange.location - rangeToSearch.location;
4162 } else {
4163 rangeToSearch.location = foundRange.location + foundRange.length;
4164 rangeToSearch.length = endIndex - rangeToSearch.location;
4165 }
4166
4167 // If necessary, grow the array
4168 if (foundCount >= capacity) {
4169 bool firstAlloc = (ranges == rangeBuffer) ? true : false;
4170 capacity = (capacity + 4) * 2;
4171 // Note that reallocate with NULL previous pointer is same as allocate
4172 ranges = (CFRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, firstAlloc ? NULL : ranges, capacity * sizeof(CFRange), 0);
4173 if (firstAlloc) memmove(ranges, rangeBuffer, MAX_RANGES_ON_STACK * sizeof(CFRange));
4174 }
4175 ranges[foundCount] = foundRange;
4176 foundCount++;
4177 }
4178
4179 if (foundCount > 0) {
4180 if (backwards) { // Reorder the ranges to be incrementing (better to do this here, then to check other places)
4181 int head = 0;
4182 int tail = foundCount - 1;
4183 while (head < tail) {
4184 CFRange temp = ranges[head];
4185 ranges[head] = ranges[tail];
4186 ranges[tail] = temp;
4187 head++;
4188 tail--;
4189 }
4190 }
4191 __CFStringReplaceMultiple(string, ranges, foundCount, replacementString);
4192 if (ranges != rangeBuffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, ranges);
4193 }
4194
4195 return foundCount;
4196 }
4197
4198
4199 // This function is here for NSString purposes
4200 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
4201
4202 int __CFStringCheckAndReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
4203 if (!__CFStrIsMutable(str)) return _CFStringErrNotMutable; // These three ifs are always here, for NSString usage
4204 if (!replacement && __CFStringNoteErrors()) return _CFStringErrNilArg;
4205 // We use unsigneds as that is what NSRanges do; we use uint64_t do make sure the sum doesn't wrap (otherwise we'd need to do 3 separate checks). This allows catching bad ranges as described in 3375535. (-1,1)
4206 if (((uint64_t)((unsigned)range.location)) + ((uint64_t)((unsigned)range.length)) > (uint64_t)__CFStrLength(str) && __CFStringNoteErrors()) return _CFStringErrBounds;
4207 __CFAssertIsStringAndMutable(str);
4208 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4209 __CFStringReplace(str, range, replacement);
4210 return _CFStringErrNone;
4211 }
4212
4213 // This function determines whether errors which would cause string exceptions should
4214 // be ignored or not
4215
4216 Boolean __CFStringNoteErrors(void) {
4217 return true;
4218 }
4219
4220
4221
4222 void CFStringPad(CFMutableStringRef string, CFStringRef padString, CFIndex length, CFIndex indexIntoPad) {
4223 CFIndex originalLength;
4224
4225 __CFAssertIsNotNegative(length);
4226 __CFAssertIsNotNegative(indexIntoPad);
4227
4228 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, void, string, "_cfPad:length:padIndex:", padString, length, indexIntoPad);
4229
4230 __CFAssertIsStringAndMutable(string);
4231
4232 originalLength = __CFStrLength(string);
4233 if (length < originalLength) {
4234 __CFStringChangeSize(string, CFRangeMake(length, originalLength - length), 0, false);
4235 } else if (originalLength < length) {
4236 uint8_t *contents;
4237 Boolean isUnicode;
4238 CFIndex charSize;
4239 CFIndex padStringLength;
4240 CFIndex padLength;
4241 CFIndex padRemaining = length - originalLength;
4242
4243 if (CF_IS_OBJC(__kCFStringTypeID, padString)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
4244 padStringLength = CFStringGetLength(padString);
4245 isUnicode = true; /* !!! Bad for now */
4246 } else {
4247 __CFAssertIsString(padString);
4248 padStringLength = __CFStrLength(padString);
4249 isUnicode = __CFStrIsUnicode(string) || __CFStrIsUnicode(padString);
4250 }
4251
4252 charSize = isUnicode ? sizeof(UniChar) : sizeof(uint8_t);
4253
4254 __CFStringChangeSize(string, CFRangeMake(originalLength, 0), padRemaining, isUnicode);
4255
4256 contents = (uint8_t *)__CFStrContents(string) + charSize * originalLength + __CFStrSkipAnyLengthByte(string);
4257 padLength = padStringLength - indexIntoPad;
4258 padLength = padRemaining < padLength ? padRemaining : padLength;
4259
4260 while (padRemaining > 0) {
4261 if (isUnicode) {
4262 CFStringGetCharacters(padString, CFRangeMake(indexIntoPad, padLength), (UniChar *)contents);
4263 } else {
4264 CFStringGetBytes(padString, CFRangeMake(indexIntoPad, padLength), __CFStringGetEightBitStringEncoding(), 0, false, contents, padRemaining * charSize, NULL);
4265 }
4266 contents += padLength * charSize;
4267 padRemaining -= padLength;
4268 indexIntoPad = 0;
4269 padLength = padRemaining < padLength ? padRemaining : padStringLength;
4270 }
4271 }
4272 }
4273
4274 void CFStringTrim(CFMutableStringRef string, CFStringRef trimString) {
4275 CFRange range;
4276 CFIndex newStartIndex;
4277 CFIndex length;
4278
4279 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfTrim:", trimString);
4280
4281 __CFAssertIsStringAndMutable(string);
4282 __CFAssertIsString(trimString);
4283
4284 newStartIndex = 0;
4285 length = __CFStrLength(string);
4286
4287 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length - newStartIndex), kCFCompareAnchored, &range)) {
4288 newStartIndex = range.location + range.length;
4289 }
4290
4291 if (newStartIndex < length) {
4292 CFIndex charSize = __CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t);
4293 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4294
4295 length -= newStartIndex;
4296 if (__CFStrLength(trimString) < length) {
4297 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length), kCFCompareAnchored|kCFCompareBackwards, &range)) {
4298 length = range.location - newStartIndex;
4299 }
4300 }
4301 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4302 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4303 } else { // Only trimString in string, trim all
4304 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4305 }
4306 }
4307
4308 void CFStringTrimWhitespace(CFMutableStringRef string) {
4309 CFIndex newStartIndex;
4310 CFIndex length;
4311 CFStringInlineBuffer buffer;
4312
4313 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, void, string, "_cfTrimWS");
4314
4315 __CFAssertIsStringAndMutable(string);
4316
4317 newStartIndex = 0;
4318 length = __CFStrLength(string);
4319
4320 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
4321 CFIndex buffer_idx = 0;
4322
4323 while (buffer_idx < length && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4324 buffer_idx++;
4325 newStartIndex = buffer_idx;
4326
4327 if (newStartIndex < length) {
4328 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4329 CFIndex charSize = (__CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t));
4330
4331 buffer_idx = length - 1;
4332 while (0 <= buffer_idx && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4333 buffer_idx--;
4334 length = buffer_idx - newStartIndex + 1;
4335
4336 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4337 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4338 } else { // Whitespace only string
4339 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4340 }
4341 }
4342
4343 void CFStringLowercase(CFMutableStringRef string, CFLocaleRef locale) {
4344 CFIndex currentIndex = 0;
4345 CFIndex length;
4346 const uint8_t *langCode;
4347 Boolean isEightBit = __CFStrIsEightBit(string);
4348
4349 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfLowercase:", locale);
4350
4351 __CFAssertIsStringAndMutable(string);
4352
4353 length = __CFStrLength(string);
4354
4355 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4356
4357 if (!langCode && isEightBit) {
4358 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4359 for (;currentIndex < length;currentIndex++) {
4360 if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4361 contents[currentIndex] += 'a' - 'A';
4362 } else if (contents[currentIndex] > 127) {
4363 break;
4364 }
4365 }
4366 }
4367
4368 if (currentIndex < length) {
4369 UTF16Char *contents;
4370 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4371 CFIndex mappedLength;
4372 UTF32Char currentChar;
4373 UInt32 flags = 0;
4374
4375 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4376
4377 contents = (UniChar *)__CFStrContents(string);
4378
4379 for (;currentIndex < length;currentIndex++) {
4380
4381 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4382 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4383 } else {
4384 currentChar = contents[currentIndex];
4385 }
4386 flags = ((langCode || (currentChar == 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToLowercase, langCode, flags) : 0);
4387
4388 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, flags, langCode);
4389 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4390
4391 if (currentChar > 0xFFFF) { // Non-BMP char
4392 switch (mappedLength) {
4393 case 0:
4394 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4395 contents = (UniChar *)__CFStrContents(string);
4396 length -= 2;
4397 break;
4398
4399 case 1:
4400 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4401 contents = (UniChar *)__CFStrContents(string);
4402 --length;
4403 break;
4404
4405 case 2:
4406 contents[++currentIndex] = mappedCharacters[1];
4407 break;
4408
4409 default:
4410 --mappedLength; // Skip the current char
4411 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4412 contents = (UniChar *)__CFStrContents(string);
4413 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4414 length += (mappedLength - 1);
4415 currentIndex += mappedLength;
4416 break;
4417 }
4418 } else if (mappedLength == 0) {
4419 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4420 contents = (UniChar *)__CFStrContents(string);
4421 --length;
4422 } else if (mappedLength > 1) {
4423 --mappedLength; // Skip the current char
4424 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4425 contents = (UniChar *)__CFStrContents(string);
4426 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4427 length += mappedLength;
4428 currentIndex += mappedLength;
4429 }
4430 }
4431 }
4432 }
4433
4434 void CFStringUppercase(CFMutableStringRef string, CFLocaleRef locale) {
4435 CFIndex currentIndex = 0;
4436 CFIndex length;
4437 const uint8_t *langCode;
4438 Boolean isEightBit = __CFStrIsEightBit(string);
4439
4440 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfUppercase:", locale);
4441
4442 __CFAssertIsStringAndMutable(string);
4443
4444 length = __CFStrLength(string);
4445
4446 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4447
4448 if (!langCode && isEightBit) {
4449 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4450 for (;currentIndex < length;currentIndex++) {
4451 if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4452 contents[currentIndex] -= 'a' - 'A';
4453 } else if (contents[currentIndex] > 127) {
4454 break;
4455 }
4456 }
4457 }
4458
4459 if (currentIndex < length) {
4460 UniChar *contents;
4461 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4462 CFIndex mappedLength;
4463 UTF32Char currentChar;
4464 UInt32 flags = 0;
4465
4466 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4467
4468 contents = (UniChar *)__CFStrContents(string);
4469
4470 for (;currentIndex < length;currentIndex++) {
4471 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4472 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4473 } else {
4474 currentChar = contents[currentIndex];
4475 }
4476
4477 flags = (langCode ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToUppercase, langCode, flags) : 0);
4478
4479 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToUppercase, flags, langCode);
4480 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4481
4482 if (currentChar > 0xFFFF) { // Non-BMP char
4483 switch (mappedLength) {
4484 case 0:
4485 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4486 contents = (UniChar *)__CFStrContents(string);
4487 length -= 2;
4488 break;
4489
4490 case 1:
4491 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4492 contents = (UniChar *)__CFStrContents(string);
4493 --length;
4494 break;
4495
4496 case 2:
4497 contents[++currentIndex] = mappedCharacters[1];
4498 break;
4499
4500 default:
4501 --mappedLength; // Skip the current char
4502 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4503 contents = (UniChar *)__CFStrContents(string);
4504 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4505 length += (mappedLength - 1);
4506 currentIndex += mappedLength;
4507 break;
4508 }
4509 } else if (mappedLength == 0) {
4510 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4511 contents = (UniChar *)__CFStrContents(string);
4512 --length;
4513 } else if (mappedLength > 1) {
4514 --mappedLength; // Skip the current char
4515 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4516 contents = (UniChar *)__CFStrContents(string);
4517 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4518 length += mappedLength;
4519 currentIndex += mappedLength;
4520 }
4521 }
4522 }
4523 }
4524
4525
4526 void CFStringCapitalize(CFMutableStringRef string, CFLocaleRef locale) {
4527 CFIndex currentIndex = 0;
4528 CFIndex length;
4529 const uint8_t *langCode;
4530 Boolean isEightBit = __CFStrIsEightBit(string);
4531 Boolean isLastCased = false;
4532 const uint8_t *caseIgnorableForBMP;
4533
4534 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfCapitalize:", locale);
4535
4536 __CFAssertIsStringAndMutable(string);
4537
4538 length = __CFStrLength(string);
4539
4540 caseIgnorableForBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet, 0);
4541
4542 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4543
4544 if (!langCode && isEightBit) {
4545 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4546 for (;currentIndex < length;currentIndex++) {
4547 if (contents[currentIndex] > 127) {
4548 break;
4549 } else if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4550 contents[currentIndex] += (isLastCased ? 'a' - 'A' : 0);
4551 isLastCased = true;
4552 } else if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4553 contents[currentIndex] -= (!isLastCased ? 'a' - 'A' : 0);
4554 isLastCased = true;
4555 } else if (!CFUniCharIsMemberOfBitmap(contents[currentIndex], caseIgnorableForBMP)) {
4556 isLastCased = false;
4557 }
4558 }
4559 }
4560
4561 if (currentIndex < length) {
4562 UniChar *contents;
4563 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4564 CFIndex mappedLength;
4565 UTF32Char currentChar;
4566 UInt32 flags = 0;
4567
4568 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4569
4570 contents = (UniChar *)__CFStrContents(string);
4571
4572 for (;currentIndex < length;currentIndex++) {
4573 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4574 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4575 } else {
4576 currentChar = contents[currentIndex];
4577 }
4578 flags = ((langCode || ((currentChar == 0x03A3) && isLastCased)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), langCode, flags) : 0);
4579
4580 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), flags, langCode);
4581 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4582
4583 if (currentChar > 0xFFFF) { // Non-BMP char
4584 switch (mappedLength) {
4585 case 0:
4586 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4587 contents = (UniChar *)__CFStrContents(string);
4588 length -= 2;
4589 break;
4590
4591 case 1:
4592 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4593 contents = (UniChar *)__CFStrContents(string);
4594 --length;
4595 break;
4596
4597 case 2:
4598 contents[++currentIndex] = mappedCharacters[1];
4599 break;
4600
4601 default:
4602 --mappedLength; // Skip the current char
4603 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4604 contents = (UniChar *)__CFStrContents(string);
4605 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4606 length += (mappedLength - 1);
4607 currentIndex += mappedLength;
4608 break;
4609 }
4610 } else if (mappedLength == 0) {
4611 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4612 contents = (UniChar *)__CFStrContents(string);
4613 --length;
4614 } else if (mappedLength > 1) {
4615 --mappedLength; // Skip the current char
4616 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4617 contents = (UniChar *)__CFStrContents(string);
4618 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4619 length += mappedLength;
4620 currentIndex += mappedLength;
4621 }
4622
4623 if (!((currentChar > 0xFFFF) ? CFUniCharIsMemberOf(currentChar, kCFUniCharCaseIgnorableCharacterSet) : CFUniCharIsMemberOfBitmap(currentChar, caseIgnorableForBMP))) { // We have non-caseignorable here
4624 isLastCased = ((CFUniCharIsMemberOf(currentChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(currentChar, kCFUniCharLowercaseLetterCharacterSet)) ? true : false);
4625 }
4626 }
4627 }
4628 }
4629
4630
// Number of UTF32Char slots in the on-stack decomposition buffer of CFStringNormalize();
// also the step by which that buffer grows once it has to move to the heap.
4631 #define MAX_DECOMP_BUF 64
4632
// Constants for composing Hangul syllables arithmetically in CFStringNormalize():
//   syllable = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE
// where lIndex/vIndex/tIndex are a character's offset from HANGUL_LBASE/VBASE/TBASE.
// SBASE..SBASE+SCOUNT is the range tested when deciding whether decomposition can be skipped.
// LCOUNT, VCOUNT and TCOUNT are the upper bounds used when range-checking the three indexes.
// NOTE(review): the values look like the Hangul composition constants of the Unicode Standard — confirm.
4633 #define HANGUL_SBASE 0xAC00
4634 #define HANGUL_LBASE 0x1100
4635 #define HANGUL_VBASE 0x1161
4636 #define HANGUL_TBASE 0x11A7
4637 #define HANGUL_SCOUNT 11172
4638 #define HANGUL_LCOUNT 19
4639 #define HANGUL_VCOUNT 21
4640 #define HANGUL_TCOUNT 28
// Product of VCOUNT and TCOUNT; not referenced in the code visible alongside these definitions.
4641 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
4642
4643 CF_INLINE uint32_t __CFGetUTF16Length(const UTF32Char *characters, uint32_t utf32Length) {
4644 const UTF32Char *limit = characters + utf32Length;
4645 uint32_t length = 0;
4646
4647 while (characters < limit) length += (*(characters++) > 0xFFFF ? 2 : 1);
4648
4649 return length;
4650 }
4651
4652 CF_INLINE void __CFFillInUTF16(const UTF32Char *characters, UTF16Char *dst, uint32_t utf32Length) {
4653 const UTF32Char *limit = characters + utf32Length;
4654 UTF32Char currentChar;
4655
4656 while (characters < limit) {
4657 currentChar = *(characters++);
4658 if (currentChar > 0xFFFF) {
4659 currentChar -= 0x10000;
4660 *(dst++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
4661 *(dst++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
4662 } else {
4663 *(dst++) = currentChar;
4664 }
4665 }
4666 }
4667
4668 void CFStringNormalize(CFMutableStringRef string, CFStringNormalizationForm theForm) {
4669 CFIndex currentIndex = 0;
4670 CFIndex length;
4671 bool needToReorder = true;
4672
4673 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfNormalize:", theForm);
4674
4675 __CFAssertIsStringAndMutable(string);
4676
4677 length = __CFStrLength(string);
4678
4679 if (__CFStrIsEightBit(string)) {
4680 uint8_t *contents;
4681
4682 if (theForm == kCFStringNormalizationFormC) return; // 8bit form has no decomposition
4683
4684 contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4685
4686 for (;currentIndex < length;currentIndex++) {
4687 if (contents[currentIndex] > 127) {
4688 __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); // need to do harm way
4689 needToReorder = false;
4690 break;
4691 }
4692 }
4693 }
4694
4695 if (currentIndex < length) {
4696 UTF16Char *limit = (UTF16Char *)__CFStrContents(string) + length;
4697 UTF16Char *contents = (UTF16Char *)__CFStrContents(string) + currentIndex;
4698 UTF32Char buffer[MAX_DECOMP_BUF];
4699 UTF32Char *mappedCharacters = buffer;
4700 CFIndex allocatedLength = MAX_DECOMP_BUF;
4701 CFIndex mappedLength;
4702 CFIndex currentLength;
4703 UTF32Char currentChar;
4704 const uint8_t *decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
4705 const uint8_t *nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
4706 const uint8_t *combiningBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
4707
4708 while (contents < limit) {
4709 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4710 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4711 currentLength = 2;
4712 contents += 2;
4713 } else {
4714 currentChar = *(contents++);
4715 currentLength = 1;
4716 }
4717
4718 mappedLength = 0;
4719
4720 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16)))) && (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, ((currentChar < 0x10000) ? combiningBMP : (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16)))))) {
4721 if ((theForm & kCFStringNormalizationFormC) == 0 || currentChar < HANGUL_SBASE || currentChar > (HANGUL_SBASE + HANGUL_SCOUNT)) { // We don't have to decompose Hangul Syllables if we're precomposing again
4722 mappedLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters, MAX_DECOMP_BUF);
4723 }
4724 }
4725
4726 if ((needToReorder || (theForm & kCFStringNormalizationFormC)) && ((contents < limit) || (mappedLength == 0))) {
4727 if (mappedLength > 0) {
4728 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4729 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4730 } else {
4731 currentChar = *contents;
4732 }
4733 }
4734
4735 if (0 != CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) {
4736 uint32_t decompLength;
4737
4738 if (mappedLength == 0) {
4739 contents -= (currentChar & 0xFFFF0000 ? 2 : 1);
4740 if (currentIndex > 0) {
4741 if (CFUniCharIsSurrogateLowCharacter(*(contents - 1)) && (currentIndex > 1) && CFUniCharIsSurrogateHighCharacter(*(contents - 2))) {
4742 *mappedCharacters = CFUniCharGetLongCharacterForSurrogatePair(*(contents - 2), *(contents - 1));
4743 currentIndex -= 2;
4744 currentLength += 2;
4745 } else {
4746 *mappedCharacters = *(contents - 1);
4747 --currentIndex;
4748 ++currentLength;
4749 }
4750 mappedLength = 1;
4751 }
4752 } else {
4753 currentLength += (currentChar & 0xFFFF0000 ? 2 : 1);
4754 }
4755 contents += (currentChar & 0xFFFF0000 ? 2 : 1);
4756
4757 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
4758 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4759 mappedLength += decompLength;
4760 } else {
4761 mappedCharacters[mappedLength++] = currentChar;
4762 }
4763
4764 while (contents < limit) {
4765 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4766 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4767 } else {
4768 currentChar = *contents;
4769 }
4770 if (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) break;
4771 if (currentChar & 0xFFFF0000) {
4772 contents += 2;
4773 currentLength += 2;
4774 } else {
4775 ++contents;
4776 ++currentLength;
4777 }
4778 if (mappedLength == allocatedLength) {
4779 allocatedLength += MAX_DECOMP_BUF;
4780 if (mappedCharacters == buffer) {
4781 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
4782 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4783 } else {
4784 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4785 }
4786 }
4787 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
4788 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4789 mappedLength += decompLength;
4790 } else {
4791 mappedCharacters[mappedLength++] = currentChar;
4792 }
4793 }
4794 }
4795 if (needToReorder && mappedLength > 1) CFUniCharPrioritySort(mappedCharacters, mappedLength);
4796 }
4797
4798 if (theForm & kCFStringNormalizationFormKD) {
4799 CFIndex newLength = 0;
4800
4801 if (mappedLength == 0 && CFUniCharIsMemberOf(currentChar, kCFUniCharCompatibilityDecomposableCharacterSet)) {
4802 mappedCharacters[mappedLength++] = currentChar;
4803 }
4804 while (newLength < mappedLength) {
4805 newLength = CFUniCharCompatibilityDecompose(mappedCharacters, mappedLength, allocatedLength);
4806 if (newLength == 0) {
4807 allocatedLength += MAX_DECOMP_BUF;
4808 if (mappedCharacters == buffer) {
4809 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
4810 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4811 } else {
4812 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4813 }
4814 }
4815 }
4816 mappedLength = newLength;
4817 }
4818
4819 if (theForm & kCFStringNormalizationFormC) {
4820 UTF32Char nextChar;
4821
4822 if (mappedLength > 1) {
4823 CFIndex consumedLength = 1;
4824 UTF32Char *currentBase = mappedCharacters;
4825 uint8_t currentClass, lastClass = 0;
4826 bool didCombine = false;
4827
4828 currentChar = *mappedCharacters;
4829
4830 while (consumedLength < mappedLength) {
4831 nextChar = mappedCharacters[consumedLength];
4832 currentClass = CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)((nextChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))));
4833
4834 if (theForm & kCFStringNormalizationFormKD) {
4835 if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) {
4836 SInt8 lIndex = currentChar - HANGUL_LBASE;
4837
4838 if ((0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4839 SInt16 vIndex = nextChar - HANGUL_VBASE;
4840
4841 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4842 SInt16 tIndex = 0;
4843 CFIndex usedLength = mappedLength;
4844
4845 mappedCharacters[consumedLength++] = 0xFFFD;
4846
4847 if (consumedLength < mappedLength) {
4848 tIndex = mappedCharacters[consumedLength] - HANGUL_TBASE;
4849 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4850 tIndex = 0;
4851 } else {
4852 mappedCharacters[consumedLength++] = 0xFFFD;
4853 }
4854 }
4855 *currentBase = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4856
4857 while (--usedLength > 0) {
4858 if (mappedCharacters[usedLength] == 0xFFFD) {
4859 --mappedLength;
4860 --consumedLength;
4861 memmove(mappedCharacters + usedLength, mappedCharacters + usedLength + 1, (mappedLength - usedLength) * sizeof(UTF32Char));
4862 }
4863 }
4864 currentBase = mappedCharacters + consumedLength;
4865 currentChar = *currentBase;
4866 ++consumedLength;
4867
4868 continue;
4869 }
4870 }
4871 }
4872 if (!CFUniCharIsMemberOfBitmap(nextChar, ((nextChar < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))))) {
4873 *currentBase = currentChar;
4874 currentBase = mappedCharacters + consumedLength;
4875 currentChar = nextChar;
4876 ++consumedLength;
4877 continue;
4878 }
4879 }
4880
4881 if ((lastClass == 0) || (currentClass > lastClass)) {
4882 nextChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
4883 if (nextChar == 0xFFFD) {
4884 lastClass = currentClass;
4885 } else {
4886 mappedCharacters[consumedLength] = 0xFFFD;
4887 didCombine = true;
4888 currentChar = nextChar;
4889 }
4890 }
4891 ++consumedLength;
4892 }
4893
4894 *currentBase = currentChar;
4895 if (didCombine) {
4896 consumedLength = mappedLength;
4897 while (--consumedLength > 0) {
4898 if (mappedCharacters[consumedLength] == 0xFFFD) {
4899 --mappedLength;
4900 memmove(mappedCharacters + consumedLength, mappedCharacters + consumedLength + 1, (mappedLength - consumedLength) * sizeof(UTF32Char));
4901 }
4902 }
4903 }
4904 } else if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo
4905 SInt8 lIndex = currentChar - HANGUL_LBASE;
4906
4907 if ((contents < limit) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4908 SInt16 vIndex = *contents - HANGUL_VBASE;
4909
4910 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4911 SInt16 tIndex = 0;
4912
4913 ++contents; ++currentLength;
4914
4915 if (contents < limit) {
4916 tIndex = *contents - HANGUL_TBASE;
4917 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4918 tIndex = 0;
4919 } else {
4920 ++contents; ++currentLength;
4921 }
4922 }
4923 *mappedCharacters = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4924 mappedLength = 1;
4925 }
4926 }
4927 } else { // collect class 0 non-base characters
4928 while (contents < limit) {
4929 nextChar = *contents;
4930 if (CFUniCharIsSurrogateHighCharacter(nextChar) && ((contents + 1) < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4931 nextChar = CFUniCharGetLongCharacterForSurrogatePair(nextChar, *(contents + 1));
4932 if (!CFUniCharIsMemberOfBitmap(nextChar, (const uint8_t *)CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))))) break;
4933 } else {
4934 if (!CFUniCharIsMemberOfBitmap(nextChar, nonBaseBMP) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, combiningBMP))) break;
4935 }
4936 currentChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
4937 if (0xFFFD == currentChar) break;
4938
4939 if (nextChar < 0x10000) {
4940 ++contents; ++currentLength;
4941 } else {
4942 contents += 2;
4943 currentLength += 2;
4944 }
4945
4946 *mappedCharacters = currentChar;
4947 mappedLength = 1;
4948 }
4949 }
4950 }
4951
4952 if (mappedLength > 0) {
4953 CFIndex utf16Length = __CFGetUTF16Length(mappedCharacters, mappedLength);
4954
4955 if (utf16Length != currentLength) {
4956 __CFStringChangeSize(string, CFRangeMake(currentIndex, currentLength), utf16Length, true);
4957 currentLength = utf16Length;
4958 }
4959 contents = (UTF16Char *)__CFStrContents(string);
4960 limit = contents + __CFStrLength(string);
4961 contents += currentIndex;
4962 __CFFillInUTF16(mappedCharacters, contents, mappedLength);
4963 contents += utf16Length;
4964 }
4965 currentIndex += currentLength;
4966 }
4967
4968 if (mappedCharacters != buffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, mappedCharacters);
4969 }
4970 }
4971
4972 void CFStringFold(CFMutableStringRef theString, CFStringCompareFlags theFlags, CFLocaleRef locale) {
4973 CFStringInlineBuffer stringBuffer;
4974 CFIndex length = CFStringGetLength(theString);
4975 CFIndex currentIndex = 0;
4976 CFIndex bufferLength = 0;
4977 UTF32Char buffer[kCFStringStackBufferLength];
4978 const uint8_t *cString;
4979 const uint8_t *langCode;
4980 CFStringEncoding eightBitEncoding;
4981 bool caseInsensitive = ((theFlags & kCFCompareCaseInsensitive) ? true : false);
4982 bool isObjc = CF_IS_OBJC(__kCFStringTypeID, theString);
4983 CFLocaleRef theLocale = locale;
4984
4985 if ((theFlags & kCFCompareLocalized) && (NULL == locale)) {
4986 theLocale = CFLocaleCopyCurrent();
4987 }
4988
4989 theFlags &= (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive);
4990
4991 if ((0 == theFlags) || (0 == length)) goto bail; // nothing to do
4992
4993 langCode = ((NULL == theLocale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(theLocale));
4994
4995 eightBitEncoding = __CFStringGetEightBitStringEncoding();
4996 cString = (const uint8_t *)CFStringGetCStringPtr(theString, eightBitEncoding);
4997
4998 if ((NULL != cString) && !caseInsensitive && (kCFStringEncodingASCII == eightBitEncoding)) goto bail; // All ASCII
4999
5000 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5001
5002 if ((NULL != cString) && (theFlags & (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive))) {
5003 const uint8_t *cStringPtr = cString;
5004 const uint8_t *cStringLimit = cString + length;
5005 uint8_t *cStringContents = (isObjc ? NULL : (uint8_t *)__CFStrContents(theString) + __CFStrSkipAnyLengthByte(theString));
5006
5007 while (cStringPtr < cStringLimit) {
5008 if ((*cStringPtr < 0x80) && (NULL == langCode)) {
5009 if (caseInsensitive && (*cStringPtr >= 'A') && (*cStringPtr <= 'Z')) {
5010 if (NULL == cStringContents) {
5011 break;
5012 } else {
5013 cStringContents[cStringPtr - cString] += ('a' - 'A');
5014 }
5015 }
5016 } else {
5017 if ((bufferLength = __CFStringFoldCharacterClusterAtIndex((UTF32Char)__CFCharToUniCharTable[*cStringPtr], &stringBuffer, cStringPtr - cString, theFlags, langCode, buffer, kCFStringStackBufferLength, NULL)) > 0) {
5018 if ((*buffer > 0x7F) || (bufferLength > 1) || (NULL == cStringContents)) break;
5019 cStringContents[cStringPtr - cString] = *buffer;
5020 }
5021 }
5022 ++cStringPtr;
5023 }
5024
5025 currentIndex = cStringPtr - cString;
5026 }
5027
5028 if (currentIndex < length) {
5029 UTF16Char *contents;
5030
5031 if (isObjc) {
5032 CFMutableStringRef cfString;
5033 CFRange range = CFRangeMake(currentIndex, length - currentIndex);
5034
5035 contents = (UTF16Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * range.length, 0);
5036
5037 CFStringGetCharacters(theString, range, contents);
5038
5039 cfString = CFStringCreateMutableWithExternalCharactersNoCopy(kCFAllocatorSystemDefault, contents, range.length, range.length, NULL);
5040
5041 CFStringFold(cfString, theFlags, theLocale);
5042
5043 CFStringReplace(theString, range, cfString);
5044
5045 CFRelease(cfString);
5046 } else {
5047 const UTF32Char *characters;
5048 const UTF32Char *charactersLimit;
5049 UTF32Char character;
5050 CFIndex consumedLength;
5051
5052 contents = NULL;
5053
5054 if (bufferLength > 0) {
5055 __CFStringChangeSize(theString, CFRangeMake(currentIndex + 1, 0), bufferLength - 1, true);
5056 length = __CFStrLength(theString);
5057 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5058
5059 contents = (UTF16Char *)__CFStrContents(theString) + currentIndex;
5060 characters = buffer;
5061 charactersLimit = characters + bufferLength;
5062 while (characters < charactersLimit) *(contents++) = (UTF16Char)*(characters++);
5063 ++currentIndex;
5064 }
5065
5066 while (currentIndex < length) {
5067 character = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex);
5068
5069 consumedLength = 0;
5070
5071 if ((NULL == langCode) && (character < 0x80) && (0 == (theFlags & kCFCompareDiacriticInsensitive))) {
5072 if (caseInsensitive && (character >= 'A') && (character <= 'Z')) {
5073 consumedLength = 1;
5074 bufferLength = 1;
5075 *buffer = character + ('a' - 'A');
5076 }
5077 } else {
5078 if (CFUniCharIsSurrogateHighCharacter(character) && ((currentIndex + 1) < length)) {
5079 UTF16Char lowSurrogate = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex + 1);
5080 if (CFUniCharIsSurrogateLowCharacter(lowSurrogate)) character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
5081 }
5082
5083 bufferLength = __CFStringFoldCharacterClusterAtIndex(character, &stringBuffer, currentIndex, theFlags, langCode, buffer, kCFStringStackBufferLength, &consumedLength);
5084 }
5085
5086 if (consumedLength > 0) {
5087 CFIndex utf16Length = bufferLength;
5088
5089 characters = buffer;
5090 charactersLimit = characters + bufferLength;
5091
5092 while (characters < charactersLimit) if (*(characters++) > 0xFFFF) ++utf16Length; // Extend bufferLength to the UTF-16 length
5093
5094 if ((utf16Length != consumedLength) || __CFStrIsEightBit(theString)) {
5095 CFRange range;
5096 CFIndex insertLength;
5097
5098 if (consumedLength < utf16Length) { // Need to expand
5099 range = CFRangeMake(currentIndex + consumedLength, 0);
5100 insertLength = utf16Length - consumedLength;
5101 } else {
5102 range = CFRangeMake(currentIndex + utf16Length, consumedLength - utf16Length);
5103 insertLength = 0;
5104 }
5105 __CFStringChangeSize(theString, range, insertLength, true);
5106 length = __CFStrLength(theString);
5107 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5108 }
5109
5110 (void)CFUniCharFromUTF32(buffer, bufferLength, (UTF16Char *)__CFStrContents(theString) + currentIndex, true, __CF_BIG_ENDIAN__);
5111
5112 currentIndex += utf16Length;
5113 } else {
5114 ++currentIndex;
5115 }
5116 }
5117 }
5118 }
5119
5120 bail:
5121 if (NULL == locale && theLocale) {
5122 CFRelease(theLocale);
5123 }
5124 }
5125
/* Bit flags recorded in CFFormatSpec.flags while a conversion specification is parsed.
   Each flag occupies its own bit so they can be or'ed together.
*/
enum {
    kCFStringFormatZeroFlag  = 0x1,	/* '0' seen; when absent, padding is the space char */
    kCFStringFormatMinusFlag = 0x2,	/* '-' seen; when absent, no flag implied */
    kCFStringFormatPlusFlag  = 0x4,	/* '+' seen; when absent, no flag implied; overrides space */
    kCFStringFormatSpaceFlag = 0x8	/* ' ' seen; when absent, no flag implied */
};
5132
/* Description of one piece of a format string: either a run of literal text or a
   single '%' conversion specification. The formatting code fills in the defaults
   (-1 for the arg fields, 0 elsewhere) before parsing each piece.
*/
5133 typedef struct {
5134 int16_t size;	/* one of the CFFormatSize* values; 0 (CFFormatDefaultSize) when no size modifier was seen */
5135 int16_t type;	/* one of the CFFormat*Type values; 0 when no conversion type was found */
5136 SInt32 loc;	/* index in the format string where this piece starts */
5137 SInt32 len;	/* number of format characters this piece covers */
5138 SInt32 widthArg;	/* width written literally in the format; -1 if none */
5139 SInt32 precArg;	/* precision written literally in the format; -1 if none */
5140 uint32_t flags;	/* or of kCFStringFormat*Flag bits */
5141 int8_t mainArgNum;	/* index of the value argument; -1 until assigned */
5142 int8_t precArgNum;	/* index of the '*' precision argument; -1 if none, -2 while '*' was seen but no index is assigned yet */
5143 int8_t widthArgNum;	/* index of the '*' width argument; -1 if none, -2 while '*' was seen but no index is assigned yet */
5144 int8_t unused1;	/* not referenced by the code shown here; presumably padding -- TODO confirm */
5145 } CFFormatSpec;
5146
/* One argument pulled from the va_list, together with the type and size that
   determined how it was fetched. The formatting code copies type and size from
   the CFFormatSpec that refers to this argument slot.
*/
5147 typedef struct {
5148 int16_t type;	/* one of the CFFormat*Type values; 0 for a slot no spec refers to */
5149 int16_t size;	/* one of the CFFormatSize* values */
5150 union {
5151 int64_t int64Value;	/* integer and single-unichar arguments, widened to 64 bits */
5152 double doubleValue;	/* floating-point arguments */
5153 #if LONG_DOUBLE_SUPPORT
5154 long double longDoubleValue;	/* floating-point arguments whose size is CFFormatSize16 */
5155 #endif
5156 void *pointerValue;	/* pointer, string, and CF object arguments */
5157 } value;
5158 } CFPrintValue;
5159
/* Argument size codes stored in CFFormatSpec.size and CFPrintValue.size.
   The codes are ordinals (0 through 5), not byte counts.
*/
enum {
    CFFormatDefaultSize = 0,	/* no size modifier given */
    CFFormatSize1,		/* = 1 */
    CFFormatSize2,		/* = 2 */
    CFFormatSize4,		/* = 3 */
    CFFormatSize8,		/* = 4 */
    CFFormatSize16,		/* = 5 */
    /* "long" is 8 bytes when __LP64__ is set, 4 bytes otherwise */
#if __LP64__
    CFFormatSizeLong = CFFormatSize8,
#else
    CFFormatSizeLong = CFFormatSize4,
#endif
    /* pointers use the same size code as "long" in both configurations */
    CFFormatSizePointer = CFFormatSizeLong
};
5175
5176
5177
/* Conversion type codes stored in CFFormatSpec.type and CFPrintValue.type.
   Numbering starts at 32 and increases by one per entry.
*/
enum {
    CFFormatLiteralType = 32,		/* 32: literal text, no argument */
    CFFormatLongType,			/* 33 */
    CFFormatDoubleType,			/* 34 */
    CFFormatPointerType,		/* 35 */
    CFFormatObjectType,			/* 36: handled specially */	/* ??? not used anymore, can be removed? */
    CFFormatCFType,			/* 37: handled specially */
    CFFormatUnicharsType,		/* 38: handled specially */
    CFFormatCharsType,			/* 39: handled specially */
    CFFormatPascalCharsType,		/* 40: handled specially */
    CFFormatSingleUnicharType,		/* 41: handled specially */
    CFFormatDummyPointerType		/* 42: special case for %n */
};
5191
5192 CF_INLINE void __CFParseFormatSpec(const UniChar *uformat, const uint8_t *cformat, SInt32 *fmtIdx, SInt32 fmtLen, CFFormatSpec *spec) {
5193 Boolean seenDot = false;
5194 for (;;) {
5195 UniChar ch;
5196 if (fmtLen <= *fmtIdx) return; /* no type */
5197 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5198 reswtch:switch (ch) {
5199 case '#': // ignored for now
5200 break;
5201 case 0x20:
5202 if (!(spec->flags & kCFStringFormatPlusFlag)) spec->flags |= kCFStringFormatSpaceFlag;
5203 break;
5204 case '-':
5205 spec->flags |= kCFStringFormatMinusFlag;
5206 spec->flags &= ~kCFStringFormatZeroFlag; // remove zero flag
5207 break;
5208 case '+':
5209 spec->flags |= kCFStringFormatPlusFlag;
5210 spec->flags &= ~kCFStringFormatSpaceFlag; // remove space flag
5211 break;
5212 case '0':
5213 if (!(spec->flags & kCFStringFormatMinusFlag)) spec->flags |= kCFStringFormatZeroFlag;
5214 break;
5215 case 'h':
5216 spec->size = CFFormatSize2;
5217 break;
5218 case 'l':
5219 if (*fmtIdx < fmtLen) {
5220 // fetch next character, don't increment fmtIdx
5221 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
5222 if ('l' == ch) { // 'll' for long long, like 'q'
5223 (*fmtIdx)++;
5224 spec->size = CFFormatSize8;
5225 break;
5226 }
5227 }
5228 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5229 break;
5230 #if LONG_DOUBLE_SUPPORT
5231 case 'L':
5232 spec->size = CFFormatSize16;
5233 break;
5234 #endif
5235 case 'q':
5236 spec->size = CFFormatSize8;
5237 break;
5238 case 't': case 'z':
5239 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5240 break;
5241 case 'j':
5242 spec->size = CFFormatSize8;
5243 break;
5244 case 'c':
5245 spec->type = CFFormatLongType;
5246 spec->size = CFFormatSize1;
5247 return;
5248 case 'O': case 'o': case 'D': case 'd': case 'i': case 'U': case 'u': case 'x': case 'X':
5249 spec->type = CFFormatLongType;
5250 // Seems like if spec->size == 0, we should spec->size = CFFormatSize4. However, 0 is handled correctly.
5251 return;
5252 case 'a': case 'A': case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
5253 spec->type = CFFormatDoubleType;
5254 if (spec->size != CFFormatSize16) spec->size = CFFormatSize8;
5255 return;
5256 case 'n': /* %n is not handled correctly; for Leopard or newer apps, we disable it further */
5257 spec->type = _CFExecutableLinkedOnOrAfter(CFSystemVersionLeopard) ? CFFormatDummyPointerType : CFFormatPointerType;
5258 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5259 return;
5260 case 'p':
5261 spec->type = CFFormatPointerType;
5262 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5263 return;
5264 case 's':
5265 spec->type = CFFormatCharsType;
5266 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5267 return;
5268 case 'S':
5269 spec->type = CFFormatUnicharsType;
5270 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5271 return;
5272 case 'C':
5273 spec->type = CFFormatSingleUnicharType;
5274 spec->size = CFFormatSize2;
5275 return;
5276 case 'P':
5277 spec->type = CFFormatPascalCharsType;
5278 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5279 return;
5280 case '@':
5281 spec->type = CFFormatCFType;
5282 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5283 return;
5284 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
5285 int64_t number = 0;
5286 do {
5287 number = 10 * number + (ch - '0');
5288 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5289 } while ((UInt32)(ch - '0') <= 9);
5290 if ('$' == ch) {
5291 if (-2 == spec->precArgNum) {
5292 spec->precArgNum = (int8_t)number - 1; // Arg numbers start from 1
5293 } else if (-2 == spec->widthArgNum) {
5294 spec->widthArgNum = (int8_t)number - 1; // Arg numbers start from 1
5295 } else {
5296 spec->mainArgNum = (int8_t)number - 1; // Arg numbers start from 1
5297 }
5298 break;
5299 } else if (seenDot) { /* else it's either precision or width */
5300 spec->precArg = (SInt32)number;
5301 } else {
5302 spec->widthArg = (SInt32)number;
5303 }
5304 goto reswtch;
5305 }
5306 case '*':
5307 spec->widthArgNum = -2;
5308 break;
5309 case '.':
5310 seenDot = true;
5311 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5312 if ('*' == ch) {
5313 spec->precArgNum = -2;
5314 break;
5315 }
5316 goto reswtch;
5317 default:
5318 spec->type = CFFormatLiteralType;
5319 return;
5320 }
5321 }
5322 }
5323
5324 /* ??? It ignores the formatOptions argument.
5325 ??? %s depends on handling of encodings by __CFStringAppendBytes
5326 */
5327 void CFStringAppendFormatAndArguments(CFMutableStringRef outputString, CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
5328 _CFStringAppendFormatAndArgumentsAux(outputString, NULL, formatOptions, formatString, args);
5329 }
5330
/* SNPRINTF(TYPE, WHAT)
   Formats the single value WHAT, converted to TYPE, into the caller's buffer.

   The macro is not self-contained. The expansion site must have these names in
   scope: specs and curSpec (the current CFFormatSpec), formatBuffer (the
   NUL-terminated printf-style format for this spec), buffer (destination of at
   least 256 bytes), and width / precision (passed only when the spec's
   widthArgNum / precArgNum is not -1, i.e. when the format contains '*').

   It expands to a braced block and is written at its use sites without a
   trailing semicolon, so it must stay a plain block rather than do/while(0).

   Both variants limit the write to 255 bytes. The fallback variant formerly
   used unbounded sprintf, which can run past the end of buffer (for example
   "%f" applied to a very large double); it now uses snprintf with the same
   limit as the first variant.
   NOTE(review): on toolchains without a C99 snprintf, confirm a compatible
   definition is in scope where the fallback variant is compiled.
*/
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
#define SNPRINTF(TYPE, WHAT) {				\
    TYPE value = (TYPE)(WHAT);				\
    if (-1 != specs[curSpec].widthArgNum) {		\
        if (-1 != specs[curSpec].precArgNum) {		\
            snprintf_l(buffer, 255, NULL, formatBuffer, width, precision, value); \
        } else {					\
            snprintf_l(buffer, 255, NULL, formatBuffer, width, value); \
        }						\
    } else {						\
        if (-1 != specs[curSpec].precArgNum) {		\
            snprintf_l(buffer, 255, NULL, formatBuffer, precision, value); \
        } else {					\
            snprintf_l(buffer, 255, NULL, formatBuffer, value);	\
        }						\
    }}
#else
#define SNPRINTF(TYPE, WHAT) {				\
    TYPE value = (TYPE)(WHAT);				\
    if (-1 != specs[curSpec].widthArgNum) {		\
        if (-1 != specs[curSpec].precArgNum) {		\
            snprintf(buffer, 255, formatBuffer, width, precision, value); \
        } else {					\
            snprintf(buffer, 255, formatBuffer, width, value); \
        }						\
    } else {						\
        if (-1 != specs[curSpec].precArgNum) {		\
            snprintf(buffer, 255, formatBuffer, precision, value); \
        } else {					\
            snprintf(buffer, 255, formatBuffer, value);	\
        }						\
    }}
#endif
5364
5365 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
5366 SInt32 numSpecs, sizeSpecs, sizeArgNum, formatIdx, curSpec, argNum;
5367 CFIndex formatLen;
5368 #define FORMAT_BUFFER_LEN 400
5369 const uint8_t *cformat = NULL;
5370 const UniChar *uformat = NULL;
5371 UniChar *formatChars = NULL;
5372 UniChar localFormatBuffer[FORMAT_BUFFER_LEN];
5373
5374 #define VPRINTF_BUFFER_LEN 61
5375 CFFormatSpec localSpecsBuffer[VPRINTF_BUFFER_LEN];
5376 CFFormatSpec *specs;
5377 CFPrintValue localValuesBuffer[VPRINTF_BUFFER_LEN];
5378 CFPrintValue *values;
5379 CFAllocatorRef tmpAlloc = NULL;
5380
5381 intmax_t dummyLocation; // A place for %n to do its thing in; should be the widest possible int value
5382
5383 numSpecs = 0;
5384 sizeSpecs = 0;
5385 sizeArgNum = 0;
5386 specs = NULL;
5387 values = NULL;
5388
5389 formatLen = CFStringGetLength(formatString);
5390 if (!CF_IS_OBJC(__kCFStringTypeID, formatString)) {
5391 __CFAssertIsString(formatString);
5392 if (!__CFStrIsUnicode(formatString)) {
5393 cformat = (const uint8_t *)__CFStrContents(formatString);
5394 if (cformat) cformat += __CFStrSkipAnyLengthByte(formatString);
5395 } else {
5396 uformat = (const UniChar *)__CFStrContents(formatString);
5397 }
5398 }
5399 if (!cformat && !uformat) {
5400 formatChars = (formatLen > FORMAT_BUFFER_LEN) ? (UniChar *)CFAllocatorAllocate(tmpAlloc = __CFGetDefaultAllocator(), formatLen * sizeof(UniChar), 0) : localFormatBuffer;
5401 if (formatChars != localFormatBuffer && __CFOASafe) __CFSetLastAllocationEventName(formatChars, "CFString (temp)");
5402 CFStringGetCharacters(formatString, CFRangeMake(0, formatLen), formatChars);
5403 uformat = formatChars;
5404 }
5405
5406 /* Compute an upper bound for the number of format specifications */
5407 if (cformat) {
5408 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == cformat[formatIdx]) sizeSpecs++;
5409 } else {
5410 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == uformat[formatIdx]) sizeSpecs++;
5411 }
5412 tmpAlloc = __CFGetDefaultAllocator();
5413 specs = ((2 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? (CFFormatSpec *)CFAllocatorAllocate(tmpAlloc, (2 * sizeSpecs + 1) * sizeof(CFFormatSpec), 0) : localSpecsBuffer;
5414 if (specs != localSpecsBuffer && __CFOASafe) __CFSetLastAllocationEventName(specs, "CFString (temp)");
5415
5416 /* Collect format specification information from the format string */
5417 for (curSpec = 0, formatIdx = 0; formatIdx < formatLen; curSpec++) {
5418 SInt32 newFmtIdx;
5419 specs[curSpec].loc = formatIdx;
5420 specs[curSpec].len = 0;
5421 specs[curSpec].size = 0;
5422 specs[curSpec].type = 0;
5423 specs[curSpec].flags = 0;
5424 specs[curSpec].widthArg = -1;
5425 specs[curSpec].precArg = -1;
5426 specs[curSpec].mainArgNum = -1;
5427 specs[curSpec].precArgNum = -1;
5428 specs[curSpec].widthArgNum = -1;
5429 if (cformat) {
5430 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != cformat[newFmtIdx]; newFmtIdx++);
5431 } else {
5432 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != uformat[newFmtIdx]; newFmtIdx++);
5433 }
5434 if (newFmtIdx != formatIdx) { /* Literal chunk */
5435 specs[curSpec].type = CFFormatLiteralType;
5436 specs[curSpec].len = newFmtIdx - formatIdx;
5437 } else {
5438 newFmtIdx++; /* Skip % */
5439 __CFParseFormatSpec(uformat, cformat, &newFmtIdx, formatLen, &(specs[curSpec]));
5440 if (CFFormatLiteralType == specs[curSpec].type) {
5441 specs[curSpec].loc = formatIdx + 1;
5442 specs[curSpec].len = 1;
5443 } else {
5444 specs[curSpec].len = newFmtIdx - formatIdx;
5445 }
5446 }
5447 formatIdx = newFmtIdx;
5448
5449 // fprintf(stderr, "specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
5450
5451 }
5452 numSpecs = curSpec;
5453 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
5454 values = ((3 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? (CFPrintValue *)CFAllocatorAllocate(tmpAlloc, (3 * sizeSpecs + 1) * sizeof(CFPrintValue), 0) : localValuesBuffer;
5455 if (values != localValuesBuffer && __CFOASafe) __CFSetLastAllocationEventName(values, "CFString (temp)");
5456 memset(values, 0, (3 * sizeSpecs + 1) * sizeof(CFPrintValue));
5457 sizeArgNum = (3 * sizeSpecs + 1);
5458
5459 /* Compute values array */
5460 argNum = 0;
5461 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
5462 SInt32 newMaxArgNum;
5463 if (0 == specs[curSpec].type) continue;
5464 if (CFFormatLiteralType == specs[curSpec].type) continue;
5465 newMaxArgNum = sizeArgNum;
5466 if (newMaxArgNum < specs[curSpec].mainArgNum) {
5467 newMaxArgNum = specs[curSpec].mainArgNum;
5468 }
5469 if (newMaxArgNum < specs[curSpec].precArgNum) {
5470 newMaxArgNum = specs[curSpec].precArgNum;
5471 }
5472 if (newMaxArgNum < specs[curSpec].widthArgNum) {
5473 newMaxArgNum = specs[curSpec].widthArgNum;
5474 }
5475 if (sizeArgNum < newMaxArgNum) {
5476 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
5477 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
5478 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
5479 return; // more args than we expected!
5480 }
5481 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
5482 if (-2 == specs[curSpec].widthArgNum) {
5483 specs[curSpec].widthArgNum = argNum++;
5484 }
5485 if (-2 == specs[curSpec].precArgNum) {
5486 specs[curSpec].precArgNum = argNum++;
5487 }
5488 if (-1 == specs[curSpec].mainArgNum) {
5489 specs[curSpec].mainArgNum = argNum++;
5490 }
5491 values[specs[curSpec].mainArgNum].size = specs[curSpec].size;
5492 values[specs[curSpec].mainArgNum].type = specs[curSpec].type;
5493 if (-1 != specs[curSpec].widthArgNum) {
5494 values[specs[curSpec].widthArgNum].size = 0;
5495 values[specs[curSpec].widthArgNum].type = CFFormatLongType;
5496 }
5497 if (-1 != specs[curSpec].precArgNum) {
5498 values[specs[curSpec].precArgNum].size = 0;
5499 values[specs[curSpec].precArgNum].type = CFFormatLongType;
5500 }
5501 }
5502
5503 /* Collect the arguments in correct type from vararg list */
5504 for (argNum = 0; argNum < sizeArgNum; argNum++) {
5505 switch (values[argNum].type) {
5506 case 0:
5507 case CFFormatLiteralType:
5508 break;
5509 case CFFormatLongType:
5510 case CFFormatSingleUnicharType:
5511 if (CFFormatSize1 == values[argNum].size) {
5512 values[argNum].value.int64Value = (int64_t)(int8_t)va_arg(args, int);
5513 } else if (CFFormatSize2 == values[argNum].size) {
5514 values[argNum].value.int64Value = (int64_t)(int16_t)va_arg(args, int);
5515 } else if (CFFormatSize4 == values[argNum].size) {
5516 values[argNum].value.int64Value = (int64_t)va_arg(args, int32_t);
5517 } else if (CFFormatSize8 == values[argNum].size) {
5518 values[argNum].value.int64Value = (int64_t)va_arg(args, int64_t);
5519 } else {
5520 values[argNum].value.int64Value = (int64_t)va_arg(args, int);
5521 }
5522 break;
5523 case CFFormatDoubleType:
5524 #if LONG_DOUBLE_SUPPORT
5525 if (CFFormatSize16 == values[argNum].size) {
5526 values[argNum].value.longDoubleValue = va_arg(args, long double);
5527 } else
5528 #endif
5529 {
5530 values[argNum].value.doubleValue = va_arg(args, double);
5531 }
5532 break;
5533 case CFFormatPointerType:
5534 case CFFormatObjectType:
5535 case CFFormatCFType:
5536 case CFFormatUnicharsType:
5537 case CFFormatCharsType:
5538 case CFFormatPascalCharsType:
5539 values[argNum].value.pointerValue = va_arg(args, void *);
5540 break;
5541 case CFFormatDummyPointerType:
5542 (void)va_arg(args, void *); // Skip the provided argument
5543 values[argNum].value.pointerValue = &dummyLocation;
5544 break;
5545 }
5546 }
5547 va_end(args);
5548
5549 /* Format the pieces together */
5550 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
5551 SInt32 width = 0, precision = 0;
5552 UniChar *up, ch;
5553 Boolean hasWidth = false, hasPrecision = false;
5554
5555 // widthArgNum and widthArg are never set at the same time; same for precArg*
5556 if (-1 != specs[curSpec].widthArgNum) {
5557 width = (SInt32)values[specs[curSpec].widthArgNum].value.int64Value;
5558 hasWidth = true;
5559 }
5560 if (-1 != specs[curSpec].precArgNum) {
5561 precision = (SInt32)values[specs[curSpec].precArgNum].value.int64Value;
5562 hasPrecision = true;
5563 }
5564 if (-1 != specs[curSpec].widthArg) {
5565 width = specs[curSpec].widthArg;
5566 hasWidth = true;
5567 }
5568 if (-1 != specs[curSpec].precArg) {
5569 precision = specs[curSpec].precArg;
5570 hasPrecision = true;
5571 }
5572
5573 switch (specs[curSpec].type) {
5574 case CFFormatLongType:
5575 case CFFormatDoubleType:
5576 case CFFormatPointerType: {
5577 char formatBuffer[128];
5578 #if defined(__GNUC__)
5579 char buffer[256 + width + precision];
5580 #else
5581 char stackBuffer[512];
5582 char *dynamicBuffer = NULL;
5583 char *buffer = stackBuffer;
5584 if (256+width+precision > 512) {
5585 dynamicBuffer = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, 256+width+precision, 0);
5586 buffer = dynamicBuffer;
5587 }
5588 #endif
5589 SInt32 cidx, idx, loc;
5590 Boolean appended = false;
5591 loc = specs[curSpec].loc;
5592 // In preparation to call snprintf(), copy the format string out
5593 if (cformat) {
5594 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
5595 if ('$' == cformat[loc + cidx]) {
5596 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
5597 } else {
5598 formatBuffer[idx] = cformat[loc + cidx];
5599 }
5600 }
5601 } else {
5602 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
5603 if ('$' == uformat[loc + cidx]) {
5604 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
5605 } else {
5606 formatBuffer[idx] = (int8_t)uformat[loc + cidx];
5607 }
5608 }
5609 }
5610 formatBuffer[idx] = '\0';
5611 // Should modify format buffer here if necessary; for example, to translate %qd to
5612 // the equivalent, on architectures which do not have %q.
5613 buffer[sizeof(buffer) - 1] = '\0';
5614 switch (specs[curSpec].type) {
5615 case CFFormatLongType:
5616 if (CFFormatSize8 == specs[curSpec].size) {
5617 SNPRINTF(int64_t, values[specs[curSpec].mainArgNum].value.int64Value)
5618 } else {
5619 SNPRINTF(SInt32, values[specs[curSpec].mainArgNum].value.int64Value)
5620 }
5621 break;
5622 case CFFormatPointerType:
5623 case CFFormatDummyPointerType:
5624 SNPRINTF(void *, values[specs[curSpec].mainArgNum].value.pointerValue)
5625 break;
5626
5627 case CFFormatDoubleType:
5628 #if LONG_DOUBLE_SUPPORT
5629 if (CFFormatSize16 == specs[curSpec].size) {
5630 SNPRINTF(long double, values[specs[curSpec].mainArgNum].value.longDoubleValue)
5631 } else
5632 #endif
5633 {
5634 SNPRINTF(double, values[specs[curSpec].mainArgNum].value.doubleValue)
5635 }
5636 // See if we need to localize the decimal point
5637 if (formatOptions) { // We have localization info
5638 CFStringRef decimalSeparator = (CFGetTypeID(formatOptions) == CFLocaleGetTypeID()) ? (CFStringRef)CFLocaleGetValue((CFLocaleRef)formatOptions, kCFLocaleDecimalSeparatorKey) : (CFStringRef)CFDictionaryGetValue(formatOptions, CFSTR("NSDecimalSeparator"));
5639 if (decimalSeparator != NULL) { // We have a decimal separator in there
5640 CFIndex decimalPointLoc = 0;
5641 while (buffer[decimalPointLoc] != 0 && buffer[decimalPointLoc] != '.') decimalPointLoc++;
5642 if (buffer[decimalPointLoc] == '.') { // And we have a decimal point in the formatted string
5643 buffer[decimalPointLoc] = 0;
5644 CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
5645 CFStringAppend(outputString, decimalSeparator);
5646 CFStringAppendCString(outputString, (const char *)(buffer + decimalPointLoc + 1), __CFStringGetEightBitStringEncoding());
5647 appended = true;
5648 }
5649 }
5650 }
5651 break;
5652 }
5653 if (!appended) CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
5654 #if !defined(__GNUC__)
5655 if (dynamicBuffer) {
5656 CFAllocatorDeallocate(kCFAllocatorSystemDefault, dynamicBuffer);
5657 }
5658 #endif
5659 }
5660 break;
5661 case CFFormatLiteralType:
5662 if (cformat) {
5663 __CFStringAppendBytes(outputString, (const char *)(cformat+specs[curSpec].loc), specs[curSpec].len, __CFStringGetEightBitStringEncoding());
5664 } else {
5665 CFStringAppendCharacters(outputString, uformat+specs[curSpec].loc, specs[curSpec].len);
5666 }
5667 break;
5668 case CFFormatPascalCharsType:
5669 case CFFormatCharsType:
5670 if (values[specs[curSpec].mainArgNum].value.pointerValue == NULL) {
5671 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5672 } else {
5673 int len;
5674 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue;
5675 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
5676 len = ((unsigned char *)str)[0];
5677 str++;
5678 if (hasPrecision && precision < len) len = precision;
5679 } else { // C-string case
5680 if (!hasPrecision) { // No precision, so rely on the terminating null character
5681 len = strlen(str);
5682 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
5683 const char *terminatingNull = (const char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
5684 if (terminatingNull) { // There was a null in the first precision characters
5685 len = terminatingNull - str;
5686 } else {
5687 len = precision;
5688 }
5689 }
5690 }
5691 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5692 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5693 // to ignore those flags (and, say, never pad with '0' instead of space).
5694 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
5695 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
5696 if (hasWidth && width > len) {
5697 int w = width - len; // We need this many spaces; do it ten at a time
5698 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5699 }
5700 } else {
5701 if (hasWidth && width > len) {
5702 int w = width - len; // We need this many spaces; do it ten at a time
5703 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5704 }
5705 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
5706 }
5707 }
5708 break;
5709 case CFFormatSingleUnicharType:
5710 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value;
5711 CFStringAppendCharacters(outputString, &ch, 1);
5712 break;
5713 case CFFormatUnicharsType:
5714 //??? need to handle width, precision, and padding arguments
5715 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue;
5716 if (NULL == up) {
5717 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5718 } else {
5719 int len;
5720 for (len = 0; 0 != up[len]; len++);
5721 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5722 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5723 // to ignore those flags (and, say, never pad with '0' instead of space).
5724 if (hasPrecision && precision < len) len = precision;
5725 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
5726 CFStringAppendCharacters(outputString, up, len);
5727 if (hasWidth && width > len) {
5728 int w = width - len; // We need this many spaces; do it ten at a time
5729 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5730 }
5731 } else {
5732 if (hasWidth && width > len) {
5733 int w = width - len; // We need this many spaces; do it ten at a time
5734 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5735 }
5736 CFStringAppendCharacters(outputString, up, len);
5737 }
5738 }
5739 break;
5740 case CFFormatCFType:
5741 case CFFormatObjectType:
5742 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
5743 CFStringRef str = NULL;
5744 if (copyDescFunc) {
5745 str = copyDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
5746 } else {
5747 str = __CFCopyFormattingDescription(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
5748 if (NULL == str) {
5749 str = CFCopyDescription(values[specs[curSpec].mainArgNum].value.pointerValue);
5750 }
5751 }
5752 if (str) {
5753 CFStringAppend(outputString, str);
5754 CFRelease(str);
5755 } else {
5756 CFStringAppendCString(outputString, "(null description)", kCFStringEncodingASCII);
5757 }
5758 } else {
5759 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5760 }
5761 break;
5762 }
5763 }
5764
5765 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
5766 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
5767 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
5768
5769 }
5770
5771 #undef SNPRINTF
5772
5773 void CFShowStr(CFStringRef str) {
5774 CFAllocatorRef alloc;
5775
5776 if (!str) {
5777 fprintf(stdout, "(null)\n");
5778 return;
5779 }
5780
5781 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
5782 fprintf(stdout, "This is an NSString, not CFString\n");
5783 return;
5784 }
5785
5786 alloc = CFGetAllocator(str);
5787
5788 fprintf(stdout, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str), __CFStrIsEightBit(str));
5789 fprintf(stdout, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
5790 __CFStrHasLengthByte(str), __CFStrHasNullByte(str), __CFStrIsInline(str));
5791
5792 fprintf(stdout, "Allocator ");
5793 if (alloc != kCFAllocatorSystemDefault) {
5794 fprintf(stdout, "%p\n", (void *)alloc);
5795 } else {
5796 fprintf(stdout, "SystemDefault\n");
5797 }
5798 fprintf(stdout, "Mutable %d\n", __CFStrIsMutable(str));
5799 if (!__CFStrIsMutable(str) && __CFStrHasContentsDeallocator(str)) {
5800 if (__CFStrContentsDeallocator(str)) fprintf(stdout, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str));
5801 else fprintf(stdout, "ContentsDeallocatorFunc None\n");
5802 } else if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str)) {
5803 fprintf(stdout, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef)str));
5804 }
5805
5806 if (__CFStrIsMutable(str)) {
5807 fprintf(stdout, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str), __CFStrIsFixed(str) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str));
5808 }
5809 fprintf(stdout, "Contents %p\n", (void *)__CFStrContents(str));
5810 }
5811
5812
5813
5814 #undef HANGUL_SBASE
5815 #undef HANGUL_LBASE
5816 #undef HANGUL_VBASE
5817 #undef HANGUL_TBASE
5818 #undef HANGUL_SCOUNT
5819 #undef HANGUL_LCOUNT
5820 #undef HANGUL_VCOUNT
5821 #undef HANGUL_TCOUNT
5822 #undef HANGUL_NCOUNT
5823