/* Retrieved from git.saurik.com (gitweb): apple/cf.git, CFString.c, blob a9b5c501c7b93413908b1d719dd3ba17f2237f1b */
1 /*
2 * Copyright (c) 2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFString.c
24 Copyright 1998-2002, Apple, Inc. All rights reserved.
25 Responsibility: Ali Ozer
26
27 !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined.
28 */
29
#include <CoreFoundation/CFBase.h>
#include <CoreFoundation/CFString.h>
#include <CoreFoundation/CFDictionary.h>
#include "CFStringEncodingConverterExt.h"
#include "CFUniChar.h"
#include "CFUnicodeDecomposition.h"
#include "CFUnicodePrecomposition.h"
#include "CFPriv.h"
#include "CFInternal.h"
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
#include <unistd.h>
#endif
45
46 #if defined(__GNUC__)
47 #define LONG_DOUBLE_SUPPORT 1
48 #else
49 #define LONG_DOUBLE_SUPPORT 0
50 #endif
51
52
53
54 #define USE_STRING_ROM 0
55
56
57 #ifndef INSTRUMENT_SHARED_STRINGS
58 #define INSTRUMENT_SHARED_STRINGS 0
59 #endif
60
61
62 __private_extern__ CFStringRef __kCFLocaleCollatorID;
63
64 #if INSTRUMENT_SHARED_STRINGS
65 #include <sys/stat.h> /* for umask() */
66
67 static void __CFRecordStringAllocationEvent(const char *encoding, const char *bytes, CFIndex byteCount) {
68 static CFSpinLock_t lock = CFSpinLockInit;
69
70 if (memchr(bytes, '\n', byteCount)) return; //never record string allocation events for strings with newlines, because those confuse our parser and because they'll never go into the ROM
71
72 __CFSpinLock(&lock);
73 static int fd;
74 if (! fd) {
75 extern char **_NSGetProgname(void);
76 const char *name = *_NSGetProgname();
77 if (! name) name = "UNKNOWN";
78 umask(0);
79 char path[1024];
80 sprintf(path, "/tmp/CFSharedStringInstrumentation_%s_%d.txt", name, getpid());
81 fd = open(path, O_WRONLY | O_APPEND | O_CREAT, 0666);
82 if (fd <= 0) {
83 int error = errno;
84 const char *errString = strerror(error);
85 fprintf(stderr, "open() failed with error %d (%s)\n", error, errString);
86 }
87 }
88 if (fd > 0) {
89 char *buffer = NULL;
90 char formatString[256];
91 sprintf(formatString, "%%-8d\t%%-16s\t%%.%lds\n", byteCount);
92 int resultCount = asprintf(&buffer, formatString, getpid(), encoding, bytes);
93 if (buffer && resultCount > 0) write(fd, buffer, resultCount);
94 else puts("Couldn't record allocation event");
95 free(buffer);
96 }
97 __CFSpinUnlock(&lock);
98 }
99 #endif //INSTRUMENT_SHARED_STRINGS
100
101
102
103 typedef Boolean (*UNI_CHAR_FUNC)(UInt32 flags, UInt8 ch, UniChar *unicodeChar);
104
105 #if DEPLOYMENT_TARGET_MACOSX
106 extern size_t malloc_good_size(size_t size);
107 #endif
108 extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars);
109
#ifdef DEBUG

// Placeholder text put into C & Pascal strings when a conversion cannot be performed
#define CONVERSIONFAILURESTR "CFString conversion failed"

// Set to true while the constant string table is being purged, so that CFStringDeallocate doesn't assert
static Boolean __CFConstantStringTableBeingFreed = false;

#endif
119
120
121
122 // This section is for CFString compatibility and other behaviors...
123
124 static CFOptionFlags _CFStringCompatibilityMask = 0;
125
126 #define Bug2967272 1
127
128 void _CFStringSetCompatibility(CFOptionFlags mask) {
129 _CFStringCompatibilityMask |= mask;
130 }
131
132 CF_INLINE Boolean __CFStringGetCompatibility(CFOptionFlags mask) {
133 return (_CFStringCompatibilityMask & mask) == mask;
134 }
135
136
137
138 // Two constant strings used by CFString; these are initialized in CFStringInitialize
139 CONST_STRING_DECL(kCFEmptyString, "")
140
141 // This is separate for C++
142 struct __notInlineMutable {
143 void *buffer;
144 CFIndex length;
145 CFIndex capacity; // Capacity in bytes
146 unsigned int hasGap:1; // Currently unused
147 unsigned int isFixedCapacity:1;
148 unsigned int isExternalMutable:1;
149 unsigned int capacityProvidedExternally:1;
150 #if __LP64__
151 unsigned long desiredCapacity:60;
152 #else
153 unsigned long desiredCapacity:28;
154 #endif
155 CFAllocatorRef contentsAllocator; // Optional
156 }; // The only mutable variant for CFString
157
158
159 /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
160 */
161 struct __CFString {
162 CFRuntimeBase base;
163 union { // In many cases the allocated structs are smaller than these
164 struct __inline1 {
165 CFIndex length;
166 } inline1; // Bytes follow the length
167 struct __notInlineImmutable1 {
168 void *buffer; // Note that the buffer is in the same place for all non-inline variants of CFString
169 CFIndex length;
170 CFAllocatorRef contentsDeallocator; // Optional; just the dealloc func is used
171 } notInlineImmutable1; // This is the usual not-inline immutable CFString
172 struct __notInlineImmutable2 {
173 void *buffer;
174 CFAllocatorRef contentsDeallocator; // Optional; just the dealloc func is used
175 } notInlineImmutable2; // This is the not-inline immutable CFString when length is stored with the contents (first byte)
176 struct __notInlineMutable notInlineMutable;
177 } variants;
178 };
179
180 /*
181 I = is immutable
182 E = not inline contents
183 U = is Unicode
184 N = has NULL byte
185 L = has length byte
186 D = explicit deallocator for contents (for mutable objects, allocator)
187 C = length field is CFIndex (rather than UInt32); only meaningful for 64-bit, really
188 if needed this bit (valuable real-estate) can be given up for another bit elsewhere, since this info is needed just for 64-bit
189
190 Also need (only for mutable)
191 F = is fixed
192 G = has gap
193 Cap, DesCap = capacity
194
    B7 B6 B5 B4 B3 B2 B1 B0
             U  N  L  C  I

    B6 B5
     0  0   inline contents
     0  1   E (freed with default allocator)
     1  0   E (not freed)
     1  1   E D
203
204 !!! Note: Constant CFStrings use the bit patterns:
205 C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable)
206 D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable)
    The bit usages should not be modified in a way that would affect these bit patterns.
208 */
209
/* Masks and values for the info bits kept in base._cfinfo[CF_INFO_BITS].
   Each "...Mask" selects a field; the matching constant without "Mask" is the value meaning "yes".
*/
enum {
    // How the contents are stored and released (bits 0x60)
    __kCFFreeContentsWhenDoneMask = 0x20,
        __kCFFreeContentsWhenDone = 0x20,
    __kCFContentsMask = 0x60,
        __kCFHasInlineContents = 0x00,
        __kCFNotInlineContentsNoFree = 0x40,        // Don't free
        __kCFNotInlineContentsDefaultFree = 0x20,   // Use allocator's free function
        __kCFNotInlineContentsCustomFree = 0x60,    // Use a specially provided free function
    __kCFHasContentsAllocatorMask = 0x60,
        __kCFHasContentsAllocator = 0x60,           // (For mutable strings) use a specially provided allocator
    __kCFHasContentsDeallocatorMask = 0x60,
        __kCFHasContentsDeallocator = 0x60,

    // Single-bit properties
    __kCFIsMutableMask = 0x01,
        __kCFIsMutable = 0x01,
    __kCFIsUnicodeMask = 0x10,
        __kCFIsUnicode = 0x10,
    __kCFHasNullByteMask = 0x08,
        __kCFHasNullByte = 0x08,
    __kCFHasLengthByteMask = 0x04,
        __kCFHasLengthByte = 0x04,
    // !!! Bit 0x02 has been freed up
};
232
233
234 // !!! Assumptions:
235 // Mutable strings are not inline
236 // Compile-time constant strings are not inline
237 // Mutable strings always have explicit length (but they might also have length byte and null byte)
238 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
239 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
240
241 /* The following set of functions and macros need to be updated on change to the bit configuration
242 */
243 CF_INLINE Boolean __CFStrIsMutable(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsMutableMask) == __kCFIsMutable;}
244 CF_INLINE Boolean __CFStrIsInline(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFContentsMask) == __kCFHasInlineContents;}
245 CF_INLINE Boolean __CFStrFreeContentsWhenDone(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFFreeContentsWhenDoneMask) == __kCFFreeContentsWhenDone;}
246 CF_INLINE Boolean __CFStrHasContentsDeallocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsDeallocatorMask) == __kCFHasContentsDeallocator;}
247 CF_INLINE Boolean __CFStrIsUnicode(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) == __kCFIsUnicode;}
248 CF_INLINE Boolean __CFStrIsEightBit(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFIsUnicodeMask) != __kCFIsUnicode;}
249 CF_INLINE Boolean __CFStrHasNullByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasNullByteMask) == __kCFHasNullByte;}
250 CF_INLINE Boolean __CFStrHasLengthByte(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte;}
251 CF_INLINE Boolean __CFStrHasExplicitLength(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & (__kCFIsMutableMask | __kCFHasLengthByteMask)) != __kCFHasLengthByte;} // Has explicit length if (1) mutable or (2) not mutable and no length byte
252 CF_INLINE Boolean __CFStrIsConstant(CFStringRef str) {
253 #if __LP64__
254 return str->base._rc == 0;
255 #else
256 return (str->base._cfinfo[CF_RC_BITS]) == 0;
257 #endif
258 }
259
260 CF_INLINE SInt32 __CFStrSkipAnyLengthByte(CFStringRef str) {return ((str->base._cfinfo[CF_INFO_BITS] & __kCFHasLengthByteMask) == __kCFHasLengthByte) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents
261
262 /* Returns ptr to the buffer (which might include the length byte)
263 */
264 CF_INLINE const void *__CFStrContents(CFStringRef str) {
265 if (__CFStrIsInline(str)) {
266 return (const void *)(((uintptr_t)&(str->variants)) + (__CFStrHasExplicitLength(str) ? sizeof(CFIndex) : 0));
267 } else { // Not inline; pointer is always word 2
268 return str->variants.notInlineImmutable1.buffer;
269 }
270 }
271
272 static CFAllocatorRef *__CFStrContentsDeallocatorPtr(CFStringRef str) {
273 return __CFStrHasExplicitLength(str) ? &(((CFMutableStringRef)str)->variants.notInlineImmutable1.contentsDeallocator) : &(((CFMutableStringRef)str)->variants.notInlineImmutable2.contentsDeallocator); }
274
275 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
276 CF_INLINE CFAllocatorRef __CFStrContentsDeallocator(CFStringRef str) {
277 return *__CFStrContentsDeallocatorPtr(str);
278 }
279
280 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
281 CF_INLINE void __CFStrSetContentsDeallocator(CFStringRef str, CFAllocatorRef contentsAllocator) {
282 *__CFStrContentsDeallocatorPtr(str) = contentsAllocator;
283 }
284
285 static CFAllocatorRef *__CFStrContentsAllocatorPtr(CFStringRef str) {
286 CFAssert(!__CFStrIsInline(str), __kCFLogAssertion, "Asking for contents allocator of inline string");
287 CFAssert(__CFStrIsMutable(str), __kCFLogAssertion, "Asking for contents allocator of an immutable string");
288 return (CFAllocatorRef *)&(str->variants.notInlineMutable.contentsAllocator);
289 }
290
291 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
292 CF_INLINE CFAllocatorRef __CFStrContentsAllocator(CFMutableStringRef str) {
293 return *(__CFStrContentsAllocatorPtr(str));
294 }
295
296 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
297 CF_INLINE void __CFStrSetContentsAllocator(CFMutableStringRef str, CFAllocatorRef alloc) {
298 *(__CFStrContentsAllocatorPtr(str)) = alloc;
299 }
300
301 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
302 */
303 CF_INLINE CFIndex __CFStrLength(CFStringRef str) {
304 if (__CFStrHasExplicitLength(str)) {
305 if (__CFStrIsInline(str)) {
306 return str->variants.inline1.length;
307 } else {
308 return str->variants.notInlineImmutable1.length;
309 }
310 } else {
311 return (CFIndex)(*((uint8_t *)__CFStrContents(str)));
312 }
313 }
314
315 CF_INLINE CFIndex __CFStrLength2(CFStringRef str, const void *buffer) {
316 if (__CFStrHasExplicitLength(str)) {
317 if (__CFStrIsInline(str)) {
318 return str->variants.inline1.length;
319 } else {
320 return str->variants.notInlineImmutable1.length;
321 }
322 } else {
323 return (CFIndex)(*((uint8_t *)buffer));
324 }
325 }
326
327
328 Boolean __CFStringIsEightBit(CFStringRef str) {
329 return __CFStrIsEightBit(str);
330 }
331
332 /* Sets the content pointer for immutable or mutable strings.
333 */
334 CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p) {
335 // XXX_PCB catch all writes for mutable string case.
336 CF_WRITE_BARRIER_BASE_ASSIGN(__CFGetAllocator(str), str, ((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p);
337 }
338 CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v) {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._cfinfo[CF_INFO_BITS], 6, 0, v);}
339
340 CF_INLINE void __CFStrSetExplicitLength(CFStringRef str, CFIndex v) {
341 if (__CFStrIsInline(str)) {
342 ((CFMutableStringRef)str)->variants.inline1.length = v;
343 } else {
344 ((CFMutableStringRef)str)->variants.notInlineImmutable1.length = v;
345 }
346 }
347
348 CF_INLINE void __CFStrSetUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= __kCFIsUnicode;}
349 CF_INLINE void __CFStrClearUnicode(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~__kCFIsUnicode;}
350 CF_INLINE void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] |= (__kCFHasLengthByte | __kCFHasNullByte);}
351 CF_INLINE void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str) {str->base._cfinfo[CF_INFO_BITS] &= ~(__kCFHasLengthByte | __kCFHasNullByte);}
352
353
354 // Assumption: The following set of inlines (using str->variants.notInlineMutable) are called with mutable strings only
355 CF_INLINE Boolean __CFStrIsFixed(CFStringRef str) {return str->variants.notInlineMutable.isFixedCapacity;}
356 CF_INLINE Boolean __CFStrIsExternalMutable(CFStringRef str) {return str->variants.notInlineMutable.isExternalMutable;}
357 CF_INLINE Boolean __CFStrHasContentsAllocator(CFStringRef str) {return (str->base._cfinfo[CF_INFO_BITS] & __kCFHasContentsAllocatorMask) == __kCFHasContentsAllocator;}
358 CF_INLINE void __CFStrSetIsFixed(CFMutableStringRef str) {str->variants.notInlineMutable.isFixedCapacity = 1;}
359 CF_INLINE void __CFStrSetIsExternalMutable(CFMutableStringRef str) {str->variants.notInlineMutable.isExternalMutable = 1;}
360 CF_INLINE void __CFStrSetHasGap(CFMutableStringRef str) {str->variants.notInlineMutable.hasGap = 1;}
361
362 // If capacity is provided externally, we only change it when we need to grow beyond it
363 CF_INLINE Boolean __CFStrCapacityProvidedExternally(CFStringRef str) {return str->variants.notInlineMutable.capacityProvidedExternally;}
364 CF_INLINE void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 1;}
365 CF_INLINE void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.capacityProvidedExternally = 0;}
366
367 // "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
368 CF_INLINE CFIndex __CFStrCapacity(CFStringRef str) {return str->variants.notInlineMutable.capacity;}
369 CF_INLINE void __CFStrSetCapacity(CFMutableStringRef str, CFIndex cap) {str->variants.notInlineMutable.capacity = cap;}
370
371 // "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
372 CF_INLINE CFIndex __CFStrDesiredCapacity(CFStringRef str) {return str->variants.notInlineMutable.desiredCapacity;}
373 CF_INLINE void __CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex size) {str->variants.notInlineMutable.desiredCapacity = size;}
374
375
376 static void *__CFStrAllocateMutableContents(CFMutableStringRef str, CFIndex size) {
377 void *ptr;
378 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
379 ptr = CFAllocatorAllocate(alloc, size, 0);
380 if (__CFOASafe) __CFSetLastAllocationEventName(ptr, "CFString (store)");
381 return ptr;
382 }
383
384 static void __CFStrDeallocateMutableContents(CFMutableStringRef str, void *buffer) {
385 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
386 if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
387 // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle.
388 auto_zone_release(__CFCollectableZone, buffer);
389 } else {
390 CFAllocatorDeallocate(alloc, buffer);
391 }
392 }
393
394
395
396
397 /* CFString specific init flags
398 Note that you cannot count on the external buffer not being copied.
399 Also, if you specify an external buffer, you should not change it behind the CFString's back.
400 */
enum {
    /* See if the Unicode contents can be thinned down to 8-bit */
    __kCFThinUnicodeIfPossible = 0x1000000,
    /* Indicating that the string data has a Pascal string structure (length byte at start) */
    kCFStringPascal = 0x10000,
    /* Don't copy the provided string contents if possible; free it when no longer needed */
    kCFStringNoCopyProvidedContents = 0x20000,
    /* Don't copy the provided string contents if possible; don't free it when no longer needed.
       Note that this value contains both the kCFStringPascal and kCFStringNoCopyProvidedContents bits. */
    kCFStringNoCopyNoFreeProvidedContents = 0x30000
};
407
408 /* System Encoding.
409 */
410 static CFStringEncoding __CFDefaultSystemEncoding = kCFStringEncodingInvalidId;
411 static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId;
412 CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId;
413
414 CFStringEncoding CFStringGetSystemEncoding(void) {
415
416 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) {
417 const CFStringEncodingConverter *converter = NULL;
418 #if DEPLOYMENT_TARGET_MACOSX
419 __CFDefaultSystemEncoding = kCFStringEncodingMacRoman; // MacRoman is built-in so always available
420 #elif defined(__WIN32__)
421 __CFDefaultSystemEncoding = kCFStringEncodingWindowsLatin1; // WinLatin1 is built-in so always available
422 #elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
423 __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default
424 #else // Solaris && HP-UX ?
425 __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default
426 #endif
427 converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding);
428
429 __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? (UNI_CHAR_FUNC)converter->toUnicode : NULL);
430 }
431
432 return __CFDefaultSystemEncoding;
433 }
434
435 // Fast version for internal use
436
437 CF_INLINE CFStringEncoding __CFStringGetSystemEncoding(void) {
438 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) (void)CFStringGetSystemEncoding();
439 return __CFDefaultSystemEncoding;
440 }
441
442 CFStringEncoding CFStringFileSystemEncoding(void) {
443 if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) {
444 #if DEPLOYMENT_TARGET_MACOSX
445 __CFDefaultFileSystemEncoding = kCFStringEncodingUTF8;
446 #else
447 __CFDefaultFileSystemEncoding = CFStringGetSystemEncoding();
448 #endif
449 }
450
451 return __CFDefaultFileSystemEncoding;
452 }
453
454 /* ??? Is returning length when no other answer is available the right thing?
455 */
456 CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) {
457 if (encoding == kCFStringEncodingUTF8) {
458 return _CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? (length * 3) : (length * 6); // 1 Unichar could expand to 3 bytes; we return 6 for older apps for compatibility
459 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32
460 return length * sizeof(UTF32Char);
461 } else {
462 encoding &= 0xFFF; // Mask off non-base part
463 }
464 switch (encoding) {
465 case kCFStringEncodingUnicode:
466 return length * sizeof(UniChar);
467
468 case kCFStringEncodingNonLossyASCII:
469 return length * 6; // 1 Unichar could expand to 6 bytes
470
471 case kCFStringEncodingMacRoman:
472 case kCFStringEncodingWindowsLatin1:
473 case kCFStringEncodingISOLatin1:
474 case kCFStringEncodingNextStepLatin:
475 case kCFStringEncodingASCII:
476 return length / sizeof(uint8_t);
477
478 default:
479 return length / sizeof(uint8_t);
480 }
481 }
482
483
484 /* Returns whether the indicated encoding can be stored in 8-bit chars
485 */
486 CF_INLINE Boolean __CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding) {
487 switch (encoding & 0xFFF) { // just use encoding base
488 case kCFStringEncodingInvalidId:
489 case kCFStringEncodingUnicode:
490 case kCFStringEncodingNonLossyASCII:
491 return false;
492
493 case kCFStringEncodingMacRoman:
494 case kCFStringEncodingWindowsLatin1:
495 case kCFStringEncodingISOLatin1:
496 case kCFStringEncodingNextStepLatin:
497 case kCFStringEncodingASCII:
498 return true;
499
500 default: return false;
501 }
502 }
503
504 /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
505 ??? Perhaps only ASCII fits the bill due to Unicode decomposition.
506 */
507 CFStringEncoding __CFStringComputeEightBitStringEncoding(void) {
508 if (__CFDefaultEightBitStringEncoding == kCFStringEncodingInvalidId) {
509 CFStringEncoding systemEncoding = CFStringGetSystemEncoding();
510 if (systemEncoding == kCFStringEncodingInvalidId) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
511 return kCFStringEncodingASCII;
512 } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding)) {
513 __CFDefaultEightBitStringEncoding = systemEncoding;
514 } else {
515 __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII;
516 }
517 }
518
519 return __CFDefaultEightBitStringEncoding;
520 }
521
522 /* Returns whether the provided bytes can be stored in ASCII
523 */
524 CF_INLINE Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
525 while (len--) if ((uint8_t)(*bytes++) >= 128) return false;
526 return true;
527 }
528
529 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
530 */
531 CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) {
532 if (encoding == __CFStringGetEightBitStringEncoding()) return true;
533 if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true;
534 return false;
535 }
536
537
538 /* Returns whether a length byte can be tacked on to a string of the indicated length.
539 */
540 CF_INLINE Boolean __CFCanUseLengthByte(CFIndex len) {
541 #define __kCFMaxPascalStrLen 255
542 return (len <= __kCFMaxPascalStrLen) ? true : false;
543 }
544
545 /* Various string assertions
546 */
// Index and length values are CFIndex, so they are printed with %ld (cast to long so the
// argument always matches the conversion). Macro arguments are parenthesized so that
// expression arguments such as "a + b" are evaluated as a unit.
#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %ld out of bounds (length %ld)", __PRETTY_FUNCTION__, (long)(idx), (long)__CFStrLength(cf))
#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && ((idx) + (count)) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %ld,%ld out of bounds (length %ld)", __PRETTY_FUNCTION__, (long)(idx), (long)(count), (long)__CFStrLength(cf))
#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsNotNegative(idx) CFAssert2((idx) >= 0, __kCFLogAssertion, "%s(): index %ld is negative", __PRETTY_FUNCTION__, (long)(idx))
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %ld too large", __PRETTY_FUNCTION__, (long)(reqLen))
554
555
556 /* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity.
557 Additional complications are applied in the following order:
558 - desiredCapacity, which is the minimum (except initially things can be at zero)
559 - rounding up to factor of 8
    - compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256
561 - we need to make sure GROWFACTOR computation doesn't suffer from overflow issues on 32-bit, hence the casting to unsigned. Normally for required capacity of C bytes, the allocated space is (3C+1)/2. If C > ULONG_MAX/3, we instead simply return LONG_MAX
562 */
// Capacity below which a buffer is considered too large for its contents: half of c.
// The argument is parenthesized so that expression arguments are evaluated as a unit.
#define SHRINKFACTOR(c) ((c) / 2)

// Capacity to allocate when extra room is wanted for a requirement of c bytes: (3c+1)/2.
// On 32-bit the computation is done unsigned, and requests too large to scale are
// clamped instead (see the comment above).
#if __LP64__
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
#else
#define GROWFACTOR(c) (((c) >= (ULONG_MAX / 3UL)) ? __CFMax(LONG_MAX - 4095, (c)) : (((unsigned long)(c) * 3 + 1) / 2))
#endif
570
571 CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, CFIndex reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) {
572 if (capacity != 0 || reqCapacity != 0) { /* If initially zero, and space not needed, leave it at that... */
573 if ((capacity < reqCapacity) || /* We definitely need the room... */
574 (!__CFStrCapacityProvidedExternally(str) && /* Assuming we control the capacity... */
575 ((reqCapacity < SHRINKFACTOR(capacity)) || /* ...we have too much room! */
576 (!leaveExtraRoom && (reqCapacity < capacity))))) { /* ...we need to eliminate the extra space... */
577 CFIndex newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity; /* Grow by 3/2 if extra room is desired */
578 CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize;
579 if (newCapacity < desiredCapacity) { /* If less than desired, bump up to desired */
580 newCapacity = desiredCapacity;
581 } else if (__CFStrIsFixed(str)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
582 newCapacity = __CFMax(desiredCapacity, reqCapacity); /* !!! So, fixed is not really fixed, but "tight" */
583 }
584 if (__CFStrHasContentsAllocator(str)) { /* Also apply any preferred size from the allocator; should we do something for */
585 newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0);
586 #if DEPLOYMENT_TARGET_MACOSX
587 } else {
588 newCapacity = malloc_good_size(newCapacity);
589 #endif
590 }
591 return newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
592 }
593 }
594 return capacity;
595 }
596
597
598 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
599 numBlocks is current total number of blocks within buffer.
600 blockSize is the size of each block in bytes
601 ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
602 insertLength is the final spacing between the remaining blocks
603
604 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO
605 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
606 if insertLength = 0, result = A B D G H
607
608 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
609 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
610
611 */
612 typedef struct _CFStringDeferredRange {
613 CFIndex beginning;
614 CFIndex length;
615 CFIndex shift;
616 } CFStringDeferredRange;
617
618 typedef struct _CFStringStackInfo {
619 CFIndex capacity; // Capacity (if capacity == count, need to realloc to add another)
620 CFIndex count; // Number of elements actually stored
621 CFStringDeferredRange *stack;
622 Boolean hasMalloced; // Indicates "stack" is allocated and needs to be deallocated when done
623 char _padding[3];
624 } CFStringStackInfo;
625
626 CF_INLINE void pop (CFStringStackInfo *si, CFStringDeferredRange *topRange) {
627 si->count = si->count - 1;
628 *topRange = si->stack[si->count];
629 }
630
631 CF_INLINE void push (CFStringStackInfo *si, const CFStringDeferredRange *newRange) {
632 if (si->count == si->capacity) {
633 // increase size of the stack
634 si->capacity = (si->capacity + 4) * 2;
635 if (si->hasMalloced) {
636 si->stack = (CFStringDeferredRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, si->stack, si->capacity * sizeof(CFStringDeferredRange), 0);
637 } else {
638 CFStringDeferredRange *newStack = (CFStringDeferredRange *)CFAllocatorAllocate(kCFAllocatorSystemDefault, si->capacity * sizeof(CFStringDeferredRange), 0);
639 memmove(newStack, si->stack, si->count * sizeof(CFStringDeferredRange));
640 si->stack = newStack;
641 si->hasMalloced = true;
642 }
643 }
644 si->stack[si->count] = *newRange;
645 si->count = si->count + 1;
646 }
647
648 static void rearrangeBlocks(
649 uint8_t *buffer,
650 CFIndex numBlocks,
651 CFIndex blockSize,
652 const CFRange *ranges,
653 CFIndex numRanges,
654 CFIndex insertLength) {
655
656 #define origStackSize 10
657 CFStringDeferredRange origStack[origStackSize];
658 CFStringStackInfo si = {origStackSize, 0, origStack, false, {0, 0, 0}};
659 CFStringDeferredRange currentNonRange = {0, 0, 0};
660 CFIndex currentRange = 0;
661 CFIndex amountShifted = 0;
662
663 // must have at least 1 range left.
664
665 while (currentRange < numRanges) {
666 currentNonRange.beginning = (ranges[currentRange].location + ranges[currentRange].length) * blockSize;
667 if ((numRanges - currentRange) == 1) {
668 // at the end.
669 currentNonRange.length = numBlocks * blockSize - currentNonRange.beginning;
670 if (currentNonRange.length == 0) break;
671 } else {
672 currentNonRange.length = (ranges[currentRange + 1].location * blockSize) - currentNonRange.beginning;
673 }
674 currentNonRange.shift = amountShifted + (insertLength * blockSize) - (ranges[currentRange].length * blockSize);
675 amountShifted = currentNonRange.shift;
676 if (amountShifted <= 0) {
677 // process current item and rest of stack
678 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
679 while (si.count > 0) {
680 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
681 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
682 }
683 } else {
684 // add currentNonRange to stack.
685 push (&si, &currentNonRange);
686 }
687 currentRange++;
688 }
689
690 // no more ranges. if anything is on the stack, process.
691
692 while (si.count > 0) {
693 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
694 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
695 }
696 if (si.hasMalloced) CFAllocatorDeallocate (kCFAllocatorSystemDefault, si.stack);
697 }
698
699 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
700 */
701 static void copyBlocks(
702 const uint8_t *srcBuffer,
703 uint8_t *dstBuffer,
704 CFIndex srcLength,
705 Boolean srcIsUnicode,
706 Boolean dstIsUnicode,
707 const CFRange *ranges,
708 CFIndex numRanges,
709 CFIndex insertLength) {
710
711 CFIndex srcLocationInBytes = 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
712 CFIndex dstLocationInBytes = 0; // ditto
713 CFIndex srcBlockSize = srcIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
714 CFIndex insertLengthInBytes = insertLength * (dstIsUnicode ? sizeof(UniChar) : sizeof(uint8_t));
715 CFIndex rangeIndex = 0;
716 CFIndex srcToDstMultiplier = (srcIsUnicode == dstIsUnicode) ? 1 : (sizeof(UniChar) / sizeof(uint8_t));
717
718 // Loop over the ranges, copying the range to be preserved (right before each range)
719 while (rangeIndex < numRanges) {
720 CFIndex srcLengthInBytes = ranges[rangeIndex].location * srcBlockSize - srcLocationInBytes; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
721 if (srcLengthInBytes > 0) {
722 if (srcIsUnicode == dstIsUnicode) {
723 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLengthInBytes);
724 } else {
725 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLengthInBytes);
726 }
727 }
728 srcLocationInBytes += srcLengthInBytes + ranges[rangeIndex].length * srcBlockSize; // Skip over the just-copied and to-be-deleted stuff
729 dstLocationInBytes += srcLengthInBytes * srcToDstMultiplier + insertLengthInBytes;
730 rangeIndex++;
731 }
732
733 // Do last range (the one beyond last range)
734 if (srcLocationInBytes < srcLength * srcBlockSize) {
735 if (srcIsUnicode == dstIsUnicode) {
736 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLength * srcBlockSize - srcLocationInBytes);
737 } else {
738 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLength * srcBlockSize - srcLocationInBytes);
739 }
740 }
741 }
742
743 /* Call the callback; if it doesn't exist or returns false, then log
744 */
745 static void __CFStringHandleOutOfMemory(CFTypeRef obj) {
746 CFStringRef msg = CFSTR("Out of memory. We suggest restarting the application. If you have an unsaved document, create a backup copy in Finder, then try to save.");
747 CFBadErrorCallBack cb = _CFGetOutOfMemoryErrorCallBack();
748 if (NULL == cb || !cb(obj, CFSTR("NS/CFString"), msg)) {
749 CFLog(kCFLogLevelCritical, CFSTR("%@"), msg);
750 }
751 }
752
753 /* Reallocates the backing store of the string to accomodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)
754 */
755 static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) {
756 const uint8_t *curContents = (uint8_t *)__CFStrContents(str);
757 CFIndex curLength = curContents ? __CFStrLength2(str, curContents) : 0;
758 CFIndex newLength;
759
760 // Compute new length of the string
761 if (numDeleteRanges == 1) {
762 newLength = curLength + insertLength - deleteRanges[0].length;
763 } else {
764 CFIndex cnt;
765 newLength = curLength + insertLength * numDeleteRanges;
766 for (cnt = 0; cnt < numDeleteRanges; cnt++) newLength -= deleteRanges[cnt].length;
767 }
768
769 __CFAssertIfFixedLengthIsOK(str, newLength);
770
771 if (newLength == 0) {
772 // An somewhat optimized code-path for this special case, with the following implicit values:
773 // newIsUnicode = false
774 // useLengthAndNullBytes = false
775 // newCharSize = sizeof(uint8_t)
776 // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
777 // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
778 CFIndex curCapacity = __CFStrCapacity(str);
779 CFIndex newCapacity = __CFStrNewCapacity(str, 0, curCapacity, true, sizeof(uint8_t));
780 if (newCapacity != curCapacity) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
781 if (curContents) __CFStrDeallocateMutableContents(str, (uint8_t *)curContents);
782 __CFStrSetContentPtr(str, NULL);
783 __CFStrSetCapacity(str, 0);
784 __CFStrClearCapacityProvidedExternally(str);
785 __CFStrClearHasLengthAndNullBytes(str);
786 if (!__CFStrIsExternalMutable(str)) __CFStrClearUnicode(str); // External mutable implies Unicode
787 } else {
788 if (!__CFStrIsExternalMutable(str)) {
789 __CFStrClearUnicode(str);
790 if (curCapacity >= (int)(sizeof(uint8_t) * 2)) { // If there's room
791 __CFStrSetHasLengthAndNullBytes(str);
792 ((uint8_t *)curContents)[0] = ((uint8_t *)curContents)[1] = 0;
793 } else {
794 __CFStrClearHasLengthAndNullBytes(str);
795 }
796 }
797 }
798 __CFStrSetExplicitLength(str, 0);
799 } else { /* This else-clause assumes newLength > 0 */
800 Boolean oldIsUnicode = __CFStrIsUnicode(str);
801 Boolean newIsUnicode = makeUnicode || (oldIsUnicode /* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str);
802 CFIndex newCharSize = newIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
803 Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */;
804 CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0; /* 2 extra bytes to keep the length byte & null... */
805 CFIndex curCapacity = __CFStrCapacity(str);
806 CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize);
807 Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
808 uint8_t *newContents;
809 if (allocNewBuffer) {
810 newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
811 if (!newContents) { // Try allocating without extra room
812 newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, false, newCharSize);
813 newContents = (uint8_t *)__CFStrAllocateMutableContents(str, newCapacity);
814 if (!newContents) {
815 __CFStringHandleOutOfMemory(str);
816 // Ideally control doesn't come here at all since we expect the above call to raise an exception.
817 // If control comes here, there isn't much we can do.
818 }
819 }
820 } else {
821 newContents = (uint8_t *)curContents;
822 }
823
824 Boolean hasLengthAndNullBytes = __CFStrHasLengthByte(str);
825
826 CFAssert1(hasLengthAndNullBytes == __CFStrHasNullByte(str), __kCFLogAssertion, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__);
827
828 if (hasLengthAndNullBytes) curContents++;
829 if (useLengthAndNullBytes) newContents++;
830
831 if (curContents) {
832 if (oldIsUnicode == newIsUnicode) {
833 if (newContents == curContents) {
834 rearrangeBlocks(newContents, curLength, newCharSize, deleteRanges, numDeleteRanges, insertLength);
835 } else {
836 copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
837 }
838 } else if (newIsUnicode) { /* this implies we have a new buffer */
839 copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
840 }
841 if (hasLengthAndNullBytes) curContents--; /* Undo the damage from above */
842 if (allocNewBuffer && __CFStrFreeContentsWhenDone(str)) __CFStrDeallocateMutableContents(str, (void *)curContents);
843 }
844
845 if (!newIsUnicode) {
846 if (useLengthAndNullBytes) {
847 newContents[newLength] = 0; /* Always have null byte, if not unicode */
848 newContents--; /* Undo the damage from above */
849 newContents[0] = __CFCanUseLengthByte(newLength) ? (uint8_t)newLength : 0;
850 if (!hasLengthAndNullBytes) __CFStrSetHasLengthAndNullBytes(str);
851 } else {
852 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
853 }
854 if (oldIsUnicode) __CFStrClearUnicode(str);
855 } else { // New is unicode...
856 if (!oldIsUnicode) __CFStrSetUnicode(str);
857 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
858 }
859 __CFStrSetExplicitLength(str, newLength);
860
861 if (allocNewBuffer) {
862 __CFStrSetCapacity(str, newCapacity);
863 __CFStrClearCapacityProvidedExternally(str);
864 __CFStrSetContentPtr(str, newContents);
865 }
866 }
867 }
868
869 /* Same as above, but takes one range (very common case)
870 */
871 CF_INLINE void __CFStringChangeSize(CFMutableStringRef str, CFRange range, CFIndex insertLength, Boolean makeUnicode) {
872 __CFStringChangeSizeMultiple(str, &range, 1, insertLength, makeUnicode);
873 }
874
875
876 #if defined(DEBUG)
877 static Boolean __CFStrIsConstantString(CFStringRef str);
878 #endif
879
880 static void __CFStringDeallocate(CFTypeRef cf) {
881 CFStringRef str = (CFStringRef)cf;
882
883 // If in DEBUG mode, check to see if the string a CFSTR, and complain.
884 CFAssert1(__CFConstantStringTableBeingFreed || !__CFStrIsConstantString((CFStringRef)cf), __kCFLogAssertion, "Tried to deallocate CFSTR(\"%@\")", str);
885
886 if (!__CFStrIsInline(str)) {
887 uint8_t *contents;
888 Boolean isMutable = __CFStrIsMutable(str);
889 if (__CFStrFreeContentsWhenDone(str) && (contents = (uint8_t *)__CFStrContents(str))) {
890 if (isMutable) {
891 __CFStrDeallocateMutableContents((CFMutableStringRef)str, contents);
892 } else {
893 if (__CFStrHasContentsDeallocator(str)) {
894 CFAllocatorRef contentsDeallocator = __CFStrContentsDeallocator(str);
895 CFAllocatorDeallocate(contentsDeallocator, contents);
896 CFRelease(contentsDeallocator);
897 } else {
898 CFAllocatorRef alloc = __CFGetAllocator(str);
899 CFAllocatorDeallocate(alloc, contents);
900 }
901 }
902 }
903 if (isMutable && __CFStrHasContentsAllocator(str)) CFRelease(__CFStrContentsAllocator((CFMutableStringRef)str));
904 }
905 }
906
907 static Boolean __CFStringEqual(CFTypeRef cf1, CFTypeRef cf2) {
908 CFStringRef str1 = (CFStringRef)cf1;
909 CFStringRef str2 = (CFStringRef)cf2;
910 const uint8_t *contents1;
911 const uint8_t *contents2;
912 CFIndex len1;
913
914 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
915 /* !!! We do not need == test, as the CFBase runtime assures this */
916
917 contents1 = (uint8_t *)__CFStrContents(str1);
918 contents2 = (uint8_t *)__CFStrContents(str2);
919 len1 = __CFStrLength2(str1, contents1);
920
921 if (len1 != __CFStrLength2(str2, contents2)) return false;
922
923 contents1 += __CFStrSkipAnyLengthByte(str1);
924 contents2 += __CFStrSkipAnyLengthByte(str2);
925
926 if (__CFStrIsEightBit(str1) && __CFStrIsEightBit(str2)) {
927 return memcmp((const char *)contents1, (const char *)contents2, len1) ? false : true;
928 } else if (__CFStrIsEightBit(str1)) { /* One string has Unicode contents */
929 CFStringInlineBuffer buf;
930 CFIndex buf_idx = 0;
931
932 CFStringInitInlineBuffer(str1, &buf, CFRangeMake(0, len1));
933 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
934 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents2)[buf_idx]) return false;
935 }
936 } else if (__CFStrIsEightBit(str2)) { /* One string has Unicode contents */
937 CFStringInlineBuffer buf;
938 CFIndex buf_idx = 0;
939
940 CFStringInitInlineBuffer(str2, &buf, CFRangeMake(0, len1));
941 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
942 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents1)[buf_idx]) return false;
943 }
944 } else { /* Both strings have Unicode contents */
945 CFIndex idx;
946 for (idx = 0; idx < len1; idx++) {
947 if (((UniChar *)contents1)[idx] != ((UniChar *)contents2)[idx]) return false;
948 }
949 }
950 return true;
951 }
952
953
954 /* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
955 If the length is less than or equal to 96, then the hash function is simply the
956 following (n is the nth UniChar character, starting from 0):
957
958 hash(-1) = length
959 hash(n) = hash(n-1) * 257 + unichar(n);
960 Hash = hash(length-1) + (hash(length-1) << (length & 31))
961
962 If the length is greater than 96, then the above algorithm applies to
963 characters 0..31, (length/2)-16..(length/2)+15, and length-32..length-1, inclusive;
964 thus the first, middle, and last 32 characters.
965
966 Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401; 67503105 is 257^4 - 256^4
967 If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.
968 !!! We haven't updated for LP64 yet
969
970 NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.
971
972 Hash function was changed between Panther and Tiger, and Tiger and Leopard.
973 */
/* Strings up to this many characters are hashed in full; longer ones are sampled. */
#define HashEverythingLimit 96

/* Both macros below fold characters into a variable named 'result', which must be in scope at the point of use,
   and advance 'pointer' past the characters consumed. accessStart and accessEnd are the pieces of text to put
   before and after an index in order to read one character, e.g. (p[, ]) to read p[0], p[1], ...
   The bodies are wrapped in do { } while (0) so that an invocation followed by a semicolon is exactly one
   statement, usable as the body of a loop or in either arm of an if/else. The arithmetic itself must not be
   touched, as the resulting hash values depend on it.
*/

/* Folds four characters at once; the multipliers are 257^4 - 256^4, 257^3, 257^2 and 257. */
#define HashNextFourUniChars(accessStart, accessEnd, pointer) \
    do {result = result * 67503105 + (accessStart 0 accessEnd) * 16974593 + (accessStart 1 accessEnd) * 66049 + (accessStart 2 accessEnd) * 257 + (accessStart 3 accessEnd); pointer += 4;} while (0)

/* Folds a single character. */
#define HashNextUniChar(accessStart, accessEnd, pointer) \
    do {result = result * 257 + (accessStart 0 accessEnd); pointer++;} while (0)
981
982
983 /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing.
984 */
985 CF_INLINE CFHashCode __CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) {
986 CFHashCode result = actualLen;
987 if (len <= HashEverythingLimit) {
988 const UniChar *end4 = uContents + (len & ~3);
989 const UniChar *end = uContents + len;
990 while (uContents < end4) HashNextFourUniChars(uContents[, ], uContents); // First count in fours
991 while (uContents < end) HashNextUniChar(uContents[, ], uContents); // Then for the last <4 chars, count in ones...
992 } else {
993 const UniChar *contents, *end;
994 contents = uContents;
995 end = contents + 32;
996 while (contents < end) HashNextFourUniChars(contents[, ], contents);
997 contents = uContents + (len >> 1) - 16;
998 end = contents + 32;
999 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1000 end = uContents + len;
1001 contents = end - 32;
1002 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1003 }
1004 return result + (result << (actualLen & 31));
1005 }
1006
1007 /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check.
1008 */
1009 CF_INLINE CFHashCode __CFStrHashEightBit(const uint8_t *cContents, CFIndex len) {
1010 #if defined(DEBUG)
1011 if (!__CFCharToUniCharFunc) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
1012 CFIndex cnt;
1013 Boolean err = false;
1014 if (len <= HashEverythingLimit) {
1015 for (cnt = 0; cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1016 } else {
1017 for (cnt = 0; cnt < 32; cnt++) if (cContents[cnt] >= 128) err = true;
1018 for (cnt = (len >> 1) - 16; cnt < (len >> 1) + 16; cnt++) if (cContents[cnt] >= 128) err = true;
1019 for (cnt = (len - 32); cnt < len; cnt++) if (cContents[cnt] >= 128) err = true;
1020 }
1021 if (err) {
1022 // Can't do log here, as it might be too early
1023 fprintf(stderr, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
1024 }
1025 }
1026 #endif
1027 CFHashCode result = len;
1028 if (len <= HashEverythingLimit) {
1029 const uint8_t *end4 = cContents + (len & ~3);
1030 const uint8_t *end = cContents + len;
1031 while (cContents < end4) HashNextFourUniChars(__CFCharToUniCharTable[cContents[, ]], cContents); // First count in fours
1032 while (cContents < end) HashNextUniChar(__CFCharToUniCharTable[cContents[, ]], cContents); // Then for the last <4 chars, count in ones...
1033 } else {
1034 const uint8_t *contents, *end;
1035 contents = cContents;
1036 end = contents + 32;
1037 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1038 contents = cContents + (len >> 1) - 16;
1039 end = contents + 32;
1040 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1041 end = cContents + len;
1042 contents = end - 32;
1043 while (contents < end) HashNextFourUniChars(__CFCharToUniCharTable[contents[, ]], contents);
1044 }
1045 return result + (result << (len & 31));
1046 }
1047
1048 CFHashCode CFStringHashISOLatin1CString(const uint8_t *bytes, CFIndex len) {
1049 CFHashCode result = len;
1050 if (len <= HashEverythingLimit) {
1051 const uint8_t *end4 = bytes + (len & ~3);
1052 const uint8_t *end = bytes + len;
1053 while (bytes < end4) HashNextFourUniChars(bytes[, ], bytes); // First count in fours
1054 while (bytes < end) HashNextUniChar(bytes[, ], bytes); // Then for the last <4 chars, count in ones...
1055 } else {
1056 const uint8_t *contents, *end;
1057 contents = bytes;
1058 end = contents + 32;
1059 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1060 contents = bytes + (len >> 1) - 16;
1061 end = contents + 32;
1062 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1063 end = bytes + len;
1064 contents = end - 32;
1065 while (contents < end) HashNextFourUniChars(contents[, ], contents);
1066 }
1067 return result + (result << (len & 31));
1068 }
1069
1070 CFHashCode CFStringHashCString(const uint8_t *bytes, CFIndex len) {
1071 return __CFStrHashEightBit(bytes, len);
1072 }
1073
1074 CFHashCode CFStringHashCharacters(const UniChar *characters, CFIndex len) {
1075 return __CFStrHashCharacters(characters, len, len);
1076 }
1077
1078 /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash.
1079 */
1080 CFHashCode CFStringHashNSString(CFStringRef str) {
1081 UniChar buffer[HashEverythingLimit];
1082 CFIndex bufLen; // Number of characters in the buffer for hashing
1083 CFIndex len = 0; // Actual length of the string
1084
1085 CF_OBJC_CALL0(CFIndex, len, str, "length");
1086 if (len <= HashEverythingLimit) {
1087 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, len));
1088 bufLen = len;
1089 } else {
1090 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, 32));
1091 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer+32, CFRangeMake((len >> 1) - 16, 32));
1092 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer+64, CFRangeMake(len - 32, 32));
1093 bufLen = HashEverythingLimit;
1094 }
1095 return __CFStrHashCharacters(buffer, bufLen, len);
1096 }
1097
1098 CFHashCode __CFStringHash(CFTypeRef cf) {
1099 /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
1100 CFStringRef str = (CFStringRef)cf;
1101 const uint8_t *contents = (uint8_t *)__CFStrContents(str);
1102 CFIndex len = __CFStrLength2(str, contents);
1103
1104 if (__CFStrIsEightBit(str)) {
1105 contents += __CFStrSkipAnyLengthByte(str);
1106 return __CFStrHashEightBit(contents, len);
1107 } else {
1108 return __CFStrHashCharacters((const UniChar *)contents, len, len);
1109 }
1110 }
1111
1112
1113 static CFStringRef __CFStringCopyDescription(CFTypeRef cf) {
1114 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf, __CFGetAllocator(cf), cf);
1115 }
1116
1117 static CFStringRef __CFStringCopyFormattingDescription(CFTypeRef cf, CFDictionaryRef formatOptions) {
1118 return (CFStringRef)CFStringCreateCopy(__CFGetAllocator(cf), (CFStringRef)cf);
1119 }
1120
1121 static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID;
1122
1123 typedef CFTypeRef (*CF_STRING_CREATE_COPY)(CFAllocatorRef alloc, CFTypeRef theString);
1124
1125 static const CFRuntimeClass __CFStringClass = {
1126 0,
1127 "CFString",
1128 NULL, // init
1129 (CF_STRING_CREATE_COPY)CFStringCreateCopy,
1130 __CFStringDeallocate,
1131 __CFStringEqual,
1132 __CFStringHash,
1133 __CFStringCopyFormattingDescription,
1134 __CFStringCopyDescription
1135 };
1136
1137 __private_extern__ void __CFStringInitialize(void) {
1138 __kCFStringTypeID = _CFRuntimeRegisterClass(&__CFStringClass);
1139 }
1140
1141 CFTypeID CFStringGetTypeID(void) {
1142 return __kCFStringTypeID;
1143 }
1144
1145
1146 static Boolean CFStrIsUnicode(CFStringRef str) {
1147 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, Boolean, str, "_encodingCantBeStoredInEightBitCFString");
1148 return __CFStrIsUnicode(str);
1149 }
1150
1151
1152
1153 #define ALLOCATORSFREEFUNC ((CFAllocatorRef)-1)
1154
1155 /* contentsDeallocator indicates how to free the data if it's noCopy == true:
1156 kCFAllocatorNull: don't free
1157 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
1158 NULL: default allocator
1159 otherwise it's the allocator that should be used (it will be explicitly stored)
1160 if noCopy == false, then freeFunc should be ALLOCATORSFREEFUNC
1161 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
1162 possiblyExternalFormat indicates that the bytes might have BOM and be swapped
1163 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
1164 numBytes contains the actual number of bytes in "bytes", including Length byte,
1165 BUT not the NULL byte at the end
1166 bytes should not contain BOM characters
1167 !!! Various flags should be combined to reduce number of arguments, if possible
1168 */
1169 __private_extern__ CFStringRef __CFStringCreateImmutableFunnel3(
1170 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1171 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1172 CFAllocatorRef contentsDeallocator, UInt32 converterFlags) {
1173
1174 CFMutableStringRef str;
1175 CFVarWidthCharBuffer vBuf;
1176 CFIndex size;
1177 Boolean useLengthByte = false;
1178 Boolean useNullByte = false;
1179 Boolean useInlineData = false;
1180
1181 #if INSTRUMENT_SHARED_STRINGS
1182 const char *recordedEncoding;
1183 char encodingBuffer[128];
1184 if (encoding == kCFStringEncodingUnicode) recordedEncoding = "Unicode";
1185 else if (encoding == kCFStringEncodingASCII) recordedEncoding = "ASCII";
1186 else if (encoding == kCFStringEncodingUTF8) recordedEncoding = "UTF8";
1187 else if (encoding == kCFStringEncodingMacRoman) recordedEncoding = "MacRoman";
1188 else {
1189 sprintf(encodingBuffer, "0x%lX", (unsigned long)encoding);
1190 recordedEncoding = encodingBuffer;
1191 }
1192 #endif
1193
1194 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1195
1196 if (contentsDeallocator == ALLOCATORSFREEFUNC) {
1197 contentsDeallocator = alloc;
1198 } else if (contentsDeallocator == NULL) {
1199 contentsDeallocator = __CFGetDefaultAllocator();
1200 }
1201
1202 if ((NULL != kCFEmptyString) && (numBytes == 0) && (alloc == kCFAllocatorSystemDefault)) { // If we are using the system default allocator, and the string is empty, then use the empty string!
1203 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
1204 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1205 }
1206 return (CFStringRef)CFRetain(kCFEmptyString); // Quick exit; won't catch all empty strings, but most
1207 }
1208
1209 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1210
1211 vBuf.shouldFreeChars = false; // We use this to remember to free the buffer possibly allocated by decode
1212
1213 // Record whether we're starting out with an ASCII-superset string, because we need to know this later for the string ROM; this may get changed later if we successfully convert down from Unicode. We only record this once because __CFCanUseEightBitCFStringForBytes() can be expensive.
1214 Boolean stringSupportsEightBitCFRepresentation = encoding != kCFStringEncodingUnicode && __CFCanUseEightBitCFStringForBytes((const uint8_t *)bytes, numBytes, encoding);
1215
1216 // We may also change noCopy within this function if we have to decode the string into an external buffer. We do not want to avoid the use of the string ROM merely because we tried to be efficient and reuse the decoded buffer for the CFString's external storage. Therefore, we use this variable to track whether we actually can ignore the noCopy flag (which may or may not be set anyways).
1217 Boolean stringROMShouldIgnoreNoCopy = false;
1218
1219 // First check to see if the data needs to be converted...
1220 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
1221
1222 if ((encoding == kCFStringEncodingUnicode && possiblyExternalFormat) || encoding != kCFStringEncodingUnicode && ! stringSupportsEightBitCFRepresentation) {
1223 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0);
1224 CFIndex realNumBytes = numBytes - (hasLengthByte ? 1 : 0);
1225 Boolean usingPassedInMemory = false;
1226
1227 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
1228 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
1229
1230 if (!__CFStringDecodeByteStream3((const uint8_t *)realBytes, realNumBytes, encoding, false, &vBuf, &usingPassedInMemory, converterFlags)) {
1231 // Note that if the string can't be created, we don't free the buffer, even if there is a contents deallocator. This is on purpose.
1232 return NULL;
1233 }
1234
1235 encoding = vBuf.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUnicode;
1236
1237 // Update our flag according to whether the decoded buffer is ASCII
1238 stringSupportsEightBitCFRepresentation = vBuf.isASCII;
1239
1240 if (!usingPassedInMemory) {
1241
1242 // Because __CFStringDecodeByteStream3() allocated our buffer, it's OK for us to free it if we can get the string from the ROM.
1243 stringROMShouldIgnoreNoCopy = true;
1244
1245 // Make the parameters fit the new situation
1246 numBytes = vBuf.isASCII ? vBuf.numChars : (vBuf.numChars * sizeof(UniChar));
1247 hasLengthByte = hasNullByte = false;
1248
1249 // Get rid of the original buffer if its not being used
1250 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1251 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1252 }
1253 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1254
1255 // See if we can reuse any storage the decode func might have allocated
1256 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1257
1258 if (vBuf.shouldFreeChars && (alloc == vBuf.allocator) && encoding == kCFStringEncodingUnicode) {
1259 vBuf.shouldFreeChars = false; // Transferring ownership to the CFString
1260 bytes = CFAllocatorReallocate(vBuf.allocator, (void *)vBuf.chars.unicode, numBytes, 0); // Tighten up the storage
1261 noCopy = true;
1262 #if INSTRUMENT_SHARED_STRINGS
1263 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-NoCopy";
1264 else recordedEncoding = "ForeignUnicode-NoCopy";
1265 #endif
1266 } else {
1267 #if INSTRUMENT_SHARED_STRINGS
1268 if (encoding == kCFStringEncodingASCII) recordedEncoding = "ForeignASCII-Copy";
1269 else recordedEncoding = "ForeignUnicode-Copy";
1270 #endif
1271 bytes = vBuf.chars.unicode;
1272 noCopy = false; // Can't do noCopy anymore
1273 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1274 }
1275
1276 }
1277
1278 // At this point, all necessary input arguments have been changed to reflect the new state
1279
1280 } else if (encoding == kCFStringEncodingUnicode && tryToReduceUnicode) { // Check to see if we can reduce Unicode to ASCII
1281 CFIndex cnt;
1282 CFIndex len = numBytes / sizeof(UniChar);
1283 Boolean allASCII = true;
1284
1285 for (cnt = 0; cnt < len; cnt++) if (((const UniChar *)bytes)[cnt] > 127) {
1286 allASCII = false;
1287 break;
1288 }
1289
1290 if (allASCII) { // Yes we can!
1291 uint8_t *ptr, *mem;
1292 Boolean newHasLengthByte = __CFCanUseLengthByte(len);
1293 numBytes = (len + 1 + (newHasLengthByte ? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte
1294 // See if we can use that temporary local buffer in vBuf...
1295 if (numBytes >= __kCFVarWidthLocalBufferSize) {
1296 mem = ptr = (uint8_t *)CFAllocatorAllocate(alloc, numBytes, 0);
1297 if (__CFOASafe) __CFSetLastAllocationEventName(mem, "CFString (store)");
1298 } else {
1299 mem = ptr = (uint8_t *)(vBuf.localBuffer);
1300 }
1301 if (mem) { // If we can't allocate memory for some reason, use what we had (that is, as if we didn't have all ASCII)
1302 // Copy the Unicode bytes into the new ASCII buffer
1303 hasLengthByte = newHasLengthByte;
1304 hasNullByte = true;
1305 if (hasLengthByte) *ptr++ = (uint8_t)len;
1306 for (cnt = 0; cnt < len; cnt++) ptr[cnt] = (uint8_t)(((const UniChar *)bytes)[cnt]);
1307 ptr[len] = 0;
1308 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1309 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1310 }
1311 // Now make everything look like we had an ASCII buffer to start with
1312 bytes = mem;
1313 encoding = kCFStringEncodingASCII;
1314 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1315 noCopy = (numBytes >= __kCFVarWidthLocalBufferSize); // If we had to allocate it, make sure it's kept around
1316 numBytes--; // Should not contain the NULL byte at end...
1317 stringSupportsEightBitCFRepresentation = true; // We're ASCII now!
1318 stringROMShouldIgnoreNoCopy = true; // We allocated this buffer, so we should feel free to get rid of it if we can use the string ROM
1319 #if INSTRUMENT_SHARED_STRINGS
1320 recordedEncoding = "U->A";
1321 #endif
1322 }
1323 }
1324
1325 // At this point, all necessary input arguments have been changed to reflect the new state
1326 }
1327
1328 // Now determine the necessary size
1329
1330 Boolean stringSupportsROM = stringSupportsEightBitCFRepresentation;
1331
1332 #if INSTRUMENT_SHARED_STRINGS
1333 if (stringSupportsROM) {
1334 const void *realBytes = (uint8_t *) bytes + (hasLengthByte ? 1 : 0);
1335 CFIndex realNumBytes = numBytes - !! hasLengthByte;
1336 __CFRecordStringAllocationEvent(recordedEncoding, realBytes, realNumBytes);
1337 }
1338 #endif
1339
1340 CFStringRef romResult = NULL;
1341
1342 #if USE_STRING_ROM
1343
1344 if (stringSupportsROM) {
1345 // Disable the string ROM if necessary
1346 static char sDisableStringROM = -1;
1347 if (sDisableStringROM == -1) sDisableStringROM = !! getenv("CFStringDisableROM");
1348
1349 if (sDisableStringROM == 0) romResult = _CFSearchStringROM(bytes + !! hasLengthByte, numBytes - !! hasLengthByte);
1350 }
1351 /* if we get a result from our ROM, and noCopy is set, then deallocate the buffer immediately */
1352 if (romResult) {
1353 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1354 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1355 }
1356
1357 /* these don't get used again, but clear them for consistency */
1358 noCopy = false;
1359 bytes = NULL;
1360
1361 /* set our result to the ROM result which is not really mutable, of course, but that's OK because we don't try to modify it. */
1362 str = (CFMutableStringRef)romResult;
1363 }
1364 #endif
1365
1366 if (! romResult) {
1367 // Now determine the necessary size
1368
1369 if (noCopy) {
1370
1371 size = sizeof(void *); // Pointer to the buffer
1372 if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) {
1373 size += sizeof(void *); // The contentsDeallocator
1374 }
1375 if (!hasLengthByte) size += sizeof(CFIndex); // Explicit length
1376 useLengthByte = hasLengthByte;
1377 useNullByte = hasNullByte;
1378
1379 } else { // Inline data; reserve space for it
1380
1381 useInlineData = true;
1382 size = numBytes;
1383
1384 if (hasLengthByte || (encoding != kCFStringEncodingUnicode && __CFCanUseLengthByte(numBytes))) {
1385 useLengthByte = true;
1386 if (!hasLengthByte) size += 1;
1387 } else {
1388 size += sizeof(CFIndex); // Explicit length
1389 }
1390 if (hasNullByte || encoding != kCFStringEncodingUnicode) {
1391 useNullByte = true;
1392 size += 1;
1393 }
1394 }
1395
1396 #ifdef STRING_SIZE_STATS
1397 // Dump alloced CFString size info every so often
1398 static int cnt = 0;
1399 static unsigned sizes[256] = {0};
1400 int allocedSize = size + sizeof(CFRuntimeBase);
1401 if (allocedSize < 255) sizes[allocedSize]++; else sizes[255]++;
1402 if ((++cnt % 1000) == 0) {
1403 printf ("\nTotal: %d\n", cnt);
1404 int i; for (i = 0; i < 256; i++) printf("%03d: %5d%s", i, sizes[i], ((i % 8) == 7) ? "\n" : " ");
1405 }
1406 #endif
1407
1408 // Finally, allocate!
1409
1410 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, size, NULL);
1411 if (str) {
1412 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (immutable)");
1413
1414 __CFStrSetInfoBits(str,
1415 (useInlineData ? __kCFHasInlineContents : (contentsDeallocator == alloc ? __kCFNotInlineContentsDefaultFree : (contentsDeallocator == kCFAllocatorNull ? __kCFNotInlineContentsNoFree : __kCFNotInlineContentsCustomFree))) |
1416 ((encoding == kCFStringEncodingUnicode) ? __kCFIsUnicode : 0) |
1417 (useNullByte ? __kCFHasNullByte : 0) |
1418 (useLengthByte ? __kCFHasLengthByte : 0));
1419
1420 if (!useLengthByte) {
1421 CFIndex length = numBytes - (hasLengthByte ? 1 : 0);
1422 if (encoding == kCFStringEncodingUnicode) length /= sizeof(UniChar);
1423 __CFStrSetExplicitLength(str, length);
1424 }
1425
1426 if (useInlineData) {
1427 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1428 if (useLengthByte && !hasLengthByte) *contents++ = (uint8_t)numBytes;
1429 memmove(contents, bytes, numBytes);
1430 if (useNullByte) contents[numBytes] = 0;
1431 } else {
1432 __CFStrSetContentPtr(str, bytes);
1433 if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) __CFStrSetContentsDeallocator(str, (CFAllocatorRef)CFRetain(contentsDeallocator));
1434 }
1435 } else {
1436 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) {
1437 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1438 }
1439 }
1440 }
1441 if (vBuf.shouldFreeChars) CFAllocatorDeallocate(vBuf.allocator, (void *)bytes);
1442
1443 return str;
1444 }
1445
1446 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1447 */
1448 CFStringRef __CFStringCreateImmutableFunnel2(
1449 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1450 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1451 CFAllocatorRef contentsDeallocator) {
1452 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, possiblyExternalFormat, tryToReduceUnicode, hasLengthByte, hasNullByte, noCopy, contentsDeallocator, 0);
1453 }
1454
1455
1456
1457 CFStringRef CFStringCreateWithPascalString(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding) {
1458 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1459 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, false, ALLOCATORSFREEFUNC, 0);
1460 }
1461
1462
1463 CFStringRef CFStringCreateWithCString(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding) {
1464 CFIndex len = strlen(cStr);
1465 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, false, ALLOCATORSFREEFUNC, 0);
1466 }
1467
1468 CFStringRef CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1469 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1470 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, true, contentsDeallocator, 0);
1471 }
1472
1473
1474 CFStringRef CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1475 CFIndex len = strlen(cStr);
1476 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, true, contentsDeallocator, 0);
1477 }
1478
1479
1480 CFStringRef CFStringCreateWithCharacters(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars) {
1481 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1482 }
1483
1484
1485 CFStringRef CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars, CFAllocatorRef contentsDeallocator) {
1486 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, false, false, false, true, contentsDeallocator, 0);
1487 }
1488
1489
1490 CFStringRef CFStringCreateWithBytes(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat) {
1491 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1492 }
1493
1494 CFStringRef _CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1495 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1496 }
1497
1498 CFStringRef CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1499 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1500 }
1501
1502 CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1503 return _CFStringCreateWithFormatAndArgumentsAux(alloc, NULL, formatOptions, format, arguments);
1504 }
1505
1506 CFStringRef _CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1507 CFStringRef str;
1508 CFMutableStringRef outputString = CFStringCreateMutable(__CFGetDefaultAllocator(), 0); //should use alloc if no copy/release
1509 __CFStrSetDesiredCapacity(outputString, 120); // Given this will be tightened later, choosing a larger working string is fine
1510 _CFStringAppendFormatAndArgumentsAux(outputString, copyDescFunc, formatOptions, format, arguments);
1511 // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1512 // (However, this does make the string inline, and cause the supplied allocator to be used...)
1513 str = (CFStringRef)CFStringCreateCopy(alloc, outputString);
1514 CFRelease(outputString);
1515 return str;
1516 }
1517
1518 CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) {
1519 CFStringRef result;
1520 va_list argList;
1521
1522 va_start(argList, format);
1523 result = CFStringCreateWithFormatAndArguments(alloc, formatOptions, format, argList);
1524 va_end(argList);
1525
1526 return result;
1527 }
1528
1529 CFStringRef CFStringCreateWithSubstring(CFAllocatorRef alloc, CFStringRef str, CFRange range) {
1530 // CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, CFStringRef , str, "_createSubstringWithRange:", CFRangeMake(range.location, range.length));
1531
1532 __CFAssertIsString(str);
1533 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1534
1535 if ((range.location == 0) && (range.length == __CFStrLength(str))) { /* The substring is the whole string... */
1536 return (CFStringRef)CFStringCreateCopy(alloc, str);
1537 } else if (__CFStrIsEightBit(str)) {
1538 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1539 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location + __CFStrSkipAnyLengthByte(str), range.length, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1540 } else {
1541 const UniChar *contents = (UniChar *)__CFStrContents(str);
1542 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location, range.length * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1543 }
1544 }
1545
1546 CFStringRef CFStringCreateCopy(CFAllocatorRef alloc, CFStringRef str) {
1547 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringRef, str, "copy");
1548
1549 __CFAssertIsString(str);
1550 if (!__CFStrIsMutable((CFStringRef)str) && // If the string is not mutable
1551 ((alloc ? alloc : __CFGetDefaultAllocator()) == __CFGetAllocator(str)) && // and it has the same allocator as the one we're using
1552 (__CFStrIsInline((CFStringRef)str) || __CFStrFreeContentsWhenDone((CFStringRef)str) || __CFStrIsConstant((CFStringRef)str))) { // and the characters are inline, or are owned by the string, or the string is constant
1553 CFRetain(str); // Then just retain instead of making a true copy
1554 return str;
1555 }
1556 if (__CFStrIsEightBit((CFStringRef)str)) {
1557 const uint8_t *contents = (const uint8_t *)__CFStrContents((CFStringRef)str);
1558 return __CFStringCreateImmutableFunnel3(alloc, contents + __CFStrSkipAnyLengthByte((CFStringRef)str), __CFStrLength2((CFStringRef)str, contents), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1559 } else {
1560 const UniChar *contents = (const UniChar *)__CFStrContents((CFStringRef)str);
1561 return __CFStringCreateImmutableFunnel3(alloc, contents, __CFStrLength2((CFStringRef)str, contents) * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1562 }
1563 }
1564
1565
1566
1567 /*** Constant string stuff... ***/
1568
/* Table which holds constant strings created with CFSTR, when -fconstant-cfstrings option is not used.
   These dynamically created constant strings are stored in constantStringTable. The keys are the
   8-bit constant C-strings from the compiler; the values are the CFStrings created for them.
   The table starts out NULL and is created on first use by __CFStringMakeConstantString().
   _CFSTRLock protects this table.
*/
static CFMutableDictionaryRef constantStringTable = NULL;
static CFSpinLock_t _CFSTRLock = CFSpinLockInit;
1573
1574 static CFStringRef __cStrCopyDescription(const void *ptr) {
1575 return CFStringCreateWithCStringNoCopy(kCFAllocatorSystemDefault, (const char *)ptr, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull);
1576 }
1577
1578 static Boolean __cStrEqual(const void *ptr1, const void *ptr2) {
1579 return (strcmp((const char *)ptr1, (const char *)ptr2) == 0);
1580 }
1581
1582 static CFHashCode __cStrHash(const void *ptr) {
1583 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1584 const char *cStr = (const char *)ptr;
1585 CFIndex len = strlen(cStr);
1586 CFHashCode result = 0;
1587 if (len <= 4) { // All chars
1588 unsigned cnt = len;
1589 while (cnt--) result += (result << 8) + *cStr++;
1590 } else { // First and last 2 chars
1591 result += (result << 8) + cStr[0];
1592 result += (result << 8) + cStr[1];
1593 result += (result << 8) + cStr[len-2];
1594 result += (result << 8) + cStr[len-1];
1595 }
1596 result += (result << (len & 31));
1597 return result;
1598 }
1599
1600
1601 CFStringRef __CFStringMakeConstantString(const char *cStr) {
1602 CFStringRef result;
1603 #if defined(DEBUG)
1604 // StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging
1605 if ('\0' == *cStr) return kCFEmptyString;
1606 #endif
1607 if (constantStringTable == NULL) {
1608 CFDictionaryKeyCallBacks constantStringCallBacks = {0, NULL, NULL, __cStrCopyDescription, __cStrEqual, __cStrHash};
1609 CFDictionaryValueCallBacks constantStringValueCallBacks = kCFTypeDictionaryValueCallBacks;
1610 constantStringValueCallBacks.equal = NULL; // So that we only find strings that are ==
1611 CFMutableDictionaryRef table = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, &constantStringCallBacks, &constantStringValueCallBacks);
1612 _CFDictionarySetCapacity(table, 2500); // avoid lots of rehashing
1613 __CFSpinLock(&_CFSTRLock);
1614 if (constantStringTable == NULL) constantStringTable = table;
1615 __CFSpinUnlock(&_CFSTRLock);
1616 if (constantStringTable != table) CFRelease(table);
1617 }
1618
1619 __CFSpinLock(&_CFSTRLock);
1620 if ((result = (CFStringRef)CFDictionaryGetValue(constantStringTable, cStr))) {
1621 __CFSpinUnlock(&_CFSTRLock);
1622 } else {
1623 __CFSpinUnlock(&_CFSTRLock);
1624
1625 {
1626 char *key;
1627 Boolean isASCII = true;
1628 // Given this code path is rarer these days, OK to do this extra work to verify the strings
1629 const char *tmp = cStr;
1630 while (*tmp) {
1631 if (*(tmp++) & 0x80) {
1632 isASCII = false;
1633 break;
1634 }
1635 }
1636 if (!isASCII) {
1637 CFMutableStringRef ms = CFStringCreateMutable(kCFAllocatorSystemDefault, 0);
1638 tmp = cStr;
1639 while (*tmp) {
1640 CFStringAppendFormat(ms, NULL, (*tmp & 0x80) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp);
1641 tmp++;
1642 }
1643 CFLog(kCFLogLevelWarning, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms);
1644 CFRelease(ms);
1645 }
1646 // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility
1647 result = CFStringCreateWithCString(kCFAllocatorSystemDefault, cStr, kCFStringEncodingMacRoman);
1648 if (result == NULL) {
1649 CFLog(__kCFLogAssertion, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1650 HALT;
1651 }
1652 if (__CFOASafe) __CFSetLastAllocationEventName((void *)result, "CFString (CFSTR)");
1653 if (__CFStrIsEightBit(result)) {
1654 key = (char *)__CFStrContents(result) + __CFStrSkipAnyLengthByte(result);
1655 } else { // For some reason the string is not 8-bit!
1656 key = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, strlen(cStr) + 1, 0);
1657 if (__CFOASafe) __CFSetLastAllocationEventName((void *)key, "CFString (CFSTR key)");
1658 strlcpy(key, cStr, strlen(cStr) + 1); // !!! We will leak this, if the string is removed from the table (or table is freed)
1659 }
1660
1661 {
1662 CFStringRef resultToBeReleased = result;
1663 CFIndex count;
1664 __CFSpinLock(&_CFSTRLock);
1665 count = CFDictionaryGetCount(constantStringTable);
1666 CFDictionaryAddValue(constantStringTable, key, result);
1667 if (CFDictionaryGetCount(constantStringTable) == count) { // add did nothing, someone already put it there
1668 result = (CFStringRef)CFDictionaryGetValue(constantStringTable, key);
1669 } else {
1670 #if __LP64__
1671 ((struct __CFString *)result)->base._rc = 0;
1672 #else
1673 ((struct __CFString *)result)->base._cfinfo[CF_RC_BITS] = 0;
1674 #endif
1675 }
1676 __CFSpinUnlock(&_CFSTRLock);
1677 // This either eliminates the extra retain on the freshly created string, or frees it, if it was actually not inserted into the table
1678 CFRelease(resultToBeReleased);
1679 }
1680 }
1681 }
1682 return result;
1683 }
1684
#if defined(DEBUG)
/* Debug-only check: true when str is one of the values stored in constantStringTable.
   Answers false without taking the lock when the table has not been created yet.
*/
static Boolean __CFStrIsConstantString(CFStringRef str) {
    Boolean isInTable;

    if (!constantStringTable) return false;

    __CFSpinLock(&_CFSTRLock);
    isInTable = CFDictionaryContainsValue(constantStringTable, str);
    __CFSpinUnlock(&_CFSTRLock);
    return isInTable;
}
#endif
1696
1697
#if 0
/* Compiled out. In case library is unloaded, release store for the constant string table.
   DEBUG builds raise __CFConstantStringTableBeingFreed around the release.
*/
void __CFStringCleanup (void) {
    if (constantStringTable == NULL) return;
#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = true;
#endif
    CFRelease(constantStringTable);
#if defined(DEBUG)
    __CFConstantStringTableBeingFreed = false;
#endif
}
#endif
1712
1713
1714 // Can pass in NSString as replacement string
1715 // Call with numRanges > 0, and incrementing ranges
1716
1717 static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) {
1718 int cnt;
1719 CFStringRef copy = NULL;
1720 if (replacement == str) copy = replacement = CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1721 CFIndex replacementLength = CFStringGetLength(replacement);
1722
1723 __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1724
1725 if (__CFStrIsUnicode(str)) {
1726 UniChar *contents = (UniChar *)__CFStrContents(str);
1727 UniChar *firstReplacement = contents + ranges[0].location;
1728 // Extract the replacementString into the first location, then copy from there
1729 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement);
1730 for (cnt = 1; cnt < numRanges; cnt++) {
1731 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1732 contents += replacementLength - ranges[cnt - 1].length;
1733 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar));
1734 }
1735 } else {
1736 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1737 uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str);
1738 // Extract the replacementString into the first location, then copy from there
1739 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL);
1740 contents += __CFStrSkipAnyLengthByte(str); // Now contents will simply track the location to insert next string into
1741 for (cnt = 1; cnt < numRanges; cnt++) {
1742 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1743 contents += replacementLength - ranges[cnt - 1].length;
1744 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength);
1745 }
1746 }
1747 if (copy) CFRelease(copy);
1748 }
1749
1750 // Can pass in NSString as replacement string
1751
1752 CF_INLINE void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
1753 CFStringRef copy = NULL;
1754 if (replacement == str) copy = replacement = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, replacement); // Very special and hopefully rare case
1755 CFIndex replacementLength = CFStringGetLength(replacement);
1756
1757 __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1758
1759 if (__CFStrIsUnicode(str)) {
1760 UniChar *contents = (UniChar *)__CFStrContents(str);
1761 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location);
1762 } else {
1763 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1764 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL);
1765 }
1766
1767 if (copy) CFRelease(copy);
1768 }
1769
1770 /* If client does not provide a minimum capacity
1771 */
1772 #define DEFAULTMINCAPACITY 32
1773
1774 CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) {
1775 CFMutableStringRef str;
1776 Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false;
1777
1778 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1779
1780 // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
1781 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(struct __notInlineMutable) - (hasExternalContentsAllocator ? 0 : sizeof(CFAllocatorRef)), NULL);
1782 if (str) {
1783 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)");
1784
1785 __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits);
1786 str->variants.notInlineMutable.buffer = NULL;
1787 __CFStrSetExplicitLength(str, 0);
1788 str->variants.notInlineMutable.hasGap = str->variants.notInlineMutable.isFixedCapacity = str->variants.notInlineMutable.isExternalMutable = str->variants.notInlineMutable.capacityProvidedExternally = 0;
1789 if (maxLength != 0) __CFStrSetIsFixed(str);
1790 __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength);
1791 __CFStrSetCapacity(str, 0);
1792 }
1793 return str;
1794 }
1795
1796 CFMutableStringRef CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc, UniChar *chars, CFIndex numChars, CFIndex capacity, CFAllocatorRef externalCharactersAllocator) {
1797 CFOptionFlags contentsAllocationBits = externalCharactersAllocator ? ((externalCharactersAllocator == kCFAllocatorNull) ? __kCFNotInlineContentsNoFree : __kCFHasContentsAllocator) : __kCFNotInlineContentsDefaultFree;
1798 CFMutableStringRef string = __CFStringCreateMutableFunnel(alloc, 0, contentsAllocationBits | __kCFIsUnicode);
1799 if (string) {
1800 __CFStrSetIsExternalMutable(string);
1801 if (contentsAllocationBits == __kCFHasContentsAllocator) __CFStrSetContentsAllocator(string, (CFAllocatorRef)CFRetain(externalCharactersAllocator));
1802 CFStringSetExternalCharactersNoCopy(string, chars, numChars, capacity);
1803 }
1804 return string;
1805 }
1806
1807 CFMutableStringRef CFStringCreateMutable(CFAllocatorRef alloc, CFIndex maxLength) {
1808 return __CFStringCreateMutableFunnel(alloc, maxLength, __kCFNotInlineContentsDefaultFree);
1809 }
1810
1811 CFMutableStringRef CFStringCreateMutableCopy(CFAllocatorRef alloc, CFIndex maxLength, CFStringRef string) {
1812 CFMutableStringRef newString;
1813
1814 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFMutableStringRef, string, "mutableCopy");
1815
1816 __CFAssertIsString(string);
1817
1818 newString = CFStringCreateMutable(alloc, maxLength);
1819 __CFStringReplace(newString, CFRangeMake(0, 0), string);
1820
1821 return newString;
1822 }
1823
1824
1825 __private_extern__ void _CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex len) {
1826 __CFAssertIsStringAndMutable(str);
1827 __CFStrSetDesiredCapacity(str, len);
1828 }
1829
1830
1831 /* This one is for CF
1832 */
1833 CFIndex CFStringGetLength(CFStringRef str) {
1834 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFIndex, str, "length");
1835
1836 __CFAssertIsString(str);
1837 return __CFStrLength(str);
1838 }
1839
1840 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1841 */
1842 CFIndex _CFStringGetLength2(CFStringRef str) {
1843 return __CFStrLength(str);
1844 }
1845
1846
1847 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere.
1848 */
1849 CF_INLINE UniChar __CFStringGetCharacterAtIndexGuts(CFStringRef str, CFIndex idx, const uint8_t *contents) {
1850 if (__CFStrIsEightBit(str)) {
1851 contents += __CFStrSkipAnyLengthByte(str);
1852 #if defined(DEBUG)
1853 if (!__CFCharToUniCharFunc && (contents[idx] >= 128)) {
1854 // Can't do log here, as it might be too early
1855 fprintf(stderr, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
1856 }
1857 #endif
1858 return __CFCharToUniCharTable[contents[idx]];
1859 }
1860
1861 return ((UniChar *)contents)[idx];
1862 }
1863
1864 /* This one is for the CF API
1865 */
1866 UniChar CFStringGetCharacterAtIndex(CFStringRef str, CFIndex idx) {
1867 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, UniChar, str, "characterAtIndex:", idx);
1868
1869 __CFAssertIsString(str);
1870 __CFAssertIndexIsInStringBounds(str, idx);
1871 return __CFStringGetCharacterAtIndexGuts(str, idx, (const uint8_t *)__CFStrContents(str));
1872 }
1873
1874 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1875 */
1876 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str, CFIndex idx, UniChar *ch) {
1877 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1878 if (idx >= __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1879 *ch = __CFStringGetCharacterAtIndexGuts(str, idx, contents);
1880 return _CFStringErrNone;
1881 }
1882
1883
1884 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
1885 */
1886 CF_INLINE void __CFStringGetCharactersGuts(CFStringRef str, CFRange range, UniChar *buffer, const uint8_t *contents) {
1887 if (__CFStrIsEightBit(str)) {
1888 __CFStrConvertBytesToUnicode(((uint8_t *)contents) + (range.location + __CFStrSkipAnyLengthByte(str)), buffer, range.length);
1889 } else {
1890 const UniChar *uContents = ((UniChar *)contents) + range.location;
1891 memmove(buffer, uContents, range.length * sizeof(UniChar));
1892 }
1893 }
1894
1895 /* This one is for the CF API
1896 */
1897 void CFStringGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1898 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "getCharacters:range:", buffer, CFRangeMake(range.location, range.length));
1899
1900 __CFAssertIsString(str);
1901 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1902 __CFStringGetCharactersGuts(str, range, buffer, (const uint8_t *)__CFStrContents(str));
1903 }
1904
1905 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1906 */
1907 int _CFStringCheckAndGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1908 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1909 if (range.location + range.length > __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1910 __CFStringGetCharactersGuts(str, range, buffer, contents);
1911 return _CFStringErrNone;
1912 }
1913
1914
1915 CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) {
1916
1917 /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */
1918 __CFAssertIsNotNegative(maxBufLen);
1919
1920 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { // If we can grope the ivars, let's do it...
1921 __CFAssertIsString(str);
1922 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1923
1924 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
1925 const unsigned char *contents = (const unsigned char *)__CFStrContents(str);
1926 CFIndex cLength = range.length;
1927
1928 if (buffer) {
1929 if (cLength > maxBufLen) cLength = maxBufLen;
1930 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength);
1931 }
1932 if (usedBufLen) *usedBufLen = cLength;
1933
1934 return cLength;
1935 }
1936 }
1937
1938 return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen);
1939 }
1940
1941
1942 ConstStringPtr CFStringGetPascalStringPtr (CFStringRef str, CFStringEncoding encoding) {
1943
1944 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1945 __CFAssertIsString(str);
1946 if (__CFStrHasLengthByte(str) && __CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII
1947 const uint8_t *contents = (const uint8_t *)__CFStrContents(str);
1948 if (__CFStrHasExplicitLength(str) && (__CFStrLength2(str, contents) != (SInt32)(*contents))) return NULL; // Invalid length byte
1949 return (ConstStringPtr)contents;
1950 }
1951 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1952 }
1953 return NULL;
1954 }
1955
1956
1957 const char * CFStringGetCStringPtr(CFStringRef str, CFStringEncoding encoding) {
1958
1959 if (encoding != __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII != __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding))) return NULL;
1960 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1961
1962 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, const char *, str, "_fastCStringContents:", true);
1963
1964 __CFAssertIsString(str);
1965
1966 if (__CFStrHasNullByte(str)) {
1967 // Note: this is called a lot, 27000 times to open a small xcode project with one file open.
1968 // Of these uses about 1500 are for cStrings/utf8strings.
1969 return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str);
1970 } else {
1971 return NULL;
1972 }
1973 }
1974
1975
1976 const UniChar *CFStringGetCharactersPtr(CFStringRef str) {
1977
1978 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, const UniChar *, str, "_fastCharacterContents");
1979
1980 __CFAssertIsString(str);
1981 if (__CFStrIsUnicode(str)) return (const UniChar *)__CFStrContents(str);
1982 return NULL;
1983 }
1984
1985
1986 Boolean CFStringGetPascalString(CFStringRef str, Str255 buffer, CFIndex bufferSize, CFStringEncoding encoding) {
1987 CFIndex length;
1988 CFIndex usedLen;
1989
1990 __CFAssertIsNotNegative(bufferSize);
1991 if (bufferSize < 1) return false;
1992
1993 if (CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1994 length = CFStringGetLength(str);
1995 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
1996 } else {
1997 const uint8_t *contents;
1998
1999 __CFAssertIsString(str);
2000
2001 contents = (const uint8_t *)__CFStrContents(str);
2002 length = __CFStrLength2(str, contents);
2003
2004 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
2005
2006 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2007 if (length >= bufferSize) return false;
2008 memmove((void*)(1 + (const char*)buffer), (__CFStrSkipAnyLengthByte(str) + contents), length);
2009 *buffer = (unsigned char)length;
2010 return true;
2011 }
2012 }
2013
2014 if (__CFStringEncodeByteStream(str, 0, length, false, encoding, false, (UInt8 *)(1 + (uint8_t *)buffer), bufferSize - 1, &usedLen) != length) {
2015
2016 #if defined(DEBUG)
2017 if (bufferSize > 0) {
2018 strlcpy((char *)buffer + 1, CONVERSIONFAILURESTR, bufferSize - 1);
2019 buffer[0] = (unsigned char)((CFIndex)sizeof(CONVERSIONFAILURESTR) < (bufferSize - 1) ? (CFIndex)sizeof(CONVERSIONFAILURESTR) : (bufferSize - 1));
2020 }
2021 #else
2022 if (bufferSize > 0) buffer[0] = 0;
2023 #endif
2024 return false;
2025 }
2026 *buffer = (unsigned char)usedLen;
2027 return true;
2028 }
2029
2030 Boolean CFStringGetCString(CFStringRef str, char *buffer, CFIndex bufferSize, CFStringEncoding encoding) {
2031 const uint8_t *contents;
2032 CFIndex len;
2033
2034 __CFAssertIsNotNegative(bufferSize);
2035 if (bufferSize < 1) return false;
2036
2037 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, Boolean, str, "_getCString:maxLength:encoding:", buffer, bufferSize - 1, encoding);
2038
2039 __CFAssertIsString(str);
2040
2041 contents = (const uint8_t *)__CFStrContents(str);
2042 len = __CFStrLength2(str, contents);
2043
2044 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
2045 if (len >= bufferSize) return false;
2046 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str), len);
2047 buffer[len] = 0;
2048 return true;
2049 } else {
2050 CFIndex usedLen;
2051
2052 if (__CFStringEncodeByteStream(str, 0, len, false, encoding, false, (unsigned char*) buffer, bufferSize - 1, &usedLen) == len) {
2053 buffer[usedLen] = '\0';
2054 return true;
2055 } else {
2056 #if defined(DEBUG)
2057 strlcpy(buffer, CONVERSIONFAILURESTR, bufferSize);
2058 #else
2059 if (bufferSize > 0) buffer[0] = 0;
2060 #endif
2061 return false;
2062 }
2063 }
2064 }
2065
2066 static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale) {
2067 CFStringRef collatorID;
2068 const char *langID = NULL;
2069 static const void *lastLocale = NULL;
2070 static const char *lastLangID = NULL;
2071 static CFSpinLock_t lock = CFSpinLockInit;
2072
2073 __CFSpinLock(&lock);
2074 if ((NULL != lastLocale) && (lastLocale == locale)) {
2075 __CFSpinUnlock(&lock);
2076 return lastLangID;
2077 }
2078 __CFSpinUnlock(&lock);
2079
2080 collatorID = CFLocaleGetValue(locale, __kCFLocaleCollatorID);
2081
2082 // This is somewhat depending on CFLocale implementation always creating CFString for locale identifer ???
2083 if (__CFStrLength(collatorID) > 1) {
2084 const void *contents = __CFStrContents(collatorID);
2085 const char *string;
2086 char buffer[2];
2087
2088 if (__CFStrIsEightBit(collatorID)) {
2089 string = ((const char *)contents) + __CFStrSkipAnyLengthByte(collatorID);
2090 } else {
2091 const UTF16Char *characters = (const UTF16Char *)contents;
2092
2093 buffer[0] = (char)*(characters++);
2094 buffer[1] = (char)*characters;
2095 string = buffer;
2096 }
2097
2098 if (!strncmp(string, "az", 2)) { // Azerbaijani
2099 langID = "az";
2100 } else if (!strncmp(string, "lt", 2)) { // Lithuanian
2101 langID = "lt";
2102 } else if (!strncmp(string, "tr", 2)) { // Turkish
2103 langID = "tr";
2104 }
2105 }
2106
2107 __CFSpinLock(&lock);
2108 lastLocale = locale;
2109 lastLangID = langID;
2110 __CFSpinUnlock(&lock);
2111
2112 return langID;
2113 }
2114
2115 static int8_t __CFCheckLocaleCFType = -1;
2116
2117 CF_INLINE bool _CFCanUseLocale(CFLocaleRef locale) {
2118 if (locale) {
2119 if (__CFCheckLocaleCFType < 0) __CFCheckLocaleCFType = !_CFExecutableLinkedOnOrAfter(CFSystemVersionPanther);
2120 if (!__CFCheckLocaleCFType || (CFGetTypeID(locale) == CFLocaleGetTypeID())) return true;
2121 }
2122 return false;
2123 }
2124
// Capacity (in UTF16Char units) of the scratch buffer handed to CFUniCharMapCaseTo() when case folding one character
2125 #define MAX_CASE_MAPPING_BUF (8)
// U+200D
2126 #define ZERO_WIDTH_JOINER (0x200D)
// U+034F
2127 #define COMBINING_GRAPHEME_JOINER (0x034F)
2128 // Hangul ranges
// Leading consonants (choseong), vowels (jungseong) and trailing consonants (jongseong) of the conjoining jamo
2129 #define HANGUL_CHOSEONG_START (0x1100)
2130 #define HANGUL_CHOSEONG_END (0x115F)
2131 #define HANGUL_JUNGSEONG_START (0x1160)
2132 #define HANGUL_JUNGSEONG_END (0x11A2)
2133 #define HANGUL_JONGSEONG_START (0x11A8)
2134 #define HANGUL_JONGSEONG_END (0x11F9)
2135
// Precomposed Hangul syllables
2136 #define HANGUL_SYLLABLE_START (0xAC00)
2137 #define HANGUL_SYLLABLE_END (0xD7AF)
2138
2139
/*
 * Folds the character cluster starting at `index` of `buffer` according to `flags` and writes
 * the result to `outCharacters` as UTF-32.
 *
 * character       - the character found at `index`; 0 makes the whole function a no-op
 * buffer          - inline buffer from which following characters are read
 * index           - position of `character` within `buffer`
 * flags           - comparison options; case-insensitive, width-insensitive, nonliteral and the
 *                   diacritics-insensitive mask each enable one of the steps below
 * langCode        - NULL or a language identifier; "tr" and "az" enable the capital-I special case
 * outCharacters   - receives the folded characters
 * maxBufferLength - capacity of outCharacters
 * consumedLength  - if non-NULL, and only when something was filled, receives currentIndex - index
 *
 * NOTE(review): currentIndex is advanced once for `character` and a second time only when a
 * high surrogate is paired up inside this function; if a caller passes an already-combined
 * supplementary character the second unit does not appear to be counted - confirm.
 */
2140 // Returns the length of characters filled into outCharacters. If no change, returns 0. maxBufferLength should be at least 8
2141 static CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength) {
2142 CFIndex filledLength = 0, currentIndex = index;
2143
2144 if (0 != character) {
2145 UTF16Char lowSurrogate;
2146 CFIndex planeNo = (character >> 16);
2147 bool isTurkikCapitalI = false;
// Plane-0 bitmaps are fetched once and kept in function statics
2148 static const uint8_t *decompBMP = NULL;
2149 static const uint8_t *graphemeBMP = NULL;
2150
2151 if (NULL == decompBMP) {
2152 decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
2153 graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2154 }
2155
// currentIndex is the next unread position in buffer; step past `character`
2156 ++currentIndex;
2157
// ASCII is handled inline, except 'I' when a language code is given (it must reach the language-specific handling in the else branch)
2158 if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII
2159 if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) {
2160 character += ('a' - 'A');
2161 *outCharacters = character;
2162 filledLength = 1;
2163 }
2164 } else {
2165 // do width-insensitive mapping
2166 if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) {
2167 (void)CFUniCharCompatibilityDecompose(&character, 1, 1);
2168 *outCharacters = character;
2169 filledLength = 1;
2170 }
2171
2172 // map surrogates
2173 if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) {
2174 character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
2175 ++currentIndex;
2176 planeNo = (character >> 16);
2177 }
2178
2179 // decompose
2180 if (flags & (kCFCompareDiacriticsInsensitiveCompatibilityMask|kCFCompareNonliteral)) {
2181 if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) {
2182 UTF32Char original = character;
2183
2184 filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength);
2185 character = *outCharacters;
2186
// Diacritics-insensitive with a base below 0x0510: keep only the base. Otherwise, if nonliteral was not requested, undo the decomposition.
2187 if ((flags & kCFCompareDiacriticsInsensitiveCompatibilityMask) && (character < 0x0510)) {
2188 filledLength = 1; // reset if Roman, Greek, Cyrillic
2189 } else if (0 == (flags & kCFCompareNonliteral)) {
2190 character = original;
2191 filledLength = 0;
2192 }
2193 }
2194 }
2195
2196 // fold case
2197 if (flags & kCFCompareCaseInsensitive) {
2198 const uint8_t *nonBaseBitmap;
// When set, grapheme-extend characters produced by case folding are dropped from the output
2199 bool filterNonBase = (((flags & kCFCompareDiacriticsInsensitiveCompatibilityMask) && (character < 0x0510)) ? true : false);
2200 static const uint8_t *lowerBMP = NULL;
2201 static const uint8_t *caseFoldBMP = NULL;
2202
2203 if (NULL == lowerBMP) {
2204 lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0);
2205 caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0);
2206 }
2207
// For "tr"/"az": 'I' followed by U+0307 (either already in outCharacters or next in buffer) becomes plain 'i' and the U+0307 is consumed
2208 if ((NULL != langCode) && ('I' == character) && ((0 == strcmp((const char *)langCode, "tr")) || (0 == strcmp((const char *)langCode, "az")))) { // do Turkic special-casing
2209 if (filledLength > 1) {
2210 if (0x0307 == outCharacters[1]) {
2211 if (--filledLength > 1) memmove((outCharacters + 1), (outCharacters + 2), sizeof(UTF32Char) * (filledLength - 1));
2212 character = *outCharacters = 'i';
2213 isTurkikCapitalI = true;
2214 }
2215 } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) {
2216 character = *outCharacters = 'i';
2217 filledLength = 1;
2218 ++currentIndex;
2219 isTurkikCapitalI = true;
2220 }
2221 }
// General case mapping, only for characters listed in the lowercase or case-folding bitmaps of their plane
2222 if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) {
2223 UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF];
2224 const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit;
2225 UTF32Char *outCharactersP = outCharacters;
2226 uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode);
2227
2228 bufferLimit = bufferP + bufferLength;
2229
2230 if (filledLength > 0) --filledLength; // decrement filledLength (will add back later)
2231
2232 // make space for casefold characters
2233 if ((filledLength > 0) && (bufferLength > 1)) {
2234 CFIndex totalScalerLength = 0;
2235
// Count the folded result in scalar values (a surrogate pair counts once), then shift the existing tail right to make room
2236 while (bufferP < bufferLimit) {
2237 if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP;
2238 ++totalScalerLength;
2239 }
2240 memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char));
2241 bufferP = caseFoldBuffer;
2242 }
2243
2244 // fill
2245 while (bufferP < bufferLimit) {
2246 character = *(bufferP++);
2247 if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) {
2248 character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++));
2249 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
2250 } else {
2251 nonBaseBitmap = graphemeBMP;
2252 }
2253
2254 if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2255 *(outCharactersP++) = character;
2256 ++filledLength;
2257 }
2258 }
2259 }
2260 }
2261 }
2262
2263 // collect following combining marks
2264 if (flags & (kCFCompareDiacriticsInsensitiveCompatibilityMask|kCFCompareNonliteral)) {
2265 const uint8_t *nonBaseBitmap;
2266 const uint8_t *decompBitmap;
// doFill == false means following marks are consumed (index advanced) but not copied to outCharacters
2267 bool doFill = (((flags & kCFCompareDiacriticsInsensitiveCompatibilityMask) && (character < 0x0510)) ? false : true);
2268
2269 if (0 == filledLength) {
2270 *outCharacters = character; // filledLength will be updated below on demand
2271
2272 if (doFill) { // check if really needs to fill
2273 UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2274
2275 if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2276 nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate);
2277 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (nonBaseCharacter >> 16));
2278 decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16));
2279 } else {
2280 nonBaseBitmap = graphemeBMP;
2281 decompBitmap = decompBMP;
2282 }
2283
2284 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) {
2285 filledLength = 1; // For the base character
2286
2287 if ((0 == (flags & kCFCompareDiacriticsInsensitiveCompatibilityMask)) || (nonBaseCharacter > 0x050F)) {
2288 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) {
2289 filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2290 } else {
2291 outCharacters[filledLength++] = nonBaseCharacter;
2292 }
2293 }
// The bitmap pointer doubles as a marker: graphemeBMP means a single UTF-16 unit was read, anything else means a surrogate pair
2294 currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
2295 } else {
2296 doFill = false;
2297 }
2298 }
2299 }
2300
2301 while (filledLength < maxBufferLength) { // do the rest
2302 character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2303
2304 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2305 character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
2306 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (character >> 16));
2307 decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16));
2308 } else {
2309 nonBaseBitmap = graphemeBMP;
2310 decompBitmap = decompBMP;
2311 }
// The first pass after the Turkic special case only clears the flag; the character read above is neither stored nor skipped in that pass
2312 if (isTurkikCapitalI) {
2313 isTurkikCapitalI = false;
2314 } else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2315 if (doFill) {
2316 if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) {
2317 CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2318
2319 if (0 == currentLength) break; // didn't fit
2320
2321 filledLength += currentLength;
2322 } else {
2323 outCharacters[filledLength++] = character;
2324 }
2325 } else if (0 == filledLength) {
2326 filledLength = 1; // For the base character
2327 }
2328 currentIndex += ((nonBaseBitmap == graphemeBMP) ? 1 : 2);
2329 } else {
2330 break;
2331 }
2332 }
2333
// Walk back over the trailing run of grapheme-extend characters and priority-sort that run together with the character preceding it
2334 if (filledLength > 1) {
2335 UTF32Char *sortCharactersLimit = outCharacters + filledLength;
2336 UTF32Char *sortCharacters = sortCharactersLimit - 1;
2337
2338 while ((outCharacters < sortCharacters) && CFUniCharIsMemberOfBitmap(*sortCharacters, ((*sortCharacters < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (*sortCharacters >> 16))))) --sortCharacters;
2339
2340 if ((sortCharactersLimit - sortCharacters) > 1) CFUniCharPrioritySort(sortCharacters, (sortCharactersLimit - sortCharacters)); // priority sort
2341 }
2342 }
2343 }
2344
// consumedLength is left untouched when nothing was filled
2345 if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index);
2346
2347 return filledLength;
2348 }
2349
// Number of UTF32Char slots in the on-stack fold buffers (strBuf1/strBuf2) of the compare and find routines below
2350 #define kCFStringStackBufferLength (64)
2351
/*
 * Compares the `rangeToCompare` portion of `string` with the whole of `string2`.
 *
 * string         - first string; only `rangeToCompare` of it takes part
 * string2        - second string; compared from index 0 for its full length
 * rangeToCompare - range within `string`
 * compareOptions - kCFCompare* flags (case/diacritics/width insensitivity, nonliteral,
 *                  numeric, forced ordering, localized)
 * locale         - see NOTE below
 *
 * Returns kCFCompareLessThan, kCFCompareEqualTo or kCFCompareGreaterThan.
 *
 * NOTE: in this build the incoming `locale` is overwritten with NULL on entry and
 * _CFCompareStringsWithLocale is defined to the constant 0, so the caller's locale is
 * ignored. A locale is only obtained (via CFLocaleCopyCurrent) when kCFCompareLocalized is
 * set, and it is released on every exit path that can be reached with it set.
 */
2352 CFComparisonResult CFStringCompareWithOptionsAndLocale(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFOptionFlags compareOptions, CFLocaleRef locale) {
2353 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2354 UTF32Char strBuf1[kCFStringStackBufferLength];
2355 UTF32Char strBuf2[kCFStringStackBufferLength];
2356 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2357 UTF32Char str1Char, str2Char;
2358 CFIndex str1UsedLen, str2UsedLen;
// strN Index: position in the source string; strBufN Len/Index: length of and read position in the folded cluster currently buffered for that string (Len 0 = nothing buffered)
2359 CFIndex str1Index = 0, str2Index = 0, strBuf1Index = 0, strBuf2Index = 0, strBuf1Len = 0, strBuf2Len = 0;
2360 CFIndex str2Len = CFStringGetLength(string2);
2361 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2362 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticsInsensitiveCompatibilityMask) ? true : false);
2363 bool equalityOptions = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticsInsensitiveCompatibilityMask|kCFCompareWidthInsensitive)) ? true : false);
2364 bool numerically = ((compareOptions & kCFCompareNumerically) ? true : false);
2365 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2366 const uint8_t *langCode;
2367 CFComparisonResult compareResult = kCFCompareEqualTo;
2368 UTF16Char otherChar;
2369 Boolean freeLocale = false;
2370
// Locale-aware collation is compiled out here: the helper is stubbed to 0 and the caller's locale is discarded
2371 #define _CFCompareStringsWithLocale(A, B, C, D, E, F) (0)
2372 locale = NULL;
2373
2374 if ((compareOptions & kCFCompareLocalized) && (NULL == locale)) {
2375 locale = CFLocaleCopyCurrent();
2376 freeLocale = true;
2377 }
2378
2379 langCode = ((NULL == locale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale));
2380
// Fast paths operating directly on the strings' storage. They are entered only when locale is NULL, so their early returns have nothing to release.
2381 if ((NULL == locale) && !numerically) { // could do binary comp (be careful when adding new flags)
2382 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2383 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2384 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(string2, eightBitEncoding);
2385 CFIndex factor = sizeof(uint8_t);
2386
2387 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2388 compareOptions &= ~kCFCompareNonliteral; // remove non-literal
2389
2390 if (kCFStringEncodingASCII == eightBitEncoding) {
2391 if (caseInsensitive) {
2392 int cmpResult = strncasecmp_l((const char *)str1Bytes + rangeToCompare.location, (const char *)str2Bytes, __CFMin(rangeToCompare.length, str2Len), NULL);
2393
// NOTE(review): a CFIndex difference is stored in an int here and in the similar lines below - confirm it cannot truncate on 64-bit targets
2394 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2395
2396 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2397 }
2398 } else if (caseInsensitive || diacriticsInsensitive) {
2399 CFIndex limitLength = __CFMin(rangeToCompare.length, str2Len);
2400
2401 str1Bytes += rangeToCompare.location;
2402
// Byte-wise compare with ASCII-only case folding; the first mismatch involving a byte >= 0x80 abandons the fast path (str1Bytes = NULL) and the general loop resumes from this index
2403 while (str1Index < limitLength) {
2404 str1Char = str1Bytes[str1Index];
2405 str2Char = str2Bytes[str1Index];
2406
2407 if (str1Char != str2Char) {
2408 if ((str1Char < 0x80) && (str2Char < 0x80)) {
2409 if ((str1Char >= 'A') && (str1Char <= 'Z')) str1Char += ('a' - 'A');
2410 if ((str2Char >= 'A') && (str2Char <= 'Z')) str2Char += ('a' - 'A');
2411
2412 if (str1Char != str2Char) return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2413 } else {
2414 str1Bytes = NULL;
2415 break;
2416 }
2417 }
2418 ++str1Index;
2419 }
2420
2421 str2Index = str1Index;
2422
2423 if (str1Index == limitLength) {
2424 int cmpResult = rangeToCompare.length - str2Len;
2425
2426 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2427 }
2428 }
2429 } else if (!equalityOptions && (NULL == str1Bytes) && (NULL == str2Bytes)) {
// Neither string exposes eight-bit storage: try the UniChar storage of both instead
2430 str1Bytes = (const uint8_t *)CFStringGetCharactersPtr(string);
2431 str2Bytes = (const uint8_t *)CFStringGetCharactersPtr(string2);
2432 factor = sizeof(UTF16Char);
2433 #if __LITTLE_ENDIAN__
2434 if ((NULL != str1Bytes) && (NULL != str2Bytes)) { // we cannot use memcmp
2435 const UTF16Char *str1 = ((const UTF16Char *)str1Bytes) + rangeToCompare.location;
2436 const UTF16Char *str1Limit = str1 + __CFMin(rangeToCompare.length, str2Len);
2437 const UTF16Char *str2 = (const UTF16Char *)str2Bytes;
2438 CFIndex cmpResult = 0;
2439
2440 while ((0 == cmpResult) && (str1 < str1Limit)) cmpResult = (CFIndex)*(str1++) - (CFIndex)*(str2++);
2441
2442 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2443
2444 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2445 }
2446 #endif /* __LITTLE_ENDIAN__ */
2447 }
// Raw memory comparison; `factor` is the element size of whichever storage was obtained above
2448 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2449 int cmpResult = memcmp(str1Bytes + (rangeToCompare.location * factor), str2Bytes, __CFMin(rangeToCompare.length, str2Len) * factor);
2450
2451 if (0 == cmpResult) cmpResult = rangeToCompare.length - str2Len;
2452
2453 return ((0 == cmpResult) ? kCFCompareEqualTo : ((cmpResult < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
2454 }
2455 }
2456
// General path: read both strings through inline buffers
2457 CFStringInitInlineBuffer(string, &inlineBuf1, rangeToCompare);
2458 CFStringInitInlineBuffer(string2, &inlineBuf2, CFRangeMake(0, str2Len));
2459
2460 while ((str1Index < rangeToCompare.length) && (str2Index < str2Len)) {
// Next character for each side: from the string itself (with ASCII case folding) when no folded cluster is pending, otherwise from the fold buffer
2461 if (strBuf1Len == 0) {
2462 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2463 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A');
2464 str1UsedLen = 1;
2465 } else {
2466 str1Char = strBuf1[strBuf1Index++];
2467 }
2468 if (strBuf2Len == 0) {
2469 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2470 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A');
2471 str2UsedLen = 1;
2472 } else {
2473 str2Char = strBuf2[strBuf2Index++];
2474 }
2475
2476 if (numerically && ((0 == strBuf1Len) && (str1Char <= '9') && (str1Char >= '0')) && ((0 == strBuf2Len) && (str2Char <= '9') && (str2Char >= '0'))) { // If both are not ASCII digits, then don't do numerical comparison here
2477 uint64_t intValue1 = 0, intValue2 = 0; // !!! Doesn't work if numbers are > max uint64_t
2478
// Accumulate each run of ASCII digits as a decimal value, advancing the string indices past the run
2479 do {
2480 intValue1 = (intValue1 * 10) + (str1Char - '0');
2481 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, ++str1Index);
2482 } while ((str1Char <= '9') && (str1Char >= '0'));
2483
2484 do {
2485 intValue2 = intValue2 * 10 + (str2Char - '0');
2486 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, ++str2Index);
2487 } while ((str2Char <= '9') && (str2Char >= '0'));
2488
2489 if (intValue1 == intValue2) {
2490 continue;
2491 } else if (intValue1 < intValue2) {
2492 if (freeLocale && locale) {
2493 CFRelease(locale);
2494 }
2495 return kCFCompareLessThan;
2496 } else {
2497 if (freeLocale && locale) {
2498 CFRelease(locale);
2499 }
2500 return kCFCompareGreaterThan;
2501 }
2502 }
2503
2504 if (str1Char != str2Char) {
// NOTE(review): the ranges passed to _CFCompareStringsWithLocale in this function are built from strBuf*Index rather than str*Index; harmless while the helper is stubbed to 0 - verify before enabling it
2505 if (!equalityOptions) {
2506 CFComparisonResult res = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(strBuf1Index, rangeToCompare.length - strBuf1Index), &inlineBuf2, CFRangeMake(strBuf2Index, str2Len - strBuf2Index), compareOptions, locale));
2507 if (freeLocale && locale) {
2508 CFRelease(locale);
2509 }
2510 return res;
2511 }
2512
// With forced ordering, remember the ordering of the first literal difference; it is returned at the end if the strings turn out equal under the options
2513 if ((compareOptions & kCFCompareForcedOrdering) && (kCFCompareEqualTo == compareResult)) compareResult = ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2514
2515 if ((str1Char < 0x80) && (str2Char < 0x80)) {
2516 if (NULL != locale) {
2517 CFComparisonResult res = _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(strBuf1Index, rangeToCompare.length - strBuf1Index), &inlineBuf2, CFRangeMake(strBuf2Index, str2Len - strBuf2Index), compareOptions, locale);
2518 if (freeLocale && locale) {
2519 CFRelease(locale);
2520 }
2521 return res;
2522 } else if (!caseInsensitive) {
2523 if (freeLocale && locale) {
2524 CFRelease(locale);
2525 }
2526 return ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan);
2527 }
2528 }
2529
// Combine surrogate pairs so the checks below see full code points; the used length becomes 2 units
2530 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2531 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2532 str1UsedLen = 2;
2533 }
2534
2535 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
2536 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2537 str2UsedLen = 2;
2538 }
2539
// Diacritics-insensitive: a grapheme-extend character read straight from a string is treated as matching; if only one side was skipped, the other side's index is pulled back so its character is re-read next iteration
2540 if (diacriticsInsensitive && (str1Index > 0)) {
2541 bool str1Skip = false;
2542 bool str2Skip = false;
2543
2544 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
2545 str1Char = str2Char;
2546 str1Skip = true;
2547 }
2548 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
2549 str2Char = str1Char;
2550 str2Skip = true;
2551 }
2552
2553 if (str1Skip != str2Skip) {
2554 if (str1Skip) str2Index -= str2UsedLen;
2555 if (str2Skip) str1Index -= str1UsedLen;
2556 }
2557 }
2558
// Still different: fold the cluster on side 1 and, if needed, on side 2, then compare the folded first characters
2559 if (str1Char != str2Char) {
2560 if (0 == strBuf1Len) {
2561 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
2562 if (strBuf1Len > 0) {
2563 str1Char = *strBuf1;
2564 strBuf1Index = 1;
2565 }
2566 }
2567
2568 if ((0 == strBuf1Len) && (0 < strBuf2Len)) {
2569 CFComparisonResult res = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(strBuf1Index, rangeToCompare.length - strBuf1Index), &inlineBuf2, CFRangeMake(strBuf2Index, str2Len - strBuf2Index), compareOptions, locale));
2570 if (freeLocale && locale) {
2571 CFRelease(locale);
2572 }
2573 return res;
2574 }
2575
2576 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
2577 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
2578 if (strBuf2Len > 0) {
2579 str2Char = *strBuf2;
2580 strBuf2Index = 1;
2581 }
2582 if ((0 == strBuf2Len) || (str1Char != str2Char)) {
2583 CFComparisonResult res = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(strBuf1Index, rangeToCompare.length - strBuf1Index), &inlineBuf2, CFRangeMake(strBuf2Index, str2Len - strBuf2Index), compareOptions, locale));
2584 if (freeLocale && locale) {
2585 CFRelease(locale);
2586 }
2587 return res;
2588 }
2589 }
2590 }
2591
// Both sides have a folded cluster: advance through the common prefix; a difference before either buffer is exhausted decides the result
2592 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
2593 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2594 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
2595 ++strBuf1Index; ++strBuf2Index;
2596 }
2597 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2598 CFComparisonResult res = ((NULL == locale) ? ((str1Char < str2Char) ? kCFCompareLessThan : kCFCompareGreaterThan) : _CFCompareStringsWithLocale(&inlineBuf1, CFRangeMake(strBuf1Index, rangeToCompare.length - strBuf1Index), &inlineBuf2, CFRangeMake(strBuf2Index, str2Len - strBuf2Index), compareOptions, locale));
2599 if (freeLocale && locale) {
2600 CFRelease(locale);
2601 }
2602 return res;
2603 }
2604 }
2605 }
2606
// A fully consumed fold buffer is retired; a side with no pending buffer advances in its string by the number of units it used
2607 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2608 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2609
2610 if (strBuf1Len == 0) str1Index += str1UsedLen;
2611 if (strBuf2Len == 0) str2Index += str2UsedLen;
2612 }
2613
// Diacritics-insensitive: skip any trailing non-ASCII grapheme-extend characters on either side before deciding by remaining length
2614 if (diacriticsInsensitive) {
2615 while (str1Index < rangeToCompare.length) {
2616 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2617 if (str1Char < 0x80) break; // found ASCII
2618
2619 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2620
2621 if (!CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) break;
2622
2623 str1Index += ((str1Char < 0x10000) ? 1 : 2);
2624 }
2625
2626 while (str2Index < str2Len) {
2627 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2628 if (str2Char < 0x80) break; // found ASCII
2629
2630 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2631
2632 if (!CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) break;
2633
2634 str2Index += ((str2Char < 0x10000) ? 1 : 2);
2635 }
2636 }
2637
2638 if (freeLocale && locale) {
2639 CFRelease(locale);
2640 }
2641
// Leftover characters on side 1 => greater; on side 2 => less; otherwise the (possibly forced-ordering) result recorded above
2642 return ((str1Index < rangeToCompare.length) ? kCFCompareGreaterThan : ((str2Index < str2Len) ? kCFCompareLessThan : compareResult));
2643 }
2644
2645
2646 CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFOptionFlags compareOptions) { return CFStringCompareWithOptionsAndLocale(string, string2, rangeToCompare, compareOptions, NULL); }
2647
2648 CFComparisonResult CFStringCompare(CFStringRef string, CFStringRef str2, CFOptionFlags options) {
2649 return CFStringCompareWithOptions(string, str2, CFRangeMake(0, CFStringGetLength(string)), options);
2650 }
2651
2652 Boolean CFStringFindWithOptionsAndLocale(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions, CFLocaleRef locale, CFRange *result) {
2653 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2654 CFIndex findStrLen = CFStringGetLength(stringToFind);
2655 Boolean didFind = false;
2656 bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticsInsensitiveCompatibilityMask)) ? true : false);
2657
2658 if ((findStrLen > 0) && (rangeToSearch.length > 0) && ((findStrLen <= rangeToSearch.length) || lengthVariants)) {
2659 UTF32Char strBuf1[kCFStringStackBufferLength];
2660 UTF32Char strBuf2[kCFStringStackBufferLength];
2661 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2662 UTF32Char str1Char, str2Char;
2663 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2664 const uint8_t *str1Bytes = (const uint8_t *)CFStringGetCStringPtr(string, eightBitEncoding);
2665 const uint8_t *str2Bytes = (const uint8_t *)CFStringGetCStringPtr(stringToFind, eightBitEncoding);
2666 const UTF32Char *characters, *charactersLimit;
2667 const uint8_t *langCode = NULL;
2668 CFIndex fromLoc, toLoc;
2669 CFIndex str1Index, str2Index;
2670 CFIndex strBuf1Len, strBuf2Len;
2671 bool equalityOptions = ((lengthVariants || (compareOptions & kCFCompareWidthInsensitive)) ? true : false);
2672 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2673 int8_t delta;
2674
2675 if (NULL == locale) {
2676 if (compareOptions & kCFCompareLocalized) {
2677 CFLocaleRef currentLocale = CFLocaleCopyCurrent();
2678 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(currentLocale);
2679 CFRelease(currentLocale);
2680 }
2681 } else {
2682 langCode = (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(locale);
2683 }
2684
2685 CFStringInitInlineBuffer(string, &inlineBuf1, CFRangeMake(0, rangeToSearch.location + rangeToSearch.length));
2686 CFStringInitInlineBuffer(stringToFind, &inlineBuf2, CFRangeMake(0, findStrLen));
2687
2688 if (compareOptions & kCFCompareBackwards) {
2689 fromLoc = rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen);
2690 toLoc = (((compareOptions & kCFCompareAnchored) && !lengthVariants) ? fromLoc : rangeToSearch.location);
2691 } else {
2692 fromLoc = rangeToSearch.location;
2693 toLoc = ((compareOptions & kCFCompareAnchored) ? fromLoc : rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen));
2694 }
2695
2696 delta = ((fromLoc <= toLoc) ? 1 : -1);
2697
2698 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2699 CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length);
2700 uint8_t str1Byte, str2Byte;
2701
2702 while (1) {
2703 str1Index = fromLoc;
2704 str2Index = 0;
2705
2706 while ((str1Index < maxStr1Index) && (str2Index < findStrLen)) {
2707 str1Byte = str1Bytes[str1Index];
2708 str2Byte = str2Bytes[str2Index];
2709
2710 if (str1Byte != str2Byte) {
2711 if (equalityOptions) {
2712 if ((str1Byte < 0x80) && ((NULL == langCode) || ('I' != str1Byte))) {
2713 if (caseInsensitive && (str1Byte >= 'A') && (str1Byte <= 'Z')) str1Byte += ('a' - 'A');
2714 *strBuf1 = str1Byte;
2715 strBuf1Len = 1;
2716 } else {
2717 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2718 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2719 if (1 > strBuf1Len) {
2720 *strBuf1 = str1Char;
2721 strBuf1Len = 1;
2722 }
2723 }
2724 if ((str2Byte < 0x80) && ((NULL == langCode) || ('I' != str2Byte))) {
2725 if (caseInsensitive && (str2Byte >= 'A') && (str2Byte <= 'Z')) str2Byte += ('a' - 'A');
2726 *strBuf2 = str2Byte;
2727 strBuf2Len = 1;
2728 } else {
2729 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2730 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2731 if (1 > strBuf2Len) {
2732 *strBuf2 = str2Char;
2733 strBuf2Len = 1;
2734 }
2735 }
2736
2737 if ((1 == strBuf1Len) && (1 == strBuf2Len)) { // normal case
2738 if (*strBuf1 != *strBuf2) break;
2739 } else {
2740 CFIndex delta;
2741
2742 if (!caseInsensitive && (strBuf1Len != strBuf2Len)) break;
2743 if (memcmp(strBuf1, strBuf2, sizeof(UTF32Char) * __CFMin(strBuf1Len, strBuf2Len))) break;
2744
2745 if (strBuf1Len < strBuf2Len) {
2746 delta = strBuf2Len - strBuf1Len;
2747
2748 if ((str1Index + strBuf1Len + delta) > (rangeToSearch.location + rangeToSearch.length)) break;
2749
2750 characters = &(strBuf2[strBuf1Len]);
2751 charactersLimit = characters + delta;
2752
2753 while (characters < charactersLimit) {
2754 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1), &inlineBuf1, str1Index + 1, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2755 if ((strBuf1Len > 0) || (*characters != *strBuf1)) break;
2756 ++characters; ++str1Index;
2757 }
2758 if (characters < charactersLimit) break;
2759 } else if (strBuf2Len < strBuf1Len) {
2760 delta = strBuf1Len - strBuf2Len;
2761
2762 if ((str2Index + strBuf2Len + delta) > findStrLen) break;
2763
2764 characters = &(strBuf1[strBuf2Len]);
2765 charactersLimit = characters + delta;
2766
2767 while (characters < charactersLimit) {
2768 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str1Index + 1), &inlineBuf2, str2Index + 1, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2769 if ((strBuf2Len > 0) || (*characters != *strBuf2)) break;
2770 ++characters; ++str2Index;
2771 }
2772 if (characters < charactersLimit) break;
2773 }
2774 }
2775 } else {
2776 break;
2777 }
2778 }
2779 ++str1Index; ++str2Index;
2780 }
2781
2782 if (str2Index == findStrLen) {
2783 if (((kCFCompareBackwards|kCFCompareAnchored) != (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) || (str1Index == (rangeToSearch.location + rangeToSearch.length))) {
2784 didFind = true;
2785 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
2786 }
2787 break;
2788 }
2789
2790 if (fromLoc == toLoc) break;
2791 fromLoc += delta;
2792 }
2793 } else if (equalityOptions) {
2794 UTF16Char otherChar;
2795 CFIndex str1UsedLen, str2UsedLen, strBuf1Index = 0, strBuf2Index = 0;
2796 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticsInsensitiveCompatibilityMask) ? true : false);
2797 const uint8_t *graphemeBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, 0);
2798 const uint8_t *combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
2799
2800 while (1) {
2801 str1Index = fromLoc;
2802 str2Index = 0;
2803
2804 strBuf1Len = strBuf2Len = 0;
2805
2806 while (str2Index < findStrLen) {
2807 if (strBuf1Len == 0) {
2808 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2809 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A');
2810 str1UsedLen = 1;
2811 } else {
2812 str1Char = strBuf1[strBuf1Index++];
2813 }
2814 if (strBuf2Len == 0) {
2815 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2816 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A');
2817 str2UsedLen = 1;
2818 } else {
2819 str2Char = strBuf2[strBuf2Index++];
2820 }
2821
2822 if (str1Char != str2Char) {
2823 if ((str1Char < 0x80) && (str2Char < 0x80) && ((NULL == langCode) || !caseInsensitive)) break;
2824
2825 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2826 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2827 str1UsedLen = 2;
2828 }
2829
2830 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
2831 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2832 str2UsedLen = 2;
2833 }
2834
2835 if (diacriticsInsensitive && (str1Index > fromLoc)) {
2836 bool str1Skip = false;
2837 bool str2Skip = false;
2838
2839 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16))))) {
2840 str1Char = str2Char;
2841 str1Skip = true;
2842 }
2843 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str2Char >> 16))))) {
2844 str2Char = str1Char;
2845 str2Skip = true;
2846 }
2847
2848 if (str1Skip != str2Skip) {
2849 if (str1Skip) str2Index -= str2UsedLen;
2850 if (str2Skip) str1Index -= str1UsedLen;
2851 }
2852 }
2853
2854 if (str1Char != str2Char) {
2855 if (0 == strBuf1Len) {
2856 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
2857 if (strBuf1Len > 0) {
2858 str1Char = *strBuf1;
2859 strBuf1Index = 1;
2860 }
2861 }
2862
2863 if ((0 == strBuf1Len) && (0 < strBuf2Len)) break;
2864
2865 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
2866 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
2867 if ((0 == strBuf2Len) || (str1Char != *strBuf2)) break;
2868 strBuf2Index = 1;
2869 }
2870 }
2871
2872 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
2873 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2874 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
2875 ++strBuf1Index; ++strBuf2Index;
2876 }
2877 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) break;
2878 }
2879 }
2880
2881 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2882 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2883
2884 if (strBuf1Len == 0) str1Index += str1UsedLen;
2885 if (strBuf2Len == 0) str2Index += str2UsedLen;
2886 }
2887
2888 if (str2Index == findStrLen) {
2889 bool match = true;
2890
2891 if (strBuf1Len > 0) {
2892 match = false;
2893
2894 if ((compareOptions & kCFCompareDiacriticsInsensitiveCompatibilityMask) && (strBuf1[0] < 0x0510)) {
2895 while (strBuf1Index < strBuf1Len) {
2896 if (!CFUniCharIsMemberOfBitmap(strBuf1[strBuf1Index], ((strBuf1[strBuf1Index] < 0x10000) ? graphemeBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (strBuf1[strBuf1Index] >> 16))))) break;
2897 ++strBuf1Index;
2898 }
2899
2900 if (strBuf1Index == strBuf1Len) {
2901 str1Index += str1UsedLen;
2902 match = true;
2903 }
2904 }
2905 }
2906
2907 if (match && (compareOptions & (kCFCompareDiacriticsInsensitiveCompatibilityMask|kCFCompareNonliteral)) && (str1Index < (rangeToSearch.location + rangeToSearch.length))) {
2908 const uint8_t *nonBaseBitmap;
2909
2910 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2911
2912 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2913 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2914 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharGraphemeExtendCharacterSet, (str1Char >> 16));
2915 } else {
2916 nonBaseBitmap = graphemeBMP;
2917 }
2918
2919 if (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBitmap)) {
2920 if (diacriticsInsensitive) {
2921 if (str1Char < 0x10000) {
2922 CFIndex index = str1Index;
2923
2924 do {
2925 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, --index);
2926 } while (CFUniCharIsMemberOfBitmap(str1Char, graphemeBMP), (rangeToSearch.location < index));
2927
2928 if (str1Char < 0x0510) {
2929 CFIndex maxIndex = (rangeToSearch.location + rangeToSearch.length);
2930
2931 while (++str1Index < maxIndex) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), graphemeBMP)) break;
2932 }
2933 }
2934 } else {
2935 match = false;
2936 }
2937 } else if (!diacriticsInsensitive) {
2938 otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index - 1);
2939
2940 // this is assuming viramas are only in BMP ???
2941 if ((str1Char == COMBINING_GRAPHEME_JOINER) || (otherChar == COMBINING_GRAPHEME_JOINER) || (otherChar == ZERO_WIDTH_JOINER) || ((otherChar >= HANGUL_CHOSEONG_START) && (otherChar <= HANGUL_JONGSEONG_END)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar, combClassBMP) == 9)) {
2942 CFRange clusterRange = CFStringGetRangeOfCharacterClusterAtIndex(string, str1Index - 1, kCFStringGraphemeCluster);
2943
2944 if (str1Index < (clusterRange.location + clusterRange.length)) match = false;
2945 }
2946 }
2947 }
2948
2949 if (match) {
2950 if (((kCFCompareBackwards|kCFCompareAnchored) != (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) || (str1Index == (rangeToSearch.location + rangeToSearch.length))) {
2951 didFind = true;
2952 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
2953 }
2954 break;
2955 }
2956 }
2957
2958 if (fromLoc == toLoc) break;
2959 fromLoc += delta;
2960 }
2961 } else {
2962 while (1) {
2963 str1Index = fromLoc;
2964 str2Index = 0;
2965
2966 while (str2Index < findStrLen) {
2967 if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index)) break;
2968
2969 ++str1Index; ++str2Index;
2970 }
2971
2972 if (str2Index == findStrLen) {
2973 didFind = true;
2974 if (NULL != result) *result = CFRangeMake(fromLoc, findStrLen);
2975 break;
2976 }
2977
2978 if (fromLoc == toLoc) break;
2979 fromLoc += delta;
2980 }
2981 }
2982 }
2983
2984 return didFind;
2985 }
2986
2987 Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions, CFRange *result) { return CFStringFindWithOptionsAndLocale(string, stringToFind, rangeToSearch, compareOptions, NULL, result); }
2988
2989 // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
2990
2991 static const void *__rangeRetain(CFAllocatorRef allocator, const void *ptr) {
2992 CFRetain(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
2993 return ptr;
2994 }
2995
2996 static void __rangeRelease(CFAllocatorRef allocator, const void *ptr) {
2997 CFRelease(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
2998 }
2999
3000 static CFStringRef __rangeCopyDescription(const void *ptr) {
3001 CFRange range = *(CFRange *)ptr;
3002 return CFStringCreateWithFormat(kCFAllocatorSystemDefault, NULL, CFSTR("{%d, %d}"), range.location, range.length);
3003 }
3004
3005 static Boolean __rangeEqual(const void *ptr1, const void *ptr2) {
3006 CFRange range1 = *(CFRange *)ptr1;
3007 CFRange range2 = *(CFRange *)ptr2;
3008 return (range1.location == range2.location) && (range1.length == range2.length);
3009 }
3010
3011
3012 CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions) {
3013 CFRange foundRange;
3014 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
3015 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
3016 CFMutableDataRef rangeStorage = NULL; // Basically an array of CFRange, CFDataRef (packed)
3017 uint8_t *rangeStorageBytes = NULL;
3018 CFIndex foundCount = 0;
3019 CFIndex capacity = 0; // Number of CFRange, CFDataRef element slots in rangeStorage
3020
3021 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3022
3023 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
3024 // Determine the next range
3025 if (backwards) {
3026 rangeToSearch.length = foundRange.location - rangeToSearch.location;
3027 } else {
3028 rangeToSearch.location = foundRange.location + foundRange.length;
3029 rangeToSearch.length = endIndex - rangeToSearch.location;
3030 }
3031
3032 // If necessary, grow the data and squirrel away the found range
3033 if (foundCount >= capacity) {
3034 if (rangeStorage == NULL) rangeStorage = CFDataCreateMutable(alloc, 0);
3035 capacity = (capacity + 4) * 2;
3036 CFDataSetLength(rangeStorage, capacity * (sizeof(CFRange) + sizeof(CFDataRef)));
3037 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage) + foundCount * (sizeof(CFRange) + sizeof(CFDataRef));
3038 }
3039 memmove(rangeStorageBytes, &foundRange, sizeof(CFRange)); // The range
3040 memmove(rangeStorageBytes + sizeof(CFRange), &rangeStorage, sizeof(CFDataRef)); // The data
3041 rangeStorageBytes += (sizeof(CFRange) + sizeof(CFDataRef));
3042 foundCount++;
3043 }
3044
3045 if (foundCount > 0) {
3046 CFIndex cnt;
3047 CFMutableArrayRef array;
3048 const CFArrayCallBacks callbacks = {0, __rangeRetain, __rangeRelease, __rangeCopyDescription, __rangeEqual};
3049
3050 CFDataSetLength(rangeStorage, foundCount * (sizeof(CFRange) + sizeof(CFDataRef))); // Tighten storage up
3051 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage);
3052
3053 array = CFArrayCreateMutable(alloc, foundCount * sizeof(CFRange *), &callbacks);
3054 for (cnt = 0; cnt < foundCount; cnt++) {
3055 // Each element points to the appropriate CFRange in the CFData
3056 CFArrayAppendValue(array, rangeStorageBytes + cnt * (sizeof(CFRange) + sizeof(CFDataRef)));
3057 }
3058 CFRelease(rangeStorage); // We want the data to go away when all CFRanges inside it are released...
3059 return array;
3060 } else {
3061 return NULL;
3062 }
3063 }
3064
3065
3066 CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFOptionFlags compareOptions) {
3067 CFRange foundRange;
3068
3069 if (CFStringFindWithOptions(string, stringToFind, CFRangeMake(0, CFStringGetLength(string)), compareOptions, &foundRange)) {
3070 return foundRange;
3071 } else {
3072 return CFRangeMake(kCFNotFound, 0);
3073 }
3074 }
3075
3076 Boolean CFStringHasPrefix(CFStringRef string, CFStringRef prefix) {
3077 return CFStringFindWithOptions(string, prefix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored, NULL);
3078 }
3079
3080 Boolean CFStringHasSuffix(CFStringRef string, CFStringRef suffix) {
3081 return CFStringFindWithOptions(string, suffix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored|kCFCompareBackwards, NULL);
3082 }
3083
3084 #define MAX_TRANSCODING_LENGTH 4
3085
3086 #define HANGUL_JONGSEONG_COUNT (28)
3087
3088 CF_INLINE bool _CFStringIsHangulLVT(UTF32Char character) {
3089 return (((character - HANGUL_SYLLABLE_START) % HANGUL_JONGSEONG_COUNT) ? true : false);
3090 }
3091
/* Lengths associated with the transcoding hint characters; indexed by
   (hint character - 0xF860), so there is one entry per value 0xF860..0xF86F.
*/
static uint8_t __CFTranscodingHintLength[16] = {
    2, 3, 4, 4,		/* 0xF860 - 0xF863 */
    4, 4, 4, 2,		/* 0xF864 - 0xF867 */
    2, 2, 2, 4,		/* 0xF868 - 0xF86B */
    0, 0, 0, 0		/* 0xF86C - 0xF86F */
};
3095
/* States of the Hangul syllable scanner used when extending a composed
   character range across conjoining jamo and precomposed syllables.
*/
enum {
    kCFStringHangulStateL = 0,		/* leading consonant */
    kCFStringHangulStateV = 1,		/* vowel */
    kCFStringHangulStateT = 2,		/* trailing consonant */
    kCFStringHangulStateLV = 3,		/* precomposed LV syllable */
    kCFStringHangulStateLVT = 4,	/* precomposed LVT syllable */
    kCFStringHangulStateBreak = 5	/* sequence ends here */
};
3104
3105 static CFRange _CFStringInlineBufferGetComposedRange(CFStringInlineBuffer *buffer, CFIndex start, CFStringCharacterClusterType type, const uint8_t *bmpBitmap, CFIndex csetType) {
3106 CFIndex end = start + 1;
3107 const uint8_t *bitmap = bmpBitmap;
3108 UTF32Char character;
3109 UTF16Char otherSurrogate;
3110 uint8_t step;
3111
3112 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3113
3114 // We don't combine characters in Armenian ~ Limbu range for backward deletion
3115 if ((type != kCFStringBackwardDeletionCluster) || (character < 0x0530) || (character > 0x194F)) {
3116 // Check if the current is surrogate
3117 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start + 1)))) {
3118 ++end;
3119 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3120 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3121 }
3122
3123 // Extend backward
3124 while (start > 0) {
3125 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3126
3127 if (character < 0x10000) { // the first round could be already be non-BMP
3128 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)))) {
3129 character = CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate, character);
3130 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3131 --start;
3132 } else {
3133 bitmap = bmpBitmap;
3134 }
3135 }
3136
3137 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3138
3139 --start;
3140
3141 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
3142 }
3143 }
3144
3145 // Hangul
3146 if (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END))) {
3147 uint8_t state;
3148 uint8_t initialState;
3149
3150 if (character < HANGUL_JUNGSEONG_START) {
3151 state = kCFStringHangulStateL;
3152 } else if (character < HANGUL_JONGSEONG_START) {
3153 state = kCFStringHangulStateV;
3154 } else if (character < HANGUL_SYLLABLE_START) {
3155 state = kCFStringHangulStateT;
3156 } else {
3157 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3158 }
3159 initialState = state;
3160
3161 // Extend backward
3162 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)) >= HANGUL_CHOSEONG_START) && (character <= HANGUL_SYLLABLE_END) && ((character <= HANGUL_JONGSEONG_END) || (character >= HANGUL_SYLLABLE_START))) {
3163 switch (state) {
3164 case kCFStringHangulStateV:
3165 if (character <= HANGUL_CHOSEONG_END) {
3166 state = kCFStringHangulStateL;
3167 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END) && !_CFStringIsHangulLVT(character)) {
3168 state = kCFStringHangulStateLV;
3169 } else if (character > HANGUL_JUNGSEONG_END) {
3170 state = kCFStringHangulStateBreak;
3171 }
3172 break;
3173
3174 case kCFStringHangulStateT:
3175 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JUNGSEONG_END)) {
3176 state = kCFStringHangulStateV;
3177 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)) {
3178 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3179 } else if (character < HANGUL_JUNGSEONG_START) {
3180 state = kCFStringHangulStateBreak;
3181 }
3182 break;
3183
3184 default:
3185 state = ((character < HANGUL_JUNGSEONG_START) ? kCFStringHangulStateL : kCFStringHangulStateBreak);
3186 break;
3187 }
3188
3189 if (state == kCFStringHangulStateBreak) break;
3190 --start;
3191 }
3192
3193 // Extend forward
3194 state = initialState;
3195 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) && (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)))) {
3196 switch (state) {
3197 case kCFStringHangulStateLV:
3198 case kCFStringHangulStateV:
3199 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) {
3200 state = ((character < HANGUL_JONGSEONG_START) ? kCFStringHangulStateV : kCFStringHangulStateT);
3201 } else {
3202 state = kCFStringHangulStateBreak;
3203 }
3204 break;
3205
3206 case kCFStringHangulStateLVT:
3207 case kCFStringHangulStateT:
3208 state = (((character >= HANGUL_JONGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) ? kCFStringHangulStateT : kCFStringHangulStateBreak);
3209 break;
3210
3211 default:
3212 if (character < HANGUL_JUNGSEONG_START) {
3213 state = kCFStringHangulStateL;
3214 } else if (character < HANGUL_JONGSEONG_START) {
3215 state = kCFStringHangulStateV;
3216 } else if (character >= HANGUL_SYLLABLE_START) {
3217 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
3218 } else {
3219 state = kCFStringHangulStateBreak;
3220 }
3221 break;
3222 }
3223
3224 if (state == kCFStringHangulStateBreak) break;
3225 ++end;
3226 }
3227 }
3228
3229 // Extend forward
3230 while ((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) {
3231 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
3232
3233 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, end + 1)))) {
3234 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3235 bitmap = CFUniCharGetBitmapPtrForPlane(csetType, (character >> 16));
3236 step = 2;
3237 } else {
3238 bitmap = bmpBitmap;
3239 step = 1;
3240 }
3241
3242 if (!CFUniCharIsMemberOfBitmap(character, bitmap) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
3243
3244 end += step;
3245 }
3246
3247 return CFRangeMake(start, end - start);
3248 }
3249
3250 CF_INLINE bool _CFStringIsVirama(UTF32Char character, const uint8_t *combClassBMP) {
3251 return ((character == COMBINING_GRAPHEME_JOINER) || (CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)((character < 0x10000) ? combClassBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (character >> 16)))) == 9) ? true : false);
3252 }
3253
3254 CFRange CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string, CFIndex charIndex, CFStringCharacterClusterType type) {
3255 CFRange range;
3256 CFIndex currentIndex;
3257 CFIndex length = CFStringGetLength(string);
3258 CFIndex csetType = ((kCFStringGraphemeCluster == type) ? kCFUniCharGraphemeExtendCharacterSet : kCFUniCharNonBaseCharacterSet);
3259 CFStringInlineBuffer stringBuffer;
3260 const uint8_t *bmpBitmap;
3261 const uint8_t *letterBMP;
3262 const uint8_t *combClassBMP;
3263 UTF32Char character;
3264 UTF16Char otherSurrogate;
3265
3266 if (charIndex >= length) return CFRangeMake(kCFNotFound, 0);
3267
3268 /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
3269 */
3270 if (!CF_IS_OBJC(__kCFStringTypeID, string) && __CFStrIsEightBit(string)) return CFRangeMake(charIndex, 1);
3271
3272 bmpBitmap = CFUniCharGetBitmapPtrForPlane(csetType, 0);
3273 letterBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, 0);
3274 combClassBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3275
3276 CFStringInitInlineBuffer(string, &stringBuffer, CFRangeMake(0, length));
3277
3278 // Get composed character sequence first
3279 range = _CFStringInlineBufferGetComposedRange(&stringBuffer, charIndex, type, bmpBitmap, csetType);
3280
3281 // Do grapheme joiners
3282 if (type < kCFStringCursorMovementCluster) {
3283 const uint8_t *letter = letterBMP;
3284
3285 // Check to see if we have a letter at the beginning of initial cluster
3286 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location);
3287
3288 if ((range.length > 1) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location + 1)))) {
3289 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3290 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3291 }
3292
3293 if ((character == ZERO_WIDTH_JOINER) || CFUniCharIsMemberOfBitmap(character, letter)) {
3294 CFRange otherRange;
3295
3296 // Check if preceded by grapheme joiners (U034F and viramas)
3297 otherRange.location = currentIndex = range.location;
3298
3299 while (currentIndex > 1) {
3300 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex);
3301
3302 // ??? We're assuming viramas only in BMP
3303 if ((_CFStringIsVirama(character, combClassBMP) || ((character == ZERO_WIDTH_JOINER) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex), combClassBMP))) && (currentIndex > 0)) {
3304 --currentIndex;
3305 } else {
3306 break;
3307 }
3308
3309 currentIndex = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType).location;
3310
3311 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3312
3313 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1)))) {
3314 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3315 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3316 --currentIndex;
3317 } else {
3318 letter = letterBMP;
3319 }
3320
3321 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3322 range.location = currentIndex;
3323 }
3324
3325 range.length += otherRange.location - range.location;
3326
3327 // Check if followed by grapheme joiners
3328 if ((range.length > 1) && ((range.location + range.length) < length)) {
3329 otherRange = range;
3330 currentIndex = otherRange.location + otherRange.length;
3331
3332 do {
3333 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1);
3334
3335 // ??? We're assuming viramas only in BMP
3336 if ((character != ZERO_WIDTH_JOINER) && !_CFStringIsVirama(character, combClassBMP)) break;
3337
3338 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3339
3340 if (character == ZERO_WIDTH_JOINER) character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, ++currentIndex);
3341
3342 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex + 1)))) {
3343 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3344 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3345 } else {
3346 letter = letterBMP;
3347 }
3348
3349 // We only conjoin letters
3350 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3351 otherRange = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, bmpBitmap, csetType);
3352 currentIndex = otherRange.location + otherRange.length;
3353 } while ((otherRange.location + otherRange.length) < length);
3354 range.length = currentIndex - range.location;
3355 }
3356 }
3357 }
3358
3359 // Check if we're part of prefix transcoding hints
3360 CFIndex otherIndex;
3361
3362 currentIndex = (range.location + range.length) - (MAX_TRANSCODING_LENGTH + 1);
3363 if (currentIndex < 0) currentIndex = 0;
3364
3365 while (currentIndex <= range.location) {
3366 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3367
3368 if ((character & 0x1FFFF0) == 0xF860) { // transcoding hint
3369 otherIndex = currentIndex + __CFTranscodingHintLength[(character - 0xF860)] + 1;
3370 if (otherIndex >= (range.location + range.length)) {
3371 if (otherIndex <= length) {
3372 range.location = currentIndex;
3373 range.length = otherIndex - currentIndex;
3374 }
3375 break;
3376 }
3377 }
3378 ++currentIndex;
3379 }
3380
3381 return range;
3382 }
3383
3384 #if 1 /* Using the new implementation. Leaving the old implementation if'ed out for testing purposes for now */
3385 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
3386 return CFStringGetRangeOfCharacterClusterAtIndex(theString, theIndex, kCFStringComposedCharacterCluster);
3387 }
3388 #else
3389 /*!
3390 @function CFStringGetRangeOfComposedCharactersAtIndex
3391 Returns the range of the composed character sequence at the specified index.
3392 @param theString The CFString which is to be searched. If this
3393 parameter is not a valid CFString, the behavior is
3394 undefined.
3395 @param theIndex The index of the character contained in the
3396 composed character sequence. If the index is
3397 outside the index space of the string (0 to N-1 inclusive,
3398 where N is the length of the string), the behavior is
3399 undefined.
3400 @result The range of the composed character sequence.
3401 */
3402 #define ExtHighHalfZoneLow 0xD800
3403 #define ExtHighHalfZoneHigh 0xDBFF
3404 #define ExtLowHalfZoneLow 0xDC00
3405 #define ExtLowHalfZoneHigh 0xDFFF
3406 #define JunseongStart 0x1160
3407 #define JonseongEnd 0x11F9
3408 CF_INLINE Boolean IsHighCode(UniChar X) { return (X >= ExtHighHalfZoneLow && X <= ExtHighHalfZoneHigh); }
3409 CF_INLINE Boolean IsLowCode(UniChar X) { return (X >= ExtLowHalfZoneLow && X <= ExtLowHalfZoneHigh); }
3410 #define IsHangulConjoiningJamo(X) (X >= JunseongStart && X <= JonseongEnd)
3411 #define IsHalfwidthKanaVoicedMark(X) ((X == 0xFF9E) || (X == 0xFF9F))
3412 CF_INLINE Boolean IsNonBaseChar(UniChar X, CFCharacterSetRef nonBaseSet) { return (CFCharacterSetIsCharacterMember(nonBaseSet, X) || IsHangulConjoiningJamo(X) || IsHalfwidthKanaVoicedMark(X) || (X & 0x1FFFF0) == 0xF870); } // combining char, hangul jamo, or Apple corporate variant tag
3413 #define ZWJ 0x200D
3414 #define ZWNJ 0x200C
3415 #define COMBINING_GRAPHEME_JOINER (0x034F)
3416
3417 static CFCharacterSetRef nonBaseChars = NULL;
3418 static CFCharacterSetRef letterChars = NULL;
3419 static const void *__CFCombiningClassBMP = NULL;
3420
3421 CF_INLINE bool IsVirama(UTF32Char character) {
3422 return ((character == COMBINING_GRAPHEME_JOINER) ? true : ((character < 0x10000) && (CFUniCharGetCombiningPropertyForCharacter(character, __CFCombiningClassBMP) == 9) ? true : false));
3423 }
3424
3425 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
3426 CFIndex left, current, save;
3427 CFIndex len = CFStringGetLength(theString);
3428 CFStringInlineBuffer stringBuffer;
3429 static volatile Boolean _isInited = false;
3430
3431 if (theIndex >= len) return CFRangeMake(kCFNotFound, 0);
3432
3433 if (!_isInited) {
3434 nonBaseChars = CFCharacterSetGetPredefined(kCFCharacterSetNonBase);
3435 letterChars = CFCharacterSetGetPredefined(kCFCharacterSetLetter);
3436 __CFCombiningClassBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3437 _isInited = true;
3438 }
3439
3440 save = current = theIndex;
3441
3442 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, len));
3443
3444 /*
3445 * First check for transcoding hints
3446 */
3447 {
3448 CFRange theRange = (current > MAX_TRANSCODING_LENGTH ? CFRangeMake(current - MAX_TRANSCODING_LENGTH, MAX_TRANSCODING_LENGTH + 1) : CFRangeMake(0, current + 1));
3449
3450 // Should check the next loc ?
3451 if (current + 1 < len) ++theRange.length;
3452
3453 if (theRange.length > 1) {
3454 UniChar characterBuffer[MAX_TRANSCODING_LENGTH + 2]; // Transcoding hint length + current loc + next loc
3455
3456 if (stringBuffer.directBuffer) {
3457 memmove(characterBuffer, stringBuffer.directBuffer + theRange.location, theRange.length * sizeof(UniChar));
3458 } else {
3459 CFStringGetCharacters(theString, theRange, characterBuffer);
3460 }
3461
3462 while (current >= theRange.location) {
3463 if ((characterBuffer[current - theRange.location] & 0x1FFFF0) == 0xF860) {
3464 theRange = CFRangeMake(current, __CFTranscodingHintLength[characterBuffer[current - theRange.location] - 0xF860] + 1);
3465 if ((theRange.location + theRange.length) <= theIndex) break;
3466 if ((theRange.location + theRange.length) >= len) theRange.length = len - theRange.location;
3467 return theRange;
3468 }
3469 if (current == 0) break;
3470 --current;
3471 }
3472 current = theIndex; // Reset current
3473 }
3474 }
3475
3476 //#warning Aki 5/29/01 This does not support non-base chars in non-BMP planes (i.e. musical symbol combining stem in Unicode 3.1)
3477 /*
3478 * if we start NOT on a base, first move back to a base as appropriate.
3479 */
3480
3481 roundAgain:
3482
3483 while ((current > 0) && IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current), nonBaseChars)) --current;
3484
3485 if (current >= 1 && current < len && CFCharacterSetIsCharacterMember(letterChars, CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) {
3486 --current;
3487 goto roundAgain;
3488 } else if ((current >= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1) == ZWJ) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 2))) {
3489 current -= 2;
3490 goto roundAgain;
3491 }
3492
3493 /*
3494 * Set the left position, then jump back to the saved original position.
3495 */
3496
3497 if (current >= 1 && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) --current;
3498 left = current;
3499 current = save;
3500
3501 /*
3502 * Now, presume we are on a base; move forward & look for the next base.
3503 * Handle jumping over H/L codes.
3504 */
3505 if (IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && (current + 1) < len && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current + 1))) ++current;
3506 ++current;
3507
3508 round2Again:
3509
3510 if (current < len) {
3511 while (IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current), nonBaseChars)) {
3512 ++current;
3513 if (current >= len) break;
3514 }
3515 if ((current < len) && CFCharacterSetIsCharacterMember(letterChars, CFStringGetCharacterFromInlineBuffer(&stringBuffer, current))) {
3516 if (IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) {
3517 ++current; goto round2Again;
3518 } else if ((current >= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1) == ZWJ) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 2))) {
3519 ++current; goto round2Again;
3520 }
3521 }
3522 }
3523 /*
3524 * Now, "current" is a base, and "left" is a base.
3525 * The junk between had better contain "save"!
3526 */
3527 if ((! (left <= save)) || (! (save <= current))) {
3528 CFLog(kCFLogLevelWarning, CFSTR("CFString: CFStringGetRangeOfComposedCharactersAtIndex:%d returned invalid\n"), save);
3529 }
3530 return CFRangeMake(left, current - left);
3531 }
3532 #endif
3533
3534 /*!
3535 @function CFStringFindCharacterFromSet
3536 Query the range of characters contained in the specified character set.
3537 @param theString The CFString which is to be searched. If this
3538 parameter is not a valid CFString, the behavior is
3539 undefined.
3540 @param theSet The CFCharacterSet against which the membership
3541 of characters is checked. If this parameter is not a valid
3542 CFCharacterSet, the behavior is undefined.
3543 @param range The range of characters within the string to search. If
3544 the range location or end point (defined by the location
3545 plus length minus 1) are outside the index space of the
3546 string (0 to N-1 inclusive, where N is the length of the
3547 string), the behavior is undefined. If the range length is
3548 negative, the behavior is undefined. The range may be empty
3549 (length 0), in which case no search is performed.
3550 @param searchOptions The bitwise-or'ed option flags to control
3551 the search behavior. The supported options are
3552 kCFCompareBackwards andkCFCompareAnchored.
3553 If other option flags are specified, the behavior
3554 is undefined.
3555 @param result The pointer to a CFRange supplied by the caller in
3556 which the search result is stored. If a pointer to an invalid
3557 memory is specified, the behavior is undefined.
3558 @result true, if at least a character which is a member of the character
3559 set is found and result is filled, otherwise, false.
3560 */
3561 #define SURROGATE_START 0xD800
3562 #define SURROGATE_END 0xDFFF
3563
3564 CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFOptionFlags searchOptions, CFRange *result) {
3565 CFStringInlineBuffer stringBuffer;
3566 CFCharacterSetInlineBuffer csetBuffer;
3567 UniChar ch;
3568 CFIndex step;
3569 CFIndex fromLoc, toLoc, cnt; // fromLoc and toLoc are inclusive
3570 Boolean found = false;
3571 Boolean done = false;
3572
3573 //#warning FIX ME !! Should support kCFCompareNonliteral
3574
3575 if ((rangeToSearch.location + rangeToSearch.length > CFStringGetLength(theString)) || (rangeToSearch.length == 0)) return false;
3576
3577 if (searchOptions & kCFCompareBackwards) {
3578 fromLoc = rangeToSearch.location + rangeToSearch.length - 1;
3579 toLoc = rangeToSearch.location;
3580 } else {
3581 fromLoc = rangeToSearch.location;
3582 toLoc = rangeToSearch.location + rangeToSearch.length - 1;
3583 }
3584 if (searchOptions & kCFCompareAnchored) {
3585 toLoc = fromLoc;
3586 }
3587
3588 step = (fromLoc <= toLoc) ? 1 : -1;
3589 cnt = fromLoc;
3590
3591 CFStringInitInlineBuffer(theString, &stringBuffer, rangeToSearch);
3592 CFCharacterSetInitInlineBuffer(theSet, &csetBuffer);
3593
3594 do {
3595 ch = CFStringGetCharacterFromInlineBuffer(&stringBuffer, cnt - rangeToSearch.location);
3596 if ((ch >= SURROGATE_START) && (ch <= SURROGATE_END)) {
3597 int otherCharIndex = cnt + step;
3598
3599 if (((step < 0) && (otherCharIndex < toLoc)) || ((step > 0) && (otherCharIndex > toLoc))) {
3600 done = true;
3601 } else {
3602 UniChar highChar;
3603 UniChar lowChar = CFStringGetCharacterFromInlineBuffer(&stringBuffer, otherCharIndex - rangeToSearch.location);
3604
3605 if (cnt < otherCharIndex) {
3606 highChar = ch;
3607 } else {
3608 highChar = lowChar;
3609 lowChar = ch;
3610 }
3611
3612 if (CFUniCharIsSurrogateHighCharacter(highChar) && CFUniCharIsSurrogateLowCharacter(lowChar) && CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, CFUniCharGetLongCharacterForSurrogatePair(highChar, lowChar))) {
3613 if (result) *result = CFRangeMake((cnt < otherCharIndex ? cnt : otherCharIndex), 2);
3614 return true;
3615 } else if (otherCharIndex == toLoc) {
3616 done = true;
3617 } else {
3618 cnt = otherCharIndex + step;
3619 }
3620 }
3621 } else if (CFCharacterSetInlineBufferIsLongCharacterMember(&csetBuffer, ch)) {
3622 done = found = true;
3623 } else if (cnt == toLoc) {
3624 done = true;
3625 } else {
3626 cnt += step;
3627 }
3628 } while (!done);
3629
3630 if (found && result) *result = CFRangeMake(cnt, 1);
3631 return found;
3632 }
3633
3634 /* Line range code */
3635
3636 #define CarriageReturn '\r' /* 0x0d */
3637 #define NewLine '\n' /* 0x0a */
3638 #define NextLine 0x0085
3639 #define LineSeparator 0x2028
3640 #define ParaSeparator 0x2029
3641
3642 CF_INLINE Boolean isALineSeparatorTypeCharacter(UniChar ch, Boolean includeLineEndings) {
3643 if (ch > CarriageReturn && ch < NextLine) return false; /* Quick test to cover most chars */
3644 return (ch == NewLine || ch == CarriageReturn || ch == ParaSeparator || (includeLineEndings && (ch == NextLine || ch == LineSeparator))) ? true : false;
3645 }
3646
3647 static void __CFStringGetLineOrParagraphBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex, Boolean includeLineEndings) {
3648 CFIndex len;
3649 CFStringInlineBuffer buf;
3650 UniChar ch;
3651
3652 __CFAssertIsString(string);
3653 __CFAssertRangeIsInStringBounds(string, range.location, range.length);
3654
3655 len = __CFStrLength(string);
3656
3657 if (lineBeginIndex) {
3658 CFIndex start;
3659 if (range.location == 0) {
3660 start = 0;
3661 } else {
3662 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3663 CFIndex buf_idx = range.location;
3664
3665 /* Take care of the special case where start happens to fall right between \r and \n */
3666 ch = CFStringGetCharacterFromInlineBuffer(&buf, buf_idx);
3667 buf_idx--;
3668 if ((ch == NewLine) && (CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) == CarriageReturn)) {
3669 buf_idx--;
3670 }
3671 while (1) {
3672 if (buf_idx < 0) {
3673 start = 0;
3674 break;
3675 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx), includeLineEndings)) {
3676 start = buf_idx + 1;
3677 break;
3678 } else {
3679 buf_idx--;
3680 }
3681 }
3682 }
3683 *lineBeginIndex = start;
3684 }
3685
3686 /* Now find the ending point */
3687 if (lineEndIndex || contentsEndIndex) {
3688 CFIndex endOfContents, lineSeparatorLength = 1; /* 1 by default */
3689 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3690 CFIndex buf_idx = range.location + range.length - (range.length ? 1 : 0);
3691 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
3692 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3693 if (ch == NewLine) {
3694 endOfContents = buf_idx;
3695 buf_idx--;
3696 if (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == CarriageReturn) {
3697 lineSeparatorLength = 2;
3698 endOfContents--;
3699 }
3700 } else {
3701 while (1) {
3702 if (isALineSeparatorTypeCharacter(ch, includeLineEndings)) {
3703 endOfContents = buf_idx; /* This is actually end of contentsRange */
3704 buf_idx++; /* OK for this to go past the end */
3705 if ((ch == CarriageReturn) && (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == NewLine)) {
3706 lineSeparatorLength = 2;
3707 }
3708 break;
3709 } else if (buf_idx >= len) {
3710 endOfContents = len;
3711 lineSeparatorLength = 0;
3712 break;
3713 } else {
3714 buf_idx++;
3715 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3716 }
3717 }
3718 }
3719 if (contentsEndIndex) *contentsEndIndex = endOfContents;
3720 if (lineEndIndex) *lineEndIndex = endOfContents + lineSeparatorLength;
3721 }
3722 }
3723
3724 void CFStringGetLineBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex) {
3725 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID, void, string, "getLineStart:end:contentsEnd:forRange:", lineBeginIndex, lineEndIndex, contentsEndIndex, CFRangeMake(range.location, range.length));
3726 __CFStringGetLineOrParagraphBounds(string, range, lineBeginIndex, lineEndIndex, contentsEndIndex, true);
3727 }
3728
3729 void CFStringGetParagraphBounds(CFStringRef string, CFRange range, CFIndex *parBeginIndex, CFIndex *parEndIndex, CFIndex *contentsEndIndex) {
3730 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID, void, string, "getParagraphStart:end:contentsEnd:forRange:", parBeginIndex, parEndIndex, contentsEndIndex, CFRangeMake(range.location, range.length));
3731 __CFStringGetLineOrParagraphBounds(string, range, parBeginIndex, parEndIndex, contentsEndIndex, false);
3732 }
3733
3734
3735 CFStringRef CFStringCreateByCombiningStrings(CFAllocatorRef alloc, CFArrayRef array, CFStringRef separatorString) {
3736 CFIndex numChars;
3737 CFIndex separatorNumByte;
3738 CFIndex stringCount = CFArrayGetCount(array);
3739 Boolean isSepCFString = !CF_IS_OBJC(__kCFStringTypeID, separatorString);
3740 Boolean canBeEightbit = isSepCFString && __CFStrIsEightBit(separatorString);
3741 CFIndex idx;
3742 CFStringRef otherString;
3743 void *buffer;
3744 uint8_t *bufPtr;
3745 const void *separatorContents = NULL;
3746
3747 if (stringCount == 0) {
3748 return CFStringCreateWithCharacters(alloc, NULL, 0);
3749 } else if (stringCount == 1) {
3750 return (CFStringRef)CFStringCreateCopy(alloc, (CFStringRef)CFArrayGetValueAtIndex(array, 0));
3751 }
3752
3753 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3754
3755 numChars = CFStringGetLength(separatorString) * (stringCount - 1);
3756 for (idx = 0; idx < stringCount; idx++) {
3757 otherString = (CFStringRef)CFArrayGetValueAtIndex(array, idx);
3758 numChars += CFStringGetLength(otherString);
3759 // canBeEightbit is already false if the separator is an NSString...
3760 if (!CF_IS_OBJC(__kCFStringTypeID, otherString) && __CFStrIsUnicode(otherString)) canBeEightbit = false;
3761 }
3762
3763 buffer = (uint8_t *)CFAllocatorAllocate(alloc, canBeEightbit ? ((numChars + 1) * sizeof(uint8_t)) : (numChars * sizeof(UniChar)), 0);
3764 bufPtr = (uint8_t *)buffer;
3765 if (__CFOASafe) __CFSetLastAllocationEventName(buffer, "CFString (store)");
3766 separatorNumByte = CFStringGetLength(separatorString) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3767
3768 for (idx = 0; idx < stringCount; idx++) {
3769 if (idx) { // add separator here unless first string
3770 if (separatorContents) {
3771 memmove(bufPtr, separatorContents, separatorNumByte);
3772 } else {
3773 if (!isSepCFString) { // NSString
3774 CFStringGetCharacters(separatorString, CFRangeMake(0, CFStringGetLength(separatorString)), (UniChar *)bufPtr);
3775 } else if (canBeEightbit || __CFStrIsUnicode(separatorString)) {
3776 memmove(bufPtr, (const uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), separatorNumByte);
3777 } else {
3778 __CFStrConvertBytesToUnicode((uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), (UniChar *)bufPtr, __CFStrLength(separatorString));
3779 }
3780 separatorContents = bufPtr;
3781 }
3782 bufPtr += separatorNumByte;
3783 }
3784
3785 otherString = (CFStringRef )CFArrayGetValueAtIndex(array, idx);
3786 if (CF_IS_OBJC(__kCFStringTypeID, otherString)) {
3787 CFIndex otherLength = CFStringGetLength(otherString);
3788 CFStringGetCharacters(otherString, CFRangeMake(0, otherLength), (UniChar *)bufPtr);
3789 bufPtr += otherLength * sizeof(UniChar);
3790 } else {
3791 const uint8_t * otherContents = (const uint8_t *)__CFStrContents(otherString);
3792 CFIndex otherNumByte = __CFStrLength2(otherString, otherContents) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3793
3794 if (canBeEightbit || __CFStrIsUnicode(otherString)) {
3795 memmove(bufPtr, otherContents + __CFStrSkipAnyLengthByte(otherString), otherNumByte);
3796 } else {
3797 __CFStrConvertBytesToUnicode(otherContents + __CFStrSkipAnyLengthByte(otherString), (UniChar *)bufPtr, __CFStrLength2(otherString, otherContents));
3798 }
3799 bufPtr += otherNumByte;
3800 }
3801 }
3802 if (canBeEightbit) *bufPtr = 0; // NULL byte;
3803
3804 return canBeEightbit ?
3805 CFStringCreateWithCStringNoCopy(alloc, (const char*)buffer, __CFStringGetEightBitStringEncoding(), alloc) :
3806 CFStringCreateWithCharactersNoCopy(alloc, (UniChar *)buffer, numChars, alloc);
3807 }
3808
3809
3810 CFArrayRef CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc, CFStringRef string, CFStringRef separatorString) {
3811 CFArrayRef separatorRanges;
3812 CFIndex length = CFStringGetLength(string);
3813 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
3814 if (!(separatorRanges = CFStringCreateArrayWithFindResults(alloc, string, separatorString, CFRangeMake(0, length), 0))) {
3815 return CFArrayCreate(alloc, (const void **)&string, 1, & kCFTypeArrayCallBacks);
3816 } else {
3817 CFIndex idx;
3818 CFIndex count = CFArrayGetCount(separatorRanges);
3819 CFIndex startIndex = 0;
3820 CFIndex numChars;
3821 CFMutableArrayRef array = CFArrayCreateMutable(alloc, count + 2, & kCFTypeArrayCallBacks);
3822 const CFRange *currentRange;
3823 CFStringRef substring;
3824
3825 for (idx = 0;idx < count;idx++) {
3826 currentRange = (const CFRange *)CFArrayGetValueAtIndex(separatorRanges, idx);
3827 numChars = currentRange->location - startIndex;
3828 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, numChars));
3829 CFArrayAppendValue(array, substring);
3830 CFRelease(substring);
3831 startIndex = currentRange->location + currentRange->length;
3832 }
3833 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, length - startIndex));
3834 CFArrayAppendValue(array, substring);
3835 CFRelease(substring);
3836
3837 CFRelease(separatorRanges);
3838
3839 return array;
3840 }
3841 }
3842
3843 CFStringRef CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc, CFDataRef data, CFStringEncoding encoding) {
3844 return CFStringCreateWithBytes(alloc, CFDataGetBytePtr(data), CFDataGetLength(data), encoding, true);
3845 }
3846
3847
3848 CFDataRef CFStringCreateExternalRepresentation(CFAllocatorRef alloc, CFStringRef string, CFStringEncoding encoding, uint8_t lossByte) {
3849 CFIndex length;
3850 CFIndex guessedByteLength;
3851 uint8_t *bytes;
3852 CFIndex usedLength;
3853 SInt32 result;
3854
3855 if (CF_IS_OBJC(__kCFStringTypeID, string)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3856 length = CFStringGetLength(string);
3857 } else {
3858 __CFAssertIsString(string);
3859 length = __CFStrLength(string);
3860 if (__CFStrIsEightBit(string) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
3861 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
3862 }
3863 }
3864
3865 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3866
3867 if (((encoding & 0x0FFF) == kCFStringEncodingUnicode) && ((encoding == kCFStringEncodingUnicode) || ((encoding > kCFStringEncodingUTF8) && (encoding <= kCFStringEncodingUTF32LE)))) {
3868 guessedByteLength = (length + 1) * ((((encoding >> 26) & 2) == 0) ? sizeof(UTF16Char) : sizeof(UTF32Char)); // UTF32 format has the bit set
3869 } else if (((guessedByteLength = CFStringGetMaximumSizeForEncoding(length, encoding)) > length) && !CF_IS_OBJC(__kCFStringTypeID, string)) { // Multi byte encoding
3870 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
3871 if (__CFStrIsUnicode(string)) {
3872 CFIndex aLength = CFStringEncodingByteLengthForCharacters(encoding, kCFStringEncodingPrependBOM, __CFStrContents(string), __CFStrLength(string));
3873 if (aLength > 0) guessedByteLength = aLength;
3874 } else {
3875 #endif
3876 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, NULL, LONG_MAX, &guessedByteLength);
3877 // if result == length, we always succeed
3878 // otherwise, if result == 0, we fail
3879 // otherwise, if there was a lossByte but still result != length, we fail
3880 if ((result != length) && (!result || !lossByte)) return NULL;
3881 if (guessedByteLength == length && __CFStrIsEightBit(string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // It's all ASCII !!
3882 return CFDataCreate(alloc, ((uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
3883 }
3884 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
3885 }
3886 #endif
3887 }
3888 bytes = (uint8_t *)CFAllocatorAllocate(alloc, guessedByteLength, 0);
3889 if (__CFOASafe) __CFSetLastAllocationEventName(bytes, "CFData (store)");
3890
3891 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, bytes, guessedByteLength, &usedLength);
3892
3893 if ((result != length) && (!result || !lossByte)) { // see comment above about what this means
3894 CFAllocatorDeallocate(alloc, bytes);
3895 return NULL;
3896 }
3897
3898 return CFDataCreateWithBytesNoCopy(alloc, (uint8_t *)bytes, usedLength, alloc);
3899 }
3900
3901
3902 CFStringEncoding CFStringGetSmallestEncoding(CFStringRef str) {
3903 CFIndex len;
3904 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_smallestEncodingInCFStringEncoding");
3905 __CFAssertIsString(str);
3906
3907 if (__CFStrIsEightBit(str)) return __CFStringGetEightBitStringEncoding();
3908 len = __CFStrLength(str);
3909 if (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetEightBitStringEncoding(), 0, NULL, LONG_MAX, NULL) == len) return __CFStringGetEightBitStringEncoding();
3910 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetSystemEncoding(), 0, NULL, LONG_MAX, NULL) == len)) return __CFStringGetSystemEncoding();
3911 return kCFStringEncodingUnicode; /* ??? */
3912 }
3913
3914
3915 CFStringEncoding CFStringGetFastestEncoding(CFStringRef str) {
3916 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_fastestEncodingInCFStringEncoding");
3917 __CFAssertIsString(str);
3918 return __CFStrIsEightBit(str) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode; /* ??? */
3919 }
3920
3921
3922 SInt32 CFStringGetIntValue(CFStringRef str) {
3923 Boolean success;
3924 SInt32 result;
3925 SInt32 idx = 0;
3926 CFStringInlineBuffer buf;
3927 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
3928 success = __CFStringScanInteger(&buf, NULL, &idx, false, &result);
3929 return success ? result : 0;
3930 }
3931
3932
3933 double CFStringGetDoubleValue(CFStringRef str) {
3934 Boolean success;
3935 double result;
3936 SInt32 idx = 0;
3937 CFStringInlineBuffer buf;
3938 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
3939 success = __CFStringScanDouble(&buf, NULL, &idx, &result);
3940 return success ? result : 0.0;
3941 }
3942
3943
3944 /*** Mutable functions... ***/
3945
3946 void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string, UniChar *chars, CFIndex length, CFIndex capacity) {
3947 __CFAssertIsNotNegative(length);
3948 __CFAssertIsStringAndExternalMutable(string);
3949 CFAssert4((length <= capacity) && ((capacity == 0) || ((capacity > 0) && chars)), __kCFLogAssertion, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__, chars, length, capacity);
3950 __CFStrSetContentPtr(string, chars);
3951 __CFStrSetExplicitLength(string, length);
3952 __CFStrSetCapacity(string, capacity * sizeof(UniChar));
3953 __CFStrSetCapacityProvidedExternally(string);
3954 }
3955
3956
3957
3958 void CFStringInsert(CFMutableStringRef str, CFIndex idx, CFStringRef insertedStr) {
3959 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "insertString:atIndex:", insertedStr, idx);
3960 __CFAssertIsStringAndMutable(str);
3961 CFAssert3(idx >= 0 && idx <= __CFStrLength(str), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(str));
3962 __CFStringReplace(str, CFRangeMake(idx, 0), insertedStr);
3963 }
3964
3965
3966 void CFStringDelete(CFMutableStringRef str, CFRange range) {
3967 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "deleteCharactersInRange:", range);
3968 __CFAssertIsStringAndMutable(str);
3969 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
3970 __CFStringChangeSize(str, range, 0, false);
3971 }
3972
3973
3974 void CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
3975 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "replaceCharactersInRange:withString:", range, replacement);
3976 __CFAssertIsStringAndMutable(str);
3977 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
3978 __CFStringReplace(str, range, replacement);
3979 }
3980
3981
3982 void CFStringReplaceAll(CFMutableStringRef str, CFStringRef replacement) {
3983 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "setString:", replacement);
3984 __CFAssertIsStringAndMutable(str);
3985 __CFStringReplace(str, CFRangeMake(0, __CFStrLength(str)), replacement);
3986 }
3987
3988
3989 void CFStringAppend(CFMutableStringRef str, CFStringRef appended) {
3990 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "appendString:", appended);
3991 __CFAssertIsStringAndMutable(str);
3992 __CFStringReplace(str, CFRangeMake(__CFStrLength(str), 0), appended);
3993 }
3994
3995
3996 void CFStringAppendCharacters(CFMutableStringRef str, const UniChar *chars, CFIndex appendedLength) {
3997 CFIndex strLength, idx;
3998
3999 __CFAssertIsNotNegative(appendedLength);
4000
4001 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", chars, appendedLength);
4002
4003 __CFAssertIsStringAndMutable(str);
4004
4005 strLength = __CFStrLength(str);
4006 if (__CFStringGetCompatibility(Bug2967272) || __CFStrIsUnicode(str)) {
4007 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, true);
4008 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4009 } else {
4010 uint8_t *contents;
4011 bool isASCII = true;
4012 for (idx = 0; isASCII && idx < appendedLength; idx++) isASCII = (chars[idx] < 0x80);
4013 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, !isASCII);
4014 if (!isASCII) {
4015 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
4016 } else {
4017 contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4018 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4019 }
4020 }
4021 }
4022
4023
4024 static void __CFStringAppendBytes(CFMutableStringRef str, const char *cStr, CFIndex appendedLength, CFStringEncoding encoding) {
4025 Boolean appendedIsUnicode = false;
4026 Boolean freeCStrWhenDone = false;
4027 Boolean demoteAppendedUnicode = false;
4028 CFVarWidthCharBuffer vBuf;
4029
4030 __CFAssertIsNotNegative(appendedLength);
4031
4032 if (encoding == kCFStringEncodingASCII || encoding == __CFStringGetEightBitStringEncoding()) {
4033 // appendedLength now denotes length in UniChars
4034 } else if (encoding == kCFStringEncodingUnicode) {
4035 UniChar *chars = (UniChar *)cStr;
4036 CFIndex idx, length = appendedLength / sizeof(UniChar);
4037 bool isASCII = true;
4038 for (idx = 0; isASCII && idx < length; idx++) isASCII = (chars[idx] < 0x80);
4039 if (!isASCII) {
4040 appendedIsUnicode = true;
4041 } else {
4042 demoteAppendedUnicode = true;
4043 }
4044 appendedLength = length;
4045 } else {
4046 Boolean usingPassedInMemory = false;
4047
4048 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
4049 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
4050
4051 if (!__CFStringDecodeByteStream3((const uint8_t *)cStr, appendedLength, encoding, __CFStrIsUnicode(str), &vBuf, &usingPassedInMemory, 0)) {
4052 CFAssert1(0, __kCFLogAssertion, "Supplied bytes could not be converted specified encoding %d", encoding);
4053 return;
4054 }
4055
4056 // If not ASCII, appendedLength now denotes length in UniChars
4057 appendedLength = vBuf.numChars;
4058 appendedIsUnicode = !vBuf.isASCII;
4059 cStr = (const char *)vBuf.chars.ascii;
4060 freeCStrWhenDone = !usingPassedInMemory && vBuf.shouldFreeChars;
4061 }
4062
4063 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
4064 if (!appendedIsUnicode && !demoteAppendedUnicode) {
4065 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "_cfAppendCString:length:", cStr, appendedLength);
4066 } else {
4067 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", cStr, appendedLength);
4068 }
4069 } else {
4070 CFIndex strLength;
4071 __CFAssertIsStringAndMutable(str);
4072 strLength = __CFStrLength(str);
4073
4074 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, appendedIsUnicode || __CFStrIsUnicode(str));
4075
4076 if (__CFStrIsUnicode(str)) {
4077 UniChar *contents = (UniChar *)__CFStrContents(str);
4078 if (appendedIsUnicode) {
4079 memmove(contents + strLength, cStr, appendedLength * sizeof(UniChar));
4080 } else {
4081 __CFStrConvertBytesToUnicode((const uint8_t *)cStr, contents + strLength, appendedLength);
4082 }
4083 } else {
4084 if (demoteAppendedUnicode) {
4085 UniChar *chars = (UniChar *)cStr;
4086 CFIndex idx;
4087 uint8_t *contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
4088 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
4089 } else {
4090 uint8_t *contents = (uint8_t *)__CFStrContents(str);
4091 memmove(contents + strLength + __CFStrSkipAnyLengthByte(str), cStr, appendedLength);
4092 }
4093 }
4094 }
4095
4096 if (freeCStrWhenDone) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr);
4097 }
4098
4099 void CFStringAppendPascalString(CFMutableStringRef str, ConstStringPtr pStr, CFStringEncoding encoding) {
4100 __CFStringAppendBytes(str, (const char *)(pStr + 1), (CFIndex)*pStr, encoding);
4101 }
4102
4103 void CFStringAppendCString(CFMutableStringRef str, const char *cStr, CFStringEncoding encoding) {
4104 __CFStringAppendBytes(str, cStr, strlen(cStr), encoding);
4105 }
4106
4107
4108 void CFStringAppendFormat(CFMutableStringRef str, CFDictionaryRef formatOptions, CFStringRef format, ...) {
4109 va_list argList;
4110
4111 va_start(argList, format);
4112 CFStringAppendFormatAndArguments(str, formatOptions, format, argList);
4113 va_end(argList);
4114 }
4115
4116
4117 CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFOptionFlags compareOptions) {
4118 CFRange foundRange;
4119 Boolean backwards = ((compareOptions & kCFCompareBackwards) != 0);
4120 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
4121 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
4122 CFRange rangeBuffer[MAX_RANGES_ON_STACK]; // Used to avoid allocating memory
4123 CFRange *ranges = rangeBuffer;
4124 CFIndex foundCount = 0;
4125 CFIndex capacity = MAX_RANGES_ON_STACK;
4126
4127 __CFAssertIsStringAndMutable(string);
4128 __CFAssertRangeIsInStringBounds(string, rangeToSearch.location, rangeToSearch.length);
4129
4130 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
4131 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
4132 // Determine the next range
4133 if (backwards) {
4134 rangeToSearch.length = foundRange.location - rangeToSearch.location;
4135 } else {
4136 rangeToSearch.location = foundRange.location + foundRange.length;
4137 rangeToSearch.length = endIndex - rangeToSearch.location;
4138 }
4139
4140 // If necessary, grow the array
4141 if (foundCount >= capacity) {
4142 bool firstAlloc = (ranges == rangeBuffer) ? true : false;
4143 capacity = (capacity + 4) * 2;
4144 // Note that reallocate with NULL previous pointer is same as allocate
4145 ranges = (CFRange *)CFAllocatorReallocate(kCFAllocatorSystemDefault, firstAlloc ? NULL : ranges, capacity * sizeof(CFRange), 0);
4146 if (firstAlloc) memmove(ranges, rangeBuffer, MAX_RANGES_ON_STACK * sizeof(CFRange));
4147 }
4148 ranges[foundCount] = foundRange;
4149 foundCount++;
4150 }
4151
4152 if (foundCount > 0) {
4153 if (backwards) { // Reorder the ranges to be incrementing (better to do this here, then to check other places)
4154 int head = 0;
4155 int tail = foundCount - 1;
4156 while (head < tail) {
4157 CFRange temp = ranges[head];
4158 ranges[head] = ranges[tail];
4159 ranges[tail] = temp;
4160 head++;
4161 tail--;
4162 }
4163 }
4164 __CFStringReplaceMultiple(string, ranges, foundCount, replacementString);
4165 if (ranges != rangeBuffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, ranges);
4166 }
4167
4168 return foundCount;
4169 }
4170
4171
4172 // This function is here for NSString purposes
4173 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
4174
4175 int __CFStringCheckAndReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
4176 if (!__CFStrIsMutable(str)) return _CFStringErrNotMutable; // These three ifs are always here, for NSString usage
4177 if (!replacement && __CFStringNoteErrors()) return _CFStringErrNilArg;
4178 // We use unsigneds as that is what NSRanges do; we use uint64_t do make sure the sum doesn't wrap (otherwise we'd need to do 3 separate checks). This allows catching bad ranges as described in 3375535. (-1,1)
4179 if (((uint64_t)((unsigned)range.location)) + ((uint64_t)((unsigned)range.length)) > (uint64_t)__CFStrLength(str) && __CFStringNoteErrors()) return _CFStringErrBounds;
4180 __CFAssertIsStringAndMutable(str);
4181 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
4182 __CFStringReplace(str, range, replacement);
4183 return _CFStringErrNone;
4184 }
4185
4186 // This function determines whether errors which would cause string exceptions should
4187 // be ignored or not
4188
4189 Boolean __CFStringNoteErrors(void) {
4190 return true;
4191 }
4192
4193
4194
4195 void CFStringPad(CFMutableStringRef string, CFStringRef padString, CFIndex length, CFIndex indexIntoPad) {
4196 CFIndex originalLength;
4197
4198 __CFAssertIsNotNegative(length);
4199 __CFAssertIsNotNegative(indexIntoPad);
4200
4201 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, void, string, "_cfPad:length:padIndex:", padString, length, indexIntoPad);
4202
4203 __CFAssertIsStringAndMutable(string);
4204
4205 originalLength = __CFStrLength(string);
4206 if (length < originalLength) {
4207 __CFStringChangeSize(string, CFRangeMake(length, originalLength - length), 0, false);
4208 } else if (originalLength < length) {
4209 uint8_t *contents;
4210 Boolean isUnicode;
4211 CFIndex charSize;
4212 CFIndex padStringLength;
4213 CFIndex padLength;
4214 CFIndex padRemaining = length - originalLength;
4215
4216 if (CF_IS_OBJC(__kCFStringTypeID, padString)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
4217 padStringLength = CFStringGetLength(padString);
4218 isUnicode = true; /* !!! Bad for now */
4219 } else {
4220 __CFAssertIsString(padString);
4221 padStringLength = __CFStrLength(padString);
4222 isUnicode = __CFStrIsUnicode(string) || __CFStrIsUnicode(padString);
4223 }
4224
4225 charSize = isUnicode ? sizeof(UniChar) : sizeof(uint8_t);
4226
4227 __CFStringChangeSize(string, CFRangeMake(originalLength, 0), padRemaining, isUnicode);
4228
4229 contents = (uint8_t *)__CFStrContents(string) + charSize * originalLength + __CFStrSkipAnyLengthByte(string);
4230 padLength = padStringLength - indexIntoPad;
4231 padLength = padRemaining < padLength ? padRemaining : padLength;
4232
4233 while (padRemaining > 0) {
4234 if (isUnicode) {
4235 CFStringGetCharacters(padString, CFRangeMake(indexIntoPad, padLength), (UniChar *)contents);
4236 } else {
4237 CFStringGetBytes(padString, CFRangeMake(indexIntoPad, padLength), __CFStringGetEightBitStringEncoding(), 0, false, contents, padRemaining * charSize, NULL);
4238 }
4239 contents += padLength * charSize;
4240 padRemaining -= padLength;
4241 indexIntoPad = 0;
4242 padLength = padRemaining < padLength ? padRemaining : padStringLength;
4243 }
4244 }
4245 }
4246
4247 void CFStringTrim(CFMutableStringRef string, CFStringRef trimString) {
4248 CFRange range;
4249 CFIndex newStartIndex;
4250 CFIndex length;
4251
4252 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfTrim:", trimString);
4253
4254 __CFAssertIsStringAndMutable(string);
4255 __CFAssertIsString(trimString);
4256
4257 newStartIndex = 0;
4258 length = __CFStrLength(string);
4259
4260 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length - newStartIndex), kCFCompareAnchored, &range)) {
4261 newStartIndex = range.location + range.length;
4262 }
4263
4264 if (newStartIndex < length) {
4265 CFIndex charSize = __CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t);
4266 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4267
4268 length -= newStartIndex;
4269 if (__CFStrLength(trimString) < length) {
4270 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length), kCFCompareAnchored|kCFCompareBackwards, &range)) {
4271 length = range.location - newStartIndex;
4272 }
4273 }
4274 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4275 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4276 } else { // Only trimString in string, trim all
4277 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4278 }
4279 }
4280
4281 void CFStringTrimWhitespace(CFMutableStringRef string) {
4282 CFIndex newStartIndex;
4283 CFIndex length;
4284 CFStringInlineBuffer buffer;
4285
4286 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, void, string, "_cfTrimWS");
4287
4288 __CFAssertIsStringAndMutable(string);
4289
4290 newStartIndex = 0;
4291 length = __CFStrLength(string);
4292
4293 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
4294 CFIndex buffer_idx = 0;
4295
4296 while (buffer_idx < length && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4297 buffer_idx++;
4298 newStartIndex = buffer_idx;
4299
4300 if (newStartIndex < length) {
4301 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4302 CFIndex charSize = (__CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t));
4303
4304 buffer_idx = length - 1;
4305 while (0 <= buffer_idx && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
4306 buffer_idx--;
4307 length = buffer_idx - newStartIndex + 1;
4308
4309 memmove(contents, contents + newStartIndex * charSize, length * charSize);
4310 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
4311 } else { // Whitespace only string
4312 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
4313 }
4314 }
4315
4316 void CFStringLowercase(CFMutableStringRef string, CFLocaleRef locale) {
4317 CFIndex currentIndex = 0;
4318 CFIndex length;
4319 const uint8_t *langCode;
4320 Boolean isEightBit = __CFStrIsEightBit(string);
4321
4322 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfLowercase:", locale);
4323
4324 __CFAssertIsStringAndMutable(string);
4325
4326 length = __CFStrLength(string);
4327
4328 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4329
4330 if (!langCode && isEightBit) {
4331 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4332 for (;currentIndex < length;currentIndex++) {
4333 if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4334 contents[currentIndex] += 'a' - 'A';
4335 } else if (contents[currentIndex] > 127) {
4336 break;
4337 }
4338 }
4339 }
4340
4341 if (currentIndex < length) {
4342 UTF16Char *contents;
4343 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4344 CFIndex mappedLength;
4345 UTF32Char currentChar;
4346 UInt32 flags = 0;
4347
4348 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4349
4350 contents = (UniChar *)__CFStrContents(string);
4351
4352 for (;currentIndex < length;currentIndex++) {
4353
4354 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4355 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4356 } else {
4357 currentChar = contents[currentIndex];
4358 }
4359 flags = ((langCode || (currentChar == 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToLowercase, langCode, flags) : 0);
4360
4361 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, flags, langCode);
4362 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4363
4364 if (currentChar > 0xFFFF) { // Non-BMP char
4365 switch (mappedLength) {
4366 case 0:
4367 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4368 contents = (UniChar *)__CFStrContents(string);
4369 length -= 2;
4370 break;
4371
4372 case 1:
4373 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4374 contents = (UniChar *)__CFStrContents(string);
4375 --length;
4376 break;
4377
4378 case 2:
4379 contents[++currentIndex] = mappedCharacters[1];
4380 break;
4381
4382 default:
4383 --mappedLength; // Skip the current char
4384 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4385 contents = (UniChar *)__CFStrContents(string);
4386 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4387 length += (mappedLength - 1);
4388 currentIndex += mappedLength;
4389 break;
4390 }
4391 } else if (mappedLength == 0) {
4392 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4393 contents = (UniChar *)__CFStrContents(string);
4394 --length;
4395 } else if (mappedLength > 1) {
4396 --mappedLength; // Skip the current char
4397 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4398 contents = (UniChar *)__CFStrContents(string);
4399 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4400 length += mappedLength;
4401 currentIndex += mappedLength;
4402 }
4403 }
4404 }
4405 }
4406
4407 void CFStringUppercase(CFMutableStringRef string, CFLocaleRef locale) {
4408 CFIndex currentIndex = 0;
4409 CFIndex length;
4410 const uint8_t *langCode;
4411 Boolean isEightBit = __CFStrIsEightBit(string);
4412
4413 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfUppercase:", locale);
4414
4415 __CFAssertIsStringAndMutable(string);
4416
4417 length = __CFStrLength(string);
4418
4419 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4420
4421 if (!langCode && isEightBit) {
4422 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4423 for (;currentIndex < length;currentIndex++) {
4424 if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4425 contents[currentIndex] -= 'a' - 'A';
4426 } else if (contents[currentIndex] > 127) {
4427 break;
4428 }
4429 }
4430 }
4431
4432 if (currentIndex < length) {
4433 UniChar *contents;
4434 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4435 CFIndex mappedLength;
4436 UTF32Char currentChar;
4437 UInt32 flags = 0;
4438
4439 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4440
4441 contents = (UniChar *)__CFStrContents(string);
4442
4443 for (;currentIndex < length;currentIndex++) {
4444 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4445 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4446 } else {
4447 currentChar = contents[currentIndex];
4448 }
4449
4450 flags = (langCode ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToUppercase, langCode, flags) : 0);
4451
4452 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToUppercase, flags, langCode);
4453 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4454
4455 if (currentChar > 0xFFFF) { // Non-BMP char
4456 switch (mappedLength) {
4457 case 0:
4458 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4459 contents = (UniChar *)__CFStrContents(string);
4460 length -= 2;
4461 break;
4462
4463 case 1:
4464 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4465 contents = (UniChar *)__CFStrContents(string);
4466 --length;
4467 break;
4468
4469 case 2:
4470 contents[++currentIndex] = mappedCharacters[1];
4471 break;
4472
4473 default:
4474 --mappedLength; // Skip the current char
4475 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4476 contents = (UniChar *)__CFStrContents(string);
4477 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4478 length += (mappedLength - 1);
4479 currentIndex += mappedLength;
4480 break;
4481 }
4482 } else if (mappedLength == 0) {
4483 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4484 contents = (UniChar *)__CFStrContents(string);
4485 --length;
4486 } else if (mappedLength > 1) {
4487 --mappedLength; // Skip the current char
4488 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4489 contents = (UniChar *)__CFStrContents(string);
4490 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4491 length += mappedLength;
4492 currentIndex += mappedLength;
4493 }
4494 }
4495 }
4496 }
4497
4498
4499 void CFStringCapitalize(CFMutableStringRef string, CFLocaleRef locale) {
4500 CFIndex currentIndex = 0;
4501 CFIndex length;
4502 const uint8_t *langCode;
4503 Boolean isEightBit = __CFStrIsEightBit(string);
4504 Boolean isLastCased = false;
4505 const uint8_t *caseIgnorableForBMP;
4506
4507 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfCapitalize:", locale);
4508
4509 __CFAssertIsStringAndMutable(string);
4510
4511 length = __CFStrLength(string);
4512
4513 caseIgnorableForBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet, 0);
4514
4515 langCode = (const uint8_t *)(_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4516
4517 if (!langCode && isEightBit) {
4518 uint8_t *contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4519 for (;currentIndex < length;currentIndex++) {
4520 if (contents[currentIndex] > 127) {
4521 break;
4522 } else if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4523 contents[currentIndex] += (isLastCased ? 'a' - 'A' : 0);
4524 isLastCased = true;
4525 } else if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4526 contents[currentIndex] -= (!isLastCased ? 'a' - 'A' : 0);
4527 isLastCased = true;
4528 } else if (!CFUniCharIsMemberOfBitmap(contents[currentIndex], caseIgnorableForBMP)) {
4529 isLastCased = false;
4530 }
4531 }
4532 }
4533
4534 if (currentIndex < length) {
4535 UniChar *contents;
4536 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4537 CFIndex mappedLength;
4538 UTF32Char currentChar;
4539 UInt32 flags = 0;
4540
4541 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4542
4543 contents = (UniChar *)__CFStrContents(string);
4544
4545 for (;currentIndex < length;currentIndex++) {
4546 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4547 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4548 } else {
4549 currentChar = contents[currentIndex];
4550 }
4551 flags = ((langCode || ((currentChar == 0x03A3) && isLastCased)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), langCode, flags) : 0);
4552
4553 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), flags, langCode);
4554 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4555
4556 if (currentChar > 0xFFFF) { // Non-BMP char
4557 switch (mappedLength) {
4558 case 0:
4559 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4560 contents = (UniChar *)__CFStrContents(string);
4561 length -= 2;
4562 break;
4563
4564 case 1:
4565 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4566 contents = (UniChar *)__CFStrContents(string);
4567 --length;
4568 break;
4569
4570 case 2:
4571 contents[++currentIndex] = mappedCharacters[1];
4572 break;
4573
4574 default:
4575 --mappedLength; // Skip the current char
4576 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4577 contents = (UniChar *)__CFStrContents(string);
4578 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4579 length += (mappedLength - 1);
4580 currentIndex += mappedLength;
4581 break;
4582 }
4583 } else if (mappedLength == 0) {
4584 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4585 contents = (UniChar *)__CFStrContents(string);
4586 --length;
4587 } else if (mappedLength > 1) {
4588 --mappedLength; // Skip the current char
4589 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4590 contents = (UniChar *)__CFStrContents(string);
4591 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4592 length += mappedLength;
4593 currentIndex += mappedLength;
4594 }
4595
4596 if (!((currentChar > 0xFFFF) ? CFUniCharIsMemberOf(currentChar, kCFUniCharCaseIgnorableCharacterSet) : CFUniCharIsMemberOfBitmap(currentChar, caseIgnorableForBMP))) { // We have non-caseignorable here
4597 isLastCased = ((CFUniCharIsMemberOf(currentChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(currentChar, kCFUniCharLowercaseLetterCharacterSet)) ? true : false);
4598 }
4599 }
4600 }
4601 }
4602
4603
/* Number of UTF-32 slots in the on-stack scratch buffer used while normalizing */
#define MAX_DECOMP_BUF 64

/* Constants for arithmetic Hangul syllable <-> conjoining Jamo conversion */
#define HANGUL_SBASE 0xAC00	/* first precomposed syllable */
#define HANGUL_LBASE 0x1100	/* first leading consonant (L) */
#define HANGUL_VBASE 0x1161	/* first vowel (V) */
#define HANGUL_TBASE 0x11A7	/* one below the first trailing consonant (T); index 0 means "no T" */
#define HANGUL_LCOUNT 19
#define HANGUL_VCOUNT 21
#define HANGUL_TCOUNT 28
#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)	/* syllables per leading consonant: 588 */
#define HANGUL_SCOUNT (HANGUL_LCOUNT * HANGUL_NCOUNT)	/* total precomposed syllables: 11172 */
4615
4616 CF_INLINE uint32_t __CFGetUTF16Length(const UTF32Char *characters, uint32_t utf32Length) {
4617 const UTF32Char *limit = characters + utf32Length;
4618 uint32_t length = 0;
4619
4620 while (characters < limit) length += (*(characters++) > 0xFFFF ? 2 : 1);
4621
4622 return length;
4623 }
4624
4625 CF_INLINE void __CFFillInUTF16(const UTF32Char *characters, UTF16Char *dst, uint32_t utf32Length) {
4626 const UTF32Char *limit = characters + utf32Length;
4627 UTF32Char currentChar;
4628
4629 while (characters < limit) {
4630 currentChar = *(characters++);
4631 if (currentChar > 0xFFFF) {
4632 currentChar -= 0x10000;
4633 *(dst++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
4634 *(dst++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
4635 } else {
4636 *(dst++) = currentChar;
4637 }
4638 }
4639 }
4640
4641 void CFStringNormalize(CFMutableStringRef string, CFStringNormalizationForm theForm) {
4642 CFIndex currentIndex = 0;
4643 CFIndex length;
4644 bool needToReorder = true;
4645
4646 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfNormalize:", theForm);
4647
4648 __CFAssertIsStringAndMutable(string);
4649
4650 length = __CFStrLength(string);
4651
4652 if (__CFStrIsEightBit(string)) {
4653 uint8_t *contents;
4654
4655 if (theForm == kCFStringNormalizationFormC) return; // 8bit form has no decomposition
4656
4657 contents = (uint8_t *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4658
4659 for (;currentIndex < length;currentIndex++) {
4660 if (contents[currentIndex] > 127) {
4661 __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); // need to do harm way
4662 needToReorder = false;
4663 break;
4664 }
4665 }
4666 }
4667
4668 if (currentIndex < length) {
4669 UTF16Char *limit = (UTF16Char *)__CFStrContents(string) + length;
4670 UTF16Char *contents = (UTF16Char *)__CFStrContents(string) + currentIndex;
4671 UTF32Char buffer[MAX_DECOMP_BUF];
4672 UTF32Char *mappedCharacters = buffer;
4673 CFIndex allocatedLength = MAX_DECOMP_BUF;
4674 CFIndex mappedLength;
4675 CFIndex currentLength;
4676 UTF32Char currentChar;
4677 const uint8_t *decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
4678 const uint8_t *nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
4679 const uint8_t *combiningBMP = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
4680
4681 while (contents < limit) {
4682 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4683 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4684 currentLength = 2;
4685 contents += 2;
4686 } else {
4687 currentChar = *(contents++);
4688 currentLength = 1;
4689 }
4690
4691 mappedLength = 0;
4692
4693 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16)))) && (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, ((currentChar < 0x10000) ? combiningBMP : (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16)))))) {
4694 if ((theForm & kCFStringNormalizationFormC) == 0 || currentChar < HANGUL_SBASE || currentChar > (HANGUL_SBASE + HANGUL_SCOUNT)) { // We don't have to decompose Hangul Syllables if we're precomposing again
4695 mappedLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters, MAX_DECOMP_BUF);
4696 }
4697 }
4698
4699 if ((needToReorder || (theForm & kCFStringNormalizationFormC)) && ((contents < limit) || (mappedLength == 0))) {
4700 if (mappedLength > 0) {
4701 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4702 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4703 } else {
4704 currentChar = *contents;
4705 }
4706 }
4707
4708 if (0 != CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) {
4709 uint32_t decompLength;
4710
4711 if (mappedLength == 0) {
4712 contents -= (currentChar & 0xFFFF0000 ? 2 : 1);
4713 if (currentIndex > 0) {
4714 if (CFUniCharIsSurrogateLowCharacter(*(contents - 1)) && (currentIndex > 1) && CFUniCharIsSurrogateHighCharacter(*(contents - 2))) {
4715 *mappedCharacters = CFUniCharGetLongCharacterForSurrogatePair(*(contents - 2), *(contents - 1));
4716 currentIndex -= 2;
4717 currentLength += 2;
4718 } else {
4719 *mappedCharacters = *(contents - 1);
4720 --currentIndex;
4721 ++currentLength;
4722 }
4723 mappedLength = 1;
4724 }
4725 } else {
4726 currentLength += (currentChar & 0xFFFF0000 ? 2 : 1);
4727 }
4728 contents += (currentChar & 0xFFFF0000 ? 2 : 1);
4729
4730 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
4731 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4732 mappedLength += decompLength;
4733 } else {
4734 mappedCharacters[mappedLength++] = currentChar;
4735 }
4736
4737 while (contents < limit) {
4738 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4739 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4740 } else {
4741 currentChar = *contents;
4742 }
4743 if (0 == CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)((currentChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16))))) break;
4744 if (currentChar & 0xFFFF0000) {
4745 contents += 2;
4746 currentLength += 2;
4747 } else {
4748 ++contents;
4749 ++currentLength;
4750 }
4751 if (mappedLength == allocatedLength) {
4752 allocatedLength += MAX_DECOMP_BUF;
4753 if (mappedCharacters == buffer) {
4754 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
4755 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4756 } else {
4757 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4758 }
4759 }
4760 if (CFUniCharIsMemberOfBitmap(currentChar, ((currentChar < 0x10000) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (currentChar >> 16))))) { // Vietnamese accent, etc.
4761 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4762 mappedLength += decompLength;
4763 } else {
4764 mappedCharacters[mappedLength++] = currentChar;
4765 }
4766 }
4767 }
4768 if (needToReorder && mappedLength > 1) CFUniCharPrioritySort(mappedCharacters, mappedLength);
4769 }
4770
4771 if (theForm & kCFStringNormalizationFormKD) {
4772 CFIndex newLength = 0;
4773
4774 if (mappedLength == 0 && CFUniCharIsMemberOf(currentChar, kCFUniCharCompatibilityDecomposableCharacterSet)) {
4775 mappedCharacters[mappedLength++] = currentChar;
4776 }
4777 while (newLength < mappedLength) {
4778 newLength = CFUniCharCompatibilityDecompose(mappedCharacters, mappedLength, allocatedLength);
4779 if (newLength == 0) {
4780 allocatedLength += MAX_DECOMP_BUF;
4781 if (mappedCharacters == buffer) {
4782 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, allocatedLength * sizeof(UTF32Char), 0);
4783 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4784 } else {
4785 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(kCFAllocatorSystemDefault, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4786 }
4787 }
4788 }
4789 mappedLength = newLength;
4790 }
4791
4792 if (theForm & kCFStringNormalizationFormC) {
4793 UTF32Char nextChar;
4794
4795 if (mappedLength > 1) {
4796 CFIndex consumedLength = 1;
4797 UTF32Char *currentBase = mappedCharacters;
4798 uint8_t currentClass, lastClass = 0;
4799 bool didCombine = false;
4800
4801 currentChar = *mappedCharacters;
4802
4803 while (consumedLength < mappedLength) {
4804 nextChar = mappedCharacters[consumedLength];
4805 currentClass = CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)((nextChar < 0x10000) ? combiningBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))));
4806
4807 if (theForm & kCFStringNormalizationFormKD) {
4808 if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) {
4809 SInt8 lIndex = currentChar - HANGUL_LBASE;
4810
4811 if ((0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4812 SInt16 vIndex = nextChar - HANGUL_VBASE;
4813
4814 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4815 SInt16 tIndex = 0;
4816 CFIndex usedLength = mappedLength;
4817
4818 mappedCharacters[consumedLength++] = 0xFFFD;
4819
4820 if (consumedLength < mappedLength) {
4821 tIndex = mappedCharacters[consumedLength] - HANGUL_TBASE;
4822 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4823 tIndex = 0;
4824 } else {
4825 mappedCharacters[consumedLength++] = 0xFFFD;
4826 }
4827 }
4828 *currentBase = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4829
4830 while (--usedLength > 0) {
4831 if (mappedCharacters[usedLength] == 0xFFFD) {
4832 --mappedLength;
4833 --consumedLength;
4834 memmove(mappedCharacters + usedLength, mappedCharacters + usedLength + 1, (mappedLength - usedLength) * sizeof(UTF32Char));
4835 }
4836 }
4837 currentBase = mappedCharacters + consumedLength;
4838 currentChar = *currentBase;
4839 ++consumedLength;
4840
4841 continue;
4842 }
4843 }
4844 }
4845 if (!CFUniCharIsMemberOfBitmap(nextChar, ((nextChar < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))))) {
4846 *currentBase = currentChar;
4847 currentBase = mappedCharacters + consumedLength;
4848 currentChar = nextChar;
4849 ++consumedLength;
4850 continue;
4851 }
4852 }
4853
4854 if ((lastClass == 0) || (currentClass > lastClass)) {
4855 nextChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
4856 if (nextChar == 0xFFFD) {
4857 lastClass = currentClass;
4858 } else {
4859 mappedCharacters[consumedLength] = 0xFFFD;
4860 didCombine = true;
4861 currentChar = nextChar;
4862 }
4863 }
4864 ++consumedLength;
4865 }
4866
4867 *currentBase = currentChar;
4868 if (didCombine) {
4869 consumedLength = mappedLength;
4870 while (--consumedLength > 0) {
4871 if (mappedCharacters[consumedLength] == 0xFFFD) {
4872 --mappedLength;
4873 memmove(mappedCharacters + consumedLength, mappedCharacters + consumedLength + 1, (mappedLength - consumedLength) * sizeof(UTF32Char));
4874 }
4875 }
4876 }
4877 } else if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo
4878 SInt8 lIndex = currentChar - HANGUL_LBASE;
4879
4880 if ((contents < limit) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4881 SInt16 vIndex = *contents - HANGUL_VBASE;
4882
4883 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4884 SInt16 tIndex = 0;
4885
4886 ++contents; ++currentLength;
4887
4888 if (contents < limit) {
4889 tIndex = *contents - HANGUL_TBASE;
4890 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4891 tIndex = 0;
4892 } else {
4893 ++contents; ++currentLength;
4894 }
4895 }
4896 *mappedCharacters = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4897 mappedLength = 1;
4898 }
4899 }
4900 } else { // collect class 0 non-base characters
4901 while (contents < limit) {
4902 nextChar = *contents;
4903 if (CFUniCharIsSurrogateHighCharacter(nextChar) && ((contents + 1) < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4904 nextChar = CFUniCharGetLongCharacterForSurrogatePair(nextChar, *(contents + 1));
4905 if (!CFUniCharIsMemberOfBitmap(nextChar, (const uint8_t *)CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nextChar >> 16))) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (nextChar >> 16))))) break;
4906 } else {
4907 if (!CFUniCharIsMemberOfBitmap(nextChar, nonBaseBMP) || (0 != CFUniCharGetCombiningPropertyForCharacter(nextChar, combiningBMP))) break;
4908 }
4909 currentChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
4910 if (0xFFFD == currentChar) break;
4911
4912 if (nextChar < 0x10000) {
4913 ++contents; ++currentLength;
4914 } else {
4915 contents += 2;
4916 currentLength += 2;
4917 }
4918
4919 *mappedCharacters = currentChar;
4920 mappedLength = 1;
4921 }
4922 }
4923 }
4924
4925 if (mappedLength > 0) {
4926 CFIndex utf16Length = __CFGetUTF16Length(mappedCharacters, mappedLength);
4927
4928 if (utf16Length != currentLength) {
4929 __CFStringChangeSize(string, CFRangeMake(currentIndex, currentLength), utf16Length, true);
4930 currentLength = utf16Length;
4931 }
4932 contents = (UTF16Char *)__CFStrContents(string);
4933 limit = contents + __CFStrLength(string);
4934 contents += currentIndex;
4935 __CFFillInUTF16(mappedCharacters, contents, mappedLength);
4936 contents += utf16Length;
4937 }
4938 currentIndex += currentLength;
4939 }
4940
4941 if (mappedCharacters != buffer) CFAllocatorDeallocate(kCFAllocatorSystemDefault, mappedCharacters);
4942 }
4943 }
4944
/* CFStringFold
   Rewrites theString in place into its folded form.
   theFlags: only kCFCompareCaseInsensitive, kCFCompareDiacriticInsensitive and kCFCompareWidthInsensitive
   drive the folding (every other bit is masked off below); kCFCompareLocalized is only consulted, before the
   masking, to decide whether the current locale should be fetched when locale is NULL.
   locale: optional; when a locale is available its language identifier is handed to the cluster folding helper.
   The function returns without touching the string when it is empty or when none of the three folding flags is set.
*/
4945 void CFStringFold(CFMutableStringRef theString, CFStringCompareFlags theFlags, CFLocaleRef locale) {
4946 CFStringInlineBuffer stringBuffer;
4947 CFIndex length = CFStringGetLength(theString);
4948 CFIndex currentIndex = 0;
4949 CFIndex bufferLength = 0;
4950 UTF32Char buffer[kCFStringStackBufferLength];
4951 const uint8_t *cString;
4952 const uint8_t *langCode;
4953 CFStringEncoding eightBitEncoding;
4954 bool caseInsensitive = ((theFlags & kCFCompareCaseInsensitive) ? true : false);
4955 bool isObjc = CF_IS_OBJC(__kCFStringTypeID, theString);
4956 CFLocaleRef theLocale = locale;
4957
// A locale copied here is owned by this function and released at the bail label
4958 if ((theFlags & kCFCompareLocalized) && (NULL == locale)) {
4959 theLocale = CFLocaleCopyCurrent();
4960 }
4961
// Keep only the flags this function implements
4962 theFlags &= (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive|kCFCompareWidthInsensitive);
4963
4964 if ((0 == theFlags) || (0 == length)) goto bail; // nothing to do
4965
4966 langCode = ((NULL == theLocale) ? NULL : (const uint8_t *)_CFStrGetLanguageIdentifierForLocale(theLocale));
4967
4968 eightBitEncoding = __CFStringGetEightBitStringEncoding();
4969 cString = (const uint8_t *)CFStringGetCStringPtr(theString, eightBitEncoding);
4970
4971 if ((NULL != cString) && !caseInsensitive && (kCFStringEncodingASCII == eightBitEncoding)) goto bail; // All ASCII
4972
4973 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
4974
// Fast path for strings with an 8-bit C string pointer: fold byte by byte, writing the result back in place
// for as long as each result is a single character <= 0x7F. The loop stops at the first byte that cannot be
// handled that way and leaves its index in currentIndex for the general code below.
4975 if ((NULL != cString) && (theFlags & (kCFCompareCaseInsensitive|kCFCompareDiacriticInsensitive))) {
4976 const uint8_t *cStringPtr = cString;
4977 const uint8_t *cStringLimit = cString + length;
// NULL for ObjC strings: their storage is not written directly, so the first needed change ends the fast path
4978 uint8_t *cStringContents = (isObjc ? NULL : (uint8_t *)__CFStrContents(theString) + __CFStrSkipAnyLengthByte(theString));
4979
4980 while (cStringPtr < cStringLimit) {
4981 if ((*cStringPtr < 0x80) && (NULL == langCode)) {
// Plain ASCII with no language-specific rules: only 'A'..'Z' ever changes, and only for case folding
4982 if (caseInsensitive && (*cStringPtr >= 'A') && (*cStringPtr <= 'Z')) {
4983 if (NULL == cStringContents) {
4984 break;
4985 } else {
4986 cStringContents[cStringPtr - cString] += ('a' - 'A');
4987 }
4988 }
4989 } else {
// The helper's return value is used as the number of UTF32Chars it placed in buffer; > 0 means the character changed
4990 if ((bufferLength = __CFStringFoldCharacterClusterAtIndex((UTF32Char)__CFCharToUniCharTable[*cStringPtr], &stringBuffer, cStringPtr - cString, theFlags, langCode, buffer, kCFStringStackBufferLength, NULL)) > 0) {
// Result does not fit in one 7-bit byte (or storage is not writable): leave it pending in buffer/bufferLength
4991 if ((*buffer > 0x7F) || (bufferLength > 1) || (NULL == cStringContents)) break;
4992 cStringContents[cStringPtr - cString] = *buffer;
4993 }
4994 }
4995 ++cStringPtr;
4996 }
4997
4998 currentIndex = cStringPtr - cString;
4999 }
5000
// General path for whatever the fast path did not finish (or the whole string when it was not taken)
5001 if (currentIndex < length) {
5002 UTF16Char *contents;
5003
5004 if (isObjc) {
// ObjC string: copy the remaining characters out, fold the copy as a CFString (recursive call), then
// replace the remaining range of the original with the folded copy.
5005 CFMutableStringRef cfString;
5006 CFRange range = CFRangeMake(currentIndex, length - currentIndex);
5007
5008 contents = (UTF16Char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * range.length, 0);
5009
5010 CFStringGetCharacters(theString, range, contents);
5011
// NOTE(review): the character buffer is handed to cfString with a NULL external allocator; presumably the
// string takes over freeing it -- confirm, since it was allocated with kCFAllocatorSystemDefault.
5012 cfString = CFStringCreateMutableWithExternalCharactersNoCopy(kCFAllocatorSystemDefault, contents, range.length, range.length, NULL);
5013
5014 CFStringFold(cfString, theFlags, theLocale);
5015
5016 CFStringReplace(theString, range, cfString);
5017
5018 CFRelease(cfString);
5019 } else {
5020 const UTF32Char *characters;
5021 const UTF32Char *charactersLimit;
5022 UTF32Char character;
5023 CFIndex consumedLength;
5024
5025 contents = NULL;
5026
// bufferLength > 0 here means the fast path broke out with a folded result still pending in buffer:
// make room for the extra bufferLength - 1 units after currentIndex and store the result.
5027 if (bufferLength > 0) {
// NOTE(review): the final 'true' presumably requests Unicode storage (the contents are written as UTF16Char below) -- confirm
5028 __CFStringChangeSize(theString, CFRangeMake(currentIndex + 1, 0), bufferLength - 1, true);
5029 length = __CFStrLength(theString);
5030 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5031
5032 contents = (UTF16Char *)__CFStrContents(theString) + currentIndex;
5033 characters = buffer;
5034 charactersLimit = characters + bufferLength;
// Each UTF32Char is narrowed to one UTF16Char; this assumes the pending result contains no character above 0xFFFF
5035 while (characters < charactersLimit) *(contents++) = (UTF16Char)*(characters++);
5036 ++currentIndex;
5037 }
5038
5039 while (currentIndex < length) {
5040 character = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex);
5041
// consumedLength stays 0 when the character at currentIndex needs no change
5042 consumedLength = 0;
5043
5044 if ((NULL == langCode) && (character < 0x80) && (0 == (theFlags & kCFCompareDiacriticInsensitive))) {
// ASCII shortcut: only upper-case letters change, and only when case folding was requested
5045 if (caseInsensitive && (character >= 'A') && (character <= 'Z')) {
5046 consumedLength = 1;
5047 bufferLength = 1;
5048 *buffer = character + ('a' - 'A');
5049 }
5050 } else {
// Combine a valid surrogate pair into one UTF-32 character before folding
5051 if (CFUniCharIsSurrogateHighCharacter(character) && ((currentIndex + 1) < length)) {
5052 UTF16Char lowSurrogate = __CFStringGetCharacterFromInlineBufferQuick(&stringBuffer, currentIndex + 1);
5053 if (CFUniCharIsSurrogateLowCharacter(lowSurrogate)) character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
5054 }
5055
5056 bufferLength = __CFStringFoldCharacterClusterAtIndex(character, &stringBuffer, currentIndex, theFlags, langCode, buffer, kCFStringStackBufferLength, &consumedLength);
5057 }
5058
5059 if (consumedLength > 0) {
5060 CFIndex utf16Length = bufferLength;
5061
5062 characters = buffer;
5063 charactersLimit = characters + bufferLength;
5064
5065 while (characters < charactersLimit) if (*(characters++) > 0xFFFF) ++utf16Length; // Extend bufferLength to the UTF-16 length
5066
// Resize when the replacement has a different UTF-16 length than the consumed range; the call is also made
// for 8-bit strings even when the lengths match.
5067 if ((utf16Length != consumedLength) || __CFStrIsEightBit(theString)) {
5068 CFRange range;
5069 CFIndex insertLength;
5070
5071 if (consumedLength < utf16Length) { // Need to expand
5072 range = CFRangeMake(currentIndex + consumedLength, 0);
5073 insertLength = utf16Length - consumedLength;
5074 } else {
5075 range = CFRangeMake(currentIndex + utf16Length, consumedLength - utf16Length);
5076 insertLength = 0;
5077 }
5078 __CFStringChangeSize(theString, range, insertLength, true);
// The storage may have changed: refresh the cached length and the inline buffer
5079 length = __CFStrLength(theString);
5080 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, length));
5081 }
5082
// Write the folded characters as UTF-16 directly into the string's storage at currentIndex
5083 (void)CFUniCharFromUTF32(buffer, bufferLength, (UTF16Char *)__CFStrContents(theString) + currentIndex, true, __CF_BIG_ENDIAN__);
5084
5085 currentIndex += utf16Length;
5086 } else {
5087 ++currentIndex;
5088 }
5089 }
5090 }
5091 }
5092
5093 bail:
// Release the locale only when it was copied above (the caller passed NULL)
5094 if (NULL == locale && theLocale) {
5095 CFRelease(theLocale);
5096 }
5097 }
5098
/* Flag bits recorded in CFFormatSpec.flags while a conversion specification is parsed. */
enum {
    kCFStringFormatZeroFlag  = 0x1,	/* '0': pad with zeros; when absent, padding is the space char */
    kCFStringFormatMinusFlag = 0x2,	/* '-': when absent, no flag implied */
    kCFStringFormatPlusFlag  = 0x4,	/* '+': when absent, no flag implied; overrides space */
    kCFStringFormatSpaceFlag = 0x8	/* ' ': when absent, no flag implied */
};
5105
/* Description of one chunk of a format string: either a run of literal text or one '%' conversion
   specification. Filled in by _CFStringAppendFormatAndArgumentsAux and __CFParseFormatSpec. */
5106 typedef struct {
5107 int16_t size; // one of the CFFormatSize* values; 0 when no size was specified
5108 int16_t type; // one of the CFFormat*Type values; 0 when the spec ended without a conversion character
5109 SInt32 loc; // index into the format string where this chunk starts
5110 SInt32 len; // number of format characters in this chunk
5111 SInt32 widthArg; // field width written literally in the format; -1 when none
5112 SInt32 precArg; // precision written literally in the format; -1 when none
5113 uint32_t flags; // combination of kCFStringFormat*Flag bits
5114 int8_t mainArgNum; // 0-based index of the argument being converted; -1 until assigned
5115 int8_t precArgNum; // 0-based index of the argument supplying the precision; -1 when none, -2 while a '*' awaits its index
5116 int8_t widthArgNum; // 0-based index of the argument supplying the width; -1 when none, -2 while a '*' awaits its index
5117 int8_t unused1;
5118 } CFFormatSpec;
5119
/* One argument fetched from the va_list, tagged with the type and size that tell which union member holds it. */
5120 typedef struct {
5121 int16_t type; // one of the CFFormat*Type values; 0 for an argument slot no spec refers to
5122 int16_t size; // one of the CFFormatSize* values
5123 union {
5124 int64_t int64Value; // CFFormatLongType and CFFormatSingleUnicharType arguments
5125 double doubleValue; // CFFormatDoubleType arguments other than CFFormatSize16
5126 #if LONG_DOUBLE_SUPPORT
5127 long double longDoubleValue; // CFFormatDoubleType arguments of CFFormatSize16
5128 #endif
5129 void *pointerValue; // pointer, string and object arguments
5130 } value;
5131 } CFPrintValue;
5132
/* Size classes of a formatted argument, stored in CFFormatSpec.size and CFPrintValue.size. */
enum {
    CFFormatDefaultSize = 0,	/* no size modifier seen */
    CFFormatSize1 = 1,
    CFFormatSize2 = 2,
    CFFormatSize4 = 3,
    CFFormatSize8 = 4,
    CFFormatSize16 = 5
};

/* Size classes of 'long' and of pointers: 8 bytes on LP64 targets, 4 bytes otherwise. */
enum {
#if __LP64__
    CFFormatSizeLong = CFFormatSize8,
    CFFormatSizePointer = CFFormatSize8
#else
    CFFormatSizeLong = CFFormatSize4,
    CFFormatSizePointer = CFFormatSize4
#endif
};
5148
5149
5150
/* Kinds of format chunk / argument, stored in CFFormatSpec.type and CFPrintValue.type.
   The values are consecutive, starting at 32. */
enum {
    CFFormatLiteralType = 32,
    CFFormatLongType,		/* 33 */
    CFFormatDoubleType,		/* 34 */
    CFFormatPointerType,	/* 35 */
    CFFormatObjectType,		/* 36; handled specially */	/* ??? not used anymore, can be removed? */
    CFFormatCFType,		/* 37; handled specially */
    CFFormatUnicharsType,	/* 38; handled specially */
    CFFormatCharsType,		/* 39; handled specially */
    CFFormatPascalCharsType,	/* 40; handled specially */
    CFFormatSingleUnicharType,	/* 41; handled specially */
    CFFormatDummyPointerType	/* 42; special case for %n */
};
5164
5165 CF_INLINE void __CFParseFormatSpec(const UniChar *uformat, const uint8_t *cformat, SInt32 *fmtIdx, SInt32 fmtLen, CFFormatSpec *spec) {
5166 Boolean seenDot = false;
5167 for (;;) {
5168 UniChar ch;
5169 if (fmtLen <= *fmtIdx) return; /* no type */
5170 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5171 reswtch:switch (ch) {
5172 case '#': // ignored for now
5173 break;
5174 case 0x20:
5175 if (!(spec->flags & kCFStringFormatPlusFlag)) spec->flags |= kCFStringFormatSpaceFlag;
5176 break;
5177 case '-':
5178 spec->flags |= kCFStringFormatMinusFlag;
5179 spec->flags &= ~kCFStringFormatZeroFlag; // remove zero flag
5180 break;
5181 case '+':
5182 spec->flags |= kCFStringFormatPlusFlag;
5183 spec->flags &= ~kCFStringFormatSpaceFlag; // remove space flag
5184 break;
5185 case '0':
5186 if (!(spec->flags & kCFStringFormatMinusFlag)) spec->flags |= kCFStringFormatZeroFlag;
5187 break;
5188 case 'h':
5189 spec->size = CFFormatSize2;
5190 break;
5191 case 'l':
5192 if (*fmtIdx < fmtLen) {
5193 // fetch next character, don't increment fmtIdx
5194 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
5195 if ('l' == ch) { // 'll' for long long, like 'q'
5196 (*fmtIdx)++;
5197 spec->size = CFFormatSize8;
5198 break;
5199 }
5200 }
5201 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5202 break;
5203 #if LONG_DOUBLE_SUPPORT
5204 case 'L':
5205 spec->size = CFFormatSize16;
5206 break;
5207 #endif
5208 case 'q':
5209 spec->size = CFFormatSize8;
5210 break;
5211 case 't': case 'z':
5212 spec->size = CFFormatSizeLong; // 4 or 8 depending on LP64
5213 break;
5214 case 'j':
5215 spec->size = CFFormatSize8;
5216 break;
5217 case 'c':
5218 spec->type = CFFormatLongType;
5219 spec->size = CFFormatSize1;
5220 return;
5221 case 'O': case 'o': case 'D': case 'd': case 'i': case 'U': case 'u': case 'x': case 'X':
5222 spec->type = CFFormatLongType;
5223 // Seems like if spec->size == 0, we should spec->size = CFFormatSize4. However, 0 is handled correctly.
5224 return;
5225 case 'a': case 'A': case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
5226 spec->type = CFFormatDoubleType;
5227 if (spec->size != CFFormatSize16) spec->size = CFFormatSize8;
5228 return;
5229 case 'n': /* %n is not handled correctly; for Leopard or newer apps, we disable it further */
5230 spec->type = _CFExecutableLinkedOnOrAfter(CFSystemVersionLeopard) ? CFFormatDummyPointerType : CFFormatPointerType;
5231 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5232 return;
5233 case 'p':
5234 spec->type = CFFormatPointerType;
5235 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5236 return;
5237 case 's':
5238 spec->type = CFFormatCharsType;
5239 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5240 return;
5241 case 'S':
5242 spec->type = CFFormatUnicharsType;
5243 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5244 return;
5245 case 'C':
5246 spec->type = CFFormatSingleUnicharType;
5247 spec->size = CFFormatSize2;
5248 return;
5249 case 'P':
5250 spec->type = CFFormatPascalCharsType;
5251 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5252 return;
5253 case '@':
5254 spec->type = CFFormatCFType;
5255 spec->size = CFFormatSizePointer; // 4 or 8 depending on LP64
5256 return;
5257 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
5258 int64_t number = 0;
5259 do {
5260 number = 10 * number + (ch - '0');
5261 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5262 } while ((UInt32)(ch - '0') <= 9);
5263 if ('$' == ch) {
5264 if (-2 == spec->precArgNum) {
5265 spec->precArgNum = (int8_t)number - 1; // Arg numbers start from 1
5266 } else if (-2 == spec->widthArgNum) {
5267 spec->widthArgNum = (int8_t)number - 1; // Arg numbers start from 1
5268 } else {
5269 spec->mainArgNum = (int8_t)number - 1; // Arg numbers start from 1
5270 }
5271 break;
5272 } else if (seenDot) { /* else it's either precision or width */
5273 spec->precArg = (SInt32)number;
5274 } else {
5275 spec->widthArg = (SInt32)number;
5276 }
5277 goto reswtch;
5278 }
5279 case '*':
5280 spec->widthArgNum = -2;
5281 break;
5282 case '.':
5283 seenDot = true;
5284 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
5285 if ('*' == ch) {
5286 spec->precArgNum = -2;
5287 break;
5288 }
5289 goto reswtch;
5290 default:
5291 spec->type = CFFormatLiteralType;
5292 return;
5293 }
5294 }
5295 }
5296
5297 /* ??? It ignores the formatOptions argument.
5298 ??? %s depends on handling of encodings by __CFStringAppendBytes
5299 */
5300 void CFStringAppendFormatAndArguments(CFMutableStringRef outputString, CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
5301 _CFStringAppendFormatAndArgumentsAux(outputString, NULL, formatOptions, formatString, args);
5302 }
5303
/* SNPRINTF(TYPE, WHAT): formats WHAT, converted to TYPE, into 'buffer' using 'formatBuffer'.
   The macro relies on names from the scope it is expanded in: specs, curSpec, buffer, formatBuffer,
   width and precision. 'width' and/or 'precision' are passed ahead of the value exactly when the
   current spec has a width / precision argument number (i.e. the format contains the matching '*').
   At most 255 bytes, terminator included, are written to buffer. */
#if DEPLOYMENT_TARGET_MACOSX
#define SNPRINTF(TYPE, WHAT) {							\
    TYPE value = (TYPE) WHAT;							\
    const int __snpWidthIsArg = (specs[curSpec].widthArgNum != -1);		\
    const int __snpPrecIsArg = (specs[curSpec].precArgNum != -1);		\
    if (__snpWidthIsArg && __snpPrecIsArg) {					\
	snprintf_l(buffer, 255, NULL, formatBuffer, width, precision, value);	\
    } else if (__snpWidthIsArg) {						\
	snprintf_l(buffer, 255, NULL, formatBuffer, width, value);		\
    } else if (__snpPrecIsArg) {						\
	snprintf_l(buffer, 255, NULL, formatBuffer, precision, value);		\
    } else {									\
	snprintf_l(buffer, 255, NULL, formatBuffer, value);			\
    }}
#else
#define SNPRINTF(TYPE, WHAT) {							\
    TYPE value = (TYPE) WHAT;							\
    const int __snpWidthIsArg = (specs[curSpec].widthArgNum != -1);		\
    const int __snpPrecIsArg = (specs[curSpec].precArgNum != -1);		\
    if (__snpWidthIsArg && __snpPrecIsArg) {					\
	snprintf(buffer, 255, formatBuffer, width, precision, value);		\
    } else if (__snpWidthIsArg) {						\
	snprintf(buffer, 255, formatBuffer, width, value);			\
    } else if (__snpPrecIsArg) {						\
	snprintf(buffer, 255, formatBuffer, precision, value);			\
    } else {									\
	snprintf(buffer, 255, formatBuffer, value);				\
    }}
#endif // DEPLOYMENT_TARGET_MACOSX
5337
5338 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, const void *), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
5339 SInt32 numSpecs, sizeSpecs, sizeArgNum, formatIdx, curSpec, argNum;
5340 CFIndex formatLen;
5341 #define FORMAT_BUFFER_LEN 400
5342 const uint8_t *cformat = NULL;
5343 const UniChar *uformat = NULL;
5344 UniChar *formatChars = NULL;
5345 UniChar localFormatBuffer[FORMAT_BUFFER_LEN];
5346
5347 #define VPRINTF_BUFFER_LEN 61
5348 CFFormatSpec localSpecsBuffer[VPRINTF_BUFFER_LEN];
5349 CFFormatSpec *specs;
5350 CFPrintValue localValuesBuffer[VPRINTF_BUFFER_LEN];
5351 CFPrintValue *values;
5352 CFAllocatorRef tmpAlloc = NULL;
5353
5354 intmax_t dummyLocation; // A place for %n to do its thing in; should be the widest possible int value
5355
5356 numSpecs = 0;
5357 sizeSpecs = 0;
5358 sizeArgNum = 0;
5359 specs = NULL;
5360 values = NULL;
5361
5362 formatLen = CFStringGetLength(formatString);
5363 if (!CF_IS_OBJC(__kCFStringTypeID, formatString)) {
5364 __CFAssertIsString(formatString);
5365 if (!__CFStrIsUnicode(formatString)) {
5366 cformat = (const uint8_t *)__CFStrContents(formatString);
5367 if (cformat) cformat += __CFStrSkipAnyLengthByte(formatString);
5368 } else {
5369 uformat = (const UniChar *)__CFStrContents(formatString);
5370 }
5371 }
5372 if (!cformat && !uformat) {
5373 formatChars = (formatLen > FORMAT_BUFFER_LEN) ? (UniChar *)CFAllocatorAllocate(tmpAlloc = __CFGetDefaultAllocator(), formatLen * sizeof(UniChar), 0) : localFormatBuffer;
5374 if (formatChars != localFormatBuffer && __CFOASafe) __CFSetLastAllocationEventName(formatChars, "CFString (temp)");
5375 CFStringGetCharacters(formatString, CFRangeMake(0, formatLen), formatChars);
5376 uformat = formatChars;
5377 }
5378
5379 /* Compute an upper bound for the number of format specifications */
5380 if (cformat) {
5381 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == cformat[formatIdx]) sizeSpecs++;
5382 } else {
5383 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == uformat[formatIdx]) sizeSpecs++;
5384 }
5385 tmpAlloc = __CFGetDefaultAllocator();
5386 specs = ((2 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? (CFFormatSpec *)CFAllocatorAllocate(tmpAlloc, (2 * sizeSpecs + 1) * sizeof(CFFormatSpec), 0) : localSpecsBuffer;
5387 if (specs != localSpecsBuffer && __CFOASafe) __CFSetLastAllocationEventName(specs, "CFString (temp)");
5388
5389 /* Collect format specification information from the format string */
5390 for (curSpec = 0, formatIdx = 0; formatIdx < formatLen; curSpec++) {
5391 SInt32 newFmtIdx;
5392 specs[curSpec].loc = formatIdx;
5393 specs[curSpec].len = 0;
5394 specs[curSpec].size = 0;
5395 specs[curSpec].type = 0;
5396 specs[curSpec].flags = 0;
5397 specs[curSpec].widthArg = -1;
5398 specs[curSpec].precArg = -1;
5399 specs[curSpec].mainArgNum = -1;
5400 specs[curSpec].precArgNum = -1;
5401 specs[curSpec].widthArgNum = -1;
5402 if (cformat) {
5403 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != cformat[newFmtIdx]; newFmtIdx++);
5404 } else {
5405 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != uformat[newFmtIdx]; newFmtIdx++);
5406 }
5407 if (newFmtIdx != formatIdx) { /* Literal chunk */
5408 specs[curSpec].type = CFFormatLiteralType;
5409 specs[curSpec].len = newFmtIdx - formatIdx;
5410 } else {
5411 newFmtIdx++; /* Skip % */
5412 __CFParseFormatSpec(uformat, cformat, &newFmtIdx, formatLen, &(specs[curSpec]));
5413 if (CFFormatLiteralType == specs[curSpec].type) {
5414 specs[curSpec].loc = formatIdx + 1;
5415 specs[curSpec].len = 1;
5416 } else {
5417 specs[curSpec].len = newFmtIdx - formatIdx;
5418 }
5419 }
5420 formatIdx = newFmtIdx;
5421
5422 // fprintf(stderr, "specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
5423
5424 }
5425 numSpecs = curSpec;
5426 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
5427 values = ((3 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? (CFPrintValue *)CFAllocatorAllocate(tmpAlloc, (3 * sizeSpecs + 1) * sizeof(CFPrintValue), 0) : localValuesBuffer;
5428 if (values != localValuesBuffer && __CFOASafe) __CFSetLastAllocationEventName(values, "CFString (temp)");
5429 memset(values, 0, (3 * sizeSpecs + 1) * sizeof(CFPrintValue));
5430 sizeArgNum = (3 * sizeSpecs + 1);
5431
5432 /* Compute values array */
5433 argNum = 0;
5434 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
5435 SInt32 newMaxArgNum;
5436 if (0 == specs[curSpec].type) continue;
5437 if (CFFormatLiteralType == specs[curSpec].type) continue;
5438 newMaxArgNum = sizeArgNum;
5439 if (newMaxArgNum < specs[curSpec].mainArgNum) {
5440 newMaxArgNum = specs[curSpec].mainArgNum;
5441 }
5442 if (newMaxArgNum < specs[curSpec].precArgNum) {
5443 newMaxArgNum = specs[curSpec].precArgNum;
5444 }
5445 if (newMaxArgNum < specs[curSpec].widthArgNum) {
5446 newMaxArgNum = specs[curSpec].widthArgNum;
5447 }
5448 if (sizeArgNum < newMaxArgNum) {
5449 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
5450 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
5451 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
5452 return; // more args than we expected!
5453 }
5454 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
5455 if (-2 == specs[curSpec].widthArgNum) {
5456 specs[curSpec].widthArgNum = argNum++;
5457 }
5458 if (-2 == specs[curSpec].precArgNum) {
5459 specs[curSpec].precArgNum = argNum++;
5460 }
5461 if (-1 == specs[curSpec].mainArgNum) {
5462 specs[curSpec].mainArgNum = argNum++;
5463 }
5464 values[specs[curSpec].mainArgNum].size = specs[curSpec].size;
5465 values[specs[curSpec].mainArgNum].type = specs[curSpec].type;
5466 if (-1 != specs[curSpec].widthArgNum) {
5467 values[specs[curSpec].widthArgNum].size = 0;
5468 values[specs[curSpec].widthArgNum].type = CFFormatLongType;
5469 }
5470 if (-1 != specs[curSpec].precArgNum) {
5471 values[specs[curSpec].precArgNum].size = 0;
5472 values[specs[curSpec].precArgNum].type = CFFormatLongType;
5473 }
5474 }
5475
5476 /* Collect the arguments in correct type from vararg list */
5477 for (argNum = 0; argNum < sizeArgNum; argNum++) {
5478 switch (values[argNum].type) {
5479 case 0:
5480 case CFFormatLiteralType:
5481 break;
5482 case CFFormatLongType:
5483 case CFFormatSingleUnicharType:
5484 if (CFFormatSize1 == values[argNum].size) {
5485 values[argNum].value.int64Value = (int64_t)(int8_t)va_arg(args, int);
5486 } else if (CFFormatSize2 == values[argNum].size) {
5487 values[argNum].value.int64Value = (int64_t)(int16_t)va_arg(args, int);
5488 } else if (CFFormatSize4 == values[argNum].size) {
5489 values[argNum].value.int64Value = (int64_t)va_arg(args, int32_t);
5490 } else if (CFFormatSize8 == values[argNum].size) {
5491 values[argNum].value.int64Value = (int64_t)va_arg(args, int64_t);
5492 } else {
5493 values[argNum].value.int64Value = (int64_t)va_arg(args, int);
5494 }
5495 break;
5496 case CFFormatDoubleType:
5497 #if LONG_DOUBLE_SUPPORT
5498 if (CFFormatSize16 == values[argNum].size) {
5499 values[argNum].value.longDoubleValue = va_arg(args, long double);
5500 } else
5501 #endif
5502 {
5503 values[argNum].value.doubleValue = va_arg(args, double);
5504 }
5505 break;
5506 case CFFormatPointerType:
5507 case CFFormatObjectType:
5508 case CFFormatCFType:
5509 case CFFormatUnicharsType:
5510 case CFFormatCharsType:
5511 case CFFormatPascalCharsType:
5512 values[argNum].value.pointerValue = va_arg(args, void *);
5513 break;
5514 case CFFormatDummyPointerType:
5515 (void)va_arg(args, void *); // Skip the provided argument
5516 values[argNum].value.pointerValue = &dummyLocation;
5517 break;
5518 }
5519 }
5520 va_end(args);
5521
5522 /* Format the pieces together */
5523 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
5524 SInt32 width = 0, precision = 0;
5525 UniChar *up, ch;
5526 Boolean hasWidth = false, hasPrecision = false;
5527
5528 // widthArgNum and widthArg are never set at the same time; same for precArg*
5529 if (-1 != specs[curSpec].widthArgNum) {
5530 width = (SInt32)values[specs[curSpec].widthArgNum].value.int64Value;
5531 hasWidth = true;
5532 }
5533 if (-1 != specs[curSpec].precArgNum) {
5534 precision = (SInt32)values[specs[curSpec].precArgNum].value.int64Value;
5535 hasPrecision = true;
5536 }
5537 if (-1 != specs[curSpec].widthArg) {
5538 width = specs[curSpec].widthArg;
5539 hasWidth = true;
5540 }
5541 if (-1 != specs[curSpec].precArg) {
5542 precision = specs[curSpec].precArg;
5543 hasPrecision = true;
5544 }
5545
5546 switch (specs[curSpec].type) {
5547 case CFFormatLongType:
5548 case CFFormatDoubleType:
5549 case CFFormatPointerType: {
5550 char formatBuffer[128];
5551 #if defined(__GNUC__)
5552 char buffer[256 + width + precision];
5553 #else
5554 char stackBuffer[512];
5555 char *dynamicBuffer = NULL;
5556 char *buffer = stackBuffer;
5557 if (256+width+precision > 512) {
5558 dynamicBuffer = (char *)CFAllocatorAllocate(kCFAllocatorSystemDefault, 256+width+precision, 0);
5559 buffer = dynamicBuffer;
5560 }
5561 #endif
5562 SInt32 cidx, idx, loc;
5563 Boolean appended = false;
5564 loc = specs[curSpec].loc;
5565 // In preparation to call snprintf(), copy the format string out
5566 if (cformat) {
5567 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
5568 if ('$' == cformat[loc + cidx]) {
5569 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
5570 } else {
5571 formatBuffer[idx] = cformat[loc + cidx];
5572 }
5573 }
5574 } else {
5575 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
5576 if ('$' == uformat[loc + cidx]) {
5577 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
5578 } else {
5579 formatBuffer[idx] = (int8_t)uformat[loc + cidx];
5580 }
5581 }
5582 }
5583 formatBuffer[idx] = '\0';
5584 // Should modify format buffer here if necessary; for example, to translate %qd to
5585 // the equivalent, on architectures which do not have %q.
5586 buffer[sizeof(buffer) - 1] = '\0';
5587 switch (specs[curSpec].type) {
5588 case CFFormatLongType:
5589 if (CFFormatSize8 == specs[curSpec].size) {
5590 SNPRINTF(int64_t, values[specs[curSpec].mainArgNum].value.int64Value)
5591 } else {
5592 SNPRINTF(SInt32, values[specs[curSpec].mainArgNum].value.int64Value)
5593 }
5594 break;
5595 case CFFormatPointerType:
5596 case CFFormatDummyPointerType:
5597 SNPRINTF(void *, values[specs[curSpec].mainArgNum].value.pointerValue)
5598 break;
5599
5600 case CFFormatDoubleType:
5601 #if LONG_DOUBLE_SUPPORT
5602 if (CFFormatSize16 == specs[curSpec].size) {
5603 SNPRINTF(long double, values[specs[curSpec].mainArgNum].value.longDoubleValue)
5604 } else
5605 #endif
5606 {
5607 SNPRINTF(double, values[specs[curSpec].mainArgNum].value.doubleValue)
5608 }
5609 // See if we need to localize the decimal point
5610 if (formatOptions) { // We have localization info
5611 CFStringRef decimalSeparator = (CFGetTypeID(formatOptions) == CFLocaleGetTypeID()) ? (CFStringRef)CFLocaleGetValue((CFLocaleRef)formatOptions, kCFLocaleDecimalSeparator) : (CFStringRef)CFDictionaryGetValue(formatOptions, CFSTR("NSDecimalSeparator"));
5612 if (decimalSeparator != NULL) { // We have a decimal separator in there
5613 CFIndex decimalPointLoc = 0;
5614 while (buffer[decimalPointLoc] != 0 && buffer[decimalPointLoc] != '.') decimalPointLoc++;
5615 if (buffer[decimalPointLoc] == '.') { // And we have a decimal point in the formatted string
5616 buffer[decimalPointLoc] = 0;
5617 CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
5618 CFStringAppend(outputString, decimalSeparator);
5619 CFStringAppendCString(outputString, (const char *)(buffer + decimalPointLoc + 1), __CFStringGetEightBitStringEncoding());
5620 appended = true;
5621 }
5622 }
5623 }
5624 break;
5625 }
5626 if (!appended) CFStringAppendCString(outputString, (const char *)buffer, __CFStringGetEightBitStringEncoding());
5627 #if !defined(__GNUC__)
5628 if (dynamicBuffer) {
5629 CFAllocatorDeallocate(kCFAllocatorSystemDefault, dynamicBuffer);
5630 }
5631 #endif
5632 }
5633 break;
5634 case CFFormatLiteralType:
5635 if (cformat) {
5636 __CFStringAppendBytes(outputString, (const char *)(cformat+specs[curSpec].loc), specs[curSpec].len, __CFStringGetEightBitStringEncoding());
5637 } else {
5638 CFStringAppendCharacters(outputString, uformat+specs[curSpec].loc, specs[curSpec].len);
5639 }
5640 break;
5641 case CFFormatPascalCharsType:
5642 case CFFormatCharsType:
5643 if (values[specs[curSpec].mainArgNum].value.pointerValue == NULL) {
5644 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5645 } else {
5646 int len;
5647 const char *str = (const char *)values[specs[curSpec].mainArgNum].value.pointerValue;
5648 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
5649 len = ((unsigned char *)str)[0];
5650 str++;
5651 if (hasPrecision && precision < len) len = precision;
5652 } else { // C-string case
5653 if (!hasPrecision) { // No precision, so rely on the terminating null character
5654 len = strlen(str);
5655 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
5656 const char *terminatingNull = (const char *)memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
5657 if (terminatingNull) { // There was a null in the first precision characters
5658 len = terminatingNull - str;
5659 } else {
5660 len = precision;
5661 }
5662 }
5663 }
5664 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5665 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5666 // to ignore those flags (and, say, never pad with '0' instead of space).
5667 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
5668 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
5669 if (hasWidth && width > len) {
5670 int w = width - len; // We need this many spaces; do it ten at a time
5671 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5672 }
5673 } else {
5674 if (hasWidth && width > len) {
5675 int w = width - len; // We need this many spaces; do it ten at a time
5676 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5677 }
5678 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
5679 }
5680 }
5681 break;
5682 case CFFormatSingleUnicharType:
5683 ch = (UniChar)values[specs[curSpec].mainArgNum].value.int64Value;
5684 CFStringAppendCharacters(outputString, &ch, 1);
5685 break;
5686 case CFFormatUnicharsType:
5687 //??? need to handle width, precision, and padding arguments
5688 up = (UniChar *)values[specs[curSpec].mainArgNum].value.pointerValue;
5689 if (NULL == up) {
5690 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5691 } else {
5692 int len;
5693 for (len = 0; 0 != up[len]; len++);
5694 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5695 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5696 // to ignore those flags (and, say, never pad with '0' instead of space).
5697 if (hasPrecision && precision < len) len = precision;
5698 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
5699 CFStringAppendCharacters(outputString, up, len);
5700 if (hasWidth && width > len) {
5701 int w = width - len; // We need this many spaces; do it ten at a time
5702 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5703 }
5704 } else {
5705 if (hasWidth && width > len) {
5706 int w = width - len; // We need this many spaces; do it ten at a time
5707 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5708 }
5709 CFStringAppendCharacters(outputString, up, len);
5710 }
5711 }
5712 break;
5713 case CFFormatCFType:
5714 case CFFormatObjectType:
5715 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
5716 CFStringRef str = NULL;
5717 if (copyDescFunc) {
5718 str = copyDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
5719 } else {
5720 str = __CFCopyFormattingDescription(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
5721 if (NULL == str) {
5722 str = CFCopyDescription(values[specs[curSpec].mainArgNum].value.pointerValue);
5723 }
5724 }
5725 if (str) {
5726 CFStringAppend(outputString, str);
5727 CFRelease(str);
5728 } else {
5729 CFStringAppendCString(outputString, "(null description)", kCFStringEncodingASCII);
5730 }
5731 } else {
5732 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5733 }
5734 break;
5735 }
5736 }
5737
5738 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
5739 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
5740 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
5741
5742 }
5743
// SNPRINTF expands to code that uses locals of _CFStringAppendFormatAndArgumentsAux; retire it once that function is done
5744 #undef SNPRINTF
5745
5746 void CFShowStr(CFStringRef str) {
5747 CFAllocatorRef alloc;
5748
5749 if (!str) {
5750 fprintf(stdout, "(null)\n");
5751 return;
5752 }
5753
5754 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
5755 fprintf(stdout, "This is an NSString, not CFString\n");
5756 return;
5757 }
5758
5759 alloc = CFGetAllocator(str);
5760
5761 fprintf(stdout, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str), __CFStrIsEightBit(str));
5762 fprintf(stdout, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
5763 __CFStrHasLengthByte(str), __CFStrHasNullByte(str), __CFStrIsInline(str));
5764
5765 fprintf(stdout, "Allocator ");
5766 if (alloc != kCFAllocatorSystemDefault) {
5767 fprintf(stdout, "%p\n", (void *)alloc);
5768 } else {
5769 fprintf(stdout, "SystemDefault\n");
5770 }
5771 fprintf(stdout, "Mutable %d\n", __CFStrIsMutable(str));
5772 if (!__CFStrIsMutable(str) && __CFStrHasContentsDeallocator(str)) {
5773 if (__CFStrContentsDeallocator(str)) fprintf(stdout, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str));
5774 else fprintf(stdout, "ContentsDeallocatorFunc None\n");
5775 } else if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str)) {
5776 fprintf(stdout, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef)str));
5777 }
5778
5779 if (__CFStrIsMutable(str)) {
5780 fprintf(stdout, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str), __CFStrIsFixed(str) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str));
5781 }
5782 fprintf(stdout, "Contents %p\n", (void *)__CFStrContents(str));
5783 }
5784
5785
5786
// Retire the file-local Hangul syllable constants now that the code using them is finished
5787 #undef HANGUL_SBASE
5788 #undef HANGUL_LBASE
5789 #undef HANGUL_VBASE
5790 #undef HANGUL_TBASE
5791 #undef HANGUL_SCOUNT
5792 #undef HANGUL_LCOUNT
5793 #undef HANGUL_VCOUNT
5794 #undef HANGUL_TCOUNT
5795 #undef HANGUL_NCOUNT
5796