]> git.saurik.com Git - apple/cf.git/blob - String.subproj/CFString.c
CF-368.11.tar.gz
[apple/cf.git] / String.subproj / CFString.c
1 /*
2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFString.c
24 Copyright 1998-2002, Apple, Inc. All rights reserved.
25 Responsibility: Ali Ozer
26
27 !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined.
28 */
29
30 #include <CoreFoundation/CFBase.h>
31 #include <CoreFoundation/CFString.h>
32 #include <CoreFoundation/CFDictionary.h>
33 #include "CFStringEncodingConverterExt.h"
34 #include "CFUniChar.h"
35 #include "CFUnicodeDecomposition.h"
36 #include "CFUnicodePrecomposition.h"
37 #include "CFUtilitiesPriv.h"
38 #include "CFInternal.h"
39 #include <stdarg.h>
40 #include <stdio.h>
41 #include <string.h>
42 #if defined (__MACOS8__)
43 #include <Script.h> // For GetScriptManagerVariable
44 #include <Processes.h> // For logging
45 #include <stdlib.h>
46 #include <UnicodeConverter.h>
47 #include <TextEncodingConverter.h>
48 #elif defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
49 #include <unistd.h>
50 #endif
51 #if defined(__WIN32__)
52 #include <windows.h>
53 #endif /* __WIN32__ */
54
55 #if defined(__MACH__)
56 extern size_t malloc_good_size(size_t size);
57 #endif
58 extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars);
59
#ifdef DEBUG

/* Debug-only state and constants. */

// Set to true while the constant string table is being purged, so CFStringDeallocate doesn't assert
static Boolean __CFConstantStringTableBeingFreed = false;

// Placed into C & Pascal strings when a conversion cannot be performed
#define CONVERSIONFAILURESTR "CFString conversion failed"

// Special allocator used by CFSTRs so that deallocations of them can be caught
static CFAllocatorRef constantStringAllocatorForDebugging = NULL;

#endif /* DEBUG */
72
73
74
75 // This section is for CFString compatibility and other behaviors...
76
77 static CFOptionFlags _CFStringCompatibilityMask = 0;
78
79 #define Bug2967272 1
80
81 void _CFStringSetCompatibility(CFOptionFlags mask) {
82 _CFStringCompatibilityMask |= mask;
83 }
84
85 CF_INLINE Boolean __CFStringGetCompatibility(CFOptionFlags mask) {
86 return (_CFStringCompatibilityMask & mask) == mask;
87 }
88
89
90
91 // Two constant strings used by CFString; these are initialized in CFStringInitialize
92 CONST_STRING_DECL(kCFEmptyString, "")
93 CONST_STRING_DECL(kCFNSDecimalSeparatorKey, "NSDecimalSeparator")
94
95
96 /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
97 */
98 struct __CFString {
99 CFRuntimeBase base;
100 union { // In many cases the allocated structs are smaller than these
101 struct {
102 SInt32 length;
103 } inline1;
104
105 struct {
106 void *buffer;
107 UInt32 length;
108 CFAllocatorRef contentsDeallocator; // Just the dealloc func is used
109 } notInlineImmutable1;
110 struct {
111 void *buffer;
112 CFAllocatorRef contentsDeallocator; // Just the dealloc func is used
113 } notInlineImmutable2;
114 struct {
115 void *buffer;
116 UInt32 length;
117 UInt32 capacityFields; // Currently only stores capacity
118 UInt32 gapEtc; // Stores some bits, plus desired or fixed capacity
119 CFAllocatorRef contentsAllocator; // Optional
120 } notInlineMutable;
121 } variants;
122 };
123
124 /*
125 I = is immutable
126 E = not inline contents
127 U = is Unicode
128 N = has NULL byte
129 L = has length byte
130 D = explicit deallocator for contents (for mutable objects, allocator)
131 X = UNUSED
132
133 Also need (only for mutable)
134 F = is fixed
135 G = has gap
136 Cap, DesCap = capacity
137
138 B7 B6 B5 B4 B3 B2 B1 B0
139 U N L X I
140
141 B6 B5
142 0 0 inline contents
143 0 1 E (freed with default allocator)
144 1 0 E (not freed)
145 1 1 E D
146
147 !!! Note: Constant CFStrings use the bit patterns:
148 C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable)
149 D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable)
The bit usages should not be modified in a way that would affect these bit patterns.
151 */
152
enum {
    /* ---- Bits kept in base._info ---- */

    /* Where the contents live and how they are released (bits 5 and 6) */
    __kCFFreeContentsWhenDoneMask       = 0x20,
    __kCFFreeContentsWhenDone           = 0x20,
    __kCFContentsMask                   = 0x60,
    __kCFHasInlineContents              = 0x00,
    __kCFNotInlineContentsNoFree        = 0x40,    // Don't free
    __kCFNotInlineContentsDefaultFree   = 0x20,    // Use allocator's free function
    __kCFNotInlineContentsCustomFree    = 0x60,    // Use a specially provided free function
    __kCFHasContentsAllocatorMask       = 0x60,
    __kCFHasContentsAllocator           = 0x60,    // (For mutable strings) use a specially provided allocator
    __kCFHasContentsDeallocatorMask     = 0x60,
    __kCFHasContentsDeallocator         = 0x60,

    /* Single-bit properties */
    __kCFIsMutableMask                  = 0x01,
    __kCFIsMutable                      = 0x01,
    __kCFIsUnicodeMask                  = 0x10,
    __kCFIsUnicode                      = 0x10,
    __kCFHasNullByteMask                = 0x08,
    __kCFHasNullByte                    = 0x08,
    __kCFHasLengthByteMask              = 0x04,
    __kCFHasLengthByte                  = 0x04,
    // !!! Bit 0x02 has been freed up

    /* ---- Bits kept in variants.notInlineMutable.gapEtc ---- */
    __kCFGapMask                        = 0x00ffffff,
    __kCFGapBitNumber                   = 24,
    __kCFDesiredCapacityMask            = 0x00ffffff,    // Currently gap and fixed share same bits as gap not implemented
    __kCFDesiredCapacityBitNumber       = 24,
    __kCFIsFixedMask                    = 0x80000000,
    __kCFIsFixed                        = 0x80000000,
    __kCFHasGapMask                     = 0x40000000,
    __kCFHasGap                         = 0x40000000,
    __kCFCapacityProvidedExternallyMask = 0x20000000,    // Set if the external buffer capacity is set explicitly by the developer
    __kCFCapacityProvidedExternally     = 0x20000000,
    __kCFIsExternalMutableMask          = 0x10000000,    // Determines whether the buffer is controlled by the developer
    __kCFIsExternalMutable              = 0x10000000
    // 0x0f000000: 4 additional bits available for use in mutable strings
};
189
190
191 // !!! Assumptions:
192 // Mutable strings are not inline
193 // Compile-time constant strings are not inline
194 // Mutable strings always have explicit length (but they might also have length byte and null byte)
195 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
196 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
197
198 /* The following set of functions and macros need to be updated on change to the bit configuration
199 */
200 CF_INLINE Boolean __CFStrIsMutable(CFStringRef str) {return (str->base._info & __kCFIsMutableMask) == __kCFIsMutable;}
201 CF_INLINE Boolean __CFStrIsInline(CFStringRef str) {return (str->base._info & __kCFContentsMask) == __kCFHasInlineContents;}
202 CF_INLINE Boolean __CFStrFreeContentsWhenDone(CFStringRef str) {return (str->base._info & __kCFFreeContentsWhenDoneMask) == __kCFFreeContentsWhenDone;}
203 CF_INLINE Boolean __CFStrHasContentsDeallocator(CFStringRef str) {return (str->base._info & __kCFHasContentsDeallocatorMask) == __kCFHasContentsDeallocator;}
204 CF_INLINE Boolean __CFStrIsUnicode(CFStringRef str) {return (str->base._info & __kCFIsUnicodeMask) == __kCFIsUnicode;}
205 CF_INLINE Boolean __CFStrIsEightBit(CFStringRef str) {return (str->base._info & __kCFIsUnicodeMask) != __kCFIsUnicode;}
206 CF_INLINE Boolean __CFStrHasNullByte(CFStringRef str) {return (str->base._info & __kCFHasNullByteMask) == __kCFHasNullByte;}
207 CF_INLINE Boolean __CFStrHasLengthByte(CFStringRef str) {return (str->base._info & __kCFHasLengthByteMask) == __kCFHasLengthByte;}
208 CF_INLINE Boolean __CFStrHasExplicitLength(CFStringRef str) {return (str->base._info & (__kCFIsMutableMask | __kCFHasLengthByteMask)) != __kCFHasLengthByte;} // Has explicit length if (1) mutable or (2) not mutable and no length byte
209 CF_INLINE Boolean __CFStrIsConstant(CFStringRef str) {return (str->base._rc) == 0;}
210
211 CF_INLINE SInt32 __CFStrSkipAnyLengthByte(CFStringRef str) {return ((str->base._info & __kCFHasLengthByteMask) == __kCFHasLengthByte) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents
212
213 /* Returns ptr to the buffer (which might include the length byte)
214 */
215 CF_INLINE const void *__CFStrContents(CFStringRef str) {
216 if (__CFStrIsInline(str)) {
217 return (const void *)(((UInt32)&(str->variants)) + (__CFStrHasExplicitLength(str) ? sizeof(UInt32) : 0));
218 } else { // Not inline; pointer is always word 2
219 return str->variants.notInlineImmutable1.buffer;
220 }
221 }
222
223 static CFAllocatorRef *__CFStrContentsDeallocatorPtr(CFStringRef str) {
224 return __CFStrHasExplicitLength(str) ? &(((CFMutableStringRef)str)->variants.notInlineImmutable1.contentsDeallocator) : &(((CFMutableStringRef)str)->variants.notInlineImmutable2.contentsDeallocator); }
225
226 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
227 CF_INLINE CFAllocatorRef __CFStrContentsDeallocator(CFStringRef str) {
228 return *__CFStrContentsDeallocatorPtr(str);
229 }
230
231 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
232 CF_INLINE void __CFStrSetContentsDeallocator(CFStringRef str, CFAllocatorRef contentsAllocator) {
233 *__CFStrContentsDeallocatorPtr(str) = contentsAllocator;
234 }
235
236 static CFAllocatorRef *__CFStrContentsAllocatorPtr(CFStringRef str) {
237 CFAssert(!__CFStrIsInline(str), __kCFLogAssertion, "Asking for contents allocator of inline string");
238 CFAssert(__CFStrIsMutable(str), __kCFLogAssertion, "Asking for contents allocator of an immutable string");
239 return (CFAllocatorRef *)&(str->variants.notInlineMutable.contentsAllocator);
240 }
241
242 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
243 CF_INLINE CFAllocatorRef __CFStrContentsAllocator(CFMutableStringRef str) {
244 return *(__CFStrContentsAllocatorPtr(str));
245 }
246
247 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
248 CF_INLINE void __CFStrSetContentsAllocator(CFMutableStringRef str, CFAllocatorRef alloc) {
249 *(__CFStrContentsAllocatorPtr(str)) = alloc;
250 }
251
252 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
253 */
254 CF_INLINE CFIndex __CFStrLength(CFStringRef str) {
255 if (__CFStrHasExplicitLength(str)) {
256 if (__CFStrIsInline(str)) {
257 return str->variants.inline1.length;
258 } else {
259 return str->variants.notInlineImmutable1.length;
260 }
261 } else {
262 return (CFIndex)(*((uint8_t *)__CFStrContents(str)));
263 }
264 }
265
266 CF_INLINE CFIndex __CFStrLength2(CFStringRef str, const void *buffer) {
267 if (__CFStrHasExplicitLength(str)) {
268 if (__CFStrIsInline(str)) {
269 return str->variants.inline1.length;
270 } else {
271 return str->variants.notInlineImmutable1.length;
272 }
273 } else {
274 return (CFIndex)(*((uint8_t *)buffer));
275 }
276 }
277
278
279 Boolean __CFStringIsEightBit(CFStringRef str) {
280 return __CFStrIsEightBit(str);
281 }
282
283 /* Sets the content pointer for immutable or mutable strings.
284 */
285 CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p)
286 {
287 // XXX_PCB catch all writes for mutable string case.
288 CF_WRITE_BARRIER_BASE_ASSIGN(__CFGetAllocator(str), str, ((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p);
289 }
290 CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v) {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._info, 6, 0, v);}
291
292 CF_INLINE void __CFStrSetExplicitLength(CFStringRef str, CFIndex v) {
293 if (__CFStrIsInline(str)) {
294 ((CFMutableStringRef)str)->variants.inline1.length = v;
295 } else {
296 ((CFMutableStringRef)str)->variants.notInlineImmutable1.length = v;
297 }
298 }
299
300 // Assumption: Called with mutable strings only
301 CF_INLINE Boolean __CFStrIsFixed(CFStringRef str) {return (str->variants.notInlineMutable.gapEtc & __kCFIsFixedMask) == __kCFIsFixed;}
302 CF_INLINE Boolean __CFStrHasContentsAllocator(CFStringRef str) {return (str->base._info & __kCFHasContentsAllocatorMask) == __kCFHasContentsAllocator;}
303 CF_INLINE Boolean __CFStrIsExternalMutable(CFStringRef str) {return (str->variants.notInlineMutable.gapEtc & __kCFIsExternalMutableMask) == __kCFIsExternalMutable;}
304
305 // If capacity is provided externally, we only change it when we need to grow beyond it
306 CF_INLINE Boolean __CFStrCapacityProvidedExternally(CFStringRef str) {return (str->variants.notInlineMutable.gapEtc & __kCFCapacityProvidedExternallyMask) == __kCFCapacityProvidedExternally;}
307 CF_INLINE void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.gapEtc |= __kCFCapacityProvidedExternally;}
308 CF_INLINE void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.gapEtc &= ~__kCFCapacityProvidedExternally;}
309
310
311 CF_INLINE void __CFStrSetIsFixed(CFMutableStringRef str) {str->variants.notInlineMutable.gapEtc |= __kCFIsFixed;}
312 CF_INLINE void __CFStrSetIsExternalMutable(CFMutableStringRef str) {str->variants.notInlineMutable.gapEtc |= __kCFIsExternalMutable;}
313 CF_INLINE void __CFStrSetHasGap(CFMutableStringRef str) {str->variants.notInlineMutable.gapEtc |= __kCFHasGap;}
314 CF_INLINE void __CFStrSetUnicode(CFMutableStringRef str) {str->base._info |= __kCFIsUnicode;}
315 CF_INLINE void __CFStrClearUnicode(CFMutableStringRef str) {str->base._info &= ~__kCFIsUnicode;}
316 CF_INLINE void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str) {str->base._info |= (__kCFHasLengthByte | __kCFHasNullByte);}
317 CF_INLINE void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str) {str->base._info &= ~(__kCFHasLengthByte | __kCFHasNullByte);}
318
319
320 static void *__CFStrAllocateMutableContents(CFMutableStringRef str, CFIndex size) {
321 void *ptr;
322 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
323 ptr = CFAllocatorAllocate(alloc, size, 0);
324 if (__CFOASafe) __CFSetLastAllocationEventName(ptr, "CFString (store)");
325 return ptr;
326 }
327
328 static void __CFStrDeallocateMutableContents(CFMutableStringRef str, void *buffer) {
329 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
330 if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) {
331 // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle.
332 auto_zone_release(__CFCollectableZone, buffer);
333 } else {
334 CFAllocatorDeallocate(alloc, buffer);
335 }
336 }
337
338
339 // The following set of functions should only be called on mutable strings
340
341 /* "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
342 "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
343 */
344 CF_INLINE CFIndex __CFStrCapacity(CFStringRef str) {return str->variants.notInlineMutable.capacityFields;}
345 CF_INLINE void __CFStrSetCapacity(CFMutableStringRef str, CFIndex cap) {str->variants.notInlineMutable.capacityFields = cap;}
346 CF_INLINE CFIndex __CFStrDesiredCapacity(CFStringRef str) {return __CFBitfieldGetValue(str->variants.notInlineMutable.gapEtc, __kCFDesiredCapacityBitNumber, 0);}
347 CF_INLINE void __CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex size) {__CFBitfieldSetValue(str->variants.notInlineMutable.gapEtc, __kCFDesiredCapacityBitNumber, 0, size);}
348
349
350
351
352 /* CFString specific init flags
353 Note that you cannot count on the external buffer not being copied.
354 Also, if you specify an external buffer, you should not change it behind the CFString's back.
355 */
enum {
    /* See if the Unicode contents can be thinned down to 8-bit */
    __kCFThinUnicodeIfPossible            = 0x1000000,
    /* Indicating that the string data has a Pascal string structure (length byte at start) */
    kCFStringPascal                       = 0x10000,
    /* Don't copy the provided string contents if possible; free it when no longer needed */
    kCFStringNoCopyProvidedContents       = 0x20000,
    /* Don't copy the provided string contents if possible; don't free it when no longer needed */
    kCFStringNoCopyNoFreeProvidedContents = 0x30000
};
362
363 /* System Encoding.
364 */
365 static CFStringEncoding __CFDefaultSystemEncoding = kCFStringEncodingInvalidId;
366 static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId;
367 CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId;
368
369 CFStringEncoding CFStringGetSystemEncoding(void) {
370
371 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) {
372 const CFStringEncodingConverter *converter = NULL;
373 #if defined(__MACOS8__) || defined(__MACH__)
374 __CFDefaultSystemEncoding = kCFStringEncodingMacRoman; // MacRoman is built-in so always available
375 #elif defined(__WIN32__)
376 __CFDefaultSystemEncoding = kCFStringEncodingWindowsLatin1; // WinLatin1 is built-in so always available
377 #elif defined(__LINUX__) || defined(__FREEBSD__)
378 __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default
379 #else // Solaris && HP-UX ?
380 __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default
381 #endif
382 converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding);
383
384 __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? converter->toUnicode : NULL);
385 }
386
387 return __CFDefaultSystemEncoding;
388 }
389
390 // Fast version for internal use
391
392 CF_INLINE CFStringEncoding __CFStringGetSystemEncoding(void) {
393 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) (void)CFStringGetSystemEncoding();
394 return __CFDefaultSystemEncoding;
395 }
396
397 CFStringEncoding CFStringFileSystemEncoding(void) {
398 if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) {
399 #if defined(__MACH__)
400 __CFDefaultFileSystemEncoding = kCFStringEncodingUTF8;
401 #else
402 __CFDefaultFileSystemEncoding = CFStringGetSystemEncoding();
403 #endif
404 }
405
406 return __CFDefaultFileSystemEncoding;
407 }
408
409 /* ??? Is returning length when no other answer is available the right thing?
410 */
411 CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) {
412 if (encoding == kCFStringEncodingUTF8) {
413 return _CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? (length * 3) : (length * 6); // 1 Unichar could expand to 3 bytes; we return 6 for older apps for compatibility
414 } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32
415 return length * sizeof(UTF32Char);
416 } else {
417 encoding &= 0xFFF; // Mask off non-base part
418 }
419 switch (encoding) {
420 case kCFStringEncodingUnicode:
421 return length * sizeof(UniChar);
422
423 case kCFStringEncodingNonLossyASCII:
424 return length * 6; // 1 Unichar could expand to 6 bytes
425
426 case kCFStringEncodingMacRoman:
427 case kCFStringEncodingWindowsLatin1:
428 case kCFStringEncodingISOLatin1:
429 case kCFStringEncodingNextStepLatin:
430 case kCFStringEncodingASCII:
431 return length / sizeof(uint8_t);
432
433 default:
434 return length / sizeof(uint8_t);
435 }
436 }
437
438
439 /* Returns whether the indicated encoding can be stored in 8-bit chars
440 */
441 CF_INLINE Boolean __CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding) {
442 switch (encoding & 0xFFF) { // just use encoding base
443 case kCFStringEncodingInvalidId:
444 case kCFStringEncodingUnicode:
445 case kCFStringEncodingNonLossyASCII:
446 return false;
447
448 case kCFStringEncodingMacRoman:
449 case kCFStringEncodingWindowsLatin1:
450 case kCFStringEncodingISOLatin1:
451 case kCFStringEncodingNextStepLatin:
452 case kCFStringEncodingASCII:
453 return true;
454
455 default: return false;
456 }
457 }
458
459 /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
460 ??? Perhaps only ASCII fits the bill due to Unicode decomposition.
461 */
462 CFStringEncoding __CFStringComputeEightBitStringEncoding(void) {
463 if (__CFDefaultEightBitStringEncoding == kCFStringEncodingInvalidId) {
464 CFStringEncoding systemEncoding = CFStringGetSystemEncoding();
465 if (systemEncoding == kCFStringEncodingInvalidId) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
466 return kCFStringEncodingASCII;
467 } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding)) {
468 __CFDefaultEightBitStringEncoding = systemEncoding;
469 } else {
470 __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII;
471 }
472 }
473
474 return __CFDefaultEightBitStringEncoding;
475 }
476
477 /* Returns whether the provided bytes can be stored in ASCII
478 */
479 CF_INLINE Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
480 while (len--) if ((uint8_t)(*bytes++) >= 128) return false;
481 return true;
482 }
483
484 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
485 */
486 CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) {
487 if (encoding == __CFStringGetEightBitStringEncoding()) return true;
488 if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true;
489 return false;
490 }
491
492
493 /* Returns whether a length byte can be tacked on to a string of the indicated length.
494 */
495 CF_INLINE Boolean __CFCanUseLengthByte(CFIndex len) {
496 #define __kCFMaxPascalStrLen 255
497 return (len <= __kCFMaxPascalStrLen) ? true : false;
498 }
499
500 /* Various string assertions
501 */
/* Every macro parameter that takes part in a comparison or in arithmetic is parenthesized, so that
   an argument which is itself an expression (e.g. "a & b" or "c ? x : y") is evaluated as a unit.
   The arguments passed along for message formatting are left as they are.
*/
#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && ((idx) + (count)) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
#define __CFAssertLengthIsOK(len) CFAssert2((len) < __kCFMaxLength, __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, len)
#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsNotNegative(idx) CFAssert2((idx) >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
510
511
512 /* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity.
513 Additional complications are applied in the following order:
514 - desiredCapacity, which is the minimum (except initially things can be at zero)
515 - rounding up to factor of 8
- compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256
517 */
/* SHRINKFACTOR(c): half of c (integer division).
   GROWFACTOR(c): c grown by 3/2; the +1 in (c * 3 + 1) / 2 rounds odd products up.
   The argument is parenthesized so that an expression such as "a + b" is scaled as a whole.
*/
#define SHRINKFACTOR(c) ((c) / 2)
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
520
521 CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, CFIndex reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) {
522 if (capacity != 0 || reqCapacity != 0) { /* If initially zero, and space not needed, leave it at that... */
523 if ((capacity < reqCapacity) || /* We definitely need the room... */
524 (!__CFStrCapacityProvidedExternally(str) && /* Assuming we control the capacity... */
525 ((reqCapacity < SHRINKFACTOR(capacity)) || /* ...we have too much room! */
526 (!leaveExtraRoom && (reqCapacity < capacity))))) { /* ...we need to eliminate the extra space... */
527 CFIndex newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity; /* Grow by 3/2 if extra room is desired */
528 CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize;
529 if (newCapacity < desiredCapacity) { /* If less than desired, bump up to desired */
530 newCapacity = desiredCapacity;
531 } else if (__CFStrIsFixed(str)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
532 newCapacity = __CFMax(desiredCapacity, reqCapacity); /* !!! So, fixed is not really fixed, but "tight" */
533 }
534 if (__CFStrHasContentsAllocator(str)) { /* Also apply any preferred size from the allocator; should we do something for */
535 newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0);
536 #if defined(__MACH__)
537 } else {
538 newCapacity = malloc_good_size(newCapacity);
539 #endif
540 }
541 return newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
542 }
543 }
544 return capacity;
545 }
546
547
548 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
549 numBlocks is current total number of blocks within buffer.
550 blockSize is the size of each block in bytes
551 ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
552 insertLength is the final spacing between the remaining blocks
553
554 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO
555 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
556 if insertLength = 0, result = A B D G H
557
558 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
559 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
560
561 */
562 typedef struct _CFStringDeferredRange {
563 int beginning;
564 int length;
565 int shift;
566 } CFStringDeferredRange;
567
568 typedef struct _CFStringStackInfo {
569 int capacity; // Capacity (if capacity == count, need to realloc to add another)
570 int count; // Number of elements actually stored
571 CFStringDeferredRange *stack;
572 Boolean hasMalloced; // Indicates "stack" is allocated and needs to be deallocated when done
573 char _padding[3];
574 } CFStringStackInfo;
575
576 CF_INLINE void pop (CFStringStackInfo *si, CFStringDeferredRange *topRange) {
577 si->count = si->count - 1;
578 *topRange = si->stack[si->count];
579 }
580
581 CF_INLINE void push (CFStringStackInfo *si, const CFStringDeferredRange *newRange) {
582 if (si->count == si->capacity) {
583 // increase size of the stack
584 si->capacity = (si->capacity + 4) * 2;
585 if (si->hasMalloced) {
586 si->stack = CFAllocatorReallocate(NULL, si->stack, si->capacity * sizeof(CFStringDeferredRange), 0);
587 } else {
588 CFStringDeferredRange *newStack = (CFStringDeferredRange *)CFAllocatorAllocate(NULL, si->capacity * sizeof(CFStringDeferredRange), 0);
589 memmove(newStack, si->stack, si->count * sizeof(CFStringDeferredRange));
590 si->stack = newStack;
591 si->hasMalloced = true;
592 }
593 }
594 si->stack[si->count] = *newRange;
595 si->count = si->count + 1;
596 }
597
598 static void rearrangeBlocks(
599 uint8_t *buffer,
600 CFIndex numBlocks,
601 CFIndex blockSize,
602 const CFRange *ranges,
603 CFIndex numRanges,
604 CFIndex insertLength) {
605
606 #define origStackSize 10
607 CFStringDeferredRange origStack[origStackSize];
608 CFStringStackInfo si = {origStackSize, 0, origStack, false, {0, 0, 0}};
609 CFStringDeferredRange currentNonRange = {0, 0, 0};
610 int currentRange = 0;
611 int amountShifted = 0;
612
613 // must have at least 1 range left.
614
615 while (currentRange < numRanges) {
616 currentNonRange.beginning = (ranges[currentRange].location + ranges[currentRange].length) * blockSize;
617 if ((numRanges - currentRange) == 1) {
618 // at the end.
619 currentNonRange.length = numBlocks * blockSize - currentNonRange.beginning;
620 if (currentNonRange.length == 0) break;
621 } else {
622 currentNonRange.length = (ranges[currentRange + 1].location * blockSize) - currentNonRange.beginning;
623 }
624 currentNonRange.shift = amountShifted + (insertLength * blockSize) - (ranges[currentRange].length * blockSize);
625 amountShifted = currentNonRange.shift;
626 if (amountShifted <= 0) {
627 // process current item and rest of stack
628 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
629 while (si.count > 0) {
630 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
631 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
632 }
633 } else {
634 // add currentNonRange to stack.
635 push (&si, &currentNonRange);
636 }
637 currentRange++;
638 }
639
640 // no more ranges. if anything is on the stack, process.
641
642 while (si.count > 0) {
643 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
644 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
645 }
646 if (si.hasMalloced) CFAllocatorDeallocate (NULL, si.stack);
647 }
648
649 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
650 */
651 static void copyBlocks(
652 const uint8_t *srcBuffer,
653 uint8_t *dstBuffer,
654 CFIndex srcLength,
655 Boolean srcIsUnicode,
656 Boolean dstIsUnicode,
657 const CFRange *ranges,
658 CFIndex numRanges,
659 CFIndex insertLength) {
660
661 CFIndex srcLocationInBytes = 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
662 CFIndex dstLocationInBytes = 0; // ditto
663 CFIndex srcBlockSize = srcIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
664 CFIndex insertLengthInBytes = insertLength * (dstIsUnicode ? sizeof(UniChar) : sizeof(uint8_t));
665 CFIndex rangeIndex = 0;
666 CFIndex srcToDstMultiplier = (srcIsUnicode == dstIsUnicode) ? 1 : (sizeof(UniChar) / sizeof(uint8_t));
667
668 // Loop over the ranges, copying the range to be preserved (right before each range)
669 while (rangeIndex < numRanges) {
670 CFIndex srcLengthInBytes = ranges[rangeIndex].location * srcBlockSize - srcLocationInBytes; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
671 if (srcLengthInBytes > 0) {
672 if (srcIsUnicode == dstIsUnicode) {
673 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLengthInBytes);
674 } else {
675 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLengthInBytes);
676 }
677 }
678 srcLocationInBytes += srcLengthInBytes + ranges[rangeIndex].length * srcBlockSize; // Skip over the just-copied and to-be-deleted stuff
679 dstLocationInBytes += srcLengthInBytes * srcToDstMultiplier + insertLengthInBytes;
680 rangeIndex++;
681 }
682
683 // Do last range (the one beyond last range)
684 if (srcLocationInBytes < srcLength * srcBlockSize) {
685 if (srcIsUnicode == dstIsUnicode) {
686 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLength * srcBlockSize - srcLocationInBytes);
687 } else {
688 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLength * srcBlockSize - srcLocationInBytes);
689 }
690 }
691 }
692
693
694 /* Reallocates the backing store of the string to accomodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)
695 */
696 static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) {
697 const uint8_t *curContents = __CFStrContents(str);
698 CFIndex curLength = curContents ? __CFStrLength2(str, curContents) : 0;
699 CFIndex newLength;
700
701 // Compute new length of the string
702 if (numDeleteRanges == 1) {
703 newLength = curLength + insertLength - deleteRanges[0].length;
704 } else {
705 int cnt;
706 newLength = curLength + insertLength * numDeleteRanges;
707 for (cnt = 0; cnt < numDeleteRanges; cnt++) newLength -= deleteRanges[cnt].length;
708 }
709
710 __CFAssertIfFixedLengthIsOK(str, newLength);
711
712 if (newLength == 0) {
713 // An somewhat optimized code-path for this special case, with the following implicit values:
714 // newIsUnicode = false
715 // useLengthAndNullBytes = false
716 // newCharSize = sizeof(uint8_t)
717 // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
718 // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
719 CFIndex curCapacity = __CFStrCapacity(str);
720 CFIndex newCapacity = __CFStrNewCapacity(str, 0, curCapacity, true, sizeof(uint8_t));
721 if (newCapacity != curCapacity) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
722 if (curContents) __CFStrDeallocateMutableContents(str, (uint8_t *)curContents);
723 __CFStrSetContentPtr(str, NULL);
724 __CFStrSetCapacity(str, 0);
725 __CFStrClearCapacityProvidedExternally(str);
726 __CFStrClearHasLengthAndNullBytes(str);
727 if (!__CFStrIsExternalMutable(str)) __CFStrClearUnicode(str); // External mutable implies Unicode
728 } else {
729 if (!__CFStrIsExternalMutable(str)) {
730 __CFStrClearUnicode(str);
731 if (curCapacity >= (int)(sizeof(uint8_t) * 2)) { // If there's room
732 __CFStrSetHasLengthAndNullBytes(str);
733 ((uint8_t *)curContents)[0] = ((uint8_t *)curContents)[1] = 0;
734 } else {
735 __CFStrClearHasLengthAndNullBytes(str);
736 }
737 }
738 }
739 __CFStrSetExplicitLength(str, 0);
740 } else { /* This else-clause assumes newLength > 0 */
741 Boolean oldIsUnicode = __CFStrIsUnicode(str);
742 Boolean newIsUnicode = makeUnicode || (oldIsUnicode /* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str);
743 CFIndex newCharSize = newIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
744 Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */;
745 CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0; /* 2 extra bytes to keep the length byte & null... */
746 CFIndex curCapacity = __CFStrCapacity(str);
747 CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize);
748 Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
749 uint8_t *newContents = allocNewBuffer ? __CFStrAllocateMutableContents(str, newCapacity) : (uint8_t *)curContents;
750 Boolean hasLengthAndNullBytes = __CFStrHasLengthByte(str);
751
752 CFAssert1(hasLengthAndNullBytes == __CFStrHasNullByte(str), __kCFLogAssertion, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__);
753
754 if (hasLengthAndNullBytes) curContents++;
755 if (useLengthAndNullBytes) newContents++;
756
757 if (curContents) {
758 if (oldIsUnicode == newIsUnicode) {
759 if (newContents == curContents) {
760 rearrangeBlocks(newContents, curLength, newCharSize, deleteRanges, numDeleteRanges, insertLength);
761 } else {
762 copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
763 }
764 } else if (newIsUnicode) { /* this implies we have a new buffer */
765 copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
766 }
767 if (hasLengthAndNullBytes) curContents--; /* Undo the damage from above */
768 if (allocNewBuffer) __CFStrDeallocateMutableContents(str, (void *)curContents);
769 }
770
771 if (!newIsUnicode) {
772 if (useLengthAndNullBytes) {
773 newContents[newLength] = 0; /* Always have null byte, if not unicode */
774 newContents--; /* Undo the damage from above */
775 newContents[0] = __CFCanUseLengthByte(newLength) ? (uint8_t)newLength : 0;
776 if (!hasLengthAndNullBytes) __CFStrSetHasLengthAndNullBytes(str);
777 } else {
778 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
779 }
780 if (oldIsUnicode) __CFStrClearUnicode(str);
781 } else { // New is unicode...
782 if (!oldIsUnicode) __CFStrSetUnicode(str);
783 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
784 }
785 __CFStrSetExplicitLength(str, newLength);
786
787 if (allocNewBuffer) {
788 __CFStrSetCapacity(str, newCapacity);
789 __CFStrClearCapacityProvidedExternally(str);
790 __CFStrSetContentPtr(str, newContents);
791 }
792 }
793 }
794
795 /* Same as above, but takes one range (very common case)
796 */
797 CF_INLINE void __CFStringChangeSize(CFMutableStringRef str, CFRange range, CFIndex insertLength, Boolean makeUnicode) {
798 __CFStringChangeSizeMultiple(str, &range, 1, insertLength, makeUnicode);
799 }
800
801
802 static void __CFStringDeallocate(CFTypeRef cf) {
803 CFStringRef str = cf;
804
805 // constantStringAllocatorForDebugging is not around unless DEBUG is defined, but neither is CFAssert2()...
806 CFAssert1(__CFConstantStringTableBeingFreed || CFGetAllocator(str) != constantStringAllocatorForDebugging, __kCFLogAssertion, "Tried to deallocate CFSTR(\"%@\")", str);
807
808 if (!__CFStrIsInline(str)) {
809 uint8_t *contents;
810 Boolean mutable = __CFStrIsMutable(str);
811 if (__CFStrFreeContentsWhenDone(str) && (contents = (uint8_t *)__CFStrContents(str))) {
812 if (mutable) {
813 __CFStrDeallocateMutableContents((CFMutableStringRef)str, contents);
814 } else {
815 if (__CFStrHasContentsDeallocator(str)) {
816 CFAllocatorRef contentsDeallocator = __CFStrContentsDeallocator(str);
817 CFAllocatorDeallocate(contentsDeallocator, contents);
818 CFRelease(contentsDeallocator);
819 } else {
820 CFAllocatorRef alloc = __CFGetAllocator(str);
821 CFAllocatorDeallocate(alloc, contents);
822 }
823 }
824 }
825 if (mutable && __CFStrHasContentsAllocator(str)) CFRelease(__CFStrContentsAllocator((CFMutableStringRef)str));
826 }
827 }
828
829 static Boolean __CFStringEqual(CFTypeRef cf1, CFTypeRef cf2) {
830 CFStringRef str1 = cf1;
831 CFStringRef str2 = cf2;
832 const uint8_t *contents1;
833 const uint8_t *contents2;
834 CFIndex len1;
835
836 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
837 /* !!! We do not need == test, as the CFBase runtime assures this */
838
839 contents1 = __CFStrContents(str1);
840 contents2 = __CFStrContents(str2);
841 len1 = __CFStrLength2(str1, contents1);
842
843 if (len1 != __CFStrLength2(str2, contents2)) return false;
844
845 contents1 += __CFStrSkipAnyLengthByte(str1);
846 contents2 += __CFStrSkipAnyLengthByte(str2);
847
848 if (__CFStrIsEightBit(str1) && __CFStrIsEightBit(str2)) {
849 return memcmp((const char *)contents1, (const char *)contents2, len1) ? false : true;
850 } else if (__CFStrIsEightBit(str1)) { /* One string has Unicode contents */
851 CFStringInlineBuffer buf;
852 CFIndex buf_idx = 0;
853
854 CFStringInitInlineBuffer(str1, &buf, CFRangeMake(0, len1));
855 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
856 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents2)[buf_idx]) return false;
857 }
858 } else if (__CFStrIsEightBit(str2)) { /* One string has Unicode contents */
859 CFStringInlineBuffer buf;
860 CFIndex buf_idx = 0;
861
862 CFStringInitInlineBuffer(str2, &buf, CFRangeMake(0, len1));
863 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
864 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents1)[buf_idx]) return false;
865 }
866 } else { /* Both strings have Unicode contents */
867 CFIndex idx;
868 for (idx = 0; idx < len1; idx++) {
869 if (((UniChar *)contents1)[idx] != ((UniChar *)contents2)[idx]) return false;
870 }
871 }
872 return true;
873 }
874
875
/* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
If the length is less than or equal to 24, then the hash function is simply the
following (n is the nth UniChar character, starting from 0):

    hash(-1) = length
    hash(n) = hash(n-1) * 257 + unichar(n);
    Hash = hash(length-1) * ((1 << (length & 31)) + 1)

If the length is greater than 24, then the above algorithm applies to
characters 0..7 and length-16..length-1; thus the first 8 and last 16 characters.

Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401; 67503105 is 257^4 - 256^4
If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.

NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.
*/
892
893 /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing.
894 */
895 CF_INLINE CFHashCode __CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) {
896 CFHashCode result = actualLen;
897 if (len < 24) {
898 const UniChar *end4 = uContents + (len & ~3);
899 const UniChar *end = uContents + len;
900 while (uContents < end4) { // First count in fours
901 result = result * 67503105 + uContents[0] * 16974593 + uContents[1] * 66049 + uContents[2] * 257 + uContents[3];
902 uContents += 4;
903 }
904 while (uContents < end) { // Then for the last <4 chars, count in ones...
905 result = result * 257 + *uContents++;
906 }
907 } else {
908 result = result * 67503105 + uContents[0] * 16974593 + uContents[1] * 66049 + uContents[2] * 257 + uContents[3];
909 result = result * 67503105 + uContents[4] * 16974593 + uContents[5] * 66049 + uContents[6] * 257 + uContents[7];
910 uContents += (len - 16);
911 result = result * 67503105 + uContents[0] * 16974593 + uContents[1] * 66049 + uContents[2] * 257 + uContents[3];
912 result = result * 67503105 + uContents[4] * 16974593 + uContents[5] * 66049 + uContents[6] * 257 + uContents[7];
913 result = result * 67503105 + uContents[8] * 16974593 + uContents[9] * 66049 + uContents[10] * 257 + uContents[11];
914 result = result * 67503105 + uContents[12] * 16974593 + uContents[13] * 66049 + uContents[14] * 257 + uContents[15];
915 }
916 return result + (result << (actualLen & 31));
917 }
918
919 /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check.
920 */
921 CF_INLINE CFHashCode __CFStrHashEightBit(const uint8_t *contents, CFIndex len) {
922 #if defined(DEBUG)
923 const uint8_t *origContents = contents;
924 #endif
925 CFHashCode result = len;
926 if (len < 24) {
927 const uint8_t *end4 = contents + (len & ~3);
928 const uint8_t *end = contents + len;
929 while (contents < end4) { // First count in fours
930 result = result * 67503105 + __CFCharToUniCharTable[contents[0]] * 16974593 + __CFCharToUniCharTable[contents[1]] * 66049 + __CFCharToUniCharTable[contents[2]] * 257 + __CFCharToUniCharTable[contents[3]];
931 contents += 4;
932 }
933 while (contents < end) { // Then for the last <4 chars, count single chars
934 result = result * 257 + __CFCharToUniCharTable[*contents++];
935 }
936 } else {
937 result = result * 67503105 + __CFCharToUniCharTable[contents[0]] * 16974593 + __CFCharToUniCharTable[contents[1]] * 66049 + __CFCharToUniCharTable[contents[2]] * 257 + __CFCharToUniCharTable[contents[3]];
938 result = result * 67503105 + __CFCharToUniCharTable[contents[4]] * 16974593 + __CFCharToUniCharTable[contents[5]] * 66049 + __CFCharToUniCharTable[contents[6]] * 257 + __CFCharToUniCharTable[contents[7]];
939 contents += (len - 16);
940 result = result * 67503105 + __CFCharToUniCharTable[contents[0]] * 16974593 + __CFCharToUniCharTable[contents[1]] * 66049 + __CFCharToUniCharTable[contents[2]] * 257 + __CFCharToUniCharTable[contents[3]];
941 result = result * 67503105 + __CFCharToUniCharTable[contents[4]] * 16974593 + __CFCharToUniCharTable[contents[5]] * 66049 + __CFCharToUniCharTable[contents[6]] * 257 + __CFCharToUniCharTable[contents[7]];
942 result = result * 67503105 + __CFCharToUniCharTable[contents[8]] * 16974593 + __CFCharToUniCharTable[contents[9]] * 66049 + __CFCharToUniCharTable[contents[10]] * 257 + __CFCharToUniCharTable[contents[11]];
943 result = result * 67503105 + __CFCharToUniCharTable[contents[12]] * 16974593 + __CFCharToUniCharTable[contents[13]] * 66049 + __CFCharToUniCharTable[contents[14]] * 257 + __CFCharToUniCharTable[contents[15]];
944 }
945 #if defined(DEBUG)
946 if (!__CFCharToUniCharFunc) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
947 CFIndex cnt;
948 Boolean err = false;
949 contents = origContents;
950 if (len <= 24) {
951 for (cnt = 0; cnt < len; cnt++) if (contents[cnt] >= 128) err = true;
952 } else {
953 for (cnt = 0; cnt < 8; cnt++) if (contents[cnt] >= 128) err = true;
954 for (cnt = len - 16; cnt < len; cnt++) if (contents[cnt] >= 128) err = true;
955 }
956 if (err) {
957 // Can't do log here, as it might be too early
958 fprintf(stderr, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
959 }
960 }
961 #endif
962 return result + (result << (len & 31));
963 }
964
965 CFHashCode CFStringHashISOLatin1CString(const uint8_t *bytes, CFIndex len) {
966 CFHashCode result = len;
967 if (len < 24) {
968 const uint8_t *end4 = bytes + (len & ~3);
969 const uint8_t *end = bytes + len;
970 while (bytes < end4) { // First count in fours
971 result = result * 67503105 + bytes[0] * 16974593 + bytes[1] * 66049 + bytes[2] * 257 + bytes[3];
972 bytes += 4;
973 }
974 while (bytes < end) { // Then for the last <4 chars, count in ones...
975 result = result * 257 + *bytes++;
976 }
977 } else {
978 result = result * 67503105 + bytes[0] * 16974593 + bytes[1] * 66049 + bytes[2] * 257 + bytes[3];
979 result = result * 67503105 + bytes[4] * 16974593 + bytes[5] * 66049 + bytes[6] * 257 + bytes[7];
980 bytes += (len - 16);
981 result = result * 67503105 + bytes[0] * 16974593 + bytes[1] * 66049 + bytes[2] * 257 + bytes[3];
982 result = result * 67503105 + bytes[4] * 16974593 + bytes[5] * 66049 + bytes[6] * 257 + bytes[7];
983 result = result * 67503105 + bytes[8] * 16974593 + bytes[9] * 66049 + bytes[10] * 257 + bytes[11];
984 result = result * 67503105 + bytes[12] * 16974593 + bytes[13] * 66049 + bytes[14] * 257 + bytes[15];
985 }
986 return result + (result << (len & 31));
987 }
988
989 CFHashCode CFStringHashCString(const uint8_t *bytes, CFIndex len) {
990 return __CFStrHashEightBit(bytes, len);
991 }
992
993 CFHashCode CFStringHashCharacters(const UniChar *characters, CFIndex len) {
994 return __CFStrHashCharacters(characters, len, len);
995 }
996
997 /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash.
998 */
999 CFHashCode CFStringHashNSString(CFStringRef str) {
1000 UniChar buffer[24];
1001 CFIndex bufLen; // Number of characters in the buffer for hashing
1002 CFIndex len; // Actual length of the string
1003
1004 CF_OBJC_CALL0(CFIndex, len, str, "length");
1005 if (len <= 24) {
1006 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, len));
1007 bufLen = len;
1008 } else {
1009 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, 8));
1010 CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer+8, CFRangeMake(len-16, 16));
1011 bufLen = 24;
1012 }
1013 return __CFStrHashCharacters(buffer, bufLen, len);
1014 }
1015
1016 CFHashCode __CFStringHash(CFTypeRef cf) {
1017 /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
1018 CFStringRef str = cf;
1019 const uint8_t *contents = __CFStrContents(str);
1020 CFIndex len = __CFStrLength2(str, contents);
1021
1022 if (__CFStrIsEightBit(str)) {
1023 contents += __CFStrSkipAnyLengthByte(str);
1024 return __CFStrHashEightBit(contents, len);
1025 } else {
1026 return __CFStrHashCharacters((const UniChar *)contents, len, len);
1027 }
1028 }
1029
1030
1031 static CFStringRef __CFStringCopyDescription(CFTypeRef cf) {
1032 return CFStringCreateWithFormat(kCFAllocatorDefault, NULL, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf, __CFGetAllocator(cf), cf);
1033 }
1034
1035 static CFStringRef __CFStringCopyFormattingDescription(CFTypeRef cf, CFDictionaryRef formatOptions) {
1036 return CFStringCreateCopy(__CFGetAllocator(cf), cf);
1037 }
1038
1039 static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID;
1040
1041 static const CFRuntimeClass __CFStringClass = {
1042 0,
1043 "CFString",
1044 NULL, // init
1045 (void *)CFStringCreateCopy,
1046 __CFStringDeallocate,
1047 __CFStringEqual,
1048 __CFStringHash,
1049 __CFStringCopyFormattingDescription,
1050 __CFStringCopyDescription
1051 };
1052
1053 __private_extern__ void __CFStringInitialize(void) {
1054 __kCFStringTypeID = _CFRuntimeRegisterClass(&__CFStringClass);
1055 }
1056
1057 CFTypeID CFStringGetTypeID(void) {
1058 return __kCFStringTypeID;
1059 }
1060
1061
1062 static Boolean CFStrIsUnicode(CFStringRef str) {
1063 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, Boolean, str, "_encodingCantBeStoredInEightBitCFString");
1064 return __CFStrIsUnicode(str);
1065 }
1066
1067
1068
1069 #define ALLOCATORSFREEFUNC ((void *)-1)
1070
1071 /* contentsDeallocator indicates how to free the data if it's noCopy == true:
1072 kCFAllocatorNull: don't free
1073 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
1074 NULL: default allocator
1075 otherwise it's the allocator that should be used (it will be explicitly stored)
1076 if noCopy == false, then freeFunc should be ALLOCATORSFREEFUNC
1077 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
1078 possiblyExternalFormat indicates that the bytes might have BOM and be swapped
1079 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
1080 numBytes contains the actual number of bytes in "bytes", including Length byte,
1081 BUT not the NULL byte at the end
1082 bytes should not contain BOM characters
1083 !!! Various flags should be combined to reduce number of arguments, if possible
1084 */
1085 __private_extern__ CFStringRef __CFStringCreateImmutableFunnel3(
1086 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1087 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1088 CFAllocatorRef contentsDeallocator, UInt32 converterFlags) {
1089
1090 CFMutableStringRef str;
1091 CFVarWidthCharBuffer vBuf;
1092 CFIndex size;
1093 Boolean useLengthByte = false;
1094 Boolean useNullByte = false;
1095 Boolean useInlineData = false;
1096
1097 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1098
1099 if (contentsDeallocator == ALLOCATORSFREEFUNC) {
1100 contentsDeallocator = alloc;
1101 } else if (contentsDeallocator == NULL) {
1102 contentsDeallocator = __CFGetDefaultAllocator();
1103 }
1104
1105 if ((NULL != kCFEmptyString) && (numBytes == 0) && (alloc == kCFAllocatorSystemDefault)) { // If we are using the system default allocator, and the string is empty, then use the empty string!
1106 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
1107 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1108 }
1109 return CFRetain(kCFEmptyString); // Quick exit; won't catch all empty strings, but most
1110 }
1111
1112 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1113
1114 vBuf.shouldFreeChars = false; // We use this to remember to free the buffer possibly allocated by decode
1115
1116 // First check to see if the data needs to be converted...
1117 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
1118
1119 if ((encoding == kCFStringEncodingUnicode && possiblyExternalFormat) || (encoding != kCFStringEncodingUnicode && !__CFCanUseEightBitCFStringForBytes(bytes, numBytes, encoding))) {
1120 const void *realBytes = (uint8_t*) bytes + (hasLengthByte ? 1 : 0);
1121 CFIndex realNumBytes = numBytes - (hasLengthByte ? 1 : 0);
1122 Boolean usingPassedInMemory = false;
1123
1124 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
1125 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
1126
1127 if (!__CFStringDecodeByteStream3(realBytes, realNumBytes, encoding, false, &vBuf, &usingPassedInMemory, converterFlags)) {
1128 return NULL; // !!! Is this acceptable failure mode?
1129 }
1130
1131 encoding = vBuf.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUnicode;
1132
1133 if (!usingPassedInMemory) {
1134
1135 // Make the parameters fit the new situation
1136 numBytes = vBuf.isASCII ? vBuf.numChars : (vBuf.numChars * sizeof(UniChar));
1137 hasLengthByte = hasNullByte = false;
1138
1139 // Get rid of the original buffer if its not being used
1140 if (noCopy && contentsDeallocator != kCFAllocatorNull) {
1141 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1142 }
1143 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1144
1145 // See if we can reuse any storage the decode func might have allocated
1146 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1147
1148 if (vBuf.shouldFreeChars && (alloc == vBuf.allocator) && encoding == kCFStringEncodingUnicode) {
1149 vBuf.shouldFreeChars = false; // Transferring ownership to the CFString
1150 bytes = CFAllocatorReallocate(vBuf.allocator, (void *)vBuf.chars.unicode, numBytes, 0); // Tighten up the storage
1151 noCopy = true;
1152 } else {
1153 bytes = vBuf.chars.unicode;
1154 noCopy = false; // Can't do noCopy anymore
1155 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1156 }
1157
1158 }
1159
1160 // At this point, all necessary input arguments have been changed to reflect the new state
1161
1162 } else if (encoding == kCFStringEncodingUnicode && tryToReduceUnicode) { // Check to see if we can reduce Unicode to ASCII
1163 CFIndex cnt;
1164 CFIndex len = numBytes / sizeof(UniChar);
1165 Boolean allASCII = true;
1166
1167 for (cnt = 0; cnt < len; cnt++) if (((const UniChar *)bytes)[cnt] > 127) {
1168 allASCII = false;
1169 break;
1170 }
1171
1172 if (allASCII) { // Yes we can!
1173 uint8_t *ptr, *mem;
1174 hasLengthByte = __CFCanUseLengthByte(len);
1175 hasNullByte = true;
1176 numBytes = (len + 1 + (hasLengthByte ? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte
1177 // See if we can use that temporary local buffer in vBuf...
1178 if (numBytes >= __kCFVarWidthLocalBufferSize) {
1179 mem = ptr = (uint8_t *)CFAllocatorAllocate(alloc, numBytes, 0);
1180 if (__CFOASafe) __CFSetLastAllocationEventName(mem, "CFString (store)");
1181 } else {
1182 mem = ptr = (uint8_t *)(vBuf.localBuffer);
1183 }
1184 // Copy the Unicode bytes into the new ASCII buffer
1185 if (hasLengthByte) *ptr++ = len;
1186 for (cnt = 0; cnt < len; cnt++) ptr[cnt] = ((const UniChar *)bytes)[cnt];
1187 ptr[len] = 0;
1188 if (noCopy && contentsDeallocator != kCFAllocatorNull) {
1189 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1190 }
1191 // Now make everything look like we had an ASCII buffer to start with
1192 bytes = mem;
1193 encoding = kCFStringEncodingASCII;
1194 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1195 noCopy = (numBytes >= __kCFVarWidthLocalBufferSize); // If we had to allocate it, make sure it's kept around
1196 numBytes--; // Should not contain the NULL byte at end...
1197 }
1198
1199 // At this point, all necessary input arguments have been changed to reflect the new state
1200 }
1201
1202 // Now determine the necessary size
1203
1204 if (noCopy) {
1205
1206 size = sizeof(void *); // Pointer to the buffer
1207 if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) {
1208 size += sizeof(void *); // The contentsDeallocator
1209 }
1210 if (!hasLengthByte) size += sizeof(SInt32); // Explicit length
1211 useLengthByte = hasLengthByte;
1212 useNullByte = hasNullByte;
1213
1214 } else { // Inline data; reserve space for it
1215
1216 useInlineData = true;
1217 size = numBytes;
1218
1219 if (hasLengthByte || (encoding != kCFStringEncodingUnicode && __CFCanUseLengthByte(numBytes))) {
1220 useLengthByte = true;
1221 if (!hasLengthByte) size += 1;
1222 } else {
1223 size += sizeof(SInt32); // Explicit length
1224 }
1225 if (hasNullByte || encoding != kCFStringEncodingUnicode) {
1226 useNullByte = true;
1227 size += 1;
1228 }
1229 }
1230
1231 #ifdef STRING_SIZE_STATS
1232 // Dump alloced CFString size info every so often
1233 static int cnt = 0;
1234 static unsigned sizes[256] = {0};
1235 int allocedSize = size + sizeof(CFRuntimeBase);
1236 if (allocedSize < 255) sizes[allocedSize]++; else sizes[255]++;
1237 if ((++cnt % 1000) == 0) {
1238 printf ("\nTotal: %d\n", cnt);
1239 int i; for (i = 0; i < 256; i++) printf("%03d: %5d%s", i, sizes[i], ((i % 8) == 7) ? "\n" : " ");
1240 }
1241 #endif
1242
1243 // Finally, allocate!
1244
1245 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, size, NULL);
1246 if (str) {
1247 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (immutable)");
1248
1249 __CFStrSetInfoBits(str,
1250 (useInlineData ? __kCFHasInlineContents : (contentsDeallocator == alloc ? __kCFNotInlineContentsDefaultFree : (contentsDeallocator == kCFAllocatorNull ? __kCFNotInlineContentsNoFree : __kCFNotInlineContentsCustomFree))) |
1251 ((encoding == kCFStringEncodingUnicode) ? __kCFIsUnicode : 0) |
1252 (useNullByte ? __kCFHasNullByte : 0) |
1253 (useLengthByte ? __kCFHasLengthByte : 0));
1254
1255 if (!useLengthByte) {
1256 CFIndex length = numBytes - (hasLengthByte ? 1 : 0);
1257 if (encoding == kCFStringEncodingUnicode) length /= sizeof(UniChar);
1258 __CFStrSetExplicitLength(str, length);
1259 }
1260
1261 if (useInlineData) {
1262 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1263 if (useLengthByte && !hasLengthByte) *contents++ = numBytes;
1264 memmove(contents, bytes, numBytes);
1265 if (useNullByte) contents[numBytes] = 0;
1266 } else {
1267 __CFStrSetContentPtr(str, bytes);
1268 if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) __CFStrSetContentsDeallocator(str, CFRetain(contentsDeallocator));
1269 }
1270 } else {
1271 if (contentsDeallocator != kCFAllocatorNull) CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1272 }
1273 if (vBuf.shouldFreeChars) CFAllocatorDeallocate(vBuf.allocator, (void *)bytes);
1274
1275 return str;
1276 }
1277
1278 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1279 */
1280 CFStringRef __CFStringCreateImmutableFunnel2(
1281 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1282 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1283 CFAllocatorRef contentsDeallocator) {
1284 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, possiblyExternalFormat, tryToReduceUnicode, hasLengthByte, hasNullByte, noCopy, contentsDeallocator, 0);
1285 }
1286
1287
1288
1289 CFStringRef CFStringCreateWithPascalString(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding) {
1290 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1291 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, false, ALLOCATORSFREEFUNC, 0);
1292 }
1293
1294
1295 CFStringRef CFStringCreateWithCString(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding) {
1296 CFIndex len = strlen(cStr);
1297 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, false, ALLOCATORSFREEFUNC, 0);
1298 }
1299
1300 CFStringRef CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1301 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1302 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, true, contentsDeallocator, 0);
1303 }
1304
1305
1306 CFStringRef CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1307 CFIndex len = strlen(cStr);
1308 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, true, contentsDeallocator, 0);
1309 }
1310
1311
1312 CFStringRef CFStringCreateWithCharacters(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars) {
1313 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1314 }
1315
1316
1317 CFStringRef CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars, CFAllocatorRef contentsDeallocator) {
1318 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, false, false, false, true, contentsDeallocator, 0);
1319 }
1320
1321
1322 CFStringRef CFStringCreateWithBytes(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat) {
1323 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1324 }
1325
1326 CFStringRef _CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1327 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1328 }
1329
1330 CFStringRef CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1331 return _CFStringCreateWithBytesNoCopy(alloc, bytes, numBytes, encoding, externalFormat, contentsDeallocator);
1332 }
1333
1334 CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1335 return _CFStringCreateWithFormatAndArgumentsAux(alloc, NULL, formatOptions, format, arguments);
1336 }
1337
1338 CFStringRef _CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, CFDictionaryRef), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1339 CFStringRef str;
1340 CFMutableStringRef outputString = CFStringCreateMutable(__CFGetDefaultAllocator(), 0); //should use alloc if no copy/release
1341 __CFStrSetDesiredCapacity(outputString, 120); // Given this will be tightened later, choosing a larger working string is fine
1342 _CFStringAppendFormatAndArgumentsAux(outputString, copyDescFunc, formatOptions, format, arguments);
1343 // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1344 // (However, this does make the string inline, and cause the supplied allocator to be used...)
1345 str = CFStringCreateCopy(alloc, outputString);
1346 CFRelease(outputString);
1347 return str;
1348 }
1349
1350 CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) {
1351 CFStringRef result;
1352 va_list argList;
1353
1354 va_start(argList, format);
1355 result = CFStringCreateWithFormatAndArguments(alloc, formatOptions, format, argList);
1356 va_end(argList);
1357
1358 return result;
1359 }
1360
1361 CFStringRef CFStringCreateWithSubstring(CFAllocatorRef alloc, CFStringRef str, CFRange range) {
1362 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
1363 static SEL s = NULL;
1364 CFStringRef (*func)(void *, SEL, ...) = (void *)__CFSendObjCMsg;
1365 if (!s) s = sel_registerName("_createSubstringWithRange:");
1366 CFStringRef result = func((void *)str, s, CFRangeMake(range.location, range.length));
1367 if (result && CF_USING_COLLECTABLE_MEMORY) CFRetain(result); // needs hard retain.
1368 return result;
1369 }
1370 // CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, CFStringRef , str, "_createSubstringWithRange:", CFRangeMake(range.location, range.length));
1371
1372 __CFAssertIsString(str);
1373 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1374
1375 if ((range.location == 0) && (range.length == __CFStrLength(str))) { /* The substring is the whole string... */
1376 return CFStringCreateCopy(alloc, str);
1377 } else if (__CFStrIsEightBit(str)) {
1378 const uint8_t *contents = __CFStrContents(str);
1379 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location + __CFStrSkipAnyLengthByte(str), range.length, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1380 } else {
1381 const UniChar *contents = __CFStrContents(str);
1382 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location, range.length * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1383 }
1384 }
1385
1386 CFStringRef CFStringCreateCopy(CFAllocatorRef alloc, CFStringRef str) {
1387 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
1388 static SEL s = NULL;
1389 CFStringRef (*func)(void *, SEL, ...) = (void *)__CFSendObjCMsg;
1390 if (!s) s = sel_registerName("copy");
1391 CFStringRef result = func((void *)str, s);
1392 if (result && CF_USING_COLLECTABLE_MEMORY) CFRetain(result); // needs hard retain.
1393 return result;
1394 }
1395 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringRef, str, "copy");
1396
1397 __CFAssertIsString(str);
1398 if (!__CFStrIsMutable(str) && // If the string is not mutable
1399 ((alloc ? alloc : __CFGetDefaultAllocator()) == __CFGetAllocator(str)) && // and it has the same allocator as the one we're using
1400 (__CFStrIsInline(str) || __CFStrFreeContentsWhenDone(str) || __CFStrIsConstant(str))) { // and the characters are inline, or are owned by the string, or the string is constant
1401 CFRetain(str); // Then just retain instead of making a true copy
1402 return str;
1403 }
1404 if (__CFStrIsEightBit(str)) {
1405 const uint8_t *contents = __CFStrContents(str);
1406 return __CFStringCreateImmutableFunnel3(alloc, contents + __CFStrSkipAnyLengthByte(str), __CFStrLength2(str, contents), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1407 } else {
1408 const UniChar *contents = __CFStrContents(str);
1409 return __CFStringCreateImmutableFunnel3(alloc, contents, __CFStrLength2(str, contents) * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1410 }
1411 }
1412
1413
1414
1415 /*** Constant string stuff... ***/
1416
1417 static CFMutableDictionaryRef constantStringTable = NULL;
1418
1419 /* For now we call a function to create a constant string and keep previously created constant strings in a dictionary. The keys are the 8-bit constant C-strings from the compiler; the values are the CFStrings created for them.
1420 */
1421
1422 static CFStringRef __cStrCopyDescription(const void *ptr) {
1423 return CFStringCreateWithCStringNoCopy(NULL, (const char *)ptr, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull);
1424 }
1425
1426 static Boolean __cStrEqual(const void *ptr1, const void *ptr2) {
1427 return (strcmp((const char *)ptr1, (const char *)ptr2) == 0);
1428 }
1429
1430 static CFHashCode __cStrHash(const void *ptr) {
1431 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1432 const unsigned char *cStr = (const unsigned char *)ptr;
1433 CFIndex len = strlen(cStr);
1434 CFHashCode result = 0;
1435 if (len <= 4) { // All chars
1436 unsigned cnt = len;
1437 while (cnt--) result += (result << 8) + *cStr++;
1438 } else { // First and last 2 chars
1439 result += (result << 8) + cStr[0];
1440 result += (result << 8) + cStr[1];
1441 result += (result << 8) + cStr[len-2];
1442 result += (result << 8) + cStr[len-1];
1443 }
1444 result += (result << (len & 31));
1445 return result;
1446 }
1447
1448 #if defined(DEBUG)
1449 /* We use a special allocator (which simply calls through to the default) for constant strings so that we can catch them being freed...
1450 */
1451 static void *csRealloc(void *oPtr, CFIndex size, CFOptionFlags hint, void *info) {
1452 return CFAllocatorReallocate(NULL, oPtr, size, hint);
1453 }
1454
1455 static void *csAlloc(CFIndex size, CFOptionFlags hint, void *info) {
1456 return CFAllocatorAllocate(NULL, size, hint);
1457 }
1458
/* Deallocate callback of the debug CFSTR allocator: forwards to CFAllocatorDeallocate
   with a NULL allocator. `info` is not used. */
static void csDealloc(void *ptr, void *info) {
    CFAllocatorDeallocate(NULL, ptr);
    return;
}
1462
1463 static CFStringRef csCopyDescription(const void *info) {
1464 return CFRetain(CFSTR("Debug allocator for CFSTRs"));
1465 }
1466 #endif
1467
1468 static CFSpinLock_t _CFSTRLock = 0;
1469
/* Returns the CFString that stands for the constant C string cStr, creating it on first use.
   Created strings are cached in constantStringTable, keyed by the C string's bytes
   (compared with __cStrEqual, hashed with __cStrHash), so repeated calls with equal
   contents return the same CFString. Table installation, lookups and insertions are done
   while holding _CFSTRLock; string creation itself happens outside the lock.
   Bytes above 127 are interpreted as MacOS Roman and trigger a logged warning.
   If the string cannot be created, an assertion is logged and HALT is executed. */
1470 CFStringRef __CFStringMakeConstantString(const char *cStr) {
1471 CFStringRef result;
1472 #if defined(DEBUG)
1473 //StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging
1474 if ('\0' == *cStr) return kCFEmptyString;
1475 #endif
// First use: build a table outside the lock, then install it under the lock only if no
// other thread installed one in the meantime; a table that lost the race is released.
1476 if (constantStringTable == NULL) {
1477 CFDictionaryKeyCallBacks constantStringCallBacks = {0, NULL, NULL, __cStrCopyDescription, __cStrEqual, __cStrHash};
1478 CFMutableDictionaryRef table = CFDictionaryCreateMutable(NULL, 0, &constantStringCallBacks, &kCFTypeDictionaryValueCallBacks);
1479 _CFDictionarySetCapacity(table, 2500); // avoid lots of rehashing
1480 __CFSpinLock(&_CFSTRLock);
1481 if (constantStringTable == NULL) constantStringTable = table;
1482 __CFSpinUnlock(&_CFSTRLock);
1483 if (constantStringTable != table) CFRelease(table);
// NOTE(review): in DEBUG builds the debugging allocator is (re)created by every thread that
// reaches this branch, and the assignment is made without holding _CFSTRLock -- confirm intended.
1484 #if defined(DEBUG)
1485 {
1486 CFAllocatorContext context = {0, NULL, NULL, NULL, csCopyDescription, csAlloc, csRealloc, csDealloc, NULL};
1487 constantStringAllocatorForDebugging = _CFAllocatorCreateGC(NULL, &context);
1488 }
1489 #else
// Non-DEBUG builds: from this point on in the file the name expands to NULL.
1490 #define constantStringAllocatorForDebugging NULL
1491 #endif
1492 }
1493
// Fast path: look the string up under the lock.
1494 __CFSpinLock(&_CFSTRLock);
1495 if ((result = (CFStringRef)CFDictionaryGetValue(constantStringTable, cStr))) {
1496 __CFSpinUnlock(&_CFSTRLock);
1497 } else {
1498 __CFSpinUnlock(&_CFSTRLock);
1499
1500 {
1501 char *key;
1502 Boolean isASCII = true;
1503 // Given this code path is rarer these days, OK to do this extra work to verify the strings
1504 const unsigned char *tmp = cStr;
1505 while (*tmp) {
1506 if (*tmp++ > 127) {
1507 isASCII = false;
1508 break;
1509 }
1510 }
// Non-ASCII input: log a warning showing the string with every high byte written as an octal escape.
1511 if (!isASCII) {
1512 CFMutableStringRef ms = CFStringCreateMutable(NULL, 0);
1513 tmp = cStr;
1514 while (*tmp) {
1515 CFStringAppendFormat(ms, NULL, (*tmp > 127) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp);
1516 tmp++;
1517 }
1518 CFLog(0, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms);
1519 CFRelease(ms);
1520 }
1521 // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility
1522 result = CFStringCreateWithCString(constantStringAllocatorForDebugging, cStr, kCFStringEncodingMacRoman);
1523 if (result == NULL) {
1524 CFLog(__kCFLogAssertion, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1525 HALT;
1526 }
1527 if (__CFOASafe) __CFSetLastAllocationEventName((void *)result, "CFString (CFSTR)");
// The dictionary key must outlive the caller's cStr: for an 8-bit result it points at the
// new string's own contents; otherwise a separate copy of cStr is allocated.
1528 if (__CFStrIsEightBit(result)) {
1529 key = (char *)__CFStrContents(result) + __CFStrSkipAnyLengthByte(result);
1530 } else { // For some reason the string is not 8-bit!
1531 key = CFAllocatorAllocate(NULL, strlen(cStr) + 1, 0);
1532 if (__CFOASafe) __CFSetLastAllocationEventName((void *)key, "CFString (CFSTR key)");
1533 strcpy(key, cStr); // !!! We will leak this, if the string is removed from the table (or table is freed)
1534 }
1535
1536 {
1537 #if !defined(DEBUG)
1538 CFStringRef resultToBeReleased = result;
1539 #endif
1540 CFIndex count;
// Insert under the lock. An unchanged count means an equal key was already present
// (added by someone else since the lookup above), so that existing value is returned instead.
1541 __CFSpinLock(&_CFSTRLock);
1542 count = CFDictionaryGetCount(constantStringTable);
1543 CFDictionaryAddValue(constantStringTable, key, result);
1544 if (CFDictionaryGetCount(constantStringTable) == count) { // add did nothing, someone already put it there
1545 result = (CFStringRef)CFDictionaryGetValue(constantStringTable, key);
1546 }
1547 __CFSpinUnlock(&_CFSTRLock);
// Drop the reference obtained from creation; presumably the table (created with
// kCFTypeDictionaryValueCallBacks) holds its own reference to a value it stored -- TODO confirm.
1548 #if !defined(DEBUG)
1549 // Can't release this in the DEBUG case; will get assertion failure
1550 CFRelease(resultToBeReleased);
1551 #endif
1552 }
1553 }
1554 }
1555 return result;
1556 }
1557
1558 #if defined(__MACOS8__) || defined(__WIN32__)
1559
1560 void __CFStringCleanup (void) {
1561 /* in case library is unloaded, release store for the constant string table */
1562 if (constantStringTable != NULL) {
1563 #if defined(DEBUG)
1564 __CFConstantStringTableBeingFreed = true;
1565 CFRelease(constantStringTable);
1566 __CFConstantStringTableBeingFreed = false;
1567 #else
1568 CFRelease(constantStringTable);
1569 #endif
1570 }
1571 #if defined(DEBUG)
1572 CFAllocatorDeallocate( constantStringAllocatorForDebugging, (void*) constantStringAllocatorForDebugging );
1573 #endif
1574 }
1575
1576 #endif
1577
1578
1579 // Can pass in NSString as replacement string
1580 // Call with numRanges > 0, and incrementing ranges
1581
1582 static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) {
1583 int cnt;
1584 CFStringRef copy = NULL;
1585 if (replacement == str) copy = replacement = CFStringCreateCopy(NULL, replacement); // Very special and hopefully rare case
1586 CFIndex replacementLength = CFStringGetLength(replacement);
1587
1588 __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1589
1590 if (__CFStrIsUnicode(str)) {
1591 UniChar *contents = (UniChar *)__CFStrContents(str);
1592 UniChar *firstReplacement = contents + ranges[0].location;
1593 // Extract the replacementString into the first location, then copy from there
1594 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement);
1595 for (cnt = 1; cnt < numRanges; cnt++) {
1596 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1597 contents += replacementLength - ranges[cnt - 1].length;
1598 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar));
1599 }
1600 } else {
1601 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1602 uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str);
1603 // Extract the replacementString into the first location, then copy from there
1604 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL);
1605 contents += __CFStrSkipAnyLengthByte(str); // Now contents will simply track the location to insert next string into
1606 for (cnt = 1; cnt < numRanges; cnt++) {
1607 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1608 contents += replacementLength - ranges[cnt - 1].length;
1609 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength);
1610 }
1611 }
1612 if (copy) CFRelease(copy);
1613 }
1614
1615 // Can pass in NSString as replacement string
1616
1617 CF_INLINE void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
1618 CFStringRef copy = NULL;
1619 if (replacement == str) copy = replacement = CFStringCreateCopy(NULL, replacement); // Very special and hopefully rare case
1620 CFIndex replacementLength = CFStringGetLength(replacement);
1621
1622 __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1623
1624 if (__CFStrIsUnicode(str)) {
1625 UniChar *contents = (UniChar *)__CFStrContents(str);
1626 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location);
1627 } else {
1628 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1629 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL);
1630 }
1631
1632 if (copy) CFRelease(copy);
1633 }
1634
1635 /* Default desired capacity, used if the client does not provide a minimum capacity (i.e. passes a maxLength of 0)
1636 */
1637 #define DEFAULTMINCAPACITY 32
1638
1639 CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) {
1640 CFMutableStringRef str;
1641 Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false;
1642
1643 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1644
1645 // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
1646 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(void *) + sizeof(UInt32) * 3 + (hasExternalContentsAllocator ? sizeof(CFAllocatorRef) : 0), NULL);
1647 if (str) {
1648 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)");
1649
1650 __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits);
1651 str->variants.notInlineMutable.buffer = NULL;
1652 __CFStrSetExplicitLength(str, 0);
1653 str->variants.notInlineMutable.gapEtc = 0;
1654 if (maxLength != 0) __CFStrSetIsFixed(str);
1655 __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength);
1656 __CFStrSetCapacity(str, 0);
1657 }
1658 return str;
1659 }
1660
1661 CFMutableStringRef CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc, UniChar *chars, CFIndex numChars, CFIndex capacity, CFAllocatorRef externalCharactersAllocator) {
1662 CFOptionFlags contentsAllocationBits = externalCharactersAllocator ? ((externalCharactersAllocator == kCFAllocatorNull) ? __kCFNotInlineContentsNoFree : __kCFHasContentsAllocator) : __kCFNotInlineContentsDefaultFree;
1663 CFMutableStringRef string = __CFStringCreateMutableFunnel(alloc, 0, contentsAllocationBits | __kCFIsUnicode);
1664 if (string) {
1665 __CFStrSetIsExternalMutable(string);
1666 if (contentsAllocationBits == __kCFHasContentsAllocator) __CFStrSetContentsAllocator(string, CFRetain(externalCharactersAllocator));
1667 CFStringSetExternalCharactersNoCopy(string, chars, numChars, capacity);
1668 }
1669 return string;
1670 }
1671
1672 CFMutableStringRef CFStringCreateMutable(CFAllocatorRef alloc, CFIndex maxLength) {
1673 return __CFStringCreateMutableFunnel(alloc, maxLength, __kCFNotInlineContentsDefaultFree);
1674 }
1675
1676 CFMutableStringRef CFStringCreateMutableCopy(CFAllocatorRef alloc, CFIndex maxLength, CFStringRef string) {
1677 CFMutableStringRef newString;
1678
1679 if (CF_IS_OBJC(__kCFStringTypeID, string)) {
1680 static SEL s = NULL;
1681 CFMutableStringRef (*func)(void *, SEL, ...) = (void *)__CFSendObjCMsg;
1682 if (!s) s = sel_registerName("mutableCopy");
1683 newString = func((void *)string, s);
1684 if (CF_USING_COLLECTABLE_MEMORY) auto_zone_retain(__CFCollectableZone, newString); // needs hard retain IF using GC
1685 return newString;
1686 }
1687 // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFMutableStringRef, string, "mutableCopy");
1688
1689 __CFAssertIsString(string);
1690
1691 newString = CFStringCreateMutable(alloc, maxLength);
1692 __CFStringReplace(newString, CFRangeMake(0, 0), string);
1693
1694 return newString;
1695 }
1696
1697
1698 __private_extern__ void _CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex len) {
1699 __CFAssertIsStringAndMutable(str);
1700 __CFStrSetDesiredCapacity(str, len);
1701 }
1702
1703
1704 /* This one is for CF
1705 */
1706 CFIndex CFStringGetLength(CFStringRef str) {
1707 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFIndex, str, "length");
1708
1709 __CFAssertIsString(str);
1710 return __CFStrLength(str);
1711 }
1712
1713 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1714 */
1715 CFIndex _CFStringGetLength2(CFStringRef str) {
1716 return __CFStrLength(str);
1717 }
1718
1719
1720 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere.
1721 */
1722 CF_INLINE UniChar __CFStringGetCharacterAtIndexGuts(CFStringRef str, CFIndex idx, const uint8_t *contents) {
1723 if (__CFStrIsEightBit(str)) {
1724 contents += __CFStrSkipAnyLengthByte(str);
1725 #if defined(DEBUG)
1726 if (!__CFCharToUniCharFunc && (contents[idx] >= 128)) {
1727 // Can't do log here, as it might be too early
1728 fprintf(stderr, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
1729 }
1730 #endif
1731 return __CFCharToUniCharTable[contents[idx]];
1732 }
1733
1734 return ((UniChar *)contents)[idx];
1735 }
1736
1737 /* This one is for the CF API
1738 */
1739 UniChar CFStringGetCharacterAtIndex(CFStringRef str, CFIndex idx) {
1740 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, UniChar, str, "characterAtIndex:", idx);
1741
1742 __CFAssertIsString(str);
1743 __CFAssertIndexIsInStringBounds(str, idx);
1744 return __CFStringGetCharacterAtIndexGuts(str, idx, __CFStrContents(str));
1745 }
1746
1747 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1748 */
1749 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str, CFIndex idx, UniChar *ch) {
1750 const uint8_t *contents = __CFStrContents(str);
1751 if (idx >= __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1752 *ch = __CFStringGetCharacterAtIndexGuts(str, idx, contents);
1753 return _CFStringErrNone;
1754 }
1755
1756
1757 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
1758 */
1759 CF_INLINE void __CFStringGetCharactersGuts(CFStringRef str, CFRange range, UniChar *buffer, const uint8_t *contents) {
1760 if (__CFStrIsEightBit(str)) {
1761 __CFStrConvertBytesToUnicode(((uint8_t *)contents) + (range.location + __CFStrSkipAnyLengthByte(str)), buffer, range.length);
1762 } else {
1763 const UniChar *uContents = ((UniChar *)contents) + range.location;
1764 memmove(buffer, uContents, range.length * sizeof(UniChar));
1765 }
1766 }
1767
1768 /* This one is for the CF API
1769 */
1770 void CFStringGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1771 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "getCharacters:range:", buffer, CFRangeMake(range.location, range.length));
1772
1773 __CFAssertIsString(str);
1774 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1775 __CFStringGetCharactersGuts(str, range, buffer, __CFStrContents(str));
1776 }
1777
1778 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1779 */
1780 int _CFStringCheckAndGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1781 const uint8_t *contents = __CFStrContents(str);
1782 if (range.location + range.length > __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1783 __CFStringGetCharactersGuts(str, range, buffer, contents);
1784 return _CFStringErrNone;
1785 }
1786
1787
1788 CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) {
1789
1790 /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */
1791 __CFAssertIsNotNegative(maxBufLen);
1792
1793 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { // If we can grope the ivars, let's do it...
1794 __CFAssertIsString(str);
1795 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1796
1797 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
1798 const unsigned char *contents = __CFStrContents(str);
1799 CFIndex cLength = range.length;
1800
1801 if (buffer) {
1802 if (cLength > maxBufLen) cLength = maxBufLen;
1803 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength);
1804 }
1805 if (usedBufLen) *usedBufLen = cLength;
1806
1807 return cLength;
1808 }
1809 }
1810
1811 return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen);
1812 }
1813
1814
1815 ConstStringPtr CFStringGetPascalStringPtr (CFStringRef str, CFStringEncoding encoding) {
1816
1817 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1818 __CFAssertIsString(str);
1819 if (__CFStrHasLengthByte(str) && __CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII
1820 const uint8_t *contents = __CFStrContents(str);
1821 if (__CFStrHasExplicitLength(str) && (__CFStrLength2(str, contents) != (SInt32)(*contents))) return NULL; // Invalid length byte
1822 return (ConstStringPtr)contents;
1823 }
1824 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1825 }
1826 return NULL;
1827 }
1828
1829
1830 const char * CFStringGetCStringPtr(CFStringRef str, CFStringEncoding encoding) {
1831
1832 if (encoding != __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII != __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding))) return NULL;
1833 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1834
1835 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, const char *, str, "_fastCStringContents:", true);
1836
1837 __CFAssertIsString(str);
1838
1839 if (__CFStrHasNullByte(str)) {
1840 return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str);
1841 } else {
1842 return NULL;
1843 }
1844 }
1845
1846
1847 const UniChar *CFStringGetCharactersPtr(CFStringRef str) {
1848
1849 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, const UniChar *, str, "_fastCharacterContents");
1850
1851 __CFAssertIsString(str);
1852 if (__CFStrIsUnicode(str)) return (const UniChar *)__CFStrContents(str);
1853 return NULL;
1854 }
1855
1856
1857 Boolean CFStringGetPascalString(CFStringRef str, Str255 buffer, CFIndex bufferSize, CFStringEncoding encoding) {
1858 CFIndex length;
1859 CFIndex usedLen;
1860
1861 __CFAssertIsNotNegative(bufferSize);
1862 if (bufferSize < 1) return false;
1863
1864 if (CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1865 length = CFStringGetLength(str);
1866 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
1867 } else {
1868 const uint8_t *contents;
1869
1870 __CFAssertIsString(str);
1871
1872 contents = __CFStrContents(str);
1873 length = __CFStrLength2(str, contents);
1874
1875 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
1876
1877 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
1878 if (length >= bufferSize) return false;
1879 memmove((void*)(1 + (const char*)buffer), (__CFStrSkipAnyLengthByte(str) + contents), length);
1880 *buffer = length;
1881 return true;
1882 }
1883 }
1884
1885 if (__CFStringEncodeByteStream(str, 0, length, false, encoding, false, (void*)(1 + (uint8_t*)buffer), bufferSize - 1, &usedLen) != length) {
1886 #if defined(DEBUG)
1887 if (bufferSize > 0) {
1888 strncpy((char *)buffer + 1, CONVERSIONFAILURESTR, bufferSize - 1);
1889 buffer[0] = (CFIndex)sizeof(CONVERSIONFAILURESTR) < (bufferSize - 1) ? (CFIndex)sizeof(CONVERSIONFAILURESTR) : (bufferSize - 1);
1890 }
1891 #else
1892 if (bufferSize > 0) buffer[0] = 0;
1893 #endif
1894 return false;
1895 }
1896 *buffer = usedLen;
1897 return true;
1898 }
1899
1900 Boolean CFStringGetCString(CFStringRef str, char *buffer, CFIndex bufferSize, CFStringEncoding encoding) {
1901 const uint8_t *contents;
1902 CFIndex len;
1903
1904 __CFAssertIsNotNegative(bufferSize);
1905 if (bufferSize < 1) return false;
1906
1907 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, Boolean, str, "_getCString:maxLength:encoding:", buffer, bufferSize - 1, encoding);
1908
1909 __CFAssertIsString(str);
1910
1911 contents = __CFStrContents(str);
1912 len = __CFStrLength2(str, contents);
1913
1914 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
1915 if (len >= bufferSize) return false;
1916 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str), len);
1917 buffer[len] = 0;
1918 return true;
1919 } else {
1920 CFIndex usedLen;
1921
1922 if (__CFStringEncodeByteStream(str, 0, len, false, encoding, false, (unsigned char*) buffer, bufferSize - 1, &usedLen) == len) {
1923 buffer[usedLen] = '\0';
1924 return true;
1925 } else {
1926 #if defined(DEBUG)
1927 strncpy(buffer, CONVERSIONFAILURESTR, bufferSize);
1928 #else
1929 if (bufferSize > 0) buffer[0] = 0;
1930 #endif
1931 return false;
1932 }
1933 }
1934 }
1935
1936
1937 CF_INLINE bool _CFCanUseLocale(CFLocaleRef locale) {
1938 return false;
1939 }
1940
1941 static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale) {
1942 return NULL;
1943 }
1944
// Number of UTF16Char slots in the scratch buffer handed to CFUniCharMapCaseTo() during case folding
1945 #define MAX_CASE_MAPPING_BUF (8)
// Code points U+200D and U+034F
1946 #define ZERO_WIDTH_JOINER (0x200D)
1947 #define COMBINING_GRAPHEME_JOINER (0x034F)
1948 // Hangul ranges
// Inclusive code point bounds; judging by the names, the leading consonant (choseong),
// vowel (jungseong) and trailing consonant (jongseong) jamo ranges -- TODO confirm against the Unicode charts
1949 #define HANGUL_CHOSEONG_START (0x1100)
1950 #define HANGUL_CHOSEONG_END (0x115F)
1951 #define HANGUL_JUNGSEONG_START (0x1160)
1952 #define HANGUL_JUNGSEONG_END (0x11A2)
1953 #define HANGUL_JONGSEONG_START (0x11A8)
1954 #define HANGUL_JONGSEONG_END (0x11F9)
1955
// Inclusive code point bounds of the Hangul syllable range
1956 #define HANGUL_SYLLABLE_START (0xAC00)
1957 #define HANGUL_SYLLABLE_END (0xD7AF)
1958
1959
1960 // Returns the number of characters filled into outCharacters. If no change, returns 0. maxBufferLength should be at least 8
1961 static inline CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength) {
1962 CFIndex filledLength = 0, currentIndex = index;
1963
1964 if (0 != character) {
1965 UTF16Char lowSurrogate;
1966 CFIndex planeNo = (character >> 16);
1967 bool isTurkikCapitalI = false;
1968 static const uint8_t *decompBMP = NULL;
1969 static const uint8_t *nonBaseBMP = NULL;
1970
1971 if (NULL == decompBMP) {
1972 decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0);
1973 nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
1974 }
1975
1976 ++currentIndex;
1977
1978 if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII
1979 if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) {
1980 character += ('a' - 'A');
1981 *outCharacters = character;
1982 filledLength = 1;
1983 }
1984 } else {
1985 // do width-insensitive mapping
1986 if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) {
1987 (void)CFUniCharCompatibilityDecompose(&character, 1, 1);
1988 *outCharacters = character;
1989 filledLength = 1;
1990 }
1991
1992 // map surrogates
1993 if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) {
1994 character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
1995 ++currentIndex;
1996 planeNo = (character >> 16);
1997 }
1998
1999 // decompose
2000 if (flags & (kCFCompareDiacriticsInsensitive|kCFCompareNonliteral)) {
2001 if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) {
2002 filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength);
2003 character = *outCharacters;
2004 if ((flags & kCFCompareDiacriticsInsensitive) && (character < 0x0510)) filledLength = 1; // reset if Roman, Greek, Cyrillic
2005 }
2006 }
2007
2008 // fold case
2009 if (flags & kCFCompareCaseInsensitive) {
2010 const uint8_t *nonBaseBitmap;
2011 bool filterNonBase = (((flags & kCFCompareDiacriticsInsensitive) && (character < 0x0510)) ? true : false);
2012 static const uint8_t *lowerBMP = NULL;
2013 static const uint8_t *caseFoldBMP = NULL;
2014
2015 if (NULL == lowerBMP) {
2016 lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0);
2017 caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0);
2018 }
2019
2020 if ((NULL != langCode) && ('I' == character) && ((0 == strcmp(langCode, "tr")) || (0 == strcmp(langCode, "az")))) { // do Turkik special-casing
2021 if (filledLength > 1) {
2022 if (0x0307 == outCharacters[1]) {
2023 memmove(&(outCharacters[index]), &(outCharacters[index + 1]), sizeof(UTF32Char) * (--filledLength));
2024 character = *outCharacters = 'i';
2025 isTurkikCapitalI = true;
2026 }
2027 } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) {
2028 character = *outCharacters = 'i';
2029 filledLength = 1;
2030 ++currentIndex;
2031 isTurkikCapitalI = true;
2032 }
2033 }
2034 if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) {
2035 UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF];
2036 const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit;
2037 UTF32Char *outCharactersP = outCharacters;
2038 uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode);
2039
2040 bufferLimit = bufferP + bufferLength;
2041
2042 if (filledLength > 0) --filledLength; // decrement filledLength (will add back later)
2043
2044 // make space for casefold characters
2045 if ((filledLength > 0) && (bufferLength > 1)) {
2046 CFIndex totalScalerLength = 0;
2047
2048 while (bufferP < bufferLimit) {
2049 if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP;
2050 ++totalScalerLength;
2051 }
2052 memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char));
2053 bufferP = caseFoldBuffer;
2054 }
2055
2056 // fill
2057 while (bufferP < bufferLimit) {
2058 character = *(bufferP++);
2059 if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) {
2060 character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++));
2061 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
2062 } else {
2063 nonBaseBitmap = nonBaseBMP;
2064 }
2065
2066 if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2067 *(outCharactersP++) = character;
2068 ++filledLength;
2069 }
2070 }
2071 }
2072 }
2073 }
2074
2075 // collect following combining marks
2076 if (flags & (kCFCompareDiacriticsInsensitive|kCFCompareNonliteral)) {
2077 const uint8_t *nonBaseBitmap;
2078 const uint8_t *decompBitmap;
2079 bool doFill = (((flags & kCFCompareDiacriticsInsensitive) && (character < 0x0510)) ? false : true);
2080
2081 if (doFill && (0 == filledLength)) { // check if really needs to fill
2082 UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2083
2084 if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2085 nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate);
2086 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nonBaseCharacter >> 16));
2087 decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16));
2088 } else {
2089 nonBaseBitmap = nonBaseBMP;
2090 decompBitmap = decompBMP;
2091 }
2092
2093 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) {
2094 outCharacters[filledLength++] = character;
2095
2096 if ((0 == (flags & kCFCompareDiacriticsInsensitive)) || (nonBaseCharacter > 0x050F)) {
2097 if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) {
2098 filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2099 } else {
2100 outCharacters[filledLength++] = nonBaseCharacter;
2101 }
2102 }
2103 currentIndex += ((nonBaseBitmap == nonBaseBMP) ? 1 : 2);
2104 } else {
2105 doFill = false;
2106 }
2107 }
2108
2109 while (filledLength < maxBufferLength) { // do the rest
2110 character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex);
2111
2112 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) {
2113 character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate);
2114 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
2115 decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16));
2116 } else {
2117 nonBaseBitmap = nonBaseBMP;
2118 decompBitmap = decompBMP;
2119 }
2120 if (isTurkikCapitalI) {
2121 isTurkikCapitalI = false;
2122 } else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) {
2123 if (doFill && ((0 == (flags & kCFCompareDiacriticsInsensitive)) || (character > 0x050F))) {
2124 if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) {
2125 CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength);
2126
2127 if (0 == currentLength) break; // didn't fit
2128
2129 filledLength += currentLength;
2130 } else {
2131 outCharacters[filledLength++] = character;
2132 }
2133 }
2134 currentIndex += ((nonBaseBitmap == nonBaseBMP) ? 1 : 2);
2135 } else {
2136 break;
2137 }
2138 }
2139
2140 if (filledLength > 1) CFUniCharPrioritySort(outCharacters, filledLength); // priority sort
2141 }
2142 }
2143
2144 if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index);
2145
2146 return filledLength;
2147 }
2148
2149 /* Special casing for Uk sorting */
2150 #define DO_IGNORE_PUNCTUATION 1
2151 #if DO_IGNORE_PUNCTUATION
2152 #define UKRAINIAN_LANG_CODE (45)
2153 static bool __CFLocaleChecked = false;
2154 static const uint8_t *__CFPunctSetBMP = NULL;
2155 #endif /* DO_IGNORE_PUNCTUATION */
2156
2157 /* ??? We need to implement some additional flags here
2158 ??? Also, pay attention to flag 2, which is the NS flag (which CF has as flag 16, w/opposite meaning).
2159 */
2160 CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFOptionFlags compareOptions) {
2161 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2162 CFStringInlineBuffer strBuf1, strBuf2;
2163 UTF32Char ch1, ch2;
2164 const uint8_t *punctBMP = NULL;
2165 Boolean caseInsensitive = (compareOptions & kCFCompareCaseInsensitive ? true : false);
2166 Boolean decompose = (compareOptions & kCFCompareNonliteral ? true : false);
2167 Boolean numerically = (compareOptions & kCFCompareNumerically ? true : false);
2168 Boolean localized = (compareOptions & kCFCompareLocalized ? true : false);
2169
2170 #if DO_IGNORE_PUNCTUATION
2171 if (localized) {
2172 if (!__CFLocaleChecked) {
2173 CFArrayRef locales = _CFBundleCopyUserLanguages(false);
2174
2175 if (locales && (CFArrayGetCount(locales) > 0)) {
2176 SInt32 langCode;
2177
2178 if (CFBundleGetLocalizationInfoForLocalization((CFStringRef)CFArrayGetValueAtIndex(locales, 0), &langCode, NULL, NULL, NULL) && (langCode == UKRAINIAN_LANG_CODE)) {
2179 __CFPunctSetBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, 0);
2180 }
2181
2182 CFRelease(locales);
2183 }
2184 __CFLocaleChecked = true;
2185 }
2186
2187 punctBMP = __CFPunctSetBMP;
2188 }
2189 #endif /* DO_IGNORE_PUNCTUATION */
2190
2191 CFStringInitInlineBuffer(string, &strBuf1, CFRangeMake(rangeToCompare.location, rangeToCompare.length));
2192 CFIndex strBuf1_idx = 0;
2193 CFIndex string2_len = CFStringGetLength(string2);
2194 CFStringInitInlineBuffer(string2, &strBuf2, CFRangeMake(0, string2_len));
2195 CFIndex strBuf2_idx = 0;
2196
2197 while (strBuf1_idx < rangeToCompare.length && strBuf2_idx < string2_len) {
2198 ch1 = CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx);
2199 ch2 = CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx);
2200
2201 if (numerically && (ch1 <= '9' && ch1 >= '0') && (ch2 <= '9' && ch2 >= '0')) { // If both are not digits, then don't do numerical comparison
2202 uint64_t n1 = 0; // !!! Doesn't work if numbers are > max uint64_t
2203 uint64_t n2 = 0;
2204 do {
2205 n1 = n1 * 10 + (ch1 - '0');
2206 strBuf1_idx++;
2207 if (rangeToCompare.length <= strBuf1_idx) break;
2208 ch1 = CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx);
2209 } while (ch1 <= '9' && ch1 >= '0');
2210 do {
2211 n2 = n2 * 10 + (ch2 - '0');
2212 strBuf2_idx++;
2213 if (string2_len <= strBuf2_idx) break;
2214 ch2 = CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx);
2215 } while (ch2 <= '9' && ch2 >= '0');
2216 if (n1 < n2) return kCFCompareLessThan; else if (n1 > n2) return kCFCompareGreaterThan;
2217 continue; // If numbers were equal, go back to top without incrementing the buffer pointers
2218 }
2219
2220 if (CFUniCharIsSurrogateHighCharacter(ch1)) {
2221 strBuf1_idx++;
2222 if (strBuf1_idx < rangeToCompare.length && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx))) {
2223 ch1 = CFUniCharGetLongCharacterForSurrogatePair(ch1, CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx));
2224 } else {
2225 strBuf1_idx--;
2226 }
2227 }
2228 if (CFUniCharIsSurrogateHighCharacter(ch2)) {
2229 strBuf2_idx++;
2230 if (strBuf2_idx < string2_len && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx))) {
2231 ch2 = CFUniCharGetLongCharacterForSurrogatePair(ch2, CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx));
2232 } else {
2233 strBuf2_idx--;
2234 }
2235 }
2236
2237 if (ch1 != ch2) {
2238 #if DO_IGNORE_PUNCTUATION
2239 if (punctBMP) {
2240 if (CFUniCharIsMemberOfBitmap(ch1, (ch1 < 0x10000 ? punctBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, (ch1 >> 16))))) {
2241 ++strBuf1_idx; continue;
2242 }
2243 if (CFUniCharIsMemberOfBitmap(ch2, (ch2 < 0x10000 ? punctBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, (ch2 >> 16))))) {
2244 ++strBuf2_idx; continue;
2245 }
2246 }
2247 #endif /* DO_IGNORE_PUNCTUATION */
2248 // We standardize to lowercase here since currently, as of Unicode 3.1.1, it's one-to-one mapping.
2249 // Note we map to uppercase for both SMALL LETTER SIGMA and SMALL LETTER FINAL SIGMA
2250 if (caseInsensitive) {
2251 if (ch1 < 128) {
2252 ch1 -= ((ch1 >= 'A' && ch1 <= 'Z') ? 'A' - 'a' : 0);
2253 } else if (ch1 == 0x03C2 || ch1 == 0x03C3 || ch1 == 0x03A3) { // SMALL SIGMA
2254 ch1 = 0x03A3;
2255 } else {
2256 UniChar buffer[MAX_CASE_MAPPING_BUF];
2257
2258 if (CFUniCharMapCaseTo(ch1, buffer, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, 0, NULL) > 1) { // It's supposed to be surrogates
2259 ch1 = CFUniCharGetLongCharacterForSurrogatePair(buffer[0], buffer[1]);
2260 } else {
2261 ch1 = *buffer;
2262 }
2263 }
2264 if (ch2 < 128) {
2265 ch2 -= ((ch2 >= 'A' && ch2 <= 'Z') ? 'A' - 'a' : 0);
2266 } else if (ch2 == 0x03C2 || ch2 == 0x03C3 || ch2 == 0x03A3) { // SMALL SIGMA
2267 ch2 = 0x03A3;
2268 } else {
2269 UniChar buffer[MAX_CASE_MAPPING_BUF];
2270
2271 if (CFUniCharMapCaseTo(ch2, buffer, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, 0, NULL) > 1) { // It's supposed to be surrogates
2272 ch2 = CFUniCharGetLongCharacterForSurrogatePair(buffer[0], buffer[1]);
2273 } else {
2274 ch2 = *buffer;
2275 }
2276 }
2277 }
2278
2279 if (ch1 != ch2) { // still different
2280 if (decompose) { // ??? This is not exactly the canonical comparison (We need to do priority sort)
2281 Boolean isCh1Decomposable = (ch1 > 0x7F && CFUniCharIsMemberOf(ch1, kCFUniCharDecomposableCharacterSet));
2282 Boolean isCh2Decomposable = (ch2 > 0x7F && CFUniCharIsMemberOf(ch2, kCFUniCharDecomposableCharacterSet));
2283
2284 if (isCh1Decomposable != isCh2Decomposable) {
2285 UTF32Char decomposedCharater[MAX_DECOMPOSED_LENGTH];
2286 UInt32 decomposedCharacterLength;
2287 UInt32 idx;
2288
2289 if (isCh1Decomposable) {
2290 decomposedCharacterLength = CFUniCharDecomposeCharacter(ch1, decomposedCharater, MAX_DECOMPOSED_LENGTH);
2291 if ((string2_len - strBuf2_idx) < decomposedCharacterLength) { // the remaining other length is shorter
2292 if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan;
2293 }
2294 for (idx = 0; idx < decomposedCharacterLength; idx++) {
2295 ch1 = decomposedCharater[idx];
2296 if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan;
2297 strBuf2_idx++; ch2 = (strBuf2_idx < string2_len ? CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx) : 0xffff);
2298 if (CFUniCharIsSurrogateHighCharacter(ch2)) {
2299 strBuf2_idx++;
2300 if (strBuf2_idx < string2_len && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx))) {
2301 ch2 = CFUniCharGetLongCharacterForSurrogatePair(ch2, CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx));
2302 } else {
2303 strBuf2_idx--;
2304 }
2305 }
2306 }
2307 strBuf1_idx++; continue;
2308 } else { // ch2 is decomposable, then
2309 decomposedCharacterLength = CFUniCharDecomposeCharacter(ch2, decomposedCharater, MAX_DECOMPOSED_LENGTH);
2310 if ((rangeToCompare.length - strBuf1_idx) < decomposedCharacterLength) { // the remaining other length is shorter
2311 if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan;
2312 }
2313 for (idx = 0; idx < decomposedCharacterLength && strBuf1_idx < rangeToCompare.length; idx++) {
2314 ch2 = decomposedCharater[idx];
2315 if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan;
2316 strBuf1_idx++; ch1 = (strBuf1_idx < rangeToCompare.length ? CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx) : 0xffff);
2317 if (CFUniCharIsSurrogateHighCharacter(ch1)) {
2318 strBuf1_idx++;
2319 if (strBuf1_idx < rangeToCompare.length && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx))) {
2320 ch1 = CFUniCharGetLongCharacterForSurrogatePair(ch1, CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx));
2321 } else {
2322 strBuf1_idx--;
2323 }
2324 }
2325 }
2326 strBuf2_idx++; continue;
2327 }
2328 }
2329 }
2330 if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan;
2331 }
2332 }
2333 strBuf1_idx++; strBuf2_idx++;
2334 }
2335 if (strBuf1_idx < rangeToCompare.length) {
2336 return kCFCompareGreaterThan;
2337 } else if (strBuf2_idx < string2_len) {
2338 return kCFCompareLessThan;
2339 } else {
2340 return kCFCompareEqualTo;
2341 }
2342 }
2343
2344
2345 CFComparisonResult CFStringCompare(CFStringRef string, CFStringRef str2, CFOptionFlags options) {
2346 return CFStringCompareWithOptions(string, str2, CFRangeMake(0, CFStringGetLength(string)), options);
2347 }
2348
2349 #define kCFStringStackBufferLength (64)
2350
2351 Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions, CFRange *result) {
2352 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2353 CFIndex findStrLen = CFStringGetLength(stringToFind);
2354 Boolean didFind = false;
2355 bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticsInsensitive)) ? true : false);
2356
2357 if ((findStrLen > 0) && (rangeToSearch.length > 0) && ((findStrLen <= rangeToSearch.length) || lengthVariants)) {
2358 UTF32Char strBuf1[kCFStringStackBufferLength];
2359 UTF32Char strBuf2[kCFStringStackBufferLength];
2360 CFStringInlineBuffer inlineBuf1, inlineBuf2;
2361 UTF32Char str1Char, str2Char;
2362 CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding();
2363 const uint8_t *str1Bytes = CFStringGetCStringPtr(string, eightBitEncoding);
2364 const uint8_t *str2Bytes = CFStringGetCStringPtr(stringToFind, eightBitEncoding);
2365 const UTF32Char *characters, *charactersLimit;
2366 const uint8_t *langCode = NULL;
2367 CFIndex fromLoc, toLoc;
2368 CFIndex str1Index, str2Index;
2369 CFIndex strBuf1Len, strBuf2Len;
2370 bool equalityOptions = ((lengthVariants || (compareOptions & kCFCompareWidthInsensitive)) ? true : false);
2371 bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false);
2372 int8_t delta;
2373
2374
2375 CFStringInitInlineBuffer(string, &inlineBuf1, CFRangeMake(0, rangeToSearch.location + rangeToSearch.length));
2376 CFStringInitInlineBuffer(stringToFind, &inlineBuf2, CFRangeMake(0, findStrLen));
2377
2378 if (compareOptions & kCFCompareBackwards) {
2379 fromLoc = rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen);
2380 toLoc = (((compareOptions & kCFCompareAnchored) && !lengthVariants) ? fromLoc : rangeToSearch.location);
2381 } else {
2382 fromLoc = rangeToSearch.location;
2383 toLoc = ((compareOptions & kCFCompareAnchored) ? fromLoc : rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen));
2384 }
2385
2386 delta = ((fromLoc <= toLoc) ? 1 : -1);
2387
2388 if ((NULL != str1Bytes) && (NULL != str2Bytes)) {
2389 CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length);
2390 uint8_t str1Byte, str2Byte;
2391
2392 while (1) {
2393 str1Index = fromLoc;
2394 str2Index = 0;
2395
2396 while ((str1Index < maxStr1Index) && (str2Index < findStrLen)) {
2397 str1Byte = str1Bytes[str1Index];
2398 str2Byte = str2Bytes[str2Index];
2399
2400 if (str1Byte != str2Byte) {
2401 if (equalityOptions) {
2402 if ((str1Byte < 0x80) && ((NULL == langCode) || ('I' != str1Byte))) {
2403 if (caseInsensitive && (str1Byte >= 'A') && (str1Byte <= 'Z')) str1Byte += ('a' - 'A');
2404 *strBuf1 = str1Byte;
2405 strBuf1Len = 1;
2406 } else {
2407 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2408 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2409 if (1 > strBuf1Len) {
2410 *strBuf1 = str1Char;
2411 strBuf1Len = 1;
2412 }
2413 }
2414 if ((str2Byte < 0x80) && ((NULL == langCode) || ('I' != str2Byte))) {
2415 if (caseInsensitive && (str2Byte >= 'A') && (str2Byte <= 'Z')) str2Byte += ('a' - 'A');
2416 *strBuf2 = str2Byte;
2417 strBuf2Len = 1;
2418 } else {
2419 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2420 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2421 if (1 > strBuf2Len) {
2422 *strBuf2 = str2Char;
2423 strBuf2Len = 1;
2424 }
2425 }
2426
2427 if ((1 == strBuf1Len) && (1 == strBuf2Len)) { // normal case
2428 if (*strBuf1 != *strBuf2) break;
2429 } else {
2430 CFIndex delta;
2431
2432 if (!caseInsensitive && (strBuf1Len != strBuf2Len)) break;
2433 if (memcmp(strBuf1, strBuf2, sizeof(UTF32Char) * __CFMin(strBuf1Len, strBuf2Len))) break;
2434
2435 if (strBuf1Len < strBuf2Len) {
2436 delta = strBuf2Len - strBuf1Len;
2437
2438 if ((str1Index + strBuf1Len + delta) > (rangeToSearch.location + rangeToSearch.length)) break;
2439
2440 characters = &(strBuf2[strBuf1Len]);
2441 charactersLimit = characters + delta;
2442
2443 while (characters < charactersLimit) {
2444 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1), &inlineBuf1, str1Index + 1, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL);
2445 if ((strBuf1Len > 0) || (*characters != *strBuf1)) break;
2446 ++characters; ++str1Index;
2447 }
2448 if (characters < charactersLimit) break;
2449 } else if (strBuf2Len < strBuf1Len) {
2450 delta = strBuf1Len - strBuf2Len;
2451
2452 if ((str2Index + strBuf2Len + delta) > findStrLen) break;
2453
2454 characters = &(strBuf1[strBuf2Len]);
2455 charactersLimit = characters + delta;
2456
2457 while (characters < charactersLimit) {
2458 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str1Index + 1), &inlineBuf2, str2Index + 1, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL);
2459 if ((strBuf2Len > 0) || (*characters != *strBuf2)) break;
2460 ++characters; ++str2Index;
2461 }
2462 if (characters < charactersLimit) break;
2463 }
2464 }
2465 } else {
2466 break;
2467 }
2468 }
2469 ++str1Index; ++str2Index;
2470 }
2471
2472 if (str2Index == findStrLen) {
2473 if (((kCFCompareBackwards|kCFCompareAnchored) != (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) || (str1Index == (rangeToSearch.location + rangeToSearch.length))) {
2474 didFind = true;
2475 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
2476 }
2477 break;
2478 }
2479
2480 if (fromLoc == toLoc) break;
2481 fromLoc += delta;
2482 }
2483 } else if (equalityOptions) {
2484 UTF16Char otherChar;
2485 CFIndex str1UsedLen, str2UsedLen, strBuf1Index = 0, strBuf2Index = 0;
2486 bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticsInsensitive) ? true : false);
2487 static const uint8_t *nonBaseBMP = NULL;
2488 static const uint8_t *combClassBMP = NULL;
2489
2490 if (NULL == nonBaseBMP) {
2491 nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
2492 combClassBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
2493 }
2494
2495 while (1) {
2496 str1Index = fromLoc;
2497 str2Index = 0;
2498
2499 strBuf1Len = strBuf2Len = 0;
2500
2501 while (str2Index < findStrLen) {
2502 if (strBuf1Len == 0) {
2503 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2504 if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A');
2505 str1UsedLen = 1;
2506 } else {
2507 str1Char = strBuf1[strBuf1Index++];
2508 }
2509 if (strBuf2Len == 0) {
2510 str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index);
2511 if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A');
2512 str2UsedLen = 1;
2513 } else {
2514 str2Char = strBuf2[strBuf2Index++];
2515 }
2516
2517 if (str1Char != str2Char) {
2518 if ((str1Char < 0x80) && (str2Char < 0x80) && ((NULL == langCode) || !caseInsensitive)) break;
2519
2520 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2521 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2522 str1UsedLen = 2;
2523 }
2524
2525 if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) {
2526 str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar);
2527 str2UsedLen = 2;
2528 }
2529
2530 if (diacriticsInsensitive && (str1Index > fromLoc)) {
2531 if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (str1Char >> 16))))) str1Char = str2Char;
2532 if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (str2Char >> 16))))) str2Char = str1Char;
2533 }
2534
2535 if (str1Char != str2Char) {
2536 if (0 == strBuf1Len) {
2537 strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen);
2538 if (strBuf1Len > 0) {
2539 str1Char = *strBuf1;
2540 strBuf1Index = 1;
2541 }
2542 }
2543
2544 if ((0 == strBuf1Len) && (0 < strBuf2Len)) break;
2545
2546 if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) {
2547 strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen);
2548 if ((0 == strBuf2Len) || (str1Char != *strBuf2)) break;
2549 strBuf2Index = 1;
2550 }
2551 }
2552
2553 if ((strBuf1Len > 0) && (strBuf2Len > 0)) {
2554 while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) {
2555 if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break;
2556 ++strBuf1Index; ++strBuf2Index;
2557 }
2558 if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) break;
2559 }
2560 }
2561
2562 if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0;
2563 if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0;
2564
2565 if (strBuf1Len == 0) str1Index += str1UsedLen;
2566 if (strBuf2Len == 0) str2Index += str2UsedLen;
2567 }
2568
2569 if (str2Index == findStrLen) {
2570 bool match = true;
2571
2572 if (strBuf1Len > 0) {
2573 match = false;
2574
2575 if ((compareOptions & kCFCompareDiacriticsInsensitive) && (strBuf1[0] < 0x0510)) {
2576 while (strBuf1Index < strBuf1Len) {
2577 if (!CFUniCharIsMemberOfBitmap(strBuf1[strBuf1Index], ((strBuf1[strBuf1Index] < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (strBuf1[strBuf1Index] >> 16))))) break;
2578 ++strBuf1Index;
2579 }
2580
2581 if (strBuf1Index == strBuf1Len) {
2582 str1Index += str1UsedLen;
2583 match = true;
2584 }
2585 }
2586 }
2587
2588 if (match && (compareOptions & (kCFCompareDiacriticsInsensitive|kCFCompareNonliteral)) && (str1Index < (rangeToSearch.location + rangeToSearch.length))) {
2589 const uint8_t *nonBaseBitmap;
2590
2591 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index);
2592
2593 if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) {
2594 str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar);
2595 nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (str1Char >> 16));
2596 } else {
2597 nonBaseBitmap = nonBaseBMP;
2598 }
2599
2600 if (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBitmap)) {
2601 if (diacriticsInsensitive) {
2602 if (str1Char < 0x10000) {
2603 CFIndex index = str1Index;
2604
2605 do {
2606 str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, --index);
2607 } while (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBMP), (rangeToSearch.location < index));
2608
2609 if (str1Char < 0x0510) {
2610 CFIndex maxIndex = (rangeToSearch.location + rangeToSearch.length);
2611
2612 while (++str1Index < maxIndex) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), nonBaseBMP)) break;
2613 }
2614 }
2615 } else {
2616 match = false;
2617 }
2618 } else if (!diacriticsInsensitive) {
2619 otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index - 1);
2620
2621 // this is assuming viramas are only in BMP ???
2622 if ((str1Char == COMBINING_GRAPHEME_JOINER) || (otherChar == COMBINING_GRAPHEME_JOINER) || (otherChar == ZERO_WIDTH_JOINER) || ((otherChar >= HANGUL_CHOSEONG_START) && (otherChar <= HANGUL_JONGSEONG_END)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar, combClassBMP) == 9)) {
2623 CFRange clusterRange = CFStringGetRangeOfCharacterClusterAtIndex(string, str1Index, kCFStringGramphemeCluster);
2624
2625 if (str1Index < (clusterRange.location + clusterRange.length)) match = false;
2626 }
2627 }
2628 }
2629
2630 if (match) {
2631 if (((kCFCompareBackwards|kCFCompareAnchored) != (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) || (str1Index == (rangeToSearch.location + rangeToSearch.length))) {
2632 didFind = true;
2633 if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc);
2634 }
2635 break;
2636 }
2637 }
2638
2639 if (fromLoc == toLoc) break;
2640 fromLoc += delta;
2641 }
2642 } else {
2643 while (1) {
2644 str1Index = fromLoc;
2645 str2Index = 0;
2646
2647 while (str2Index < findStrLen) {
2648 if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index)) break;
2649
2650 ++str1Index; ++str2Index;
2651 }
2652
2653 if (str2Index == findStrLen) {
2654 didFind = true;
2655 if (NULL != result) *result = CFRangeMake(fromLoc, findStrLen);
2656 break;
2657 }
2658
2659 if (fromLoc == toLoc) break;
2660 fromLoc += delta;
2661 }
2662 }
2663 }
2664
2665 return didFind;
2666 }
2667
2668 // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
2669
2670 static const void *__rangeRetain(CFAllocatorRef allocator, const void *ptr) {
2671 CFRetain(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
2672 return ptr;
2673 }
2674
2675 static void __rangeRelease(CFAllocatorRef allocator, const void *ptr) {
2676 CFRelease(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
2677 }
2678
2679 static CFStringRef __rangeCopyDescription(const void *ptr) {
2680 CFRange range = *(CFRange *)ptr;
2681 return CFStringCreateWithFormat(NULL /* ??? allocator */, NULL, CFSTR("{%d, %d}"), range.location, range.length);
2682 }
2683
2684 static Boolean __rangeEqual(const void *ptr1, const void *ptr2) {
2685 CFRange range1 = *(CFRange *)ptr1;
2686 CFRange range2 = *(CFRange *)ptr2;
2687 return (range1.location == range2.location) && (range1.length == range2.length);
2688 }
2689
2690
2691 CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions) {
2692 CFRange foundRange;
2693 Boolean backwards = compareOptions & kCFCompareBackwards;
2694 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
2695 CFMutableDataRef rangeStorage = NULL; // Basically an array of CFRange, CFDataRef (packed)
2696 uint8_t *rangeStorageBytes = NULL;
2697 CFIndex foundCount = 0;
2698 CFIndex capacity = 0; // Number of CFRange, CFDataRef element slots in rangeStorage
2699
2700 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
2701
2702 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
2703 // Determine the next range
2704 if (backwards) {
2705 rangeToSearch.length = foundRange.location - rangeToSearch.location;
2706 } else {
2707 rangeToSearch.location = foundRange.location + foundRange.length;
2708 rangeToSearch.length = endIndex - rangeToSearch.location;
2709 }
2710
2711 // If necessary, grow the data and squirrel away the found range
2712 if (foundCount >= capacity) {
2713 if (rangeStorage == NULL) rangeStorage = CFDataCreateMutable(alloc, 0);
2714 capacity = (capacity + 4) * 2;
2715 CFDataSetLength(rangeStorage, capacity * (sizeof(CFRange) + sizeof(CFDataRef)));
2716 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage) + foundCount * (sizeof(CFRange) + sizeof(CFDataRef));
2717 }
2718 memmove(rangeStorageBytes, &foundRange, sizeof(CFRange)); // The range
2719 memmove(rangeStorageBytes + sizeof(CFRange), &rangeStorage, sizeof(CFDataRef)); // The data
2720 rangeStorageBytes += (sizeof(CFRange) + sizeof(CFDataRef));
2721 foundCount++;
2722 }
2723
2724 if (foundCount > 0) {
2725 CFIndex cnt;
2726 CFMutableArrayRef array;
2727 const CFArrayCallBacks callbacks = {0, __rangeRetain, __rangeRelease, __rangeCopyDescription, __rangeEqual};
2728
2729 CFDataSetLength(rangeStorage, foundCount * (sizeof(CFRange) + sizeof(CFDataRef))); // Tighten storage up
2730 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage);
2731
2732 array = CFArrayCreateMutable(alloc, foundCount * sizeof(CFRange *), &callbacks);
2733 for (cnt = 0; cnt < foundCount; cnt++) {
2734 // Each element points to the appropriate CFRange in the CFData
2735 CFArrayAppendValue(array, rangeStorageBytes + cnt * (sizeof(CFRange) + sizeof(CFDataRef)));
2736 }
2737 CFRelease(rangeStorage); // We want the data to go away when all CFRanges inside it are released...
2738 return array;
2739 } else {
2740 return NULL;
2741 }
2742 }
2743
2744
2745 CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFOptionFlags compareOptions) {
2746 CFRange foundRange;
2747
2748 if (CFStringFindWithOptions(string, stringToFind, CFRangeMake(0, CFStringGetLength(string)), compareOptions, &foundRange)) {
2749 return foundRange;
2750 } else {
2751 return CFRangeMake(kCFNotFound, 0);
2752 }
2753 }
2754
2755 Boolean CFStringHasPrefix(CFStringRef string, CFStringRef prefix) {
2756 return CFStringFindWithOptions(string, prefix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored, NULL);
2757 }
2758
2759 Boolean CFStringHasSuffix(CFStringRef string, CFStringRef suffix) {
2760 return CFStringFindWithOptions(string, suffix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored|kCFCompareBackwards, NULL);
2761 }
2762
2763 #define MAX_TRANSCODING_LENGTH 4
2764
2765 #define HANGUL_JONGSEONG_COUNT (28)
2766
2767 CF_INLINE bool _CFStringIsHangulLVT(UTF32Char character) {
2768 return (((character - HANGUL_SYLLABLE_START) % HANGUL_JONGSEONG_COUNT) ? true : false);
2769 }
2770
/* Lengths looked up by (character - 0xF860) for characters whose upper bits match 0xF860,
   i.e. one entry for each of 0xF860 through 0xF86F.
*/
static uint8_t __CFTranscodingHintLength[] = {
    2, 3, 4, 4,	/* 0xF860 - 0xF863 */
    4, 4, 4, 2,	/* 0xF864 - 0xF867 */
    2, 2, 2, 4,	/* 0xF868 - 0xF86B */
    0, 0, 0, 0	/* 0xF86C - 0xF86F */
};
2774
/* States of the Hangul scan in _CFStringInlineBufferGetComposedRange() */
enum {
    kCFStringHangulStateL = 0,	/* character below HANGUL_JUNGSEONG_START */
    kCFStringHangulStateV = 1,	/* character below HANGUL_JONGSEONG_START */
    kCFStringHangulStateT = 2,	/* character below HANGUL_SYLLABLE_START */
    kCFStringHangulStateLV = 3,	/* syllable for which _CFStringIsHangulLVT() is false */
    kCFStringHangulStateLVT = 4,	/* syllable for which _CFStringIsHangulLVT() is true */
    kCFStringHangulStateBreak = 5	/* stop extending the cluster */
};
2783
2784 static CFRange _CFStringInlineBufferGetComposedRange(CFStringInlineBuffer *buffer, CFIndex start, CFStringCharacterClusterType type, const uint8_t *nonBaseBMP) {
2785 CFIndex end = start + 1;
2786 const uint8_t *nonBase = nonBaseBMP;
2787 UTF32Char character;
2788 UTF16Char otherSurrogate;
2789 uint8_t step;
2790
2791 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
2792
2793
2794 // We don't combine characters in Armenian ~ Limbu range for backward deletion
2795 if ((type != kCFStringBackwardDeletionCluster) || (character < 0x0530) || (character > 0x194F)) {
2796 // Check if the current is surrogate
2797 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start + 1)))) {
2798 ++end;
2799 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
2800 nonBase = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
2801 }
2802
2803 // Extend backward
2804 while (start > 0) {
2805 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
2806
2807 if (character < 0x10000) { // the first round could be already be non-BMP
2808 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)))) {
2809 character = CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate, character);
2810 nonBase = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
2811 --start;
2812 } else {
2813 nonBase = nonBaseBMP;
2814 }
2815 }
2816
2817 if (!CFUniCharIsMemberOfBitmap(character, nonBase) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
2818
2819 --start;
2820
2821 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
2822 }
2823 }
2824
2825 // Hangul
2826 if (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END))) {
2827 uint8_t state;
2828 uint8_t initialState;
2829
2830 if (character < HANGUL_JUNGSEONG_START) {
2831 state = kCFStringHangulStateL;
2832 } else if (character < HANGUL_JONGSEONG_START) {
2833 state = kCFStringHangulStateV;
2834 } else if (character < HANGUL_SYLLABLE_START) {
2835 state = kCFStringHangulStateT;
2836 } else {
2837 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
2838 }
2839 initialState = state;
2840
2841 // Extend backward
2842 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)) >= HANGUL_CHOSEONG_START) && (character <= HANGUL_SYLLABLE_END) && ((character <= HANGUL_JONGSEONG_END) || (character >= HANGUL_SYLLABLE_START))) {
2843 switch (state) {
2844 case kCFStringHangulStateV:
2845 if (character <= HANGUL_CHOSEONG_END) {
2846 state = kCFStringHangulStateL;
2847 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END) && !_CFStringIsHangulLVT(character)) {
2848 state = kCFStringHangulStateLV;
2849 } else if (character > HANGUL_JUNGSEONG_END) {
2850 state = kCFStringHangulStateBreak;
2851 }
2852 break;
2853
2854 case kCFStringHangulStateT:
2855 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JUNGSEONG_END)) {
2856 state = kCFStringHangulStateV;
2857 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)) {
2858 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
2859 } else if (character < HANGUL_JUNGSEONG_START) {
2860 state = kCFStringHangulStateBreak;
2861 }
2862 break;
2863
2864 default:
2865 state = ((character < HANGUL_JUNGSEONG_START) ? kCFStringHangulStateL : kCFStringHangulStateBreak);
2866 break;
2867 }
2868
2869 if (state == kCFStringHangulStateBreak) break;
2870 --start;
2871 }
2872
2873 // Extend forward
2874 state = initialState;
2875 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) && (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)))) {
2876 switch (state) {
2877 case kCFStringHangulStateLV:
2878 case kCFStringHangulStateV:
2879 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) {
2880 state = ((character < HANGUL_JONGSEONG_START) ? kCFStringHangulStateV : kCFStringHangulStateT);
2881 } else {
2882 state = kCFStringHangulStateBreak;
2883 }
2884 break;
2885
2886 case kCFStringHangulStateLVT:
2887 case kCFStringHangulStateT:
2888 state = (((character >= HANGUL_JONGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) ? kCFStringHangulStateT : kCFStringHangulStateBreak);
2889 break;
2890
2891 default:
2892 if (character < HANGUL_JUNGSEONG_START) {
2893 state = kCFStringHangulStateL;
2894 } else if (character < HANGUL_JONGSEONG_START) {
2895 state = kCFStringHangulStateV;
2896 } else if (character >= HANGUL_SYLLABLE_START) {
2897 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
2898 } else {
2899 state = kCFStringHangulStateBreak;
2900 }
2901 break;
2902 }
2903
2904 if (state == kCFStringHangulStateBreak) break;
2905 ++end;
2906 }
2907 }
2908
2909 // Extend forward
2910 while ((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) {
2911 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
2912
2913 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, end + 1)))) {
2914 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
2915 nonBase = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
2916 step = 2;
2917 } else {
2918 nonBase = nonBaseBMP;
2919 step = 1;
2920 }
2921
2922 if (!CFUniCharIsMemberOfBitmap(character, nonBase) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
2923
2924 end += step;
2925 }
2926
2927 return CFRangeMake(start, end - start);
2928 }
2929
2930 CF_INLINE bool _CFStringIsVirama(UTF32Char character, const uint8_t *combClassBMP) {
2931 return ((character == COMBINING_GRAPHEME_JOINER) || (CFUniCharGetCombiningPropertyForCharacter(character, ((character < 0x10000) ? combClassBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (character >> 16)))) == 9) ? true : false);
2932 }
2933
2934 CFRange CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string, CFIndex charIndex, CFStringCharacterClusterType type) {
2935 CFRange range;
2936 CFIndex currentIndex;
2937 CFIndex length = CFStringGetLength(string);
2938 CFStringInlineBuffer stringBuffer;
2939 UTF32Char character;
2940 UTF16Char otherSurrogate;
2941 static const uint8_t *nonBaseBMP = NULL;
2942 static const uint8_t *letterBMP = NULL;
2943 static const uint8_t *combClassBMP = NULL;
2944
2945 if (charIndex >= length) return CFRangeMake(kCFNotFound, 0);
2946
2947 /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
2948 */
2949 if (!CF_IS_OBJC(__kCFStringTypeID, string) && __CFStrIsEightBit(string)) return CFRangeMake(charIndex, 1);
2950
2951 if (NULL == nonBaseBMP) {
2952 nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
2953 letterBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, 0);
2954 combClassBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
2955 }
2956
2957 CFStringInitInlineBuffer(string, &stringBuffer, CFRangeMake(0, length));
2958
2959 // Get composed character sequence first
2960 range = _CFStringInlineBufferGetComposedRange(&stringBuffer, charIndex, type, nonBaseBMP);
2961
2962 // Do grapheme joiners
2963 if (type < kCFStringCursorMovementCluster) {
2964 const uint8_t *letter = letterBMP;
2965
2966 // Check to see if we have a letter at the beginning of initial cluster
2967 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location);
2968
2969 if ((range.length > 1) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location + 1)))) {
2970 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
2971 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
2972 }
2973
2974 if ((character == ZERO_WIDTH_JOINER) || CFUniCharIsMemberOfBitmap(character, letter)) {
2975 CFRange otherRange;
2976
2977 // Check if preceded by grapheme joiners (U034F and viramas)
2978 otherRange.location = currentIndex = range.location;
2979
2980 while (currentIndex > 1) {
2981 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex);
2982
2983 // ??? We're assuming viramas only in BMP
2984 if ((_CFStringIsVirama(character, combClassBMP) || ((character == ZERO_WIDTH_JOINER) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex), combClassBMP))) && (currentIndex > 0)) {
2985 --currentIndex;
2986 } else {
2987 break;
2988 }
2989
2990 currentIndex = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, nonBaseBMP).location;
2991
2992 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
2993
2994 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1)))) {
2995 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
2996 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
2997 --currentIndex;
2998 } else {
2999 letter = letterBMP;
3000 }
3001
3002 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3003 range.location = currentIndex;
3004 }
3005
3006 range.length += otherRange.location - range.location;
3007
3008 // Check if followed by grapheme joiners
3009 if ((range.length > 1) && ((range.location + range.length) < length)) {
3010 otherRange = range;
3011
3012 do {
3013 currentIndex = otherRange.location + otherRange.length;
3014 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1);
3015
3016 // ??? We're assuming viramas only in BMP
3017 if ((character != ZERO_WIDTH_JOINER) && !_CFStringIsVirama(character, combClassBMP)) break;
3018
3019 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3020
3021 if (character == ZERO_WIDTH_JOINER) character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, ++currentIndex);
3022
3023 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex + 1)))) {
3024 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
3025 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
3026 } else {
3027 letter = letterBMP;
3028 }
3029
3030 // We only conjoin letters
3031 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
3032 otherRange = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, nonBaseBMP);
3033 } while ((otherRange.location + otherRange.length) < length);
3034 range.length = currentIndex - range.location;
3035 }
3036 }
3037 }
3038
3039 // Check if we're part of prefix transcoding hints
3040 CFIndex otherIndex;
3041
3042 currentIndex = (range.location + range.length) - (MAX_TRANSCODING_LENGTH + 1);
3043 if (currentIndex < 0) currentIndex = 0;
3044
3045 while (currentIndex <= range.location) {
3046 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
3047
3048 if ((character & 0x1FFFF0) == 0xF860) { // transcoding hint
3049 otherIndex = currentIndex + __CFTranscodingHintLength[(character - 0xF860)] + 1;
3050 if (otherIndex >= (range.location + range.length)) {
3051 if (otherIndex <= length) {
3052 range.location = currentIndex;
3053 range.length = otherIndex - currentIndex;
3054 }
3055 break;
3056 }
3057 }
3058 ++currentIndex;
3059 }
3060
3061 return range;
3062 }
3063
3064 #if 1 /* Using the new implementation. Leaving the old implementation if'ed out for testing purposes for now */
3065 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
3066 return CFStringGetRangeOfCharacterClusterAtIndex(theString, theIndex, kCFStringComposedCharacterCluster);
3067 }
3068 #else
3069 /*!
3070 @function CFStringGetRangeOfComposedCharactersAtIndex
3071 Returns the range of the composed character sequence at the specified index.
3072 @param theString The CFString which is to be searched. If this
3073 parameter is not a valid CFString, the behavior is
3074 undefined.
3075 @param theIndex The index of the character contained in the
3076 composed character sequence. If the index is
3077 outside the index space of the string (0 to N-1 inclusive,
3078 where N is the length of the string), the behavior is
3079 undefined.
3080 @result The range of the composed character sequence.
3081 */
3082 #define ExtHighHalfZoneLow 0xD800
3083 #define ExtHighHalfZoneHigh 0xDBFF
3084 #define ExtLowHalfZoneLow 0xDC00
3085 #define ExtLowHalfZoneHigh 0xDFFF
3086 #define JunseongStart 0x1160
3087 #define JonseongEnd 0x11F9
3088 CF_INLINE Boolean IsHighCode(UniChar X) { return (X >= ExtHighHalfZoneLow && X <= ExtHighHalfZoneHigh); }
3089 CF_INLINE Boolean IsLowCode(UniChar X) { return (X >= ExtLowHalfZoneLow && X <= ExtLowHalfZoneHigh); }
3090 #define IsHangulConjoiningJamo(X) (X >= JunseongStart && X <= JonseongEnd)
3091 #define IsHalfwidthKanaVoicedMark(X) ((X == 0xFF9E) || (X == 0xFF9F))
3092 CF_INLINE Boolean IsNonBaseChar(UniChar X, CFCharacterSetRef nonBaseSet) { return (CFCharacterSetIsCharacterMember(nonBaseSet, X) || IsHangulConjoiningJamo(X) || IsHalfwidthKanaVoicedMark(X) || (X & 0x1FFFF0) == 0xF870); } // combining char, hangul jamo, or Apple corporate variant tag
3093 #define ZWJ 0x200D
3094 #define ZWNJ 0x200C
3095 #define COMBINING_GRAPHEME_JOINER (0x034F)
3096
3097 static CFCharacterSetRef nonBaseChars = NULL;
3098 static CFCharacterSetRef letterChars = NULL;
3099 static const void *__CFCombiningClassBMP = NULL;
3100
3101 CF_INLINE bool IsVirama(UTF32Char character) {
3102 return ((character == COMBINING_GRAPHEME_JOINER) ? true : ((character < 0x10000) && (CFUniCharGetCombiningPropertyForCharacter(character, __CFCombiningClassBMP) == 9) ? true : false));
3103 }
3104
/* Legacy implementation, compiled only in the #else branch of the "#if 1" above.
   Returns the range of the composed character sequence containing theIndex, or
   {kCFNotFound, 0} when theIndex is at or beyond the end of theString.
   Works in three steps: look for a transcoding hint covering theIndex, walk left from
   theIndex to a base character ("left"), then walk right from theIndex to the next base
   character ("current"); the result is [left, current).
*/
3105 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
3106 CFIndex left, current, save;
3107 CFIndex len = CFStringGetLength(theString);
3108 CFStringInlineBuffer stringBuffer;
3109 static volatile Boolean _isInited = false;
3110
3111 if (theIndex >= len) return CFRangeMake(kCFNotFound, 0);
3112
// One-time lookup of the character sets and combining class data shared with IsVirama()
3113 if (!_isInited) {
3114 nonBaseChars = CFCharacterSetGetPredefined(kCFCharacterSetNonBase);
3115 letterChars = CFCharacterSetGetPredefined(kCFCharacterSetLetter);
3116 __CFCombiningClassBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
3117 _isInited = true;
3118 }
3119
3120 save = current = theIndex;
3121
3122 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, len));
3123
3124 /*
3125 * First check for transcoding hints
3126 */
3127 {
// Window of up to MAX_TRANSCODING_LENGTH characters before theIndex, plus theIndex itself
3128 CFRange theRange = (current > MAX_TRANSCODING_LENGTH ? CFRangeMake(current - MAX_TRANSCODING_LENGTH, MAX_TRANSCODING_LENGTH + 1) : CFRangeMake(0, current + 1));
3129
3130 // Should check the next loc ?
3131 if (current + 1 < len) ++theRange.length;
3132
3133 if (theRange.length > 1) {
3134 UniChar characterBuffer[MAX_TRANSCODING_LENGTH + 2]; // Transcoding hint length + current loc + next loc
3135
3136 if (stringBuffer.directBuffer) {
3137 memmove(characterBuffer, stringBuffer.directBuffer + theRange.location, theRange.length * sizeof(UniChar));
3138 } else {
3139 CFStringGetCharacters(theString, theRange, characterBuffer);
3140 }
3141
// Scan from theIndex back to the start of the window for a hint character (0xF860-0xF86F)
3142 while (current >= theRange.location) {
3143 if ((characterBuffer[current - theRange.location] & 0x1FFFF0) == 0xF860) {
3144 theRange = CFRangeMake(current, __CFTranscodingHintLength[characterBuffer[current - theRange.location] - 0xF860] + 1);
// A hint that ends at or before theIndex does not cover it; otherwise return the hint's range, clipped to the string
3145 if ((theRange.location + theRange.length) <= theIndex) break;
3146 if ((theRange.location + theRange.length) >= len) theRange.length = len - theRange.location;
3147 return theRange;
3148 }
3149 if (current == 0) break;
3150 --current;
3151 }
3152 current = theIndex; // Reset current
3153 }
3154 }
3155
3156 //#warning Aki 5/29/01 This does not support non-base chars in non-BMP planes (i.e. musical symbol combining stem in Unicode 3.1)
3157 /*
3158 * if we start NOT on a base, first move back to a base as appropriate.
3159 */
3160
3161 roundAgain:
3162
3163 while ((current > 0) && IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current), nonBaseChars)) --current;
3164
// A letter preceded by a virama, or anything preceded by virama + ZWJ, joins the previous sequence: step over the joiner and repeat
3165 if (current >= 1 && current < len && CFCharacterSetIsCharacterMember(letterChars, CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) {
3166 --current;
3167 goto roundAgain;
3168 } else if ((current >= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1) == ZWJ) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 2))) {
3169 current -= 2;
3170 goto roundAgain;
3171 }
3172
3173 /*
3174 * Set the left position, then jump back to the saved original position.
3175 */
3176
// If the scan stopped on the low half of a surrogate pair, include the high half
3177 if (current >= 1 && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) --current;
3178 left = current;
3179 current = save;
3180
3181 /*
3182 * Now, presume we are on a base; move forward & look for the next base.
3183 * Handle jumping over H/L codes.
3184 */
3185 if (IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && (current + 1) < len && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current + 1))) ++current;
3186 ++current;
3187
3188 round2Again:
3189
3190 if (current < len) {
3191 while (IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current), nonBaseChars)) {
3192 ++current;
3193 if (current >= len) break;
3194 }
// A letter that follows a virama (optionally with a ZWJ in between) still belongs to this sequence
3195 if ((current < len) && CFCharacterSetIsCharacterMember(letterChars, CFStringGetCharacterFromInlineBuffer(&stringBuffer, current))) {
3196 if (IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) {
3197 ++current; goto round2Again;
3198 } else if ((current >= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1) == ZWJ) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 2))) {
3199 ++current; goto round2Again;
3200 }
3201 }
3202 }
3203 /*
3204 * Now, "current" is a base, and "left" is a base.
3205 * The junk between had better contain "save"!
3206 */
// The inconsistency is only logged; the computed range is returned regardless
3207 if ((! (left <= save)) || (! (save <= current))) {
3208 CFLog(0, CFSTR("CFString: CFStringGetRangeOfComposedCharactersAtIndex:%d returned invalid\n"), save);
3209 }
3210 return CFRangeMake(left, current - left);
3211 }
3212 #endif
3213
3214 /*!
3215 @function CFStringFindCharacterFromSet
3216 Query the range of characters contained in the specified character set.
3217 @param theString The CFString which is to be searched. If this
3218 parameter is not a valid CFString, the behavior is
3219 undefined.
3220 @param theSet The CFCharacterSet against which the membership
3221 of characters is checked. If this parameter is not a valid
3222 CFCharacterSet, the behavior is undefined.
3223 @param range The range of characters within the string to search. If
3224 the range location or end point (defined by the location
3225 plus length minus 1) are outside the index space of the
3226 string (0 to N-1 inclusive, where N is the length of the
3227 string), the behavior is undefined. If the range length is
3228 negative, the behavior is undefined. The range may be empty
3229 (length 0), in which case no search is performed.
3230 @param searchOptions The bitwise-or'ed option flags to control
3231 the search behavior. The supported options are
3232 kCFCompareBackwards and kCFCompareAnchored.
3233 If other option flags are specified, the behavior
3234 is undefined.
3235 @param result The pointer to a CFRange supplied by the caller in
3236 which the search result is stored. If a pointer to an invalid
3237 memory is specified, the behavior is undefined.
3238 @result true, if at least a character which is a member of the character
3239 set is found and result is filled, otherwise, false.
3240 */
3241 #define SURROGATE_START 0xD800
3242 #define SURROGATE_END 0xDFFF
3243
3244 CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFOptionFlags searchOptions, CFRange *result) {
3245 CFStringInlineBuffer stringBuffer;
3246 UniChar ch;
3247 CFIndex step;
3248 CFIndex fromLoc, toLoc, cnt; // fromLoc and toLoc are inclusive
3249 Boolean found = false;
3250 Boolean done = false;
3251
3252 //#warning FIX ME !! Should support kCFCompareNonliteral
3253
3254 if ((rangeToSearch.location + rangeToSearch.length > CFStringGetLength(theString)) || (rangeToSearch.length == 0)) return false;
3255
3256 if (searchOptions & kCFCompareBackwards) {
3257 fromLoc = rangeToSearch.location + rangeToSearch.length - 1;
3258 toLoc = rangeToSearch.location;
3259 } else {
3260 fromLoc = rangeToSearch.location;
3261 toLoc = rangeToSearch.location + rangeToSearch.length - 1;
3262 }
3263 if (searchOptions & kCFCompareAnchored) {
3264 toLoc = fromLoc;
3265 }
3266
3267 step = (fromLoc <= toLoc) ? 1 : -1;
3268 cnt = fromLoc;
3269
3270 CFStringInitInlineBuffer(theString, &stringBuffer, rangeToSearch);
3271
3272 do {
3273 ch = CFStringGetCharacterFromInlineBuffer(&stringBuffer, cnt - rangeToSearch.location);
3274 if ((ch >= SURROGATE_START) && (ch <= SURROGATE_END)) {
3275 int otherCharIndex = cnt + step;
3276
3277 if (((step < 0) && (otherCharIndex < toLoc)) || ((step > 0) && (otherCharIndex > toLoc))) {
3278 done = true;
3279 } else {
3280 UniChar highChar;
3281 UniChar lowChar = CFStringGetCharacterFromInlineBuffer(&stringBuffer, otherCharIndex - rangeToSearch.location);
3282
3283 if (cnt < otherCharIndex) {
3284 highChar = ch;
3285 } else {
3286 highChar = lowChar;
3287 lowChar = ch;
3288 }
3289
3290 if (CFUniCharIsSurrogateHighCharacter(highChar) && CFUniCharIsSurrogateLowCharacter(lowChar) && CFCharacterSetIsLongCharacterMember(theSet, CFUniCharGetLongCharacterForSurrogatePair(highChar, lowChar))) {
3291 if (result) *result = CFRangeMake((cnt < otherCharIndex ? cnt : otherCharIndex), 2);
3292 return true;
3293 } else if (otherCharIndex == toLoc) {
3294 done = true;
3295 } else {
3296 cnt = otherCharIndex + step;
3297 }
3298 }
3299 } else if (CFCharacterSetIsCharacterMember(theSet, ch)) {
3300 done = found = true;
3301 } else if (cnt == toLoc) {
3302 done = true;
3303 } else {
3304 cnt += step;
3305 }
3306 } while (!done);
3307
3308 if (found && result) *result = CFRangeMake(cnt, 1);
3309 return found;
3310 }
3311
3312 /* Line range code */
3313
3314 #define CarriageReturn '\r' /* 0x0d */
3315 #define NewLine '\n' /* 0x0a */
3316 #define NextLine 0x0085
3317 #define LineSeparator 0x2028
3318 #define ParaSeparator 0x2029
3319
3320 CF_INLINE Boolean isALineSeparatorTypeCharacter(UniChar ch) {
3321 if (ch > CarriageReturn && ch < NextLine) return false; /* Quick test to cover most chars */
3322 return (ch == NewLine || ch == CarriageReturn || ch == NextLine || ch == LineSeparator || ch == ParaSeparator) ? true : false;
3323 }
3324
3325 void CFStringGetLineBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex) {
3326 CFIndex len;
3327 CFStringInlineBuffer buf;
3328 UniChar ch;
3329
3330 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID, void, string, "getLineStart:end:contentsEnd:forRange:", lineBeginIndex, lineEndIndex, contentsEndIndex, CFRangeMake(range.location, range.length));
3331
3332 __CFAssertIsString(string);
3333 __CFAssertRangeIsInStringBounds(string, range.location, range.length);
3334
3335 len = __CFStrLength(string);
3336
3337 if (lineBeginIndex) {
3338 CFIndex start;
3339 if (range.location == 0) {
3340 start = 0;
3341 } else {
3342 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3343 CFIndex buf_idx = range.location;
3344
3345 /* Take care of the special case where start happens to fall right between \r and \n */
3346 ch = CFStringGetCharacterFromInlineBuffer(&buf, buf_idx);
3347 buf_idx--;
3348 if ((ch == NewLine) && (CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) == CarriageReturn)) {
3349 buf_idx--;
3350 }
3351 while (1) {
3352 if (buf_idx < 0) {
3353 start = 0;
3354 break;
3355 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx))) {
3356 start = buf_idx + 1;
3357 break;
3358 } else {
3359 buf_idx--;
3360 }
3361 }
3362 }
3363 *lineBeginIndex = start;
3364 }
3365
3366 /* Now find the ending point */
3367 if (lineEndIndex || contentsEndIndex) {
3368 CFIndex endOfContents, lineSeparatorLength = 1; /* 1 by default */
3369 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
3370 CFIndex buf_idx = range.location + range.length - (range.length ? 1 : 0);
3371 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
3372 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3373 if (ch == NewLine) {
3374 endOfContents = buf_idx;
3375 buf_idx--;
3376 if (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == CarriageReturn) {
3377 lineSeparatorLength = 2;
3378 endOfContents--;
3379 }
3380 } else {
3381 while (1) {
3382 if (isALineSeparatorTypeCharacter(ch)) {
3383 endOfContents = buf_idx; /* This is actually end of contentsRange */
3384 buf_idx++; /* OK for this to go past the end */
3385 if ((ch == CarriageReturn) && (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == NewLine)) {
3386 lineSeparatorLength = 2;
3387 }
3388 break;
3389 } else if (buf_idx >= len) {
3390 endOfContents = len;
3391 lineSeparatorLength = 0;
3392 break;
3393 } else {
3394 buf_idx++;
3395 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
3396 }
3397 }
3398 }
3399 if (contentsEndIndex) *contentsEndIndex = endOfContents;
3400 if (lineEndIndex) *lineEndIndex = endOfContents + lineSeparatorLength;
3401 }
3402 }
3403
3404
3405 CFStringRef CFStringCreateByCombiningStrings(CFAllocatorRef alloc, CFArrayRef array, CFStringRef separatorString) {
3406 CFIndex numChars;
3407 CFIndex separatorNumByte;
3408 CFIndex stringCount = CFArrayGetCount(array);
3409 Boolean isSepCFString = !CF_IS_OBJC(__kCFStringTypeID, separatorString);
3410 Boolean canBeEightbit = isSepCFString && __CFStrIsEightBit(separatorString);
3411 CFIndex idx;
3412 CFStringRef otherString;
3413 void *buffer;
3414 uint8_t *bufPtr;
3415 const void *separatorContents = NULL;
3416
3417 if (stringCount == 0) {
3418 return CFStringCreateWithCharacters(alloc, NULL, 0);
3419 } else if (stringCount == 1) {
3420 return CFStringCreateCopy(alloc, CFArrayGetValueAtIndex(array, 0));
3421 }
3422
3423 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3424
3425 numChars = CFStringGetLength(separatorString) * (stringCount - 1);
3426 for (idx = 0; idx < stringCount; idx++) {
3427 otherString = (CFStringRef)CFArrayGetValueAtIndex(array, idx);
3428 numChars += CFStringGetLength(otherString);
3429 // canBeEightbit is already false if the separator is an NSString...
3430 if (!CF_IS_OBJC(__kCFStringTypeID, otherString) && __CFStrIsUnicode(otherString)) canBeEightbit = false;
3431 }
3432
3433 bufPtr = buffer = CFAllocatorAllocate(alloc, canBeEightbit ? ((numChars + 1) * sizeof(uint8_t)) : (numChars * sizeof(UniChar)), 0);
3434 if (__CFOASafe) __CFSetLastAllocationEventName(buffer, "CFString (store)");
3435 separatorNumByte = CFStringGetLength(separatorString) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3436
3437 for (idx = 0; idx < stringCount; idx++) {
3438 if (idx) { // add separator here unless first string
3439 if (separatorContents) {
3440 memmove(bufPtr, separatorContents, separatorNumByte);
3441 } else {
3442 if (!isSepCFString) { // NSString
3443 CFStringGetCharacters(separatorString, CFRangeMake(0, CFStringGetLength(separatorString)), (UniChar*)bufPtr);
3444 } else if (canBeEightbit || __CFStrIsUnicode(separatorString)) {
3445 memmove(bufPtr, (const uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), separatorNumByte);
3446 } else {
3447 __CFStrConvertBytesToUnicode((uint8_t*)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), (UniChar*)bufPtr, __CFStrLength(separatorString));
3448 }
3449 separatorContents = bufPtr;
3450 }
3451 bufPtr += separatorNumByte;
3452 }
3453
3454 otherString = (CFStringRef )CFArrayGetValueAtIndex(array, idx);
3455 if (CF_IS_OBJC(__kCFStringTypeID, otherString)) {
3456 CFIndex otherLength = CFStringGetLength(otherString);
3457 CFStringGetCharacters(otherString, CFRangeMake(0, otherLength), (UniChar*)bufPtr);
3458 bufPtr += otherLength * sizeof(UniChar);
3459 } else {
3460 const uint8_t* otherContents = __CFStrContents(otherString);
3461 CFIndex otherNumByte = __CFStrLength2(otherString, otherContents) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3462
3463 if (canBeEightbit || __CFStrIsUnicode(otherString)) {
3464 memmove(bufPtr, otherContents + __CFStrSkipAnyLengthByte(otherString), otherNumByte);
3465 } else {
3466 __CFStrConvertBytesToUnicode(otherContents + __CFStrSkipAnyLengthByte(otherString), (UniChar*)bufPtr, __CFStrLength2(otherString, otherContents));
3467 }
3468 bufPtr += otherNumByte;
3469 }
3470 }
3471 if (canBeEightbit) *bufPtr = 0; // NULL byte;
3472
3473 return canBeEightbit ?
3474 CFStringCreateWithCStringNoCopy(alloc, buffer, __CFStringGetEightBitStringEncoding(), alloc) :
3475 CFStringCreateWithCharactersNoCopy(alloc, buffer, numChars, alloc);
3476 }
3477
3478
3479 CFArrayRef CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc, CFStringRef string, CFStringRef separatorString) {
3480 CFArrayRef separatorRanges;
3481 CFIndex length = CFStringGetLength(string);
3482 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
3483 if (!(separatorRanges = CFStringCreateArrayWithFindResults(alloc, string, separatorString, CFRangeMake(0, length), 0))) {
3484 return CFArrayCreate(alloc, (const void**)&string, 1, & kCFTypeArrayCallBacks);
3485 } else {
3486 CFIndex idx;
3487 CFIndex count = CFArrayGetCount(separatorRanges);
3488 CFIndex startIndex = 0;
3489 CFIndex numChars;
3490 CFMutableArrayRef array = CFArrayCreateMutable(alloc, count + 2, & kCFTypeArrayCallBacks);
3491 const CFRange *currentRange;
3492 CFStringRef substring;
3493
3494 for (idx = 0;idx < count;idx++) {
3495 currentRange = CFArrayGetValueAtIndex(separatorRanges, idx);
3496 numChars = currentRange->location - startIndex;
3497 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, numChars));
3498 CFArrayAppendValue(array, substring);
3499 CFRelease(substring);
3500 startIndex = currentRange->location + currentRange->length;
3501 }
3502 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, length - startIndex));
3503 CFArrayAppendValue(array, substring);
3504 CFRelease(substring);
3505
3506 CFRelease(separatorRanges);
3507
3508 return array;
3509 }
3510 }
3511
3512 CFStringRef CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc, CFDataRef data, CFStringEncoding encoding) {
3513 return CFStringCreateWithBytes(alloc, CFDataGetBytePtr(data), CFDataGetLength(data), encoding, true);
3514 }
3515
3516
3517 CFDataRef CFStringCreateExternalRepresentation(CFAllocatorRef alloc, CFStringRef string, CFStringEncoding encoding, uint8_t lossByte) {
3518 CFIndex length;
3519 CFIndex guessedByteLength;
3520 uint8_t *bytes;
3521 CFIndex usedLength;
3522 SInt32 result;
3523
3524 if (CF_IS_OBJC(__kCFStringTypeID, string)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3525 length = CFStringGetLength(string);
3526 } else {
3527 __CFAssertIsString(string);
3528 length = __CFStrLength(string);
3529 if (__CFStrIsEightBit(string) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
3530 return CFDataCreate(alloc, ((char *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
3531 }
3532 }
3533
3534 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3535
3536 if (encoding == kCFStringEncodingUnicode) {
3537 guessedByteLength = (length + 1) * sizeof(UniChar);
3538 } else if (((guessedByteLength = CFStringGetMaximumSizeForEncoding(length, encoding)) > length) && !CF_IS_OBJC(__kCFStringTypeID, string)) { // Multi byte encoding
3539 #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
3540 if (__CFStrIsUnicode(string)) {
3541 guessedByteLength = CFStringEncodingByteLengthForCharacters(encoding, kCFStringEncodingPrependBOM, __CFStrContents(string), __CFStrLength(string));
3542 } else {
3543 #endif
3544 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, NULL, 0x7FFFFFFF, &guessedByteLength);
3545 // if result == length, we always succeed
3546 // otherwise, if result == 0, we fail
3547 // otherwise, if there was a lossByte but still result != length, we fail
3548 if ((result != length) && (!result || !lossByte)) return NULL;
3549 if (guessedByteLength == length && __CFStrIsEightBit(string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // It's all ASCII !!
3550 return CFDataCreate(alloc, ((char *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
3551 }
3552 #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
3553 }
3554 #endif
3555 }
3556 bytes = CFAllocatorAllocate(alloc, guessedByteLength, 0);
3557 if (__CFOASafe) __CFSetLastAllocationEventName(bytes, "CFData (store)");
3558
3559 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, bytes, guessedByteLength, &usedLength);
3560
3561 if ((result != length) && (!result || !lossByte)) { // see comment above about what this means
3562 CFAllocatorDeallocate(alloc, bytes);
3563 return NULL;
3564 }
3565
3566 return CFDataCreateWithBytesNoCopy(alloc, (char const *)bytes, usedLength, alloc);
3567 }
3568
3569
3570 CFStringEncoding CFStringGetSmallestEncoding(CFStringRef str) {
3571 CFIndex len;
3572 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_smallestEncodingInCFStringEncoding");
3573 __CFAssertIsString(str);
3574
3575 if (__CFStrIsEightBit(str)) return __CFStringGetEightBitStringEncoding();
3576 len = __CFStrLength(str);
3577 if (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetEightBitStringEncoding(), 0, NULL, 0x7fffffff, NULL) == len) return __CFStringGetEightBitStringEncoding();
3578 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetSystemEncoding(), 0, NULL, 0x7fffffff, NULL) == len)) return __CFStringGetSystemEncoding();
3579 return kCFStringEncodingUnicode; /* ??? */
3580 }
3581
3582
3583 CFStringEncoding CFStringGetFastestEncoding(CFStringRef str) {
3584 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_fastestEncodingInCFStringEncoding");
3585 __CFAssertIsString(str);
3586 return __CFStrIsEightBit(str) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode; /* ??? */
3587 }
3588
3589
3590 SInt32 CFStringGetIntValue(CFStringRef str) {
3591 Boolean success;
3592 SInt32 result;
3593 SInt32 idx = 0;
3594 CFStringInlineBuffer buf;
3595 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
3596 success = __CFStringScanInteger(&buf, NULL, &idx, false, &result);
3597 return success ? result : 0;
3598 }
3599
3600
3601 double CFStringGetDoubleValue(CFStringRef str) {
3602 Boolean success;
3603 double result;
3604 SInt32 idx = 0;
3605 CFStringInlineBuffer buf;
3606 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
3607 success = __CFStringScanDouble(&buf, NULL, &idx, &result);
3608 return success ? result : 0.0;
3609 }
3610
3611
3612 /*** Mutable functions... ***/
3613
3614 void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string, UniChar *chars, CFIndex length, CFIndex capacity) {
3615 __CFAssertIsNotNegative(length);
3616 __CFAssertIsStringAndExternalMutable(string);
3617 CFAssert4((length <= capacity) && ((capacity == 0) || ((capacity > 0) && chars)), __kCFLogAssertion, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__, chars, length, capacity);
3618 __CFStrSetContentPtr(string, chars);
3619 __CFStrSetExplicitLength(string, length);
3620 __CFStrSetCapacity(string, capacity * sizeof(UniChar));
3621 __CFStrSetCapacityProvidedExternally(string);
3622 }
3623
3624
3625
3626 void CFStringInsert(CFMutableStringRef str, CFIndex idx, CFStringRef insertedStr) {
3627 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "insertString:atIndex:", insertedStr, idx);
3628 __CFAssertIsStringAndMutable(str);
3629 CFAssert3(idx >= 0 && idx <= __CFStrLength(str), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(str));
3630 __CFStringReplace(str, CFRangeMake(idx, 0), insertedStr);
3631 }
3632
3633
3634 void CFStringDelete(CFMutableStringRef str, CFRange range) {
3635 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "deleteCharactersInRange:", range);
3636 __CFAssertIsStringAndMutable(str);
3637 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
3638 __CFStringChangeSize(str, range, 0, false);
3639 }
3640
3641
3642 void CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
3643 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "replaceCharactersInRange:withString:", range, replacement);
3644 __CFAssertIsStringAndMutable(str);
3645 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
3646 __CFStringReplace(str, range, replacement);
3647 }
3648
3649
3650 void CFStringReplaceAll(CFMutableStringRef str, CFStringRef replacement) {
3651 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "setString:", replacement);
3652 __CFAssertIsStringAndMutable(str);
3653 __CFStringReplace(str, CFRangeMake(0, __CFStrLength(str)), replacement);
3654 }
3655
3656
3657 void CFStringAppend(CFMutableStringRef str, CFStringRef appended) {
3658 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "appendString:", appended);
3659 __CFAssertIsStringAndMutable(str);
3660 __CFStringReplace(str, CFRangeMake(__CFStrLength(str), 0), appended);
3661 }
3662
3663
3664 void CFStringAppendCharacters(CFMutableStringRef str, const UniChar *chars, CFIndex appendedLength) {
3665 CFIndex strLength, idx;
3666
3667 __CFAssertIsNotNegative(appendedLength);
3668
3669 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", chars, appendedLength);
3670
3671 __CFAssertIsStringAndMutable(str);
3672
3673 strLength = __CFStrLength(str);
3674 if (__CFStringGetCompatibility(Bug2967272) || __CFStrIsUnicode(str)) {
3675 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, true);
3676 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
3677 } else {
3678 uint8_t *contents;
3679 bool isASCII = true;
3680 for (idx = 0; isASCII && idx < appendedLength; idx++) isASCII = (chars[idx] < 0x80);
3681 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, !isASCII);
3682 if (!isASCII) {
3683 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
3684 } else {
3685 contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
3686 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
3687 }
3688 }
3689 }
3690
3691
3692 static void __CFStringAppendBytes(CFMutableStringRef str, const char *cStr, CFIndex appendedLength, CFStringEncoding encoding) {
3693 Boolean appendedIsUnicode = false;
3694 Boolean freeCStrWhenDone = false;
3695 Boolean demoteAppendedUnicode = false;
3696 CFVarWidthCharBuffer vBuf;
3697
3698 __CFAssertIsNotNegative(appendedLength);
3699
3700 if (encoding == kCFStringEncodingASCII || encoding == __CFStringGetEightBitStringEncoding()) {
3701 // appendedLength now denotes length in UniChars
3702 } else if (encoding == kCFStringEncodingUnicode) {
3703 UniChar *chars = (UniChar *)cStr;
3704 CFIndex idx, length = appendedLength / sizeof(UniChar);
3705 bool isASCII = true;
3706 for (idx = 0; isASCII && idx < length; idx++) isASCII = (chars[idx] < 0x80);
3707 if (!isASCII) {
3708 appendedIsUnicode = true;
3709 } else {
3710 demoteAppendedUnicode = true;
3711 }
3712 appendedLength = length;
3713 } else {
3714 Boolean usingPassedInMemory = false;
3715
3716 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
3717 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
3718
3719 if (!__CFStringDecodeByteStream3(cStr, appendedLength, encoding, __CFStrIsUnicode(str), &vBuf, &usingPassedInMemory, 0)) {
3720 CFAssert1(0, __kCFLogAssertion, "Supplied bytes could not be converted specified encoding %d", encoding);
3721 return;
3722 }
3723
3724 // If not ASCII, appendedLength now denotes length in UniChars
3725 appendedLength = vBuf.numChars;
3726 appendedIsUnicode = !vBuf.isASCII;
3727 cStr = vBuf.chars.ascii;
3728 freeCStrWhenDone = !usingPassedInMemory && vBuf.shouldFreeChars;
3729 }
3730
3731 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
3732 if (!appendedIsUnicode && !demoteAppendedUnicode) {
3733 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "_cfAppendCString:length:", cStr, appendedLength);
3734 } else {
3735 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", cStr, appendedLength);
3736 }
3737 } else {
3738 CFIndex strLength;
3739 __CFAssertIsStringAndMutable(str);
3740 strLength = __CFStrLength(str);
3741
3742 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, appendedIsUnicode || __CFStrIsUnicode(str));
3743
3744 if (__CFStrIsUnicode(str)) {
3745 UniChar *contents = (UniChar *)__CFStrContents(str);
3746 if (appendedIsUnicode) {
3747 memmove(contents + strLength, cStr, appendedLength * sizeof(UniChar));
3748 } else {
3749 __CFStrConvertBytesToUnicode(cStr, contents + strLength, appendedLength);
3750 }
3751 } else {
3752 if (demoteAppendedUnicode) {
3753 UniChar *chars = (UniChar *)cStr;
3754 CFIndex idx;
3755 uint8_t *contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
3756 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
3757 } else {
3758 uint8_t *contents = (uint8_t *)__CFStrContents(str);
3759 memmove(contents + strLength + __CFStrSkipAnyLengthByte(str), cStr, appendedLength);
3760 }
3761 }
3762 }
3763
3764 if (freeCStrWhenDone) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr);
3765 }
3766
3767 void CFStringAppendPascalString(CFMutableStringRef str, ConstStringPtr pStr, CFStringEncoding encoding) {
3768 __CFStringAppendBytes(str, pStr + 1, (CFIndex)*pStr, encoding);
3769 }
3770
3771 void CFStringAppendCString(CFMutableStringRef str, const char *cStr, CFStringEncoding encoding) {
3772 __CFStringAppendBytes(str, cStr, strlen(cStr), encoding);
3773 }
3774
3775
3776 void CFStringAppendFormat(CFMutableStringRef str, CFDictionaryRef formatOptions, CFStringRef format, ...) {
3777 va_list argList;
3778
3779 va_start(argList, format);
3780 CFStringAppendFormatAndArguments(str, formatOptions, format, argList);
3781 va_end(argList);
3782 }
3783
3784
3785 CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFOptionFlags compareOptions) {
3786 CFRange foundRange;
3787 Boolean backwards = compareOptions & kCFCompareBackwards;
3788 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
3789 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
3790 CFRange rangeBuffer[MAX_RANGES_ON_STACK]; // Used to avoid allocating memory
3791 CFRange *ranges = rangeBuffer;
3792 CFIndex foundCount = 0;
3793 CFIndex capacity = MAX_RANGES_ON_STACK;
3794
3795 __CFAssertIsStringAndMutable(string);
3796 __CFAssertRangeIsInStringBounds(string, rangeToSearch.location, rangeToSearch.length);
3797
3798 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
3799 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
3800 // Determine the next range
3801 if (backwards) {
3802 rangeToSearch.length = foundRange.location - rangeToSearch.location;
3803 } else {
3804 rangeToSearch.location = foundRange.location + foundRange.length;
3805 rangeToSearch.length = endIndex - rangeToSearch.location;
3806 }
3807
3808 // If necessary, grow the array
3809 if (foundCount >= capacity) {
3810 bool firstAlloc = (ranges == rangeBuffer) ? true : false;
3811 capacity = (capacity + 4) * 2;
3812 // Note that reallocate with NULL previous pointer is same as allocate
3813 ranges = CFAllocatorReallocate(NULL, firstAlloc ? NULL : ranges, capacity * sizeof(CFRange), 0);
3814 if (firstAlloc) memmove(ranges, rangeBuffer, MAX_RANGES_ON_STACK * sizeof(CFRange));
3815 }
3816 ranges[foundCount] = foundRange;
3817 foundCount++;
3818 }
3819
3820 if (foundCount > 0) {
3821 if (backwards) { // Reorder the ranges to be incrementing (better to do this here, then to check other places)
3822 int head = 0;
3823 int tail = foundCount - 1;
3824 while (head < tail) {
3825 CFRange temp = ranges[head];
3826 ranges[head] = ranges[tail];
3827 ranges[tail] = temp;
3828 head++;
3829 tail--;
3830 }
3831 }
3832 __CFStringReplaceMultiple(string, ranges, foundCount, replacementString);
3833 if (ranges != rangeBuffer) CFAllocatorDeallocate(NULL, ranges);
3834 }
3835
3836 return foundCount;
3837 }
3838
3839
3840 // This function is here for NSString purposes
3841 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
3842
3843 int __CFStringCheckAndReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
3844 if (!__CFStrIsMutable(str)) return _CFStringErrNotMutable; // These three ifs are always here, for NSString usage
3845 if (!replacement && __CFStringNoteErrors()) return _CFStringErrNilArg;
3846 // We use unsigneds as that is what NSRanges do; we use uint64_t do make sure the sum doesn't wrap (otherwise we'd need to do 3 separate checks). This allows catching bad ranges as described in 3375535. (-1,1)
3847 if (((uint64_t)((unsigned)range.location)) + ((uint64_t)((unsigned)range.length)) > (uint64_t)__CFStrLength(str) && __CFStringNoteErrors()) return _CFStringErrBounds;
3848 __CFAssertIsStringAndMutable(str);
3849 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
3850 __CFStringReplace(str, range, replacement);
3851 return _CFStringErrNone;
3852 }
3853
3854 // This function determines whether errors which would cause string exceptions should
3855 // be ignored or not
3856
3857 Boolean __CFStringNoteErrors(void) {
3858 return _CFExecutableLinkedOnOrAfter(CFSystemVersionJaguar) ? true : false;
3859 }
3860
3861
3862
3863 void CFStringPad(CFMutableStringRef string, CFStringRef padString, CFIndex length, CFIndex indexIntoPad) {
3864 CFIndex originalLength;
3865
3866 __CFAssertIsNotNegative(length);
3867 __CFAssertIsNotNegative(indexIntoPad);
3868
3869 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, void, string, "_cfPad:length:padIndex:", padString, length, indexIntoPad);
3870
3871 __CFAssertIsStringAndMutable(string);
3872
3873 originalLength = __CFStrLength(string);
3874 if (length < originalLength) {
3875 __CFStringChangeSize(string, CFRangeMake(length, originalLength - length), 0, false);
3876 } else if (originalLength < length) {
3877 uint8_t *contents;
3878 Boolean isUnicode;
3879 CFIndex charSize;
3880 CFIndex padStringLength;
3881 CFIndex padLength;
3882 CFIndex padRemaining = length - originalLength;
3883
3884 if (CF_IS_OBJC(__kCFStringTypeID, padString)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3885 padStringLength = CFStringGetLength(padString);
3886 isUnicode = true; /* !!! Bad for now */
3887 } else {
3888 __CFAssertIsString(padString);
3889 padStringLength = __CFStrLength(padString);
3890 isUnicode = __CFStrIsUnicode(string) || __CFStrIsUnicode(padString);
3891 }
3892
3893 charSize = isUnicode ? sizeof(UniChar) : sizeof(uint8_t);
3894
3895 __CFStringChangeSize(string, CFRangeMake(originalLength, 0), padRemaining, isUnicode);
3896
3897 contents = (uint8_t*)__CFStrContents(string) + charSize * originalLength + __CFStrSkipAnyLengthByte(string);
3898 padLength = padStringLength - indexIntoPad;
3899 padLength = padRemaining < padLength ? padRemaining : padLength;
3900
3901 while (padRemaining > 0) {
3902 if (isUnicode) {
3903 CFStringGetCharacters(padString, CFRangeMake(indexIntoPad, padLength), (UniChar*)contents);
3904 } else {
3905 CFStringGetBytes(padString, CFRangeMake(indexIntoPad, padLength), __CFStringGetEightBitStringEncoding(), 0, false, contents, padRemaining * charSize, NULL);
3906 }
3907 contents += padLength * charSize;
3908 padRemaining -= padLength;
3909 indexIntoPad = 0;
3910 padLength = padRemaining < padLength ? padRemaining : padStringLength;
3911 }
3912 }
3913 }
3914
3915 void CFStringTrim(CFMutableStringRef string, CFStringRef trimString) {
3916 CFRange range;
3917 CFIndex newStartIndex;
3918 CFIndex length;
3919
3920 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfTrim:", trimString);
3921
3922 __CFAssertIsStringAndMutable(string);
3923 __CFAssertIsString(trimString);
3924
3925 newStartIndex = 0;
3926 length = __CFStrLength(string);
3927
3928 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length - newStartIndex), kCFCompareAnchored, &range)) {
3929 newStartIndex = range.location + range.length;
3930 }
3931
3932 if (newStartIndex < length) {
3933 CFIndex charSize = __CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t);
3934 uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
3935
3936 length -= newStartIndex;
3937 if (__CFStrLength(trimString) < length) {
3938 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length), kCFCompareAnchored|kCFCompareBackwards, &range)) {
3939 length = range.location - newStartIndex;
3940 }
3941 }
3942 memmove(contents, contents + newStartIndex * charSize, length * charSize);
3943 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
3944 } else { // Only trimString in string, trim all
3945 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
3946 }
3947 }
3948
3949 void CFStringTrimWhitespace(CFMutableStringRef string) {
3950 CFIndex newStartIndex;
3951 CFIndex length;
3952 CFStringInlineBuffer buffer;
3953
3954 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, void, string, "_cfTrimWS");
3955
3956 __CFAssertIsStringAndMutable(string);
3957
3958 newStartIndex = 0;
3959 length = __CFStrLength(string);
3960
3961 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
3962 CFIndex buffer_idx = 0;
3963
3964 while (buffer_idx < length && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
3965 buffer_idx++;
3966 newStartIndex = buffer_idx;
3967
3968 if (newStartIndex < length) {
3969 uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
3970 CFIndex charSize = (__CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t));
3971
3972 buffer_idx = length - 1;
3973 while (0 <= buffer_idx && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
3974 buffer_idx--;
3975 length = buffer_idx - newStartIndex + 1;
3976
3977 memmove(contents, contents + newStartIndex * charSize, length * charSize);
3978 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
3979 } else { // Whitespace only string
3980 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
3981 }
3982 }
3983
3984 void CFStringLowercase(CFMutableStringRef string, CFLocaleRef locale) {
3985 CFIndex currentIndex = 0;
3986 CFIndex length;
3987 const char *langCode;
3988 Boolean isEightBit = __CFStrIsEightBit(string);
3989
3990 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfLowercase:", locale);
3991
3992 __CFAssertIsStringAndMutable(string);
3993
3994 length = __CFStrLength(string);
3995
3996 langCode = (_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
3997
3998 if (!langCode && isEightBit) {
3999 uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4000 for (;currentIndex < length;currentIndex++) {
4001 if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4002 contents[currentIndex] += 'a' - 'A';
4003 } else if (contents[currentIndex] > 127) {
4004 break;
4005 }
4006 }
4007 }
4008
4009 if (currentIndex < length) {
4010 UniChar *contents;
4011 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4012 CFIndex mappedLength;
4013 UTF32Char currentChar;
4014 UInt32 flags = 0;
4015
4016 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4017
4018 contents = (UniChar*)__CFStrContents(string);
4019
4020 for (;currentIndex < length;currentIndex++) {
4021
4022 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4023 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4024 } else {
4025 currentChar = contents[currentIndex];
4026 }
4027 flags = ((langCode || (currentChar == 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToLowercase, langCode, flags) : 0);
4028
4029 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, flags, langCode);
4030 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4031
4032 if (currentChar > 0xFFFF) { // Non-BMP char
4033 switch (mappedLength) {
4034 case 0:
4035 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4036 contents = (UniChar*)__CFStrContents(string);
4037 length -= 2;
4038 break;
4039
4040 case 1:
4041 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4042 contents = (UniChar*)__CFStrContents(string);
4043 --length;
4044 break;
4045
4046 case 2:
4047 contents[++currentIndex] = mappedCharacters[1];
4048 break;
4049
4050 default:
4051 --mappedLength; // Skip the current char
4052 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4053 contents = (UniChar*)__CFStrContents(string);
4054 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4055 length += (mappedLength - 1);
4056 currentIndex += mappedLength;
4057 break;
4058 }
4059 } else if (mappedLength == 0) {
4060 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4061 contents = (UniChar*)__CFStrContents(string);
4062 --length;
4063 } else if (mappedLength > 1) {
4064 --mappedLength; // Skip the current char
4065 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4066 contents = (UniChar*)__CFStrContents(string);
4067 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4068 length += mappedLength;
4069 currentIndex += mappedLength;
4070 }
4071 }
4072 }
4073 }
4074
4075 void CFStringUppercase(CFMutableStringRef string, CFLocaleRef locale) {
4076 CFIndex currentIndex = 0;
4077 CFIndex length;
4078 const char *langCode;
4079 Boolean isEightBit = __CFStrIsEightBit(string);
4080
4081 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfUppercase:", locale);
4082
4083 __CFAssertIsStringAndMutable(string);
4084
4085 length = __CFStrLength(string);
4086
4087 langCode = (_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4088
4089 if (!langCode && isEightBit) {
4090 uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4091 for (;currentIndex < length;currentIndex++) {
4092 if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4093 contents[currentIndex] -= 'a' - 'A';
4094 } else if (contents[currentIndex] > 127) {
4095 break;
4096 }
4097 }
4098 }
4099
4100 if (currentIndex < length) {
4101 UniChar *contents;
4102 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4103 CFIndex mappedLength;
4104 UTF32Char currentChar;
4105 UInt32 flags = 0;
4106
4107 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4108
4109 contents = (UniChar*)__CFStrContents(string);
4110
4111 for (;currentIndex < length;currentIndex++) {
4112 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4113 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4114 } else {
4115 currentChar = contents[currentIndex];
4116 }
4117
4118 flags = (langCode ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToUppercase, langCode, flags) : 0);
4119
4120 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToUppercase, flags, langCode);
4121 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4122
4123 if (currentChar > 0xFFFF) { // Non-BMP char
4124 switch (mappedLength) {
4125 case 0:
4126 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4127 contents = (UniChar*)__CFStrContents(string);
4128 length -= 2;
4129 break;
4130
4131 case 1:
4132 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4133 contents = (UniChar*)__CFStrContents(string);
4134 --length;
4135 break;
4136
4137 case 2:
4138 contents[++currentIndex] = mappedCharacters[1];
4139 break;
4140
4141 default:
4142 --mappedLength; // Skip the current char
4143 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4144 contents = (UniChar*)__CFStrContents(string);
4145 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4146 length += (mappedLength - 1);
4147 currentIndex += mappedLength;
4148 break;
4149 }
4150 } else if (mappedLength == 0) {
4151 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4152 contents = (UniChar*)__CFStrContents(string);
4153 --length;
4154 } else if (mappedLength > 1) {
4155 --mappedLength; // Skip the current char
4156 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4157 contents = (UniChar*)__CFStrContents(string);
4158 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4159 length += mappedLength;
4160 currentIndex += mappedLength;
4161 }
4162 }
4163 }
4164 }
4165
4166
4167 void CFStringCapitalize(CFMutableStringRef string, CFLocaleRef locale) {
4168 CFIndex currentIndex = 0;
4169 CFIndex length;
4170 const char *langCode;
4171 Boolean isEightBit = __CFStrIsEightBit(string);
4172 Boolean isLastCased = false;
4173 static const uint8_t *caseIgnorableForBMP = NULL;
4174
4175 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfCapitalize:", locale);
4176
4177 __CFAssertIsStringAndMutable(string);
4178
4179 length = __CFStrLength(string);
4180
4181 if (NULL == caseIgnorableForBMP) caseIgnorableForBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet, 0);
4182
4183 langCode = (_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL);
4184
4185 if (!langCode && isEightBit) {
4186 uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4187 for (;currentIndex < length;currentIndex++) {
4188 if (contents[currentIndex] > 127) {
4189 break;
4190 } else if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
4191 contents[currentIndex] += (isLastCased ? 'a' - 'A' : 0);
4192 isLastCased = true;
4193 } else if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
4194 contents[currentIndex] -= (!isLastCased ? 'a' - 'A' : 0);
4195 isLastCased = true;
4196 } else if (!CFUniCharIsMemberOfBitmap(contents[currentIndex], caseIgnorableForBMP)) {
4197 isLastCased = false;
4198 }
4199 }
4200 }
4201
4202 if (currentIndex < length) {
4203 UniChar *contents;
4204 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
4205 CFIndex mappedLength;
4206 UTF32Char currentChar;
4207 UInt32 flags = 0;
4208
4209 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
4210
4211 contents = (UniChar*)__CFStrContents(string);
4212
4213 for (;currentIndex < length;currentIndex++) {
4214 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
4215 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
4216 } else {
4217 currentChar = contents[currentIndex];
4218 }
4219 flags = ((langCode || ((currentChar == 0x03A3) && isLastCased)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), langCode, flags) : 0);
4220
4221 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), flags, langCode);
4222 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
4223
4224 if (currentChar > 0xFFFF) { // Non-BMP char
4225 switch (mappedLength) {
4226 case 0:
4227 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
4228 contents = (UniChar*)__CFStrContents(string);
4229 length -= 2;
4230 break;
4231
4232 case 1:
4233 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
4234 contents = (UniChar*)__CFStrContents(string);
4235 --length;
4236 break;
4237
4238 case 2:
4239 contents[++currentIndex] = mappedCharacters[1];
4240 break;
4241
4242 default:
4243 --mappedLength; // Skip the current char
4244 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
4245 contents = (UniChar*)__CFStrContents(string);
4246 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4247 length += (mappedLength - 1);
4248 currentIndex += mappedLength;
4249 break;
4250 }
4251 } else if (mappedLength == 0) {
4252 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
4253 contents = (UniChar*)__CFStrContents(string);
4254 --length;
4255 } else if (mappedLength > 1) {
4256 --mappedLength; // Skip the current char
4257 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
4258 contents = (UniChar*)__CFStrContents(string);
4259 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
4260 length += mappedLength;
4261 currentIndex += mappedLength;
4262 }
4263
4264 if (!((currentChar > 0xFFFF) ? CFUniCharIsMemberOf(currentChar, kCFUniCharCaseIgnorableCharacterSet) : CFUniCharIsMemberOfBitmap(currentChar, caseIgnorableForBMP))) { // We have non-caseignorable here
4265 isLastCased = ((CFUniCharIsMemberOf(currentChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(currentChar, kCFUniCharLowercaseLetterCharacterSet)) ? true : false);
4266 }
4267 }
4268 }
4269 }
4270
4271
/* Number of UTF32Char slots in CFStringNormalize()'s on-stack mapping buffer; the same
   value is used as the growth increment when that buffer has to move to the heap. */
4272 #define MAX_DECOMP_BUF 64
4273
/* Constants for the arithmetic mapping between conjoining Hangul Jamo and precomposed
   Hangul syllables, used by CFStringNormalize() as
       syllable = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE
   xBASE is the first code point of a range and xCOUNT the number of entries in it
   (S = syllables, L = leading consonants, V = vowels, T = trailing consonants).
   NOTE(review): values match the Hangul constants published in the Unicode Standard;
   HANGUL_NCOUNT is not referenced in the visible part of this file. */
4274 #define HANGUL_SBASE 0xAC00
4275 #define HANGUL_LBASE 0x1100
4276 #define HANGUL_VBASE 0x1161
4277 #define HANGUL_TBASE 0x11A7
4278 #define HANGUL_SCOUNT 11172
4279 #define HANGUL_LCOUNT 19
4280 #define HANGUL_VCOUNT 21
4281 #define HANGUL_TCOUNT 28
4282 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
4283
4284 CF_INLINE uint32_t __CFGetUTF16Length(const UTF32Char *characters, uint32_t utf32Length) {
4285 const UTF32Char *limit = characters + utf32Length;
4286 uint32_t length = 0;
4287
4288 while (characters < limit) length += (*(characters++) > 0xFFFF ? 2 : 1);
4289
4290 return length;
4291 }
4292
4293 CF_INLINE void __CFFillInUTF16(const UTF32Char *characters, UTF16Char *dst, uint32_t utf32Length) {
4294 const UTF32Char *limit = characters + utf32Length;
4295 UTF32Char currentChar;
4296
4297 while (characters < limit) {
4298 currentChar = *(characters++);
4299 if (currentChar > 0xFFFF) {
4300 currentChar -= 0x10000;
4301 *(dst++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
4302 *(dst++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
4303 } else {
4304 *(dst++) = currentChar;
4305 }
4306 }
4307 }
4308
4309 void CFStringNormalize(CFMutableStringRef string, CFStringNormalizationForm theForm) {
4310 CFIndex currentIndex = 0;
4311 CFIndex length;
4312 bool needToReorder = true;
4313
4314 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfNormalize:", theForm);
4315
4316 __CFAssertIsStringAndMutable(string);
4317
4318 length = __CFStrLength(string);
4319
4320 if (__CFStrIsEightBit(string)) {
4321 uint8_t *contents;
4322
4323 if (theForm == kCFStringNormalizationFormC) return; // 8bit form has no decomposition
4324
4325 contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4326
4327 for (;currentIndex < length;currentIndex++) {
4328 if (contents[currentIndex] > 127) {
4329 __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); // need to do harm way
4330 needToReorder = false;
4331 break;
4332 }
4333 }
4334 }
4335
4336 if (currentIndex < length) {
4337 UTF16Char *limit = (UTF16Char *)__CFStrContents(string) + length;
4338 UTF16Char *contents = (UTF16Char *)__CFStrContents(string) + currentIndex;
4339 UTF32Char buffer[MAX_DECOMP_BUF];
4340 UTF32Char *mappedCharacters = buffer;
4341 CFIndex allocatedLength = MAX_DECOMP_BUF;
4342 CFIndex mappedLength;
4343 CFIndex currentLength;
4344 UTF32Char currentChar;
4345
4346 while (contents < limit) {
4347 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4348 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4349 currentLength = 2;
4350 contents += 2;
4351 } else {
4352 currentChar = *(contents++);
4353 currentLength = 1;
4354 }
4355
4356 mappedLength = 0;
4357
4358 if (CFUniCharIsMemberOf(currentChar, kCFUniCharCanonicalDecomposableCharacterSet) && !CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) {
4359 if ((theForm & kCFStringNormalizationFormC) == 0 || currentChar < HANGUL_SBASE || currentChar > (HANGUL_SBASE + HANGUL_SCOUNT)) { // We don't have to decompose Hangul Syllables if we're precomposing again
4360 mappedLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters, MAX_DECOMP_BUF);
4361 }
4362 }
4363
4364 if ((needToReorder || (theForm & kCFStringNormalizationFormC)) && ((contents < limit) || (mappedLength == 0))) {
4365 if (mappedLength > 0) {
4366 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4367 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4368 } else {
4369 currentChar = *contents;
4370 }
4371 }
4372
4373 if (CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) {
4374 uint32_t decompLength;
4375
4376 if (mappedLength == 0) {
4377 contents -= (currentChar & 0xFFFF0000 ? 2 : 1);
4378 if (currentIndex > 0) {
4379 if (CFUniCharIsSurrogateLowCharacter(*(contents - 1)) && (currentIndex > 1) && CFUniCharIsSurrogateHighCharacter(*(contents - 2))) {
4380 *mappedCharacters = CFUniCharGetLongCharacterForSurrogatePair(*(contents - 2), *(contents - 1));
4381 currentIndex -= 2;
4382 currentLength += 2;
4383 } else {
4384 *mappedCharacters = *(contents - 1);
4385 --currentIndex;
4386 ++currentLength;
4387 }
4388 mappedLength = 1;
4389 }
4390 } else {
4391 currentLength += (currentChar & 0xFFFF0000 ? 2 : 1);
4392 }
4393 contents += (currentChar & 0xFFFF0000 ? 2 : 1);
4394
4395 if (CFUniCharIsMemberOf(currentChar, kCFUniCharDecomposableCharacterSet)) { // Vietnamese accent, etc.
4396 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4397 mappedLength += decompLength;
4398 } else {
4399 mappedCharacters[mappedLength++] = currentChar;
4400 }
4401
4402 while (contents < limit) {
4403 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
4404 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
4405 } else {
4406 currentChar = *contents;
4407 }
4408 if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
4409 if (currentChar & 0xFFFF0000) {
4410 contents += 2;
4411 currentLength += 2;
4412 } else {
4413 ++contents;
4414 ++currentLength;
4415 }
4416 if (mappedLength == allocatedLength) {
4417 allocatedLength += MAX_DECOMP_BUF;
4418 if (mappedCharacters == buffer) {
4419 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(NULL, allocatedLength * sizeof(UTF32Char), 0);
4420 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4421 } else {
4422 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(NULL, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4423 }
4424 }
4425 if (CFUniCharIsMemberOf(currentChar, kCFUniCharDecomposableCharacterSet)) { // Vietnamese accent, etc.
4426 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
4427 mappedLength += decompLength;
4428 } else {
4429 mappedCharacters[mappedLength++] = currentChar;
4430 }
4431 }
4432 }
4433 if (needToReorder && mappedLength > 1) CFUniCharPrioritySort(mappedCharacters, mappedLength);
4434 }
4435
4436 if (theForm & kCFStringNormalizationFormKD) {
4437 CFIndex newLength = 0;
4438
4439 if (mappedLength == 0 && CFUniCharIsMemberOf(currentChar, kCFUniCharCompatibilityDecomposableCharacterSet)) {
4440 mappedCharacters[mappedLength++] = currentChar;
4441 }
4442 while (newLength < mappedLength) {
4443 newLength = CFUniCharCompatibilityDecompose(mappedCharacters, mappedLength, allocatedLength);
4444 if (newLength == 0) {
4445 allocatedLength += MAX_DECOMP_BUF;
4446 if (mappedCharacters == buffer) {
4447 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(NULL, allocatedLength * sizeof(UTF32Char), 0);
4448 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4449 } else {
4450 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(NULL, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4451 }
4452 }
4453 }
4454 mappedLength = newLength;
4455 }
4456
4457 if (theForm & kCFStringNormalizationFormC) {
4458 if (mappedLength > 1) {
4459 CFIndex consumedLength = 1;
4460 UTF32Char nextChar;
4461 UTF32Char *currentBase = mappedCharacters;
4462 uint8_t currentClass, lastClass = 0;
4463 const uint8_t *bmpClassTable = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
4464 bool didCombine = false;
4465
4466 currentChar = *mappedCharacters;
4467
4468 while (consumedLength < mappedLength) {
4469 nextChar = mappedCharacters[consumedLength];
4470 currentClass = (nextChar & 0xFFFF0000 ? CFUniCharGetUnicodeProperty(nextChar, kCFUniCharCombiningProperty) : CFUniCharGetCombiningPropertyForCharacter(nextChar, bmpClassTable));
4471
4472 if (theForm & kCFStringNormalizationFormKD) {
4473 if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) {
4474 SInt8 lIndex = currentChar - HANGUL_LBASE;
4475
4476 if ((0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4477 SInt16 vIndex = nextChar - HANGUL_VBASE;
4478
4479 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4480 SInt16 tIndex = 0;
4481 CFIndex usedLength = mappedLength;
4482
4483 mappedCharacters[consumedLength++] = 0xFFFD;
4484
4485 if (consumedLength < mappedLength) {
4486 tIndex = mappedCharacters[consumedLength] - HANGUL_TBASE;
4487 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4488 tIndex = 0;
4489 } else {
4490 mappedCharacters[consumedLength++] = 0xFFFD;
4491 }
4492 }
4493 *currentBase = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4494
4495 while (--usedLength > 0) {
4496 if (mappedCharacters[usedLength] == 0xFFFD) {
4497 --mappedLength;
4498 --consumedLength;
4499 memmove(mappedCharacters + usedLength, mappedCharacters + usedLength + 1, (mappedLength - usedLength) * sizeof(UTF32Char));
4500 }
4501 }
4502 currentBase = mappedCharacters + consumedLength;
4503 currentChar = *currentBase;
4504 ++consumedLength;
4505
4506 continue;
4507 }
4508 }
4509 }
4510 if (!CFUniCharIsMemberOf(nextChar, kCFUniCharNonBaseCharacterSet)) {
4511 *currentBase = currentChar;
4512 currentBase = mappedCharacters + consumedLength;
4513 currentChar = nextChar;
4514 ++consumedLength;
4515 continue;
4516 }
4517 }
4518 if ((lastClass == 0) || (currentClass != lastClass)) {
4519 nextChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
4520 if (nextChar == 0xFFFD) {
4521 lastClass = currentClass;
4522 } else {
4523 mappedCharacters[consumedLength] = 0xFFFD;
4524 didCombine = true;
4525 currentChar = nextChar;
4526 lastClass = 0;
4527 }
4528 }
4529 ++consumedLength;
4530 }
4531
4532 *currentBase = currentChar;
4533 if (didCombine) {
4534 consumedLength = mappedLength;
4535 while (--consumedLength > 0) {
4536 if (mappedCharacters[consumedLength] == 0xFFFD) {
4537 --mappedLength;
4538 memmove(mappedCharacters + consumedLength, mappedCharacters + consumedLength + 1, (mappedLength - consumedLength) * sizeof(UTF32Char));
4539 }
4540 }
4541 }
4542 } else if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo
4543 SInt8 lIndex = currentChar - HANGUL_LBASE;
4544
4545 if ((contents < limit) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4546 SInt16 vIndex = *contents - HANGUL_VBASE;
4547
4548 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4549 SInt16 tIndex = 0;
4550
4551 ++contents; ++currentLength;
4552
4553 if (contents < limit) {
4554 tIndex = *contents - HANGUL_TBASE;
4555 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4556 tIndex = 0;
4557 } else {
4558 ++contents; ++currentLength;
4559 }
4560 }
4561 *mappedCharacters = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4562 mappedLength = 1;
4563 }
4564 }
4565 }
4566 }
4567
4568 if (mappedLength > 0) {
4569 CFIndex utf16Length = __CFGetUTF16Length(mappedCharacters, mappedLength);
4570
4571 if (utf16Length != currentLength) {
4572 __CFStringChangeSize(string, CFRangeMake(currentIndex, currentLength), utf16Length, true);
4573 currentLength = utf16Length;
4574 }
4575 contents = (UTF16Char *)__CFStrContents(string);
4576 limit = contents + __CFStrLength(string);
4577 contents += currentIndex;
4578 __CFFillInUTF16(mappedCharacters, contents, mappedLength);
4579 contents += utf16Length;
4580 }
4581 currentIndex += currentLength;
4582 }
4583
4584 if (mappedCharacters != buffer) CFAllocatorDeallocate(NULL, mappedCharacters);
4585 }
4586 }
4587
4588
/* Flag bits accumulated in CFFormatSpec.flags by __CFParseFormatSpec() while it reads the
   flag characters of a conversion specification. */
4589 enum {
4590 kCFStringFormatZeroFlag = (1 << 0), // '0' seen: if not, padding is space char; not set after, and cleared by, '-'
4591 kCFStringFormatMinusFlag = (1 << 1), // '-' seen: if not, no flag implied
4592 kCFStringFormatPlusFlag = (1 << 2), // '+' seen: if not, no flag implied, overrides space
4593 kCFStringFormatSpaceFlag = (1 << 3) // ' ' seen: if not, no flag implied; not set after, and cleared by, '+'
4594 };
4595
/* One piece of a parsed format string: either a run of literal text or a single conversion
   specification.  _CFStringAppendFormatAndArgumentsAux() initializes each entry (type and
   size 0, widthArg/precArg and the three argument numbers -1) before handing it to
   __CFParseFormatSpec(). */
4596 typedef struct {
4597 int16_t size; // one of the CFFormatSize* values
4598 int16_t type; // one of the CFFormat*Type values; 0 while no conversion character has been seen
4599 SInt32 loc; // index into the format string where this piece starts
4600 SInt32 len; // number of format characters covered by this piece
4601 SInt32 widthArg; // width written as digits in the format, -1 if none
4602 SInt32 precArg; // precision written as digits in the format, -1 if none
4603 uint32_t flags; // combination of kCFStringFormat*Flag bits
4604 int8_t mainArgNum; // index of the argument to be formatted, -1 until assigned
4605 int8_t precArgNum; // index of the argument supplying the precision; -1 if none, -2 after ".*" until assigned
4606 int8_t widthArgNum; // index of the argument supplying the width; -1 if none, -2 after "*" until assigned
4607 int8_t unused1; // not referenced in the visible code
4608 } CFFormatSpec;
4609
/* One argument slot: the type and size recorded for the argument from the format specs,
   plus the value later fetched from the va_list according to them. */
4610 typedef struct {
4611 int16_t type; // CFFormat*Type of the argument; 0 means nothing is fetched for this slot
4612 int16_t size; // CFFormatSize* of the argument; selects the va_arg type for integer arguments
4613 union {
4614 int64_t int64Value; // CFFormatLongType and CFFormatSingleUnicharType arguments
4615 double doubleValue; // CFFormatDoubleType arguments
4616 void *pointerValue; // pointer, object and string arguments
4617 } value;
4618 } CFPrintValue;
4619
/* Argument size classes stored in CFFormatSpec.size / CFPrintValue.size.  When integer
   arguments are collected, CFFormatSize1 and CFFormatSize2 are read as int and narrowed to
   int8_t / int16_t, CFFormatSize4 is read as int32_t, CFFormatSize8 as int64_t, and any
   other value (including CFFormatDefaultSize) as int. */
4620 enum {
4621 CFFormatDefaultSize = 0,
4622 CFFormatSize1 = 1,
4623 CFFormatSize2 = 2,
4624 CFFormatSize4 = 3,
4625 CFFormatSize8 = 4,
4626 CFFormatSize16 = 5, /* unused */
4627 };
4628
/* Kinds of format pieces / arguments stored in CFFormatSpec.type and CFPrintValue.type.
   The value 0 (not listed here) means that no type was determined. */
4629 enum {
4630 CFFormatLiteralType = 32, /* literal text copied from the format string */
4631 CFFormatLongType = 33, /* integer conversions, including %c */
4632 CFFormatDoubleType = 34, /* %e %E %f %g %G */
4633 CFFormatPointerType = 35, /* %p and %n */
4634 CFFormatObjectType = 36, /* handled specially */ /* ??? not used anymore, can be removed? */
4635 CFFormatCFType = 37, /* handled specially */ /* %@ */
4636 CFFormatUnicharsType = 38, /* handled specially */ /* %S */
4637 CFFormatCharsType = 39, /* handled specially */ /* %s */
4638 CFFormatPascalCharsType = 40, /* handled specially */ /* %P */
4639 CFFormatSingleUnicharType = 41 /* handled specially */ /* %C */
4640 };
4641
4642 CF_INLINE void __CFParseFormatSpec(const UniChar *uformat, const uint8_t *cformat, SInt32 *fmtIdx, SInt32 fmtLen, CFFormatSpec *spec) {
4643 Boolean seenDot = false;
4644 for (;;) {
4645 UniChar ch;
4646 if (fmtLen <= *fmtIdx) return; /* no type */
4647 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
4648 reswtch:switch (ch) {
4649 case '#': // ignored for now
4650 break;
4651 case 0x20:
4652 if (!(spec->flags & kCFStringFormatPlusFlag)) spec->flags |= kCFStringFormatSpaceFlag;
4653 break;
4654 case '-':
4655 spec->flags |= kCFStringFormatMinusFlag;
4656 spec->flags &= ~kCFStringFormatZeroFlag; // remove zero flag
4657 break;
4658 case '+':
4659 spec->flags |= kCFStringFormatPlusFlag;
4660 spec->flags &= ~kCFStringFormatSpaceFlag; // remove space flag
4661 break;
4662 case '0':
4663 if (!(spec->flags & kCFStringFormatMinusFlag)) spec->flags |= kCFStringFormatZeroFlag;
4664 break;
4665 case 'h':
4666 spec->size = CFFormatSize2;
4667 break;
4668 case 'l':
4669 if (*fmtIdx < fmtLen) {
4670 // fetch next character, don't increment fmtIdx
4671 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
4672 if ('l' == ch) { // 'll' for long long, like 'q'
4673 (*fmtIdx)++;
4674 spec->size = CFFormatSize8;
4675 break;
4676 }
4677 }
4678 spec->size = CFFormatSize4;
4679 break;
4680 case 'q':
4681 spec->size = CFFormatSize8;
4682 break;
4683 case 'c':
4684 spec->type = CFFormatLongType;
4685 spec->size = CFFormatSize1;
4686 return;
4687 case 'O': case 'o': case 'D': case 'd': case 'i': case 'U': case 'u': case 'x': case 'X':
4688 spec->type = CFFormatLongType;
4689 return;
4690 case 'e': case 'E': case 'f': case 'g': case 'G':
4691 spec->type = CFFormatDoubleType;
4692 spec->size = CFFormatSize8;
4693 return;
4694 case 'n': case 'p': /* %n is not handled correctly currently */
4695 spec->type = CFFormatPointerType;
4696 spec->size = CFFormatSize4;
4697 return;
4698 case 's':
4699 spec->type = CFFormatCharsType;
4700 spec->size = CFFormatSize4;
4701 return;
4702 case 'S':
4703 spec->type = CFFormatUnicharsType;
4704 spec->size = CFFormatSize4;
4705 return;
4706 case 'C':
4707 spec->type = CFFormatSingleUnicharType;
4708 spec->size = CFFormatSize2;
4709 return;
4710 case 'P':
4711 spec->type = CFFormatPascalCharsType;
4712 spec->size = CFFormatSize4;
4713 return;
4714 case '@':
4715 spec->type = CFFormatCFType;
4716 spec->size = CFFormatSize4;
4717 return;
4718 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
4719 int64_t number = 0;
4720 do {
4721 number = 10 * number + (ch - '0');
4722 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
4723 } while ((UInt32)(ch - '0') <= 9);
4724 if ('$' == ch) {
4725 if (-2 == spec->precArgNum) {
4726 spec->precArgNum = number - 1; // Arg numbers start from 1
4727 } else if (-2 == spec->widthArgNum) {
4728 spec->widthArgNum = number - 1; // Arg numbers start from 1
4729 } else {
4730 spec->mainArgNum = number - 1; // Arg numbers start from 1
4731 }
4732 break;
4733 } else if (seenDot) { /* else it's either precision or width */
4734 spec->precArg = (SInt32)number;
4735 } else {
4736 spec->widthArg = (SInt32)number;
4737 }
4738 goto reswtch;
4739 }
4740 case '*':
4741 spec->widthArgNum = -2;
4742 break;
4743 case '.':
4744 seenDot = true;
4745 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
4746 if ('*' == ch) {
4747 spec->precArgNum = -2;
4748 break;
4749 }
4750 goto reswtch;
4751 default:
4752 spec->type = CFFormatLiteralType;
4753 return;
4754 }
4755 }
4756 }
4757
#if defined(__WIN32__)
/* snprintf() replacement for Windows builds, layered on _vsnprintf().

   b: destination buffer, n: its size in bytes, f: printf-style format.
   Returns whatever _vsnprintf() returned.  NOTE(review): per the Microsoft CRT
   documentation that is a negative value when the output is truncated, not the
   untruncated length C99 snprintf() would report.

   Unlike a bare _vsnprintf() call the buffer is always left NUL-terminated (when n > 0),
   so callers can treat the result as a C string even after truncation. */
static int snprintf(char *b, size_t n, const char * f, ...) {
    int retval;
    va_list args;
    va_start (args, f);
    retval = _vsnprintf(b, n, f, args);
    va_end(args);
    // _vsnprintf writes no terminator when the output fills or overflows the buffer
    if (n > 0 && (retval < 0 || (size_t)retval >= n)) b[n - 1] = '\0';
    return retval;
}
#endif
4768
4769 /* Appends text produced from formatString and the arguments in args to outputString.
4770 This is a thin wrapper: all work is done by _CFStringAppendFormatAndArgumentsAux(), which
4771 is called with NULL for its copyDescFunc parameter.
   formatOptions is forwarded unchanged; in the visible part of the Aux function it is only
   consulted for kCFNSDecimalSeparatorKey, to replace the '.' of formatted doubles.
   ??? %s depends on handling of encodings by __CFStringAppendBytes
*/
4772 void CFStringAppendFormatAndArguments(CFMutableStringRef outputString, CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
4773 _CFStringAppendFormatAndArgumentsAux(outputString, NULL, formatOptions, formatString, args);
4774 }
4775
/* Formats one value into `buffer` with snprintf_l(), passing NULL as its locale argument.

   TYPE is the C type the value is cast to before it is passed on; WHAT is the expression
   that yields the value.

   The macro is only usable inside _CFStringAppendFormatAndArgumentsAux(): it refers to the
   locals specs, curSpec, buffer, formatBuffer, width and precision of the expansion site.
   width and/or precision are passed as extra arguments exactly when the spec's widthArgNum /
   precArgNum is not -1, i.e. when the copied format text takes them from the argument list
   rather than containing them as digits.

   At most 255 bytes are written regardless of how large `buffer` is.  The expansion is a
   bare { } block and the call sites do not put a semicolon after it, so it must not be
   turned into a do { } while (0) macro without updating them. */
4776 #define SNPRINTF(TYPE, WHAT) { \
4777 TYPE value = (TYPE) WHAT; \
4778 if (-1 != specs[curSpec].widthArgNum) { \
4779 if (-1 != specs[curSpec].precArgNum) { \
4780 snprintf_l(buffer, 255, NULL, formatBuffer, width, precision, value); \
4781 } else { \
4782 snprintf_l(buffer, 255, NULL, formatBuffer, width, value); \
4783 } \
4784 } else { \
4785 if (-1 != specs[curSpec].precArgNum) { \
4786 snprintf_l(buffer, 255, NULL, formatBuffer, precision, value); \
4787 } else { \
4788 snprintf_l(buffer, 255, NULL, formatBuffer, value); \
4789 } \
4790 }}
4791
4792 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, CFDictionaryRef), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
4793 SInt32 numSpecs, sizeSpecs, sizeArgNum, formatIdx, curSpec, argNum;
4794 CFIndex formatLen;
4795 #define FORMAT_BUFFER_LEN 400
4796 const uint8_t *cformat = NULL;
4797 const UniChar *uformat = NULL;
4798 UniChar *formatChars = NULL;
4799 UniChar localFormatBuffer[FORMAT_BUFFER_LEN];
4800
4801 #define VPRINTF_BUFFER_LEN 61
4802 CFFormatSpec localSpecsBuffer[VPRINTF_BUFFER_LEN];
4803 CFFormatSpec *specs;
4804 CFPrintValue localValuesBuffer[VPRINTF_BUFFER_LEN];
4805 CFPrintValue *values;
4806 CFAllocatorRef tmpAlloc = NULL;
4807
4808 numSpecs = 0;
4809 sizeSpecs = 0;
4810 sizeArgNum = 0;
4811 specs = NULL;
4812 values = NULL;
4813
4814 formatLen = CFStringGetLength(formatString);
4815 if (!CF_IS_OBJC(__kCFStringTypeID, formatString)) {
4816 __CFAssertIsString(formatString);
4817 if (!__CFStrIsUnicode(formatString)) {
4818 cformat = __CFStrContents(formatString);
4819 if (cformat) cformat += __CFStrSkipAnyLengthByte(formatString);
4820 } else {
4821 uformat = __CFStrContents(formatString);
4822 }
4823 }
4824 if (!cformat && !uformat) {
4825 formatChars = (formatLen > FORMAT_BUFFER_LEN) ? CFAllocatorAllocate(tmpAlloc = __CFGetDefaultAllocator(), formatLen * sizeof(UniChar), 0) : localFormatBuffer;
4826 if (formatChars != localFormatBuffer && __CFOASafe) __CFSetLastAllocationEventName(formatChars, "CFString (temp)");
4827 CFStringGetCharacters(formatString, CFRangeMake(0, formatLen), formatChars);
4828 uformat = formatChars;
4829 }
4830
4831 /* Compute an upper bound for the number of format specifications */
4832 if (cformat) {
4833 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == cformat[formatIdx]) sizeSpecs++;
4834 } else {
4835 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == uformat[formatIdx]) sizeSpecs++;
4836 }
4837 tmpAlloc = __CFGetDefaultAllocator();
4838 specs = ((2 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? CFAllocatorAllocate(tmpAlloc, (2 * sizeSpecs + 1) * sizeof(CFFormatSpec), 0) : localSpecsBuffer;
4839 if (specs != localSpecsBuffer && __CFOASafe) __CFSetLastAllocationEventName(specs, "CFString (temp)");
4840
4841 /* Collect format specification information from the format string */
4842 for (curSpec = 0, formatIdx = 0; formatIdx < formatLen; curSpec++) {
4843 SInt32 newFmtIdx;
4844 specs[curSpec].loc = formatIdx;
4845 specs[curSpec].len = 0;
4846 specs[curSpec].size = 0;
4847 specs[curSpec].type = 0;
4848 specs[curSpec].flags = 0;
4849 specs[curSpec].widthArg = -1;
4850 specs[curSpec].precArg = -1;
4851 specs[curSpec].mainArgNum = -1;
4852 specs[curSpec].precArgNum = -1;
4853 specs[curSpec].widthArgNum = -1;
4854 if (cformat) {
4855 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != cformat[newFmtIdx]; newFmtIdx++);
4856 } else {
4857 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != uformat[newFmtIdx]; newFmtIdx++);
4858 }
4859 if (newFmtIdx != formatIdx) { /* Literal chunk */
4860 specs[curSpec].type = CFFormatLiteralType;
4861 specs[curSpec].len = newFmtIdx - formatIdx;
4862 } else {
4863 newFmtIdx++; /* Skip % */
4864 __CFParseFormatSpec(uformat, cformat, &newFmtIdx, formatLen, &(specs[curSpec]));
4865 if (CFFormatLiteralType == specs[curSpec].type) {
4866 specs[curSpec].loc = formatIdx + 1;
4867 specs[curSpec].len = 1;
4868 } else {
4869 specs[curSpec].len = newFmtIdx - formatIdx;
4870 }
4871 }
4872 formatIdx = newFmtIdx;
4873
4874 // fprintf(stderr, "specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
4875
4876 }
4877 numSpecs = curSpec;
4878 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
4879 values = ((3 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? CFAllocatorAllocate(tmpAlloc, (3 * sizeSpecs + 1) * sizeof(CFPrintValue), 0) : localValuesBuffer;
4880 if (values != localValuesBuffer && __CFOASafe) __CFSetLastAllocationEventName(values, "CFString (temp)");
4881 memset(values, 0, (3 * sizeSpecs + 1) * sizeof(CFPrintValue));
4882 sizeArgNum = (3 * sizeSpecs + 1);
4883
4884 /* Compute values array */
4885 argNum = 0;
4886 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
4887 SInt32 newMaxArgNum;
4888 if (0 == specs[curSpec].type) continue;
4889 if (CFFormatLiteralType == specs[curSpec].type) continue;
4890 newMaxArgNum = sizeArgNum;
4891 if (newMaxArgNum < specs[curSpec].mainArgNum) {
4892 newMaxArgNum = specs[curSpec].mainArgNum;
4893 }
4894 if (newMaxArgNum < specs[curSpec].precArgNum) {
4895 newMaxArgNum = specs[curSpec].precArgNum;
4896 }
4897 if (newMaxArgNum < specs[curSpec].widthArgNum) {
4898 newMaxArgNum = specs[curSpec].widthArgNum;
4899 }
4900 if (sizeArgNum < newMaxArgNum) {
4901 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
4902 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
4903 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
4904 return; // more args than we expected!
4905 }
4906 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
4907 if (-2 == specs[curSpec].widthArgNum) {
4908 specs[curSpec].widthArgNum = argNum++;
4909 }
4910 if (-2 == specs[curSpec].precArgNum) {
4911 specs[curSpec].precArgNum = argNum++;
4912 }
4913 if (-1 == specs[curSpec].mainArgNum) {
4914 specs[curSpec].mainArgNum = argNum++;
4915 }
4916 values[specs[curSpec].mainArgNum].size = specs[curSpec].size;
4917 values[specs[curSpec].mainArgNum].type = specs[curSpec].type;
4918 if (-1 != specs[curSpec].widthArgNum) {
4919 values[specs[curSpec].widthArgNum].size = 0;
4920 values[specs[curSpec].widthArgNum].type = CFFormatLongType;
4921 }
4922 if (-1 != specs[curSpec].precArgNum) {
4923 values[specs[curSpec].precArgNum].size = 0;
4924 values[specs[curSpec].precArgNum].type = CFFormatLongType;
4925 }
4926 }
4927
4928 /* Collect the arguments in correct type from vararg list */
4929 for (argNum = 0; argNum < sizeArgNum; argNum++) {
4930 switch (values[argNum].type) {
4931 case 0:
4932 case CFFormatLiteralType:
4933 break;
4934 case CFFormatLongType:
4935 case CFFormatSingleUnicharType:
4936 if (CFFormatSize1 == values[argNum].size) {
4937 values[argNum].value.int64Value = (int64_t)(int8_t)va_arg(args, int);
4938 } else if (CFFormatSize2 == values[argNum].size) {
4939 values[argNum].value.int64Value = (int64_t)(int16_t)va_arg(args, int);
4940 } else if (CFFormatSize4 == values[argNum].size) {
4941 values[argNum].value.int64Value = (int64_t)va_arg(args, int32_t);
4942 } else if (CFFormatSize8 == values[argNum].size) {
4943 values[argNum].value.int64Value = (int64_t)va_arg(args, int64_t);
4944 } else {
4945 values[argNum].value.int64Value = (int64_t)va_arg(args, int);
4946 }
4947 break;
4948 case CFFormatDoubleType:
4949 values[argNum].value.doubleValue = va_arg(args, double);
4950 break;
4951 case CFFormatPointerType:
4952 case CFFormatObjectType:
4953 case CFFormatCFType:
4954 case CFFormatUnicharsType:
4955 case CFFormatCharsType:
4956 case CFFormatPascalCharsType:
4957 values[argNum].value.pointerValue = va_arg(args, void *);
4958 break;
4959 }
4960 }
4961 va_end(args);
4962
4963 /* Format the pieces together */
4964 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
4965 SInt32 width = 0, precision = 0;
4966 UniChar *up, ch;
4967 Boolean hasWidth = false, hasPrecision = false;
4968
4969 // widthArgNum and widthArg are never set at the same time; same for precArg*
4970 if (-1 != specs[curSpec].widthArgNum) {
4971 width = (SInt32)values[specs[curSpec].widthArgNum].value.int64Value;
4972 hasWidth = true;
4973 }
4974 if (-1 != specs[curSpec].precArgNum) {
4975 precision = (SInt32)values[specs[curSpec].precArgNum].value.int64Value;
4976 hasPrecision = true;
4977 }
4978 if (-1 != specs[curSpec].widthArg) {
4979 width = specs[curSpec].widthArg;
4980 hasWidth = true;
4981 }
4982 if (-1 != specs[curSpec].precArg) {
4983 precision = specs[curSpec].precArg;
4984 hasPrecision = true;
4985 }
4986
4987 switch (specs[curSpec].type) {
4988 case CFFormatLongType:
4989 case CFFormatDoubleType:
4990 case CFFormatPointerType: {
4991 int8_t formatBuffer[128];
4992 #if defined(__GNUC__)
4993 int8_t buffer[256 + width + precision];
4994 #else
4995 int8_t stackBuffer[512];
4996 int8_t *dynamicBuffer = NULL;
4997 int8_t *buffer = stackBuffer;
4998 if (256+width+precision > 512) {
4999 dynamicBuffer = CFAllocatorAllocate(NULL, 256+width+precision, 0);
5000 buffer = dynamicBuffer;
5001 }
5002 #endif
5003 SInt32 cidx, idx, loc;
5004 Boolean appended = false;
5005 loc = specs[curSpec].loc;
5006 // In preparation to call snprintf(), copy the format string out
5007 if (cformat) {
5008 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
5009 if ('$' == cformat[loc + cidx]) {
5010 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
5011 } else {
5012 formatBuffer[idx] = cformat[loc + cidx];
5013 }
5014 }
5015 } else {
5016 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
5017 if ('$' == uformat[loc + cidx]) {
5018 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
5019 } else {
5020 formatBuffer[idx] = (int8_t)uformat[loc + cidx];
5021 }
5022 }
5023 }
5024 formatBuffer[idx] = '\0';
5025 // Should modify format buffer here if necessary; for example, to translate %qd to
5026 // the equivalent, on architectures which do not have %q.
5027 buffer[sizeof(buffer) - 1] = '\0';
5028 switch (specs[curSpec].type) {
5029 case CFFormatLongType:
5030 if (CFFormatSize8 == specs[curSpec].size) {
5031 SNPRINTF(int64_t, values[specs[curSpec].mainArgNum].value.int64Value)
5032 } else {
5033 SNPRINTF(SInt32, values[specs[curSpec].mainArgNum].value.int64Value)
5034 }
5035 break;
5036 case CFFormatPointerType:
5037 SNPRINTF(void *, values[specs[curSpec].mainArgNum].value.pointerValue)
5038 break;
5039
5040 case CFFormatDoubleType:
5041 SNPRINTF(double, values[specs[curSpec].mainArgNum].value.doubleValue)
5042 // See if we need to localize the decimal point
5043 if (formatOptions) { // We have a localization dictionary
5044 CFStringRef decimalSeparator = CFDictionaryGetValue(formatOptions, kCFNSDecimalSeparatorKey);
5045 if (decimalSeparator != NULL) { // We have a decimal separator in there
5046 CFIndex decimalPointLoc = 0;
5047 while (buffer[decimalPointLoc] != 0 && buffer[decimalPointLoc] != '.') decimalPointLoc++;
5048 if (buffer[decimalPointLoc] == '.') { // And we have a decimal point in the formatted string
5049 buffer[decimalPointLoc] = 0;
5050 CFStringAppendCString(outputString, buffer, __CFStringGetEightBitStringEncoding());
5051 CFStringAppend(outputString, decimalSeparator);
5052 CFStringAppendCString(outputString, buffer + decimalPointLoc + 1, __CFStringGetEightBitStringEncoding());
5053 appended = true;
5054 }
5055 }
5056 }
5057 break;
5058 }
5059 if (!appended) CFStringAppendCString(outputString, buffer, __CFStringGetEightBitStringEncoding());
5060 }
5061 #if !defined(__GNUC__)
5062 if (dynamicBuffer) {
5063 CFAllocatorDeallocate(NULL, dynamicBuffer);
5064 }
5065 #endif
5066 break;
5067 case CFFormatLiteralType:
5068 if (cformat) {
5069 __CFStringAppendBytes(outputString, cformat+specs[curSpec].loc, specs[curSpec].len, __CFStringGetEightBitStringEncoding());
5070 } else {
5071 CFStringAppendCharacters(outputString, uformat+specs[curSpec].loc, specs[curSpec].len);
5072 }
5073 break;
5074 case CFFormatPascalCharsType:
5075 case CFFormatCharsType:
5076 if (values[specs[curSpec].mainArgNum].value.pointerValue == NULL) {
5077 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5078 } else {
5079 int len;
5080 const char *str = values[specs[curSpec].mainArgNum].value.pointerValue;
5081 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
5082 len = ((unsigned char *)str)[0];
5083 str++;
5084 if (hasPrecision && precision < len) len = precision;
5085 } else { // C-string case
5086 if (!hasPrecision) { // No precision, so rely on the terminating null character
5087 len = strlen(str);
5088 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
5089 const char *terminatingNull = memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
5090 if (terminatingNull) { // There was a null in the first precision characters
5091 len = terminatingNull - str;
5092 } else {
5093 len = precision;
5094 }
5095 }
5096 }
5097 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5098 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5099 // to ignore those flags (and, say, never pad with '0' instead of space).
5100 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
5101 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
5102 if (hasWidth && width > len) {
5103 int w = width - len; // We need this many spaces; do it ten at a time
5104 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5105 }
5106 } else {
5107 if (hasWidth && width > len) {
5108 int w = width - len; // We need this many spaces; do it ten at a time
5109 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5110 }
5111 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
5112 }
5113 }
5114 break;
5115 case CFFormatSingleUnicharType:
5116 ch = values[specs[curSpec].mainArgNum].value.int64Value;
5117 CFStringAppendCharacters(outputString, &ch, 1);
5118 break;
5119 case CFFormatUnicharsType:
5120 //??? need to handle width, precision, and padding arguments
5121 up = values[specs[curSpec].mainArgNum].value.pointerValue;
5122 if (NULL == up) {
5123 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5124 } else {
5125 int len;
5126 for (len = 0; 0 != up[len]; len++);
5127 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
5128 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
5129 // to ignore those flags (and, say, never pad with '0' instead of space).
5130 if (hasPrecision && precision < len) len = precision;
5131 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
5132 CFStringAppendCharacters(outputString, up, len);
5133 if (hasWidth && width > len) {
5134 int w = width - len; // We need this many spaces; do it ten at a time
5135 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5136 }
5137 } else {
5138 if (hasWidth && width > len) {
5139 int w = width - len; // We need this many spaces; do it ten at a time
5140 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
5141 }
5142 CFStringAppendCharacters(outputString, up, len);
5143 }
5144 }
5145 break;
5146 case CFFormatCFType:
5147 case CFFormatObjectType:
5148 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
5149 CFStringRef str = NULL;
5150 if (copyDescFunc) {
5151 str = copyDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
5152 } else {
5153 str = __CFCopyFormattingDescription(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
5154 if (NULL == str) {
5155 str = CFCopyDescription(values[specs[curSpec].mainArgNum].value.pointerValue);
5156 }
5157 }
5158 if (str) {
5159 CFStringAppend(outputString, str);
5160 CFRelease(str);
5161 } else {
5162 CFStringAppendCString(outputString, "(null description)", kCFStringEncodingASCII);
5163 }
5164 } else {
5165 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
5166 }
5167 break;
5168 }
5169 }
5170
5171 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
5172 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
5173 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
5174
5175 }
5176
5177 #undef SNPRINTF
5178
5179 void CFShowStr(CFStringRef str) {
5180 CFAllocatorRef alloc;
5181
5182 if (!str) {
5183 fprintf(stdout, "(null)\n");
5184 return;
5185 }
5186
5187 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
5188 fprintf(stdout, "This is an NSString, not CFString\n");
5189 return;
5190 }
5191
5192 alloc = CFGetAllocator(str);
5193
5194 fprintf(stdout, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str), __CFStrIsEightBit(str));
5195 fprintf(stdout, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
5196 __CFStrHasLengthByte(str), __CFStrHasNullByte(str), __CFStrIsInline(str));
5197
5198 fprintf(stdout, "Allocator ");
5199 if (alloc != kCFAllocatorSystemDefault) {
5200 fprintf(stdout, "%p\n", (void *)alloc);
5201 } else {
5202 fprintf(stdout, "SystemDefault\n");
5203 }
5204 fprintf(stdout, "Mutable %d\n", __CFStrIsMutable(str));
5205 if (!__CFStrIsMutable(str) && __CFStrHasContentsDeallocator(str)) {
5206 if (__CFStrContentsDeallocator(str)) fprintf(stdout, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str));
5207 else fprintf(stdout, "ContentsDeallocatorFunc None\n");
5208 } else if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str)) {
5209 fprintf(stdout, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef)str));
5210 }
5211
5212 if (__CFStrIsMutable(str)) {
5213 fprintf(stdout, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str), __CFStrIsFixed(str) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str));
5214 }
5215 fprintf(stdout, "Contents %p\n", (void *)__CFStrContents(str));
5216 }
5217
5218
5219