]> git.saurik.com Git - apple/cf.git/blob - String.subproj/CFString.c
CF-299.35.tar.gz
[apple/cf.git] / String.subproj / CFString.c
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* CFString.c
26 Copyright 1998-2002, Apple, Inc. All rights reserved.
27 Responsibility: Ali Ozer
28 */
29
30 #include <CoreFoundation/CFBase.h>
31 #include <CoreFoundation/CFString.h>
32 #include <CoreFoundation/CFDictionary.h>
33 #include "CFStringEncodingConverterExt.h"
34 #include "CFUniChar.h"
35 #include "CFUnicodeDecomposition.h"
36 #include "CFUnicodePrecomposition.h"
37 #include "CFUtilities.h"
38 #include "CFInternal.h"
39 #include <stdarg.h>
40 #include <stdio.h>
41 /* strncmp, etc */
42 #include <string.h>
43 #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
44 #include <unistd.h>
45 #endif
46 #if defined(__WIN32__)
47 #include <windows.h>
48 #endif /* __WIN32__ */
49
50 extern size_t malloc_good_size(size_t size);
51 extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars);
52
#if defined(DEBUG)

/* Debug-only allocator associated with CFSTR constants so that deallocations of them can be caught. */
static CFAllocatorRef constantStringAllocatorForDebugging = NULL;

/* Text placed into C and Pascal string buffers when a conversion cannot be performed. */
#define CONVERSIONFAILURESTR "CFString conversion failed"

/* Set to true while the constant string table is being purged, so that CFStringDeallocate does not assert. */
static Boolean __CFConstantStringTableBeingFreed = false;

#endif
65
66
67 // This section is for CFString compatibility and other behaviors...
68
69 static CFOptionFlags _CFStringCompatibilityMask = 0;
70
71 #define Bug2967272 1
72
73 void _CFStringSetCompatibility(CFOptionFlags mask) {
74 _CFStringCompatibilityMask |= mask;
75 }
76
77 CF_INLINE Boolean __CFStringGetCompatibility(CFOptionFlags mask) {
78 return (_CFStringCompatibilityMask & mask) == mask;
79 }
80
81
82
83 // Two constant strings used by CFString; these are initialized in CFStringInitialize
84 CONST_STRING_DECL(kCFEmptyString, "")
85 CONST_STRING_DECL(kCFNSDecimalSeparatorKey, "NSDecimalSeparator")
86
87
88 /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields.
89 */
90 struct __CFString {
91 CFRuntimeBase base;
92 union { // In many cases the allocated structs are smaller than these
93 struct {
94 SInt32 length;
95 } inline1;
96
97 struct {
98 void *buffer;
99 UInt32 length;
100 CFAllocatorRef contentsDeallocator; // Just the dealloc func is used
101 } externalImmutable1;
102 struct {
103 void *buffer;
104 CFAllocatorRef contentsDeallocator; // Just the dealloc func is used
105 } externalImmutable2;
106 struct {
107 void *buffer;
108 UInt32 length;
109 UInt32 capacityFields; // Currently only stores capacity
110 UInt32 gapEtc; // Stores some bits, plus desired or fixed capacity
111 CFAllocatorRef contentsAllocator; // Optional
112 } externalMutable;
113 } variants;
114 };
115
116 /*
117 I = is immutable
118 E = not inline contents
119 U = is Unicode
120 N = has NULL byte
121 L = has length byte
122 D = explicit deallocator for contents (for mutable objects, allocator)
123 X = is external mutable
124
125 Also need (only for mutable)
126 F = is fixed
127 G = has gap
128 Cap, DesCap = capacity
129
130 B7 B6 B5 B4 B3 B2 B1 B0
131 U N L X I
132
133 B6 B5
134 0 0 inline contents
135 0 1 E (freed with default allocator)
136 1 0 E (not freed)
137 1 1 E D
138 */
139
/* Flag bits describing a CFString. The first group is stored in base._info; the last group
   is stored in variants.externalMutable.gapEtc (mutable strings only).
*/
enum {
    /* --- base._info: contents storage (bits B6 and B5) --- */
    __kCFFreeContentsWhenDoneMask = 0x020,
    __kCFFreeContentsWhenDone = 0x020,
    __kCFContentsMask = 0x060,
    __kCFHasInlineData = 0x000,
    __kCFHasExternalDataNoFree = 0x040,		// Don't free
    __kCFHasExternalDataDefaultFree = 0x020,	// Use allocator's free function
    __kCFHasExternalDataCustomFree = 0x060,	// Use a specially provided free function
    __kCFHasContentsAllocatorMask = 0x060,
    __kCFHasContentsAllocator = 0x060,		// (For mutable strings) use a specially provided allocator
    __kCFHasContentsDeallocatorMask = 0x060,
    __kCFHasContentsDeallocator = 0x060,

    /* --- base._info: single-bit attributes --- */
    __kCFIsMutableMask = 0x01,
    __kCFIsMutable = 0x01,
    __kCFIsUnicodeMask = 0x10,
    __kCFIsUnicode = 0x10,
    __kCFHasNullByteMask = 0x08,
    __kCFHasNullByte = 0x08,
    __kCFHasLengthByteMask = 0x04,
    __kCFHasLengthByte = 0x04,
    __kCFIsExternalMutableMask = 0x02,		// For now we use this bit; can switch to something else
    __kCFIsExternalMutable = 0x02,

    /* --- variants.externalMutable.gapEtc --- */
    __kCFGapMask = 0x00ffffff,
    __kCFGapBitNumber = 24,
    __kCFDesiredCapacityMask = 0x00ffffff,	// Currently gap and fixed share same bits as gap not implemented
    __kCFDesiredCapacityBitNumber = 24,
    __kCFIsFixedMask = 0x80000000,
    __kCFIsFixed = 0x80000000,
    __kCFHasGapMask = 0x40000000,
    __kCFHasGap = 0x40000000,
    __kCFCapacityProvidedExternallyMask = 0x20000000,	// Set if the external buffer is set explicitly by the developer
    __kCFCapacityProvidedExternally = 0x20000000
};
174
175
176 // !!! Assumptions:
177 // Mutable strings are not inline
178 // Compile-time constant strings are not inline
179 // Mutable strings always have explicit length (but they might also have length byte and null byte)
180 // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings)
181 // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2
182
183 /* The following set of functions and macros need to be updated on change to the bit configuration
184 */
185 CF_INLINE Boolean __CFStrIsMutable(CFStringRef str) {return (str->base._info & __kCFIsMutableMask) == __kCFIsMutable;}
186 CF_INLINE Boolean __CFStrIsExternalMutable(CFStringRef str) {return (str->base._info & __kCFIsExternalMutableMask) == __kCFIsExternalMutable;}
187 CF_INLINE Boolean __CFStrIsInline(CFStringRef str) {return (str->base._info & __kCFContentsMask) == __kCFHasInlineData;}
188 CF_INLINE Boolean __CFStrFreeContentsWhenDone(CFStringRef str) {return (str->base._info & __kCFFreeContentsWhenDoneMask) == __kCFFreeContentsWhenDone;}
189 CF_INLINE Boolean __CFStrHasContentsDeallocator(CFStringRef str) {return (str->base._info & __kCFHasContentsDeallocatorMask) == __kCFHasContentsDeallocator;}
190 CF_INLINE Boolean __CFStrIsUnicode(CFStringRef str) {return (str->base._info & __kCFIsUnicodeMask) == __kCFIsUnicode;}
191 CF_INLINE Boolean __CFStrIsEightBit(CFStringRef str) {return (str->base._info & __kCFIsUnicodeMask) != __kCFIsUnicode;}
192 CF_INLINE Boolean __CFStrHasNullByte(CFStringRef str) {return (str->base._info & __kCFHasNullByteMask) == __kCFHasNullByte;}
193 CF_INLINE Boolean __CFStrHasLengthByte(CFStringRef str) {return (str->base._info & __kCFHasLengthByteMask) == __kCFHasLengthByte;}
194 CF_INLINE Boolean __CFStrHasExplicitLength(CFStringRef str) {return (str->base._info & (__kCFIsMutableMask | __kCFHasLengthByteMask)) != __kCFHasLengthByte;} // Has explicit length if (1) mutable or (2) not mutable and no length byte
195
196 CF_INLINE SInt32 __CFStrSkipAnyLengthByte(CFStringRef str) {return ((str->base._info & __kCFHasLengthByteMask) == __kCFHasLengthByte) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents
197
198 /* Returns ptr to the buffer (which might include the length byte)
199 */
200 CF_INLINE const void *__CFStrContents(CFStringRef str) {
201 if (__CFStrIsInline(str)) {
202 return (const void *)(((UInt32)&(str->variants)) + (__CFStrHasExplicitLength(str) ? sizeof(UInt32) : 0));
203 } else { // External; pointer is always word 2
204 return str->variants.externalImmutable1.buffer;
205 }
206 }
207
208 static CFAllocatorRef *__CFStrContentsDeallocatorPtr(CFStringRef str) {
209 return __CFStrHasExplicitLength(str) ? &(((CFMutableStringRef)str)->variants.externalImmutable1.contentsDeallocator) : &(((CFMutableStringRef)str)->variants.externalImmutable2.contentsDeallocator); }
210
211 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
212 CF_INLINE CFAllocatorRef __CFStrContentsDeallocator(CFStringRef str) {
213 return *__CFStrContentsDeallocatorPtr(str);
214 }
215
216 // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator
217 CF_INLINE void __CFStrSetContentsDeallocator(CFStringRef str, CFAllocatorRef contentsAllocator) {
218 *__CFStrContentsDeallocatorPtr(str) = contentsAllocator;
219 }
220
221 static CFAllocatorRef *__CFStrContentsAllocatorPtr(CFStringRef str) {
222 CFAssert(!__CFStrIsInline(str), __kCFLogAssertion, "Asking for contents allocator of inline string");
223 CFAssert(__CFStrIsMutable(str), __kCFLogAssertion, "Asking for contents allocator of an immutable string");
224 return (CFAllocatorRef *)&(str->variants.externalMutable.contentsAllocator);
225 }
226
227 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
228 CF_INLINE CFAllocatorRef __CFStrContentsAllocator(CFMutableStringRef str) {
229 return *(__CFStrContentsAllocatorPtr(str));
230 }
231
232 // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom
233 CF_INLINE void __CFStrSetContentsAllocator(CFMutableStringRef str, CFAllocatorRef alloc) {
234 *(__CFStrContentsAllocatorPtr(str)) = alloc;
235 }
236
237 /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed.
238 */
239 CF_INLINE CFIndex __CFStrLength(CFStringRef str) {
240 if (__CFStrHasExplicitLength(str)) {
241 if (__CFStrIsInline(str)) {
242 return str->variants.inline1.length;
243 } else {
244 CFIndex len = str->variants.externalImmutable1.length;
245 if (len == 0x0ffffff) ((CFMutableStringRef)str)->variants.externalImmutable1.length = (len = strlen(__CFStrContents(str))); /* For compile-time constant strings */
246 return len;
247 }
248 } else {
249 return (CFIndex)(*((uint8_t *)__CFStrContents(str)));
250 }
251 }
252
253 CF_INLINE CFIndex __CFStrLength2(CFStringRef str, const void *buffer) {
254 if (__CFStrHasExplicitLength(str)) {
255 if (__CFStrIsInline(str)) {
256 return str->variants.inline1.length;
257 } else {
258 CFIndex len = str->variants.externalImmutable1.length;
259 if (len == 0x0ffffff) ((CFMutableStringRef)str)->variants.externalImmutable1.length = (len = strlen(buffer)); /* For compile-time constant strings */
260 return len;
261 }
262 } else {
263 return (CFIndex)(*((uint8_t *)buffer));
264 }
265 }
266
267 Boolean __CFStringIsMutable(CFStringRef str) {
268 return __CFStrIsMutable(str);
269 }
270
271 Boolean __CFStringIsEightBit(CFStringRef str) {
272 return __CFStrIsEightBit(str);
273 }
274
275 /* Sets the external content pointer for immutable or mutable strings.
276 */
277 CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p) {((CFMutableStringRef)str)->variants.externalImmutable1.buffer = (void *)p;}
278 CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v) {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._info, 6, 0, v);}
279
280 CF_INLINE void __CFStrSetExplicitLength(CFStringRef str, CFIndex v) {
281 if (__CFStrIsInline(str)) {
282 ((CFMutableStringRef)str)->variants.inline1.length = v;
283 } else {
284 ((CFMutableStringRef)str)->variants.externalImmutable1.length = v;
285 }
286 }
287
288 // Assumption: Called with mutable strings only
289 CF_INLINE Boolean __CFStrIsFixed(CFStringRef str) {return (str->variants.externalMutable.gapEtc & __kCFIsFixedMask) == __kCFIsFixed;}
290 CF_INLINE Boolean __CFStrHasContentsAllocator(CFStringRef str) {return (str->base._info & __kCFHasContentsAllocatorMask) == __kCFHasContentsAllocator;}
291
292 // If capacity is provided externally, we only change it when we need to grow beyond it
293 CF_INLINE Boolean __CFStrCapacityProvidedExternally(CFStringRef str) {return (str->variants.externalMutable.gapEtc & __kCFCapacityProvidedExternallyMask) == __kCFCapacityProvidedExternally;}
294 CF_INLINE void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str) {str->variants.externalMutable.gapEtc |= __kCFCapacityProvidedExternally;}
295 CF_INLINE void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str) {str->variants.externalMutable.gapEtc &= ~__kCFCapacityProvidedExternally;}
296
297
298 CF_INLINE void __CFStrSetIsFixed(CFMutableStringRef str) {str->variants.externalMutable.gapEtc |= __kCFIsFixed;}
299 CF_INLINE void __CFStrSetHasGap(CFMutableStringRef str) {str->variants.externalMutable.gapEtc |= __kCFHasGap;}
300 CF_INLINE void __CFStrSetUnicode(CFMutableStringRef str) {str->base._info |= __kCFIsUnicode;}
301 CF_INLINE void __CFStrClearUnicode(CFMutableStringRef str) {str->base._info &= ~__kCFIsUnicode;}
302 CF_INLINE void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str) {str->base._info |= (__kCFHasLengthByte | __kCFHasNullByte);}
303 CF_INLINE void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str) {str->base._info &= ~(__kCFHasLengthByte | __kCFHasNullByte);}
304
305
306 static void *__CFStrAllocateMutableContents(CFMutableStringRef str, CFIndex size) {
307 void *ptr;
308 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
309 ptr = CFAllocatorAllocate(alloc, size, 0);
310 if (__CFOASafe) __CFSetLastAllocationEventName(ptr, "CFString (store)");
311 return ptr;
312 }
313
314 static void __CFStrDeallocateMutableContents(CFMutableStringRef str, void *buffer) {
315 CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str);
316 CFAllocatorDeallocate(alloc, buffer);
317 }
318
319
320 // The following set of functions should only be called on mutable strings
321
322 /* "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer.
323 "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store.
324 */
325 CF_INLINE CFIndex __CFStrCapacity(CFStringRef str) {return str->variants.externalMutable.capacityFields;}
326 CF_INLINE void __CFStrSetCapacity(CFMutableStringRef str, CFIndex cap) {str->variants.externalMutable.capacityFields = cap;}
327 CF_INLINE CFIndex __CFStrDesiredCapacity(CFStringRef str) {return __CFBitfieldGetValue(str->variants.externalMutable.gapEtc, __kCFDesiredCapacityBitNumber, 0);}
328 CF_INLINE void __CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex size) {__CFBitfieldSetValue(str->variants.externalMutable.gapEtc, __kCFDesiredCapacityBitNumber, 0, size);}
329
330
331
332
/* CFString specific init flags.
   Note that you cannot count on the external buffer not being copied.
   Also, if you specify an external buffer, you should not change it behind the CFString's back.
*/
enum {
    /* See if the Unicode contents can be thinned down to 8-bit */
    __kCFThinUnicodeIfPossible = 0x1000000,
    /* Indicating that the string data has a Pascal string structure (length byte at start) */
    kCFStringPascal = 0x10000,
    /* Don't copy the provided string contents if possible; free it when no longer needed */
    kCFStringNoCopyProvidedContents = 0x20000,
    /* Don't copy the provided string contents if possible; don't free it when no longer needed */
    kCFStringNoCopyNoFreeProvidedContents = 0x30000
};
343
344 /* Size for temporary buffers
345 */
346 #define MAXTMPBUFFERLEN (2048)
347 #define MAXISDECOMPBUFFERLEN (32)
348
349 /* System Encoding.
350 */
351 static CFStringEncoding __CFDefaultSystemEncoding = kCFStringEncodingInvalidId;
352 static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId;
353 CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId;
354
355 CFStringEncoding CFStringGetSystemEncoding(void) {
356
357 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) {
358 const CFStringEncodingConverter *converter = NULL;
359 #if defined(__MACOS8__) || defined(__MACH__)
360 __CFDefaultSystemEncoding = kCFStringEncodingMacRoman; // MacRoman is built-in so always available
361 #elif defined(__WIN32__)
362 __CFDefaultSystemEncoding = kCFStringEncodingWindowsLatin1; // WinLatin1 is built-in so always available
363 #elif defined(__LINUX__) || defined(__FREEBSD__)
364 __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default
365 #else // Solaris && HP-UX ?
366 __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default
367 #endif
368 converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding);
369
370 __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? converter->toUnicode : NULL);
371 }
372
373 return __CFDefaultSystemEncoding;
374 }
375
376 // Fast version for internal use
377
378 CF_INLINE CFStringEncoding __CFStringGetSystemEncoding(void) {
379 if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) (void)CFStringGetSystemEncoding();
380 return __CFDefaultSystemEncoding;
381 }
382
383 CFStringEncoding CFStringFileSystemEncoding(void) {
384 if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) {
385 #if defined(__MACH__)
386 __CFDefaultFileSystemEncoding = kCFStringEncodingUTF8;
387 #else
388 __CFDefaultFileSystemEncoding = CFStringGetSystemEncoding();
389 #endif
390 }
391
392 return __CFDefaultFileSystemEncoding;
393 }
394
395 /* ??? Is returning length when no other answer is available the right thing?
396 */
397 CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) {
398 if (encoding == kCFStringEncodingUTF8) {
399 return _CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? (length * 3) : (length * 6); // 1 Unichar could expand to 3 bytes; we return 6 for older apps for compatibility
400 } else {
401 encoding &= 0xFFF; // Mask off non-base part
402 }
403 switch (encoding) {
404 case kCFStringEncodingUnicode:
405 return length * sizeof(UniChar);
406
407 case kCFStringEncodingNonLossyASCII:
408 return length * 6; // 1 Unichar could expand to 6 bytes
409
410 case kCFStringEncodingMacRoman:
411 case kCFStringEncodingWindowsLatin1:
412 case kCFStringEncodingISOLatin1:
413 case kCFStringEncodingNextStepLatin:
414 case kCFStringEncodingASCII:
415 return length / sizeof(uint8_t);
416
417 default:
418 return length / sizeof(uint8_t);
419 }
420 }
421
422
423 /* Returns whether the indicated encoding can be stored in 8-bit chars
424 */
425 CF_INLINE Boolean __CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding) {
426 switch (encoding) {
427 case kCFStringEncodingInvalidId:
428 case kCFStringEncodingUnicode:
429 case kCFStringEncodingUTF8:
430 case kCFStringEncodingNonLossyASCII:
431 return false;
432
433 case kCFStringEncodingMacRoman:
434 case kCFStringEncodingWindowsLatin1:
435 case kCFStringEncodingISOLatin1:
436 case kCFStringEncodingNextStepLatin:
437 case kCFStringEncodingASCII:
438 return true;
439
440 default: return false;
441 }
442 }
443
444 /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode)
445 ??? Perhaps only ASCII fits the bill due to Unicode decomposition.
446 */
447 CFStringEncoding __CFStringComputeEightBitStringEncoding(void) {
448 if (__CFDefaultEightBitStringEncoding == kCFStringEncodingInvalidId) {
449 CFStringEncoding systemEncoding = CFStringGetSystemEncoding();
450 if (systemEncoding == kCFStringEncodingInvalidId) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined.
451 return kCFStringEncodingASCII;
452 } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding)) {
453 __CFDefaultEightBitStringEncoding = systemEncoding;
454 } else {
455 __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII;
456 }
457 }
458
459 return __CFDefaultEightBitStringEncoding;
460 }
461
462 /* Returns whether the provided bytes can be stored in ASCII
463 */
464 CF_INLINE Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) {
465 while (len--) if ((uint8_t)(*bytes++) >= 128) return false;
466 return true;
467 }
468
469 /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString.
470 */
471 CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) {
472 if (encoding == __CFStringGetEightBitStringEncoding()) return true;
473 if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true;
474 return false;
475 }
476
477
478 /* Returns whether a length byte can be tacked on to a string of the indicated length.
479 */
480 CF_INLINE Boolean __CFCanUseLengthByte(CFIndex len) {
481 #define __kCFMaxPascalStrLen 255
482 return (len <= __kCFMaxPascalStrLen) ? true : false;
483 }
484
/* Various string assertions.
   Each macro validates its argument(s) through the CFAssert family or __CFGenericValidateType.
   Macro parameters are parenthesized wherever they appear inside an asserted condition, so
   that compound argument expressions (e.g. "a + b" or "c ? x : y") are evaluated as a unit.
   The two "IsStringAndXxx" macros expand to a brace-enclosed block, not a single expression.
*/
#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && ((idx) + (count)) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
#define __CFAssertLengthIsOK(len) CFAssert2((len) < __kCFMaxLength, __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, len)
#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsNotNegative(idx) CFAssert2((idx) >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
495
496
/* Basic algorithm is to shrink the buffer when the required capacity drops below SHRINKFACTOR(capacity), i.e. half the current capacity, and to allocate GROWFACTOR(required capacity), i.e. about one and a half times what is needed, when the buffer has to grow and extra room is wanted.
Additional complications are applied in the following order:
- desiredCapacity, which is the minimum (except initially things can be at zero)
- rounding up to a multiple of 8
- compressing (to fit the number in 16 bits), which effectively rounds up to a multiple of 256
*/
/* SHRINKFACTOR(c): half of c, rounded by integer division.
   GROWFACTOR(c):   one and a half times c, rounded up ((3c + 1) / 2 in integer arithmetic).
   The argument is parenthesized so that compound expressions such as "a + b" are scaled as a
   whole; it is evaluated exactly once.
*/
#define SHRINKFACTOR(c) ((c) / 2)
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
505
506 CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, CFIndex reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) {
507 if (capacity != 0 || reqCapacity != 0) { /* If initially zero, and space not needed, leave it at that... */
508 if ((capacity < reqCapacity) || /* We definitely need the room... */
509 (!__CFStrCapacityProvidedExternally(str) && /* Assuming we control the capacity... */
510 ((reqCapacity < SHRINKFACTOR(capacity)) || /* ...we have too much room! */
511 (!leaveExtraRoom && (reqCapacity < capacity))))) { /* ...we need to eliminate the extra space... */
512 CFIndex newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity; /* Grow by 3/2 if extra room is desired */
513 CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize;
514 if (newCapacity < desiredCapacity) { /* If less than desired, bump up to desired */
515 newCapacity = desiredCapacity;
516 } else if (__CFStrIsFixed(str)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */
517 newCapacity = __CFMax(desiredCapacity, reqCapacity); /* !!! So, fixed is not really fixed, but "tight" */
518 }
519 if (__CFStrHasContentsAllocator(str)) { /* Also apply any preferred size from the allocator; should we do something for */
520 newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0);
521 } else {
522 newCapacity = malloc_good_size(newCapacity);
523 }
524 return newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity));
525 }
526 }
527 return capacity;
528 }
529
530
531 /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result.
532 numBlocks is current total number of blocks within buffer.
533 blockSize is the size of each block in bytes
534 ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap
535 insertLength is the final spacing between the remaining blocks
536
537 Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO
538 if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H
539 if insertLength = 0, result = A B D G H
540
541 Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO
542 if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U
543
544 */
/* A block of preserved data whose move has been postponed by rearrangeBlocks().
   All three values are in bytes.
*/
typedef struct _CFStringDeferredRange {
    int beginning;	// Offset of the block within the buffer
    int length;		// Size of the block
    int shift;		// Signed distance the block has to move
} CFStringDeferredRange;
550
551 typedef struct _CFStringStackInfo {
552 int capacity; // Capacity (if capacity == count, need to realloc to add another)
553 int count; // Number of elements actually stored
554 CFStringDeferredRange *stack;
555 Boolean hasMalloced; // Indicates "stack" is allocated and needs to be deallocated when done
556 char _padding[3];
557 } CFStringStackInfo;
558
559 CF_INLINE void pop (CFStringStackInfo *si, CFStringDeferredRange *topRange) {
560 si->count = si->count - 1;
561 *topRange = si->stack[si->count];
562 }
563
564 CF_INLINE void push (CFStringStackInfo *si, const CFStringDeferredRange *newRange) {
565 if (si->count == si->capacity) {
566 // increase size of the stack
567 si->capacity = (si->capacity + 4) * 2;
568 if (si->hasMalloced) {
569 si->stack = CFAllocatorReallocate(NULL, si->stack, si->capacity * sizeof(CFStringDeferredRange), 0);
570 } else {
571 CFStringDeferredRange *newStack = (CFStringDeferredRange *)CFAllocatorAllocate(NULL, si->capacity * sizeof(CFStringDeferredRange), 0);
572 memmove(newStack, si->stack, si->count * sizeof(CFStringDeferredRange));
573 si->stack = newStack;
574 si->hasMalloced = true;
575 }
576 }
577 si->stack[si->count] = *newRange;
578 si->count = si->count + 1;
579 }
580
581 static void rearrangeBlocks(
582 uint8_t *buffer,
583 CFIndex numBlocks,
584 CFIndex blockSize,
585 const CFRange *ranges,
586 CFIndex numRanges,
587 CFIndex insertLength) {
588
589 #define origStackSize 10
590 CFStringDeferredRange origStack[origStackSize];
591 CFStringStackInfo si = {origStackSize, 0, origStack, false, {0, 0, 0}};
592 CFStringDeferredRange currentNonRange = {0, 0, 0};
593 int currentRange = 0;
594 int amountShifted = 0;
595
596 // must have at least 1 range left.
597
598 while (currentRange < numRanges) {
599 currentNonRange.beginning = (ranges[currentRange].location + ranges[currentRange].length) * blockSize;
600 if ((numRanges - currentRange) == 1) {
601 // at the end.
602 currentNonRange.length = numBlocks * blockSize - currentNonRange.beginning;
603 if (currentNonRange.length == 0) break;
604 } else {
605 currentNonRange.length = (ranges[currentRange + 1].location * blockSize) - currentNonRange.beginning;
606 }
607 currentNonRange.shift = amountShifted + (insertLength * blockSize) - (ranges[currentRange].length * blockSize);
608 amountShifted = currentNonRange.shift;
609 if (amountShifted <= 0) {
610 // process current item and rest of stack
611 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
612 while (si.count > 0) {
613 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
614 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
615 }
616 } else {
617 // add currentNonRange to stack.
618 push (&si, &currentNonRange);
619 }
620 currentRange++;
621 }
622
623 // no more ranges. if anything is on the stack, process.
624
625 while (si.count > 0) {
626 pop (&si, &currentNonRange); // currentNonRange now equals the top element of the stack.
627 if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length);
628 }
629 if (si.hasMalloced) CFAllocatorDeallocate (NULL, si.stack);
630 }
631
632 /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.)
633 */
634 static void copyBlocks(
635 const uint8_t *srcBuffer,
636 uint8_t *dstBuffer,
637 CFIndex srcLength,
638 Boolean srcIsUnicode,
639 Boolean dstIsUnicode,
640 const CFRange *ranges,
641 CFIndex numRanges,
642 CFIndex insertLength) {
643
644 CFIndex srcLocationInBytes = 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks
645 CFIndex dstLocationInBytes = 0; // ditto
646 CFIndex srcBlockSize = srcIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
647 CFIndex insertLengthInBytes = insertLength * (dstIsUnicode ? sizeof(UniChar) : sizeof(uint8_t));
648 CFIndex rangeIndex = 0;
649 CFIndex srcToDstMultiplier = (srcIsUnicode == dstIsUnicode) ? 1 : (sizeof(UniChar) / sizeof(uint8_t));
650
651 // Loop over the ranges, copying the range to be preserved (right before each range)
652 while (rangeIndex < numRanges) {
653 CFIndex srcLengthInBytes = ranges[rangeIndex].location * srcBlockSize - srcLocationInBytes; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved
654 if (srcLengthInBytes > 0) {
655 if (srcIsUnicode == dstIsUnicode) {
656 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLengthInBytes);
657 } else {
658 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLengthInBytes);
659 }
660 }
661 srcLocationInBytes += srcLengthInBytes + ranges[rangeIndex].length * srcBlockSize; // Skip over the just-copied and to-be-deleted stuff
662 dstLocationInBytes += srcLengthInBytes * srcToDstMultiplier + insertLengthInBytes;
663 rangeIndex++;
664 }
665
666 // Do last range (the one beyond last range)
667 if (srcLocationInBytes < srcLength * srcBlockSize) {
668 if (srcIsUnicode == dstIsUnicode) {
669 memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLength * srcBlockSize - srcLocationInBytes);
670 } else {
671 __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLength * srcBlockSize - srcLocationInBytes);
672 }
673 }
674 }
675
676
677 /* Reallocates the backing store of the string to accommodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.)
   insertLength characters are accounted for once per entry in deleteRanges (new length = old length + insertLength * numDeleteRanges - sum of the range lengths).
   makeUnicode forces 16-bit storage for the result; it is not consulted when the resulting length is 0.
678 */
679 static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) {
680 const uint8_t *curContents = __CFStrContents(str);
681 CFIndex curLength = curContents ? __CFStrLength2(str, curContents) : 0;
682 CFIndex newLength;
683
684 // Compute new length of the string
685 if (numDeleteRanges == 1) {
686 newLength = curLength + insertLength - deleteRanges[0].length;
687 } else {
688 int cnt;
689 newLength = curLength + insertLength * numDeleteRanges;
690 for (cnt = 0; cnt < numDeleteRanges; cnt++) newLength -= deleteRanges[cnt].length;
691 }
692
693 __CFAssertIfFixedLengthIsOK(str, newLength);
694
695 if (newLength == 0) {
696 // A somewhat optimized code-path for this special case, with the following implicit values:
697 // newIsUnicode = false
698 // useLengthAndNullBytes = false
699 // newCharSize = sizeof(uint8_t)
700 // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally
701 // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway)
702 CFIndex curCapacity = __CFStrCapacity(str);
703 CFIndex newCapacity = __CFStrNewCapacity(str, 0, curCapacity, true, sizeof(uint8_t));
704 if (newCapacity != curCapacity) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all
705 if (curContents) __CFStrDeallocateMutableContents(str, (uint8_t *)curContents);
706 __CFStrSetContentPtr(str, NULL);
707 __CFStrSetCapacity(str, 0);
708 __CFStrClearCapacityProvidedExternally(str);
709 __CFStrClearHasLengthAndNullBytes(str);
710 if (!__CFStrIsExternalMutable(str)) __CFStrClearUnicode(str); // External mutable implies Unicode
711 } else {
// Capacity unchanged: keep the buffer and just reset the string's flags (and length/null bytes when they fit)
712 if (!__CFStrIsExternalMutable(str)) {
713 __CFStrClearUnicode(str);
714 if (curCapacity >= (int)(sizeof(uint8_t) * 2)) { // If there's room
715 __CFStrSetHasLengthAndNullBytes(str);
716 ((uint8_t *)curContents)[0] = ((uint8_t *)curContents)[1] = 0;
717 } else {
718 __CFStrClearHasLengthAndNullBytes(str);
719 }
720 }
721 }
722 __CFStrSetExplicitLength(str, 0);
723 } else { /* This else-clause assumes newLength > 0 */
724 Boolean oldIsUnicode = __CFStrIsUnicode(str);
725 Boolean newIsUnicode = makeUnicode || (oldIsUnicode /* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str);
726 CFIndex newCharSize = newIsUnicode ? sizeof(UniChar) : sizeof(uint8_t);
727 Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */;
728 CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0; /* 2 extra bytes to keep the length byte & null... */
729 CFIndex curCapacity = __CFStrCapacity(str);
730 CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize);
731 Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */
732 uint8_t *newContents = allocNewBuffer ? __CFStrAllocateMutableContents(str, newCapacity) : (uint8_t *)curContents;
733 Boolean hasLengthAndNullBytes = __CFStrHasLengthByte(str);
734
735 CFAssert1(hasLengthAndNullBytes == __CFStrHasNullByte(str), __kCFLogAssertion, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__);
736
/* Step both pointers past their length byte so the block routines below see character data only; undone further down */
737 if (hasLengthAndNullBytes) curContents++;
738 if (useLengthAndNullBytes) newContents++;
739
740 if (curContents) {
741 if (oldIsUnicode == newIsUnicode) {
742 if (newContents == curContents) {
743 rearrangeBlocks(newContents, curLength, newCharSize, deleteRanges, numDeleteRanges, insertLength);
744 } else {
745 copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
746 }
747 } else if (newIsUnicode) { /* this implies we have a new buffer */
748 copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength);
749 }
750 if (hasLengthAndNullBytes) curContents--; /* Undo the damage from above */
751 if (allocNewBuffer) __CFStrDeallocateMutableContents(str, (void *)curContents);
752 }
753
/* Bring the string's flag bits and the length/null bytes in line with the new representation */
754 if (!newIsUnicode) {
755 if (useLengthAndNullBytes) {
756 newContents[newLength] = 0; /* Always have null byte, if not unicode */
757 newContents--; /* Undo the damage from above */
758 newContents[0] = __CFCanUseLengthByte(newLength) ? (uint8_t)newLength : 0;
759 if (!hasLengthAndNullBytes) __CFStrSetHasLengthAndNullBytes(str);
760 } else {
761 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
762 }
763 if (oldIsUnicode) __CFStrClearUnicode(str);
764 } else { // New is unicode...
765 if (!oldIsUnicode) __CFStrSetUnicode(str);
766 if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str);
767 }
768 __CFStrSetExplicitLength(str, newLength);
769
/* Only a freshly allocated buffer needs to be installed; otherwise the existing pointer and capacity are still right */
770 if (allocNewBuffer) {
771 __CFStrSetCapacity(str, newCapacity);
772 __CFStrClearCapacityProvidedExternally(str);
773 __CFStrSetContentPtr(str, newContents);
774 }
775 }
776 }
777
778 /* Same as above, but takes one range (very common case)
779 */
780 CF_INLINE void __CFStringChangeSize(CFMutableStringRef str, CFRange range, CFIndex insertLength, Boolean makeUnicode) {
781 __CFStringChangeSizeMultiple(str, &range, 1, insertLength, makeUnicode);
782 }
783
784
785 static void __CFStringDeallocate(CFTypeRef cf) {
786 CFStringRef str = cf;
787
788 // constantStringAllocatorForDebugging is not around unless DEBUG is defined, but neither is CFAssert2()...
789 CFAssert1(__CFConstantStringTableBeingFreed || CFGetAllocator(str) != constantStringAllocatorForDebugging, __kCFLogAssertion, "Tried to deallocate CFSTR(\"%@\")", str);
790
791 if (!__CFStrIsInline(str)) {
792 uint8_t *contents;
793 Boolean mutable = __CFStrIsMutable(str);
794 if (__CFStrFreeContentsWhenDone(str) && (contents = (uint8_t *)__CFStrContents(str))) {
795 if (mutable) {
796 __CFStrDeallocateMutableContents((CFMutableStringRef)str, contents);
797 } else {
798 if (__CFStrHasContentsDeallocator(str)) {
799 CFAllocatorRef contentsDeallocator = __CFStrContentsDeallocator(str);
800 CFAllocatorDeallocate(contentsDeallocator, contents);
801 CFRelease(contentsDeallocator);
802 } else {
803 CFAllocatorRef alloc = __CFGetAllocator(str);
804 CFAllocatorDeallocate(alloc, contents);
805 }
806 }
807 }
808 if (mutable && __CFStrHasContentsAllocator(str)) CFRelease(__CFStrContentsAllocator((CFMutableStringRef)str));
809 }
810 }
811
812 static Boolean __CFStringEqual(CFTypeRef cf1, CFTypeRef cf2) {
813 CFStringRef str1 = cf1;
814 CFStringRef str2 = cf2;
815 const uint8_t *contents1;
816 const uint8_t *contents2;
817 CFIndex len1;
818
819 /* !!! We do not need IsString assertions, as the CFBase runtime assures this */
820 /* !!! We do not need == test, as the CFBase runtime assures this */
821
822 contents1 = __CFStrContents(str1);
823 contents2 = __CFStrContents(str2);
824 len1 = __CFStrLength2(str1, contents1);
825
826 if (len1 != __CFStrLength2(str2, contents2)) return false;
827
828 contents1 += __CFStrSkipAnyLengthByte(str1);
829 contents2 += __CFStrSkipAnyLengthByte(str2);
830
831 if (__CFStrIsEightBit(str1) && __CFStrIsEightBit(str2)) {
832 return memcmp((const char *)contents1, (const char *)contents2, len1) ? false : true;
833 } else if (__CFStrIsEightBit(str1)) { /* One string has Unicode contents */
834 CFStringInlineBuffer buf;
835 CFIndex buf_idx = 0;
836
837 CFStringInitInlineBuffer(str1, &buf, CFRangeMake(0, len1));
838 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
839 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents2)[buf_idx]) return false;
840 }
841 } else if (__CFStrIsEightBit(str2)) { /* One string has Unicode contents */
842 CFStringInlineBuffer buf;
843 CFIndex buf_idx = 0;
844
845 CFStringInitInlineBuffer(str2, &buf, CFRangeMake(0, len1));
846 for (buf_idx = 0; buf_idx < len1; buf_idx++) {
847 if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents1)[buf_idx]) return false;
848 }
849 } else { /* Both strings have Unicode contents */
850 CFIndex idx;
851 for (idx = 0; idx < len1; idx++) {
852 if (((UniChar *)contents1)[idx] != ((UniChar *)contents2)[idx]) return false;
853 }
854 }
855 return true;
856 }
857
858
859 /* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
860 If the length is less than or equal to 16, then the hash function is simply the
861 following (n is the nth UniChar character, starting from 0):
862
863 hash(-1) = length
864 hash(n) = hash(n-1) * 257 + unichar(n);
865 Hash = hash(length-1) * ((length & 31) + 1)
866
867 If the length is greater than 16, then the above algorithm applies to
868 characters 0..7 and length-8..length-1; thus the first and last 8 characters.
869 */
870 CFHashCode __CFStringHash(CFTypeRef cf) {
871 CFStringRef str = cf;
872 const uint8_t *contents;
873 CFIndex len;
874 CFIndex cnt;
875 UInt32 result;
876
877 /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */
878
879 contents = __CFStrContents(str);
880 len = __CFStrLength2(str, contents);
881 result = len;
882 if (__CFStrIsEightBit(str)) {
883 contents += __CFStrSkipAnyLengthByte(str);
884 if (len <= 16) {
885 for (cnt = 0; cnt < len; cnt++) result = result * 257 + __CFCharToUniCharTable[contents[cnt]];
886 } else {
887 for (cnt = 0; cnt < 8; cnt++) result = result * 257 + __CFCharToUniCharTable[contents[cnt]];
888 for (cnt = len - 8; cnt < len; cnt++) result = result * 257 + __CFCharToUniCharTable[contents[cnt]];
889 }
890 #if defined(DEBUG)
891 if (!__CFCharToUniCharFunc) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea
892 Boolean err = false;
893 if (len <= 16) {
894 for (cnt = 0; cnt < len; cnt++) if (contents[cnt] >= 128) err = true;
895 } else {
896 for (cnt = 0; cnt < 8; cnt++) if (contents[cnt] >= 128) err = true;
897 for (cnt = len - 8; cnt < len; cnt++) if (contents[cnt] >= 128) err = true;
898 }
899 if (err) {
900 // Can't do log here, as it might be too early
901 printf("Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n");
902 }
903 }
904 #endif
905 } else {
906 const UniChar *uContents = (UniChar *)contents;
907 if (len <= 16) {
908 for (cnt = 0; cnt < len; cnt++) result = result * 257 + uContents[cnt];
909 } else {
910 for (cnt = 0; cnt < 8; cnt++) result = result * 257 + uContents[cnt];
911 for (cnt = len - 8; cnt < len; cnt++) result = result * 257 + uContents[cnt];
912 }
913 }
914 result += (result << (len & 31));
915 return result;
916 }
917
918
919 static CFStringRef __CFStringCopyDescription(CFTypeRef cf) {
920 return CFStringCreateWithFormat(kCFAllocatorDefault, NULL, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf, __CFGetAllocator(cf), cf);
921 }
922
923 static CFStringRef __CFStringCopyFormattingDescription(CFTypeRef cf, CFDictionaryRef formatOptions) {
924 return CFStringCreateCopy(__CFGetAllocator(cf), cf);
925 }
926
/* Type ID of CFString; holds _kCFRuntimeNotATypeID until __CFStringInitialize() stores the value returned by _CFRuntimeRegisterClass(). */
927 static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID;
928
/* Class record passed to _CFRuntimeRegisterClass() by __CFStringInitialize().
   NOTE(review): CFRuntimeClass is declared outside this file; the slot labels below are inferred from the callbacks' names -- confirm against its declaration. */
929 static const CFRuntimeClass __CFStringClass = {
930 0, // presumably the class version -- confirm
931 "CFString", // presumably the class name -- confirm
932 NULL, // init
933 (void *)CFStringCreateCopy, // presumably the copy slot; cast because the signature differs from the slot's type -- confirm
934 __CFStringDeallocate,
935 __CFStringEqual,
936 __CFStringHash,
937 __CFStringCopyFormattingDescription,
938 __CFStringCopyDescription
939 };
940
941 __private_extern__ void __CFStringInitialize(void) {
942 __kCFStringTypeID = _CFRuntimeRegisterClass(&__CFStringClass);
943 }
944
945 CFTypeID CFStringGetTypeID(void) {
946 return __kCFStringTypeID;
947 }
948
949
/* Reports whether str is stored as Unicode (per __CFStrIsUnicode) rather than as 8-bit characters.
   NOTE(review): CF_OBJC_FUNCDISPATCH0 is defined elsewhere; presumably it forwards to the named selector and returns early when str is not a native CFString -- confirm. */
950 static Boolean CFStrIsUnicode(CFStringRef str) {
951 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, Boolean, str, "_encodingCantBeStoredInEightBitCFString");
952 return __CFStrIsUnicode(str);
953 }
954
955
956
/* Sentinel for the contentsDeallocator argument of the funnel below: "free with the string's own allocator". It is a marker value compared by identity, never dereferenced. */
957 #define ALLOCATORSFREEFUNC ((void *)-1)
958
959 /* Common creation path for immutable strings; the CFStringCreateWith...() functions below all end up here.
   Returns the new string; returns a retained kCFEmptyString for empty input with the system default allocator; returns NULL if the bytes need decoding and __CFStringDecodeByteStream3() fails.
   contentsDeallocator indicates how to free the data if it's noCopy == true:
960 kCFAllocatorNull: don't free
961 ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
962 NULL: default allocator
963 otherwise it's the allocator that should be used (it will be explicitly stored)
964 if noCopy == false, then freeFunc should be ALLOCATORSFREEFUNC
965 hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
966 possiblyExternalFormat indicates that the bytes might have BOM and be swapped
967 tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
968 numBytes contains the actual number of bytes in "bytes", including Length byte,
969 BUT not the NULL byte at the end
970 bytes should not contain BOM characters
971 !!! Various flags should be combined to reduce number of arguments, if possible
972 */
973 __private_extern__ CFStringRef __CFStringCreateImmutableFunnel3(
974 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
975 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
976 CFAllocatorRef contentsDeallocator, UInt32 converterFlags) {
977
978 CFMutableStringRef str;
979 CFVarWidthCharBuffer vBuf;
980 CFIndex size;
981 Boolean useLengthByte = false;
982 Boolean useNullByte = false;
983 Boolean useInlineData = false;
984
985 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
986
987 if (contentsDeallocator == ALLOCATORSFREEFUNC) {
988 contentsDeallocator = alloc;
989 } else if (contentsDeallocator == NULL) {
990 contentsDeallocator = __CFGetDefaultAllocator();
991 }
992
993 if ((NULL != kCFEmptyString) && (numBytes == 0) && (alloc == kCFAllocatorSystemDefault)) { // If we are using the system default allocator, and the string is empty, then use the empty string!
994 if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak).
995 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
996 }
997 return CFRetain(kCFEmptyString); // Quick exit; won't catch all empty strings, but most
998 }
999
1000 // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL
1001
1002 vBuf.shouldFreeChars = false; // We use this to remember to free the buffer possibly allocated by decode
1003
1004 // First check to see if the data needs to be converted...
1005 // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy
1006
1007 if ((encoding == kCFStringEncodingUnicode && possiblyExternalFormat) || (encoding != kCFStringEncodingUnicode && !__CFCanUseEightBitCFStringForBytes(bytes, numBytes, encoding))) {
1008 const void *realBytes = (uint8_t*) bytes + (hasLengthByte ? 1 : 0);
1009 CFIndex realNumBytes = numBytes - (hasLengthByte ? 1 : 0);
1010 Boolean usingPassedInMemory = false;
1011
1012 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
1013 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
1014
1015 if (!__CFStringDecodeByteStream3(realBytes, realNumBytes, encoding, false, &vBuf, &usingPassedInMemory, converterFlags)) {
1016 return NULL; // !!! Is this acceptable failure mode?
1017 }
1018
1019 encoding = vBuf.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUnicode;
1020
1021 if (!usingPassedInMemory) {
1022
1023 // Make the parameters fit the new situation
1024 numBytes = vBuf.isASCII ? vBuf.numChars : (vBuf.numChars * sizeof(UniChar));
1025 hasLengthByte = hasNullByte = false;
1026
1027 // Get rid of the original buffer if it's not being used
1028 if (noCopy && contentsDeallocator != kCFAllocatorNull) {
1029 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1030 }
1031 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1032
1033 // See if we can reuse any storage the decode func might have allocated
1034 // We do this only for Unicode, as otherwise we would not have NULL and Length bytes
1035
1036 if (vBuf.shouldFreeChars && (alloc == vBuf.allocator) && encoding == kCFStringEncodingUnicode) {
1037 vBuf.shouldFreeChars = false; // Transferring ownership to the CFString
1038 bytes = CFAllocatorReallocate(vBuf.allocator, (void *)vBuf.chars.unicode, numBytes, 0); // Tighten up the storage
1039 noCopy = true;
1040 } else {
1041 bytes = vBuf.chars.unicode;
1042 noCopy = false; // Can't do noCopy anymore
1043 // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func
1044 }
1045
1046 }
1047
1048 // At this point, all necessary input arguments have been changed to reflect the new state
1049
1050 } else if (encoding == kCFStringEncodingUnicode && tryToReduceUnicode) { // Check to see if we can reduce Unicode to ASCII
1051 CFIndex cnt;
1052 CFIndex len = numBytes / sizeof(UniChar);
1053 Boolean allASCII = true;
1054
1055 for (cnt = 0; cnt < len; cnt++) if (((const UniChar *)bytes)[cnt] > 127) {
1056 allASCII = false;
1057 break;
1058 }
1059
1060 if (allASCII) { // Yes we can!
1061 uint8_t *ptr, *mem;
1062 hasLengthByte = __CFCanUseLengthByte(len);
1063 hasNullByte = true;
1064 numBytes = (len + 1 + (hasLengthByte ? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte
1065 // See if we can use that temporary local buffer in vBuf...
1066 mem = ptr = (uint8_t *)((numBytes >= __kCFVarWidthLocalBufferSize) ? CFAllocatorAllocate(alloc, numBytes, 0) : vBuf.localBuffer);
1067 if (mem != vBuf.localBuffer && __CFOASafe) __CFSetLastAllocationEventName(mem, "CFString (store)");
1068 if (hasLengthByte) *ptr++ = len;
1069 for (cnt = 0; cnt < len; cnt++) ptr[cnt] = ((const UniChar *)bytes)[cnt];
1070 ptr[len] = 0;
1071 if (noCopy && contentsDeallocator != kCFAllocatorNull) {
1072 CFAllocatorDeallocate(contentsDeallocator, (void *)bytes);
1073 }
1074 bytes = mem;
1075 encoding = kCFStringEncodingASCII;
1076 contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone...
1077 noCopy = (numBytes >= __kCFVarWidthLocalBufferSize); // If we had to allocate it, make sure it's kept around
1078 numBytes--; // Should not contain the NULL byte at end...
1079 }
1080
1081 // At this point, all necessary input arguments have been changed to reflect the new state
1082 }
1083
1084 // Now determine the necessary size
1085
1086 if (noCopy) {
1087
1088 size = sizeof(void *); // Pointer to the buffer
1089 if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) {
1090 size += sizeof(void *); // The contentsDeallocator
1091 }
1092 if (!hasLengthByte) size += sizeof(SInt32); // Explicit length
1093 useLengthByte = hasLengthByte;
1094 useNullByte = hasNullByte;
1095
1096 } else { // Inline data; reserve space for it
1097
1098 useInlineData = true;
1099 size = numBytes;
1100
1101 if (hasLengthByte || (encoding != kCFStringEncodingUnicode && __CFCanUseLengthByte(numBytes))) {
1102 useLengthByte = true;
1103 if (!hasLengthByte) size += 1;
1104 } else {
1105 size += sizeof(SInt32); // Explicit length
1106 }
1107 if (hasNullByte || encoding != kCFStringEncodingUnicode) {
1108 useNullByte = true;
1109 size += 1;
1110 }
1111 }
1112
1113 // Finally, allocate!
1114
1115 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, size, NULL);
1116 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (immutable)");
1117
// Record where the characters live (inline / external, and how an external buffer gets freed), plus width and length/null-byte flags
1118 __CFStrSetInfoBits(str,
1119 (useInlineData ? __kCFHasInlineData : (contentsDeallocator == alloc ? __kCFHasExternalDataDefaultFree : (contentsDeallocator == kCFAllocatorNull ? __kCFHasExternalDataNoFree : __kCFHasExternalDataCustomFree))) |
1120 ((encoding == kCFStringEncodingUnicode) ? __kCFIsUnicode : 0) |
1121 (useNullByte ? __kCFHasNullByte : 0) |
1122 (useLengthByte ? __kCFHasLengthByte : 0));
1123
1124 if (!useLengthByte) {
1125 CFIndex length = numBytes - (hasLengthByte ? 1 : 0);
1126 if (encoding == kCFStringEncodingUnicode) length /= sizeof(UniChar);
1127 __CFStrSetExplicitLength(str, length);
1128 }
1129
1130 if (useInlineData) {
1131 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1132 if (useLengthByte && !hasLengthByte) *contents++ = numBytes;
1133 memmove(contents, bytes, numBytes);
1134 if (useNullByte) contents[numBytes] = 0;
1135 } else {
1136 __CFStrSetContentPtr(str, bytes);
1137 if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) __CFStrSetContentsDeallocator(str, CFRetain(contentsDeallocator));
1138 }
// Still set only when the decode step allocated a buffer whose ownership was not handed to the string above
1139 if (vBuf.shouldFreeChars) CFAllocatorDeallocate(vBuf.allocator, (void *)bytes);
1140
1141 return str;
1142 }
1143
1144 /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated
1145 */
1146 CFStringRef __CFStringCreateImmutableFunnel2(
1147 CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding,
1148 Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy,
1149 CFAllocatorRef contentsDeallocator) {
1150 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, possiblyExternalFormat, tryToReduceUnicode, hasLengthByte, hasNullByte, noCopy, contentsDeallocator, 0);
1151 }
1152
1153
1154
1155 CFStringRef CFStringCreateWithPascalString(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding) {
1156 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1157 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, false, ALLOCATORSFREEFUNC, 0);
1158 }
1159
1160
1161 CFStringRef CFStringCreateWithCString(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding) {
1162 CFIndex len = strlen(cStr);
1163 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, false, ALLOCATORSFREEFUNC, 0);
1164 }
1165
1166 CFStringRef CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1167 CFIndex len = (CFIndex)(*(uint8_t *)pStr);
1168 return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, true, contentsDeallocator, 0);
1169 }
1170
1171
1172 CFStringRef CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) {
1173 CFIndex len = strlen(cStr);
1174 return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, true, contentsDeallocator, 0);
1175 }
1176
1177
1178 CFStringRef CFStringCreateWithCharacters(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars) {
1179 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1180 }
1181
1182
1183 CFStringRef CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars, CFAllocatorRef contentsDeallocator) {
1184 return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, false, false, false, true, contentsDeallocator, 0);
1185 }
1186
1187
1188 CFStringRef CFStringCreateWithBytes(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat) {
1189 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1190 }
1191
1192 CFStringRef _CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) {
1193 return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0);
1194 }
1195
1196 CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1197 return _CFStringCreateWithFormatAndArgumentsAux(alloc, NULL, formatOptions, format, arguments);
1198 }
1199
1200 CFStringRef _CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, CFDictionaryRef), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) {
1201 CFStringRef str;
1202 CFMutableStringRef outputString = CFStringCreateMutable(__CFGetDefaultAllocator(), 0); //should use alloc if no copy/release
1203 __CFStrSetDesiredCapacity(outputString, 120); // Given this will be tightened later, choosing a larger working string is fine
1204 _CFStringAppendFormatAndArgumentsAux(outputString, copyDescFunc, formatOptions, format, arguments);
1205 // ??? copy/release should not be necessary here -- just make immutable, compress if possible
1206 // (However, this does make the string inline, and cause the supplied allocator to be used...)
1207 str = CFStringCreateCopy(alloc, outputString);
1208 CFRelease(outputString);
1209 return str;
1210 }
1211
1212 CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) {
1213 CFStringRef result;
1214 va_list argList;
1215
1216 va_start(argList, format);
1217 result = CFStringCreateWithFormatAndArguments(alloc, formatOptions, format, argList);
1218 va_end(argList);
1219
1220 return result;
1221 }
1222
1223
1224 CFStringRef CFStringCreateWithSubstring(CFAllocatorRef alloc, CFStringRef str, CFRange range) {
1225 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, CFStringRef , str, "_createSubstringWithRange:", CFRangeMake(range.location, range.length));
1226
1227 __CFAssertIsString(str);
1228 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1229
1230 if ((range.location == 0) && (range.length == __CFStrLength(str))) { /* The substring is the whole string... */
1231 return CFStringCreateCopy(alloc, str);
1232 } else if (__CFStrIsEightBit(str)) {
1233 const uint8_t *contents = __CFStrContents(str);
1234 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location + __CFStrSkipAnyLengthByte(str), range.length, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1235 } else {
1236 const UniChar *contents = __CFStrContents(str);
1237 return __CFStringCreateImmutableFunnel3(alloc, contents + range.location, range.length * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1238 }
1239 }
1240
1241 CFStringRef CFStringCreateCopy(CFAllocatorRef alloc, CFStringRef str) {
1242 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringRef, str, "copy");
1243
1244 __CFAssertIsString(str);
1245 if (!__CFStrIsMutable(str) && // If the string is not mutable
1246 ((alloc ? alloc : __CFGetDefaultAllocator()) == __CFGetAllocator(str)) && // and it has the same allocator as the one we're using
1247 (__CFStrIsInline(str) || __CFStrFreeContentsWhenDone(str) || str->base._rc == 0)) { // and the characters are inline, or are owned by the string, or the string is constant
1248 CFRetain(str); // Then just retain instead of making a true copy
1249 return str;
1250 }
1251 if (__CFStrIsEightBit(str)) {
1252 const uint8_t *contents = __CFStrContents(str);
1253 return __CFStringCreateImmutableFunnel3(alloc, contents + __CFStrSkipAnyLengthByte(str), __CFStrLength2(str, contents), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0);
1254 } else {
1255 const UniChar *contents = __CFStrContents(str);
1256 return __CFStringCreateImmutableFunnel3(alloc, contents, __CFStrLength2(str, contents) * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0);
1257 }
1258 }
1259
1260
1261
1262 /*** Constant string stuff... ***/
1263
/* Table of the constant strings made so far; NULL until __CFStringMakeConstantString() creates it on first use. */
1264 static CFMutableDictionaryRef constantStringTable = NULL;
1265
1266 /* For now we call a function to create a constant string and keep previously created constant strings in a dictionary. The keys are the 8-bit constant C-strings from the compiler; the values are the CFStrings created for them.
1267 */
1268
1269 static CFStringRef __cStrCopyDescription(const void *ptr) {
1270 return CFStringCreateWithCStringNoCopy(NULL, (const char *)ptr, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull);
1271 }
1272
1273 static Boolean __cStrEqual(const void *ptr1, const void *ptr2) {
1274 return (strcmp((const char *)ptr1, (const char *)ptr2) == 0);
1275 }
1276
1277 static CFHashCode __cStrHash(const void *ptr) {
1278 // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently
1279 const unsigned char *cStr = (const unsigned char *)ptr;
1280 CFIndex len = strlen(cStr);
1281 CFHashCode result = 0;
1282 if (len <= 4) { // All chars
1283 unsigned cnt = len;
1284 while (cnt--) result += (result << 8) + *cStr++;
1285 } else { // First and last 2 chars
1286 result += (result << 8) + cStr[0];
1287 result += (result << 8) + cStr[1];
1288 result += (result << 8) + cStr[len-2];
1289 result += (result << 8) + cStr[len-1];
1290 }
1291 result += (result << (len & 31));
1292 return result;
1293 }
1294
#if defined(DEBUG)
/* We use a special allocator (which simply calls through to the default) for constant strings so that we can catch them being freed...
   The four functions below are the callbacks placed in that allocator's CFAllocatorContext; none of them looks at its info argument.
*/
static void *csAlloc(CFIndex size, CFOptionFlags hint, void *info) {
    void *fresh = CFAllocatorAllocate(NULL, size, hint);
    return fresh;
}

static void *csRealloc(void *oPtr, CFIndex size, CFOptionFlags hint, void *info) {
    void *resized = CFAllocatorReallocate(NULL, oPtr, size, hint);
    return resized;
}

static void csDealloc(void *ptr, void *info) {
    CFAllocatorDeallocate(NULL, ptr);
}

static CFStringRef csCopyDescription(const void *info) {
    CFStringRef label = CFSTR("Debug allocator for CFSTRs");
    return CFRetain(label);
}
#endif
1314
/* Spin lock that __CFStringMakeConstantString() holds around its lookup in constantStringTable. */
1315 static CFSpinLock_t _CFSTRLock = 0;
1316
1317 CFStringRef __CFStringMakeConstantString(const char *cStr) {
1318 CFStringRef result;
1319 if (constantStringTable == NULL) {
1320 CFDictionaryKeyCallBacks constantStringCallBacks = {0, NULL, NULL, __cStrCopyDescription, __cStrEqual, __cStrHash};
1321 constantStringTable = CFDictionaryCreateMutable(NULL, 0, &constantStringCallBacks, &kCFTypeDictionaryValueCallBacks);
1322 _CFDictionarySetCapacity(constantStringTable, 2500); // avoid lots of rehashing
1323 #if defined(DEBUG)
1324 {
1325 CFAllocatorContext context = {0, NULL, NULL, NULL, csCopyDescription, csAlloc, csRealloc, csDealloc, NULL};
1326 constantStringAllocatorForDebugging = CFAllocatorCreate(NULL, &context);
1327 }
1328 #else
1329 #define constantStringAllocatorForDebugging NULL
1330 #endif
1331 }
1332
1333 __CFSpinLock(&_CFSTRLock);
1334 if ((result = (CFStringRef)CFDictionaryGetValue(constantStringTable, cStr))) {
1335 __CFSpinUnlock(&_CFSTRLock);
1336 } else {
1337 __CFSpinUnlock(&_CFSTRLock);
1338
1339 {
1340 #if 0
1341 // This #if treats non-7 bit chars in CFSTR() as MacOSRoman, for backward compatibility
1342 char *key;
1343 Boolean isASCII = true;
1344 //#warning Ali: Code to verify CFSTRs active, should be disabled before ship
1345 const unsigned char *tmp = cStr;
1346 while (*tmp) {
1347 if (*tmp++ > 127) {
1348 isASCII = false;
1349 break;
1350 }
1351 }
1352
1353 if (isASCII) result = CFStringCreateWithCString(constantStringAllocatorForDebugging, cStr, kCFStringEncodingASCII);
1354 if (result == NULL) {
1355 const char *log;
1356 result = CFStringCreateWithCString(constantStringAllocatorForDebugging, cStr, kCFStringEncodingUTF8);
1357 if (result == NULL) {
1358 result = CFStringCreateWithCString(constantStringAllocatorForDebugging, cStr, kCFStringEncodingMacRoman);
1359 if (result == NULL) {
1360 log = "that are not UTF-8, crashing";
1361 } else {
1362 log = "that are not UTF-8; treating as Mac OS Roman for now. FIX THIS!";
1363 }
1364 } else {
1365 log = "that seem to be UTF-8; please VERIFY";
1366 }
1367 {
1368 const unsigned char *tmp = cStr;
1369 CFMutableStringRef ms = CFStringCreateMutable(NULL, 0);
1370 while (*tmp) {
1371 CFStringAppendFormat(ms, NULL, (*tmp > 127) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp);
1372 tmp++;
1373 }
1374 CFLog(__kCFLogAssertion, CFSTR("CFSTR(\"%@\") has non-7 bit chars %s"), ms, log);
1375 CFRelease(ms);
1376 if (result == NULL) HALT;
1377 }
1378 }
1379 #else
1380 // This #else treats non-7 bit chars in CFSTR() as UTF8 first, and if that doesn't work, as MacOSRoman, for compatibility
1381 char *key;
1382 Boolean isASCII = true;
1383 //#warning Ali: Code to verify CFSTRs active, should be disabled before ship
1384 const unsigned char *tmp = cStr;
1385 while (*tmp) {
1386 if (*tmp++ > 127) {
1387 isASCII = false;
1388 break;
1389 }
1390 }
1391 if (!isASCII) {
1392 CFMutableStringRef ms = CFStringCreateMutable(NULL, 0);
1393 tmp = cStr;
1394 while (*tmp) {
1395 CFStringAppendFormat(ms, NULL, (*tmp > 127) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp);
1396 tmp++;
1397 }
1398 CFLog(0, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms);
1399 CFRelease(ms);
1400 }
1401 result = CFStringCreateWithCString(constantStringAllocatorForDebugging, cStr, kCFStringEncodingMacRoman);
1402 if (result == NULL) {
1403 CFLog(__kCFLogAssertion, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing"));
1404 HALT;
1405 }
1406 #endif
1407 if (__CFOASafe) __CFSetLastAllocationEventName((void *)result, "CFString (CFSTR)");
1408 if (__CFStrIsEightBit(result)) {
1409 key = (char *)__CFStrContents(result) + __CFStrSkipAnyLengthByte(result);
1410 } else { // For some reason the string is not 8-bit!
1411 key = CFAllocatorAllocate(NULL, strlen(cStr) + 1, 0);
1412 if (__CFOASafe) __CFSetLastAllocationEventName((void *)key, "CFString (CFSTR key)");
1413 strcpy(key, cStr); // !!! We will leak this, if the string is removed from the table (or table is freed)
1414 }
1415
1416 {
1417 #if !defined(DEBUG)
1418 CFStringRef resultToBeReleased = result;
1419 #endif
1420 CFIndex count;
1421 __CFSpinLock(&_CFSTRLock);
1422 count = CFDictionaryGetCount(constantStringTable);
1423 CFDictionaryAddValue(constantStringTable, key, result);
1424 if (CFDictionaryGetCount(constantStringTable) == count) { // add did nothing, someone already put it there
1425 result = (CFStringRef)CFDictionaryGetValue(constantStringTable, key);
1426 }
1427 __CFSpinUnlock(&_CFSTRLock);
1428 #if !defined(DEBUG)
1429 // Can't release this in the DEBUG case; will get assertion failure
1430 CFRelease(resultToBeReleased);
1431 #endif
1432 }
1433 }
1434 }
1435 return result;
1436 }
1437
#if defined(__MACOS8__) || defined(__WIN32__)

/* Releases the storage for the constant string table, in case the library is
   unloaded. The table pointer (and, in DEBUG builds, the debugging allocator
   pointer) is reset afterwards so that nothing is left pointing at freed
   memory if this is called again or CFSTR() is used afterwards.
*/
void __CFStringCleanup (void) {
    if (constantStringTable != NULL) {
#if defined(DEBUG)
        // The flag is raised only for the duration of the release
        __CFConstantStringTableBeingFreed = true;
        CFRelease(constantStringTable);
        __CFConstantStringTableBeingFreed = false;
#else
        CFRelease(constantStringTable);
#endif
        constantStringTable = NULL;
    }
#if defined(DEBUG)
    // NOTE(review): the allocator is deallocated through itself rather than CFRelease()d; kept as is - confirm this is intended
    CFAllocatorDeallocate( constantStringAllocatorForDebugging, (void*) constantStringAllocatorForDebugging );
    constantStringAllocatorForDebugging = NULL;
#endif
}

#endif
1457
1458
1459 // Can pass in NSString as replacement string
1460 // Call with numRanges > 0, and incrementing ranges
1461
1462 static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) {
1463 int cnt;
1464 CFIndex replacementLength = CFStringGetLength(replacement);
1465
1466 __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1467
1468 if (__CFStrIsUnicode(str)) {
1469 UniChar *contents = (UniChar *)__CFStrContents(str);
1470 UniChar *firstReplacement = contents + ranges[0].location;
1471 // Extract the replacementString into the first location, then copy from there
1472 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement);
1473 for (cnt = 1; cnt < numRanges; cnt++) {
1474 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1475 contents += replacementLength - ranges[cnt - 1].length;
1476 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar));
1477 }
1478 } else {
1479 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1480 uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str);
1481 // Extract the replacementString into the first location, then copy from there
1482 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL);
1483 contents += __CFStrSkipAnyLengthByte(str); // Now contents will simply track the location to insert next string into
1484 for (cnt = 1; cnt < numRanges; cnt++) {
1485 // The ranges are in terms of the original string; so offset by the change in length due to insertion
1486 contents += replacementLength - ranges[cnt - 1].length;
1487 memmove(contents + ranges[cnt].location, firstReplacement, replacementLength);
1488 }
1489 }
1490 }
1491
1492 // Can pass in NSString as replacement string
1493
1494 static void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
1495 CFIndex replacementLength = CFStringGetLength(replacement);
1496
1497 __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement));
1498
1499 if (__CFStrIsUnicode(str)) {
1500 UniChar *contents = (UniChar *)__CFStrContents(str);
1501 CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location);
1502 } else {
1503 uint8_t *contents = (uint8_t *)__CFStrContents(str);
1504 CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL);
1505 }
1506 }
1507
1508 /* If client does not provide a minimum capacity
1509 */
1510 #define DEFAULTMINCAPACITY 32
1511
1512 CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) {
1513 CFMutableStringRef str;
1514 Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false;
1515
1516 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
1517
1518 // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator...
1519 str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(void *) + sizeof(UInt32) * 3 + (hasExternalContentsAllocator ? sizeof(CFAllocatorRef) : 0), NULL);
1520 if (str) {
1521 if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)");
1522
1523 __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits);
1524 str->variants.externalMutable.buffer = NULL;
1525 __CFStrSetExplicitLength(str, 0);
1526 str->variants.externalMutable.gapEtc = 0;
1527 if (maxLength != 0) __CFStrSetIsFixed(str);
1528 __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength);
1529 __CFStrSetCapacity(str, 0);
1530 }
1531 return str;
1532 }
1533
1534 CFMutableStringRef CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc, UniChar *chars, CFIndex numChars, CFIndex capacity, CFAllocatorRef externalCharactersAllocator) {
1535 CFOptionFlags contentsAllocationBits = externalCharactersAllocator ? ((externalCharactersAllocator == kCFAllocatorNull) ? __kCFHasExternalDataNoFree : __kCFHasContentsAllocator) : __kCFHasExternalDataDefaultFree;
1536 CFMutableStringRef string = __CFStringCreateMutableFunnel(alloc, 0, contentsAllocationBits | __kCFIsExternalMutable | __kCFIsUnicode);
1537 if (string) {
1538 if (contentsAllocationBits == __kCFHasContentsAllocator) __CFStrSetContentsAllocator(string, CFRetain(externalCharactersAllocator));
1539 CFStringSetExternalCharactersNoCopy(string, chars, numChars, capacity);
1540 }
1541 return string;
1542 }
1543
1544 CFMutableStringRef CFStringCreateMutable(CFAllocatorRef alloc, CFIndex maxLength) {
1545 return __CFStringCreateMutableFunnel(alloc, maxLength, __kCFHasExternalDataDefaultFree);
1546 }
1547
1548 CFMutableStringRef CFStringCreateMutableCopy(CFAllocatorRef alloc, CFIndex maxLength, CFStringRef string) {
1549 CFMutableStringRef newString;
1550
1551 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFMutableStringRef, string, "mutableCopy");
1552
1553 __CFAssertIsString(string);
1554
1555 newString = CFStringCreateMutable(alloc, maxLength);
1556 __CFStringReplace(newString, CFRangeMake(0, 0), string);
1557
1558 return newString;
1559 }
1560
1561
1562 __private_extern__ void _CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex len) {
1563 __CFAssertIsStringAndMutable(str);
1564 __CFStrSetDesiredCapacity(str, len);
1565 }
1566
1567
1568 /* This one is for CF
1569 */
1570 CFIndex CFStringGetLength(CFStringRef str) {
1571 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFIndex, str, "length");
1572
1573 __CFAssertIsString(str);
1574 return __CFStrLength(str);
1575 }
1576
1577 /* This one is for NSCFString; it does not ObjC dispatch or assertion check
1578 */
1579 CFIndex _CFStringGetLength2(CFStringRef str) {
1580 return __CFStrLength(str);
1581 }
1582
1583
1584 /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere.
1585 */
1586 CF_INLINE UniChar __CFStringGetCharacterAtIndexGuts(CFStringRef str, CFIndex idx, const uint8_t *contents) {
1587 if (__CFStrIsEightBit(str)) {
1588 contents += __CFStrSkipAnyLengthByte(str);
1589 #if defined(DEBUG)
1590 if (!__CFCharToUniCharFunc && (contents[idx] >= 128)) {
1591 // Can't do log here, as it might be too early
1592 printf("Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n");
1593 }
1594 #endif
1595 return __CFCharToUniCharTable[contents[idx]];
1596 }
1597
1598 return ((UniChar *)contents)[idx];
1599 }
1600
1601 /* This one is for the CF API
1602 */
1603 UniChar CFStringGetCharacterAtIndex(CFStringRef str, CFIndex idx) {
1604 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, UniChar, str, "characterAtIndex:", idx);
1605
1606 __CFAssertIsString(str);
1607 __CFAssertIndexIsInStringBounds(str, idx);
1608 return __CFStringGetCharacterAtIndexGuts(str, idx, __CFStrContents(str));
1609 }
1610
1611 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1612 */
1613 int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str, CFIndex idx, UniChar *ch) {
1614 const uint8_t *contents = __CFStrContents(str);
1615 if (idx >= __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1616 *ch = __CFStringGetCharacterAtIndexGuts(str, idx, contents);
1617 return _CFStringErrNone;
1618 }
1619
1620
1621 /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere.
1622 */
1623 CF_INLINE void __CFStringGetCharactersGuts(CFStringRef str, CFRange range, UniChar *buffer, const uint8_t *contents) {
1624 if (__CFStrIsEightBit(str)) {
1625 __CFStrConvertBytesToUnicode(((uint8_t *)contents) + (range.location + __CFStrSkipAnyLengthByte(str)), buffer, range.length);
1626 } else {
1627 const UniChar *uContents = ((UniChar *)contents) + range.location;
1628 memmove(buffer, uContents, range.length * sizeof(UniChar));
1629 }
1630 }
1631
1632 /* This one is for the CF API
1633 */
1634 void CFStringGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1635 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "getCharacters:range:", buffer, CFRangeMake(range.location, range.length));
1636
1637 __CFAssertIsString(str);
1638 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1639 __CFStringGetCharactersGuts(str, range, buffer, __CFStrContents(str));
1640 }
1641
1642 /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check
1643 */
1644 int _CFStringCheckAndGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) {
1645 const uint8_t *contents = __CFStrContents(str);
1646 if (range.location + range.length > __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds;
1647 __CFStringGetCharactersGuts(str, range, buffer, contents);
1648 return _CFStringErrNone;
1649 }
1650
1651
1652 CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) {
1653
1654 /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */
1655 __CFAssertIsNotNegative(maxBufLen);
1656
1657 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { // If we can grope the ivars, let's do it...
1658 __CFAssertIsString(str);
1659 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
1660
1661 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
1662 const unsigned char *contents = __CFStrContents(str);
1663 CFIndex cLength = range.length;
1664
1665 if (buffer) {
1666 if (cLength > maxBufLen) cLength = maxBufLen;
1667 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength);
1668 }
1669 if (usedBufLen) *usedBufLen = cLength;
1670
1671 return cLength;
1672 }
1673 }
1674
1675 return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen);
1676 }
1677
1678
1679 ConstStringPtr CFStringGetPascalStringPtr (CFStringRef str, CFStringEncoding encoding) {
1680
1681 if (!CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1682 __CFAssertIsString(str);
1683 if (__CFStrHasLengthByte(str) && __CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII
1684 const uint8_t *contents = __CFStrContents(str);
1685 if (__CFStrHasExplicitLength(str) && (__CFStrLength2(str, contents) != (SInt32)(*contents))) return NULL; // Invalid length byte
1686 return (ConstStringPtr)contents;
1687 }
1688 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1689 }
1690 return NULL;
1691 }
1692
1693
1694 const char * CFStringGetCStringPtr(CFStringRef str, CFStringEncoding encoding) {
1695
1696 if (encoding != __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII != __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding))) return NULL;
1697 // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII?
1698
1699 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, const char *, str, "_fastCStringContents:", true);
1700
1701 __CFAssertIsString(str);
1702
1703 if (__CFStrHasNullByte(str)) {
1704 return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str);
1705 } else {
1706 return NULL;
1707 }
1708 }
1709
1710
1711 const UniChar *CFStringGetCharactersPtr(CFStringRef str) {
1712
1713 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, const UniChar *, str, "_fastCharacterContents");
1714
1715 __CFAssertIsString(str);
1716 if (__CFStrIsUnicode(str)) return (const UniChar *)__CFStrContents(str);
1717 return NULL;
1718 }
1719
1720
1721 Boolean CFStringGetPascalString(CFStringRef str, Str255 buffer, CFIndex bufferSize, CFStringEncoding encoding) {
1722 CFIndex length;
1723 CFIndex usedLen;
1724
1725 __CFAssertIsNotNegative(bufferSize);
1726 if (bufferSize < 1) return false;
1727
1728 if (CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
1729 length = CFStringGetLength(str);
1730 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
1731 } else {
1732 const uint8_t *contents;
1733
1734 __CFAssertIsString(str);
1735
1736 contents = __CFStrContents(str);
1737 length = __CFStrLength2(str, contents);
1738
1739 if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring
1740
1741 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
1742 if (length >= bufferSize) return false;
1743 memmove((void*)(1 + (const char*)buffer), (__CFStrSkipAnyLengthByte(str) + contents), length);
1744 *buffer = length;
1745 return true;
1746 }
1747 }
1748
1749 if (__CFStringEncodeByteStream(str, 0, length, false, encoding, false, (void*)(1 + (uint8_t*)buffer), bufferSize - 1, &usedLen) != length) {
1750 #if defined(DEBUG)
1751 if (bufferSize > 0) {
1752 strncpy((char *)buffer + 1, CONVERSIONFAILURESTR, bufferSize - 1);
1753 buffer[0] = (CFIndex)sizeof(CONVERSIONFAILURESTR) < (bufferSize - 1) ? (CFIndex)sizeof(CONVERSIONFAILURESTR) : (bufferSize - 1);
1754 }
1755 #else
1756 if (bufferSize > 0) buffer[0] = 0;
1757 #endif
1758 return false;
1759 }
1760 *buffer = usedLen;
1761 return true;
1762 }
1763
1764 Boolean CFStringGetCString(CFStringRef str, char *buffer, CFIndex bufferSize, CFStringEncoding encoding) {
1765 const uint8_t *contents;
1766 CFIndex len;
1767
1768 __CFAssertIsNotNegative(bufferSize);
1769 if (bufferSize < 1) return false;
1770
1771 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, Boolean, str, "_getCString:maxLength:encoding:", buffer, bufferSize - 1, encoding);
1772
1773 __CFAssertIsString(str);
1774
1775 contents = __CFStrContents(str);
1776 len = __CFStrLength2(str, contents);
1777
1778 if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
1779 if (len >= bufferSize) return false;
1780 memmove(buffer, contents + __CFStrSkipAnyLengthByte(str), len);
1781 buffer[len] = 0;
1782 return true;
1783 } else {
1784 CFIndex usedLen;
1785
1786 if (__CFStringEncodeByteStream(str, 0, len, false, encoding, false, (unsigned char*) buffer, bufferSize - 1, &usedLen) == len) {
1787 buffer[usedLen] = '\0';
1788 return true;
1789 } else {
1790 #if defined(DEBUG)
1791 strncpy(buffer, CONVERSIONFAILURESTR, bufferSize);
1792 #else
1793 if (bufferSize > 0) buffer[0] = 0;
1794 #endif
1795 return false;
1796 }
1797 }
1798 }
1799
1800 #define MAX_CASE_MAPPING_BUF (8)
1801
1802 /* Special casing for Uk sorting */
1803 #define DO_IGNORE_PUNCTUATION 1
1804 #if DO_IGNORE_PUNCTUATION
1805 #define UKRAINIAN_LANG_CODE (45)
1806 static bool __CFLocaleChecked = false;
1807 static const uint8_t *__CFPunctSetBMP = NULL;
1808 #endif /* DO_IGNORE_PUNCTUATION */
1809
1810 /* ??? We need to implement some additional flags here
1811 ??? Also, pay attention to flag 2, which is the NS flag (which CF has as flag 16, w/opposite meaning).
1812 */
1813 CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFOptionFlags compareOptions) {
1814 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
1815 CFStringInlineBuffer strBuf1, strBuf2;
1816 UTF32Char ch1, ch2;
1817 const uint8_t *punctBMP = NULL;
1818 Boolean caseInsensitive = (compareOptions & kCFCompareCaseInsensitive ? true : false);
1819 Boolean decompose = (compareOptions & kCFCompareNonliteral ? true : false);
1820 Boolean numerically = (compareOptions & kCFCompareNumerically ? true : false);
1821 Boolean localized = (compareOptions & kCFCompareLocalized ? true : false);
1822
1823 #if DO_IGNORE_PUNCTUATION
1824 if (localized) {
1825 if (!__CFLocaleChecked) {
1826 CFArrayRef locales = _CFBundleCopyUserLanguages(false);
1827
1828 if (locales && (CFArrayGetCount(locales) > 0)) {
1829 SInt32 langCode;
1830
1831 if (CFBundleGetLocalizationInfoForLocalization((CFStringRef)CFArrayGetValueAtIndex(locales, 0), &langCode, NULL, NULL, NULL) && (langCode == UKRAINIAN_LANG_CODE)) {
1832 __CFPunctSetBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, 0);
1833 }
1834
1835 CFRelease(locales);
1836 }
1837 __CFLocaleChecked = true;
1838 }
1839
1840 punctBMP = __CFPunctSetBMP;
1841 }
1842 #endif /* DO_IGNORE_PUNCTUATION */
1843
1844 CFStringInitInlineBuffer(string, &strBuf1, CFRangeMake(rangeToCompare.location, rangeToCompare.length));
1845 CFIndex strBuf1_idx = 0;
1846 CFIndex string2_len = CFStringGetLength(string2);
1847 CFStringInitInlineBuffer(string2, &strBuf2, CFRangeMake(0, string2_len));
1848 CFIndex strBuf2_idx = 0;
1849
1850 while (strBuf1_idx < rangeToCompare.length && strBuf2_idx < string2_len) {
1851 ch1 = CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx);
1852 ch2 = CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx);
1853
1854 if (numerically && (ch1 <= '9' && ch1 >= '0') && (ch2 <= '9' && ch2 >= '0')) { // If both are not digits, then don't do numerical comparison
1855 unsigned long long n1 = 0; // !!! Doesn't work if numbers are > max unsigned long long
1856 unsigned long long n2 = 0;
1857 do {
1858 n1 = n1 * 10 + (ch1 - '0');
1859 strBuf1_idx++;
1860 if (rangeToCompare.length <= strBuf1_idx) break;
1861 ch1 = CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx);
1862 } while (ch1 <= '9' && ch1 >= '0');
1863 do {
1864 n2 = n2 * 10 + (ch2 - '0');
1865 strBuf2_idx++;
1866 if (string2_len <= strBuf2_idx) break;
1867 ch2 = CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx);
1868 } while (ch2 <= '9' && ch2 >= '0');
1869 if (n1 < n2) return kCFCompareLessThan; else if (n1 > n2) return kCFCompareGreaterThan;
1870 continue; // If numbers were equal, go back to top without incrementing the buffer pointers
1871 }
1872
1873 if (CFUniCharIsSurrogateHighCharacter(ch1)) {
1874 strBuf1_idx++;
1875 if (strBuf1_idx < rangeToCompare.length && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx))) {
1876 ch1 = CFUniCharGetLongCharacterForSurrogatePair(ch1, CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx));
1877 } else {
1878 strBuf1_idx--;
1879 }
1880 }
1881 if (CFUniCharIsSurrogateHighCharacter(ch2)) {
1882 strBuf2_idx++;
1883 if (strBuf2_idx < string2_len && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx))) {
1884 ch2 = CFUniCharGetLongCharacterForSurrogatePair(ch2, CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx));
1885 } else {
1886 strBuf2_idx--;
1887 }
1888 }
1889
1890 if (ch1 != ch2) {
1891 #if DO_IGNORE_PUNCTUATION
1892 if (punctBMP) {
1893 if (CFUniCharIsMemberOfBitmap(ch1, (ch1 < 0x10000 ? punctBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, (ch1 >> 16))))) {
1894 ++strBuf1_idx; continue;
1895 }
1896 if (CFUniCharIsMemberOfBitmap(ch2, (ch2 < 0x10000 ? punctBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, (ch2 >> 16))))) {
1897 ++strBuf2_idx; continue;
1898 }
1899 }
1900 #endif /* DO_IGNORE_PUNCTUATION */
1901 // We standardize to lowercase here since currently, as of Unicode 3.1.1, it's one-to-one mapping.
1902 // Note we map to uppercase for both SMALL LETTER SIGMA and SMALL LETTER FINAL SIGMA
1903 if (caseInsensitive) {
1904 if (ch1 < 128) {
1905 ch1 -= ((ch1 >= 'A' && ch1 <= 'Z') ? 'A' - 'a' : 0);
1906 } else if (ch1 == 0x03C2 || ch1 == 0x03C3 || ch1 == 0x03A3) { // SMALL SIGMA
1907 ch1 = 0x03A3;
1908 } else {
1909 UniChar buffer[MAX_CASE_MAPPING_BUF];
1910
1911 if (CFUniCharMapCaseTo(ch1, buffer, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, 0, NULL) > 1) { // It's supposed to be surrogates
1912 ch1 = CFUniCharGetLongCharacterForSurrogatePair(buffer[0], buffer[1]);
1913 } else {
1914 ch1 = *buffer;
1915 }
1916 }
1917 if (ch2 < 128) {
1918 ch2 -= ((ch2 >= 'A' && ch2 <= 'Z') ? 'A' - 'a' : 0);
1919 } else if (ch2 == 0x03C2 || ch2 == 0x03C3 || ch2 == 0x03A3) { // SMALL SIGMA
1920 ch2 = 0x03A3;
1921 } else {
1922 UniChar buffer[MAX_CASE_MAPPING_BUF];
1923
1924 if (CFUniCharMapCaseTo(ch2, buffer, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, 0, NULL) > 1) { // It's supposed to be surrogates
1925 ch2 = CFUniCharGetLongCharacterForSurrogatePair(buffer[0], buffer[1]);
1926 } else {
1927 ch2 = *buffer;
1928 }
1929 }
1930 }
1931
1932 if (ch1 != ch2) { // still different
1933 if (decompose) { // ??? This is not exactly the canonical comparison (We need to do priority sort)
1934 Boolean isCh1Decomposable = (ch1 > 0x7F && CFUniCharIsMemberOf(ch1, kCFUniCharDecomposableCharacterSet));
1935 Boolean isCh2Decomposable = (ch2 > 0x7F && CFUniCharIsMemberOf(ch2, kCFUniCharDecomposableCharacterSet));
1936
1937 if (isCh1Decomposable != isCh2Decomposable) {
1938 UTF32Char decomposedCharater[MAX_DECOMPOSED_LENGTH];
1939 UInt32 decomposedCharacterLength;
1940 UInt32 idx;
1941
1942 if (isCh1Decomposable) {
1943 decomposedCharacterLength = CFUniCharDecomposeCharacter(ch1, decomposedCharater, MAX_DECOMPOSED_LENGTH);
1944 for (idx = 0; idx < decomposedCharacterLength && strBuf2_idx < string2_len; idx++) {
1945 ch1 = decomposedCharater[idx];
1946 if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan;
1947 strBuf2_idx++; ch2 = (strBuf2_idx < string2_len ? CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx) : 0xffff);
1948 if (CFUniCharIsSurrogateHighCharacter(ch2)) {
1949 strBuf2_idx++;
1950 if (strBuf2_idx < string2_len && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx))) {
1951 ch2 = CFUniCharGetLongCharacterForSurrogatePair(ch2, CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx));
1952 } else {
1953 strBuf2_idx--;
1954 }
1955 }
1956 }
1957 strBuf1_idx++; continue;
1958 } else { // ch2 is decomposable, then
1959 decomposedCharacterLength = CFUniCharDecomposeCharacter(ch2, decomposedCharater, MAX_DECOMPOSED_LENGTH);
1960 for (idx = 0; idx < decomposedCharacterLength && strBuf1_idx < rangeToCompare.length; idx++) {
1961 ch2 = decomposedCharater[idx];
1962 if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan;
1963 strBuf1_idx++; ch1 = (strBuf1_idx < rangeToCompare.length ? CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx) : 0xffff);
1964 if (CFUniCharIsSurrogateHighCharacter(ch1)) {
1965 strBuf1_idx++;
1966 if (strBuf1_idx < rangeToCompare.length && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx))) {
1967 ch1 = CFUniCharGetLongCharacterForSurrogatePair(ch1, CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx));
1968 } else {
1969 strBuf1_idx--;
1970 }
1971 }
1972 }
1973 strBuf2_idx++; continue;
1974 }
1975 }
1976 }
1977 if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan;
1978 }
1979 }
1980 strBuf1_idx++; strBuf2_idx++;
1981 }
1982 if (strBuf1_idx < rangeToCompare.length) {
1983 return kCFCompareGreaterThan;
1984 } else if (strBuf2_idx < string2_len) {
1985 return kCFCompareLessThan;
1986 } else {
1987 return kCFCompareEqualTo;
1988 }
1989 }
1990
1991
1992 CFComparisonResult CFStringCompare(CFStringRef string, CFStringRef str2, CFOptionFlags options) {
1993 return CFStringCompareWithOptions(string, str2, CFRangeMake(0, CFStringGetLength(string)), options);
1994 }
1995
1996 /* ??? Need to implement localized find
1997 */
1998 Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions, CFRange *result) {
1999 /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */
2000 int step;
2001 SInt32 fromLoc, toLoc; // fromLoc and toLoc are inclusive
2002 SInt32 cnt, findStrLen = CFStringGetLength(stringToFind);
2003 Boolean done = false;
2004 Boolean caseInsensitive = (compareOptions & kCFCompareCaseInsensitive) ? true : false;
2005 Boolean decompose = (compareOptions & kCFCompareNonliteral) ? true : false;
2006 UniChar tmpBuf[MAXTMPBUFFERLEN];
2007 UniChar *findBuf;
2008 UTF32Char ch1, ch2;
2009 Boolean isDecompBuf[MAXISDECOMPBUFFERLEN];
2010 CFStringInlineBuffer buf;
2011 CFAllocatorRef tmpAlloc = NULL;
2012
2013 if (findStrLen == 0) return false; // This is the way it is, by definition (even find("", "") -> false)
2014 if (!decompose && findStrLen > rangeToSearch.length) return false;
2015 if (rangeToSearch.length == 0) return false; // This protects against crashes further below (see 2908472); if we ever implement ignorable characters, this shouldn't be here
2016
2017 findBuf = (findStrLen > MAXTMPBUFFERLEN) ? CFAllocatorAllocate(tmpAlloc = __CFGetDefaultAllocator(), findStrLen * sizeof(UniChar), 0) : tmpBuf;
2018 if (findBuf != tmpBuf && __CFOASafe) __CFSetLastAllocationEventName(findBuf, "CFString (temp)");
2019 CFStringGetCharacters(stringToFind, CFRangeMake(0, findStrLen), findBuf);
2020
2021 if (decompose) {
2022 SInt32 max = __CFMin(MAXISDECOMPBUFFERLEN, findStrLen);
2023
2024 for (cnt = 0;cnt < max;cnt++) {
2025 if (CFUniCharIsSurrogateHighCharacter(findBuf[cnt]) && (cnt + 1 < max) && CFUniCharIsSurrogateLowCharacter(findBuf[cnt + 1])) {
2026 isDecompBuf[cnt] = isDecompBuf[cnt + 1] = CFUniCharIsMemberOf(CFUniCharGetLongCharacterForSurrogatePair(findBuf[cnt], findBuf[cnt + 1]), kCFUniCharDecomposableCharacterSet);
2027 ++cnt;
2028 } else {
2029 isDecompBuf[cnt] = (findBuf[cnt] > 0x7F && CFUniCharIsMemberOf(findBuf[cnt], kCFCharacterSetDecomposable));
2030 }
2031 }
2032 }
2033
2034 if (caseInsensitive) { /* Lower case the search string */
2035 for (cnt = 0; cnt < findStrLen; cnt++) {
2036 ch1 = findBuf[cnt];
2037 if (ch1 < 128) {
2038 if (ch1 >= 'A' && ch1 <= 'Z') findBuf[cnt] = (ch1 - 'A' + 'a'); /* Lower case the cheap way */
2039 } else if (ch1 == 0x03C2 || ch1 == 0x03C3 || ch1 == 0x03A3) { // SMALL SIGMA
2040 findBuf[cnt] = 0x03A3;
2041 } else {
2042 UniChar buffer[MAX_CASE_MAPPING_BUF];
2043
2044 if (CFUniCharIsSurrogateHighCharacter(ch1) && (cnt + 1 < findStrLen) && CFUniCharIsSurrogateLowCharacter(findBuf[cnt + 1])) {
2045 ch1 = CFUniCharGetLongCharacterForSurrogatePair(ch1, findBuf[cnt + 1]);
2046 }
2047 if (CFUniCharMapCaseTo(ch1, buffer, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, 0, NULL) > 1) { // It's supposed to be surrogates
2048 findBuf[cnt] = buffer[0];
2049 findBuf[++cnt] = buffer[1];
2050 } else {
2051 findBuf[cnt] = *buffer;
2052 }
2053 }
2054 }
2055 }
2056
2057 if (compareOptions & kCFCompareBackwards) {
2058 fromLoc = rangeToSearch.location + rangeToSearch.length - (decompose ? 1 : findStrLen);
2059 toLoc = ((compareOptions & kCFCompareAnchored) && !decompose ? fromLoc : rangeToSearch.location);
2060 } else {
2061 fromLoc = rangeToSearch.location;
2062 toLoc = ((compareOptions & kCFCompareAnchored) ? fromLoc : rangeToSearch.location + rangeToSearch.length - (decompose ? 1 : findStrLen));
2063 }
2064
2065 step = (fromLoc <= toLoc) ? 1 : -1;
2066 cnt = fromLoc;
2067 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, rangeToSearch.location + rangeToSearch.length));
2068 CFIndex buf_idx = fromLoc, buf_idx_end = rangeToSearch.location + rangeToSearch.length;
2069
2070 do {
2071 CFIndex chCnt;
2072 for (chCnt = 0; chCnt < findStrLen; chCnt++) {
2073 ch2 = buf_idx < buf_idx_end ? CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) : 0xffff;
2074 if (decompose && ch2 == 0xffff) break;
2075
2076 if (caseInsensitive) {
2077 if (CFUniCharIsSurrogateHighCharacter(ch2)) {
2078 buf_idx++;
2079 if (buf_idx < buf_idx_end && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx))) {
2080 ch2 = CFUniCharGetLongCharacterForSurrogatePair(ch2, CFStringGetCharacterFromInlineBuffer(&buf, buf_idx));
2081 } else {
2082 buf_idx--;
2083 }
2084 }
2085
2086 if (ch2 < 128) {
2087 if (ch2 >= 'A' && ch2 <= 'Z') ch2 = (ch2 - 'A' + 'a'); /* Lower case the cheap way */
2088 } else if (ch2 == 0x03C2 || ch2 == 0x03C3 || ch2 == 0x03A3) { // SMALL SIGMA
2089 ch2 = 0x03A3;
2090 } else {
2091 UniChar buffer[MAX_CASE_MAPPING_BUF];
2092
2093 if (CFUniCharMapCaseTo(ch2, buffer, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, 0, NULL) > 1) { // It's supposed to be surrogates
2094 ch2 = CFUniCharGetLongCharacterForSurrogatePair(buffer[0], buffer[1]);
2095 } else {
2096 ch2 = *buffer;
2097 }
2098 }
2099 }
2100 if (decompose) {
2101 if (CFUniCharIsSurrogateHighCharacter(ch2)) {
2102 buf_idx++;
2103 if (buf_idx < buf_idx_end && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx))) {
2104 ch2 = CFUniCharGetLongCharacterForSurrogatePair(ch2, CFStringGetCharacterFromInlineBuffer(&buf, buf_idx));
2105 } else {
2106 buf_idx--;
2107 }
2108 }
2109
2110 if (CFUniCharIsSurrogateHighCharacter(findBuf[chCnt]) && (chCnt + 1 < findStrLen) && CFUniCharIsSurrogateLowCharacter(findBuf[chCnt + 1])) {
2111 ch1 = CFUniCharGetLongCharacterForSurrogatePair(findBuf[chCnt], findBuf[chCnt + 1]);
2112 ++chCnt;
2113 } else {
2114 ch1 = findBuf[chCnt];
2115 }
2116
2117 if (ch1 != ch2) { // ??? This is not exactly the canonical comparison. Needs to be addressed by Cheetah.
2118 Boolean isCh1Decomposable = (chCnt < MAXISDECOMPBUFFERLEN ? isDecompBuf[chCnt] : (ch1 > 0x7F && CFUniCharIsMemberOf(ch1, kCFUniCharDecomposableCharacterSet)));
2119 Boolean isCh2Decomposable = (ch2 > 0x7F && CFUniCharIsMemberOf(ch2, kCFUniCharDecomposableCharacterSet));
2120
2121 if (isCh1Decomposable != isCh2Decomposable) {
2122 UTF32Char decomposedCharater[MAX_DECOMPOSED_LENGTH];
2123 UInt32 decomposedCharacterLength;
2124 UInt32 idx;
2125
2126 if (isCh1Decomposable) {
2127 decomposedCharacterLength = CFUniCharDecomposeCharacter(ch1, decomposedCharater, MAX_DECOMPOSED_LENGTH);
2128 for (idx = 0; idx < decomposedCharacterLength && buf_idx < buf_idx_end; idx++) {
2129 if (decomposedCharater[idx] != ch2) break;
2130
2131 buf_idx++; ch2 = buf_idx < buf_idx_end ? CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) : 0xffff;
2132 if (CFUniCharIsSurrogateHighCharacter(ch2)) {
2133 buf_idx++;
2134 if (buf_idx < buf_idx_end && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx))) {
2135 ch2 = CFUniCharGetLongCharacterForSurrogatePair(ch2, CFStringGetCharacterFromInlineBuffer(&buf, buf_idx));
2136 } else {
2137 buf_idx--;
2138 }
2139 }
2140 }
2141 if (idx < decomposedCharacterLength) break;
2142 continue;
2143 } else { // ch2 is decomposable, then
2144 int32_t foundLen = chCnt;
2145
2146 decomposedCharacterLength = CFUniCharDecomposeCharacter(ch2, decomposedCharater, MAX_DECOMPOSED_LENGTH);
2147 for (idx = 0;idx < decomposedCharacterLength && foundLen < findStrLen;idx++) {
2148 if (CFUniCharIsSurrogateHighCharacter(findBuf[foundLen]) && ((foundLen + 1) < findStrLen) && CFUniCharIsSurrogateLowCharacter(findBuf[foundLen + 1])) {
2149 if (CFUniCharGetLongCharacterForSurrogatePair(findBuf[foundLen], findBuf[foundLen + 1]) != decomposedCharater[idx]) break;
2150 ++foundLen;
2151 } else {
2152 if (findBuf[foundLen] != decomposedCharater[idx]) break;
2153 }
2154 ++foundLen;
2155 }
2156 if (idx < decomposedCharacterLength) break;
2157 chCnt = foundLen - 1; // Decrement so we can back up
2158 buf_idx++; continue;
2159 }
2160 }
2161 break;
2162 }
2163 } else {
2164 if (ch2 > 0xFFFF) { // Non-BMP
2165 if (CFUniCharIsSurrogateHighCharacter(findBuf[chCnt]) && (chCnt + 1 < findStrLen) && CFUniCharIsSurrogateLowCharacter(findBuf[chCnt + 1])) {
2166 if (ch2 != CFUniCharGetLongCharacterForSurrogatePair(findBuf[chCnt], findBuf[chCnt + 1])) break;
2167 ++chCnt;
2168 } else {
2169 break;
2170 }
2171 } else {
2172 if (findBuf[chCnt] != ch2) break;
2173 }
2174 }
2175 buf_idx++;
2176 }
2177 if (chCnt == findStrLen) {
2178 if (decompose && (buf_idx < buf_idx_end)) {
2179 if ((compareOptions & kCFCompareAnchored) && (compareOptions & kCFCompareBackwards)) break;
2180
2181 ch2 = CFStringGetCharacterFromInlineBuffer(&buf, buf_idx);
2182
2183 if (CFUniCharIsSurrogateHighCharacter(ch2)) {
2184 if ((buf_idx + 1) < buf_idx_end && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx + 1))) {
2185 ch2 = CFUniCharGetLongCharacterForSurrogatePair(ch2, CFStringGetCharacterFromInlineBuffer(&buf, buf_idx + 1));
2186 }
2187 }
2188 if (ch2 > 0x7F && CFUniCharIsMemberOf(ch2, kCFUniCharNonBaseCharacterSet)) continue; // Next char is non-base
2189 }
2190 done = true;
2191 if (result) {
2192 result->location = cnt;
2193 result->length = (decompose ? buf_idx - cnt : findStrLen);
2194 }
2195 } else if (cnt == toLoc) {
2196 break;
2197 } else {
2198 cnt += step;
2199 buf_idx = cnt;
2200 }
2201 } while (!done);
2202
2203 if (findBuf != tmpBuf) CFAllocatorDeallocate(tmpAlloc, findBuf);
2204
2205 return done;
2206 }
2207
2208
2209 // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults()
2210
2211 static const void *__rangeRetain(CFAllocatorRef allocator, const void *ptr) {
2212 CFRetain(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
2213 return ptr;
2214 }
2215
2216 static void __rangeRelease(CFAllocatorRef allocator, const void *ptr) {
2217 CFRelease(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange)));
2218 }
2219
2220 static CFStringRef __rangeCopyDescription(const void *ptr) {
2221 CFRange range = *(CFRange *)ptr;
2222 return CFStringCreateWithFormat(NULL /* ??? allocator */, NULL, CFSTR("{%d, %d}"), range.location, range.length);
2223 }
2224
2225 static Boolean __rangeEqual(const void *ptr1, const void *ptr2) {
2226 CFRange range1 = *(CFRange *)ptr1;
2227 CFRange range2 = *(CFRange *)ptr2;
2228 return (range1.location == range2.location) && (range1.length == range2.length);
2229 }
2230
2231
2232 CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions) {
2233 CFRange foundRange;
2234 Boolean backwards = compareOptions & kCFCompareBackwards;
2235 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
2236 CFMutableDataRef rangeStorage = NULL; // Basically an array of CFRange, CFDataRef (packed)
2237 uint8_t *rangeStorageBytes = NULL;
2238 CFIndex foundCount = 0;
2239 CFIndex capacity = 0; // Number of CFRange, CFDataRef element slots in rangeStorage
2240
2241 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
2242
2243 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
2244 // Determine the next range
2245 if (backwards) {
2246 rangeToSearch.length = foundRange.location - rangeToSearch.location;
2247 } else {
2248 rangeToSearch.location = foundRange.location + foundRange.length;
2249 rangeToSearch.length = endIndex - rangeToSearch.location;
2250 }
2251
2252 // If necessary, grow the data and squirrel away the found range
2253 if (foundCount >= capacity) {
2254 if (rangeStorage == NULL) rangeStorage = CFDataCreateMutable(alloc, 0);
2255 capacity = (capacity + 4) * 2;
2256 CFDataSetLength(rangeStorage, capacity * (sizeof(CFRange) + sizeof(CFDataRef)));
2257 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage) + foundCount * (sizeof(CFRange) + sizeof(CFDataRef));
2258 }
2259 memmove(rangeStorageBytes, &foundRange, sizeof(CFRange)); // The range
2260 memmove(rangeStorageBytes + sizeof(CFRange), &rangeStorage, sizeof(CFDataRef)); // The data
2261 rangeStorageBytes += (sizeof(CFRange) + sizeof(CFDataRef));
2262 foundCount++;
2263 }
2264
2265 if (foundCount > 0) {
2266 CFIndex cnt;
2267 CFMutableArrayRef array;
2268 const CFArrayCallBacks callbacks = {0, __rangeRetain, __rangeRelease, __rangeCopyDescription, __rangeEqual};
2269
2270 CFDataSetLength(rangeStorage, foundCount * (sizeof(CFRange) + sizeof(CFDataRef))); // Tighten storage up
2271 rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage);
2272
2273 array = CFArrayCreateMutable(alloc, foundCount * sizeof(CFRange *), &callbacks);
2274 for (cnt = 0; cnt < foundCount; cnt++) {
2275 // Each element points to the appropriate CFRange in the CFData
2276 CFArrayAppendValue(array, rangeStorageBytes + cnt * (sizeof(CFRange) + sizeof(CFDataRef)));
2277 }
2278 CFRelease(rangeStorage); // We want the data to go away when all CFRanges inside it are released...
2279 return array;
2280 } else {
2281 return NULL;
2282 }
2283 }
2284
2285
2286 CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFOptionFlags compareOptions) {
2287 CFRange foundRange;
2288
2289 if (CFStringFindWithOptions(string, stringToFind, CFRangeMake(0, CFStringGetLength(string)), compareOptions, &foundRange)) {
2290 return foundRange;
2291 } else {
2292 return CFRangeMake(kCFNotFound, 0);
2293 }
2294 }
2295
2296 Boolean CFStringHasPrefix(CFStringRef string, CFStringRef prefix) {
2297 return CFStringFindWithOptions(string, prefix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored, NULL);
2298 }
2299
2300 Boolean CFStringHasSuffix(CFStringRef string, CFStringRef suffix) {
2301 return CFStringFindWithOptions(string, suffix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored|kCFCompareBackwards, NULL);
2302 }
2303
// Code points and limits used by the character-cluster functions below.
// ZERO_WIDTH_JOINER and COMBINING_GRAPHEME_JOINER are the joiners tested when a
// cluster is extended across grapheme joiners.
2304 #define ZERO_WIDTH_JOINER (0x200D)
2305 #define COMBINING_GRAPHEME_JOINER (0x034F)
// How far back (in UTF-16 units) the cluster code looks for a transcoding hint;
// 4 is also the largest entry in __CFTranscodingHintLength.
2306 #define MAX_TRANSCODING_LENGTH 4
2307
2308 // Hangul ranges
// Conjoining jamo: choseong, jungseong and jongseong ranges (inclusive bounds as
// tested by the Hangul state machine in _CFStringInlineBufferGetComposedRange).
2309 #define HANGUL_CHOSEONG_START (0x1100)
2310 #define HANGUL_CHOSEONG_END (0x115F)
2311 #define HANGUL_JUNGSEONG_START (0x1160)
2312 #define HANGUL_JUNGSEONG_END (0x11A2)
2313 #define HANGUL_JONGSEONG_START (0x11A8)
2314 #define HANGUL_JONGSEONG_END (0x11F9)
2315
// Precomposed Hangul syllables.
2316 #define HANGUL_SYLLABLE_START (0xAC00)
2317 #define HANGUL_SYLLABLE_END (0xD7AF)
2318
// Modulus used by _CFStringIsHangulLVT(): a syllable whose offset from
// HANGUL_SYLLABLE_START is a multiple of this value is treated as LV, else LVT.
2319 #define HANGUL_JONGSEONG_COUNT (28)
2320
2321 CF_INLINE bool _CFStringIsHangulLVT(UTF32Char character) {
2322 return (((character - HANGUL_SYLLABLE_START) % HANGUL_JONGSEONG_COUNT) ? true : false);
2323 }
2324
/* Indexed by (hint character - 0xF860) for characters matching
 * (c & 0x1FFFF0) == 0xF860. Each entry is the number of UTF-16 units that
 * follow the hint character and belong to the same cluster (the cluster code
 * uses entry + 1 as the total length). */
static uint8_t __CFTranscodingHintLength[16] = {
    2,	/* 0xF860 */
    3,	/* 0xF861 */
    4,	/* 0xF862 */
    4,	/* 0xF863 */
    4,	/* 0xF864 */
    4,	/* 0xF865 */
    4,	/* 0xF866 */
    2,	/* 0xF867 */
    2,	/* 0xF868 */
    2,	/* 0xF869 */
    2,	/* 0xF86A */
    4,	/* 0xF86B */
    0,	/* 0xF86C */
    0,	/* 0xF86D */
    0,	/* 0xF86E */
    0	/* 0xF86F */
};
2328
/* States of the Hangul syllable scanner in _CFStringInlineBufferGetComposedRange(). */
enum {
    kCFStringHangulStateL = 0,		/* last seen: choseong jamo */
    kCFStringHangulStateV = 1,		/* last seen: jungseong jamo */
    kCFStringHangulStateT = 2,		/* last seen: jongseong jamo */
    kCFStringHangulStateLV = 3,		/* last seen: precomposed syllable for which _CFStringIsHangulLVT() is false */
    kCFStringHangulStateLVT = 4,	/* last seen: precomposed syllable for which _CFStringIsHangulLVT() is true */
    kCFStringHangulStateBreak = 5	/* the character does not continue the syllable */
};
2337
// Computes the range of the composed character sequence that contains index
// `start` of `buffer`.
//   buffer     - inline buffer the UTF-16 units are read from.
//   start      - index of the unit whose sequence is wanted.
//   type       - cluster type; for kCFStringBackwardDeletionCluster, characters in
//                0x0530..0x194F are neither extended backward nor absorbed forward.
//   nonBaseBMP - non-base bitmap used for BMP characters; bitmaps for other planes
//                are fetched with CFUniCharGetBitmapPtrForPlane().
// Returns CFRangeMake(start, end - start) after `start` has been moved back and
// `end` moved forward over surrogate halves, non-base characters, 0xFF9E / 0xFF9F,
// characters with (c & 0x1FFFF0) == 0xF870, and conjoining Hangul.
// NOTE(review): several reads use start - 1, end or end + 1 without a bounds test,
// and the forward loops stop on a 0 value; this presumably relies on
// CFStringGetCharacterFromInlineBuffer() returning 0 outside the buffer - confirm.
2338 static CFRange _CFStringInlineBufferGetComposedRange(CFStringInlineBuffer *buffer, CFIndex start, CFStringCharacterClusterType type, const uint8_t *nonBaseBMP) {
2339 CFIndex end = start + 1;
2340 const uint8_t *nonBase = nonBaseBMP;
2341 UTF32Char character;
2342 UTF16Char otherSurrogate;
2343 uint8_t step;
2344
2345 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
2346
2347
2348 // We don't combine characters in Armenian ~ Limbu range for backward deletion
2349 if ((type != kCFStringBackwardDeletionCluster) || (character < 0x0530) || (character > 0x194F)) {
2350 // Check if the current is surrogate
2351 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start + 1)))) {
2352 ++end;
2353 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
2354 nonBase = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
2355 }
2356
2357 // Extend backward
// Walk back while the current character is a non-base (or one of the special
// cases tested below); stop at the first character that is not.
2358 while (start > 0) {
2359 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
2360
2361 if (character < 0x10000) { // the first round could be already be non-BMP
2362 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)))) {
2363 character = CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate, character);
2364 nonBase = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
2365 --start;
2366 } else {
2367 nonBase = nonBaseBMP;
2368 }
2369 }
2370
2371 if (!CFUniCharIsMemberOfBitmap(character, nonBase) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
2372
2373 --start;
2374
2375 character = CFStringGetCharacterFromInlineBuffer(buffer, start);
2376 }
2377 }
2378
2379 // Hangul
// At this point `character` is the character at `start` (the base found above).
2380 if (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END))) {
2381 uint8_t state;
2382 uint8_t initialState;
2383
// Classify the starting character: choseong -> L, jungseong -> V, jongseong -> T,
// precomposed syllable -> LV or LVT.
2384 if (character < HANGUL_JUNGSEONG_START) {
2385 state = kCFStringHangulStateL;
2386 } else if (character < HANGUL_JONGSEONG_START) {
2387 state = kCFStringHangulStateV;
2388 } else if (character < HANGUL_SYLLABLE_START) {
2389 state = kCFStringHangulStateT;
2390 } else {
2391 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
2392 }
2393 initialState = state;
2394
2395 // Extend backward
// `state` describes the character just after the one being examined; the switch
// decides whether the preceding Hangul character may join in front of it.
2396 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)) >= HANGUL_CHOSEONG_START) && (character <= HANGUL_SYLLABLE_END) && ((character <= HANGUL_JONGSEONG_END) || (character >= HANGUL_SYLLABLE_START))) {
2397 switch (state) {
2398 case kCFStringHangulStateV:
2399 if (character <= HANGUL_CHOSEONG_END) {
2400 state = kCFStringHangulStateL;
2401 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END) && !_CFStringIsHangulLVT(character)) {
2402 state = kCFStringHangulStateLV;
2403 } else if (character > HANGUL_JUNGSEONG_END) {
2404 state = kCFStringHangulStateBreak;
2405 }
2406 break;
2407
2408 case kCFStringHangulStateT:
2409 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JUNGSEONG_END)) {
2410 state = kCFStringHangulStateV;
2411 } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)) {
2412 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
2413 } else if (character < HANGUL_JUNGSEONG_START) {
2414 state = kCFStringHangulStateBreak;
2415 }
2416 break;
2417
2418 default:
2419 state = ((character < HANGUL_JUNGSEONG_START) ? kCFStringHangulStateL : kCFStringHangulStateBreak);
2420 break;
2421 }
2422
2423 if (state == kCFStringHangulStateBreak) break;
2424 --start;
2425 }
2426
2427 // Extend forward
// Restart from the state of the original character before scanning forward.
2428 state = initialState;
2429 while (((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) && (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)))) {
2430 switch (state) {
2431 case kCFStringHangulStateLV:
2432 case kCFStringHangulStateV:
2433 if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) {
2434 state = ((character < HANGUL_JONGSEONG_START) ? kCFStringHangulStateV : kCFStringHangulStateT);
2435 } else {
2436 state = kCFStringHangulStateBreak;
2437 }
2438 break;
2439
2440 case kCFStringHangulStateLVT:
2441 case kCFStringHangulStateT:
2442 state = (((character >= HANGUL_JONGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) ? kCFStringHangulStateT : kCFStringHangulStateBreak);
2443 break;
2444
2445 default:
2446 if (character < HANGUL_JUNGSEONG_START) {
2447 state = kCFStringHangulStateL;
2448 } else if (character < HANGUL_JONGSEONG_START) {
2449 state = kCFStringHangulStateV;
2450 } else if (character >= HANGUL_SYLLABLE_START) {
2451 state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV);
2452 } else {
2453 state = kCFStringHangulStateBreak;
2454 }
2455 break;
2456 }
2457
2458 if (state == kCFStringHangulStateBreak) break;
2459 ++end;
2460 }
2461 }
2462
2463 // Extend forward
// Absorb following non-base characters; `step` is 2 when the character at `end`
// is a surrogate pair and 1 otherwise.
2464 while ((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) {
2465 if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break;
2466
2467 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, end + 1)))) {
2468 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
2469 nonBase = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16));
2470 step = 2;
2471 } else {
2472 nonBase = nonBaseBMP;
2473 step = 1;
2474 }
2475
2476 if (!CFUniCharIsMemberOfBitmap(character, nonBase) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break;
2477
2478 end += step;
2479 }
2480
2481 return CFRangeMake(start, end - start);
2482 }
2483
2484 CF_INLINE bool _CFStringIsVirama(UTF32Char character, const uint8_t *combClassBMP) {
2485 return ((character == COMBINING_GRAPHEME_JOINER) || (CFUniCharGetCombiningPropertyForCharacter(character, ((character < 0x10000) ? combClassBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (character >> 16)))) == 9) ? true : false);
2486 }
2487
2488 CFRange CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string, CFIndex charIndex, CFStringCharacterClusterType type) {
2489 CFRange range;
2490 CFIndex currentIndex;
2491 CFIndex length = CFStringGetLength(string);
2492 CFStringInlineBuffer stringBuffer;
2493 UTF32Char character;
2494 UTF16Char otherSurrogate;
2495 static const uint8_t *nonBaseBMP = NULL;
2496 static const uint8_t *letterBMP = NULL;
2497 static const uint8_t *combClassBMP = NULL;
2498
2499 if (charIndex >= length) return CFRangeMake(kCFNotFound, 0);
2500
2501 /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters
2502 */
2503 if (!CF_IS_OBJC(__kCFStringTypeID, string) && __CFStrIsEightBit(string)) return CFRangeMake(charIndex, 1);
2504
2505 if (NULL == nonBaseBMP) {
2506 nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
2507 letterBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, 0);
2508 combClassBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
2509 }
2510
2511 CFStringInitInlineBuffer(string, &stringBuffer, CFRangeMake(0, length));
2512
2513 // Get composed character sequence first
2514 range = _CFStringInlineBufferGetComposedRange(&stringBuffer, charIndex, type, nonBaseBMP);
2515
2516 // Do grapheme joiners
2517 if (type < kCFStringCursorMovementCluster) {
2518 const uint8_t *letter = letterBMP;
2519
2520 // Check to see if we have a letter at the beginning of initial cluster
2521 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location);
2522
2523 if ((range.length > 1) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location + 1)))) {
2524 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
2525 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
2526 }
2527
2528 if ((character == ZERO_WIDTH_JOINER) || CFUniCharIsMemberOfBitmap(character, letter)) {
2529 CFRange otherRange;
2530
2531 // Check if preceded by grapheme joiners (U034F and viramas)
2532 otherRange.location = currentIndex = range.location;
2533
2534 while (currentIndex > 1) {
2535 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex);
2536
2537 // ??? We're assuming viramas only in BMP
2538 if ((_CFStringIsVirama(character, combClassBMP) || ((character == ZERO_WIDTH_JOINER) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex), combClassBMP))) && (currentIndex > 0)) {
2539 --currentIndex;
2540 } else {
2541 break;
2542 }
2543
2544 currentIndex = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, nonBaseBMP).location;
2545
2546 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
2547
2548 if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1)))) {
2549 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
2550 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
2551 --currentIndex;
2552 } else {
2553 letter = letterBMP;
2554 }
2555
2556 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
2557 range.location = currentIndex;
2558 }
2559
2560 range.length += otherRange.location - range.location;
2561
2562 // Check if followed by grapheme joiners
2563 if ((range.length > 1) && ((range.location + range.length) < length)) {
2564 otherRange = range;
2565
2566 do {
2567 currentIndex = otherRange.location + otherRange.length;
2568 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1);
2569
2570 // ??? We're assuming viramas only in BMP
2571 if ((character != ZERO_WIDTH_JOINER) && !_CFStringIsVirama(character, combClassBMP)) break;
2572
2573 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
2574
2575 if (character == ZERO_WIDTH_JOINER) character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, ++currentIndex);
2576
2577 if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex + 1)))) {
2578 character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate);
2579 letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16));
2580 } else {
2581 letter = letterBMP;
2582 }
2583
2584 // We only conjoin letters
2585 if (!CFUniCharIsMemberOfBitmap(character, letter)) break;
2586 otherRange = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, nonBaseBMP);
2587 } while ((otherRange.location + otherRange.length) < length);
2588 range.length = currentIndex - range.location;
2589 }
2590 }
2591 }
2592
2593 // Check if we're part of prefix transcoding hints
2594 if (range.location > 0) {
2595 CFIndex otherIndex;
2596
2597 currentIndex = (range.location + range.length) - (MAX_TRANSCODING_LENGTH + 1);
2598 if (currentIndex < 0) currentIndex = 0;
2599
2600 while (currentIndex <= range.location) {
2601 character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex);
2602
2603 if ((character & 0x1FFFF0) == 0xF860) { // transcoding hint
2604 otherIndex = currentIndex + __CFTranscodingHintLength[(character - 0xF860)] + 1;
2605 if (otherIndex >= (range.location + range.length)) {
2606 range.location = currentIndex;
2607 range.length = otherIndex - currentIndex;
2608 break;
2609 }
2610 }
2611 ++currentIndex;
2612 }
2613 }
2614
2615 return range;
2616 }
2617
2618 #if 1 /* Using the new implementation. Leaving the old implementation if'ed out for testing purposes for now */
2619 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
2620 return CFStringGetRangeOfCharacterClusterAtIndex(theString, theIndex, kCFStringComposedCharacterCluster);
2621 }
2622 #else
2623 /*!
2624 @function CFStringGetRangeOfComposedCharactersAtIndex
2625 Returns the range of the composed character sequence at the specified index.
2626 @param theString The CFString which is to be searched. If this
2627 parameter is not a valid CFString, the behavior is
2628 undefined.
2629 @param theIndex The index of the character contained in the
2630 composed character sequence. If the index is
2631 outside the index space of the string (0 to N-1 inclusive,
2632 where N is the length of the string), the behavior is
2633 undefined.
2634 @result The range of the composed character sequence.
2635 */
2636 #define ExtHighHalfZoneLow 0xD800
2637 #define ExtHighHalfZoneHigh 0xDBFF
2638 #define ExtLowHalfZoneLow 0xDC00
2639 #define ExtLowHalfZoneHigh 0xDFFF
2640 #define JunseongStart 0x1160
2641 #define JonseongEnd 0x11F9
2642 CF_INLINE Boolean IsHighCode(UniChar X) { return (X >= ExtHighHalfZoneLow && X <= ExtHighHalfZoneHigh); }
2643 CF_INLINE Boolean IsLowCode(UniChar X) { return (X >= ExtLowHalfZoneLow && X <= ExtLowHalfZoneHigh); }
2644 #define IsHangulConjoiningJamo(X) (X >= JunseongStart && X <= JonseongEnd)
2645 #define IsHalfwidthKanaVoicedMark(X) ((X == 0xFF9E) || (X == 0xFF9F))
2646 CF_INLINE Boolean IsNonBaseChar(UniChar X, CFCharacterSetRef nonBaseSet) { return (CFCharacterSetIsCharacterMember(nonBaseSet, X) || IsHangulConjoiningJamo(X) || IsHalfwidthKanaVoicedMark(X) || (X & 0x1FFFF0) == 0xF870); } // combining char, hangul jamo, or Apple corporate variant tag
2647 #define ZWJ 0x200D
2648 #define ZWNJ 0x200C
2649 #define COMBINING_GRAPHEME_JOINER (0x034F)
2650
2651 static CFCharacterSetRef nonBaseChars = NULL;
2652 static CFCharacterSetRef letterChars = NULL;
2653 static const void *__CFCombiningClassBMP = NULL;
2654
2655 CF_INLINE bool IsVirama(UTF32Char character) {
2656 return ((character == COMBINING_GRAPHEME_JOINER) ? true : ((character < 0x10000) && (CFUniCharGetCombiningPropertyForCharacter(character, __CFCombiningClassBMP) == 9) ? true : false));
2657 }
2658
2659 CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) {
2660 CFIndex left, current, save;
2661 CFIndex len = CFStringGetLength(theString);
2662 CFStringInlineBuffer stringBuffer;
2663 static volatile Boolean _isInited = false;
2664
2665 if (theIndex >= len) return CFRangeMake(kCFNotFound, 0);
2666
2667 if (!_isInited) {
2668 nonBaseChars = CFCharacterSetGetPredefined(kCFCharacterSetNonBase);
2669 letterChars = CFCharacterSetGetPredefined(kCFCharacterSetLetter);
2670 __CFCombiningClassBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
2671 _isInited = true;
2672 }
2673
2674 save = current = theIndex;
2675
2676 CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, len));
2677
2678 /*
2679 * First check for transcoding hints
2680 */
2681 {
2682 CFRange theRange = (current > MAX_TRANSCODING_LENGTH ? CFRangeMake(current - MAX_TRANSCODING_LENGTH, MAX_TRANSCODING_LENGTH + 1) : CFRangeMake(0, current + 1));
2683
2684 // Should check the next loc ?
2685 if (current + 1 < len) ++theRange.length;
2686
2687 if (theRange.length > 1) {
2688 UniChar characterBuffer[MAX_TRANSCODING_LENGTH + 2]; // Transcoding hint length + current loc + next loc
2689
2690 if (stringBuffer.directBuffer) {
2691 memmove(characterBuffer, stringBuffer.directBuffer + theRange.location, theRange.length * sizeof(UniChar));
2692 } else {
2693 CFStringGetCharacters(theString, theRange, characterBuffer);
2694 }
2695
2696 while (current >= theRange.location) {
2697 if ((characterBuffer[current - theRange.location] & 0x1FFFF0) == 0xF860) {
2698 theRange = CFRangeMake(current, __CFTranscodingHintLength[characterBuffer[current - theRange.location] - 0xF860] + 1);
2699 if ((theRange.location + theRange.length) <= theIndex) break;
2700 if ((theRange.location + theRange.length) >= len) theRange.length = len - theRange.location;
2701 return theRange;
2702 }
2703 if (current == 0) break;
2704 --current;
2705 }
2706 current = theIndex; // Reset current
2707 }
2708 }
2709
2710 //#warning Aki 5/29/01 This does not support non-base chars in non-BMP planes (i.e. musical symbol combining stem in Unicode 3.1)
2711 /*
2712 * if we start NOT on a base, first move back to a base as appropriate.
2713 */
2714
2715 roundAgain:
2716
2717 while ((current > 0) && IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current), nonBaseChars)) --current;
2718
2719 if (current >= 1 && current < len && CFCharacterSetIsCharacterMember(letterChars, CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) {
2720 --current;
2721 goto roundAgain;
2722 } else if ((current >= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1) == ZWJ) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 2))) {
2723 current -= 2;
2724 goto roundAgain;
2725 }
2726
2727 /*
2728 * Set the left position, then jump back to the saved original position.
2729 */
2730
2731 if (current >= 1 && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) --current;
2732 left = current;
2733 current = save;
2734
2735 /*
2736 * Now, presume we are on a base; move forward & look for the next base.
2737 * Handle jumping over H/L codes.
2738 */
2739 if (IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && (current + 1) < len && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current + 1))) ++current;
2740 ++current;
2741
2742 round2Again:
2743
2744 if (current < len) {
2745 while (IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current), nonBaseChars)) {
2746 ++current;
2747 if (current >= len) break;
2748 }
2749 if ((current < len) && CFCharacterSetIsCharacterMember(letterChars, CFStringGetCharacterFromInlineBuffer(&stringBuffer, current))) {
2750 if (IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) {
2751 ++current; goto round2Again;
2752 } else if ((current >= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1) == ZWJ) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 2))) {
2753 ++current; goto round2Again;
2754 }
2755 }
2756 }
2757 /*
2758 * Now, "current" is a base, and "left" is a base.
2759 * The junk between had better contain "save"!
2760 */
2761 if ((! (left <= save)) || (! (save <= current))) {
2762 CFLog(0, CFSTR("CFString: CFStringGetRangeOfComposedCharactersAtIndex:%d returned invalid\n"), save);
2763 }
2764 return CFRangeMake(left, current - left);
2765 }
2766 #endif
2767
2768 /*!
2769 @function CFStringFindCharacterFromSet
2770 Query the range of characters contained in the specified character set.
2771 @param theString The CFString which is to be searched. If this
2772 parameter is not a valid CFString, the behavior is
2773 undefined.
2774 @param theSet The CFCharacterSet against which the membership
2775 of characters is checked. If this parameter is not a valid
2776 CFCharacterSet, the behavior is undefined.
2777 @param range The range of characters within the string to search. If
2778 the range location or end point (defined by the location
2779 plus length minus 1) are outside the index space of the
2780 string (0 to N-1 inclusive, where N is the length of the
2781 string), the behavior is undefined. If the range length is
2782 negative, the behavior is undefined. The range may be empty
2783 (length 0), in which case no search is performed.
2784 @param searchOptions The bitwise-or'ed option flags to control
2785 the search behavior. The supported options are
2786 kCFCompareBackwards andkCFCompareAnchored.
2787 If other option flags are specified, the behavior
2788 is undefined.
2789 @param result The pointer to a CFRange supplied by the caller in
2790 which the search result is stored. If a pointer to an invalid
2791 memory is specified, the behavior is undefined.
2792 @result true, if at least a character which is a member of the character
2793 set is found and result is filled, otherwise, false.
2794 */
2795 #define SURROGATE_START 0xD800
2796 #define SURROGATE_END 0xDFFF
2797
2798 CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFOptionFlags searchOptions, CFRange *result) {
2799 CFStringInlineBuffer stringBuffer;
2800 UniChar ch;
2801 CFIndex step;
2802 CFIndex fromLoc, toLoc, cnt; // fromLoc and toLoc are inclusive
2803 Boolean found = false;
2804 Boolean done = false;
2805
2806 //#warning FIX ME !! Should support kCFCompareNonliteral
2807
2808 if ((rangeToSearch.location + rangeToSearch.length > CFStringGetLength(theString)) || (rangeToSearch.length == 0)) return false;
2809
2810 if (searchOptions & kCFCompareBackwards) {
2811 fromLoc = rangeToSearch.location + rangeToSearch.length - 1;
2812 toLoc = rangeToSearch.location;
2813 } else {
2814 fromLoc = rangeToSearch.location;
2815 toLoc = rangeToSearch.location + rangeToSearch.length - 1;
2816 }
2817 if (searchOptions & kCFCompareAnchored) {
2818 toLoc = fromLoc;
2819 }
2820
2821 step = (fromLoc <= toLoc) ? 1 : -1;
2822 cnt = fromLoc;
2823
2824 CFStringInitInlineBuffer(theString, &stringBuffer, rangeToSearch);
2825
2826 do {
2827 ch = CFStringGetCharacterFromInlineBuffer(&stringBuffer, cnt - rangeToSearch.location);
2828 if ((ch >= SURROGATE_START) && (ch <= SURROGATE_END)) {
2829 int otherCharIndex = cnt + step;
2830
2831 if (((step < 0) && (otherCharIndex < toLoc)) || ((step > 0) && (otherCharIndex > toLoc))) {
2832 done = true;
2833 } else {
2834 UniChar highChar;
2835 UniChar lowChar = CFStringGetCharacterFromInlineBuffer(&stringBuffer, otherCharIndex - rangeToSearch.location);
2836
2837 if (cnt < otherCharIndex) {
2838 highChar = ch;
2839 } else {
2840 highChar = lowChar;
2841 lowChar = ch;
2842 }
2843
2844 if (CFUniCharIsSurrogateHighCharacter(highChar) && CFUniCharIsSurrogateLowCharacter(lowChar) && CFCharacterSetIsLongCharacterMember(theSet, CFUniCharGetLongCharacterForSurrogatePair(highChar, lowChar))) {
2845 if (result) *result = CFRangeMake((cnt < otherCharIndex ? cnt : otherCharIndex), 2);
2846 return true;
2847 } else if (otherCharIndex == toLoc) {
2848 done = true;
2849 } else {
2850 cnt = otherCharIndex + step;
2851 }
2852 }
2853 } else if (CFCharacterSetIsCharacterMember(theSet, ch)) {
2854 done = found = true;
2855 } else if (cnt == toLoc) {
2856 done = true;
2857 } else {
2858 cnt += step;
2859 }
2860 } while (!done);
2861
2862 if (found && result) *result = CFRangeMake(cnt, 1);
2863 return found;
2864 }
2865
2866 /* Line range code */
2867
2868 #define CarriageReturn '\r' /* 0x0d */
2869 #define NewLine '\n' /* 0x0a */
2870 #define NextLine 0x0085
2871 #define LineSeparator 0x2028
2872 #define ParaSeparator 0x2029
2873
2874 CF_INLINE Boolean isALineSeparatorTypeCharacter(UniChar ch) {
2875 if (ch > CarriageReturn && ch < NextLine) return false; /* Quick test to cover most chars */
2876 return (ch == NewLine || ch == CarriageReturn || ch == NextLine || ch == LineSeparator || ch == ParaSeparator) ? true : false;
2877 }
2878
2879 void CFStringGetLineBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex) {
2880 CFIndex len;
2881 CFStringInlineBuffer buf;
2882 UniChar ch;
2883
2884 CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID, void, string, "getLineStart:end:contentsEnd:forRange:", lineBeginIndex, lineEndIndex, contentsEndIndex, CFRangeMake(range.location, range.length));
2885
2886 __CFAssertIsString(string);
2887 __CFAssertRangeIsInStringBounds(string, range.location, range.length);
2888
2889 len = __CFStrLength(string);
2890
2891 if (lineBeginIndex) {
2892 CFIndex start;
2893 if (range.location == 0) {
2894 start = 0;
2895 } else {
2896 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
2897 CFIndex buf_idx = range.location;
2898
2899 /* Take care of the special case where start happens to fall right between \r and \n */
2900 ch = CFStringGetCharacterFromInlineBuffer(&buf, buf_idx);
2901 buf_idx--;
2902 if ((ch == NewLine) && (CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) == CarriageReturn)) {
2903 buf_idx--;
2904 }
2905 while (1) {
2906 if (buf_idx < 0) {
2907 start = 0;
2908 break;
2909 } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx))) {
2910 start = buf_idx + 1;
2911 break;
2912 } else {
2913 buf_idx--;
2914 }
2915 }
2916 }
2917 *lineBeginIndex = start;
2918 }
2919
2920 /* Now find the ending point */
2921 if (lineEndIndex || contentsEndIndex) {
2922 CFIndex endOfContents, lineSeparatorLength = 1; /* 1 by default */
2923 CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len));
2924 CFIndex buf_idx = range.location + range.length - (range.length ? 1 : 0);
2925 /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */
2926 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
2927 if (ch == NewLine) {
2928 endOfContents = buf_idx;
2929 buf_idx--;
2930 if (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == CarriageReturn) {
2931 lineSeparatorLength = 2;
2932 endOfContents--;
2933 }
2934 } else {
2935 while (1) {
2936 if (isALineSeparatorTypeCharacter(ch)) {
2937 endOfContents = buf_idx; /* This is actually end of contentsRange */
2938 buf_idx++; /* OK for this to go past the end */
2939 if ((ch == CarriageReturn) && (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == NewLine)) {
2940 lineSeparatorLength = 2;
2941 }
2942 break;
2943 } else if (buf_idx >= len) {
2944 endOfContents = len;
2945 lineSeparatorLength = 0;
2946 break;
2947 } else {
2948 buf_idx++;
2949 ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx);
2950 }
2951 }
2952 }
2953 if (contentsEndIndex) *contentsEndIndex = endOfContents;
2954 if (lineEndIndex) *lineEndIndex = endOfContents + lineSeparatorLength;
2955 }
2956 }
2957
2958
2959 CFStringRef CFStringCreateByCombiningStrings(CFAllocatorRef alloc, CFArrayRef array, CFStringRef separatorString) {
2960 CFIndex numChars;
2961 CFIndex separatorNumByte;
2962 CFIndex stringCount = CFArrayGetCount(array);
2963 Boolean isSepCFString = !CF_IS_OBJC(__kCFStringTypeID, separatorString);
2964 Boolean canBeEightbit = isSepCFString && __CFStrIsEightBit(separatorString);
2965 CFIndex idx;
2966 CFStringRef otherString;
2967 void *buffer;
2968 uint8_t *bufPtr;
2969 const void *separatorContents = NULL;
2970
2971 if (stringCount == 0) {
2972 return CFStringCreateWithCharacters(alloc, NULL, 0);
2973 } else if (stringCount == 1) {
2974 return CFStringCreateCopy(alloc, CFArrayGetValueAtIndex(array, 0));
2975 }
2976
2977 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
2978
2979 numChars = CFStringGetLength(separatorString) * (stringCount - 1);
2980 for (idx = 0; idx < stringCount; idx++) {
2981 otherString = (CFStringRef)CFArrayGetValueAtIndex(array, idx);
2982 numChars += CFStringGetLength(otherString);
2983 // canBeEightbit is already false if the separator is an NSString...
2984 if (!CF_IS_OBJC(__kCFStringTypeID, otherString) && __CFStrIsUnicode(otherString)) canBeEightbit = false;
2985 }
2986
2987 bufPtr = buffer = CFAllocatorAllocate(alloc, canBeEightbit ? ((numChars + 1) * sizeof(uint8_t)) : (numChars * sizeof(UniChar)), 0);
2988 if (__CFOASafe) __CFSetLastAllocationEventName(buffer, "CFString (store)");
2989 separatorNumByte = CFStringGetLength(separatorString) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
2990
2991 for (idx = 0; idx < stringCount; idx++) {
2992 if (idx) { // add separator here unless first string
2993 if (separatorContents) {
2994 memmove(bufPtr, separatorContents, separatorNumByte);
2995 } else {
2996 if (!isSepCFString) { // NSString
2997 CFStringGetCharacters(separatorString, CFRangeMake(0, CFStringGetLength(separatorString)), (UniChar*)bufPtr);
2998 } else if (canBeEightbit || __CFStrIsUnicode(separatorString)) {
2999 memmove(bufPtr, (const uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), separatorNumByte);
3000 } else {
3001 __CFStrConvertBytesToUnicode((uint8_t*)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), (UniChar*)bufPtr, __CFStrLength(separatorString));
3002 }
3003 separatorContents = bufPtr;
3004 }
3005 bufPtr += separatorNumByte;
3006 }
3007
3008 otherString = (CFStringRef )CFArrayGetValueAtIndex(array, idx);
3009 if (CF_IS_OBJC(__kCFStringTypeID, otherString)) {
3010 CFIndex otherLength = CFStringGetLength(otherString);
3011 CFStringGetCharacters(otherString, CFRangeMake(0, otherLength), (UniChar*)bufPtr);
3012 bufPtr += otherLength * sizeof(UniChar);
3013 } else {
3014 const uint8_t* otherContents = __CFStrContents(otherString);
3015 CFIndex otherNumByte = __CFStrLength2(otherString, otherContents) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar));
3016
3017 if (canBeEightbit || __CFStrIsUnicode(otherString)) {
3018 memmove(bufPtr, otherContents + __CFStrSkipAnyLengthByte(otherString), otherNumByte);
3019 } else {
3020 __CFStrConvertBytesToUnicode(otherContents + __CFStrSkipAnyLengthByte(otherString), (UniChar*)bufPtr, __CFStrLength2(otherString, otherContents));
3021 }
3022 bufPtr += otherNumByte;
3023 }
3024 }
3025 if (canBeEightbit) *bufPtr = 0; // NULL byte;
3026
3027 return canBeEightbit ?
3028 CFStringCreateWithCStringNoCopy(alloc, buffer, __CFStringGetEightBitStringEncoding(), alloc) :
3029 CFStringCreateWithCharactersNoCopy(alloc, buffer, numChars, alloc);
3030 }
3031
3032
3033 CFArrayRef CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc, CFStringRef string, CFStringRef separatorString) {
3034 CFArrayRef separatorRanges;
3035 CFIndex length = CFStringGetLength(string);
3036 /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */
3037 if (!(separatorRanges = CFStringCreateArrayWithFindResults(alloc, string, separatorString, CFRangeMake(0, length), 0))) {
3038 return CFArrayCreate(alloc, (const void**)&string, 1, & kCFTypeArrayCallBacks);
3039 } else {
3040 CFIndex idx;
3041 CFIndex count = CFArrayGetCount(separatorRanges);
3042 CFIndex startIndex = 0;
3043 CFIndex numChars;
3044 CFMutableArrayRef array = CFArrayCreateMutable(alloc, count + 2, & kCFTypeArrayCallBacks);
3045 const CFRange *currentRange;
3046 CFStringRef substring;
3047
3048 for (idx = 0;idx < count;idx++) {
3049 currentRange = CFArrayGetValueAtIndex(separatorRanges, idx);
3050 numChars = currentRange->location - startIndex;
3051 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, numChars));
3052 CFArrayAppendValue(array, substring);
3053 CFRelease(substring);
3054 startIndex = currentRange->location + currentRange->length;
3055 }
3056 substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, length - startIndex));
3057 CFArrayAppendValue(array, substring);
3058 CFRelease(substring);
3059
3060 CFRelease(separatorRanges);
3061
3062 return array;
3063 }
3064 }
3065
3066 CFStringRef CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc, CFDataRef data, CFStringEncoding encoding) {
3067 return CFStringCreateWithBytes(alloc, CFDataGetBytePtr(data), CFDataGetLength(data), encoding, true);
3068 }
3069
3070
3071 CFDataRef CFStringCreateExternalRepresentation(CFAllocatorRef alloc, CFStringRef string, CFStringEncoding encoding, uint8_t lossByte) {
3072 CFIndex length;
3073 CFIndex guessedByteLength;
3074 uint8_t *bytes;
3075 CFIndex usedLength;
3076 SInt32 result;
3077
3078 if (CF_IS_OBJC(__kCFStringTypeID, string)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3079 length = CFStringGetLength(string);
3080 } else {
3081 __CFAssertIsString(string);
3082 length = __CFStrLength(string);
3083 if (__CFStrIsEightBit(string) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string
3084 return CFDataCreate(alloc, ((char *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
3085 }
3086 }
3087
3088 if (alloc == NULL) alloc = __CFGetDefaultAllocator();
3089
3090 if (encoding == kCFStringEncodingUnicode) {
3091 guessedByteLength = (length + 1) * sizeof(UniChar);
3092 } else if (((guessedByteLength = CFStringGetMaximumSizeForEncoding(length, encoding)) > length) && !CF_IS_OBJC(__kCFStringTypeID, string)) { // Multi byte encoding
3093 #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
3094 if (__CFStrIsUnicode(string)) {
3095 guessedByteLength = CFStringEncodingByteLengthForCharacters(encoding, kCFStringEncodingPrependBOM, __CFStrContents(string), __CFStrLength(string));
3096 } else {
3097 #endif
3098 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, NULL, 0x7FFFFFFF, &guessedByteLength);
3099 // if result == length, we always succeed
3100 // otherwise, if result == 0, we fail
3101 // otherwise, if there was a lossByte but still result != length, we fail
3102 if ((result != length) && (!result || !lossByte)) return NULL;
3103 if (guessedByteLength == length && __CFStrIsEightBit(string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // It's all ASCII !!
3104 return CFDataCreate(alloc, ((char *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string));
3105 }
3106 #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
3107 }
3108 #endif
3109 }
3110 bytes = CFAllocatorAllocate(alloc, guessedByteLength, 0);
3111 if (__CFOASafe) __CFSetLastAllocationEventName(bytes, "CFData (store)");
3112
3113 result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, bytes, guessedByteLength, &usedLength);
3114
3115 if ((result != length) && (!result || !lossByte)) { // see comment above about what this means
3116 CFAllocatorDeallocate(alloc, bytes);
3117 return NULL;
3118 }
3119
3120 return CFDataCreateWithBytesNoCopy(alloc, (char const *)bytes, usedLength, alloc);
3121 }
3122
3123
3124 CFStringEncoding CFStringGetSmallestEncoding(CFStringRef str) {
3125 CFIndex len;
3126 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_smallestEncodingInCFStringEncoding");
3127 __CFAssertIsString(str);
3128
3129 if (__CFStrIsEightBit(str)) return __CFStringGetEightBitStringEncoding();
3130 len = __CFStrLength(str);
3131 if (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetEightBitStringEncoding(), 0, NULL, 0x7fffffff, NULL) == len) return __CFStringGetEightBitStringEncoding();
3132 if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetSystemEncoding(), 0, NULL, 0x7fffffff, NULL) == len)) return __CFStringGetSystemEncoding();
3133 return kCFStringEncodingUnicode; /* ??? */
3134 }
3135
3136
3137 CFStringEncoding CFStringGetFastestEncoding(CFStringRef str) {
3138 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_fastestEncodingInCFStringEncoding");
3139 __CFAssertIsString(str);
3140 return __CFStrIsEightBit(str) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode; /* ??? */
3141 }
3142
3143
3144 SInt32 CFStringGetIntValue(CFStringRef str) {
3145 Boolean success;
3146 SInt32 result;
3147 SInt32 idx = 0;
3148 CFStringInlineBuffer buf;
3149 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
3150 success = __CFStringScanInteger(&buf, NULL, &idx, false, &result);
3151 return success ? result : 0;
3152 }
3153
3154
3155 double CFStringGetDoubleValue(CFStringRef str) {
3156 Boolean success;
3157 double result;
3158 SInt32 idx = 0;
3159 CFStringInlineBuffer buf;
3160 CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str)));
3161 success = __CFStringScanDouble(&buf, NULL, &idx, &result);
3162 return success ? result : 0.0;
3163 }
3164
3165
3166 /*** Mutable functions... ***/
3167
3168 void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string, UniChar *chars, CFIndex length, CFIndex capacity) {
3169 __CFAssertIsNotNegative(length);
3170 __CFAssertIsStringAndExternalMutable(string);
3171 CFAssert4((length <= capacity) && ((capacity == 0) || ((capacity > 0) && chars)), __kCFLogAssertion, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__, chars, length, capacity);
3172 __CFStrSetContentPtr(string, chars);
3173 __CFStrSetExplicitLength(string, length);
3174 __CFStrSetCapacity(string, capacity * sizeof(UniChar));
3175 __CFStrSetCapacityProvidedExternally(string);
3176 }
3177
3178
3179
3180 void CFStringInsert(CFMutableStringRef str, CFIndex idx, CFStringRef insertedStr) {
3181 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "insertString:atIndex:", insertedStr, idx);
3182 __CFAssertIsStringAndMutable(str);
3183 CFAssert3(idx >= 0 && idx <= __CFStrLength(str), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(str));
3184 __CFStringReplace(str, CFRangeMake(idx, 0), insertedStr);
3185 }
3186
3187
3188 void CFStringDelete(CFMutableStringRef str, CFRange range) {
3189 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "deleteCharactersInRange:", range);
3190 __CFAssertIsStringAndMutable(str);
3191 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
3192 __CFStringChangeSize(str, range, 0, false);
3193 }
3194
3195
3196 void CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
3197 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "replaceCharactersInRange:withString:", range, replacement);
3198 __CFAssertIsStringAndMutable(str);
3199 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
3200 __CFStringReplace(str, range, replacement);
3201 }
3202
3203
3204 void CFStringReplaceAll(CFMutableStringRef str, CFStringRef replacement) {
3205 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "setString:", replacement);
3206 __CFAssertIsStringAndMutable(str);
3207 __CFStringReplace(str, CFRangeMake(0, __CFStrLength(str)), replacement);
3208 }
3209
3210
3211 void CFStringAppend(CFMutableStringRef str, CFStringRef appended) {
3212 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "appendString:", appended);
3213 __CFAssertIsStringAndMutable(str);
3214 __CFStringReplace(str, CFRangeMake(__CFStrLength(str), 0), appended);
3215 }
3216
3217
3218 void CFStringAppendCharacters(CFMutableStringRef str, const UniChar *chars, CFIndex appendedLength) {
3219 CFIndex strLength, idx;
3220
3221 __CFAssertIsNotNegative(appendedLength);
3222
3223 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", chars, appendedLength);
3224
3225 __CFAssertIsStringAndMutable(str);
3226
3227 strLength = __CFStrLength(str);
3228 if (__CFStringGetCompatibility(Bug2967272) || __CFStrIsUnicode(str)) {
3229 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, true);
3230 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
3231 } else {
3232 uint8_t *contents;
3233 bool isASCII = true;
3234 for (idx = 0; isASCII && idx < appendedLength; idx++) isASCII = (chars[idx] < 0x80);
3235 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, !isASCII);
3236 if (!isASCII) {
3237 memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar));
3238 } else {
3239 contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
3240 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
3241 }
3242 }
3243 }
3244
3245
3246 static void __CFStringAppendBytes(CFMutableStringRef str, const char *cStr, CFIndex appendedLength, CFStringEncoding encoding) {
3247 Boolean appendedIsUnicode = false;
3248 Boolean freeCStrWhenDone = false;
3249 Boolean demoteAppendedUnicode = false;
3250 CFVarWidthCharBuffer vBuf;
3251
3252 __CFAssertIsNotNegative(appendedLength);
3253
3254 if (encoding == kCFStringEncodingASCII || encoding == __CFStringGetEightBitStringEncoding()) {
3255 // appendedLength now denotes length in UniChars
3256 } else if (encoding == kCFStringEncodingUnicode) {
3257 UniChar *chars = (UniChar *)cStr;
3258 CFIndex idx, length = appendedLength / sizeof(UniChar);
3259 bool isASCII = true;
3260 for (idx = 0; isASCII && idx < length; idx++) isASCII = (chars[idx] < 0x80);
3261 if (!isASCII) {
3262 appendedIsUnicode = true;
3263 } else {
3264 demoteAppendedUnicode = true;
3265 }
3266 appendedLength = length;
3267 } else {
3268 Boolean usingPassedInMemory = false;
3269
3270 vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff
3271 vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary
3272
3273 if (!__CFStringDecodeByteStream3(cStr, appendedLength, encoding, __CFStrIsUnicode(str), &vBuf, &usingPassedInMemory, 0)) {
3274 CFAssert1(0, __kCFLogAssertion, "Supplied bytes could not be converted specified encoding %d", encoding);
3275 return;
3276 }
3277
3278 // If not ASCII, appendedLength now denotes length in UniChars
3279 appendedLength = vBuf.numChars;
3280 appendedIsUnicode = !vBuf.isASCII;
3281 cStr = vBuf.chars.ascii;
3282 freeCStrWhenDone = !usingPassedInMemory && vBuf.shouldFreeChars;
3283 }
3284
3285 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
3286 if (!appendedIsUnicode && !demoteAppendedUnicode) {
3287 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "_cfAppendCString:length:", cStr, appendedLength);
3288 } else {
3289 CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", cStr, appendedLength);
3290 }
3291 } else {
3292 CFIndex strLength;
3293 __CFAssertIsStringAndMutable(str);
3294 strLength = __CFStrLength(str);
3295
3296 __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, appendedIsUnicode || __CFStrIsUnicode(str));
3297
3298 if (__CFStrIsUnicode(str)) {
3299 UniChar *contents = (UniChar *)__CFStrContents(str);
3300 if (appendedIsUnicode) {
3301 memmove(contents + strLength, cStr, appendedLength * sizeof(UniChar));
3302 } else {
3303 __CFStrConvertBytesToUnicode(cStr, contents + strLength, appendedLength);
3304 }
3305 } else {
3306 if (demoteAppendedUnicode) {
3307 UniChar *chars = (UniChar *)cStr;
3308 CFIndex idx;
3309 uint8_t *contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str);
3310 for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx];
3311 } else {
3312 uint8_t *contents = (uint8_t *)__CFStrContents(str);
3313 memmove(contents + strLength + __CFStrSkipAnyLengthByte(str), cStr, appendedLength);
3314 }
3315 }
3316 }
3317
3318 if (freeCStrWhenDone) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr);
3319 }
3320
3321 void CFStringAppendPascalString(CFMutableStringRef str, ConstStringPtr pStr, CFStringEncoding encoding) {
3322 __CFStringAppendBytes(str, pStr + 1, (CFIndex)*pStr, encoding);
3323 }
3324
3325 void CFStringAppendCString(CFMutableStringRef str, const char *cStr, CFStringEncoding encoding) {
3326 __CFStringAppendBytes(str, cStr, strlen(cStr), encoding);
3327 }
3328
3329
3330 void CFStringAppendFormat(CFMutableStringRef str, CFDictionaryRef formatOptions, CFStringRef format, ...) {
3331 va_list argList;
3332
3333 va_start(argList, format);
3334 CFStringAppendFormatAndArguments(str, formatOptions, format, argList);
3335 va_end(argList);
3336 }
3337
3338
3339 CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFOptionFlags compareOptions) {
3340 CFRange foundRange;
3341 Boolean backwards = compareOptions & kCFCompareBackwards;
3342 UInt32 endIndex = rangeToSearch.location + rangeToSearch.length;
3343 #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange))
3344 CFRange rangeBuffer[MAX_RANGES_ON_STACK]; // Used to avoid allocating memory
3345 CFRange *ranges = rangeBuffer;
3346 CFIndex foundCount = 0;
3347 CFIndex capacity = MAX_RANGES_ON_STACK;
3348
3349 __CFAssertIsStringAndMutable(string);
3350 __CFAssertRangeIsInStringBounds(string, rangeToSearch.location, rangeToSearch.length);
3351
3352 // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults().
3353 while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) {
3354 // Determine the next range
3355 if (backwards) {
3356 rangeToSearch.length = foundRange.location - rangeToSearch.location;
3357 } else {
3358 rangeToSearch.location = foundRange.location + foundRange.length;
3359 rangeToSearch.length = endIndex - rangeToSearch.location;
3360 }
3361
3362 // If necessary, grow the array
3363 if (foundCount >= capacity) {
3364 bool firstAlloc = (ranges == rangeBuffer) ? true : false;
3365 capacity = (capacity + 4) * 2;
3366 // Note that reallocate with NULL previous pointer is same as allocate
3367 ranges = CFAllocatorReallocate(NULL, firstAlloc ? NULL : ranges, capacity * sizeof(CFRange), 0);
3368 if (firstAlloc) memmove(ranges, rangeBuffer, MAX_RANGES_ON_STACK * sizeof(CFRange));
3369 }
3370 ranges[foundCount] = foundRange;
3371 foundCount++;
3372 }
3373
3374 if (foundCount > 0) {
3375 if (backwards) { // Reorder the ranges to be incrementing (better to do this here, then to check other places)
3376 int head = 0;
3377 int tail = foundCount - 1;
3378 while (head < tail) {
3379 CFRange temp = ranges[head];
3380 ranges[head] = ranges[tail];
3381 ranges[tail] = temp;
3382 head++;
3383 tail--;
3384 }
3385 }
3386 __CFStringReplaceMultiple(string, ranges, foundCount, replacementString);
3387 if (ranges != rangeBuffer) CFAllocatorDeallocate(NULL, ranges);
3388 }
3389
3390 return foundCount;
3391 }
3392
3393
3394 // This function is here for NSString purposes
3395 // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations
3396
3397 int __CFStringCheckAndReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) {
3398 if (!__CFStrIsMutable(str)) return _CFStringErrNotMutable; // These three ifs are always here, for NSString usage
3399 if (!replacement && __CFStringNoteErrors()) return _CFStringErrNilArg;
3400 // We use unsigneds as that is what NSRanges do
3401 if ((unsigned)range.location + (unsigned)range.length > (unsigned)__CFStrLength(str) && __CFStringNoteErrors()) return _CFStringErrBounds;
3402 __CFAssertIsStringAndMutable(str);
3403 __CFAssertRangeIsInStringBounds(str, range.location, range.length);
3404 __CFStringReplace(str, range, replacement);
3405 return _CFStringErrNone;
3406 }
3407
3408 // This function determines whether errors which would cause string exceptions should
3409 // be ignored or not
3410
3411 Boolean __CFStringNoteErrors(void) {
3412 return _CFExecutableLinkedOnOrAfter(CFSystemVersionJaguar) ? true : false;
3413 }
3414
3415
3416
3417 void CFStringPad(CFMutableStringRef string, CFStringRef padString, CFIndex length, CFIndex indexIntoPad) {
3418 CFIndex originalLength;
3419
3420 __CFAssertIsNotNegative(length);
3421 __CFAssertIsNotNegative(indexIntoPad);
3422
3423 CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, void, string, "_cfPad:length:padIndex:", padString, length, indexIntoPad);
3424
3425 __CFAssertIsStringAndMutable(string);
3426
3427 originalLength = __CFStrLength(string);
3428 if (length < originalLength) {
3429 __CFStringChangeSize(string, CFRangeMake(length, originalLength - length), 0, false);
3430 } else if (originalLength < length) {
3431 uint8_t *contents;
3432 Boolean isUnicode;
3433 CFIndex charSize;
3434 CFIndex padStringLength;
3435 CFIndex padLength;
3436 CFIndex padRemaining = length - originalLength;
3437
3438 if (CF_IS_OBJC(__kCFStringTypeID, padString)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */
3439 padStringLength = CFStringGetLength(padString);
3440 isUnicode = true; /* !!! Bad for now */
3441 } else {
3442 __CFAssertIsString(padString);
3443 padStringLength = __CFStrLength(padString);
3444 isUnicode = __CFStrIsUnicode(string) || __CFStrIsUnicode(padString);
3445 }
3446
3447 charSize = isUnicode ? sizeof(UniChar) : sizeof(uint8_t);
3448
3449 __CFStringChangeSize(string, CFRangeMake(originalLength, 0), padRemaining, isUnicode);
3450
3451 contents = (uint8_t*)__CFStrContents(string) + charSize * originalLength + __CFStrSkipAnyLengthByte(string);
3452 padLength = padStringLength - indexIntoPad;
3453 padLength = padRemaining < padLength ? padRemaining : padLength;
3454
3455 while (padRemaining > 0) {
3456 if (isUnicode) {
3457 CFStringGetCharacters(padString, CFRangeMake(indexIntoPad, padLength), (UniChar*)contents);
3458 } else {
3459 CFStringGetBytes(padString, CFRangeMake(indexIntoPad, padLength), __CFStringGetEightBitStringEncoding(), 0, false, contents, padRemaining * charSize, NULL);
3460 }
3461 contents += padLength * charSize;
3462 padRemaining -= padLength;
3463 indexIntoPad = 0;
3464 padLength = padRemaining < padLength ? padRemaining : padStringLength;
3465 }
3466 }
3467 }
3468
3469 void CFStringTrim(CFMutableStringRef string, CFStringRef trimString) {
3470 CFRange range;
3471 CFIndex newStartIndex;
3472 CFIndex length;
3473
3474 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfTrim:", trimString);
3475
3476 __CFAssertIsStringAndMutable(string);
3477 __CFAssertIsString(trimString);
3478
3479 newStartIndex = 0;
3480 length = __CFStrLength(string);
3481
3482 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length - newStartIndex), kCFCompareAnchored, &range)) {
3483 newStartIndex = range.location + range.length;
3484 }
3485
3486 if (newStartIndex < length) {
3487 CFIndex charSize = __CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t);
3488 uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
3489
3490 length -= newStartIndex;
3491 if (__CFStrLength(trimString) < length) {
3492 while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length), kCFCompareAnchored|kCFCompareBackwards, &range)) {
3493 length = range.location - newStartIndex;
3494 }
3495 }
3496 memmove(contents, contents + newStartIndex * charSize, length * charSize);
3497 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
3498 } else { // Only trimString in string, trim all
3499 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
3500 }
3501 }
3502
3503 void CFStringTrimWhitespace(CFMutableStringRef string) {
3504 CFIndex newStartIndex;
3505 CFIndex length;
3506 CFStringInlineBuffer buffer;
3507
3508 CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, void, string, "_cfTrimWS");
3509
3510 __CFAssertIsStringAndMutable(string);
3511
3512 newStartIndex = 0;
3513 length = __CFStrLength(string);
3514
3515 CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length));
3516 CFIndex buffer_idx = 0;
3517
3518 while (buffer_idx < length && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
3519 buffer_idx++;
3520 newStartIndex = buffer_idx;
3521
3522 if (newStartIndex < length) {
3523 uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
3524 CFIndex charSize = (__CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t));
3525
3526 buffer_idx = length - 1;
3527 while (0 <= buffer_idx && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet))
3528 buffer_idx--;
3529 length = buffer_idx - newStartIndex + 1;
3530
3531 memmove(contents, contents + newStartIndex * charSize, length * charSize);
3532 __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false);
3533 } else { // Whitespace only string
3534 __CFStringChangeSize(string, CFRangeMake(0, length), 0, false);
3535 }
3536 }
3537
3538 void CFStringLowercase(CFMutableStringRef string, CFLocaleRef locale) {
3539 CFIndex currentIndex = 0;
3540 CFIndex length;
3541 const char *langCode;
3542 Boolean isEightBit = __CFStrIsEightBit(string);
3543
3544 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfLowercase:", locale);
3545
3546 __CFAssertIsStringAndMutable(string);
3547
3548 length = __CFStrLength(string);
3549
3550 langCode = NULL;
3551
3552 if (!langCode && isEightBit) {
3553 uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
3554 for (;currentIndex < length;currentIndex++) {
3555 if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
3556 contents[currentIndex] += 'a' - 'A';
3557 } else if (contents[currentIndex] > 127) {
3558 break;
3559 }
3560 }
3561 }
3562
3563 if (currentIndex < length) {
3564 UniChar *contents;
3565 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
3566 CFIndex mappedLength;
3567 UTF32Char currentChar;
3568 UInt32 flags = 0;
3569
3570 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
3571
3572 contents = (UniChar*)__CFStrContents(string);
3573
3574 for (;currentIndex < length;currentIndex++) {
3575
3576 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
3577 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
3578 } else {
3579 currentChar = contents[currentIndex];
3580 }
3581 flags = ((langCode || (currentChar == 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToLowercase, langCode, flags) : 0);
3582
3583 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, flags, langCode);
3584 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
3585
3586 if (currentChar > 0xFFFF) { // Non-BMP char
3587 switch (mappedLength) {
3588 case 0:
3589 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
3590 contents = (UniChar*)__CFStrContents(string);
3591 length -= 2;
3592 break;
3593
3594 case 1:
3595 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
3596 contents = (UniChar*)__CFStrContents(string);
3597 --length;
3598 break;
3599
3600 case 2:
3601 contents[++currentIndex] = mappedCharacters[1];
3602 break;
3603
3604 default:
3605 --mappedLength; // Skip the current char
3606 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
3607 contents = (UniChar*)__CFStrContents(string);
3608 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
3609 length += (mappedLength - 1);
3610 currentIndex += mappedLength;
3611 break;
3612 }
3613 } else if (mappedLength == 0) {
3614 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
3615 contents = (UniChar*)__CFStrContents(string);
3616 --length;
3617 } else if (mappedLength > 1) {
3618 --mappedLength; // Skip the current char
3619 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
3620 contents = (UniChar*)__CFStrContents(string);
3621 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
3622 length += mappedLength;
3623 currentIndex += mappedLength;
3624 }
3625 }
3626 }
3627 }
3628
3629 void CFStringUppercase(CFMutableStringRef string, CFLocaleRef locale) {
3630 CFIndex currentIndex = 0;
3631 CFIndex length;
3632 const char *langCode;
3633 Boolean isEightBit = __CFStrIsEightBit(string);
3634
3635 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfUppercase:", locale);
3636
3637 __CFAssertIsStringAndMutable(string);
3638
3639 length = __CFStrLength(string);
3640
3641 langCode = NULL;
3642
3643 if (!langCode && isEightBit) {
3644 uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
3645 for (;currentIndex < length;currentIndex++) {
3646 if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
3647 contents[currentIndex] -= 'a' - 'A';
3648 } else if (contents[currentIndex] > 127) {
3649 break;
3650 }
3651 }
3652 }
3653
3654 if (currentIndex < length) {
3655 UniChar *contents;
3656 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
3657 CFIndex mappedLength;
3658 UTF32Char currentChar;
3659 UInt32 flags = 0;
3660
3661 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
3662
3663 contents = (UniChar*)__CFStrContents(string);
3664
3665 for (;currentIndex < length;currentIndex++) {
3666 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
3667 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
3668 } else {
3669 currentChar = contents[currentIndex];
3670 }
3671
3672 flags = (langCode ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToUppercase, langCode, flags) : 0);
3673
3674 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToUppercase, flags, langCode);
3675 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
3676
3677 if (currentChar > 0xFFFF) { // Non-BMP char
3678 switch (mappedLength) {
3679 case 0:
3680 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
3681 contents = (UniChar*)__CFStrContents(string);
3682 length -= 2;
3683 break;
3684
3685 case 1:
3686 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
3687 contents = (UniChar*)__CFStrContents(string);
3688 --length;
3689 break;
3690
3691 case 2:
3692 contents[++currentIndex] = mappedCharacters[1];
3693 break;
3694
3695 default:
3696 --mappedLength; // Skip the current char
3697 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
3698 contents = (UniChar*)__CFStrContents(string);
3699 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
3700 length += (mappedLength - 1);
3701 currentIndex += mappedLength;
3702 break;
3703 }
3704 } else if (mappedLength == 0) {
3705 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
3706 contents = (UniChar*)__CFStrContents(string);
3707 --length;
3708 } else if (mappedLength > 1) {
3709 --mappedLength; // Skip the current char
3710 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
3711 contents = (UniChar*)__CFStrContents(string);
3712 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
3713 length += mappedLength;
3714 currentIndex += mappedLength;
3715 }
3716 }
3717 }
3718 }
3719
3720
3721 void CFStringCapitalize(CFMutableStringRef string, CFLocaleRef locale) {
3722 CFIndex currentIndex = 0;
3723 CFIndex length;
3724 const char *langCode;
3725 Boolean isEightBit = __CFStrIsEightBit(string);
3726 Boolean isLastCased = false;
3727 static const uint8_t *caseIgnorableForBMP = NULL;
3728
3729 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfCapitalize:", locale);
3730
3731 __CFAssertIsStringAndMutable(string);
3732
3733 length = __CFStrLength(string);
3734
3735 if (NULL == caseIgnorableForBMP) caseIgnorableForBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet, 0);
3736
3737 langCode = NULL;
3738
3739 if (!langCode && isEightBit) {
3740 uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
3741 for (;currentIndex < length;currentIndex++) {
3742 if (contents[currentIndex] > 127) {
3743 break;
3744 } else if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') {
3745 contents[currentIndex] += (isLastCased ? 'a' - 'A' : 0);
3746 isLastCased = true;
3747 } else if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') {
3748 contents[currentIndex] -= (!isLastCased ? 'a' - 'A' : 0);
3749 isLastCased = true;
3750 } else if (!CFUniCharIsMemberOfBitmap(contents[currentIndex], caseIgnorableForBMP)) {
3751 isLastCased = false;
3752 }
3753 }
3754 }
3755
3756 if (currentIndex < length) {
3757 UniChar *contents;
3758 UniChar mappedCharacters[MAX_CASE_MAPPING_BUF];
3759 CFIndex mappedLength;
3760 UTF32Char currentChar;
3761 UInt32 flags = 0;
3762
3763 if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true);
3764
3765 contents = (UniChar*)__CFStrContents(string);
3766
3767 for (;currentIndex < length;currentIndex++) {
3768 if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) {
3769 currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]);
3770 } else {
3771 currentChar = contents[currentIndex];
3772 }
3773 flags = ((langCode || ((currentChar == 0x03A3) && isLastCased)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), langCode, flags) : 0);
3774
3775 mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), flags, langCode);
3776 if (mappedLength > 0) contents[currentIndex] = *mappedCharacters;
3777
3778 if (currentChar > 0xFFFF) { // Non-BMP char
3779 switch (mappedLength) {
3780 case 0:
3781 __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true);
3782 contents = (UniChar*)__CFStrContents(string);
3783 length -= 2;
3784 break;
3785
3786 case 1:
3787 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true);
3788 contents = (UniChar*)__CFStrContents(string);
3789 --length;
3790 break;
3791
3792 case 2:
3793 contents[++currentIndex] = mappedCharacters[1];
3794 break;
3795
3796 default:
3797 --mappedLength; // Skip the current char
3798 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true);
3799 contents = (UniChar*)__CFStrContents(string);
3800 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
3801 length += (mappedLength - 1);
3802 currentIndex += mappedLength;
3803 break;
3804 }
3805 } else if (mappedLength == 0) {
3806 __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true);
3807 contents = (UniChar*)__CFStrContents(string);
3808 --length;
3809 } else if (mappedLength > 1) {
3810 --mappedLength; // Skip the current char
3811 __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true);
3812 contents = (UniChar*)__CFStrContents(string);
3813 memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar));
3814 length += mappedLength;
3815 currentIndex += mappedLength;
3816 }
3817
3818 if (!((currentChar > 0xFFFF) ? CFUniCharIsMemberOf(currentChar, kCFUniCharCaseIgnorableCharacterSet) : CFUniCharIsMemberOfBitmap(currentChar, caseIgnorableForBMP))) { // We have non-caseignorable here
3819 isLastCased = ((CFUniCharIsMemberOf(currentChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(currentChar, kCFUniCharLowercaseLetterCharacterSet)) ? true : false);
3820 }
3821 }
3822 }
3823 }
3824
/* Element count of the on-stack UTF-32 scratch buffer used by CFStringNormalize;
   also the step by which that buffer is enlarged when it has to move to the heap. */
#define MAX_DECOMP_BUF 64

/* Constants for arithmetic composition of Hangul syllables from conjoining Jamo:
   syllable = (L index * VCOUNT + V index) * TCOUNT + T index + SBASE */
#define HANGUL_SBASE 0xAC00	/* first precomposed syllable */
#define HANGUL_LBASE 0x1100	/* first leading consonant */
#define HANGUL_VBASE 0x1161	/* first vowel */
#define HANGUL_TBASE 0x11A7	/* subtracted from a trailing consonant to get its T index */
#define HANGUL_SCOUNT 11172	/* number of precomposed syllables (LCOUNT * NCOUNT) */
#define HANGUL_LCOUNT 19	/* number of leading consonants */
#define HANGUL_VCOUNT 21	/* number of vowels */
#define HANGUL_TCOUNT 28	/* number of T index values */
#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)	/* syllables per leading consonant */
3836
3837 CF_INLINE uint32_t __CFGetUTF16Length(const UTF32Char *characters, uint32_t utf32Length) {
3838 const UTF32Char *limit = characters + utf32Length;
3839 uint32_t length = 0;
3840
3841 while (characters < limit) length += (*(characters++) > 0xFFFF ? 2 : 1);
3842
3843 return length;
3844 }
3845
3846 CF_INLINE void __CFFillInUTF16(const UTF32Char *characters, UTF16Char *dst, uint32_t utf32Length) {
3847 const UTF32Char *limit = characters + utf32Length;
3848 UTF32Char currentChar;
3849
3850 while (characters < limit) {
3851 currentChar = *(characters++);
3852 if (currentChar > 0xFFFF) {
3853 currentChar -= 0x10000;
3854 *(dst++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
3855 *(dst++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
3856 } else {
3857 *(dst++) = currentChar;
3858 }
3859 }
3860 }
3861
3862 void CFStringNormalize(CFMutableStringRef string, CFStringNormalizationForm theForm) {
3863 CFIndex currentIndex = 0;
3864 CFIndex length;
3865 bool needToReorder = true;
3866
3867 CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfNormalize:", theForm);
3868
3869 __CFAssertIsStringAndMutable(string);
3870
3871 length = __CFStrLength(string);
3872
3873 if (__CFStrIsEightBit(string)) {
3874 uint8_t *contents;
3875
3876 if (theForm == kCFStringNormalizationFormC) return; // 8bit form has no decomposition
3877
3878 contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
3879
3880 for (;currentIndex < length;currentIndex++) {
3881 if (contents[currentIndex] > 127) {
3882 __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); // need to do harm way
3883 needToReorder = false;
3884 break;
3885 }
3886 }
3887 }
3888
3889 if (currentIndex < length) {
3890 UTF16Char *limit = (UTF16Char *)__CFStrContents(string) + length;
3891 UTF16Char *contents = (UTF16Char *)__CFStrContents(string) + currentIndex;
3892 UTF32Char buffer[MAX_DECOMP_BUF];
3893 UTF32Char *mappedCharacters = buffer;
3894 CFIndex allocatedLength = MAX_DECOMP_BUF;
3895 CFIndex mappedLength;
3896 CFIndex currentLength;
3897 UTF32Char currentChar;
3898
3899 while (contents < limit) {
3900 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
3901 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
3902 currentLength = 2;
3903 contents += 2;
3904 } else {
3905 currentChar = *(contents++);
3906 currentLength = 1;
3907 }
3908
3909 mappedLength = 0;
3910
3911 if (CFUniCharIsMemberOf(currentChar, kCFUniCharCanonicalDecomposableCharacterSet) && !CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) {
3912 if ((theForm & kCFStringNormalizationFormC) == 0 || currentChar < HANGUL_SBASE || currentChar > (HANGUL_SBASE + HANGUL_SCOUNT)) { // We don't have to decompose Hangul Syllables if we're precomposing again
3913 mappedLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters, MAX_DECOMP_BUF);
3914 }
3915 }
3916
3917 if ((needToReorder || (theForm & kCFStringNormalizationFormC)) && ((contents < limit) || (mappedLength == 0))) {
3918 if (mappedLength > 0) {
3919 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
3920 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
3921 } else {
3922 currentChar = *contents;
3923 }
3924 }
3925
3926 if (CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) {
3927 uint32_t decompLength;
3928
3929 if (mappedLength == 0) {
3930 contents -= (currentChar & 0xFFFF0000 ? 2 : 1);
3931 if (currentIndex > 0) {
3932 if (CFUniCharIsSurrogateLowCharacter(*(contents - 1)) && (currentIndex > 1) && CFUniCharIsSurrogateHighCharacter(*(contents - 2))) {
3933 *mappedCharacters = CFUniCharGetLongCharacterForSurrogatePair(*(contents - 2), *(contents - 1));
3934 currentIndex -= 2;
3935 currentLength += 2;
3936 } else {
3937 *mappedCharacters = *(contents - 1);
3938 --currentIndex;
3939 ++currentLength;
3940 }
3941 mappedLength = 1;
3942 }
3943 } else {
3944 currentLength += (currentChar & 0xFFFF0000 ? 2 : 1);
3945 }
3946 contents += (currentChar & 0xFFFF0000 ? 2 : 1);
3947
3948 if (CFUniCharIsMemberOf(currentChar, kCFUniCharDecomposableCharacterSet)) { // Vietnamese accent, etc.
3949 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
3950 mappedLength += decompLength;
3951 } else {
3952 mappedCharacters[mappedLength++] = currentChar;
3953 }
3954
3955 while (contents < limit) {
3956 if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) {
3957 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1));
3958 } else {
3959 currentChar = *contents;
3960 }
3961 if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
3962 if (currentChar & 0xFFFF0000) {
3963 contents += 2;
3964 currentLength += 2;
3965 } else {
3966 ++contents;
3967 ++currentLength;
3968 }
3969 if (mappedLength == allocatedLength) {
3970 allocatedLength += MAX_DECOMP_BUF;
3971 if (mappedCharacters == buffer) {
3972 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(NULL, allocatedLength * sizeof(UTF32Char), 0);
3973 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
3974 } else {
3975 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(NULL, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
3976 }
3977 }
3978 if (CFUniCharIsMemberOf(currentChar, kCFUniCharDecomposableCharacterSet)) { // Vietnamese accent, etc.
3979 decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength);
3980 mappedLength += decompLength;
3981 } else {
3982 mappedCharacters[mappedLength++] = currentChar;
3983 }
3984 }
3985 }
3986 if (needToReorder && mappedLength > 1) CFUniCharPrioritySort(mappedCharacters, mappedLength);
3987 }
3988
3989 if (theForm & kCFStringNormalizationFormKD) {
3990 CFIndex newLength = 0;
3991
3992 if (mappedLength == 0 && CFUniCharIsMemberOf(currentChar, kCFUniCharCompatibilityDecomposableCharacterSet)) {
3993 mappedCharacters[mappedLength++] = currentChar;
3994 }
3995 while (newLength < mappedLength) {
3996 newLength = CFUniCharCompatibilityDecompose(mappedCharacters, mappedLength, allocatedLength);
3997 if (newLength == 0) {
3998 allocatedLength += MAX_DECOMP_BUF;
3999 if (mappedCharacters == buffer) {
4000 mappedCharacters = (UTF32Char *)CFAllocatorAllocate(NULL, allocatedLength * sizeof(UTF32Char), 0);
4001 memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char));
4002 } else {
4003 mappedCharacters = (UTF32Char *)CFAllocatorReallocate(NULL, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0);
4004 }
4005 }
4006 }
4007 mappedLength = newLength;
4008 }
4009
4010 if (theForm & kCFStringNormalizationFormC) {
4011 if (mappedLength > 1) {
4012 CFIndex consumedLength = 1;
4013 UTF32Char nextChar;
4014 UTF32Char *currentBase = mappedCharacters;
4015 uint8_t currentClass, lastClass = 0;
4016 const uint8_t *bmpClassTable = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
4017 bool didCombine = false;
4018
4019 currentChar = *mappedCharacters;
4020
4021 while (consumedLength < mappedLength) {
4022 nextChar = mappedCharacters[consumedLength];
4023 currentClass = (nextChar & 0xFFFF0000 ? CFUniCharGetUnicodeProperty(nextChar, kCFUniCharCombiningProperty) : CFUniCharGetCombiningPropertyForCharacter(nextChar, bmpClassTable));
4024
4025 if (theForm & kCFStringNormalizationFormKD) {
4026 if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) {
4027 SInt8 lIndex = currentChar - HANGUL_LBASE;
4028
4029 if ((0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4030 SInt16 vIndex = nextChar - HANGUL_VBASE;
4031
4032 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4033 SInt16 tIndex = 0;
4034 CFIndex usedLength = mappedLength;
4035
4036 mappedCharacters[consumedLength++] = 0xFFFD;
4037
4038 if (consumedLength < mappedLength) {
4039 tIndex = mappedCharacters[consumedLength] - HANGUL_TBASE;
4040 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4041 tIndex = 0;
4042 } else {
4043 mappedCharacters[consumedLength++] = 0xFFFD;
4044 }
4045 }
4046 *currentBase = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4047
4048 while (--usedLength > 0) {
4049 if (mappedCharacters[usedLength] == 0xFFFD) {
4050 --mappedLength;
4051 --consumedLength;
4052 memmove(mappedCharacters + usedLength, mappedCharacters + usedLength + 1, (mappedLength - usedLength) * sizeof(UTF32Char));
4053 }
4054 }
4055 currentBase = mappedCharacters + consumedLength;
4056 currentChar = *currentBase;
4057 ++consumedLength;
4058
4059 continue;
4060 }
4061 }
4062 }
4063 if (!CFUniCharIsMemberOf(nextChar, kCFUniCharNonBaseCharacterSet)) {
4064 *currentBase = currentChar;
4065 currentBase = mappedCharacters + consumedLength;
4066 currentChar = nextChar;
4067 ++consumedLength;
4068 continue;
4069 }
4070 }
4071 if ((lastClass == 0) || (currentClass != lastClass)) {
4072 nextChar = CFUniCharPrecomposeCharacter(currentChar, nextChar);
4073 if (nextChar == 0xFFFD) {
4074 lastClass = currentClass;
4075 } else {
4076 mappedCharacters[consumedLength] = 0xFFFD;
4077 didCombine = true;
4078 currentChar = nextChar;
4079 lastClass = 0;
4080 }
4081 }
4082 ++consumedLength;
4083 }
4084
4085 *currentBase = currentChar;
4086 if (didCombine) {
4087 consumedLength = mappedLength;
4088 while (--consumedLength > 0) {
4089 if (mappedCharacters[consumedLength] == 0xFFFD) {
4090 --mappedLength;
4091 memmove(mappedCharacters + consumedLength, mappedCharacters + consumedLength + 1, (mappedLength - consumedLength) * sizeof(UTF32Char));
4092 }
4093 }
4094 }
4095 } else if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo
4096 SInt8 lIndex = currentChar - HANGUL_LBASE;
4097
4098 if ((contents < limit) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) {
4099 SInt16 vIndex = *contents - HANGUL_VBASE;
4100
4101 if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) {
4102 SInt16 tIndex = 0;
4103
4104 ++contents; ++currentLength;
4105
4106 if (contents < limit) {
4107 tIndex = *contents - HANGUL_TBASE;
4108 if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) {
4109 tIndex = 0;
4110 } else {
4111 ++contents; ++currentLength;
4112 }
4113 }
4114 *mappedCharacters = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
4115 mappedLength = 1;
4116 }
4117 }
4118 }
4119 }
4120
4121 if (mappedLength > 0) {
4122 CFIndex utf16Length = __CFGetUTF16Length(mappedCharacters, mappedLength);
4123
4124 if (utf16Length != currentLength) {
4125 __CFStringChangeSize(string, CFRangeMake(currentIndex, currentLength), utf16Length, true);
4126 currentLength = utf16Length;
4127 }
4128 contents = (UTF16Char *)__CFStrContents(string);
4129 limit = contents + __CFStrLength(string);
4130 contents += currentIndex;
4131 __CFFillInUTF16(mappedCharacters, contents, mappedLength);
4132 contents += utf16Length;
4133 }
4134 currentIndex += currentLength;
4135 }
4136
4137 if (mappedCharacters != buffer) CFAllocatorDeallocate(NULL, mappedCharacters);
4138 }
4139 }
4140
/* Path component separator as a string literal ("/"), as the name says for POSIX-style paths */
#define POSIX_SEPARATOR "/"
4142
4143 CF_INLINE void __CFStringReplacePathSeparator(CFMutableStringRef string, const char from, const char to) {
4144 uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string);
4145 CFIndex length = __CFStrLength2(string, contents);
4146 bool isUnicode = __CFStrIsUnicode(string);
4147 CFIndex idx;
4148
4149 for (idx = 0;idx < length;idx++) {
4150 if ((isUnicode ? ((UniChar*)contents)[idx] : ((uint8_t*)contents)[idx]) == from) {
4151 if (isUnicode) {
4152 ((UniChar*)contents)[idx] = to;
4153 } else {
4154 ((uint8_t*)contents)[idx] = to;
4155 }
4156 }
4157 }
4158 }
4159
/* Bits stored in CFFormatSpec.flags by __CFParseFormatSpec for the printf-style
   flag characters '0', '-', '+' and ' '. */
enum {
    kCFStringFormatZeroFlag = (1 << 0), // if not, padding is space char
    kCFStringFormatMinusFlag = (1 << 1), // if not, no flag implied; setting it clears the zero flag
    kCFStringFormatPlusFlag = (1 << 2), // if not, no flag implied, overrides space
    kCFStringFormatSpaceFlag = (1 << 3) // if not, no flag implied
};
4166
/* One piece of a parsed format string: either a run of literal characters or a
   single '%' specification. */
typedef struct {
    int16_t size;		// one of the CFFormatSize* codes
    int16_t type;		// one of the CFFormat*Type codes (0 until a type character is parsed)
    SInt32 loc;			// index in the format string where this piece starts
    SInt32 len;			// number of format characters this piece covers
    SInt32 widthArg;		// field width written in the format, -1 if none
    SInt32 precArg;		// precision written in the format, -1 if none
    uint32_t flags;		// kCFStringFormat*Flag bits
    int8_t mainArgNum;		// zero-based argument index from "n$", -1 if none
    int8_t precArgNum;		// -1 if none; -2 once ".*" is seen, then the "n$" index if one follows
    int8_t widthArgNum;		// -1 if none; -2 once "*" is seen, then the "n$" index if one follows
    int8_t unused1;
} CFFormatSpec;
4180
/* Holds one argument value for formatting. The union member that is valid
   presumably follows `type` (CFFormat*Type) and `size` (CFFormatSize*) --
   the code that fills these in is outside this section; confirm there. */
typedef struct {
    int16_t type;
    int16_t size;
    union {
        int64_t longlongValue;	// integer arguments
        double doubleValue;	// floating point arguments
        void *pointerValue;	// pointer-like arguments
    } value;
} CFPrintValue;
4190
/* Codes for CFFormatSpec.size / CFPrintValue.size. These are tags, not byte
   counts; the number in each name is the argument width in bytes it stands for. */
enum {
    CFFormatDefaultSize = 0,
    CFFormatSize1 = 1,
    CFFormatSize2 = 2,
    CFFormatSize4 = 3,
    CFFormatSize8 = 4,
    CFFormatSize16 = 5, /* unused */
};
4199
/* Codes for CFFormatSpec.type / CFPrintValue.type, assigned by
   __CFParseFormatSpec from the conversion character of a specification. */
enum {
    CFFormatLiteralType = 32,	/* literal text, or an unrecognized conversion character */
    CFFormatLongType = 33,	/* c, d, D, i, o, O, u, U, x, X */
    CFFormatDoubleType = 34,	/* e, E, f, g, G */
    CFFormatPointerType = 35,	/* p, n */
    CFFormatObjectType = 36, /* handled specially */ /* ??? not used anymore, can be removed? */
    CFFormatCFType = 37, /* handled specially */	/* @ */
    CFFormatUnicharsType = 38, /* handled specially */	/* S */
    CFFormatCharsType = 39, /* handled specially */	/* s */
    CFFormatPascalCharsType = 40, /* handled specially */	/* P */
    CFFormatSingleUnicharType = 41 /* handled specially */	/* C */
};
4212
4213 CF_INLINE void __CFParseFormatSpec(const UniChar *uformat, const uint8_t *cformat, SInt32 *fmtIdx, SInt32 fmtLen, CFFormatSpec *spec) {
4214 Boolean seenDot = false;
4215 for (;;) {
4216 UniChar ch;
4217 if (fmtLen <= *fmtIdx) return; /* no type */
4218 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
4219 reswtch:switch (ch) {
4220 case '#': // ignored for now
4221 break;
4222 case 0x20:
4223 if (!(spec->flags & kCFStringFormatPlusFlag)) spec->flags |= kCFStringFormatSpaceFlag;
4224 break;
4225 case '-':
4226 spec->flags |= kCFStringFormatMinusFlag;
4227 spec->flags &= ~kCFStringFormatZeroFlag; // remove zero flag
4228 break;
4229 case '+':
4230 spec->flags |= kCFStringFormatPlusFlag;
4231 spec->flags &= ~kCFStringFormatSpaceFlag; // remove space flag
4232 break;
4233 case '0':
4234 if (!(spec->flags & kCFStringFormatMinusFlag)) spec->flags |= kCFStringFormatZeroFlag;
4235 break;
4236 case 'h':
4237 spec->size = CFFormatSize2;
4238 break;
4239 case 'l':
4240 if (*fmtIdx < fmtLen) {
4241 // fetch next character, don't increment fmtIdx
4242 if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)];
4243 if ('l' == ch) { // 'll' for long long, like 'q'
4244 (*fmtIdx)++;
4245 spec->size = CFFormatSize8;
4246 break;
4247 }
4248 }
4249 spec->size = CFFormatSize4;
4250 break;
4251 case 'q':
4252 spec->size = CFFormatSize8;
4253 break;
4254 case 'c':
4255 spec->type = CFFormatLongType;
4256 spec->size = CFFormatSize1;
4257 return;
4258 case 'O': case 'o': case 'D': case 'd': case 'i': case 'U': case 'u': case 'x': case 'X':
4259 spec->type = CFFormatLongType;
4260 return;
4261 case 'e': case 'E': case 'f': case 'g': case 'G':
4262 spec->type = CFFormatDoubleType;
4263 spec->size = CFFormatSize8;
4264 return;
4265 case 'n': case 'p': /* %n is not handled correctly currently */
4266 spec->type = CFFormatPointerType;
4267 spec->size = CFFormatSize4;
4268 return;
4269 case 's':
4270 spec->type = CFFormatCharsType;
4271 spec->size = CFFormatSize4;
4272 return;
4273 case 'S':
4274 spec->type = CFFormatUnicharsType;
4275 spec->size = CFFormatSize4;
4276 return;
4277 case 'C':
4278 spec->type = CFFormatSingleUnicharType;
4279 spec->size = CFFormatSize2;
4280 return;
4281 case 'P':
4282 spec->type = CFFormatPascalCharsType;
4283 spec->size = CFFormatSize4;
4284 return;
4285 case '@':
4286 spec->type = CFFormatCFType;
4287 spec->size = CFFormatSize4;
4288 return;
4289 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
4290 int64_t number = 0;
4291 do {
4292 number = 10 * number + (ch - '0');
4293 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
4294 } while ((UInt32)(ch - '0') <= 9);
4295 if ('$' == ch) {
4296 if (-2 == spec->precArgNum) {
4297 spec->precArgNum = number - 1; // Arg numbers start from 1
4298 } else if (-2 == spec->widthArgNum) {
4299 spec->widthArgNum = number - 1; // Arg numbers start from 1
4300 } else {
4301 spec->mainArgNum = number - 1; // Arg numbers start from 1
4302 }
4303 break;
4304 } else if (seenDot) { /* else it's either precision or width */
4305 spec->precArg = (SInt32)number;
4306 } else {
4307 spec->widthArg = (SInt32)number;
4308 }
4309 goto reswtch;
4310 }
4311 case '*':
4312 spec->widthArgNum = -2;
4313 break;
4314 case '.':
4315 seenDot = true;
4316 if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++];
4317 if ('*' == ch) {
4318 spec->precArgNum = -2;
4319 break;
4320 }
4321 goto reswtch;
4322 default:
4323 spec->type = CFFormatLiteralType;
4324 return;
4325 }
4326 }
4327 }
4328
/* Stand-ins for snprintf() on platforms whose C library lacks it.
   b: destination buffer, n: its size in bytes, f: printf-style format. */
#if defined(__MACOS8__)
/* NOTE: this variant forwards to vsprintf and does not enforce `n`. */
static int snprintf (char *b, size_t n, const char * f, ...) {
    int retval;
    va_list args;
    va_start (args, f);
    retval = vsprintf(b, f, args);
    va_end(args);
    return retval;
}
#elif defined(__WIN32__)
/* Returns whatever _vsnprintf returns (negative when the output was truncated). */
static int snprintf (char *b, size_t n, const char * f, ...) {
    int retval;
    va_list args;
    va_start (args, f);
    retval = _vsnprintf(b, n, f, args);
    va_end(args);
    // _vsnprintf writes no terminator when the output fills or exceeds the buffer
    if (n > 0 && (retval < 0 || (size_t)retval >= n)) b[n - 1] = '\0';
    return retval;
}
#endif
4348
/* Forwards to _CFStringAppendFormatAndArgumentsAux with a NULL copyDescFunc;
   all other arguments are passed through unchanged.
   ??? It ignores the formatOptions argument.
   ??? %s depends on handling of encodings by __CFStringAppendBytes
*/
void CFStringAppendFormatAndArguments(CFMutableStringRef outputString, CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
    _CFStringAppendFormatAndArgumentsAux(outputString, NULL, formatOptions, formatString, args);
}
4355
/* Formats WHAT, converted to TYPE, into `buffer` (at most 255 bytes) using
   `formatBuffer` as the format. Width and/or precision are passed as extra
   leading arguments when the current spec's widthArgNum / precArgNum is not -1.
   Relies on these names being in scope where it is expanded: specs, curSpec,
   buffer, formatBuffer, width, precision.
   Expands to a braced block, not a do { } while (0) statement. */
#define SNPRINTF(TYPE, WHAT) {				\
    TYPE value = (TYPE) WHAT;				\
    if (-1 != specs[curSpec].widthArgNum) {		\
        if (-1 != specs[curSpec].precArgNum) {		\
            snprintf(buffer, 255, formatBuffer, width, precision, value); \
        } else {					\
            snprintf(buffer, 255, formatBuffer, width, value); \
        }						\
    } else {						\
        if (-1 != specs[curSpec].precArgNum) {		\
            snprintf(buffer, 255, formatBuffer, precision, value); \
        } else {					\
            snprintf(buffer, 255, formatBuffer, value);	\
        }						\
    }}
4371
4372 void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, CFDictionaryRef), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) {
4373 SInt32 numSpecs, sizeSpecs, sizeArgNum, formatIdx, curSpec, argNum;
4374 CFIndex formatLen;
4375 #define FORMAT_BUFFER_LEN 400
4376 const uint8_t *cformat = NULL;
4377 const UniChar *uformat = NULL;
4378 UniChar *formatChars = NULL;
4379 UniChar localFormatBuffer[FORMAT_BUFFER_LEN];
4380
4381 #define VPRINTF_BUFFER_LEN 61
4382 CFFormatSpec localSpecsBuffer[VPRINTF_BUFFER_LEN];
4383 CFFormatSpec *specs;
4384 CFPrintValue localValuesBuffer[VPRINTF_BUFFER_LEN];
4385 CFPrintValue *values;
4386 CFAllocatorRef tmpAlloc = NULL;
4387
4388 numSpecs = 0;
4389 sizeSpecs = 0;
4390 sizeArgNum = 0;
4391 specs = NULL;
4392 values = NULL;
4393
4394 formatLen = CFStringGetLength(formatString);
4395 if (!CF_IS_OBJC(__kCFStringTypeID, formatString)) {
4396 __CFAssertIsString(formatString);
4397 if (!__CFStrIsUnicode(formatString)) {
4398 cformat = __CFStrContents(formatString);
4399 if (cformat) cformat += __CFStrSkipAnyLengthByte(formatString);
4400 } else {
4401 uformat = __CFStrContents(formatString);
4402 }
4403 }
4404 if (!cformat && !uformat) {
4405 formatChars = (formatLen > FORMAT_BUFFER_LEN) ? CFAllocatorAllocate(tmpAlloc = __CFGetDefaultAllocator(), formatLen * sizeof(UniChar), 0) : localFormatBuffer;
4406 if (formatChars != localFormatBuffer && __CFOASafe) __CFSetLastAllocationEventName(formatChars, "CFString (temp)");
4407 CFStringGetCharacters(formatString, CFRangeMake(0, formatLen), formatChars);
4408 uformat = formatChars;
4409 }
4410
4411 /* Compute an upper bound for the number of format specifications */
4412 if (cformat) {
4413 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == cformat[formatIdx]) sizeSpecs++;
4414 } else {
4415 for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == uformat[formatIdx]) sizeSpecs++;
4416 }
4417 tmpAlloc = __CFGetDefaultAllocator();
4418 specs = ((2 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? CFAllocatorAllocate(tmpAlloc, (2 * sizeSpecs + 1) * sizeof(CFFormatSpec), 0) : localSpecsBuffer;
4419 if (specs != localSpecsBuffer && __CFOASafe) __CFSetLastAllocationEventName(specs, "CFString (temp)");
4420
4421 /* Collect format specification information from the format string */
4422 for (curSpec = 0, formatIdx = 0; formatIdx < formatLen; curSpec++) {
4423 SInt32 newFmtIdx;
4424 specs[curSpec].loc = formatIdx;
4425 specs[curSpec].len = 0;
4426 specs[curSpec].size = 0;
4427 specs[curSpec].type = 0;
4428 specs[curSpec].flags = 0;
4429 specs[curSpec].widthArg = -1;
4430 specs[curSpec].precArg = -1;
4431 specs[curSpec].mainArgNum = -1;
4432 specs[curSpec].precArgNum = -1;
4433 specs[curSpec].widthArgNum = -1;
4434 if (cformat) {
4435 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != cformat[newFmtIdx]; newFmtIdx++);
4436 } else {
4437 for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != uformat[newFmtIdx]; newFmtIdx++);
4438 }
4439 if (newFmtIdx != formatIdx) { /* Literal chunk */
4440 specs[curSpec].type = CFFormatLiteralType;
4441 specs[curSpec].len = newFmtIdx - formatIdx;
4442 } else {
4443 newFmtIdx++; /* Skip % */
4444 __CFParseFormatSpec(uformat, cformat, &newFmtIdx, formatLen, &(specs[curSpec]));
4445 if (CFFormatLiteralType == specs[curSpec].type) {
4446 specs[curSpec].loc = formatIdx + 1;
4447 specs[curSpec].len = 1;
4448 } else {
4449 specs[curSpec].len = newFmtIdx - formatIdx;
4450 }
4451 }
4452 formatIdx = newFmtIdx;
4453
4454 // printf("specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum);
4455
4456 }
4457 numSpecs = curSpec;
4458 // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value
4459 values = ((3 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? CFAllocatorAllocate(tmpAlloc, (3 * sizeSpecs + 1) * sizeof(CFPrintValue), 0) : localValuesBuffer;
4460 if (values != localValuesBuffer && __CFOASafe) __CFSetLastAllocationEventName(values, "CFString (temp)");
4461 memset(values, 0, (3 * sizeSpecs + 1) * sizeof(CFPrintValue));
4462 sizeArgNum = (3 * sizeSpecs + 1);
4463
4464 /* Compute values array */
4465 argNum = 0;
4466 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
4467 SInt32 newMaxArgNum;
4468 if (0 == specs[curSpec].type) continue;
4469 if (CFFormatLiteralType == specs[curSpec].type) continue;
4470 newMaxArgNum = sizeArgNum;
4471 if (newMaxArgNum < specs[curSpec].mainArgNum) {
4472 newMaxArgNum = specs[curSpec].mainArgNum;
4473 }
4474 if (newMaxArgNum < specs[curSpec].precArgNum) {
4475 newMaxArgNum = specs[curSpec].precArgNum;
4476 }
4477 if (newMaxArgNum < specs[curSpec].widthArgNum) {
4478 newMaxArgNum = specs[curSpec].widthArgNum;
4479 }
4480 if (sizeArgNum < newMaxArgNum) {
4481 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
4482 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
4483 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
4484 return; // more args than we expected!
4485 }
4486 /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */
4487 if (-2 == specs[curSpec].widthArgNum) {
4488 specs[curSpec].widthArgNum = argNum++;
4489 }
4490 if (-2 == specs[curSpec].precArgNum) {
4491 specs[curSpec].precArgNum = argNum++;
4492 }
4493 if (-1 == specs[curSpec].mainArgNum) {
4494 specs[curSpec].mainArgNum = argNum++;
4495 }
4496 values[specs[curSpec].mainArgNum].size = specs[curSpec].size;
4497 values[specs[curSpec].mainArgNum].type = specs[curSpec].type;
4498 if (-1 != specs[curSpec].widthArgNum) {
4499 values[specs[curSpec].widthArgNum].size = 0;
4500 values[specs[curSpec].widthArgNum].type = CFFormatLongType;
4501 }
4502 if (-1 != specs[curSpec].precArgNum) {
4503 values[specs[curSpec].precArgNum].size = 0;
4504 values[specs[curSpec].precArgNum].type = CFFormatLongType;
4505 }
4506 }
4507
4508 /* Collect the arguments in correct type from vararg list */
4509 for (argNum = 0; argNum < sizeArgNum; argNum++) {
4510 switch (values[argNum].type) {
4511 case 0:
4512 case CFFormatLiteralType:
4513 break;
4514 case CFFormatLongType:
4515 case CFFormatSingleUnicharType:
4516 if (CFFormatSize1 == values[argNum].size) {
4517 values[argNum].value.longlongValue = (int64_t)(char)va_arg(args, int);
4518 } else if (CFFormatSize2 == values[argNum].size) {
4519 values[argNum].value.longlongValue = (int64_t)(short)va_arg(args, int);
4520 } else if (CFFormatSize4 == values[argNum].size) {
4521 values[argNum].value.longlongValue = (int64_t)va_arg(args, long);
4522 } else if (CFFormatSize8 == values[argNum].size) {
4523 values[argNum].value.longlongValue = (int64_t)va_arg(args, int64_t);
4524 } else {
4525 values[argNum].value.longlongValue = (int64_t)va_arg(args, int);
4526 }
4527 break;
4528 case CFFormatDoubleType:
4529 values[argNum].value.doubleValue = va_arg(args, double);
4530 break;
4531 case CFFormatPointerType:
4532 case CFFormatObjectType:
4533 case CFFormatCFType:
4534 case CFFormatUnicharsType:
4535 case CFFormatCharsType:
4536 case CFFormatPascalCharsType:
4537 values[argNum].value.pointerValue = va_arg(args, void *);
4538 break;
4539 }
4540 }
4541 va_end(args);
4542
4543 /* Format the pieces together */
4544 for (curSpec = 0; curSpec < numSpecs; curSpec++) {
4545 SInt32 width = 0, precision = 0;
4546 UniChar *up, ch;
4547 Boolean hasWidth = false, hasPrecision = false;
4548
4549 // widthArgNum and widthArg are never set at the same time; same for precArg*
4550 if (-1 != specs[curSpec].widthArgNum) {
4551 width = (SInt32)values[specs[curSpec].widthArgNum].value.longlongValue;
4552 hasWidth = true;
4553 }
4554 if (-1 != specs[curSpec].precArgNum) {
4555 precision = (SInt32)values[specs[curSpec].precArgNum].value.longlongValue;
4556 hasPrecision = true;
4557 }
4558 if (-1 != specs[curSpec].widthArg) {
4559 width = specs[curSpec].widthArg;
4560 hasWidth = true;
4561 }
4562 if (-1 != specs[curSpec].precArg) {
4563 precision = specs[curSpec].precArg;
4564 hasPrecision = true;
4565 }
4566
4567 switch (specs[curSpec].type) {
4568 case CFFormatLongType:
4569 case CFFormatDoubleType:
4570 case CFFormatPointerType: {
4571 int8_t formatBuffer[128];
4572 int8_t buffer[256 + width + precision];
4573 SInt32 cidx, idx, loc;
4574 Boolean appended = false;
4575 loc = specs[curSpec].loc;
4576 // In preparation to call snprintf(), copy the format string out
4577 if (cformat) {
4578 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
4579 if ('$' == cformat[loc + cidx]) {
4580 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
4581 } else {
4582 formatBuffer[idx] = cformat[loc + cidx];
4583 }
4584 }
4585 } else {
4586 for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) {
4587 if ('$' == uformat[loc + cidx]) {
4588 for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--);
4589 } else {
4590 formatBuffer[idx] = (int8_t)uformat[loc + cidx];
4591 }
4592 }
4593 }
4594 formatBuffer[idx] = '\0';
4595 // Should modify format buffer here if necessary; for example, to translate %qd to
4596 // the equivalent, on architectures which do not have %q.
4597 buffer[sizeof(buffer) - 1] = '\0';
4598 switch (specs[curSpec].type) {
4599 case CFFormatLongType:
4600 if (CFFormatSize8 == specs[curSpec].size) {
4601 SNPRINTF(int64_t, values[specs[curSpec].mainArgNum].value.longlongValue)
4602 } else {
4603 SNPRINTF(SInt32, values[specs[curSpec].mainArgNum].value.longlongValue)
4604 }
4605 break;
4606 case CFFormatPointerType:
4607 SNPRINTF(void *, values[specs[curSpec].mainArgNum].value.pointerValue)
4608 break;
4609
4610 case CFFormatDoubleType:
4611 SNPRINTF(double, values[specs[curSpec].mainArgNum].value.doubleValue)
4612 // See if we need to localize the decimal point
4613 if (formatOptions) { // We have a localization dictionary
4614 CFStringRef decimalSeparator = CFDictionaryGetValue(formatOptions, kCFNSDecimalSeparatorKey);
4615 if (decimalSeparator != NULL) { // We have a decimal separator in there
4616 CFIndex decimalPointLoc = 0;
4617 while (buffer[decimalPointLoc] != 0 && buffer[decimalPointLoc] != '.') decimalPointLoc++;
4618 if (buffer[decimalPointLoc] == '.') { // And we have a decimal point in the formatted string
4619 buffer[decimalPointLoc] = 0;
4620 CFStringAppendCString(outputString, buffer, __CFStringGetEightBitStringEncoding());
4621 CFStringAppend(outputString, decimalSeparator);
4622 CFStringAppendCString(outputString, buffer + decimalPointLoc + 1, __CFStringGetEightBitStringEncoding());
4623 appended = true;
4624 }
4625 }
4626 }
4627 break;
4628 }
4629 if (!appended) CFStringAppendCString(outputString, buffer, __CFStringGetEightBitStringEncoding());
4630 }
4631 break;
4632 case CFFormatLiteralType:
4633 if (cformat) {
4634 __CFStringAppendBytes(outputString, cformat+specs[curSpec].loc, specs[curSpec].len, __CFStringGetEightBitStringEncoding());
4635 } else {
4636 CFStringAppendCharacters(outputString, uformat+specs[curSpec].loc, specs[curSpec].len);
4637 }
4638 break;
4639 case CFFormatPascalCharsType:
4640 case CFFormatCharsType:
4641 if (values[specs[curSpec].mainArgNum].value.pointerValue == NULL) {
4642 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
4643 } else {
4644 int len;
4645 const char *str = values[specs[curSpec].mainArgNum].value.pointerValue;
4646 if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case
4647 len = ((unsigned char *)str)[0];
4648 str++;
4649 if (hasPrecision && precision < len) len = precision;
4650 } else { // C-string case
4651 if (!hasPrecision) { // No precision, so rely on the terminating null character
4652 len = strlen(str);
4653 } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988)
4654 const char *terminatingNull = memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str
4655 if (terminatingNull) { // There was a null in the first precision characters
4656 len = terminatingNull - str;
4657 } else {
4658 len = precision;
4659 }
4660 }
4661 }
4662 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
4663 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
4664 // to ignore those flags (and, say, never pad with '0' instead of space).
4665 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
4666 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
4667 if (hasWidth && width > len) {
4668 int w = width - len; // We need this many spaces; do it ten at a time
4669 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
4670 }
4671 } else {
4672 if (hasWidth && width > len) {
4673 int w = width - len; // We need this many spaces; do it ten at a time
4674 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
4675 }
4676 __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding());
4677 }
4678 }
4679 break;
4680 case CFFormatSingleUnicharType:
4681 ch = values[specs[curSpec].mainArgNum].value.longlongValue;
4682 CFStringAppendCharacters(outputString, &ch, 1);
4683 break;
4684 case CFFormatUnicharsType:
4685 //??? need to handle width, precision, and padding arguments
4686 up = values[specs[curSpec].mainArgNum].value.pointerValue;
4687 if (NULL == up) {
4688 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
4689 } else {
4690 int len;
4691 for (len = 0; 0 != up[len]; len++);
4692 // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for
4693 // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone
4694 // to ignore those flags (and, say, never pad with '0' instead of space).
4695 if (hasPrecision && precision < len) len = precision;
4696 if (specs[curSpec].flags & kCFStringFormatMinusFlag) {
4697 CFStringAppendCharacters(outputString, up, len);
4698 if (hasWidth && width > len) {
4699 int w = width - len; // We need this many spaces; do it ten at a time
4700 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
4701 }
4702 } else {
4703 if (hasWidth && width > len) {
4704 int w = width - len; // We need this many spaces; do it ten at a time
4705 do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0);
4706 }
4707 CFStringAppendCharacters(outputString, up, len);
4708 }
4709 }
4710 break;
4711 case CFFormatCFType:
4712 case CFFormatObjectType:
4713 if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) {
4714 CFStringRef str = NULL;
4715 if (copyDescFunc) {
4716 str = copyDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
4717 } else {
4718 str = __CFCopyFormattingDescription(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions);
4719 if (NULL == str) {
4720 str = CFCopyDescription(values[specs[curSpec].mainArgNum].value.pointerValue);
4721 }
4722 }
4723 if (str) {
4724 CFStringAppend(outputString, str);
4725 CFRelease(str);
4726 } else {
4727 CFStringAppendCString(outputString, "(null description)", kCFStringEncodingASCII);
4728 }
4729 } else {
4730 CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII);
4731 }
4732 break;
4733 }
4734 }
4735
4736 if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs);
4737 if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values);
4738 if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars);
4739
4740 }
4741
4742 #undef SNPRINTF
4743
4744 void CFShowStr(CFStringRef str) {
4745 CFAllocatorRef alloc;
4746
4747 if (!str) {
4748 printf ("(null)\n");
4749 return;
4750 }
4751
4752 if (CF_IS_OBJC(__kCFStringTypeID, str)) {
4753 printf ("This is an NSString, not CFString\n");
4754 return;
4755 }
4756
4757 alloc = CFGetAllocator(str);
4758
4759 printf ("\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str), __CFStrIsEightBit(str));
4760 printf ("HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n",
4761 __CFStrHasLengthByte(str), __CFStrHasNullByte(str), __CFStrIsInline(str));
4762
4763 printf ("Allocator ");
4764 if (alloc != kCFAllocatorSystemDefault) {
4765 printf ("%p\n", (void *)alloc);
4766 } else {
4767 printf ("SystemDefault\n");
4768 }
4769 printf ("Mutable %d\n", __CFStrIsMutable(str));
4770 if (!__CFStrIsMutable(str) && __CFStrHasContentsDeallocator(str)) {
4771 if (__CFStrContentsDeallocator(str)) printf ("ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str));
4772 else printf ("ContentsDeallocatorFunc None\n");
4773 } else if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str)) {
4774 printf ("ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef)str));
4775 }
4776
4777 if (__CFStrIsMutable(str)) {
4778 printf ("CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str), __CFStrIsFixed(str) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str));
4779 }
4780 printf ("Contents %p\n", (void *)__CFStrContents(str));
4781 }
4782
4783