]>
Commit | Line | Data |
---|---|---|
9ce05555 | 1 | /* |
d8925383 | 2 | * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. |
9ce05555 A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
9ce05555 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
23 | /* CFString.c | |
24 | Copyright 1998-2002, Apple, Inc. All rights reserved. | |
25 | Responsibility: Ali Ozer | |
d8925383 A |
26 | |
27 | !!! For performance reasons, it's important that all functions marked CF_INLINE in this file are inlined. | |
9ce05555 A |
28 | */ |
29 | ||
30 | #include <CoreFoundation/CFBase.h> | |
31 | #include <CoreFoundation/CFString.h> | |
32 | #include <CoreFoundation/CFDictionary.h> | |
33 | #include "CFStringEncodingConverterExt.h" | |
34 | #include "CFUniChar.h" | |
35 | #include "CFUnicodeDecomposition.h" | |
36 | #include "CFUnicodePrecomposition.h" | |
d8925383 | 37 | #include "CFUtilitiesPriv.h" |
9ce05555 A |
38 | #include "CFInternal.h" |
39 | #include <stdarg.h> | |
40 | #include <stdio.h> | |
9ce05555 | 41 | #include <string.h> |
d8925383 A |
42 | #if defined (__MACOS8__) |
43 | #include <Script.h> // For GetScriptManagerVariable | |
44 | #include <Processes.h> // For logging | |
45 | #include <stdlib.h> | |
46 | #include <UnicodeConverter.h> | |
47 | #include <TextEncodingConverter.h> | |
48 | #elif defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__) | |
9ce05555 A |
49 | #include <unistd.h> |
50 | #endif | |
51 | #if defined(__WIN32__) | |
52 | #include <windows.h> | |
53 | #endif /* __WIN32__ */ | |
54 | ||
d8925383 | 55 | #if defined(__MACH__) |
9ce05555 | 56 | extern size_t malloc_good_size(size_t size); |
d8925383 | 57 | #endif |
9ce05555 A |
58 | extern void __CFStrConvertBytesToUnicode(const uint8_t *bytes, UniChar *buffer, CFIndex numChars); |
59 | ||
60 | #if defined(DEBUG) | |
61 | ||
62 | // Special allocator used by CFSTRs to catch deallocations | |
63 | static CFAllocatorRef constantStringAllocatorForDebugging = NULL; | |
64 | ||
65 | // We put this into C & Pascal strings if we can't convert | |
66 | #define CONVERSIONFAILURESTR "CFString conversion failed" | |
67 | ||
68 | // We set this to true when purging the constant string table, so CFStringDeallocate doesn't assert | |
69 | static Boolean __CFConstantStringTableBeingFreed = false; | |
70 | ||
71 | #endif | |
72 | ||
73 | ||
d8925383 | 74 | |
9ce05555 A |
75 | // This section is for CFString compatibility and other behaviors... |
76 | ||
77 | static CFOptionFlags _CFStringCompatibilityMask = 0; | |
78 | ||
79 | #define Bug2967272 1 | |
80 | ||
81 | void _CFStringSetCompatibility(CFOptionFlags mask) { | |
82 | _CFStringCompatibilityMask |= mask; | |
83 | } | |
84 | ||
85 | CF_INLINE Boolean __CFStringGetCompatibility(CFOptionFlags mask) { | |
86 | return (_CFStringCompatibilityMask & mask) == mask; | |
87 | } | |
88 | ||
89 | ||
90 | ||
91 | // Two constant strings used by CFString; these are initialized in CFStringInitialize | |
92 | CONST_STRING_DECL(kCFEmptyString, "") | |
93 | CONST_STRING_DECL(kCFNSDecimalSeparatorKey, "NSDecimalSeparator") | |
94 | ||
95 | ||
96 | /* !!! Never do sizeof(CFString); the union is here just to make it easier to access some fields. | |
97 | */ | |
98 | struct __CFString { | |
99 | CFRuntimeBase base; | |
100 | union { // In many cases the allocated structs are smaller than these | |
101 | struct { | |
102 | SInt32 length; | |
103 | } inline1; | |
104 | ||
105 | struct { | |
106 | void *buffer; | |
107 | UInt32 length; | |
108 | CFAllocatorRef contentsDeallocator; // Just the dealloc func is used | |
d8925383 | 109 | } notInlineImmutable1; |
9ce05555 A |
110 | struct { |
111 | void *buffer; | |
112 | CFAllocatorRef contentsDeallocator; // Just the dealloc func is used | |
d8925383 | 113 | } notInlineImmutable2; |
9ce05555 A |
114 | struct { |
115 | void *buffer; | |
116 | UInt32 length; | |
117 | UInt32 capacityFields; // Currently only stores capacity | |
118 | UInt32 gapEtc; // Stores some bits, plus desired or fixed capacity | |
119 | CFAllocatorRef contentsAllocator; // Optional | |
d8925383 | 120 | } notInlineMutable; |
9ce05555 A |
121 | } variants; |
122 | }; | |
123 | ||
124 | /* | |
125 | I = is immutable | |
126 | E = not inline contents | |
127 | U = is Unicode | |
128 | N = has NULL byte | |
129 | L = has length byte | |
130 | D = explicit deallocator for contents (for mutable objects, allocator) | |
d8925383 | 131 | X = UNUSED |
9ce05555 A |
132 | |
133 | Also need (only for mutable) | |
134 | F = is fixed | |
135 | G = has gap | |
136 | Cap, DesCap = capacity | |
137 | ||
138 | B7 B6 B5 B4 B3 B2 B1 B0 | |
139 | U N L X I | |
140 | ||
141 | B6 B5 | |
142 | 0 0 inline contents | |
143 | 0 1 E (freed with default allocator) | |
144 | 1 0 E (not freed) | |
145 | 1 1 E D | |
d8925383 A |
146 | |
147 | !!! Note: Constant CFStrings use the bit patterns: | |
148 | C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable) | |
149 | D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable) | |
150 | The bit usages should not be modified in a way that would affect these bit patterns. | |
9ce05555 A |
151 | */ |
152 | ||
/* Flag bits and masks for CFString. The first group lives in base._info; the
   second group lives in variants.notInlineMutable.gapEtc (mutable strings only).
*/
enum {
    /* --- Contents storage / ownership (base._info) --- */
    __kCFFreeContentsWhenDoneMask = 0x020,
    __kCFFreeContentsWhenDone = 0x020,
    __kCFContentsMask = 0x060,
    __kCFHasInlineContents = 0x000,
    __kCFNotInlineContentsNoFree = 0x040,		// Don't free
    __kCFNotInlineContentsDefaultFree = 0x020,	// Use allocator's free function
    __kCFNotInlineContentsCustomFree = 0x060,	// Use a specially provided free function
    __kCFHasContentsAllocatorMask = 0x060,
    __kCFHasContentsAllocator = 0x060,		// (For mutable strings) use a specially provided allocator
    __kCFHasContentsDeallocatorMask = 0x060,
    __kCFHasContentsDeallocator = 0x060,

    /* --- Single-bit attributes (base._info) --- */
    __kCFIsMutableMask = 0x01,
    __kCFIsMutable = 0x01,
    __kCFIsUnicodeMask = 0x10,
    __kCFIsUnicode = 0x10,
    __kCFHasNullByteMask = 0x08,
    __kCFHasNullByte = 0x08,
    __kCFHasLengthByteMask = 0x04,
    __kCFHasLengthByte = 0x04,
    // !!! Bit 0x02 has been freed up

    /* --- These are in variants.notInlineMutable.gapEtc --- */
    __kCFGapMask = 0x00ffffff,
    __kCFGapBitNumber = 24,
    __kCFDesiredCapacityMask = 0x00ffffff,	// Currently gap and fixed share same bits as gap not implemented
    __kCFDesiredCapacityBitNumber = 24,
    __kCFIsFixedMask = 0x80000000,
    __kCFIsFixed = 0x80000000,
    __kCFHasGapMask = 0x40000000,
    __kCFHasGap = 0x40000000,
    __kCFCapacityProvidedExternallyMask = 0x20000000,	// Set if the external buffer capacity is set explicitly by the developer
    __kCFCapacityProvidedExternally = 0x20000000,
    __kCFIsExternalMutableMask = 0x10000000,	// Determines whether the buffer is controlled by the developer
    __kCFIsExternalMutable = 0x10000000
    // 0x0f000000: 4 additional bits available for use in mutable strings
};
189 | ||
190 | ||
191 | // !!! Assumptions: | |
192 | // Mutable strings are not inline | |
193 | // Compile-time constant strings are not inline | |
194 | // Mutable strings always have explicit length (but they might also have length byte and null byte) | |
195 | // If there is an explicit length, always use that instead of the length byte (length byte is useful for quickly returning pascal strings) | |
196 | // Never look at the length byte for the length; use __CFStrLength or __CFStrLength2 | |
197 | ||
198 | /* The following set of functions and macros need to be updated on change to the bit configuration | |
199 | */ | |
200 | CF_INLINE Boolean __CFStrIsMutable(CFStringRef str) {return (str->base._info & __kCFIsMutableMask) == __kCFIsMutable;} | |
d8925383 | 201 | CF_INLINE Boolean __CFStrIsInline(CFStringRef str) {return (str->base._info & __kCFContentsMask) == __kCFHasInlineContents;} |
9ce05555 A |
202 | CF_INLINE Boolean __CFStrFreeContentsWhenDone(CFStringRef str) {return (str->base._info & __kCFFreeContentsWhenDoneMask) == __kCFFreeContentsWhenDone;} |
203 | CF_INLINE Boolean __CFStrHasContentsDeallocator(CFStringRef str) {return (str->base._info & __kCFHasContentsDeallocatorMask) == __kCFHasContentsDeallocator;} | |
204 | CF_INLINE Boolean __CFStrIsUnicode(CFStringRef str) {return (str->base._info & __kCFIsUnicodeMask) == __kCFIsUnicode;} | |
205 | CF_INLINE Boolean __CFStrIsEightBit(CFStringRef str) {return (str->base._info & __kCFIsUnicodeMask) != __kCFIsUnicode;} | |
206 | CF_INLINE Boolean __CFStrHasNullByte(CFStringRef str) {return (str->base._info & __kCFHasNullByteMask) == __kCFHasNullByte;} | |
207 | CF_INLINE Boolean __CFStrHasLengthByte(CFStringRef str) {return (str->base._info & __kCFHasLengthByteMask) == __kCFHasLengthByte;} | |
208 | CF_INLINE Boolean __CFStrHasExplicitLength(CFStringRef str) {return (str->base._info & (__kCFIsMutableMask | __kCFHasLengthByteMask)) != __kCFHasLengthByte;} // Has explicit length if (1) mutable or (2) not mutable and no length byte | |
d8925383 | 209 | CF_INLINE Boolean __CFStrIsConstant(CFStringRef str) {return (str->base._rc) == 0;} |
9ce05555 A |
210 | |
211 | CF_INLINE SInt32 __CFStrSkipAnyLengthByte(CFStringRef str) {return ((str->base._info & __kCFHasLengthByteMask) == __kCFHasLengthByte) ? 1 : 0;} // Number of bytes to skip over the length byte in the contents | |
212 | ||
213 | /* Returns ptr to the buffer (which might include the length byte) | |
214 | */ | |
215 | CF_INLINE const void *__CFStrContents(CFStringRef str) { | |
216 | if (__CFStrIsInline(str)) { | |
217 | return (const void *)(((UInt32)&(str->variants)) + (__CFStrHasExplicitLength(str) ? sizeof(UInt32) : 0)); | |
d8925383 A |
218 | } else { // Not inline; pointer is always word 2 |
219 | return str->variants.notInlineImmutable1.buffer; | |
9ce05555 A |
220 | } |
221 | } | |
222 | ||
223 | static CFAllocatorRef *__CFStrContentsDeallocatorPtr(CFStringRef str) { | |
d8925383 | 224 | return __CFStrHasExplicitLength(str) ? &(((CFMutableStringRef)str)->variants.notInlineImmutable1.contentsDeallocator) : &(((CFMutableStringRef)str)->variants.notInlineImmutable2.contentsDeallocator); } |
9ce05555 A |
225 | |
226 | // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator | |
227 | CF_INLINE CFAllocatorRef __CFStrContentsDeallocator(CFStringRef str) { | |
228 | return *__CFStrContentsDeallocatorPtr(str); | |
229 | } | |
230 | ||
231 | // Assumption: Called with immutable strings only, and on strings that are known to have a contentsDeallocator | |
232 | CF_INLINE void __CFStrSetContentsDeallocator(CFStringRef str, CFAllocatorRef contentsAllocator) { | |
233 | *__CFStrContentsDeallocatorPtr(str) = contentsAllocator; | |
234 | } | |
235 | ||
236 | static CFAllocatorRef *__CFStrContentsAllocatorPtr(CFStringRef str) { | |
237 | CFAssert(!__CFStrIsInline(str), __kCFLogAssertion, "Asking for contents allocator of inline string"); | |
238 | CFAssert(__CFStrIsMutable(str), __kCFLogAssertion, "Asking for contents allocator of an immutable string"); | |
d8925383 | 239 | return (CFAllocatorRef *)&(str->variants.notInlineMutable.contentsAllocator); |
9ce05555 A |
240 | } |
241 | ||
242 | // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom | |
243 | CF_INLINE CFAllocatorRef __CFStrContentsAllocator(CFMutableStringRef str) { | |
244 | return *(__CFStrContentsAllocatorPtr(str)); | |
245 | } | |
246 | ||
247 | // Assumption: Called with strings that have a contents allocator; also, contents allocator follows custom | |
248 | CF_INLINE void __CFStrSetContentsAllocator(CFMutableStringRef str, CFAllocatorRef alloc) { | |
249 | *(__CFStrContentsAllocatorPtr(str)) = alloc; | |
250 | } | |
251 | ||
252 | /* Returns length; use __CFStrLength2 if contents buffer pointer has already been computed. | |
253 | */ | |
254 | CF_INLINE CFIndex __CFStrLength(CFStringRef str) { | |
255 | if (__CFStrHasExplicitLength(str)) { | |
256 | if (__CFStrIsInline(str)) { | |
257 | return str->variants.inline1.length; | |
258 | } else { | |
d8925383 | 259 | return str->variants.notInlineImmutable1.length; |
9ce05555 A |
260 | } |
261 | } else { | |
262 | return (CFIndex)(*((uint8_t *)__CFStrContents(str))); | |
263 | } | |
264 | } | |
265 | ||
266 | CF_INLINE CFIndex __CFStrLength2(CFStringRef str, const void *buffer) { | |
267 | if (__CFStrHasExplicitLength(str)) { | |
268 | if (__CFStrIsInline(str)) { | |
269 | return str->variants.inline1.length; | |
270 | } else { | |
d8925383 | 271 | return str->variants.notInlineImmutable1.length; |
9ce05555 A |
272 | } |
273 | } else { | |
274 | return (CFIndex)(*((uint8_t *)buffer)); | |
275 | } | |
276 | } | |
277 | ||
9ce05555 A |
278 | |
279 | Boolean __CFStringIsEightBit(CFStringRef str) { | |
280 | return __CFStrIsEightBit(str); | |
281 | } | |
282 | ||
d8925383 | 283 | /* Sets the content pointer for immutable or mutable strings. |
9ce05555 | 284 | */ |
d8925383 A |
285 | CF_INLINE void __CFStrSetContentPtr(CFStringRef str, const void *p) |
286 | { | |
287 | // XXX_PCB catch all writes for mutable string case. | |
288 | CF_WRITE_BARRIER_BASE_ASSIGN(__CFGetAllocator(str), str, ((CFMutableStringRef)str)->variants.notInlineImmutable1.buffer, (void *)p); | |
289 | } | |
9ce05555 A |
290 | CF_INLINE void __CFStrSetInfoBits(CFStringRef str, UInt32 v) {__CFBitfieldSetValue(((CFMutableStringRef)str)->base._info, 6, 0, v);} |
291 | ||
292 | CF_INLINE void __CFStrSetExplicitLength(CFStringRef str, CFIndex v) { | |
293 | if (__CFStrIsInline(str)) { | |
294 | ((CFMutableStringRef)str)->variants.inline1.length = v; | |
295 | } else { | |
d8925383 | 296 | ((CFMutableStringRef)str)->variants.notInlineImmutable1.length = v; |
9ce05555 A |
297 | } |
298 | } | |
299 | ||
300 | // Assumption: Called with mutable strings only | |
d8925383 A |
301 | CF_INLINE Boolean __CFStrIsFixed(CFStringRef str) {return (str->variants.notInlineMutable.gapEtc & __kCFIsFixedMask) == __kCFIsFixed;} |
302 | CF_INLINE Boolean __CFStrHasContentsAllocator(CFStringRef str) {return (str->base._info & __kCFHasContentsAllocatorMask) == __kCFHasContentsAllocator;} | |
303 | CF_INLINE Boolean __CFStrIsExternalMutable(CFStringRef str) {return (str->variants.notInlineMutable.gapEtc & __kCFIsExternalMutableMask) == __kCFIsExternalMutable;} | |
9ce05555 A |
304 | |
305 | // If capacity is provided externally, we only change it when we need to grow beyond it | |
d8925383 A |
306 | CF_INLINE Boolean __CFStrCapacityProvidedExternally(CFStringRef str) {return (str->variants.notInlineMutable.gapEtc & __kCFCapacityProvidedExternallyMask) == __kCFCapacityProvidedExternally;} |
307 | CF_INLINE void __CFStrSetCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.gapEtc |= __kCFCapacityProvidedExternally;} | |
308 | CF_INLINE void __CFStrClearCapacityProvidedExternally(CFMutableStringRef str) {str->variants.notInlineMutable.gapEtc &= ~__kCFCapacityProvidedExternally;} | |
9ce05555 A |
309 | |
310 | ||
d8925383 A |
311 | CF_INLINE void __CFStrSetIsFixed(CFMutableStringRef str) {str->variants.notInlineMutable.gapEtc |= __kCFIsFixed;} |
312 | CF_INLINE void __CFStrSetIsExternalMutable(CFMutableStringRef str) {str->variants.notInlineMutable.gapEtc |= __kCFIsExternalMutable;} | |
313 | CF_INLINE void __CFStrSetHasGap(CFMutableStringRef str) {str->variants.notInlineMutable.gapEtc |= __kCFHasGap;} | |
9ce05555 A |
314 | CF_INLINE void __CFStrSetUnicode(CFMutableStringRef str) {str->base._info |= __kCFIsUnicode;} |
315 | CF_INLINE void __CFStrClearUnicode(CFMutableStringRef str) {str->base._info &= ~__kCFIsUnicode;} | |
316 | CF_INLINE void __CFStrSetHasLengthAndNullBytes(CFMutableStringRef str) {str->base._info |= (__kCFHasLengthByte | __kCFHasNullByte);} | |
317 | CF_INLINE void __CFStrClearHasLengthAndNullBytes(CFMutableStringRef str) {str->base._info &= ~(__kCFHasLengthByte | __kCFHasNullByte);} | |
318 | ||
319 | ||
320 | static void *__CFStrAllocateMutableContents(CFMutableStringRef str, CFIndex size) { | |
321 | void *ptr; | |
322 | CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str); | |
323 | ptr = CFAllocatorAllocate(alloc, size, 0); | |
324 | if (__CFOASafe) __CFSetLastAllocationEventName(ptr, "CFString (store)"); | |
325 | return ptr; | |
326 | } | |
327 | ||
328 | static void __CFStrDeallocateMutableContents(CFMutableStringRef str, void *buffer) { | |
329 | CFAllocatorRef alloc = (__CFStrHasContentsAllocator(str)) ? __CFStrContentsAllocator(str) : __CFGetAllocator(str); | |
d8925383 A |
330 | if (CF_IS_COLLECTABLE_ALLOCATOR(alloc)) { |
331 | // GC: for finalization safety, let collector reclaim the buffer in the next GC cycle. | |
332 | auto_zone_release(__CFCollectableZone, buffer); | |
333 | } else { | |
334 | CFAllocatorDeallocate(alloc, buffer); | |
335 | } | |
9ce05555 A |
336 | } |
337 | ||
338 | ||
339 | // The following set of functions should only be called on mutable strings | |
340 | ||
341 | /* "Capacity" is stored in number of bytes, not characters. It indicates the total number of bytes in the contents buffer. | |
342 | "Desired capacity" is in number of characters; it is the client requested capacity; if fixed, it is the upper bound on the mutable string backing store. | |
343 | */ | |
d8925383 A |
344 | CF_INLINE CFIndex __CFStrCapacity(CFStringRef str) {return str->variants.notInlineMutable.capacityFields;} |
345 | CF_INLINE void __CFStrSetCapacity(CFMutableStringRef str, CFIndex cap) {str->variants.notInlineMutable.capacityFields = cap;} | |
346 | CF_INLINE CFIndex __CFStrDesiredCapacity(CFStringRef str) {return __CFBitfieldGetValue(str->variants.notInlineMutable.gapEtc, __kCFDesiredCapacityBitNumber, 0);} | |
347 | CF_INLINE void __CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex size) {__CFBitfieldSetValue(str->variants.notInlineMutable.gapEtc, __kCFDesiredCapacityBitNumber, 0, size);} | |
9ce05555 A |
348 | |
349 | ||
350 | ||
351 | ||
/* CFString specific init flags
   Note that you cannot count on the external buffer not being copied.
   Also, if you specify an external buffer, you should not change it behind the CFString's back.
*/
enum {
    /* See if the Unicode contents can be thinned down to 8-bit */
    __kCFThinUnicodeIfPossible = 0x1000000,
    /* Indicating that the string data has a Pascal string structure (length byte at start) */
    kCFStringPascal = 0x10000,
    /* Don't copy the provided string contents if possible; free it when no longer needed */
    kCFStringNoCopyProvidedContents = 0x20000,
    /* Don't copy the provided string contents if possible; don't free it when no longer needed */
    kCFStringNoCopyNoFreeProvidedContents = 0x30000
};
362 | ||
9ce05555 A |
363 | /* System Encoding. |
364 | */ | |
365 | static CFStringEncoding __CFDefaultSystemEncoding = kCFStringEncodingInvalidId; | |
366 | static CFStringEncoding __CFDefaultFileSystemEncoding = kCFStringEncodingInvalidId; | |
367 | CFStringEncoding __CFDefaultEightBitStringEncoding = kCFStringEncodingInvalidId; | |
368 | ||
369 | CFStringEncoding CFStringGetSystemEncoding(void) { | |
370 | ||
371 | if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) { | |
372 | const CFStringEncodingConverter *converter = NULL; | |
373 | #if defined(__MACOS8__) || defined(__MACH__) | |
374 | __CFDefaultSystemEncoding = kCFStringEncodingMacRoman; // MacRoman is built-in so always available | |
375 | #elif defined(__WIN32__) | |
376 | __CFDefaultSystemEncoding = kCFStringEncodingWindowsLatin1; // WinLatin1 is built-in so always available | |
377 | #elif defined(__LINUX__) || defined(__FREEBSD__) | |
378 | __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default | |
379 | #else // Solaris && HP-UX ? | |
380 | __CFDefaultSystemEncoding = kCFStringEncodingISOLatin1; // a reasonable default | |
381 | #endif | |
382 | converter = CFStringEncodingGetConverter(__CFDefaultSystemEncoding); | |
383 | ||
384 | __CFSetCharToUniCharFunc(converter->encodingClass == kCFStringEncodingConverterCheapEightBit ? converter->toUnicode : NULL); | |
385 | } | |
386 | ||
387 | return __CFDefaultSystemEncoding; | |
388 | } | |
389 | ||
390 | // Fast version for internal use | |
391 | ||
392 | CF_INLINE CFStringEncoding __CFStringGetSystemEncoding(void) { | |
393 | if (__CFDefaultSystemEncoding == kCFStringEncodingInvalidId) (void)CFStringGetSystemEncoding(); | |
394 | return __CFDefaultSystemEncoding; | |
395 | } | |
396 | ||
397 | CFStringEncoding CFStringFileSystemEncoding(void) { | |
398 | if (__CFDefaultFileSystemEncoding == kCFStringEncodingInvalidId) { | |
399 | #if defined(__MACH__) | |
400 | __CFDefaultFileSystemEncoding = kCFStringEncodingUTF8; | |
401 | #else | |
402 | __CFDefaultFileSystemEncoding = CFStringGetSystemEncoding(); | |
403 | #endif | |
404 | } | |
405 | ||
406 | return __CFDefaultFileSystemEncoding; | |
407 | } | |
408 | ||
409 | /* ??? Is returning length when no other answer is available the right thing? | |
410 | */ | |
411 | CFIndex CFStringGetMaximumSizeForEncoding(CFIndex length, CFStringEncoding encoding) { | |
412 | if (encoding == kCFStringEncodingUTF8) { | |
413 | return _CFExecutableLinkedOnOrAfter(CFSystemVersionPanther) ? (length * 3) : (length * 6); // 1 Unichar could expand to 3 bytes; we return 6 for older apps for compatibility | |
d8925383 A |
414 | } else if ((encoding == kCFStringEncodingUTF32) || (encoding == kCFStringEncodingUTF32BE) || (encoding == kCFStringEncodingUTF32LE)) { // UTF-32 |
415 | return length * sizeof(UTF32Char); | |
9ce05555 A |
416 | } else { |
417 | encoding &= 0xFFF; // Mask off non-base part | |
418 | } | |
419 | switch (encoding) { | |
420 | case kCFStringEncodingUnicode: | |
421 | return length * sizeof(UniChar); | |
422 | ||
423 | case kCFStringEncodingNonLossyASCII: | |
424 | return length * 6; // 1 Unichar could expand to 6 bytes | |
425 | ||
426 | case kCFStringEncodingMacRoman: | |
427 | case kCFStringEncodingWindowsLatin1: | |
428 | case kCFStringEncodingISOLatin1: | |
429 | case kCFStringEncodingNextStepLatin: | |
430 | case kCFStringEncodingASCII: | |
431 | return length / sizeof(uint8_t); | |
432 | ||
433 | default: | |
434 | return length / sizeof(uint8_t); | |
435 | } | |
436 | } | |
437 | ||
438 | ||
439 | /* Returns whether the indicated encoding can be stored in 8-bit chars | |
440 | */ | |
441 | CF_INLINE Boolean __CFStrEncodingCanBeStoredInEightBit(CFStringEncoding encoding) { | |
d8925383 | 442 | switch (encoding & 0xFFF) { // just use encoding base |
9ce05555 A |
443 | case kCFStringEncodingInvalidId: |
444 | case kCFStringEncodingUnicode: | |
9ce05555 A |
445 | case kCFStringEncodingNonLossyASCII: |
446 | return false; | |
447 | ||
448 | case kCFStringEncodingMacRoman: | |
449 | case kCFStringEncodingWindowsLatin1: | |
450 | case kCFStringEncodingISOLatin1: | |
451 | case kCFStringEncodingNextStepLatin: | |
452 | case kCFStringEncodingASCII: | |
453 | return true; | |
454 | ||
455 | default: return false; | |
456 | } | |
457 | } | |
458 | ||
459 | /* Returns the encoding used in eight bit CFStrings (can't be any encoding which isn't 1-to-1 with Unicode) | |
460 | ??? Perhaps only ASCII fits the bill due to Unicode decomposition. | |
461 | */ | |
462 | CFStringEncoding __CFStringComputeEightBitStringEncoding(void) { | |
463 | if (__CFDefaultEightBitStringEncoding == kCFStringEncodingInvalidId) { | |
464 | CFStringEncoding systemEncoding = CFStringGetSystemEncoding(); | |
465 | if (systemEncoding == kCFStringEncodingInvalidId) { // We're right in the middle of querying system encoding from default database. Delaying to set until system encoding is determined. | |
466 | return kCFStringEncodingASCII; | |
467 | } else if (__CFStrEncodingCanBeStoredInEightBit(systemEncoding)) { | |
468 | __CFDefaultEightBitStringEncoding = systemEncoding; | |
469 | } else { | |
470 | __CFDefaultEightBitStringEncoding = kCFStringEncodingASCII; | |
471 | } | |
472 | } | |
473 | ||
474 | return __CFDefaultEightBitStringEncoding; | |
475 | } | |
476 | ||
477 | /* Returns whether the provided bytes can be stored in ASCII | |
478 | */ | |
479 | CF_INLINE Boolean __CFBytesInASCII(const uint8_t *bytes, CFIndex len) { | |
480 | while (len--) if ((uint8_t)(*bytes++) >= 128) return false; | |
481 | return true; | |
482 | } | |
483 | ||
484 | /* Returns whether the provided 8-bit string in the specified encoding can be stored in an 8-bit CFString. | |
485 | */ | |
486 | CF_INLINE Boolean __CFCanUseEightBitCFStringForBytes(const uint8_t *bytes, CFIndex len, CFStringEncoding encoding) { | |
487 | if (encoding == __CFStringGetEightBitStringEncoding()) return true; | |
488 | if (__CFStringEncodingIsSupersetOfASCII(encoding) && __CFBytesInASCII(bytes, len)) return true; | |
489 | return false; | |
490 | } | |
491 | ||
492 | ||
493 | /* Returns whether a length byte can be tacked on to a string of the indicated length. | |
494 | */ | |
495 | CF_INLINE Boolean __CFCanUseLengthByte(CFIndex len) { | |
496 | #define __kCFMaxPascalStrLen 255 | |
497 | return (len <= __kCFMaxPascalStrLen) ? true : false; | |
498 | } | |
499 | ||
/* Various string assertions.
   Every macro parameter that takes part in a comparison or arithmetic is
   parenthesized, so callers may pass compound expressions (e.g. "a + b" or
   "x ? y : z") without operator precedence changing the condition checked.
   The two brace-wrapped macros expand to a compound statement.
*/
#define __CFAssertIsString(cf) __CFGenericValidateType(cf, __kCFStringTypeID)
#define __CFAssertIndexIsInStringBounds(cf, idx) CFAssert3((idx) >= 0 && (idx) < __CFStrLength(cf), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(cf))
#define __CFAssertRangeIsInStringBounds(cf, idx, count) CFAssert4((idx) >= 0 && ((idx) + (count)) <= __CFStrLength(cf), __kCFLogAssertion, "%s(): string range %d,%d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, count, __CFStrLength(cf))
#define __CFAssertLengthIsOK(len) CFAssert2((len) < __kCFMaxLength, __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, len)
#define __CFAssertIsStringAndMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf), __kCFLogAssertion, "%s(): string not mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsStringAndExternalMutable(cf) {__CFGenericValidateType(cf, __kCFStringTypeID); CFAssert1(__CFStrIsMutable(cf) && __CFStrIsExternalMutable(cf), __kCFLogAssertion, "%s(): string not external mutable", __PRETTY_FUNCTION__);}
#define __CFAssertIsNotNegative(idx) CFAssert2((idx) >= 0, __kCFLogAssertion, "%s(): index %d is negative", __PRETTY_FUNCTION__, idx)
#define __CFAssertIfFixedLengthIsOK(cf, reqLen) CFAssert2(!__CFStrIsFixed(cf) || ((reqLen) <= __CFStrDesiredCapacity(cf)), __kCFLogAssertion, "%s(): length %d too large", __PRETTY_FUNCTION__, reqLen)
510 | ||
511 | ||
512 | /* Basic algorithm is to shrink memory when capacity is SHRINKFACTOR times the required capacity or to allocate memory when the capacity is less than GROWFACTOR times the required capacity. | |
513 | Additional complications are applied in the following order: | |
514 | - desiredCapacity, which is the minimum (except initially things can be at zero) | |
515 | - rounding up to factor of 8 | |
516 | - compressing (to fit the number in 16 bits), which effectively rounds up to factor of 256 | |
517 | */ | |
/* SHRINKFACTOR(c) is half of c; GROWFACTOR(c) is one and a half times c,
   rounded up. The parameter is parenthesized so that compound arguments
   such as "a + b" are scaled as a whole.
*/
#define SHRINKFACTOR(c) ((c) / 2)
#define GROWFACTOR(c) (((c) * 3 + 1) / 2)
520 | ||
521 | CF_INLINE CFIndex __CFStrNewCapacity(CFMutableStringRef str, CFIndex reqCapacity, CFIndex capacity, Boolean leaveExtraRoom, CFIndex charSize) { | |
522 | if (capacity != 0 || reqCapacity != 0) { /* If initially zero, and space not needed, leave it at that... */ | |
523 | if ((capacity < reqCapacity) || /* We definitely need the room... */ | |
524 | (!__CFStrCapacityProvidedExternally(str) && /* Assuming we control the capacity... */ | |
525 | ((reqCapacity < SHRINKFACTOR(capacity)) || /* ...we have too much room! */ | |
526 | (!leaveExtraRoom && (reqCapacity < capacity))))) { /* ...we need to eliminate the extra space... */ | |
527 | CFIndex newCapacity = leaveExtraRoom ? GROWFACTOR(reqCapacity) : reqCapacity; /* Grow by 3/2 if extra room is desired */ | |
528 | CFIndex desiredCapacity = __CFStrDesiredCapacity(str) * charSize; | |
529 | if (newCapacity < desiredCapacity) { /* If less than desired, bump up to desired */ | |
530 | newCapacity = desiredCapacity; | |
531 | } else if (__CFStrIsFixed(str)) { /* Otherwise, if fixed, no need to go above the desired (fixed) capacity */ | |
532 | newCapacity = __CFMax(desiredCapacity, reqCapacity); /* !!! So, fixed is not really fixed, but "tight" */ | |
533 | } | |
534 | if (__CFStrHasContentsAllocator(str)) { /* Also apply any preferred size from the allocator; should we do something for */ | |
535 | newCapacity = CFAllocatorGetPreferredSizeForSize(__CFStrContentsAllocator(str), newCapacity, 0); | |
d8925383 | 536 | #if defined(__MACH__) |
9ce05555 A |
537 | } else { |
538 | newCapacity = malloc_good_size(newCapacity); | |
d8925383 | 539 | #endif |
9ce05555 A |
540 | } |
541 | return newCapacity; // If packing: __CFStrUnpackNumber(__CFStrPackNumber(newCapacity)); | |
542 | } | |
543 | } | |
544 | return capacity; | |
545 | } | |
546 | ||
547 | ||
548 | /* rearrangeBlocks() rearranges the blocks of data within the buffer so that they are "evenly spaced". buffer is assumed to have enough room for the result. | |
549 | numBlocks is current total number of blocks within buffer. | |
550 | blockSize is the size of each block in bytes | |
551 | ranges and numRanges hold the ranges that are no longer needed; ranges are stored sorted in increasing order, and don't overlap | |
552 | insertLength is the final spacing between the remaining blocks | |
553 | ||
554 | Example: buffer = A B C D E F G H, blockSize = 1, ranges = { (2,1) , (4,2) } (so we want to "delete" C and E F), fromEnd = NO | |
555 | if insertLength = 4, result = A B ? ? ? ? D ? ? ? ? G H | |
556 | if insertLength = 0, result = A B D G H | |
557 | ||
558 | Example: buffer = A B C D E F G H I J K L M N O P Q R S T U, blockSize = 1, ranges { (1,1), (3,1), (5,11), (17,1), (19,1) }, fromEnd = NO | |
559 | if insertLength = 3, result = A ? ? ? C ? ? ? E ? ? ? Q ? ? ? S ? ? ? U | |
560 | ||
561 | */ | |
/* One entry of the deferred-range stack used by rearrangeBlocks(). */
typedef struct _CFStringDeferredRange {
    int beginning;	// start of the range
    int length;		// extent of the range
    int shift;		// shift recorded for the range
} CFStringDeferredRange;
567 | ||
568 | typedef struct _CFStringStackInfo { | |
569 | int capacity; // Capacity (if capacity == count, need to realloc to add another) | |
570 | int count; // Number of elements actually stored | |
571 | CFStringDeferredRange *stack; | |
572 | Boolean hasMalloced; // Indicates "stack" is allocated and needs to be deallocated when done | |
573 | char _padding[3]; | |
574 | } CFStringStackInfo; | |
575 | ||
576 | CF_INLINE void pop (CFStringStackInfo *si, CFStringDeferredRange *topRange) { | |
577 | si->count = si->count - 1; | |
578 | *topRange = si->stack[si->count]; | |
579 | } | |
580 | ||
581 | CF_INLINE void push (CFStringStackInfo *si, const CFStringDeferredRange *newRange) { | |
582 | if (si->count == si->capacity) { | |
583 | // increase size of the stack | |
584 | si->capacity = (si->capacity + 4) * 2; | |
585 | if (si->hasMalloced) { | |
586 | si->stack = CFAllocatorReallocate(NULL, si->stack, si->capacity * sizeof(CFStringDeferredRange), 0); | |
587 | } else { | |
588 | CFStringDeferredRange *newStack = (CFStringDeferredRange *)CFAllocatorAllocate(NULL, si->capacity * sizeof(CFStringDeferredRange), 0); | |
589 | memmove(newStack, si->stack, si->count * sizeof(CFStringDeferredRange)); | |
590 | si->stack = newStack; | |
591 | si->hasMalloced = true; | |
592 | } | |
593 | } | |
594 | si->stack[si->count] = *newRange; | |
595 | si->count = si->count + 1; | |
596 | } | |
597 | ||
598 | static void rearrangeBlocks( | |
599 | uint8_t *buffer, | |
600 | CFIndex numBlocks, | |
601 | CFIndex blockSize, | |
602 | const CFRange *ranges, | |
603 | CFIndex numRanges, | |
604 | CFIndex insertLength) { | |
605 | ||
606 | #define origStackSize 10 | |
607 | CFStringDeferredRange origStack[origStackSize]; | |
608 | CFStringStackInfo si = {origStackSize, 0, origStack, false, {0, 0, 0}}; | |
609 | CFStringDeferredRange currentNonRange = {0, 0, 0}; | |
610 | int currentRange = 0; | |
611 | int amountShifted = 0; | |
612 | ||
613 | // must have at least 1 range left. | |
614 | ||
615 | while (currentRange < numRanges) { | |
616 | currentNonRange.beginning = (ranges[currentRange].location + ranges[currentRange].length) * blockSize; | |
617 | if ((numRanges - currentRange) == 1) { | |
618 | // at the end. | |
619 | currentNonRange.length = numBlocks * blockSize - currentNonRange.beginning; | |
620 | if (currentNonRange.length == 0) break; | |
621 | } else { | |
622 | currentNonRange.length = (ranges[currentRange + 1].location * blockSize) - currentNonRange.beginning; | |
623 | } | |
624 | currentNonRange.shift = amountShifted + (insertLength * blockSize) - (ranges[currentRange].length * blockSize); | |
625 | amountShifted = currentNonRange.shift; | |
626 | if (amountShifted <= 0) { | |
627 | // process current item and rest of stack | |
628 | if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length); | |
629 | while (si.count > 0) { | |
630 | pop (&si, ¤tNonRange); // currentNonRange now equals the top element of the stack. | |
631 | if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length); | |
632 | } | |
633 | } else { | |
634 | // add currentNonRange to stack. | |
635 | push (&si, ¤tNonRange); | |
636 | } | |
637 | currentRange++; | |
638 | } | |
639 | ||
640 | // no more ranges. if anything is on the stack, process. | |
641 | ||
642 | while (si.count > 0) { | |
643 | pop (&si, ¤tNonRange); // currentNonRange now equals the top element of the stack. | |
644 | if (currentNonRange.shift && currentNonRange.length) memmove (&buffer[currentNonRange.beginning + currentNonRange.shift], &buffer[currentNonRange.beginning], currentNonRange.length); | |
645 | } | |
646 | if (si.hasMalloced) CFAllocatorDeallocate (NULL, si.stack); | |
647 | } | |
648 | ||
649 | /* See comments for rearrangeBlocks(); this is the same, but the string is assembled in another buffer (dstBuffer), so the algorithm is much easier. We also take care of the case where the source is not-Unicode but destination is. (The reverse case is not supported.) | |
650 | */ | |
651 | static void copyBlocks( | |
652 | const uint8_t *srcBuffer, | |
653 | uint8_t *dstBuffer, | |
654 | CFIndex srcLength, | |
655 | Boolean srcIsUnicode, | |
656 | Boolean dstIsUnicode, | |
657 | const CFRange *ranges, | |
658 | CFIndex numRanges, | |
659 | CFIndex insertLength) { | |
660 | ||
661 | CFIndex srcLocationInBytes = 0; // in order to avoid multiplying all the time, this is in terms of bytes, not blocks | |
662 | CFIndex dstLocationInBytes = 0; // ditto | |
663 | CFIndex srcBlockSize = srcIsUnicode ? sizeof(UniChar) : sizeof(uint8_t); | |
664 | CFIndex insertLengthInBytes = insertLength * (dstIsUnicode ? sizeof(UniChar) : sizeof(uint8_t)); | |
665 | CFIndex rangeIndex = 0; | |
666 | CFIndex srcToDstMultiplier = (srcIsUnicode == dstIsUnicode) ? 1 : (sizeof(UniChar) / sizeof(uint8_t)); | |
667 | ||
668 | // Loop over the ranges, copying the range to be preserved (right before each range) | |
669 | while (rangeIndex < numRanges) { | |
670 | CFIndex srcLengthInBytes = ranges[rangeIndex].location * srcBlockSize - srcLocationInBytes; // srcLengthInBytes is in terms of bytes, not blocks; represents length of region to be preserved | |
671 | if (srcLengthInBytes > 0) { | |
672 | if (srcIsUnicode == dstIsUnicode) { | |
673 | memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLengthInBytes); | |
674 | } else { | |
675 | __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLengthInBytes); | |
676 | } | |
677 | } | |
678 | srcLocationInBytes += srcLengthInBytes + ranges[rangeIndex].length * srcBlockSize; // Skip over the just-copied and to-be-deleted stuff | |
679 | dstLocationInBytes += srcLengthInBytes * srcToDstMultiplier + insertLengthInBytes; | |
680 | rangeIndex++; | |
681 | } | |
682 | ||
683 | // Do last range (the one beyond last range) | |
684 | if (srcLocationInBytes < srcLength * srcBlockSize) { | |
685 | if (srcIsUnicode == dstIsUnicode) { | |
686 | memmove(dstBuffer + dstLocationInBytes, srcBuffer + srcLocationInBytes, srcLength * srcBlockSize - srcLocationInBytes); | |
687 | } else { | |
688 | __CFStrConvertBytesToUnicode(srcBuffer + srcLocationInBytes, (UniChar *)(dstBuffer + dstLocationInBytes), srcLength * srcBlockSize - srcLocationInBytes); | |
689 | } | |
690 | } | |
691 | } | |
692 | ||
693 | ||
694 | /* Reallocates the backing store of the string to accomodate the new length. Space is reserved or characters are deleted as indicated by insertLength and the ranges in deleteRanges. The length is updated to reflect the new state. Will also maintain a length byte and a null byte in 8-bit strings. If length cannot fit in length byte, the space will still be reserved, but will be 0. (Hence the reason the length byte should never be looked at as length unless there is no explicit length.) | |
695 | */ | |
696 | static void __CFStringChangeSizeMultiple(CFMutableStringRef str, const CFRange *deleteRanges, CFIndex numDeleteRanges, CFIndex insertLength, Boolean makeUnicode) { | |
697 | const uint8_t *curContents = __CFStrContents(str); | |
698 | CFIndex curLength = curContents ? __CFStrLength2(str, curContents) : 0; | |
699 | CFIndex newLength; | |
700 | ||
701 | // Compute new length of the string | |
702 | if (numDeleteRanges == 1) { | |
703 | newLength = curLength + insertLength - deleteRanges[0].length; | |
704 | } else { | |
705 | int cnt; | |
706 | newLength = curLength + insertLength * numDeleteRanges; | |
707 | for (cnt = 0; cnt < numDeleteRanges; cnt++) newLength -= deleteRanges[cnt].length; | |
708 | } | |
709 | ||
710 | __CFAssertIfFixedLengthIsOK(str, newLength); | |
711 | ||
712 | if (newLength == 0) { | |
713 | // An somewhat optimized code-path for this special case, with the following implicit values: | |
714 | // newIsUnicode = false | |
715 | // useLengthAndNullBytes = false | |
716 | // newCharSize = sizeof(uint8_t) | |
717 | // If the newCapacity happens to be the same as the old, we don't free the buffer; otherwise we just free it totally | |
718 | // instead of doing a potentially useless reallocation (as the needed capacity later might turn out to be different anyway) | |
719 | CFIndex curCapacity = __CFStrCapacity(str); | |
720 | CFIndex newCapacity = __CFStrNewCapacity(str, 0, curCapacity, true, sizeof(uint8_t)); | |
721 | if (newCapacity != curCapacity) { // If we're reallocing anyway (larger or smaller --- larger could happen if desired capacity was changed in the meantime), let's just free it all | |
722 | if (curContents) __CFStrDeallocateMutableContents(str, (uint8_t *)curContents); | |
723 | __CFStrSetContentPtr(str, NULL); | |
724 | __CFStrSetCapacity(str, 0); | |
725 | __CFStrClearCapacityProvidedExternally(str); | |
726 | __CFStrClearHasLengthAndNullBytes(str); | |
727 | if (!__CFStrIsExternalMutable(str)) __CFStrClearUnicode(str); // External mutable implies Unicode | |
728 | } else { | |
729 | if (!__CFStrIsExternalMutable(str)) { | |
730 | __CFStrClearUnicode(str); | |
731 | if (curCapacity >= (int)(sizeof(uint8_t) * 2)) { // If there's room | |
732 | __CFStrSetHasLengthAndNullBytes(str); | |
733 | ((uint8_t *)curContents)[0] = ((uint8_t *)curContents)[1] = 0; | |
734 | } else { | |
735 | __CFStrClearHasLengthAndNullBytes(str); | |
736 | } | |
737 | } | |
738 | } | |
739 | __CFStrSetExplicitLength(str, 0); | |
740 | } else { /* This else-clause assumes newLength > 0 */ | |
741 | Boolean oldIsUnicode = __CFStrIsUnicode(str); | |
742 | Boolean newIsUnicode = makeUnicode || (oldIsUnicode /* && (newLength > 0) - implicit */ ) || __CFStrIsExternalMutable(str); | |
743 | CFIndex newCharSize = newIsUnicode ? sizeof(UniChar) : sizeof(uint8_t); | |
744 | Boolean useLengthAndNullBytes = !newIsUnicode /* && (newLength > 0) - implicit */; | |
745 | CFIndex numExtraBytes = useLengthAndNullBytes ? 2 : 0; /* 2 extra bytes to keep the length byte & null... */ | |
746 | CFIndex curCapacity = __CFStrCapacity(str); | |
747 | CFIndex newCapacity = __CFStrNewCapacity(str, newLength * newCharSize + numExtraBytes, curCapacity, true, newCharSize); | |
748 | Boolean allocNewBuffer = (newCapacity != curCapacity) || (curLength > 0 && !oldIsUnicode && newIsUnicode); /* We alloc new buffer if oldIsUnicode != newIsUnicode because the contents have to be copied */ | |
749 | uint8_t *newContents = allocNewBuffer ? __CFStrAllocateMutableContents(str, newCapacity) : (uint8_t *)curContents; | |
750 | Boolean hasLengthAndNullBytes = __CFStrHasLengthByte(str); | |
751 | ||
752 | CFAssert1(hasLengthAndNullBytes == __CFStrHasNullByte(str), __kCFLogAssertion, "%s(): Invalid state in 8-bit string", __PRETTY_FUNCTION__); | |
753 | ||
754 | if (hasLengthAndNullBytes) curContents++; | |
755 | if (useLengthAndNullBytes) newContents++; | |
756 | ||
757 | if (curContents) { | |
758 | if (oldIsUnicode == newIsUnicode) { | |
759 | if (newContents == curContents) { | |
760 | rearrangeBlocks(newContents, curLength, newCharSize, deleteRanges, numDeleteRanges, insertLength); | |
761 | } else { | |
762 | copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength); | |
763 | } | |
764 | } else if (newIsUnicode) { /* this implies we have a new buffer */ | |
765 | copyBlocks(curContents, newContents, curLength, oldIsUnicode, newIsUnicode, deleteRanges, numDeleteRanges, insertLength); | |
766 | } | |
767 | if (hasLengthAndNullBytes) curContents--; /* Undo the damage from above */ | |
768 | if (allocNewBuffer) __CFStrDeallocateMutableContents(str, (void *)curContents); | |
769 | } | |
770 | ||
771 | if (!newIsUnicode) { | |
772 | if (useLengthAndNullBytes) { | |
773 | newContents[newLength] = 0; /* Always have null byte, if not unicode */ | |
774 | newContents--; /* Undo the damage from above */ | |
775 | newContents[0] = __CFCanUseLengthByte(newLength) ? (uint8_t)newLength : 0; | |
776 | if (!hasLengthAndNullBytes) __CFStrSetHasLengthAndNullBytes(str); | |
777 | } else { | |
778 | if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str); | |
779 | } | |
780 | if (oldIsUnicode) __CFStrClearUnicode(str); | |
781 | } else { // New is unicode... | |
782 | if (!oldIsUnicode) __CFStrSetUnicode(str); | |
783 | if (hasLengthAndNullBytes) __CFStrClearHasLengthAndNullBytes(str); | |
784 | } | |
785 | __CFStrSetExplicitLength(str, newLength); | |
786 | ||
787 | if (allocNewBuffer) { | |
788 | __CFStrSetCapacity(str, newCapacity); | |
789 | __CFStrClearCapacityProvidedExternally(str); | |
790 | __CFStrSetContentPtr(str, newContents); | |
791 | } | |
792 | } | |
793 | } | |
794 | ||
795 | /* Same as above, but takes one range (very common case) | |
796 | */ | |
797 | CF_INLINE void __CFStringChangeSize(CFMutableStringRef str, CFRange range, CFIndex insertLength, Boolean makeUnicode) { | |
798 | __CFStringChangeSizeMultiple(str, &range, 1, insertLength, makeUnicode); | |
799 | } | |
800 | ||
801 | ||
802 | static void __CFStringDeallocate(CFTypeRef cf) { | |
803 | CFStringRef str = cf; | |
804 | ||
805 | // constantStringAllocatorForDebugging is not around unless DEBUG is defined, but neither is CFAssert2()... | |
806 | CFAssert1(__CFConstantStringTableBeingFreed || CFGetAllocator(str) != constantStringAllocatorForDebugging, __kCFLogAssertion, "Tried to deallocate CFSTR(\"%@\")", str); | |
807 | ||
808 | if (!__CFStrIsInline(str)) { | |
809 | uint8_t *contents; | |
810 | Boolean mutable = __CFStrIsMutable(str); | |
811 | if (__CFStrFreeContentsWhenDone(str) && (contents = (uint8_t *)__CFStrContents(str))) { | |
812 | if (mutable) { | |
813 | __CFStrDeallocateMutableContents((CFMutableStringRef)str, contents); | |
814 | } else { | |
815 | if (__CFStrHasContentsDeallocator(str)) { | |
816 | CFAllocatorRef contentsDeallocator = __CFStrContentsDeallocator(str); | |
817 | CFAllocatorDeallocate(contentsDeallocator, contents); | |
818 | CFRelease(contentsDeallocator); | |
819 | } else { | |
820 | CFAllocatorRef alloc = __CFGetAllocator(str); | |
821 | CFAllocatorDeallocate(alloc, contents); | |
822 | } | |
823 | } | |
824 | } | |
825 | if (mutable && __CFStrHasContentsAllocator(str)) CFRelease(__CFStrContentsAllocator((CFMutableStringRef)str)); | |
826 | } | |
827 | } | |
828 | ||
829 | static Boolean __CFStringEqual(CFTypeRef cf1, CFTypeRef cf2) { | |
830 | CFStringRef str1 = cf1; | |
831 | CFStringRef str2 = cf2; | |
832 | const uint8_t *contents1; | |
833 | const uint8_t *contents2; | |
834 | CFIndex len1; | |
835 | ||
836 | /* !!! We do not need IsString assertions, as the CFBase runtime assures this */ | |
837 | /* !!! We do not need == test, as the CFBase runtime assures this */ | |
838 | ||
839 | contents1 = __CFStrContents(str1); | |
840 | contents2 = __CFStrContents(str2); | |
841 | len1 = __CFStrLength2(str1, contents1); | |
842 | ||
843 | if (len1 != __CFStrLength2(str2, contents2)) return false; | |
844 | ||
845 | contents1 += __CFStrSkipAnyLengthByte(str1); | |
846 | contents2 += __CFStrSkipAnyLengthByte(str2); | |
847 | ||
848 | if (__CFStrIsEightBit(str1) && __CFStrIsEightBit(str2)) { | |
849 | return memcmp((const char *)contents1, (const char *)contents2, len1) ? false : true; | |
850 | } else if (__CFStrIsEightBit(str1)) { /* One string has Unicode contents */ | |
851 | CFStringInlineBuffer buf; | |
852 | CFIndex buf_idx = 0; | |
853 | ||
854 | CFStringInitInlineBuffer(str1, &buf, CFRangeMake(0, len1)); | |
855 | for (buf_idx = 0; buf_idx < len1; buf_idx++) { | |
856 | if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents2)[buf_idx]) return false; | |
857 | } | |
858 | } else if (__CFStrIsEightBit(str2)) { /* One string has Unicode contents */ | |
859 | CFStringInlineBuffer buf; | |
860 | CFIndex buf_idx = 0; | |
861 | ||
862 | CFStringInitInlineBuffer(str2, &buf, CFRangeMake(0, len1)); | |
863 | for (buf_idx = 0; buf_idx < len1; buf_idx++) { | |
864 | if (__CFStringGetCharacterFromInlineBufferQuick(&buf, buf_idx) != ((UniChar *)contents1)[buf_idx]) return false; | |
865 | } | |
866 | } else { /* Both strings have Unicode contents */ | |
867 | CFIndex idx; | |
868 | for (idx = 0; idx < len1; idx++) { | |
869 | if (((UniChar *)contents1)[idx] != ((UniChar *)contents2)[idx]) return false; | |
870 | } | |
871 | } | |
872 | return true; | |
873 | } | |
874 | ||
875 | ||
/* String hashing: Should give the same results whatever the encoding; so we hash UniChars.
    If the length is less than or equal to 24, then the hash function is simply the
    following (n is the nth UniChar character, starting from 0):

        hash(-1) = length
        hash(n) = hash(n-1) * 257 + unichar(n);
        Hash = hash(length-1) + (hash(length-1) << (length & 31)), that is, hash(length-1) * (2^(length & 31) + 1)

    If the length is greater than 24, then the above algorithm applies to
    characters 0..7 and length-16..length-1; thus the first 8 and last 16 characters.

    Note that the loops below are unrolled; and: 257^2 = 66049; 257^3 = 16974593; 257^4 = 4362470401; 67503105 is 257^4 - 256^4
    If hashcode is changed from UInt32 to something else, this last piece needs to be readjusted.

    NOTE: The hash algorithm used to be duplicated in CF and Foundation; but now it should only be in the four functions below.
*/
892 | ||
893 | /* In this function, actualLen is the length of the original string; but len is the number of characters in buffer. The buffer is expected to contain the parts of the string relevant to hashing. | |
894 | */ | |
895 | CF_INLINE CFHashCode __CFStrHashCharacters(const UniChar *uContents, CFIndex len, CFIndex actualLen) { | |
896 | CFHashCode result = actualLen; | |
897 | if (len < 24) { | |
898 | const UniChar *end4 = uContents + (len & ~3); | |
899 | const UniChar *end = uContents + len; | |
900 | while (uContents < end4) { // First count in fours | |
901 | result = result * 67503105 + uContents[0] * 16974593 + uContents[1] * 66049 + uContents[2] * 257 + uContents[3]; | |
902 | uContents += 4; | |
9ce05555 | 903 | } |
d8925383 A |
904 | while (uContents < end) { // Then for the last <4 chars, count in ones... |
905 | result = result * 257 + *uContents++; | |
9ce05555 | 906 | } |
d8925383 A |
907 | } else { |
908 | result = result * 67503105 + uContents[0] * 16974593 + uContents[1] * 66049 + uContents[2] * 257 + uContents[3]; | |
909 | result = result * 67503105 + uContents[4] * 16974593 + uContents[5] * 66049 + uContents[6] * 257 + uContents[7]; | |
910 | uContents += (len - 16); | |
911 | result = result * 67503105 + uContents[0] * 16974593 + uContents[1] * 66049 + uContents[2] * 257 + uContents[3]; | |
912 | result = result * 67503105 + uContents[4] * 16974593 + uContents[5] * 66049 + uContents[6] * 257 + uContents[7]; | |
913 | result = result * 67503105 + uContents[8] * 16974593 + uContents[9] * 66049 + uContents[10] * 257 + uContents[11]; | |
914 | result = result * 67503105 + uContents[12] * 16974593 + uContents[13] * 66049 + uContents[14] * 257 + uContents[15]; | |
915 | } | |
916 | return result + (result << (actualLen & 31)); | |
917 | } | |
918 | ||
919 | /* This hashes cString in the eight bit string encoding. It also includes the little debug-time sanity check. | |
920 | */ | |
921 | CF_INLINE CFHashCode __CFStrHashEightBit(const uint8_t *contents, CFIndex len) { | |
922 | #if defined(DEBUG) | |
923 | const uint8_t *origContents = contents; | |
9ce05555 | 924 | #endif |
d8925383 A |
925 | CFHashCode result = len; |
926 | if (len < 24) { | |
927 | const uint8_t *end4 = contents + (len & ~3); | |
928 | const uint8_t *end = contents + len; | |
929 | while (contents < end4) { // First count in fours | |
930 | result = result * 67503105 + __CFCharToUniCharTable[contents[0]] * 16974593 + __CFCharToUniCharTable[contents[1]] * 66049 + __CFCharToUniCharTable[contents[2]] * 257 + __CFCharToUniCharTable[contents[3]]; | |
931 | contents += 4; | |
932 | } | |
933 | while (contents < end) { // Then for the last <4 chars, count single chars | |
934 | result = result * 257 + __CFCharToUniCharTable[*contents++]; | |
935 | } | |
9ce05555 | 936 | } else { |
d8925383 A |
937 | result = result * 67503105 + __CFCharToUniCharTable[contents[0]] * 16974593 + __CFCharToUniCharTable[contents[1]] * 66049 + __CFCharToUniCharTable[contents[2]] * 257 + __CFCharToUniCharTable[contents[3]]; |
938 | result = result * 67503105 + __CFCharToUniCharTable[contents[4]] * 16974593 + __CFCharToUniCharTable[contents[5]] * 66049 + __CFCharToUniCharTable[contents[6]] * 257 + __CFCharToUniCharTable[contents[7]]; | |
939 | contents += (len - 16); | |
940 | result = result * 67503105 + __CFCharToUniCharTable[contents[0]] * 16974593 + __CFCharToUniCharTable[contents[1]] * 66049 + __CFCharToUniCharTable[contents[2]] * 257 + __CFCharToUniCharTable[contents[3]]; | |
941 | result = result * 67503105 + __CFCharToUniCharTable[contents[4]] * 16974593 + __CFCharToUniCharTable[contents[5]] * 66049 + __CFCharToUniCharTable[contents[6]] * 257 + __CFCharToUniCharTable[contents[7]]; | |
942 | result = result * 67503105 + __CFCharToUniCharTable[contents[8]] * 16974593 + __CFCharToUniCharTable[contents[9]] * 66049 + __CFCharToUniCharTable[contents[10]] * 257 + __CFCharToUniCharTable[contents[11]]; | |
943 | result = result * 67503105 + __CFCharToUniCharTable[contents[12]] * 16974593 + __CFCharToUniCharTable[contents[13]] * 66049 + __CFCharToUniCharTable[contents[14]] * 257 + __CFCharToUniCharTable[contents[15]]; | |
944 | } | |
945 | #if defined(DEBUG) | |
946 | if (!__CFCharToUniCharFunc) { // A little sanity verification: If this is not set, trying to hash high byte chars would be a bad idea | |
947 | CFIndex cnt; | |
948 | Boolean err = false; | |
949 | contents = origContents; | |
950 | if (len <= 24) { | |
951 | for (cnt = 0; cnt < len; cnt++) if (contents[cnt] >= 128) err = true; | |
9ce05555 | 952 | } else { |
d8925383 A |
953 | for (cnt = 0; cnt < 8; cnt++) if (contents[cnt] >= 128) err = true; |
954 | for (cnt = len - 16; cnt < len; cnt++) if (contents[cnt] >= 128) err = true; | |
955 | } | |
956 | if (err) { | |
957 | // Can't do log here, as it might be too early | |
958 | fprintf(stderr, "Warning: CFHash() attempting to hash CFString containing high bytes before properly initialized to do so\n"); | |
9ce05555 A |
959 | } |
960 | } | |
d8925383 A |
961 | #endif |
962 | return result + (result << (len & 31)); | |
963 | } | |
964 | ||
965 | CFHashCode CFStringHashISOLatin1CString(const uint8_t *bytes, CFIndex len) { | |
966 | CFHashCode result = len; | |
967 | if (len < 24) { | |
968 | const uint8_t *end4 = bytes + (len & ~3); | |
969 | const uint8_t *end = bytes + len; | |
970 | while (bytes < end4) { // First count in fours | |
971 | result = result * 67503105 + bytes[0] * 16974593 + bytes[1] * 66049 + bytes[2] * 257 + bytes[3]; | |
972 | bytes += 4; | |
973 | } | |
974 | while (bytes < end) { // Then for the last <4 chars, count in ones... | |
975 | result = result * 257 + *bytes++; | |
976 | } | |
977 | } else { | |
978 | result = result * 67503105 + bytes[0] * 16974593 + bytes[1] * 66049 + bytes[2] * 257 + bytes[3]; | |
979 | result = result * 67503105 + bytes[4] * 16974593 + bytes[5] * 66049 + bytes[6] * 257 + bytes[7]; | |
980 | bytes += (len - 16); | |
981 | result = result * 67503105 + bytes[0] * 16974593 + bytes[1] * 66049 + bytes[2] * 257 + bytes[3]; | |
982 | result = result * 67503105 + bytes[4] * 16974593 + bytes[5] * 66049 + bytes[6] * 257 + bytes[7]; | |
983 | result = result * 67503105 + bytes[8] * 16974593 + bytes[9] * 66049 + bytes[10] * 257 + bytes[11]; | |
984 | result = result * 67503105 + bytes[12] * 16974593 + bytes[13] * 66049 + bytes[14] * 257 + bytes[15]; | |
985 | } | |
986 | return result + (result << (len & 31)); | |
987 | } | |
988 | ||
989 | CFHashCode CFStringHashCString(const uint8_t *bytes, CFIndex len) { | |
990 | return __CFStrHashEightBit(bytes, len); | |
991 | } | |
992 | ||
993 | CFHashCode CFStringHashCharacters(const UniChar *characters, CFIndex len) { | |
994 | return __CFStrHashCharacters(characters, len, len); | |
995 | } | |
996 | ||
997 | /* This is meant to be called from NSString or subclassers only. It is an error for this to be called without the ObjC runtime or an argument which is not an NSString or subclass. It can be called with NSCFString, although that would be inefficient (causing indirection) and won't normally happen anyway, as NSCFString overrides hash. | |
998 | */ | |
999 | CFHashCode CFStringHashNSString(CFStringRef str) { | |
1000 | UniChar buffer[24]; | |
1001 | CFIndex bufLen; // Number of characters in the buffer for hashing | |
1002 | CFIndex len; // Actual length of the string | |
1003 | ||
1004 | CF_OBJC_CALL0(CFIndex, len, str, "length"); | |
1005 | if (len <= 24) { | |
1006 | CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, len)); | |
1007 | bufLen = len; | |
1008 | } else { | |
1009 | CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer, CFRangeMake(0, 8)); | |
1010 | CF_OBJC_VOIDCALL2(str, "getCharacters:range:", buffer+8, CFRangeMake(len-16, 16)); | |
1011 | bufLen = 24; | |
1012 | } | |
1013 | return __CFStrHashCharacters(buffer, bufLen, len); | |
1014 | } | |
1015 | ||
1016 | CFHashCode __CFStringHash(CFTypeRef cf) { | |
1017 | /* !!! We do not need an IsString assertion here, as this is called by the CFBase runtime only */ | |
1018 | CFStringRef str = cf; | |
1019 | const uint8_t *contents = __CFStrContents(str); | |
1020 | CFIndex len = __CFStrLength2(str, contents); | |
1021 | ||
1022 | if (__CFStrIsEightBit(str)) { | |
1023 | contents += __CFStrSkipAnyLengthByte(str); | |
1024 | return __CFStrHashEightBit(contents, len); | |
1025 | } else { | |
1026 | return __CFStrHashCharacters((const UniChar *)contents, len, len); | |
1027 | } | |
9ce05555 A |
1028 | } |
1029 | ||
1030 | ||
1031 | static CFStringRef __CFStringCopyDescription(CFTypeRef cf) { | |
1032 | return CFStringCreateWithFormat(kCFAllocatorDefault, NULL, CFSTR("<CFString %p [%p]>{contents = \"%@\"}"), cf, __CFGetAllocator(cf), cf); | |
1033 | } | |
1034 | ||
1035 | static CFStringRef __CFStringCopyFormattingDescription(CFTypeRef cf, CFDictionaryRef formatOptions) { | |
1036 | return CFStringCreateCopy(__CFGetAllocator(cf), cf); | |
1037 | } | |
1038 | ||
1039 | static CFTypeID __kCFStringTypeID = _kCFRuntimeNotATypeID; | |
1040 | ||
1041 | static const CFRuntimeClass __CFStringClass = { | |
1042 | 0, | |
1043 | "CFString", | |
1044 | NULL, // init | |
1045 | (void *)CFStringCreateCopy, | |
1046 | __CFStringDeallocate, | |
1047 | __CFStringEqual, | |
1048 | __CFStringHash, | |
1049 | __CFStringCopyFormattingDescription, | |
1050 | __CFStringCopyDescription | |
1051 | }; | |
1052 | ||
1053 | __private_extern__ void __CFStringInitialize(void) { | |
1054 | __kCFStringTypeID = _CFRuntimeRegisterClass(&__CFStringClass); | |
1055 | } | |
1056 | ||
1057 | CFTypeID CFStringGetTypeID(void) { | |
1058 | return __kCFStringTypeID; | |
1059 | } | |
1060 | ||
1061 | ||
1062 | static Boolean CFStrIsUnicode(CFStringRef str) { | |
1063 | CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, Boolean, str, "_encodingCantBeStoredInEightBitCFString"); | |
1064 | return __CFStrIsUnicode(str); | |
1065 | } | |
1066 | ||
1067 | ||
1068 | ||
/* Marker value passed as a contentsDeallocator; it is compared against, and stands for the string's own allocator */
#define ALLOCATORSFREEFUNC ((void *)-1)
1070 | ||
/* contentsDeallocator indicates how to free the data if noCopy == true:
        kCFAllocatorNull: don't free
        ALLOCATORSFREEFUNC: free with main allocator's free func (don't pass in the real func ptr here)
        NULL: default allocator
        otherwise it's the allocator that should be used (it will be explicitly stored)
    if noCopy == false, then contentsDeallocator should be ALLOCATORSFREEFUNC
    hasLengthByte, hasNullByte: refers to bytes; used only if encoding != Unicode
    possiblyExternalFormat indicates that the bytes might have BOM and be swapped
    tryToReduceUnicode means that the Unicode should be checked to see if it contains just ASCII (and reduce it if so)
    numBytes contains the actual number of bytes in "bytes", including Length byte,
        BUT not the NULL byte at the end
    bytes should not contain BOM characters
    !!! Various flags should be combined to reduce number of arguments, if possible
*/
1085 | __private_extern__ CFStringRef __CFStringCreateImmutableFunnel3( | |
1086 | CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding, | |
1087 | Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy, | |
1088 | CFAllocatorRef contentsDeallocator, UInt32 converterFlags) { | |
1089 | ||
1090 | CFMutableStringRef str; | |
1091 | CFVarWidthCharBuffer vBuf; | |
1092 | CFIndex size; | |
1093 | Boolean useLengthByte = false; | |
1094 | Boolean useNullByte = false; | |
1095 | Boolean useInlineData = false; | |
1096 | ||
1097 | if (alloc == NULL) alloc = __CFGetDefaultAllocator(); | |
1098 | ||
1099 | if (contentsDeallocator == ALLOCATORSFREEFUNC) { | |
1100 | contentsDeallocator = alloc; | |
1101 | } else if (contentsDeallocator == NULL) { | |
1102 | contentsDeallocator = __CFGetDefaultAllocator(); | |
1103 | } | |
1104 | ||
1105 | if ((NULL != kCFEmptyString) && (numBytes == 0) && (alloc == kCFAllocatorSystemDefault)) { // If we are using the system default allocator, and the string is empty, then use the empty string! | |
1106 | if (noCopy && (contentsDeallocator != kCFAllocatorNull)) { // See 2365208... This change was done after Sonata; before we didn't free the bytes at all (leak). | |
1107 | CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); | |
1108 | } | |
1109 | return CFRetain(kCFEmptyString); // Quick exit; won't catch all empty strings, but most | |
1110 | } | |
1111 | ||
1112 | // At this point, contentsDeallocator is either same as alloc, or kCFAllocatorNull, or something else, but not NULL | |
1113 | ||
1114 | vBuf.shouldFreeChars = false; // We use this to remember to free the buffer possibly allocated by decode | |
1115 | ||
1116 | // First check to see if the data needs to be converted... | |
1117 | // ??? We could be more efficient here and in some cases (Unicode data) eliminate a copy | |
1118 | ||
1119 | if ((encoding == kCFStringEncodingUnicode && possiblyExternalFormat) || (encoding != kCFStringEncodingUnicode && !__CFCanUseEightBitCFStringForBytes(bytes, numBytes, encoding))) { | |
1120 | const void *realBytes = (uint8_t*) bytes + (hasLengthByte ? 1 : 0); | |
1121 | CFIndex realNumBytes = numBytes - (hasLengthByte ? 1 : 0); | |
1122 | Boolean usingPassedInMemory = false; | |
1123 | ||
1124 | vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff | |
1125 | vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary | |
1126 | ||
1127 | if (!__CFStringDecodeByteStream3(realBytes, realNumBytes, encoding, false, &vBuf, &usingPassedInMemory, converterFlags)) { | |
1128 | return NULL; // !!! Is this acceptable failure mode? | |
1129 | } | |
1130 | ||
1131 | encoding = vBuf.isASCII ? kCFStringEncodingASCII : kCFStringEncodingUnicode; | |
1132 | ||
1133 | if (!usingPassedInMemory) { | |
1134 | ||
1135 | // Make the parameters fit the new situation | |
1136 | numBytes = vBuf.isASCII ? vBuf.numChars : (vBuf.numChars * sizeof(UniChar)); | |
1137 | hasLengthByte = hasNullByte = false; | |
1138 | ||
1139 | // Get rid of the original buffer if its not being used | |
1140 | if (noCopy && contentsDeallocator != kCFAllocatorNull) { | |
1141 | CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); | |
1142 | } | |
1143 | contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone... | |
1144 | ||
1145 | // See if we can reuse any storage the decode func might have allocated | |
1146 | // We do this only for Unicode, as otherwise we would not have NULL and Length bytes | |
1147 | ||
1148 | if (vBuf.shouldFreeChars && (alloc == vBuf.allocator) && encoding == kCFStringEncodingUnicode) { | |
1149 | vBuf.shouldFreeChars = false; // Transferring ownership to the CFString | |
1150 | bytes = CFAllocatorReallocate(vBuf.allocator, (void *)vBuf.chars.unicode, numBytes, 0); // Tighten up the storage | |
1151 | noCopy = true; | |
1152 | } else { | |
1153 | bytes = vBuf.chars.unicode; | |
1154 | noCopy = false; // Can't do noCopy anymore | |
1155 | // If vBuf.shouldFreeChars is true, the buffer will be freed as intended near the end of this func | |
1156 | } | |
1157 | ||
1158 | } | |
1159 | ||
1160 | // At this point, all necessary input arguments have been changed to reflect the new state | |
1161 | ||
1162 | } else if (encoding == kCFStringEncodingUnicode && tryToReduceUnicode) { // Check to see if we can reduce Unicode to ASCII | |
1163 | CFIndex cnt; | |
1164 | CFIndex len = numBytes / sizeof(UniChar); | |
1165 | Boolean allASCII = true; | |
1166 | ||
1167 | for (cnt = 0; cnt < len; cnt++) if (((const UniChar *)bytes)[cnt] > 127) { | |
1168 | allASCII = false; | |
1169 | break; | |
1170 | } | |
1171 | ||
1172 | if (allASCII) { // Yes we can! | |
1173 | uint8_t *ptr, *mem; | |
1174 | hasLengthByte = __CFCanUseLengthByte(len); | |
1175 | hasNullByte = true; | |
1176 | numBytes = (len + 1 + (hasLengthByte ? 1 : 0)) * sizeof(uint8_t); // NULL and possible length byte | |
1177 | // See if we can use that temporary local buffer in vBuf... | |
d8925383 A |
1178 | if (numBytes >= __kCFVarWidthLocalBufferSize) { |
1179 | mem = ptr = (uint8_t *)CFAllocatorAllocate(alloc, numBytes, 0); | |
1180 | if (__CFOASafe) __CFSetLastAllocationEventName(mem, "CFString (store)"); | |
1181 | } else { | |
1182 | mem = ptr = (uint8_t *)(vBuf.localBuffer); | |
1183 | } | |
1184 | // Copy the Unicode bytes into the new ASCII buffer | |
9ce05555 A |
1185 | if (hasLengthByte) *ptr++ = len; |
1186 | for (cnt = 0; cnt < len; cnt++) ptr[cnt] = ((const UniChar *)bytes)[cnt]; | |
1187 | ptr[len] = 0; | |
1188 | if (noCopy && contentsDeallocator != kCFAllocatorNull) { | |
1189 | CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); | |
1190 | } | |
d8925383 | 1191 | // Now make everything look like we had an ASCII buffer to start with |
9ce05555 A |
1192 | bytes = mem; |
1193 | encoding = kCFStringEncodingASCII; | |
1194 | contentsDeallocator = alloc; // At this point we are using the string's allocator, as the original buffer is gone... | |
1195 | noCopy = (numBytes >= __kCFVarWidthLocalBufferSize); // If we had to allocate it, make sure it's kept around | |
1196 | numBytes--; // Should not contain the NULL byte at end... | |
1197 | } | |
1198 | ||
1199 | // At this point, all necessary input arguments have been changed to reflect the new state | |
1200 | } | |
1201 | ||
1202 | // Now determine the necessary size | |
1203 | ||
1204 | if (noCopy) { | |
1205 | ||
1206 | size = sizeof(void *); // Pointer to the buffer | |
1207 | if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) { | |
1208 | size += sizeof(void *); // The contentsDeallocator | |
1209 | } | |
1210 | if (!hasLengthByte) size += sizeof(SInt32); // Explicit length | |
1211 | useLengthByte = hasLengthByte; | |
1212 | useNullByte = hasNullByte; | |
1213 | ||
1214 | } else { // Inline data; reserve space for it | |
1215 | ||
1216 | useInlineData = true; | |
1217 | size = numBytes; | |
1218 | ||
1219 | if (hasLengthByte || (encoding != kCFStringEncodingUnicode && __CFCanUseLengthByte(numBytes))) { | |
1220 | useLengthByte = true; | |
1221 | if (!hasLengthByte) size += 1; | |
1222 | } else { | |
1223 | size += sizeof(SInt32); // Explicit length | |
1224 | } | |
1225 | if (hasNullByte || encoding != kCFStringEncodingUnicode) { | |
1226 | useNullByte = true; | |
1227 | size += 1; | |
1228 | } | |
1229 | } | |
1230 | ||
d8925383 A |
1231 | #ifdef STRING_SIZE_STATS |
1232 | // Dump alloced CFString size info every so often | |
1233 | static int cnt = 0; | |
1234 | static unsigned sizes[256] = {0}; | |
1235 | int allocedSize = size + sizeof(CFRuntimeBase); | |
1236 | if (allocedSize < 255) sizes[allocedSize]++; else sizes[255]++; | |
1237 | if ((++cnt % 1000) == 0) { | |
1238 | printf ("\nTotal: %d\n", cnt); | |
1239 | int i; for (i = 0; i < 256; i++) printf("%03d: %5d%s", i, sizes[i], ((i % 8) == 7) ? "\n" : " "); | |
1240 | } | |
1241 | #endif | |
1242 | ||
9ce05555 A |
1243 | // Finally, allocate! |
1244 | ||
1245 | str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, size, NULL); | |
d8925383 A |
1246 | if (str) { |
1247 | if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (immutable)"); | |
1248 | ||
1249 | __CFStrSetInfoBits(str, | |
1250 | (useInlineData ? __kCFHasInlineContents : (contentsDeallocator == alloc ? __kCFNotInlineContentsDefaultFree : (contentsDeallocator == kCFAllocatorNull ? __kCFNotInlineContentsNoFree : __kCFNotInlineContentsCustomFree))) | | |
1251 | ((encoding == kCFStringEncodingUnicode) ? __kCFIsUnicode : 0) | | |
1252 | (useNullByte ? __kCFHasNullByte : 0) | | |
1253 | (useLengthByte ? __kCFHasLengthByte : 0)); | |
1254 | ||
1255 | if (!useLengthByte) { | |
1256 | CFIndex length = numBytes - (hasLengthByte ? 1 : 0); | |
1257 | if (encoding == kCFStringEncodingUnicode) length /= sizeof(UniChar); | |
1258 | __CFStrSetExplicitLength(str, length); | |
1259 | } | |
9ce05555 | 1260 | |
d8925383 A |
1261 | if (useInlineData) { |
1262 | uint8_t *contents = (uint8_t *)__CFStrContents(str); | |
1263 | if (useLengthByte && !hasLengthByte) *contents++ = numBytes; | |
1264 | memmove(contents, bytes, numBytes); | |
1265 | if (useNullByte) contents[numBytes] = 0; | |
1266 | } else { | |
1267 | __CFStrSetContentPtr(str, bytes); | |
1268 | if (contentsDeallocator != alloc && contentsDeallocator != kCFAllocatorNull) __CFStrSetContentsDeallocator(str, CFRetain(contentsDeallocator)); | |
1269 | } | |
9ce05555 | 1270 | } else { |
d8925383 | 1271 | if (contentsDeallocator != kCFAllocatorNull) CFAllocatorDeallocate(contentsDeallocator, (void *)bytes); |
9ce05555 A |
1272 | } |
1273 | if (vBuf.shouldFreeChars) CFAllocatorDeallocate(vBuf.allocator, (void *)bytes); | |
1274 | ||
1275 | return str; | |
1276 | } | |
1277 | ||
1278 | /* !!! __CFStringCreateImmutableFunnel2() is kept around for compatibility; it should be deprecated | |
1279 | */ | |
1280 | CFStringRef __CFStringCreateImmutableFunnel2( | |
1281 | CFAllocatorRef alloc, const void *bytes, CFIndex numBytes, CFStringEncoding encoding, | |
1282 | Boolean possiblyExternalFormat, Boolean tryToReduceUnicode, Boolean hasLengthByte, Boolean hasNullByte, Boolean noCopy, | |
1283 | CFAllocatorRef contentsDeallocator) { | |
1284 | return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, possiblyExternalFormat, tryToReduceUnicode, hasLengthByte, hasNullByte, noCopy, contentsDeallocator, 0); | |
1285 | } | |
1286 | ||
1287 | ||
1288 | ||
1289 | CFStringRef CFStringCreateWithPascalString(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding) { | |
1290 | CFIndex len = (CFIndex)(*(uint8_t *)pStr); | |
1291 | return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, false, ALLOCATORSFREEFUNC, 0); | |
1292 | } | |
1293 | ||
1294 | ||
1295 | CFStringRef CFStringCreateWithCString(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding) { | |
1296 | CFIndex len = strlen(cStr); | |
1297 | return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, false, ALLOCATORSFREEFUNC, 0); | |
1298 | } | |
1299 | ||
1300 | CFStringRef CFStringCreateWithPascalStringNoCopy(CFAllocatorRef alloc, ConstStringPtr pStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) { | |
1301 | CFIndex len = (CFIndex)(*(uint8_t *)pStr); | |
1302 | return __CFStringCreateImmutableFunnel3(alloc, pStr, len+1, encoding, false, false, true, false, true, contentsDeallocator, 0); | |
1303 | } | |
1304 | ||
1305 | ||
1306 | CFStringRef CFStringCreateWithCStringNoCopy(CFAllocatorRef alloc, const char *cStr, CFStringEncoding encoding, CFAllocatorRef contentsDeallocator) { | |
1307 | CFIndex len = strlen(cStr); | |
1308 | return __CFStringCreateImmutableFunnel3(alloc, cStr, len, encoding, false, false, false, true, true, contentsDeallocator, 0); | |
1309 | } | |
1310 | ||
1311 | ||
1312 | CFStringRef CFStringCreateWithCharacters(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars) { | |
1313 | return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0); | |
1314 | } | |
1315 | ||
1316 | ||
1317 | CFStringRef CFStringCreateWithCharactersNoCopy(CFAllocatorRef alloc, const UniChar *chars, CFIndex numChars, CFAllocatorRef contentsDeallocator) { | |
1318 | return __CFStringCreateImmutableFunnel3(alloc, chars, numChars * sizeof(UniChar), kCFStringEncodingUnicode, false, false, false, false, true, contentsDeallocator, 0); | |
1319 | } | |
1320 | ||
1321 | ||
1322 | CFStringRef CFStringCreateWithBytes(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat) { | |
1323 | return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, false, ALLOCATORSFREEFUNC, 0); | |
1324 | } | |
1325 | ||
1326 | CFStringRef _CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) { | |
1327 | return __CFStringCreateImmutableFunnel3(alloc, bytes, numBytes, encoding, externalFormat, true, false, false, true, contentsDeallocator, 0); | |
1328 | } | |
1329 | ||
d8925383 A |
1330 | CFStringRef CFStringCreateWithBytesNoCopy(CFAllocatorRef alloc, const uint8_t *bytes, CFIndex numBytes, CFStringEncoding encoding, Boolean externalFormat, CFAllocatorRef contentsDeallocator) { |
1331 | return _CFStringCreateWithBytesNoCopy(alloc, bytes, numBytes, encoding, externalFormat, contentsDeallocator); | |
1332 | } | |
1333 | ||
9ce05555 A |
1334 | CFStringRef CFStringCreateWithFormatAndArguments(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) { |
1335 | return _CFStringCreateWithFormatAndArgumentsAux(alloc, NULL, formatOptions, format, arguments); | |
1336 | } | |
1337 | ||
1338 | CFStringRef _CFStringCreateWithFormatAndArgumentsAux(CFAllocatorRef alloc, CFStringRef (*copyDescFunc)(void *, CFDictionaryRef), CFDictionaryRef formatOptions, CFStringRef format, va_list arguments) { | |
1339 | CFStringRef str; | |
1340 | CFMutableStringRef outputString = CFStringCreateMutable(__CFGetDefaultAllocator(), 0); //should use alloc if no copy/release | |
1341 | __CFStrSetDesiredCapacity(outputString, 120); // Given this will be tightened later, choosing a larger working string is fine | |
1342 | _CFStringAppendFormatAndArgumentsAux(outputString, copyDescFunc, formatOptions, format, arguments); | |
1343 | // ??? copy/release should not be necessary here -- just make immutable, compress if possible | |
1344 | // (However, this does make the string inline, and cause the supplied allocator to be used...) | |
1345 | str = CFStringCreateCopy(alloc, outputString); | |
1346 | CFRelease(outputString); | |
1347 | return str; | |
1348 | } | |
1349 | ||
1350 | CFStringRef CFStringCreateWithFormat(CFAllocatorRef alloc, CFDictionaryRef formatOptions, CFStringRef format, ...) { | |
1351 | CFStringRef result; | |
1352 | va_list argList; | |
1353 | ||
1354 | va_start(argList, format); | |
1355 | result = CFStringCreateWithFormatAndArguments(alloc, formatOptions, format, argList); | |
1356 | va_end(argList); | |
1357 | ||
1358 | return result; | |
1359 | } | |
1360 | ||
9ce05555 | 1361 | CFStringRef CFStringCreateWithSubstring(CFAllocatorRef alloc, CFStringRef str, CFRange range) { |
d8925383 A |
1362 | if (CF_IS_OBJC(__kCFStringTypeID, str)) { |
1363 | static SEL s = NULL; | |
1364 | CFStringRef (*func)(void *, SEL, ...) = (void *)__CFSendObjCMsg; | |
1365 | if (!s) s = sel_registerName("_createSubstringWithRange:"); | |
1366 | CFStringRef result = func((void *)str, s, CFRangeMake(range.location, range.length)); | |
1367 | if (result && CF_USING_COLLECTABLE_MEMORY) CFRetain(result); // needs hard retain. | |
1368 | return result; | |
1369 | } | |
1370 | // CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, CFStringRef , str, "_createSubstringWithRange:", CFRangeMake(range.location, range.length)); | |
9ce05555 A |
1371 | |
1372 | __CFAssertIsString(str); | |
1373 | __CFAssertRangeIsInStringBounds(str, range.location, range.length); | |
1374 | ||
1375 | if ((range.location == 0) && (range.length == __CFStrLength(str))) { /* The substring is the whole string... */ | |
1376 | return CFStringCreateCopy(alloc, str); | |
1377 | } else if (__CFStrIsEightBit(str)) { | |
1378 | const uint8_t *contents = __CFStrContents(str); | |
1379 | return __CFStringCreateImmutableFunnel3(alloc, contents + range.location + __CFStrSkipAnyLengthByte(str), range.length, __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0); | |
1380 | } else { | |
1381 | const UniChar *contents = __CFStrContents(str); | |
1382 | return __CFStringCreateImmutableFunnel3(alloc, contents + range.location, range.length * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0); | |
1383 | } | |
1384 | } | |
1385 | ||
1386 | CFStringRef CFStringCreateCopy(CFAllocatorRef alloc, CFStringRef str) { | |
d8925383 A |
1387 | if (CF_IS_OBJC(__kCFStringTypeID, str)) { |
1388 | static SEL s = NULL; | |
1389 | CFStringRef (*func)(void *, SEL, ...) = (void *)__CFSendObjCMsg; | |
1390 | if (!s) s = sel_registerName("copy"); | |
1391 | CFStringRef result = func((void *)str, s); | |
1392 | if (result && CF_USING_COLLECTABLE_MEMORY) CFRetain(result); // needs hard retain. | |
1393 | return result; | |
1394 | } | |
1395 | // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringRef, str, "copy"); | |
9ce05555 A |
1396 | |
1397 | __CFAssertIsString(str); | |
1398 | if (!__CFStrIsMutable(str) && // If the string is not mutable | |
1399 | ((alloc ? alloc : __CFGetDefaultAllocator()) == __CFGetAllocator(str)) && // and it has the same allocator as the one we're using | |
d8925383 | 1400 | (__CFStrIsInline(str) || __CFStrFreeContentsWhenDone(str) || __CFStrIsConstant(str))) { // and the characters are inline, or are owned by the string, or the string is constant |
9ce05555 A |
1401 | CFRetain(str); // Then just retain instead of making a true copy |
1402 | return str; | |
1403 | } | |
1404 | if (__CFStrIsEightBit(str)) { | |
1405 | const uint8_t *contents = __CFStrContents(str); | |
1406 | return __CFStringCreateImmutableFunnel3(alloc, contents + __CFStrSkipAnyLengthByte(str), __CFStrLength2(str, contents), __CFStringGetEightBitStringEncoding(), false, false, false, false, false, ALLOCATORSFREEFUNC, 0); | |
1407 | } else { | |
1408 | const UniChar *contents = __CFStrContents(str); | |
1409 | return __CFStringCreateImmutableFunnel3(alloc, contents, __CFStrLength2(str, contents) * sizeof(UniChar), kCFStringEncodingUnicode, false, true, false, false, false, ALLOCATORSFREEFUNC, 0); | |
1410 | } | |
1411 | } | |
1412 | ||
1413 | ||
1414 | ||
1415 | /*** Constant string stuff... ***/ | |
1416 | ||
/* Table of constant strings, created lazily by __CFStringMakeConstantString(). */
static CFMutableDictionaryRef constantStringTable = NULL;

/* For now we call a function to create a constant string and keep previously created constant strings in a dictionary. The keys are the 8-bit constant C-strings from the compiler; the values are the CFStrings created for them.
*/
1421 | ||
1422 | static CFStringRef __cStrCopyDescription(const void *ptr) { | |
1423 | return CFStringCreateWithCStringNoCopy(NULL, (const char *)ptr, __CFStringGetEightBitStringEncoding(), kCFAllocatorNull); | |
1424 | } | |
1425 | ||
1426 | static Boolean __cStrEqual(const void *ptr1, const void *ptr2) { | |
1427 | return (strcmp((const char *)ptr1, (const char *)ptr2) == 0); | |
1428 | } | |
1429 | ||
1430 | static CFHashCode __cStrHash(const void *ptr) { | |
1431 | // It doesn't quite matter if we convert to Unicode correctly, as long as we do it consistently | |
1432 | const unsigned char *cStr = (const unsigned char *)ptr; | |
1433 | CFIndex len = strlen(cStr); | |
1434 | CFHashCode result = 0; | |
1435 | if (len <= 4) { // All chars | |
1436 | unsigned cnt = len; | |
1437 | while (cnt--) result += (result << 8) + *cStr++; | |
1438 | } else { // First and last 2 chars | |
1439 | result += (result << 8) + cStr[0]; | |
1440 | result += (result << 8) + cStr[1]; | |
1441 | result += (result << 8) + cStr[len-2]; | |
1442 | result += (result << 8) + cStr[len-1]; | |
1443 | } | |
1444 | result += (result << (len & 31)); | |
1445 | return result; | |
1446 | } | |
1447 | ||
#if defined(DEBUG)
/* We use a special allocator (which simply calls through to the default) for constant strings so that we can catch them being freed...
   Each callback below ignores its `info` argument and forwards to the corresponding
   CFAllocator function with a NULL allocator.
*/
static void *csRealloc(void *oPtr, CFIndex size, CFOptionFlags hint, void *info) {
    void *resized = CFAllocatorReallocate(NULL, oPtr, size, hint);
    return resized;
}

static void *csAlloc(CFIndex size, CFOptionFlags hint, void *info) {
    void *fresh = CFAllocatorAllocate(NULL, size, hint);
    return fresh;
}

static void csDealloc(void *ptr, void *info) {
    CFAllocatorDeallocate(NULL, ptr);
}

/* Returns a retained description string for the debug allocator. */
static CFStringRef csCopyDescription(const void *info) {
    CFStringRef description = CFSTR("Debug allocator for CFSTRs");
    return CFRetain(description);
}
#endif
1467 | ||
1468 | static CFSpinLock_t _CFSTRLock = 0; | |
1469 | ||
1470 | CFStringRef __CFStringMakeConstantString(const char *cStr) { | |
1471 | CFStringRef result; | |
d8925383 A |
1472 | #if defined(DEBUG) |
1473 | //StringTest checks that we share kCFEmptyString, which is defeated by constantStringAllocatorForDebugging | |
1474 | if ('\0' == *cStr) return kCFEmptyString; | |
1475 | #endif | |
9ce05555 A |
1476 | if (constantStringTable == NULL) { |
1477 | CFDictionaryKeyCallBacks constantStringCallBacks = {0, NULL, NULL, __cStrCopyDescription, __cStrEqual, __cStrHash}; | |
d8925383 A |
1478 | CFMutableDictionaryRef table = CFDictionaryCreateMutable(NULL, 0, &constantStringCallBacks, &kCFTypeDictionaryValueCallBacks); |
1479 | _CFDictionarySetCapacity(table, 2500); // avoid lots of rehashing | |
1480 | __CFSpinLock(&_CFSTRLock); | |
1481 | if (constantStringTable == NULL) constantStringTable = table; | |
1482 | __CFSpinUnlock(&_CFSTRLock); | |
1483 | if (constantStringTable != table) CFRelease(table); | |
9ce05555 A |
1484 | #if defined(DEBUG) |
1485 | { | |
1486 | CFAllocatorContext context = {0, NULL, NULL, NULL, csCopyDescription, csAlloc, csRealloc, csDealloc, NULL}; | |
d8925383 | 1487 | constantStringAllocatorForDebugging = _CFAllocatorCreateGC(NULL, &context); |
9ce05555 A |
1488 | } |
1489 | #else | |
1490 | #define constantStringAllocatorForDebugging NULL | |
1491 | #endif | |
1492 | } | |
1493 | ||
1494 | __CFSpinLock(&_CFSTRLock); | |
1495 | if ((result = (CFStringRef)CFDictionaryGetValue(constantStringTable, cStr))) { | |
1496 | __CFSpinUnlock(&_CFSTRLock); | |
1497 | } else { | |
1498 | __CFSpinUnlock(&_CFSTRLock); | |
1499 | ||
1500 | { | |
9ce05555 A |
1501 | char *key; |
1502 | Boolean isASCII = true; | |
d8925383 | 1503 | // Given this code path is rarer these days, OK to do this extra work to verify the strings |
9ce05555 A |
1504 | const unsigned char *tmp = cStr; |
1505 | while (*tmp) { | |
1506 | if (*tmp++ > 127) { | |
1507 | isASCII = false; | |
1508 | break; | |
1509 | } | |
1510 | } | |
1511 | if (!isASCII) { | |
1512 | CFMutableStringRef ms = CFStringCreateMutable(NULL, 0); | |
1513 | tmp = cStr; | |
1514 | while (*tmp) { | |
1515 | CFStringAppendFormat(ms, NULL, (*tmp > 127) ? CFSTR("\\%3o") : CFSTR("%1c"), *tmp); | |
1516 | tmp++; | |
1517 | } | |
1518 | CFLog(0, CFSTR("WARNING: CFSTR(\"%@\") has non-7 bit chars, interpreting using MacOS Roman encoding for now, but this will change. Please eliminate usages of non-7 bit chars (including escaped characters above \\177 octal) in CFSTR()."), ms); | |
1519 | CFRelease(ms); | |
1520 | } | |
d8925383 | 1521 | // Treat non-7 bit chars in CFSTR() as MacOSRoman, for compatibility |
9ce05555 A |
1522 | result = CFStringCreateWithCString(constantStringAllocatorForDebugging, cStr, kCFStringEncodingMacRoman); |
1523 | if (result == NULL) { | |
1524 | CFLog(__kCFLogAssertion, CFSTR("Can't interpret CFSTR() as MacOS Roman, crashing")); | |
1525 | HALT; | |
1526 | } | |
9ce05555 A |
1527 | if (__CFOASafe) __CFSetLastAllocationEventName((void *)result, "CFString (CFSTR)"); |
1528 | if (__CFStrIsEightBit(result)) { | |
1529 | key = (char *)__CFStrContents(result) + __CFStrSkipAnyLengthByte(result); | |
1530 | } else { // For some reason the string is not 8-bit! | |
1531 | key = CFAllocatorAllocate(NULL, strlen(cStr) + 1, 0); | |
1532 | if (__CFOASafe) __CFSetLastAllocationEventName((void *)key, "CFString (CFSTR key)"); | |
1533 | strcpy(key, cStr); // !!! We will leak this, if the string is removed from the table (or table is freed) | |
1534 | } | |
1535 | ||
1536 | { | |
1537 | #if !defined(DEBUG) | |
1538 | CFStringRef resultToBeReleased = result; | |
1539 | #endif | |
1540 | CFIndex count; | |
1541 | __CFSpinLock(&_CFSTRLock); | |
1542 | count = CFDictionaryGetCount(constantStringTable); | |
1543 | CFDictionaryAddValue(constantStringTable, key, result); | |
1544 | if (CFDictionaryGetCount(constantStringTable) == count) { // add did nothing, someone already put it there | |
1545 | result = (CFStringRef)CFDictionaryGetValue(constantStringTable, key); | |
1546 | } | |
1547 | __CFSpinUnlock(&_CFSTRLock); | |
1548 | #if !defined(DEBUG) | |
1549 | // Can't release this in the DEBUG case; will get assertion failure | |
1550 | CFRelease(resultToBeReleased); | |
1551 | #endif | |
1552 | } | |
1553 | } | |
1554 | } | |
1555 | return result; | |
1556 | } | |
1557 | ||
#if defined(__MACOS8__) || defined(__WIN32__)

/* Releases the constant string table, for use when the library is unloaded.

   In DEBUG builds __CFConstantStringTableBeingFreed is raised around the release, and the
   debugging allocator is deallocated afterwards. The table pointer is reset to NULL once
   released so that it never dangles; a later __CFStringMakeConstantString() call then
   builds a fresh table instead of touching freed memory.
*/
void __CFStringCleanup (void) {
    /* in case library is unloaded, release store for the constant string table */
    if (constantStringTable != NULL) {
#if defined(DEBUG)
        __CFConstantStringTableBeingFreed = true;
        CFRelease(constantStringTable);
        __CFConstantStringTableBeingFreed = false;
#else
        CFRelease(constantStringTable);
#endif
        constantStringTable = NULL;     // Don't leave a dangling pointer behind
    }
#if defined(DEBUG)
    CFAllocatorDeallocate( constantStringAllocatorForDebugging, (void*) constantStringAllocatorForDebugging );
#endif
}

#endif
1577 | ||
1578 | ||
1579 | // Can pass in NSString as replacement string | |
1580 | // Call with numRanges > 0, and incrementing ranges | |
1581 | ||
1582 | static void __CFStringReplaceMultiple(CFMutableStringRef str, CFRange *ranges, CFIndex numRanges, CFStringRef replacement) { | |
1583 | int cnt; | |
d8925383 A |
1584 | CFStringRef copy = NULL; |
1585 | if (replacement == str) copy = replacement = CFStringCreateCopy(NULL, replacement); // Very special and hopefully rare case | |
9ce05555 A |
1586 | CFIndex replacementLength = CFStringGetLength(replacement); |
1587 | ||
1588 | __CFStringChangeSizeMultiple(str, ranges, numRanges, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement)); | |
1589 | ||
1590 | if (__CFStrIsUnicode(str)) { | |
1591 | UniChar *contents = (UniChar *)__CFStrContents(str); | |
1592 | UniChar *firstReplacement = contents + ranges[0].location; | |
1593 | // Extract the replacementString into the first location, then copy from there | |
1594 | CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), firstReplacement); | |
1595 | for (cnt = 1; cnt < numRanges; cnt++) { | |
1596 | // The ranges are in terms of the original string; so offset by the change in length due to insertion | |
1597 | contents += replacementLength - ranges[cnt - 1].length; | |
1598 | memmove(contents + ranges[cnt].location, firstReplacement, replacementLength * sizeof(UniChar)); | |
1599 | } | |
1600 | } else { | |
1601 | uint8_t *contents = (uint8_t *)__CFStrContents(str); | |
1602 | uint8_t *firstReplacement = contents + ranges[0].location + __CFStrSkipAnyLengthByte(str); | |
1603 | // Extract the replacementString into the first location, then copy from there | |
1604 | CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, firstReplacement, replacementLength, NULL); | |
1605 | contents += __CFStrSkipAnyLengthByte(str); // Now contents will simply track the location to insert next string into | |
1606 | for (cnt = 1; cnt < numRanges; cnt++) { | |
1607 | // The ranges are in terms of the original string; so offset by the change in length due to insertion | |
1608 | contents += replacementLength - ranges[cnt - 1].length; | |
1609 | memmove(contents + ranges[cnt].location, firstReplacement, replacementLength); | |
1610 | } | |
1611 | } | |
d8925383 | 1612 | if (copy) CFRelease(copy); |
9ce05555 A |
1613 | } |
1614 | ||
1615 | // Can pass in NSString as replacement string | |
1616 | ||
d8925383 A |
1617 | CF_INLINE void __CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) { |
1618 | CFStringRef copy = NULL; | |
1619 | if (replacement == str) copy = replacement = CFStringCreateCopy(NULL, replacement); // Very special and hopefully rare case | |
9ce05555 A |
1620 | CFIndex replacementLength = CFStringGetLength(replacement); |
1621 | ||
1622 | __CFStringChangeSize(str, range, replacementLength, (replacementLength > 0) && CFStrIsUnicode(replacement)); | |
1623 | ||
1624 | if (__CFStrIsUnicode(str)) { | |
1625 | UniChar *contents = (UniChar *)__CFStrContents(str); | |
1626 | CFStringGetCharacters(replacement, CFRangeMake(0, replacementLength), contents + range.location); | |
1627 | } else { | |
1628 | uint8_t *contents = (uint8_t *)__CFStrContents(str); | |
1629 | CFStringGetBytes(replacement, CFRangeMake(0, replacementLength), __CFStringGetEightBitStringEncoding(), 0, false, contents + range.location + __CFStrSkipAnyLengthByte(str), replacementLength, NULL); | |
1630 | } | |
d8925383 A |
1631 | |
1632 | if (copy) CFRelease(copy); | |
9ce05555 A |
1633 | } |
1634 | ||
1635 | /* If client does not provide a minimum capacity | |
1636 | */ | |
1637 | #define DEFAULTMINCAPACITY 32 | |
1638 | ||
d8925383 | 1639 | CF_INLINE CFMutableStringRef __CFStringCreateMutableFunnel(CFAllocatorRef alloc, CFIndex maxLength, UInt32 additionalInfoBits) { |
9ce05555 A |
1640 | CFMutableStringRef str; |
1641 | Boolean hasExternalContentsAllocator = (additionalInfoBits & __kCFHasContentsAllocator) ? true : false; | |
1642 | ||
1643 | if (alloc == NULL) alloc = __CFGetDefaultAllocator(); | |
1644 | ||
1645 | // Note that if there is an externalContentsAllocator, then we also have the storage for the string allocator... | |
1646 | str = (CFMutableStringRef)_CFRuntimeCreateInstance(alloc, __kCFStringTypeID, sizeof(void *) + sizeof(UInt32) * 3 + (hasExternalContentsAllocator ? sizeof(CFAllocatorRef) : 0), NULL); | |
1647 | if (str) { | |
1648 | if (__CFOASafe) __CFSetLastAllocationEventName(str, "CFString (mutable)"); | |
1649 | ||
1650 | __CFStrSetInfoBits(str, __kCFIsMutable | additionalInfoBits); | |
d8925383 | 1651 | str->variants.notInlineMutable.buffer = NULL; |
9ce05555 | 1652 | __CFStrSetExplicitLength(str, 0); |
d8925383 A |
1653 | str->variants.notInlineMutable.gapEtc = 0; |
1654 | if (maxLength != 0) __CFStrSetIsFixed(str); | |
9ce05555 A |
1655 | __CFStrSetDesiredCapacity(str, (maxLength == 0) ? DEFAULTMINCAPACITY : maxLength); |
1656 | __CFStrSetCapacity(str, 0); | |
1657 | } | |
1658 | return str; | |
1659 | } | |
1660 | ||
1661 | CFMutableStringRef CFStringCreateMutableWithExternalCharactersNoCopy(CFAllocatorRef alloc, UniChar *chars, CFIndex numChars, CFIndex capacity, CFAllocatorRef externalCharactersAllocator) { | |
d8925383 A |
1662 | CFOptionFlags contentsAllocationBits = externalCharactersAllocator ? ((externalCharactersAllocator == kCFAllocatorNull) ? __kCFNotInlineContentsNoFree : __kCFHasContentsAllocator) : __kCFNotInlineContentsDefaultFree; |
1663 | CFMutableStringRef string = __CFStringCreateMutableFunnel(alloc, 0, contentsAllocationBits | __kCFIsUnicode); | |
9ce05555 | 1664 | if (string) { |
d8925383 | 1665 | __CFStrSetIsExternalMutable(string); |
9ce05555 A |
1666 | if (contentsAllocationBits == __kCFHasContentsAllocator) __CFStrSetContentsAllocator(string, CFRetain(externalCharactersAllocator)); |
1667 | CFStringSetExternalCharactersNoCopy(string, chars, numChars, capacity); | |
1668 | } | |
1669 | return string; | |
1670 | } | |
1671 | ||
1672 | CFMutableStringRef CFStringCreateMutable(CFAllocatorRef alloc, CFIndex maxLength) { | |
d8925383 | 1673 | return __CFStringCreateMutableFunnel(alloc, maxLength, __kCFNotInlineContentsDefaultFree); |
9ce05555 A |
1674 | } |
1675 | ||
1676 | CFMutableStringRef CFStringCreateMutableCopy(CFAllocatorRef alloc, CFIndex maxLength, CFStringRef string) { | |
1677 | CFMutableStringRef newString; | |
1678 | ||
d8925383 A |
1679 | if (CF_IS_OBJC(__kCFStringTypeID, string)) { |
1680 | static SEL s = NULL; | |
1681 | CFMutableStringRef (*func)(void *, SEL, ...) = (void *)__CFSendObjCMsg; | |
1682 | if (!s) s = sel_registerName("mutableCopy"); | |
1683 | newString = func((void *)string, s); | |
1684 | if (CF_USING_COLLECTABLE_MEMORY) auto_zone_retain(__CFCollectableZone, newString); // needs hard retain IF using GC | |
1685 | return newString; | |
1686 | } | |
1687 | // CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFMutableStringRef, string, "mutableCopy"); | |
9ce05555 A |
1688 | |
1689 | __CFAssertIsString(string); | |
1690 | ||
1691 | newString = CFStringCreateMutable(alloc, maxLength); | |
1692 | __CFStringReplace(newString, CFRangeMake(0, 0), string); | |
1693 | ||
1694 | return newString; | |
1695 | } | |
1696 | ||
1697 | ||
1698 | __private_extern__ void _CFStrSetDesiredCapacity(CFMutableStringRef str, CFIndex len) { | |
1699 | __CFAssertIsStringAndMutable(str); | |
1700 | __CFStrSetDesiredCapacity(str, len); | |
1701 | } | |
1702 | ||
1703 | ||
1704 | /* This one is for CF | |
1705 | */ | |
1706 | CFIndex CFStringGetLength(CFStringRef str) { | |
1707 | CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFIndex, str, "length"); | |
1708 | ||
1709 | __CFAssertIsString(str); | |
1710 | return __CFStrLength(str); | |
1711 | } | |
1712 | ||
1713 | /* This one is for NSCFString; it does not ObjC dispatch or assertion check | |
1714 | */ | |
1715 | CFIndex _CFStringGetLength2(CFStringRef str) { | |
1716 | return __CFStrLength(str); | |
1717 | } | |
1718 | ||
1719 | ||
1720 | /* Guts of CFStringGetCharacterAtIndex(); called from the two functions below. Don't call it from elsewhere. | |
1721 | */ | |
1722 | CF_INLINE UniChar __CFStringGetCharacterAtIndexGuts(CFStringRef str, CFIndex idx, const uint8_t *contents) { | |
1723 | if (__CFStrIsEightBit(str)) { | |
1724 | contents += __CFStrSkipAnyLengthByte(str); | |
1725 | #if defined(DEBUG) | |
1726 | if (!__CFCharToUniCharFunc && (contents[idx] >= 128)) { | |
1727 | // Can't do log here, as it might be too early | |
d8925383 | 1728 | fprintf(stderr, "Warning: CFStringGetCharacterAtIndex() attempted on CFString containing high bytes before properly initialized to do so\n"); |
9ce05555 A |
1729 | } |
1730 | #endif | |
1731 | return __CFCharToUniCharTable[contents[idx]]; | |
1732 | } | |
1733 | ||
1734 | return ((UniChar *)contents)[idx]; | |
1735 | } | |
1736 | ||
1737 | /* This one is for the CF API | |
1738 | */ | |
1739 | UniChar CFStringGetCharacterAtIndex(CFStringRef str, CFIndex idx) { | |
1740 | CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, UniChar, str, "characterAtIndex:", idx); | |
1741 | ||
1742 | __CFAssertIsString(str); | |
1743 | __CFAssertIndexIsInStringBounds(str, idx); | |
1744 | return __CFStringGetCharacterAtIndexGuts(str, idx, __CFStrContents(str)); | |
1745 | } | |
1746 | ||
1747 | /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check | |
1748 | */ | |
1749 | int _CFStringCheckAndGetCharacterAtIndex(CFStringRef str, CFIndex idx, UniChar *ch) { | |
1750 | const uint8_t *contents = __CFStrContents(str); | |
1751 | if (idx >= __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds; | |
1752 | *ch = __CFStringGetCharacterAtIndexGuts(str, idx, contents); | |
1753 | return _CFStringErrNone; | |
1754 | } | |
1755 | ||
1756 | ||
1757 | /* Guts of CFStringGetCharacters(); called from the two functions below. Don't call it from elsewhere. | |
1758 | */ | |
1759 | CF_INLINE void __CFStringGetCharactersGuts(CFStringRef str, CFRange range, UniChar *buffer, const uint8_t *contents) { | |
1760 | if (__CFStrIsEightBit(str)) { | |
1761 | __CFStrConvertBytesToUnicode(((uint8_t *)contents) + (range.location + __CFStrSkipAnyLengthByte(str)), buffer, range.length); | |
1762 | } else { | |
1763 | const UniChar *uContents = ((UniChar *)contents) + range.location; | |
1764 | memmove(buffer, uContents, range.length * sizeof(UniChar)); | |
1765 | } | |
1766 | } | |
1767 | ||
1768 | /* This one is for the CF API | |
1769 | */ | |
1770 | void CFStringGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) { | |
1771 | CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "getCharacters:range:", buffer, CFRangeMake(range.location, range.length)); | |
1772 | ||
1773 | __CFAssertIsString(str); | |
1774 | __CFAssertRangeIsInStringBounds(str, range.location, range.length); | |
1775 | __CFStringGetCharactersGuts(str, range, buffer, __CFStrContents(str)); | |
1776 | } | |
1777 | ||
1778 | /* This one is for NSCFString usage; it doesn't do ObjC dispatch; but it does do range check | |
1779 | */ | |
1780 | int _CFStringCheckAndGetCharacters(CFStringRef str, CFRange range, UniChar *buffer) { | |
1781 | const uint8_t *contents = __CFStrContents(str); | |
1782 | if (range.location + range.length > __CFStrLength2(str, contents) && __CFStringNoteErrors()) return _CFStringErrBounds; | |
1783 | __CFStringGetCharactersGuts(str, range, buffer, contents); | |
1784 | return _CFStringErrNone; | |
1785 | } | |
1786 | ||
1787 | ||
1788 | CFIndex CFStringGetBytes(CFStringRef str, CFRange range, CFStringEncoding encoding, uint8_t lossByte, Boolean isExternalRepresentation, uint8_t *buffer, CFIndex maxBufLen, CFIndex *usedBufLen) { | |
1789 | ||
1790 | /* No objc dispatch needed here since __CFStringEncodeByteStream works with both CFString and NSString */ | |
1791 | __CFAssertIsNotNegative(maxBufLen); | |
1792 | ||
1793 | if (!CF_IS_OBJC(__kCFStringTypeID, str)) { // If we can grope the ivars, let's do it... | |
1794 | __CFAssertIsString(str); | |
1795 | __CFAssertRangeIsInStringBounds(str, range.location, range.length); | |
1796 | ||
1797 | if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string | |
1798 | const unsigned char *contents = __CFStrContents(str); | |
1799 | CFIndex cLength = range.length; | |
1800 | ||
1801 | if (buffer) { | |
1802 | if (cLength > maxBufLen) cLength = maxBufLen; | |
1803 | memmove(buffer, contents + __CFStrSkipAnyLengthByte(str) + range.location, cLength); | |
1804 | } | |
1805 | if (usedBufLen) *usedBufLen = cLength; | |
1806 | ||
1807 | return cLength; | |
1808 | } | |
1809 | } | |
1810 | ||
1811 | return __CFStringEncodeByteStream(str, range.location, range.length, isExternalRepresentation, encoding, lossByte, buffer, maxBufLen, usedBufLen); | |
1812 | } | |
1813 | ||
1814 | ||
1815 | ConstStringPtr CFStringGetPascalStringPtr (CFStringRef str, CFStringEncoding encoding) { | |
1816 | ||
1817 | if (!CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */ | |
1818 | __CFAssertIsString(str); | |
1819 | if (__CFStrHasLengthByte(str) && __CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string || the contents is in ASCII | |
1820 | const uint8_t *contents = __CFStrContents(str); | |
1821 | if (__CFStrHasExplicitLength(str) && (__CFStrLength2(str, contents) != (SInt32)(*contents))) return NULL; // Invalid length byte | |
1822 | return (ConstStringPtr)contents; | |
1823 | } | |
1824 | // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII? | |
1825 | } | |
1826 | return NULL; | |
1827 | } | |
1828 | ||
1829 | ||
1830 | const char * CFStringGetCStringPtr(CFStringRef str, CFStringEncoding encoding) { | |
1831 | ||
1832 | if (encoding != __CFStringGetEightBitStringEncoding() && (kCFStringEncodingASCII != __CFStringGetEightBitStringEncoding() || !__CFStringEncodingIsSupersetOfASCII(encoding))) return NULL; | |
1833 | // ??? Also check for encoding = SystemEncoding and perhaps bytes are all ASCII? | |
1834 | ||
1835 | CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, const char *, str, "_fastCStringContents:", true); | |
1836 | ||
1837 | __CFAssertIsString(str); | |
1838 | ||
1839 | if (__CFStrHasNullByte(str)) { | |
1840 | return (const char *)__CFStrContents(str) + __CFStrSkipAnyLengthByte(str); | |
1841 | } else { | |
1842 | return NULL; | |
1843 | } | |
1844 | } | |
1845 | ||
1846 | ||
1847 | const UniChar *CFStringGetCharactersPtr(CFStringRef str) { | |
1848 | ||
1849 | CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, const UniChar *, str, "_fastCharacterContents"); | |
1850 | ||
1851 | __CFAssertIsString(str); | |
1852 | if (__CFStrIsUnicode(str)) return (const UniChar *)__CFStrContents(str); | |
1853 | return NULL; | |
1854 | } | |
1855 | ||
1856 | ||
1857 | Boolean CFStringGetPascalString(CFStringRef str, Str255 buffer, CFIndex bufferSize, CFStringEncoding encoding) { | |
1858 | CFIndex length; | |
1859 | CFIndex usedLen; | |
1860 | ||
1861 | __CFAssertIsNotNegative(bufferSize); | |
1862 | if (bufferSize < 1) return false; | |
1863 | ||
1864 | if (CF_IS_OBJC(__kCFStringTypeID, str)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */ | |
1865 | length = CFStringGetLength(str); | |
1866 | if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring | |
1867 | } else { | |
1868 | const uint8_t *contents; | |
1869 | ||
1870 | __CFAssertIsString(str); | |
1871 | ||
1872 | contents = __CFStrContents(str); | |
1873 | length = __CFStrLength2(str, contents); | |
1874 | ||
1875 | if (!__CFCanUseLengthByte(length)) return false; // Can't fit into pstring | |
1876 | ||
1877 | if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string | |
1878 | if (length >= bufferSize) return false; | |
1879 | memmove((void*)(1 + (const char*)buffer), (__CFStrSkipAnyLengthByte(str) + contents), length); | |
1880 | *buffer = length; | |
1881 | return true; | |
1882 | } | |
1883 | } | |
1884 | ||
1885 | if (__CFStringEncodeByteStream(str, 0, length, false, encoding, false, (void*)(1 + (uint8_t*)buffer), bufferSize - 1, &usedLen) != length) { | |
1886 | #if defined(DEBUG) | |
1887 | if (bufferSize > 0) { | |
1888 | strncpy((char *)buffer + 1, CONVERSIONFAILURESTR, bufferSize - 1); | |
1889 | buffer[0] = (CFIndex)sizeof(CONVERSIONFAILURESTR) < (bufferSize - 1) ? (CFIndex)sizeof(CONVERSIONFAILURESTR) : (bufferSize - 1); | |
1890 | } | |
1891 | #else | |
1892 | if (bufferSize > 0) buffer[0] = 0; | |
1893 | #endif | |
1894 | return false; | |
1895 | } | |
1896 | *buffer = usedLen; | |
1897 | return true; | |
1898 | } | |
1899 | ||
1900 | Boolean CFStringGetCString(CFStringRef str, char *buffer, CFIndex bufferSize, CFStringEncoding encoding) { | |
1901 | const uint8_t *contents; | |
1902 | CFIndex len; | |
1903 | ||
1904 | __CFAssertIsNotNegative(bufferSize); | |
1905 | if (bufferSize < 1) return false; | |
1906 | ||
1907 | CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, Boolean, str, "_getCString:maxLength:encoding:", buffer, bufferSize - 1, encoding); | |
1908 | ||
1909 | __CFAssertIsString(str); | |
1910 | ||
1911 | contents = __CFStrContents(str); | |
1912 | len = __CFStrLength2(str, contents); | |
1913 | ||
1914 | if (__CFStrIsEightBit(str) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string | |
1915 | if (len >= bufferSize) return false; | |
1916 | memmove(buffer, contents + __CFStrSkipAnyLengthByte(str), len); | |
1917 | buffer[len] = 0; | |
1918 | return true; | |
1919 | } else { | |
1920 | CFIndex usedLen; | |
1921 | ||
1922 | if (__CFStringEncodeByteStream(str, 0, len, false, encoding, false, (unsigned char*) buffer, bufferSize - 1, &usedLen) == len) { | |
1923 | buffer[usedLen] = '\0'; | |
1924 | return true; | |
1925 | } else { | |
1926 | #if defined(DEBUG) | |
1927 | strncpy(buffer, CONVERSIONFAILURESTR, bufferSize); | |
1928 | #else | |
1929 | if (bufferSize > 0) buffer[0] = 0; | |
1930 | #endif | |
1931 | return false; | |
1932 | } | |
1933 | } | |
1934 | } | |
1935 | ||
d8925383 A |
1936 | |
1937 | CF_INLINE bool _CFCanUseLocale(CFLocaleRef locale) { | |
1938 | return false; | |
1939 | } | |
1940 | ||
1941 | static const char *_CFStrGetLanguageIdentifierForLocale(CFLocaleRef locale) { | |
1942 | return NULL; | |
1943 | } | |
1944 | ||
// Element count of the scratch buffers handed to CFUniCharMapCaseTo() in this file
#define MAX_CASE_MAPPING_BUF (8)

// Individual code points
#define ZERO_WIDTH_JOINER (0x200D)
#define COMBINING_GRAPHEME_JOINER (0x034F)

// Hangul ranges (first and last code point of each)
#define HANGUL_CHOSEONG_START (0x1100)
#define HANGUL_CHOSEONG_END (0x115F)
#define HANGUL_JUNGSEONG_START (0x1160)
#define HANGUL_JUNGSEONG_END (0x11A2)
#define HANGUL_JONGSEONG_START (0x11A8)
#define HANGUL_JONGSEONG_END (0x11F9)

#define HANGUL_SYLLABLE_START (0xAC00)
#define HANGUL_SYLLABLE_END (0xD7AF)
1958 | ||
1959 | ||
1960 | // Returns the length of characters filled into outCharacters. If no change, returns 0. maxBufLen shoule be at least 8 | |
1961 | static inline CFIndex __CFStringFoldCharacterClusterAtIndex(UTF32Char character, CFStringInlineBuffer *buffer, CFIndex index, CFOptionFlags flags, const uint8_t *langCode, UTF32Char *outCharacters, CFIndex maxBufferLength, CFIndex *consumedLength) { | |
1962 | CFIndex filledLength = 0, currentIndex = index; | |
1963 | ||
1964 | if (0 != character) { | |
1965 | UTF16Char lowSurrogate; | |
1966 | CFIndex planeNo = (character >> 16); | |
1967 | bool isTurkikCapitalI = false; | |
1968 | static const uint8_t *decompBMP = NULL; | |
1969 | static const uint8_t *nonBaseBMP = NULL; | |
1970 | ||
1971 | if (NULL == decompBMP) { | |
1972 | decompBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, 0); | |
1973 | nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0); | |
1974 | } | |
1975 | ||
1976 | ++currentIndex; | |
1977 | ||
1978 | if ((character < 0x0080) && ((NULL == langCode) || (character != 'I'))) { // ASCII | |
1979 | if ((flags & kCFCompareCaseInsensitive) && (character >= 'A') && (character <= 'Z')) { | |
1980 | character += ('a' - 'A'); | |
1981 | *outCharacters = character; | |
1982 | filledLength = 1; | |
1983 | } | |
1984 | } else { | |
1985 | // do width-insensitive mapping | |
1986 | if ((flags & kCFCompareWidthInsensitive) && (character >= 0xFF00) && (character <= 0xFFEF)) { | |
1987 | (void)CFUniCharCompatibilityDecompose(&character, 1, 1); | |
1988 | *outCharacters = character; | |
1989 | filledLength = 1; | |
1990 | } | |
1991 | ||
1992 | // map surrogates | |
1993 | if ((0 == planeNo) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)))) { | |
1994 | character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate); | |
1995 | ++currentIndex; | |
1996 | planeNo = (character >> 16); | |
1997 | } | |
1998 | ||
1999 | // decompose | |
2000 | if (flags & (kCFCompareDiacriticsInsensitive|kCFCompareNonliteral)) { | |
2001 | if (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? decompBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, planeNo)))) { | |
2002 | filledLength = CFUniCharDecomposeCharacter(character, outCharacters, maxBufferLength); | |
2003 | character = *outCharacters; | |
2004 | if ((flags & kCFCompareDiacriticsInsensitive) && (character < 0x0510)) filledLength = 1; // reset if Roman, Greek, Cyrillic | |
2005 | } | |
2006 | } | |
2007 | ||
2008 | // fold case | |
2009 | if (flags & kCFCompareCaseInsensitive) { | |
2010 | const uint8_t *nonBaseBitmap; | |
2011 | bool filterNonBase = (((flags & kCFCompareDiacriticsInsensitive) && (character < 0x0510)) ? true : false); | |
2012 | static const uint8_t *lowerBMP = NULL; | |
2013 | static const uint8_t *caseFoldBMP = NULL; | |
2014 | ||
2015 | if (NULL == lowerBMP) { | |
2016 | lowerBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, 0); | |
2017 | caseFoldBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, 0); | |
2018 | } | |
2019 | ||
2020 | if ((NULL != langCode) && ('I' == character) && ((0 == strcmp(langCode, "tr")) || (0 == strcmp(langCode, "az")))) { // do Turkik special-casing | |
2021 | if (filledLength > 1) { | |
2022 | if (0x0307 == outCharacters[1]) { | |
2023 | memmove(&(outCharacters[index]), &(outCharacters[index + 1]), sizeof(UTF32Char) * (--filledLength)); | |
2024 | character = *outCharacters = 'i'; | |
2025 | isTurkikCapitalI = true; | |
2026 | } | |
2027 | } else if (0x0307 == CFStringGetCharacterFromInlineBuffer(buffer, currentIndex)) { | |
2028 | character = *outCharacters = 'i'; | |
2029 | filledLength = 1; | |
2030 | ++currentIndex; | |
2031 | isTurkikCapitalI = true; | |
2032 | } | |
2033 | } | |
2034 | if (!isTurkikCapitalI && (CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? lowerBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfLowercaseCharacterSet, planeNo))) || CFUniCharIsMemberOfBitmap(character, ((0 == planeNo) ? caseFoldBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharHasNonSelfCaseFoldingCharacterSet, planeNo))))) { | |
2035 | UTF16Char caseFoldBuffer[MAX_CASE_MAPPING_BUF]; | |
2036 | const UTF16Char *bufferP = caseFoldBuffer, *bufferLimit; | |
2037 | UTF32Char *outCharactersP = outCharacters; | |
2038 | uint32_t bufferLength = CFUniCharMapCaseTo(character, caseFoldBuffer, MAX_CASE_MAPPING_BUF, kCFUniCharCaseFold, 0, langCode); | |
2039 | ||
2040 | bufferLimit = bufferP + bufferLength; | |
2041 | ||
2042 | if (filledLength > 0) --filledLength; // decrement filledLength (will add back later) | |
2043 | ||
2044 | // make space for casefold characters | |
2045 | if ((filledLength > 0) && (bufferLength > 1)) { | |
2046 | CFIndex totalScalerLength = 0; | |
2047 | ||
2048 | while (bufferP < bufferLimit) { | |
2049 | if (CFUniCharIsSurrogateHighCharacter(*(bufferP++)) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) ++bufferP; | |
2050 | ++totalScalerLength; | |
2051 | } | |
2052 | memmove(outCharacters + totalScalerLength, outCharacters + 1, filledLength * sizeof(UTF32Char)); | |
2053 | bufferP = caseFoldBuffer; | |
2054 | } | |
2055 | ||
2056 | // fill | |
2057 | while (bufferP < bufferLimit) { | |
2058 | character = *(bufferP++); | |
2059 | if (CFUniCharIsSurrogateHighCharacter(character) && (bufferP < bufferLimit) && CFUniCharIsSurrogateLowCharacter(*bufferP)) { | |
2060 | character = CFUniCharGetLongCharacterForSurrogatePair(character, *(bufferP++)); | |
2061 | nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16)); | |
2062 | } else { | |
2063 | nonBaseBitmap = nonBaseBMP; | |
2064 | } | |
2065 | ||
2066 | if (!filterNonBase || !CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) { | |
2067 | *(outCharactersP++) = character; | |
2068 | ++filledLength; | |
2069 | } | |
2070 | } | |
2071 | } | |
2072 | } | |
2073 | } | |
2074 | ||
2075 | // collect following combining marks | |
2076 | if (flags & (kCFCompareDiacriticsInsensitive|kCFCompareNonliteral)) { | |
2077 | const uint8_t *nonBaseBitmap; | |
2078 | const uint8_t *decompBitmap; | |
2079 | bool doFill = (((flags & kCFCompareDiacriticsInsensitive) && (character < 0x0510)) ? false : true); | |
2080 | ||
2081 | if (doFill && (0 == filledLength)) { // check if really needs to fill | |
2082 | UTF32Char nonBaseCharacter = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex); | |
2083 | ||
2084 | if (CFUniCharIsSurrogateHighCharacter(nonBaseCharacter) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) { | |
2085 | nonBaseCharacter = CFUniCharGetLongCharacterForSurrogatePair(nonBaseCharacter, lowSurrogate); | |
2086 | nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (nonBaseCharacter >> 16)); | |
2087 | decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (nonBaseCharacter >> 16)); | |
2088 | } else { | |
2089 | nonBaseBitmap = nonBaseBMP; | |
2090 | decompBitmap = decompBMP; | |
2091 | } | |
2092 | ||
2093 | if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, nonBaseBitmap)) { | |
2094 | outCharacters[filledLength++] = character; | |
2095 | ||
2096 | if ((0 == (flags & kCFCompareDiacriticsInsensitive)) || (nonBaseCharacter > 0x050F)) { | |
2097 | if (CFUniCharIsMemberOfBitmap(nonBaseCharacter, decompBitmap)) { | |
2098 | filledLength += CFUniCharDecomposeCharacter(nonBaseCharacter, &(outCharacters[filledLength]), maxBufferLength - filledLength); | |
2099 | } else { | |
2100 | outCharacters[filledLength++] = nonBaseCharacter; | |
2101 | } | |
2102 | } | |
2103 | currentIndex += ((nonBaseBitmap == nonBaseBMP) ? 1 : 2); | |
2104 | } else { | |
2105 | doFill = false; | |
2106 | } | |
2107 | } | |
2108 | ||
2109 | while (filledLength < maxBufferLength) { // do the rest | |
2110 | character = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex); | |
2111 | ||
2112 | if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((lowSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, currentIndex + 1)))) { | |
2113 | character = CFUniCharGetLongCharacterForSurrogatePair(character, lowSurrogate); | |
2114 | nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16)); | |
2115 | decompBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (character >> 16)); | |
2116 | } else { | |
2117 | nonBaseBitmap = nonBaseBMP; | |
2118 | decompBitmap = decompBMP; | |
2119 | } | |
2120 | if (isTurkikCapitalI) { | |
2121 | isTurkikCapitalI = false; | |
2122 | } else if (CFUniCharIsMemberOfBitmap(character, nonBaseBitmap)) { | |
2123 | if (doFill && ((0 == (flags & kCFCompareDiacriticsInsensitive)) || (character > 0x050F))) { | |
2124 | if (CFUniCharIsMemberOfBitmap(character, decompBitmap)) { | |
2125 | CFIndex currentLength = CFUniCharDecomposeCharacter(character, &(outCharacters[filledLength]), maxBufferLength - filledLength); | |
2126 | ||
2127 | if (0 == currentLength) break; // didn't fit | |
2128 | ||
2129 | filledLength += currentLength; | |
2130 | } else { | |
2131 | outCharacters[filledLength++] = character; | |
2132 | } | |
2133 | } | |
2134 | currentIndex += ((nonBaseBitmap == nonBaseBMP) ? 1 : 2); | |
2135 | } else { | |
2136 | break; | |
2137 | } | |
2138 | } | |
2139 | ||
2140 | if (filledLength > 1) CFUniCharPrioritySort(outCharacters, filledLength); // priority sort | |
2141 | } | |
2142 | } | |
2143 | ||
2144 | if ((filledLength > 0) && (NULL != consumedLength)) *consumedLength = (currentIndex - index); | |
2145 | ||
2146 | return filledLength; | |
2147 | } | |
9ce05555 A |
2148 | |
/* Special casing for Uk sorting.
   When DO_IGNORE_PUNCTUATION is on, CFStringCompareWithOptions() consults
   these two statics for localized comparisons.
*/
#define DO_IGNORE_PUNCTUATION 1
#if DO_IGNORE_PUNCTUATION
/* Language code that switches punctuation skipping on */
#define UKRAINIAN_LANG_CODE (45)
/* Set to true once the user's language has been examined */
static bool __CFLocaleChecked = false;
/* Plane-0 punctuation bitmap; stays NULL unless the Ukrainian code was found */
static const uint8_t *__CFPunctSetBMP = NULL;
#endif /* DO_IGNORE_PUNCTUATION */
2156 | ||
2157 | /* ??? We need to implement some additional flags here | |
2158 | ??? Also, pay attention to flag 2, which is the NS flag (which CF has as flag 16, w/opposite meaning). | |
2159 | */ | |
2160 | CFComparisonResult CFStringCompareWithOptions(CFStringRef string, CFStringRef string2, CFRange rangeToCompare, CFOptionFlags compareOptions) { | |
2161 | /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */ | |
2162 | CFStringInlineBuffer strBuf1, strBuf2; | |
2163 | UTF32Char ch1, ch2; | |
2164 | const uint8_t *punctBMP = NULL; | |
2165 | Boolean caseInsensitive = (compareOptions & kCFCompareCaseInsensitive ? true : false); | |
2166 | Boolean decompose = (compareOptions & kCFCompareNonliteral ? true : false); | |
2167 | Boolean numerically = (compareOptions & kCFCompareNumerically ? true : false); | |
2168 | Boolean localized = (compareOptions & kCFCompareLocalized ? true : false); | |
2169 | ||
2170 | #if DO_IGNORE_PUNCTUATION | |
2171 | if (localized) { | |
2172 | if (!__CFLocaleChecked) { | |
2173 | CFArrayRef locales = _CFBundleCopyUserLanguages(false); | |
2174 | ||
2175 | if (locales && (CFArrayGetCount(locales) > 0)) { | |
2176 | SInt32 langCode; | |
2177 | ||
2178 | if (CFBundleGetLocalizationInfoForLocalization((CFStringRef)CFArrayGetValueAtIndex(locales, 0), &langCode, NULL, NULL, NULL) && (langCode == UKRAINIAN_LANG_CODE)) { | |
2179 | __CFPunctSetBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, 0); | |
2180 | } | |
2181 | ||
2182 | CFRelease(locales); | |
2183 | } | |
2184 | __CFLocaleChecked = true; | |
2185 | } | |
2186 | ||
2187 | punctBMP = __CFPunctSetBMP; | |
2188 | } | |
2189 | #endif /* DO_IGNORE_PUNCTUATION */ | |
2190 | ||
2191 | CFStringInitInlineBuffer(string, &strBuf1, CFRangeMake(rangeToCompare.location, rangeToCompare.length)); | |
2192 | CFIndex strBuf1_idx = 0; | |
2193 | CFIndex string2_len = CFStringGetLength(string2); | |
2194 | CFStringInitInlineBuffer(string2, &strBuf2, CFRangeMake(0, string2_len)); | |
2195 | CFIndex strBuf2_idx = 0; | |
2196 | ||
2197 | while (strBuf1_idx < rangeToCompare.length && strBuf2_idx < string2_len) { | |
2198 | ch1 = CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx); | |
2199 | ch2 = CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx); | |
2200 | ||
2201 | if (numerically && (ch1 <= '9' && ch1 >= '0') && (ch2 <= '9' && ch2 >= '0')) { // If both are not digits, then don't do numerical comparison | |
d8925383 A |
2202 | uint64_t n1 = 0; // !!! Doesn't work if numbers are > max uint64_t |
2203 | uint64_t n2 = 0; | |
9ce05555 A |
2204 | do { |
2205 | n1 = n1 * 10 + (ch1 - '0'); | |
2206 | strBuf1_idx++; | |
2207 | if (rangeToCompare.length <= strBuf1_idx) break; | |
2208 | ch1 = CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx); | |
2209 | } while (ch1 <= '9' && ch1 >= '0'); | |
2210 | do { | |
2211 | n2 = n2 * 10 + (ch2 - '0'); | |
2212 | strBuf2_idx++; | |
2213 | if (string2_len <= strBuf2_idx) break; | |
2214 | ch2 = CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx); | |
2215 | } while (ch2 <= '9' && ch2 >= '0'); | |
2216 | if (n1 < n2) return kCFCompareLessThan; else if (n1 > n2) return kCFCompareGreaterThan; | |
2217 | continue; // If numbers were equal, go back to top without incrementing the buffer pointers | |
2218 | } | |
2219 | ||
2220 | if (CFUniCharIsSurrogateHighCharacter(ch1)) { | |
2221 | strBuf1_idx++; | |
2222 | if (strBuf1_idx < rangeToCompare.length && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx))) { | |
2223 | ch1 = CFUniCharGetLongCharacterForSurrogatePair(ch1, CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx)); | |
2224 | } else { | |
2225 | strBuf1_idx--; | |
2226 | } | |
2227 | } | |
2228 | if (CFUniCharIsSurrogateHighCharacter(ch2)) { | |
2229 | strBuf2_idx++; | |
2230 | if (strBuf2_idx < string2_len && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx))) { | |
2231 | ch2 = CFUniCharGetLongCharacterForSurrogatePair(ch2, CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx)); | |
2232 | } else { | |
2233 | strBuf2_idx--; | |
2234 | } | |
2235 | } | |
2236 | ||
2237 | if (ch1 != ch2) { | |
2238 | #if DO_IGNORE_PUNCTUATION | |
2239 | if (punctBMP) { | |
2240 | if (CFUniCharIsMemberOfBitmap(ch1, (ch1 < 0x10000 ? punctBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, (ch1 >> 16))))) { | |
2241 | ++strBuf1_idx; continue; | |
2242 | } | |
2243 | if (CFUniCharIsMemberOfBitmap(ch2, (ch2 < 0x10000 ? punctBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, (ch2 >> 16))))) { | |
2244 | ++strBuf2_idx; continue; | |
2245 | } | |
2246 | } | |
2247 | #endif /* DO_IGNORE_PUNCTUATION */ | |
2248 | // We standardize to lowercase here since currently, as of Unicode 3.1.1, it's one-to-one mapping. | |
2249 | // Note we map to uppercase for both SMALL LETTER SIGMA and SMALL LETTER FINAL SIGMA | |
2250 | if (caseInsensitive) { | |
2251 | if (ch1 < 128) { | |
2252 | ch1 -= ((ch1 >= 'A' && ch1 <= 'Z') ? 'A' - 'a' : 0); | |
2253 | } else if (ch1 == 0x03C2 || ch1 == 0x03C3 || ch1 == 0x03A3) { // SMALL SIGMA | |
2254 | ch1 = 0x03A3; | |
2255 | } else { | |
2256 | UniChar buffer[MAX_CASE_MAPPING_BUF]; | |
2257 | ||
2258 | if (CFUniCharMapCaseTo(ch1, buffer, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, 0, NULL) > 1) { // It's supposed to be surrogates | |
2259 | ch1 = CFUniCharGetLongCharacterForSurrogatePair(buffer[0], buffer[1]); | |
2260 | } else { | |
2261 | ch1 = *buffer; | |
2262 | } | |
2263 | } | |
2264 | if (ch2 < 128) { | |
2265 | ch2 -= ((ch2 >= 'A' && ch2 <= 'Z') ? 'A' - 'a' : 0); | |
2266 | } else if (ch2 == 0x03C2 || ch2 == 0x03C3 || ch2 == 0x03A3) { // SMALL SIGMA | |
2267 | ch2 = 0x03A3; | |
2268 | } else { | |
2269 | UniChar buffer[MAX_CASE_MAPPING_BUF]; | |
2270 | ||
2271 | if (CFUniCharMapCaseTo(ch2, buffer, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, 0, NULL) > 1) { // It's supposed to be surrogates | |
2272 | ch2 = CFUniCharGetLongCharacterForSurrogatePair(buffer[0], buffer[1]); | |
2273 | } else { | |
2274 | ch2 = *buffer; | |
2275 | } | |
2276 | } | |
2277 | } | |
2278 | ||
2279 | if (ch1 != ch2) { // still different | |
2280 | if (decompose) { // ??? This is not exactly the canonical comparison (We need to do priority sort) | |
2281 | Boolean isCh1Decomposable = (ch1 > 0x7F && CFUniCharIsMemberOf(ch1, kCFUniCharDecomposableCharacterSet)); | |
2282 | Boolean isCh2Decomposable = (ch2 > 0x7F && CFUniCharIsMemberOf(ch2, kCFUniCharDecomposableCharacterSet)); | |
2283 | ||
2284 | if (isCh1Decomposable != isCh2Decomposable) { | |
2285 | UTF32Char decomposedCharater[MAX_DECOMPOSED_LENGTH]; | |
2286 | UInt32 decomposedCharacterLength; | |
2287 | UInt32 idx; | |
2288 | ||
2289 | if (isCh1Decomposable) { | |
2290 | decomposedCharacterLength = CFUniCharDecomposeCharacter(ch1, decomposedCharater, MAX_DECOMPOSED_LENGTH); | |
d8925383 A |
2291 | if ((string2_len - strBuf2_idx) < decomposedCharacterLength) { // the remaining other length is shorter |
2292 | if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan; | |
2293 | } | |
2294 | for (idx = 0; idx < decomposedCharacterLength; idx++) { | |
9ce05555 A |
2295 | ch1 = decomposedCharater[idx]; |
2296 | if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan; | |
2297 | strBuf2_idx++; ch2 = (strBuf2_idx < string2_len ? CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx) : 0xffff); | |
2298 | if (CFUniCharIsSurrogateHighCharacter(ch2)) { | |
2299 | strBuf2_idx++; | |
2300 | if (strBuf2_idx < string2_len && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx))) { | |
2301 | ch2 = CFUniCharGetLongCharacterForSurrogatePair(ch2, CFStringGetCharacterFromInlineBuffer(&strBuf2, strBuf2_idx)); | |
2302 | } else { | |
2303 | strBuf2_idx--; | |
2304 | } | |
2305 | } | |
2306 | } | |
2307 | strBuf1_idx++; continue; | |
2308 | } else { // ch2 is decomposable, then | |
2309 | decomposedCharacterLength = CFUniCharDecomposeCharacter(ch2, decomposedCharater, MAX_DECOMPOSED_LENGTH); | |
d8925383 A |
2310 | if ((rangeToCompare.length - strBuf1_idx) < decomposedCharacterLength) { // the remaining other length is shorter |
2311 | if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan; | |
2312 | } | |
9ce05555 A |
2313 | for (idx = 0; idx < decomposedCharacterLength && strBuf1_idx < rangeToCompare.length; idx++) { |
2314 | ch2 = decomposedCharater[idx]; | |
2315 | if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan; | |
2316 | strBuf1_idx++; ch1 = (strBuf1_idx < rangeToCompare.length ? CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx) : 0xffff); | |
2317 | if (CFUniCharIsSurrogateHighCharacter(ch1)) { | |
2318 | strBuf1_idx++; | |
2319 | if (strBuf1_idx < rangeToCompare.length && CFUniCharIsSurrogateLowCharacter(CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx))) { | |
2320 | ch1 = CFUniCharGetLongCharacterForSurrogatePair(ch1, CFStringGetCharacterFromInlineBuffer(&strBuf1, strBuf1_idx)); | |
2321 | } else { | |
2322 | strBuf1_idx--; | |
2323 | } | |
2324 | } | |
2325 | } | |
2326 | strBuf2_idx++; continue; | |
2327 | } | |
2328 | } | |
2329 | } | |
2330 | if (ch1 < ch2) return kCFCompareLessThan; else if (ch1 > ch2) return kCFCompareGreaterThan; | |
2331 | } | |
2332 | } | |
2333 | strBuf1_idx++; strBuf2_idx++; | |
2334 | } | |
2335 | if (strBuf1_idx < rangeToCompare.length) { | |
2336 | return kCFCompareGreaterThan; | |
2337 | } else if (strBuf2_idx < string2_len) { | |
2338 | return kCFCompareLessThan; | |
2339 | } else { | |
2340 | return kCFCompareEqualTo; | |
2341 | } | |
2342 | } | |
2343 | ||
2344 | ||
2345 | CFComparisonResult CFStringCompare(CFStringRef string, CFStringRef str2, CFOptionFlags options) { | |
2346 | return CFStringCompareWithOptions(string, str2, CFRangeMake(0, CFStringGetLength(string)), options); | |
2347 | } | |
2348 | ||
d8925383 A |
/* Element count of the on-stack UTF32Char scratch arrays declared in CFStringFindWithOptions() */
#define kCFStringStackBufferLength (64)
2350 | ||
9ce05555 A |
2351 | Boolean CFStringFindWithOptions(CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions, CFRange *result) { |
2352 | /* No objc dispatch needed here since CFStringInlineBuffer works with both CFString and NSString */ | |
d8925383 A |
2353 | CFIndex findStrLen = CFStringGetLength(stringToFind); |
2354 | Boolean didFind = false; | |
2355 | bool lengthVariants = ((compareOptions & (kCFCompareCaseInsensitive|kCFCompareNonliteral|kCFCompareDiacriticsInsensitive)) ? true : false); | |
2356 | ||
2357 | if ((findStrLen > 0) && (rangeToSearch.length > 0) && ((findStrLen <= rangeToSearch.length) || lengthVariants)) { | |
2358 | UTF32Char strBuf1[kCFStringStackBufferLength]; | |
2359 | UTF32Char strBuf2[kCFStringStackBufferLength]; | |
2360 | CFStringInlineBuffer inlineBuf1, inlineBuf2; | |
2361 | UTF32Char str1Char, str2Char; | |
2362 | CFStringEncoding eightBitEncoding = __CFStringGetEightBitStringEncoding(); | |
2363 | const uint8_t *str1Bytes = CFStringGetCStringPtr(string, eightBitEncoding); | |
2364 | const uint8_t *str2Bytes = CFStringGetCStringPtr(stringToFind, eightBitEncoding); | |
2365 | const UTF32Char *characters, *charactersLimit; | |
2366 | const uint8_t *langCode = NULL; | |
2367 | CFIndex fromLoc, toLoc; | |
2368 | CFIndex str1Index, str2Index; | |
2369 | CFIndex strBuf1Len, strBuf2Len; | |
2370 | bool equalityOptions = ((lengthVariants || (compareOptions & kCFCompareWidthInsensitive)) ? true : false); | |
2371 | bool caseInsensitive = ((compareOptions & kCFCompareCaseInsensitive) ? true : false); | |
2372 | int8_t delta; | |
2373 | ||
2374 | ||
2375 | CFStringInitInlineBuffer(string, &inlineBuf1, CFRangeMake(0, rangeToSearch.location + rangeToSearch.length)); | |
2376 | CFStringInitInlineBuffer(stringToFind, &inlineBuf2, CFRangeMake(0, findStrLen)); | |
2377 | ||
2378 | if (compareOptions & kCFCompareBackwards) { | |
2379 | fromLoc = rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen); | |
2380 | toLoc = (((compareOptions & kCFCompareAnchored) && !lengthVariants) ? fromLoc : rangeToSearch.location); | |
2381 | } else { | |
2382 | fromLoc = rangeToSearch.location; | |
2383 | toLoc = ((compareOptions & kCFCompareAnchored) ? fromLoc : rangeToSearch.location + rangeToSearch.length - (lengthVariants ? 1 : findStrLen)); | |
2384 | } | |
2385 | ||
2386 | delta = ((fromLoc <= toLoc) ? 1 : -1); | |
9ce05555 | 2387 | |
d8925383 A |
2388 | if ((NULL != str1Bytes) && (NULL != str2Bytes)) { |
2389 | CFIndex maxStr1Index = (rangeToSearch.location + rangeToSearch.length); | |
2390 | uint8_t str1Byte, str2Byte; | |
9ce05555 | 2391 | |
d8925383 A |
2392 | while (1) { |
2393 | str1Index = fromLoc; | |
2394 | str2Index = 0; | |
2395 | ||
2396 | while ((str1Index < maxStr1Index) && (str2Index < findStrLen)) { | |
2397 | str1Byte = str1Bytes[str1Index]; | |
2398 | str2Byte = str2Bytes[str2Index]; | |
2399 | ||
2400 | if (str1Byte != str2Byte) { | |
2401 | if (equalityOptions) { | |
2402 | if ((str1Byte < 0x80) && ((NULL == langCode) || ('I' != str1Byte))) { | |
2403 | if (caseInsensitive && (str1Byte >= 'A') && (str1Byte <= 'Z')) str1Byte += ('a' - 'A'); | |
2404 | *strBuf1 = str1Byte; | |
2405 | strBuf1Len = 1; | |
2406 | } else { | |
2407 | str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index); | |
2408 | strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL); | |
2409 | if (1 > strBuf1Len) { | |
2410 | *strBuf1 = str1Char; | |
2411 | strBuf1Len = 1; | |
2412 | } | |
2413 | } | |
2414 | if ((str2Byte < 0x80) && ((NULL == langCode) || ('I' != str2Byte))) { | |
2415 | if (caseInsensitive && (str2Byte >= 'A') && (str2Byte <= 'Z')) str2Byte += ('a' - 'A'); | |
2416 | *strBuf2 = str2Byte; | |
2417 | strBuf2Len = 1; | |
2418 | } else { | |
2419 | str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index); | |
2420 | strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL); | |
2421 | if (1 > strBuf2Len) { | |
2422 | *strBuf2 = str2Char; | |
2423 | strBuf2Len = 1; | |
2424 | } | |
2425 | } | |
9ce05555 | 2426 | |
d8925383 A |
2427 | if ((1 == strBuf1Len) && (1 == strBuf2Len)) { // normal case |
2428 | if (*strBuf1 != *strBuf2) break; | |
2429 | } else { | |
2430 | CFIndex delta; | |
9ce05555 | 2431 | |
d8925383 A |
2432 | if (!caseInsensitive && (strBuf1Len != strBuf2Len)) break; |
2433 | if (memcmp(strBuf1, strBuf2, sizeof(UTF32Char) * __CFMin(strBuf1Len, strBuf2Len))) break; | |
9ce05555 | 2434 | |
d8925383 A |
2435 | if (strBuf1Len < strBuf2Len) { |
2436 | delta = strBuf2Len - strBuf1Len; | |
2437 | ||
2438 | if ((str1Index + strBuf1Len + delta) > (rangeToSearch.location + rangeToSearch.length)) break; | |
9ce05555 | 2439 | |
d8925383 A |
2440 | characters = &(strBuf2[strBuf1Len]); |
2441 | charactersLimit = characters + delta; | |
2442 | ||
2443 | while (characters < charactersLimit) { | |
2444 | strBuf1Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1), &inlineBuf1, str1Index + 1, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, NULL); | |
2445 | if ((strBuf1Len > 0) || (*characters != *strBuf1)) break; | |
2446 | ++characters; ++str1Index; | |
2447 | } | |
2448 | if (characters < charactersLimit) break; | |
2449 | } else if (strBuf2Len < strBuf1Len) { | |
2450 | delta = strBuf1Len - strBuf2Len; | |
2451 | ||
2452 | if ((str2Index + strBuf2Len + delta) > findStrLen) break; | |
2453 | ||
2454 | characters = &(strBuf1[strBuf2Len]); | |
2455 | charactersLimit = characters + delta; | |
2456 | ||
2457 | while (characters < charactersLimit) { | |
2458 | strBuf2Len = __CFStringFoldCharacterClusterAtIndex(CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str1Index + 1), &inlineBuf2, str2Index + 1, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, NULL); | |
2459 | if ((strBuf2Len > 0) || (*characters != *strBuf2)) break; | |
2460 | ++characters; ++str2Index; | |
2461 | } | |
2462 | if (characters < charactersLimit) break; | |
2463 | } | |
2464 | } | |
2465 | } else { | |
2466 | break; | |
2467 | } | |
2468 | } | |
2469 | ++str1Index; ++str2Index; | |
9ce05555 | 2470 | } |
d8925383 A |
2471 | |
2472 | if (str2Index == findStrLen) { | |
2473 | if (((kCFCompareBackwards|kCFCompareAnchored) != (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) || (str1Index == (rangeToSearch.location + rangeToSearch.length))) { | |
2474 | didFind = true; | |
2475 | if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc); | |
2476 | } | |
2477 | break; | |
9ce05555 | 2478 | } |
9ce05555 | 2479 | |
d8925383 A |
2480 | if (fromLoc == toLoc) break; |
2481 | fromLoc += delta; | |
2482 | } | |
2483 | } else if (equalityOptions) { | |
2484 | UTF16Char otherChar; | |
2485 | CFIndex str1UsedLen, str2UsedLen, strBuf1Index = 0, strBuf2Index = 0; | |
2486 | bool diacriticsInsensitive = ((compareOptions & kCFCompareDiacriticsInsensitive) ? true : false); | |
2487 | static const uint8_t *nonBaseBMP = NULL; | |
2488 | static const uint8_t *combClassBMP = NULL; | |
2489 | ||
2490 | if (NULL == nonBaseBMP) { | |
2491 | nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0); | |
2492 | combClassBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0); | |
2493 | } | |
9ce05555 | 2494 | |
d8925383 A |
2495 | while (1) { |
2496 | str1Index = fromLoc; | |
2497 | str2Index = 0; | |
9ce05555 | 2498 | |
d8925383 | 2499 | strBuf1Len = strBuf2Len = 0; |
9ce05555 | 2500 | |
d8925383 A |
2501 | while (str2Index < findStrLen) { |
2502 | if (strBuf1Len == 0) { | |
2503 | str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index); | |
2504 | if (caseInsensitive && (str1Char >= 'A') && (str1Char <= 'Z') && ((NULL == langCode) || (str1Char != 'I'))) str1Char += ('a' - 'A'); | |
2505 | str1UsedLen = 1; | |
9ce05555 | 2506 | } else { |
d8925383 A |
2507 | str1Char = strBuf1[strBuf1Index++]; |
2508 | } | |
2509 | if (strBuf2Len == 0) { | |
2510 | str2Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index); | |
2511 | if (caseInsensitive && (str2Char >= 'A') && (str2Char <= 'Z') && ((NULL == langCode) || (str2Char != 'I'))) str2Char += ('a' - 'A'); | |
2512 | str2UsedLen = 1; | |
2513 | } else { | |
2514 | str2Char = strBuf2[strBuf2Index++]; | |
9ce05555 | 2515 | } |
9ce05555 | 2516 | |
d8925383 A |
2517 | if (str1Char != str2Char) { |
2518 | if ((str1Char < 0x80) && (str2Char < 0x80) && ((NULL == langCode) || !caseInsensitive)) break; | |
9ce05555 | 2519 | |
d8925383 A |
2520 | if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) { |
2521 | str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar); | |
2522 | str1UsedLen = 2; | |
2523 | } | |
2524 | ||
2525 | if (CFUniCharIsSurrogateHighCharacter(str2Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index + 1)))) { | |
2526 | str2Char = CFUniCharGetLongCharacterForSurrogatePair(str2Char, otherChar); | |
2527 | str2UsedLen = 2; | |
2528 | } | |
2529 | ||
2530 | if (diacriticsInsensitive && (str1Index > fromLoc)) { | |
2531 | if ((0 == strBuf1Len) && CFUniCharIsMemberOfBitmap(str1Char, ((str1Char < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (str1Char >> 16))))) str1Char = str2Char; | |
2532 | if ((0 == strBuf2Len) && CFUniCharIsMemberOfBitmap(str2Char, ((str2Char < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (str2Char >> 16))))) str2Char = str1Char; | |
2533 | } | |
2534 | ||
2535 | if (str1Char != str2Char) { | |
2536 | if (0 == strBuf1Len) { | |
2537 | strBuf1Len = __CFStringFoldCharacterClusterAtIndex(str1Char, &inlineBuf1, str1Index, compareOptions, langCode, strBuf1, kCFStringStackBufferLength, &str1UsedLen); | |
2538 | if (strBuf1Len > 0) { | |
2539 | str1Char = *strBuf1; | |
2540 | strBuf1Index = 1; | |
2541 | } | |
2542 | } | |
2543 | ||
2544 | if ((0 == strBuf1Len) && (0 < strBuf2Len)) break; | |
2545 | ||
2546 | if ((0 == strBuf2Len) && ((0 == strBuf1Len) || (str1Char != str2Char))) { | |
2547 | strBuf2Len = __CFStringFoldCharacterClusterAtIndex(str2Char, &inlineBuf2, str2Index, compareOptions, langCode, strBuf2, kCFStringStackBufferLength, &str2UsedLen); | |
2548 | if ((0 == strBuf2Len) || (str1Char != *strBuf2)) break; | |
2549 | strBuf2Index = 1; | |
2550 | } | |
2551 | } | |
2552 | ||
2553 | if ((strBuf1Len > 0) && (strBuf2Len > 0)) { | |
2554 | while ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) { | |
2555 | if (strBuf1[strBuf1Index] != strBuf2[strBuf2Index]) break; | |
2556 | ++strBuf1Index; ++strBuf2Index; | |
2557 | } | |
2558 | if ((strBuf1Index < strBuf1Len) && (strBuf2Index < strBuf2Len)) break; | |
2559 | } | |
9ce05555 | 2560 | } |
d8925383 A |
2561 | |
2562 | if ((strBuf1Len > 0) && (strBuf1Index == strBuf1Len)) strBuf1Len = 0; | |
2563 | if ((strBuf2Len > 0) && (strBuf2Index == strBuf2Len)) strBuf2Len = 0; | |
2564 | ||
2565 | if (strBuf1Len == 0) str1Index += str1UsedLen; | |
2566 | if (strBuf2Len == 0) str2Index += str2UsedLen; | |
9ce05555 | 2567 | } |
d8925383 A |
2568 | |
2569 | if (str2Index == findStrLen) { | |
2570 | bool match = true; | |
2571 | ||
2572 | if (strBuf1Len > 0) { | |
2573 | match = false; | |
2574 | ||
2575 | if ((compareOptions & kCFCompareDiacriticsInsensitive) && (strBuf1[0] < 0x0510)) { | |
2576 | while (strBuf1Index < strBuf1Len) { | |
2577 | if (!CFUniCharIsMemberOfBitmap(strBuf1[strBuf1Index], ((strBuf1[strBuf1Index] < 0x10000) ? nonBaseBMP : CFUniCharGetBitmapPtrForPlane(kCFUniCharCanonicalDecomposableCharacterSet, (strBuf1[strBuf1Index] >> 16))))) break; | |
2578 | ++strBuf1Index; | |
2579 | } | |
2580 | ||
2581 | if (strBuf1Index == strBuf1Len) { | |
2582 | str1Index += str1UsedLen; | |
2583 | match = true; | |
2584 | } | |
2585 | } | |
9ce05555 | 2586 | } |
9ce05555 | 2587 | |
d8925383 A |
2588 | if (match && (compareOptions & (kCFCompareDiacriticsInsensitive|kCFCompareNonliteral)) && (str1Index < (rangeToSearch.location + rangeToSearch.length))) { |
2589 | const uint8_t *nonBaseBitmap; | |
9ce05555 | 2590 | |
d8925383 A |
2591 | str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index); |
2592 | ||
2593 | if (CFUniCharIsSurrogateHighCharacter(str1Char) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index + 1)))) { | |
2594 | str1Char = CFUniCharGetLongCharacterForSurrogatePair(str1Char, otherChar); | |
2595 | nonBaseBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (str1Char >> 16)); | |
2596 | } else { | |
2597 | nonBaseBitmap = nonBaseBMP; | |
2598 | } | |
9ce05555 | 2599 | |
d8925383 A |
2600 | if (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBitmap)) { |
2601 | if (diacriticsInsensitive) { | |
2602 | if (str1Char < 0x10000) { | |
2603 | CFIndex index = str1Index; | |
9ce05555 | 2604 | |
d8925383 A |
2605 | do { |
2606 | str1Char = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, --index); | |
2607 | } while (CFUniCharIsMemberOfBitmap(str1Char, nonBaseBMP), (rangeToSearch.location < index)); | |
9ce05555 | 2608 | |
d8925383 A |
2609 | if (str1Char < 0x0510) { |
2610 | CFIndex maxIndex = (rangeToSearch.location + rangeToSearch.length); | |
2611 | ||
2612 | while (++str1Index < maxIndex) if (!CFUniCharIsMemberOfBitmap(CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index), nonBaseBMP)) break; | |
9ce05555 A |
2613 | } |
2614 | } | |
d8925383 A |
2615 | } else { |
2616 | match = false; | |
9ce05555 | 2617 | } |
d8925383 A |
2618 | } else if (!diacriticsInsensitive) { |
2619 | otherChar = CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index - 1); | |
2620 | ||
2621 | // this is assuming viramas are only in BMP ??? | |
2622 | if ((str1Char == COMBINING_GRAPHEME_JOINER) || (otherChar == COMBINING_GRAPHEME_JOINER) || (otherChar == ZERO_WIDTH_JOINER) || ((otherChar >= HANGUL_CHOSEONG_START) && (otherChar <= HANGUL_JONGSEONG_END)) || (CFUniCharGetCombiningPropertyForCharacter(otherChar, combClassBMP) == 9)) { | |
4c91a73d | 2623 | CFRange clusterRange = CFStringGetRangeOfCharacterClusterAtIndex(string, str1Index - 1, kCFStringGramphemeCluster); |
d8925383 A |
2624 | |
2625 | if (str1Index < (clusterRange.location + clusterRange.length)) match = false; | |
9ce05555 | 2626 | } |
9ce05555 A |
2627 | } |
2628 | } | |
d8925383 A |
2629 | |
2630 | if (match) { | |
2631 | if (((kCFCompareBackwards|kCFCompareAnchored) != (compareOptions & (kCFCompareBackwards|kCFCompareAnchored))) || (str1Index == (rangeToSearch.location + rangeToSearch.length))) { | |
2632 | didFind = true; | |
2633 | if (NULL != result) *result = CFRangeMake(fromLoc, str1Index - fromLoc); | |
2634 | } | |
9ce05555 A |
2635 | break; |
2636 | } | |
9ce05555 | 2637 | } |
d8925383 A |
2638 | |
2639 | if (fromLoc == toLoc) break; | |
2640 | fromLoc += delta; | |
9ce05555 | 2641 | } |
d8925383 A |
2642 | } else { |
2643 | while (1) { | |
2644 | str1Index = fromLoc; | |
2645 | str2Index = 0; | |
2646 | ||
2647 | while (str2Index < findStrLen) { | |
2648 | if (CFStringGetCharacterFromInlineBuffer(&inlineBuf1, str1Index) != CFStringGetCharacterFromInlineBuffer(&inlineBuf2, str2Index)) break; | |
9ce05555 | 2649 | |
d8925383 | 2650 | ++str1Index; ++str2Index; |
9ce05555 | 2651 | } |
d8925383 A |
2652 | |
2653 | if (str2Index == findStrLen) { | |
2654 | didFind = true; | |
2655 | if (NULL != result) *result = CFRangeMake(fromLoc, findStrLen); | |
2656 | break; | |
2657 | } | |
2658 | ||
2659 | if (fromLoc == toLoc) break; | |
2660 | fromLoc += delta; | |
9ce05555 | 2661 | } |
9ce05555 | 2662 | } |
d8925383 | 2663 | } |
9ce05555 | 2664 | |
d8925383 | 2665 | return didFind; |
9ce05555 A |
2666 | } |
2667 | ||
9ce05555 A |
2668 | // Functions to deal with special arrays of CFRange, CFDataRef, created by CFStringCreateArrayWithFindResults() |
2669 | ||
2670 | static const void *__rangeRetain(CFAllocatorRef allocator, const void *ptr) { | |
2671 | CFRetain(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange))); | |
2672 | return ptr; | |
2673 | } | |
2674 | ||
2675 | static void __rangeRelease(CFAllocatorRef allocator, const void *ptr) { | |
2676 | CFRelease(*(CFDataRef *)((uint8_t *)ptr + sizeof(CFRange))); | |
2677 | } | |
2678 | ||
2679 | static CFStringRef __rangeCopyDescription(const void *ptr) { | |
2680 | CFRange range = *(CFRange *)ptr; | |
2681 | return CFStringCreateWithFormat(NULL /* ??? allocator */, NULL, CFSTR("{%d, %d}"), range.location, range.length); | |
2682 | } | |
2683 | ||
2684 | static Boolean __rangeEqual(const void *ptr1, const void *ptr2) { | |
2685 | CFRange range1 = *(CFRange *)ptr1; | |
2686 | CFRange range2 = *(CFRange *)ptr2; | |
2687 | return (range1.location == range2.location) && (range1.length == range2.length); | |
2688 | } | |
2689 | ||
2690 | ||
2691 | CFArrayRef CFStringCreateArrayWithFindResults(CFAllocatorRef alloc, CFStringRef string, CFStringRef stringToFind, CFRange rangeToSearch, CFOptionFlags compareOptions) { | |
2692 | CFRange foundRange; | |
2693 | Boolean backwards = compareOptions & kCFCompareBackwards; | |
2694 | UInt32 endIndex = rangeToSearch.location + rangeToSearch.length; | |
2695 | CFMutableDataRef rangeStorage = NULL; // Basically an array of CFRange, CFDataRef (packed) | |
2696 | uint8_t *rangeStorageBytes = NULL; | |
2697 | CFIndex foundCount = 0; | |
2698 | CFIndex capacity = 0; // Number of CFRange, CFDataRef element slots in rangeStorage | |
2699 | ||
2700 | if (alloc == NULL) alloc = __CFGetDefaultAllocator(); | |
2701 | ||
2702 | while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) { | |
2703 | // Determine the next range | |
2704 | if (backwards) { | |
2705 | rangeToSearch.length = foundRange.location - rangeToSearch.location; | |
2706 | } else { | |
2707 | rangeToSearch.location = foundRange.location + foundRange.length; | |
2708 | rangeToSearch.length = endIndex - rangeToSearch.location; | |
2709 | } | |
2710 | ||
2711 | // If necessary, grow the data and squirrel away the found range | |
2712 | if (foundCount >= capacity) { | |
2713 | if (rangeStorage == NULL) rangeStorage = CFDataCreateMutable(alloc, 0); | |
2714 | capacity = (capacity + 4) * 2; | |
2715 | CFDataSetLength(rangeStorage, capacity * (sizeof(CFRange) + sizeof(CFDataRef))); | |
2716 | rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage) + foundCount * (sizeof(CFRange) + sizeof(CFDataRef)); | |
2717 | } | |
2718 | memmove(rangeStorageBytes, &foundRange, sizeof(CFRange)); // The range | |
2719 | memmove(rangeStorageBytes + sizeof(CFRange), &rangeStorage, sizeof(CFDataRef)); // The data | |
2720 | rangeStorageBytes += (sizeof(CFRange) + sizeof(CFDataRef)); | |
2721 | foundCount++; | |
2722 | } | |
2723 | ||
2724 | if (foundCount > 0) { | |
2725 | CFIndex cnt; | |
2726 | CFMutableArrayRef array; | |
2727 | const CFArrayCallBacks callbacks = {0, __rangeRetain, __rangeRelease, __rangeCopyDescription, __rangeEqual}; | |
2728 | ||
2729 | CFDataSetLength(rangeStorage, foundCount * (sizeof(CFRange) + sizeof(CFDataRef))); // Tighten storage up | |
2730 | rangeStorageBytes = (uint8_t *)CFDataGetMutableBytePtr(rangeStorage); | |
2731 | ||
2732 | array = CFArrayCreateMutable(alloc, foundCount * sizeof(CFRange *), &callbacks); | |
2733 | for (cnt = 0; cnt < foundCount; cnt++) { | |
2734 | // Each element points to the appropriate CFRange in the CFData | |
2735 | CFArrayAppendValue(array, rangeStorageBytes + cnt * (sizeof(CFRange) + sizeof(CFDataRef))); | |
2736 | } | |
2737 | CFRelease(rangeStorage); // We want the data to go away when all CFRanges inside it are released... | |
2738 | return array; | |
2739 | } else { | |
2740 | return NULL; | |
2741 | } | |
2742 | } | |
2743 | ||
2744 | ||
2745 | CFRange CFStringFind(CFStringRef string, CFStringRef stringToFind, CFOptionFlags compareOptions) { | |
2746 | CFRange foundRange; | |
2747 | ||
2748 | if (CFStringFindWithOptions(string, stringToFind, CFRangeMake(0, CFStringGetLength(string)), compareOptions, &foundRange)) { | |
2749 | return foundRange; | |
2750 | } else { | |
2751 | return CFRangeMake(kCFNotFound, 0); | |
2752 | } | |
2753 | } | |
2754 | ||
2755 | Boolean CFStringHasPrefix(CFStringRef string, CFStringRef prefix) { | |
2756 | return CFStringFindWithOptions(string, prefix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored, NULL); | |
2757 | } | |
2758 | ||
2759 | Boolean CFStringHasSuffix(CFStringRef string, CFStringRef suffix) { | |
2760 | return CFStringFindWithOptions(string, suffix, CFRangeMake(0, CFStringGetLength(string)), kCFCompareAnchored|kCFCompareBackwards, NULL); | |
2761 | } | |
2762 | ||
9ce05555 A |
2763 | #define MAX_TRANSCODING_LENGTH 4 |
2764 | ||
9ce05555 A |
2765 | #define HANGUL_JONGSEONG_COUNT (28) |
2766 | ||
2767 | CF_INLINE bool _CFStringIsHangulLVT(UTF32Char character) { | |
2768 | return (((character - HANGUL_SYLLABLE_START) % HANGUL_JONGSEONG_COUNT) ? true : false); | |
2769 | } | |
2770 | ||
/* Sixteen per-hint lengths; the largest entry is 4.
   NOTE(review): presumably indexed by transcoding-hint code, with 0 marking
   unused slots -- the lookup site is outside this excerpt, confirm there. */
static uint8_t __CFTranscodingHintLength[] = {
    2, 3, 4, 4,
    4, 4, 4, 2,
    2, 2, 2, 4,
    0, 0, 0, 0
};
2774 | ||
2775 | enum { | |
2776 | kCFStringHangulStateL, | |
2777 | kCFStringHangulStateV, | |
2778 | kCFStringHangulStateT, | |
2779 | kCFStringHangulStateLV, | |
2780 | kCFStringHangulStateLVT, | |
2781 | kCFStringHangulStateBreak | |
2782 | }; | |
2783 | ||
2784 | static CFRange _CFStringInlineBufferGetComposedRange(CFStringInlineBuffer *buffer, CFIndex start, CFStringCharacterClusterType type, const uint8_t *nonBaseBMP) { | |
2785 | CFIndex end = start + 1; | |
2786 | const uint8_t *nonBase = nonBaseBMP; | |
2787 | UTF32Char character; | |
2788 | UTF16Char otherSurrogate; | |
2789 | uint8_t step; | |
2790 | ||
2791 | character = CFStringGetCharacterFromInlineBuffer(buffer, start); | |
2792 | ||
2793 | ||
2794 | // We don't combine characters in Armenian ~ Limbu range for backward deletion | |
2795 | if ((type != kCFStringBackwardDeletionCluster) || (character < 0x0530) || (character > 0x194F)) { | |
2796 | // Check if the current is surrogate | |
2797 | if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start + 1)))) { | |
2798 | ++end; | |
2799 | character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate); | |
2800 | nonBase = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16)); | |
2801 | } | |
2802 | ||
2803 | // Extend backward | |
2804 | while (start > 0) { | |
2805 | if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break; | |
2806 | ||
2807 | if (character < 0x10000) { // the first round could be already be non-BMP | |
2808 | if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)))) { | |
2809 | character = CFUniCharGetLongCharacterForSurrogatePair(otherSurrogate, character); | |
2810 | nonBase = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16)); | |
2811 | --start; | |
2812 | } else { | |
2813 | nonBase = nonBaseBMP; | |
2814 | } | |
2815 | } | |
2816 | ||
2817 | if (!CFUniCharIsMemberOfBitmap(character, nonBase) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break; | |
2818 | ||
2819 | --start; | |
2820 | ||
2821 | character = CFStringGetCharacterFromInlineBuffer(buffer, start); | |
2822 | } | |
2823 | } | |
2824 | ||
2825 | // Hangul | |
2826 | if (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END))) { | |
2827 | uint8_t state; | |
2828 | uint8_t initialState; | |
2829 | ||
2830 | if (character < HANGUL_JUNGSEONG_START) { | |
2831 | state = kCFStringHangulStateL; | |
2832 | } else if (character < HANGUL_JONGSEONG_START) { | |
2833 | state = kCFStringHangulStateV; | |
2834 | } else if (character < HANGUL_SYLLABLE_START) { | |
2835 | state = kCFStringHangulStateT; | |
2836 | } else { | |
2837 | state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV); | |
2838 | } | |
2839 | initialState = state; | |
2840 | ||
2841 | // Extend backward | |
2842 | while (((character = CFStringGetCharacterFromInlineBuffer(buffer, start - 1)) >= HANGUL_CHOSEONG_START) && (character <= HANGUL_SYLLABLE_END) && ((character <= HANGUL_JONGSEONG_END) || (character >= HANGUL_SYLLABLE_START))) { | |
2843 | switch (state) { | |
2844 | case kCFStringHangulStateV: | |
2845 | if (character <= HANGUL_CHOSEONG_END) { | |
2846 | state = kCFStringHangulStateL; | |
2847 | } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END) && !_CFStringIsHangulLVT(character)) { | |
2848 | state = kCFStringHangulStateLV; | |
2849 | } else if (character > HANGUL_JUNGSEONG_END) { | |
2850 | state = kCFStringHangulStateBreak; | |
2851 | } | |
2852 | break; | |
2853 | ||
2854 | case kCFStringHangulStateT: | |
2855 | if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JUNGSEONG_END)) { | |
2856 | state = kCFStringHangulStateV; | |
2857 | } else if ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)) { | |
2858 | state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV); | |
2859 | } else if (character < HANGUL_JUNGSEONG_START) { | |
2860 | state = kCFStringHangulStateBreak; | |
2861 | } | |
2862 | break; | |
2863 | ||
2864 | default: | |
2865 | state = ((character < HANGUL_JUNGSEONG_START) ? kCFStringHangulStateL : kCFStringHangulStateBreak); | |
2866 | break; | |
2867 | } | |
2868 | ||
2869 | if (state == kCFStringHangulStateBreak) break; | |
2870 | --start; | |
2871 | } | |
2872 | ||
2873 | // Extend forward | |
2874 | state = initialState; | |
2875 | while (((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) && (((character >= HANGUL_CHOSEONG_START) && (character <= HANGUL_JONGSEONG_END)) || ((character >= HANGUL_SYLLABLE_START) && (character <= HANGUL_SYLLABLE_END)))) { | |
2876 | switch (state) { | |
2877 | case kCFStringHangulStateLV: | |
2878 | case kCFStringHangulStateV: | |
2879 | if ((character >= HANGUL_JUNGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) { | |
2880 | state = ((character < HANGUL_JONGSEONG_START) ? kCFStringHangulStateV : kCFStringHangulStateT); | |
2881 | } else { | |
2882 | state = kCFStringHangulStateBreak; | |
2883 | } | |
2884 | break; | |
2885 | ||
2886 | case kCFStringHangulStateLVT: | |
2887 | case kCFStringHangulStateT: | |
2888 | state = (((character >= HANGUL_JONGSEONG_START) && (character <= HANGUL_JONGSEONG_END)) ? kCFStringHangulStateT : kCFStringHangulStateBreak); | |
2889 | break; | |
2890 | ||
2891 | default: | |
2892 | if (character < HANGUL_JUNGSEONG_START) { | |
2893 | state = kCFStringHangulStateL; | |
2894 | } else if (character < HANGUL_JONGSEONG_START) { | |
2895 | state = kCFStringHangulStateV; | |
2896 | } else if (character >= HANGUL_SYLLABLE_START) { | |
2897 | state = (_CFStringIsHangulLVT(character) ? kCFStringHangulStateLVT : kCFStringHangulStateLV); | |
2898 | } else { | |
2899 | state = kCFStringHangulStateBreak; | |
2900 | } | |
2901 | break; | |
2902 | } | |
2903 | ||
2904 | if (state == kCFStringHangulStateBreak) break; | |
2905 | ++end; | |
2906 | } | |
2907 | } | |
2908 | ||
2909 | // Extend forward | |
2910 | while ((character = CFStringGetCharacterFromInlineBuffer(buffer, end)) > 0) { | |
2911 | if ((type == kCFStringBackwardDeletionCluster) && (character >= 0x0530) && (character < 0x1950)) break; | |
2912 | ||
2913 | if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(buffer, end + 1)))) { | |
2914 | character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate); | |
2915 | nonBase = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, (character >> 16)); | |
2916 | step = 2; | |
2917 | } else { | |
2918 | nonBase = nonBaseBMP; | |
2919 | step = 1; | |
2920 | } | |
2921 | ||
2922 | if (!CFUniCharIsMemberOfBitmap(character, nonBase) && (character != 0xFF9E) && (character != 0xFF9F) && ((character & 0x1FFFF0) != 0xF870)) break; | |
2923 | ||
2924 | end += step; | |
2925 | } | |
2926 | ||
2927 | return CFRangeMake(start, end - start); | |
2928 | } | |
2929 | ||
2930 | CF_INLINE bool _CFStringIsVirama(UTF32Char character, const uint8_t *combClassBMP) { | |
2931 | return ((character == COMBINING_GRAPHEME_JOINER) || (CFUniCharGetCombiningPropertyForCharacter(character, ((character < 0x10000) ? combClassBMP : CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (character >> 16)))) == 9) ? true : false); | |
2932 | } | |
2933 | ||
2934 | CFRange CFStringGetRangeOfCharacterClusterAtIndex(CFStringRef string, CFIndex charIndex, CFStringCharacterClusterType type) { | |
2935 | CFRange range; | |
2936 | CFIndex currentIndex; | |
2937 | CFIndex length = CFStringGetLength(string); | |
2938 | CFStringInlineBuffer stringBuffer; | |
2939 | UTF32Char character; | |
2940 | UTF16Char otherSurrogate; | |
2941 | static const uint8_t *nonBaseBMP = NULL; | |
2942 | static const uint8_t *letterBMP = NULL; | |
2943 | static const uint8_t *combClassBMP = NULL; | |
2944 | ||
2945 | if (charIndex >= length) return CFRangeMake(kCFNotFound, 0); | |
2946 | ||
2947 | /* Fast case. If we're eight-bit, it's either the default encoding is cheap or the content is all ASCII. Watch out when (or if) adding more 8bit Mac-scripts in CFStringEncodingConverters | |
2948 | */ | |
2949 | if (!CF_IS_OBJC(__kCFStringTypeID, string) && __CFStrIsEightBit(string)) return CFRangeMake(charIndex, 1); | |
2950 | ||
2951 | if (NULL == nonBaseBMP) { | |
2952 | nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0); | |
2953 | letterBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, 0); | |
2954 | combClassBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0); | |
2955 | } | |
2956 | ||
2957 | CFStringInitInlineBuffer(string, &stringBuffer, CFRangeMake(0, length)); | |
2958 | ||
2959 | // Get composed character sequence first | |
2960 | range = _CFStringInlineBufferGetComposedRange(&stringBuffer, charIndex, type, nonBaseBMP); | |
2961 | ||
2962 | // Do grapheme joiners | |
2963 | if (type < kCFStringCursorMovementCluster) { | |
2964 | const uint8_t *letter = letterBMP; | |
2965 | ||
2966 | // Check to see if we have a letter at the beginning of initial cluster | |
2967 | character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location); | |
2968 | ||
2969 | if ((range.length > 1) && CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, range.location + 1)))) { | |
2970 | character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate); | |
2971 | letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16)); | |
2972 | } | |
2973 | ||
2974 | if ((character == ZERO_WIDTH_JOINER) || CFUniCharIsMemberOfBitmap(character, letter)) { | |
2975 | CFRange otherRange; | |
2976 | ||
2977 | // Check if preceded by grapheme joiners (U034F and viramas) | |
2978 | otherRange.location = currentIndex = range.location; | |
2979 | ||
2980 | while (currentIndex > 1) { | |
2981 | character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex); | |
2982 | ||
2983 | // ??? We're assuming viramas only in BMP | |
2984 | if ((_CFStringIsVirama(character, combClassBMP) || ((character == ZERO_WIDTH_JOINER) && _CFStringIsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, --currentIndex), combClassBMP))) && (currentIndex > 0)) { | |
2985 | --currentIndex; | |
2986 | } else { | |
2987 | break; | |
2988 | } | |
2989 | ||
2990 | currentIndex = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, nonBaseBMP).location; | |
2991 | ||
2992 | character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex); | |
2993 | ||
2994 | if (CFUniCharIsSurrogateLowCharacter(character) && CFUniCharIsSurrogateHighCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1)))) { | |
2995 | character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate); | |
2996 | letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16)); | |
2997 | --currentIndex; | |
2998 | } else { | |
2999 | letter = letterBMP; | |
3000 | } | |
3001 | ||
3002 | if (!CFUniCharIsMemberOfBitmap(character, letter)) break; | |
3003 | range.location = currentIndex; | |
3004 | } | |
3005 | ||
3006 | range.length += otherRange.location - range.location; | |
3007 | ||
3008 | // Check if followed by grapheme joiners | |
3009 | if ((range.length > 1) && ((range.location + range.length) < length)) { | |
3010 | otherRange = range; | |
3011 | ||
3012 | do { | |
3013 | currentIndex = otherRange.location + otherRange.length; | |
3014 | character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex - 1); | |
3015 | ||
3016 | // ??? We're assuming viramas only in BMP | |
3017 | if ((character != ZERO_WIDTH_JOINER) && !_CFStringIsVirama(character, combClassBMP)) break; | |
3018 | ||
3019 | character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex); | |
3020 | ||
3021 | if (character == ZERO_WIDTH_JOINER) character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, ++currentIndex); | |
3022 | ||
3023 | if (CFUniCharIsSurrogateHighCharacter(character) && CFUniCharIsSurrogateLowCharacter((otherSurrogate = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex + 1)))) { | |
3024 | character = CFUniCharGetLongCharacterForSurrogatePair(character, otherSurrogate); | |
3025 | letter = CFUniCharGetBitmapPtrForPlane(kCFUniCharLetterCharacterSet, (character >> 16)); | |
3026 | } else { | |
3027 | letter = letterBMP; | |
3028 | } | |
3029 | ||
3030 | // We only conjoin letters | |
3031 | if (!CFUniCharIsMemberOfBitmap(character, letter)) break; | |
3032 | otherRange = _CFStringInlineBufferGetComposedRange(&stringBuffer, currentIndex, type, nonBaseBMP); | |
3033 | } while ((otherRange.location + otherRange.length) < length); | |
3034 | range.length = currentIndex - range.location; | |
3035 | } | |
3036 | } | |
3037 | } | |
3038 | ||
3039 | // Check if we're part of prefix transcoding hints | |
d8925383 A |
3040 | CFIndex otherIndex; |
3041 | ||
3042 | currentIndex = (range.location + range.length) - (MAX_TRANSCODING_LENGTH + 1); | |
3043 | if (currentIndex < 0) currentIndex = 0; | |
3044 | ||
3045 | while (currentIndex <= range.location) { | |
3046 | character = CFStringGetCharacterFromInlineBuffer(&stringBuffer, currentIndex); | |
3047 | ||
3048 | if ((character & 0x1FFFF0) == 0xF860) { // transcoding hint | |
3049 | otherIndex = currentIndex + __CFTranscodingHintLength[(character - 0xF860)] + 1; | |
3050 | if (otherIndex >= (range.location + range.length)) { | |
3051 | if (otherIndex <= length) { | |
9ce05555 A |
3052 | range.location = currentIndex; |
3053 | range.length = otherIndex - currentIndex; | |
9ce05555 | 3054 | } |
d8925383 | 3055 | break; |
9ce05555 | 3056 | } |
9ce05555 | 3057 | } |
d8925383 | 3058 | ++currentIndex; |
9ce05555 | 3059 | } |
d8925383 | 3060 | |
9ce05555 A |
3061 | return range; |
3062 | } | |
3063 | ||
3064 | #if 1 /* Using the new implementation. Leaving the old implementation if'ed out for testing purposes for now */ | |
3065 | CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) { | |
3066 | return CFStringGetRangeOfCharacterClusterAtIndex(theString, theIndex, kCFStringComposedCharacterCluster); | |
3067 | } | |
3068 | #else | |
3069 | /*! | |
3070 | @function CFStringGetRangeOfComposedCharactersAtIndex | |
3071 | Returns the range of the composed character sequence at the specified index. | |
3072 | @param theString The CFString which is to be searched. If this | |
3073 | parameter is not a valid CFString, the behavior is | |
3074 | undefined. | |
3075 | @param theIndex The index of the character contained in the | |
3076 | composed character sequence. If the index is | |
3077 | outside the index space of the string (0 to N-1 inclusive, | |
3078 | where N is the length of the string), the behavior is | |
3079 | undefined. | |
3080 | @result The range of the composed character sequence. | |
3081 | */ | |
3082 | #define ExtHighHalfZoneLow 0xD800 | |
3083 | #define ExtHighHalfZoneHigh 0xDBFF | |
3084 | #define ExtLowHalfZoneLow 0xDC00 | |
3085 | #define ExtLowHalfZoneHigh 0xDFFF | |
3086 | #define JunseongStart 0x1160 | |
3087 | #define JonseongEnd 0x11F9 | |
3088 | CF_INLINE Boolean IsHighCode(UniChar X) { return (X >= ExtHighHalfZoneLow && X <= ExtHighHalfZoneHigh); } | |
3089 | CF_INLINE Boolean IsLowCode(UniChar X) { return (X >= ExtLowHalfZoneLow && X <= ExtLowHalfZoneHigh); } | |
3090 | #define IsHangulConjoiningJamo(X) (X >= JunseongStart && X <= JonseongEnd) | |
3091 | #define IsHalfwidthKanaVoicedMark(X) ((X == 0xFF9E) || (X == 0xFF9F)) | |
3092 | CF_INLINE Boolean IsNonBaseChar(UniChar X, CFCharacterSetRef nonBaseSet) { return (CFCharacterSetIsCharacterMember(nonBaseSet, X) || IsHangulConjoiningJamo(X) || IsHalfwidthKanaVoicedMark(X) || (X & 0x1FFFF0) == 0xF870); } // combining char, hangul jamo, or Apple corporate variant tag | |
3093 | #define ZWJ 0x200D | |
3094 | #define ZWNJ 0x200C | |
3095 | #define COMBINING_GRAPHEME_JOINER (0x034F) | |
3096 | ||
3097 | static CFCharacterSetRef nonBaseChars = NULL; | |
3098 | static CFCharacterSetRef letterChars = NULL; | |
3099 | static const void *__CFCombiningClassBMP = NULL; | |
3100 | ||
3101 | CF_INLINE bool IsVirama(UTF32Char character) { | |
3102 | return ((character == COMBINING_GRAPHEME_JOINER) ? true : ((character < 0x10000) && (CFUniCharGetCombiningPropertyForCharacter(character, __CFCombiningClassBMP) == 9) ? true : false)); | |
3103 | } | |
3104 | ||
3105 | CFRange CFStringGetRangeOfComposedCharactersAtIndex(CFStringRef theString, CFIndex theIndex) { | |
3106 | CFIndex left, current, save; | |
3107 | CFIndex len = CFStringGetLength(theString); | |
3108 | CFStringInlineBuffer stringBuffer; | |
3109 | static volatile Boolean _isInited = false; | |
3110 | ||
3111 | if (theIndex >= len) return CFRangeMake(kCFNotFound, 0); | |
3112 | ||
3113 | if (!_isInited) { | |
3114 | nonBaseChars = CFCharacterSetGetPredefined(kCFCharacterSetNonBase); | |
3115 | letterChars = CFCharacterSetGetPredefined(kCFCharacterSetLetter); | |
3116 | __CFCombiningClassBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0); | |
3117 | _isInited = true; | |
3118 | } | |
3119 | ||
3120 | save = current = theIndex; | |
3121 | ||
3122 | CFStringInitInlineBuffer(theString, &stringBuffer, CFRangeMake(0, len)); | |
3123 | ||
3124 | /* | |
3125 | * First check for transcoding hints | |
3126 | */ | |
3127 | { | |
3128 | CFRange theRange = (current > MAX_TRANSCODING_LENGTH ? CFRangeMake(current - MAX_TRANSCODING_LENGTH, MAX_TRANSCODING_LENGTH + 1) : CFRangeMake(0, current + 1)); | |
3129 | ||
3130 | // Should check the next loc ? | |
3131 | if (current + 1 < len) ++theRange.length; | |
3132 | ||
3133 | if (theRange.length > 1) { | |
3134 | UniChar characterBuffer[MAX_TRANSCODING_LENGTH + 2]; // Transcoding hint length + current loc + next loc | |
3135 | ||
3136 | if (stringBuffer.directBuffer) { | |
3137 | memmove(characterBuffer, stringBuffer.directBuffer + theRange.location, theRange.length * sizeof(UniChar)); | |
3138 | } else { | |
3139 | CFStringGetCharacters(theString, theRange, characterBuffer); | |
3140 | } | |
3141 | ||
3142 | while (current >= theRange.location) { | |
3143 | if ((characterBuffer[current - theRange.location] & 0x1FFFF0) == 0xF860) { | |
3144 | theRange = CFRangeMake(current, __CFTranscodingHintLength[characterBuffer[current - theRange.location] - 0xF860] + 1); | |
3145 | if ((theRange.location + theRange.length) <= theIndex) break; | |
3146 | if ((theRange.location + theRange.length) >= len) theRange.length = len - theRange.location; | |
3147 | return theRange; | |
3148 | } | |
3149 | if (current == 0) break; | |
3150 | --current; | |
3151 | } | |
3152 | current = theIndex; // Reset current | |
3153 | } | |
3154 | } | |
3155 | ||
3156 | //#warning Aki 5/29/01 This does not support non-base chars in non-BMP planes (i.e. musical symbol combining stem in Unicode 3.1) | |
3157 | /* | |
3158 | * if we start NOT on a base, first move back to a base as appropriate. | |
3159 | */ | |
3160 | ||
3161 | roundAgain: | |
3162 | ||
3163 | while ((current > 0) && IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current), nonBaseChars)) --current; | |
3164 | ||
3165 | if (current >= 1 && current < len && CFCharacterSetIsCharacterMember(letterChars, CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) { | |
3166 | --current; | |
3167 | goto roundAgain; | |
3168 | } else if ((current >= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1) == ZWJ) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 2))) { | |
3169 | current -= 2; | |
3170 | goto roundAgain; | |
3171 | } | |
3172 | ||
3173 | /* | |
3174 | * Set the left position, then jump back to the saved original position. | |
3175 | */ | |
3176 | ||
3177 | if (current >= 1 && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) --current; | |
3178 | left = current; | |
3179 | current = save; | |
3180 | ||
3181 | /* | |
3182 | * Now, presume we are on a base; move forward & look for the next base. | |
3183 | * Handle jumping over H/L codes. | |
3184 | */ | |
3185 | if (IsHighCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current)) && (current + 1) < len && IsLowCode(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current + 1))) ++current; | |
3186 | ++current; | |
3187 | ||
3188 | round2Again: | |
3189 | ||
3190 | if (current < len) { | |
3191 | while (IsNonBaseChar(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current), nonBaseChars)) { | |
3192 | ++current; | |
3193 | if (current >= len) break; | |
3194 | } | |
3195 | if ((current < len) && CFCharacterSetIsCharacterMember(letterChars, CFStringGetCharacterFromInlineBuffer(&stringBuffer, current))) { | |
3196 | if (IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1))) { | |
3197 | ++current; goto round2Again; | |
3198 | } else if ((current >= 2) && (CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 1) == ZWJ) && IsVirama(CFStringGetCharacterFromInlineBuffer(&stringBuffer, current - 2))) { | |
3199 | ++current; goto round2Again; | |
3200 | } | |
3201 | } | |
3202 | } | |
3203 | /* | |
3204 | * Now, "current" is a base, and "left" is a base. | |
3205 | * The junk between had better contain "save"! | |
3206 | */ | |
3207 | if ((! (left <= save)) || (! (save <= current))) { | |
3208 | CFLog(0, CFSTR("CFString: CFStringGetRangeOfComposedCharactersAtIndex:%d returned invalid\n"), save); | |
3209 | } | |
3210 | return CFRangeMake(left, current - left); | |
3211 | } | |
3212 | #endif | |
3213 | ||
3214 | /*! | |
3215 | @function CFStringFindCharacterFromSet | |
3216 | Query the range of characters contained in the specified character set. | |
3217 | @param theString The CFString which is to be searched. If this | |
3218 | parameter is not a valid CFString, the behavior is | |
3219 | undefined. | |
3220 | @param theSet The CFCharacterSet against which the membership | |
3221 | of characters is checked. If this parameter is not a valid | |
3222 | CFCharacterSet, the behavior is undefined. | |
3223 | @param range The range of characters within the string to search. If | |
3224 | the range location or end point (defined by the location | |
3225 | plus length minus 1) are outside the index space of the | |
3226 | string (0 to N-1 inclusive, where N is the length of the | |
3227 | string), the behavior is undefined. If the range length is | |
3228 | negative, the behavior is undefined. The range may be empty | |
3229 | (length 0), in which case no search is performed. | |
3230 | @param searchOptions The bitwise-or'ed option flags to control | |
3231 | the search behavior. The supported options are | |
3232 | kCFCompareBackwards andkCFCompareAnchored. | |
3233 | If other option flags are specified, the behavior | |
3234 | is undefined. | |
3235 | @param result The pointer to a CFRange supplied by the caller in | |
3236 | which the search result is stored. If a pointer to an invalid | |
3237 | memory is specified, the behavior is undefined. | |
3238 | @result true, if at least a character which is a member of the character | |
3239 | set is found and result is filled, otherwise, false. | |
3240 | */ | |
3241 | #define SURROGATE_START 0xD800 | |
3242 | #define SURROGATE_END 0xDFFF | |
3243 | ||
3244 | CF_EXPORT Boolean CFStringFindCharacterFromSet(CFStringRef theString, CFCharacterSetRef theSet, CFRange rangeToSearch, CFOptionFlags searchOptions, CFRange *result) { | |
3245 | CFStringInlineBuffer stringBuffer; | |
3246 | UniChar ch; | |
3247 | CFIndex step; | |
3248 | CFIndex fromLoc, toLoc, cnt; // fromLoc and toLoc are inclusive | |
3249 | Boolean found = false; | |
3250 | Boolean done = false; | |
3251 | ||
3252 | //#warning FIX ME !! Should support kCFCompareNonliteral | |
3253 | ||
3254 | if ((rangeToSearch.location + rangeToSearch.length > CFStringGetLength(theString)) || (rangeToSearch.length == 0)) return false; | |
3255 | ||
3256 | if (searchOptions & kCFCompareBackwards) { | |
3257 | fromLoc = rangeToSearch.location + rangeToSearch.length - 1; | |
3258 | toLoc = rangeToSearch.location; | |
3259 | } else { | |
3260 | fromLoc = rangeToSearch.location; | |
3261 | toLoc = rangeToSearch.location + rangeToSearch.length - 1; | |
3262 | } | |
3263 | if (searchOptions & kCFCompareAnchored) { | |
3264 | toLoc = fromLoc; | |
3265 | } | |
3266 | ||
3267 | step = (fromLoc <= toLoc) ? 1 : -1; | |
3268 | cnt = fromLoc; | |
3269 | ||
3270 | CFStringInitInlineBuffer(theString, &stringBuffer, rangeToSearch); | |
3271 | ||
3272 | do { | |
3273 | ch = CFStringGetCharacterFromInlineBuffer(&stringBuffer, cnt - rangeToSearch.location); | |
3274 | if ((ch >= SURROGATE_START) && (ch <= SURROGATE_END)) { | |
3275 | int otherCharIndex = cnt + step; | |
3276 | ||
3277 | if (((step < 0) && (otherCharIndex < toLoc)) || ((step > 0) && (otherCharIndex > toLoc))) { | |
3278 | done = true; | |
3279 | } else { | |
3280 | UniChar highChar; | |
3281 | UniChar lowChar = CFStringGetCharacterFromInlineBuffer(&stringBuffer, otherCharIndex - rangeToSearch.location); | |
3282 | ||
3283 | if (cnt < otherCharIndex) { | |
3284 | highChar = ch; | |
3285 | } else { | |
3286 | highChar = lowChar; | |
3287 | lowChar = ch; | |
3288 | } | |
3289 | ||
3290 | if (CFUniCharIsSurrogateHighCharacter(highChar) && CFUniCharIsSurrogateLowCharacter(lowChar) && CFCharacterSetIsLongCharacterMember(theSet, CFUniCharGetLongCharacterForSurrogatePair(highChar, lowChar))) { | |
3291 | if (result) *result = CFRangeMake((cnt < otherCharIndex ? cnt : otherCharIndex), 2); | |
3292 | return true; | |
3293 | } else if (otherCharIndex == toLoc) { | |
3294 | done = true; | |
3295 | } else { | |
3296 | cnt = otherCharIndex + step; | |
3297 | } | |
3298 | } | |
3299 | } else if (CFCharacterSetIsCharacterMember(theSet, ch)) { | |
3300 | done = found = true; | |
3301 | } else if (cnt == toLoc) { | |
3302 | done = true; | |
3303 | } else { | |
3304 | cnt += step; | |
3305 | } | |
3306 | } while (!done); | |
3307 | ||
3308 | if (found && result) *result = CFRangeMake(cnt, 1); | |
3309 | return found; | |
3310 | } | |
3311 | ||
3312 | /* Line range code */ | |
3313 | ||
3314 | #define CarriageReturn '\r' /* 0x0d */ | |
3315 | #define NewLine '\n' /* 0x0a */ | |
3316 | #define NextLine 0x0085 | |
3317 | #define LineSeparator 0x2028 | |
3318 | #define ParaSeparator 0x2029 | |
3319 | ||
3320 | CF_INLINE Boolean isALineSeparatorTypeCharacter(UniChar ch) { | |
3321 | if (ch > CarriageReturn && ch < NextLine) return false; /* Quick test to cover most chars */ | |
3322 | return (ch == NewLine || ch == CarriageReturn || ch == NextLine || ch == LineSeparator || ch == ParaSeparator) ? true : false; | |
3323 | } | |
3324 | ||
3325 | void CFStringGetLineBounds(CFStringRef string, CFRange range, CFIndex *lineBeginIndex, CFIndex *lineEndIndex, CFIndex *contentsEndIndex) { | |
3326 | CFIndex len; | |
3327 | CFStringInlineBuffer buf; | |
3328 | UniChar ch; | |
3329 | ||
3330 | CF_OBJC_FUNCDISPATCH4(__kCFStringTypeID, void, string, "getLineStart:end:contentsEnd:forRange:", lineBeginIndex, lineEndIndex, contentsEndIndex, CFRangeMake(range.location, range.length)); | |
3331 | ||
3332 | __CFAssertIsString(string); | |
3333 | __CFAssertRangeIsInStringBounds(string, range.location, range.length); | |
3334 | ||
3335 | len = __CFStrLength(string); | |
3336 | ||
3337 | if (lineBeginIndex) { | |
3338 | CFIndex start; | |
3339 | if (range.location == 0) { | |
3340 | start = 0; | |
3341 | } else { | |
3342 | CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len)); | |
3343 | CFIndex buf_idx = range.location; | |
3344 | ||
3345 | /* Take care of the special case where start happens to fall right between \r and \n */ | |
3346 | ch = CFStringGetCharacterFromInlineBuffer(&buf, buf_idx); | |
3347 | buf_idx--; | |
3348 | if ((ch == NewLine) && (CFStringGetCharacterFromInlineBuffer(&buf, buf_idx) == CarriageReturn)) { | |
3349 | buf_idx--; | |
3350 | } | |
3351 | while (1) { | |
3352 | if (buf_idx < 0) { | |
3353 | start = 0; | |
3354 | break; | |
3355 | } else if (isALineSeparatorTypeCharacter(CFStringGetCharacterFromInlineBuffer(&buf, buf_idx))) { | |
3356 | start = buf_idx + 1; | |
3357 | break; | |
3358 | } else { | |
3359 | buf_idx--; | |
3360 | } | |
3361 | } | |
3362 | } | |
3363 | *lineBeginIndex = start; | |
3364 | } | |
3365 | ||
3366 | /* Now find the ending point */ | |
3367 | if (lineEndIndex || contentsEndIndex) { | |
3368 | CFIndex endOfContents, lineSeparatorLength = 1; /* 1 by default */ | |
3369 | CFStringInitInlineBuffer(string, &buf, CFRangeMake(0, len)); | |
3370 | CFIndex buf_idx = range.location + range.length - (range.length ? 1 : 0); | |
3371 | /* First look at the last char in the range (if the range is zero length, the char after the range) to see if we're already on or within a end of line sequence... */ | |
3372 | ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx); | |
3373 | if (ch == NewLine) { | |
3374 | endOfContents = buf_idx; | |
3375 | buf_idx--; | |
3376 | if (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == CarriageReturn) { | |
3377 | lineSeparatorLength = 2; | |
3378 | endOfContents--; | |
3379 | } | |
3380 | } else { | |
3381 | while (1) { | |
3382 | if (isALineSeparatorTypeCharacter(ch)) { | |
3383 | endOfContents = buf_idx; /* This is actually end of contentsRange */ | |
3384 | buf_idx++; /* OK for this to go past the end */ | |
3385 | if ((ch == CarriageReturn) && (__CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx) == NewLine)) { | |
3386 | lineSeparatorLength = 2; | |
3387 | } | |
3388 | break; | |
3389 | } else if (buf_idx >= len) { | |
3390 | endOfContents = len; | |
3391 | lineSeparatorLength = 0; | |
3392 | break; | |
3393 | } else { | |
3394 | buf_idx++; | |
3395 | ch = __CFStringGetCharacterFromInlineBufferAux(&buf, buf_idx); | |
3396 | } | |
3397 | } | |
3398 | } | |
3399 | if (contentsEndIndex) *contentsEndIndex = endOfContents; | |
3400 | if (lineEndIndex) *lineEndIndex = endOfContents + lineSeparatorLength; | |
3401 | } | |
3402 | } | |
3403 | ||
3404 | ||
3405 | CFStringRef CFStringCreateByCombiningStrings(CFAllocatorRef alloc, CFArrayRef array, CFStringRef separatorString) { | |
3406 | CFIndex numChars; | |
3407 | CFIndex separatorNumByte; | |
3408 | CFIndex stringCount = CFArrayGetCount(array); | |
3409 | Boolean isSepCFString = !CF_IS_OBJC(__kCFStringTypeID, separatorString); | |
3410 | Boolean canBeEightbit = isSepCFString && __CFStrIsEightBit(separatorString); | |
3411 | CFIndex idx; | |
3412 | CFStringRef otherString; | |
3413 | void *buffer; | |
3414 | uint8_t *bufPtr; | |
3415 | const void *separatorContents = NULL; | |
3416 | ||
3417 | if (stringCount == 0) { | |
3418 | return CFStringCreateWithCharacters(alloc, NULL, 0); | |
3419 | } else if (stringCount == 1) { | |
3420 | return CFStringCreateCopy(alloc, CFArrayGetValueAtIndex(array, 0)); | |
3421 | } | |
3422 | ||
3423 | if (alloc == NULL) alloc = __CFGetDefaultAllocator(); | |
3424 | ||
3425 | numChars = CFStringGetLength(separatorString) * (stringCount - 1); | |
3426 | for (idx = 0; idx < stringCount; idx++) { | |
3427 | otherString = (CFStringRef)CFArrayGetValueAtIndex(array, idx); | |
3428 | numChars += CFStringGetLength(otherString); | |
3429 | // canBeEightbit is already false if the separator is an NSString... | |
3430 | if (!CF_IS_OBJC(__kCFStringTypeID, otherString) && __CFStrIsUnicode(otherString)) canBeEightbit = false; | |
3431 | } | |
3432 | ||
3433 | bufPtr = buffer = CFAllocatorAllocate(alloc, canBeEightbit ? ((numChars + 1) * sizeof(uint8_t)) : (numChars * sizeof(UniChar)), 0); | |
3434 | if (__CFOASafe) __CFSetLastAllocationEventName(buffer, "CFString (store)"); | |
3435 | separatorNumByte = CFStringGetLength(separatorString) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar)); | |
3436 | ||
3437 | for (idx = 0; idx < stringCount; idx++) { | |
3438 | if (idx) { // add separator here unless first string | |
3439 | if (separatorContents) { | |
3440 | memmove(bufPtr, separatorContents, separatorNumByte); | |
3441 | } else { | |
3442 | if (!isSepCFString) { // NSString | |
3443 | CFStringGetCharacters(separatorString, CFRangeMake(0, CFStringGetLength(separatorString)), (UniChar*)bufPtr); | |
3444 | } else if (canBeEightbit || __CFStrIsUnicode(separatorString)) { | |
3445 | memmove(bufPtr, (const uint8_t *)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), separatorNumByte); | |
3446 | } else { | |
3447 | __CFStrConvertBytesToUnicode((uint8_t*)__CFStrContents(separatorString) + __CFStrSkipAnyLengthByte(separatorString), (UniChar*)bufPtr, __CFStrLength(separatorString)); | |
3448 | } | |
3449 | separatorContents = bufPtr; | |
3450 | } | |
3451 | bufPtr += separatorNumByte; | |
3452 | } | |
3453 | ||
3454 | otherString = (CFStringRef )CFArrayGetValueAtIndex(array, idx); | |
3455 | if (CF_IS_OBJC(__kCFStringTypeID, otherString)) { | |
3456 | CFIndex otherLength = CFStringGetLength(otherString); | |
3457 | CFStringGetCharacters(otherString, CFRangeMake(0, otherLength), (UniChar*)bufPtr); | |
3458 | bufPtr += otherLength * sizeof(UniChar); | |
3459 | } else { | |
3460 | const uint8_t* otherContents = __CFStrContents(otherString); | |
3461 | CFIndex otherNumByte = __CFStrLength2(otherString, otherContents) * (canBeEightbit ? sizeof(uint8_t) : sizeof(UniChar)); | |
3462 | ||
3463 | if (canBeEightbit || __CFStrIsUnicode(otherString)) { | |
3464 | memmove(bufPtr, otherContents + __CFStrSkipAnyLengthByte(otherString), otherNumByte); | |
3465 | } else { | |
3466 | __CFStrConvertBytesToUnicode(otherContents + __CFStrSkipAnyLengthByte(otherString), (UniChar*)bufPtr, __CFStrLength2(otherString, otherContents)); | |
3467 | } | |
3468 | bufPtr += otherNumByte; | |
3469 | } | |
3470 | } | |
3471 | if (canBeEightbit) *bufPtr = 0; // NULL byte; | |
3472 | ||
3473 | return canBeEightbit ? | |
3474 | CFStringCreateWithCStringNoCopy(alloc, buffer, __CFStringGetEightBitStringEncoding(), alloc) : | |
3475 | CFStringCreateWithCharactersNoCopy(alloc, buffer, numChars, alloc); | |
3476 | } | |
3477 | ||
3478 | ||
3479 | CFArrayRef CFStringCreateArrayBySeparatingStrings(CFAllocatorRef alloc, CFStringRef string, CFStringRef separatorString) { | |
3480 | CFArrayRef separatorRanges; | |
3481 | CFIndex length = CFStringGetLength(string); | |
3482 | /* No objc dispatch needed here since CFStringCreateArrayWithFindResults() works with both CFString and NSString */ | |
3483 | if (!(separatorRanges = CFStringCreateArrayWithFindResults(alloc, string, separatorString, CFRangeMake(0, length), 0))) { | |
3484 | return CFArrayCreate(alloc, (const void**)&string, 1, & kCFTypeArrayCallBacks); | |
3485 | } else { | |
3486 | CFIndex idx; | |
3487 | CFIndex count = CFArrayGetCount(separatorRanges); | |
3488 | CFIndex startIndex = 0; | |
3489 | CFIndex numChars; | |
3490 | CFMutableArrayRef array = CFArrayCreateMutable(alloc, count + 2, & kCFTypeArrayCallBacks); | |
3491 | const CFRange *currentRange; | |
3492 | CFStringRef substring; | |
3493 | ||
3494 | for (idx = 0;idx < count;idx++) { | |
3495 | currentRange = CFArrayGetValueAtIndex(separatorRanges, idx); | |
3496 | numChars = currentRange->location - startIndex; | |
3497 | substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, numChars)); | |
3498 | CFArrayAppendValue(array, substring); | |
3499 | CFRelease(substring); | |
3500 | startIndex = currentRange->location + currentRange->length; | |
3501 | } | |
3502 | substring = CFStringCreateWithSubstring(alloc, string, CFRangeMake(startIndex, length - startIndex)); | |
3503 | CFArrayAppendValue(array, substring); | |
3504 | CFRelease(substring); | |
3505 | ||
3506 | CFRelease(separatorRanges); | |
3507 | ||
3508 | return array; | |
3509 | } | |
3510 | } | |
3511 | ||
3512 | CFStringRef CFStringCreateFromExternalRepresentation(CFAllocatorRef alloc, CFDataRef data, CFStringEncoding encoding) { | |
3513 | return CFStringCreateWithBytes(alloc, CFDataGetBytePtr(data), CFDataGetLength(data), encoding, true); | |
3514 | } | |
3515 | ||
3516 | ||
3517 | CFDataRef CFStringCreateExternalRepresentation(CFAllocatorRef alloc, CFStringRef string, CFStringEncoding encoding, uint8_t lossByte) { | |
3518 | CFIndex length; | |
3519 | CFIndex guessedByteLength; | |
3520 | uint8_t *bytes; | |
3521 | CFIndex usedLength; | |
3522 | SInt32 result; | |
3523 | ||
3524 | if (CF_IS_OBJC(__kCFStringTypeID, string)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */ | |
3525 | length = CFStringGetLength(string); | |
3526 | } else { | |
3527 | __CFAssertIsString(string); | |
3528 | length = __CFStrLength(string); | |
3529 | if (__CFStrIsEightBit(string) && ((__CFStringGetEightBitStringEncoding() == encoding) || (__CFStringGetEightBitStringEncoding() == kCFStringEncodingASCII && __CFStringEncodingIsSupersetOfASCII(encoding)))) { // Requested encoding is equal to the encoding in string | |
3530 | return CFDataCreate(alloc, ((char *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string)); | |
3531 | } | |
3532 | } | |
3533 | ||
3534 | if (alloc == NULL) alloc = __CFGetDefaultAllocator(); | |
3535 | ||
3536 | if (encoding == kCFStringEncodingUnicode) { | |
3537 | guessedByteLength = (length + 1) * sizeof(UniChar); | |
3538 | } else if (((guessedByteLength = CFStringGetMaximumSizeForEncoding(length, encoding)) > length) && !CF_IS_OBJC(__kCFStringTypeID, string)) { // Multi byte encoding | |
3539 | #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__) | |
3540 | if (__CFStrIsUnicode(string)) { | |
3541 | guessedByteLength = CFStringEncodingByteLengthForCharacters(encoding, kCFStringEncodingPrependBOM, __CFStrContents(string), __CFStrLength(string)); | |
3542 | } else { | |
3543 | #endif | |
3544 | result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, NULL, 0x7FFFFFFF, &guessedByteLength); | |
3545 | // if result == length, we always succeed | |
3546 | // otherwise, if result == 0, we fail | |
3547 | // otherwise, if there was a lossByte but still result != length, we fail | |
3548 | if ((result != length) && (!result || !lossByte)) return NULL; | |
3549 | if (guessedByteLength == length && __CFStrIsEightBit(string) && __CFStringEncodingIsSupersetOfASCII(encoding)) { // It's all ASCII !! | |
3550 | return CFDataCreate(alloc, ((char *)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string)), __CFStrLength(string)); | |
3551 | } | |
3552 | #if defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__) | |
3553 | } | |
3554 | #endif | |
3555 | } | |
3556 | bytes = CFAllocatorAllocate(alloc, guessedByteLength, 0); | |
3557 | if (__CFOASafe) __CFSetLastAllocationEventName(bytes, "CFData (store)"); | |
3558 | ||
3559 | result = __CFStringEncodeByteStream(string, 0, length, true, encoding, lossByte, bytes, guessedByteLength, &usedLength); | |
3560 | ||
3561 | if ((result != length) && (!result || !lossByte)) { // see comment above about what this means | |
3562 | CFAllocatorDeallocate(alloc, bytes); | |
3563 | return NULL; | |
3564 | } | |
3565 | ||
3566 | return CFDataCreateWithBytesNoCopy(alloc, (char const *)bytes, usedLength, alloc); | |
3567 | } | |
3568 | ||
3569 | ||
3570 | CFStringEncoding CFStringGetSmallestEncoding(CFStringRef str) { | |
3571 | CFIndex len; | |
3572 | CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_smallestEncodingInCFStringEncoding"); | |
3573 | __CFAssertIsString(str); | |
3574 | ||
3575 | if (__CFStrIsEightBit(str)) return __CFStringGetEightBitStringEncoding(); | |
3576 | len = __CFStrLength(str); | |
3577 | if (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetEightBitStringEncoding(), 0, NULL, 0x7fffffff, NULL) == len) return __CFStringGetEightBitStringEncoding(); | |
3578 | if ((__CFStringGetEightBitStringEncoding() != __CFStringGetSystemEncoding()) && (__CFStringEncodeByteStream(str, 0, len, false, __CFStringGetSystemEncoding(), 0, NULL, 0x7fffffff, NULL) == len)) return __CFStringGetSystemEncoding(); | |
3579 | return kCFStringEncodingUnicode; /* ??? */ | |
3580 | } | |
3581 | ||
3582 | ||
3583 | CFStringEncoding CFStringGetFastestEncoding(CFStringRef str) { | |
3584 | CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, CFStringEncoding, str, "_fastestEncodingInCFStringEncoding"); | |
3585 | __CFAssertIsString(str); | |
3586 | return __CFStrIsEightBit(str) ? __CFStringGetEightBitStringEncoding() : kCFStringEncodingUnicode; /* ??? */ | |
3587 | } | |
3588 | ||
3589 | ||
3590 | SInt32 CFStringGetIntValue(CFStringRef str) { | |
3591 | Boolean success; | |
3592 | SInt32 result; | |
3593 | SInt32 idx = 0; | |
3594 | CFStringInlineBuffer buf; | |
3595 | CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str))); | |
3596 | success = __CFStringScanInteger(&buf, NULL, &idx, false, &result); | |
3597 | return success ? result : 0; | |
3598 | } | |
3599 | ||
3600 | ||
3601 | double CFStringGetDoubleValue(CFStringRef str) { | |
3602 | Boolean success; | |
3603 | double result; | |
3604 | SInt32 idx = 0; | |
3605 | CFStringInlineBuffer buf; | |
3606 | CFStringInitInlineBuffer(str, &buf, CFRangeMake(0, CFStringGetLength(str))); | |
3607 | success = __CFStringScanDouble(&buf, NULL, &idx, &result); | |
3608 | return success ? result : 0.0; | |
3609 | } | |
3610 | ||
3611 | ||
3612 | /*** Mutable functions... ***/ | |
3613 | ||
3614 | void CFStringSetExternalCharactersNoCopy(CFMutableStringRef string, UniChar *chars, CFIndex length, CFIndex capacity) { | |
3615 | __CFAssertIsNotNegative(length); | |
3616 | __CFAssertIsStringAndExternalMutable(string); | |
3617 | CFAssert4((length <= capacity) && ((capacity == 0) || ((capacity > 0) && chars)), __kCFLogAssertion, "%s(): Invalid args: characters %p length %d capacity %d", __PRETTY_FUNCTION__, chars, length, capacity); | |
3618 | __CFStrSetContentPtr(string, chars); | |
3619 | __CFStrSetExplicitLength(string, length); | |
3620 | __CFStrSetCapacity(string, capacity * sizeof(UniChar)); | |
3621 | __CFStrSetCapacityProvidedExternally(string); | |
3622 | } | |
3623 | ||
3624 | ||
3625 | ||
3626 | void CFStringInsert(CFMutableStringRef str, CFIndex idx, CFStringRef insertedStr) { | |
3627 | CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "insertString:atIndex:", insertedStr, idx); | |
3628 | __CFAssertIsStringAndMutable(str); | |
3629 | CFAssert3(idx >= 0 && idx <= __CFStrLength(str), __kCFLogAssertion, "%s(): string index %d out of bounds (length %d)", __PRETTY_FUNCTION__, idx, __CFStrLength(str)); | |
3630 | __CFStringReplace(str, CFRangeMake(idx, 0), insertedStr); | |
3631 | } | |
3632 | ||
3633 | ||
3634 | void CFStringDelete(CFMutableStringRef str, CFRange range) { | |
3635 | CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "deleteCharactersInRange:", range); | |
3636 | __CFAssertIsStringAndMutable(str); | |
3637 | __CFAssertRangeIsInStringBounds(str, range.location, range.length); | |
3638 | __CFStringChangeSize(str, range, 0, false); | |
3639 | } | |
3640 | ||
3641 | ||
3642 | void CFStringReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) { | |
3643 | CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "replaceCharactersInRange:withString:", range, replacement); | |
3644 | __CFAssertIsStringAndMutable(str); | |
3645 | __CFAssertRangeIsInStringBounds(str, range.location, range.length); | |
3646 | __CFStringReplace(str, range, replacement); | |
3647 | } | |
3648 | ||
3649 | ||
3650 | void CFStringReplaceAll(CFMutableStringRef str, CFStringRef replacement) { | |
3651 | CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "setString:", replacement); | |
3652 | __CFAssertIsStringAndMutable(str); | |
3653 | __CFStringReplace(str, CFRangeMake(0, __CFStrLength(str)), replacement); | |
3654 | } | |
3655 | ||
3656 | ||
3657 | void CFStringAppend(CFMutableStringRef str, CFStringRef appended) { | |
3658 | CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, str, "appendString:", appended); | |
3659 | __CFAssertIsStringAndMutable(str); | |
3660 | __CFStringReplace(str, CFRangeMake(__CFStrLength(str), 0), appended); | |
3661 | } | |
3662 | ||
3663 | ||
3664 | void CFStringAppendCharacters(CFMutableStringRef str, const UniChar *chars, CFIndex appendedLength) { | |
3665 | CFIndex strLength, idx; | |
3666 | ||
3667 | __CFAssertIsNotNegative(appendedLength); | |
3668 | ||
3669 | CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", chars, appendedLength); | |
3670 | ||
3671 | __CFAssertIsStringAndMutable(str); | |
3672 | ||
3673 | strLength = __CFStrLength(str); | |
3674 | if (__CFStringGetCompatibility(Bug2967272) || __CFStrIsUnicode(str)) { | |
3675 | __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, true); | |
3676 | memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar)); | |
3677 | } else { | |
3678 | uint8_t *contents; | |
3679 | bool isASCII = true; | |
3680 | for (idx = 0; isASCII && idx < appendedLength; idx++) isASCII = (chars[idx] < 0x80); | |
3681 | __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, !isASCII); | |
3682 | if (!isASCII) { | |
3683 | memmove((UniChar *)__CFStrContents(str) + strLength, chars, appendedLength * sizeof(UniChar)); | |
3684 | } else { | |
3685 | contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str); | |
3686 | for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx]; | |
3687 | } | |
3688 | } | |
3689 | } | |
3690 | ||
3691 | ||
3692 | static void __CFStringAppendBytes(CFMutableStringRef str, const char *cStr, CFIndex appendedLength, CFStringEncoding encoding) { | |
3693 | Boolean appendedIsUnicode = false; | |
3694 | Boolean freeCStrWhenDone = false; | |
3695 | Boolean demoteAppendedUnicode = false; | |
3696 | CFVarWidthCharBuffer vBuf; | |
3697 | ||
3698 | __CFAssertIsNotNegative(appendedLength); | |
3699 | ||
3700 | if (encoding == kCFStringEncodingASCII || encoding == __CFStringGetEightBitStringEncoding()) { | |
3701 | // appendedLength now denotes length in UniChars | |
3702 | } else if (encoding == kCFStringEncodingUnicode) { | |
3703 | UniChar *chars = (UniChar *)cStr; | |
3704 | CFIndex idx, length = appendedLength / sizeof(UniChar); | |
3705 | bool isASCII = true; | |
3706 | for (idx = 0; isASCII && idx < length; idx++) isASCII = (chars[idx] < 0x80); | |
3707 | if (!isASCII) { | |
3708 | appendedIsUnicode = true; | |
3709 | } else { | |
3710 | demoteAppendedUnicode = true; | |
3711 | } | |
3712 | appendedLength = length; | |
3713 | } else { | |
3714 | Boolean usingPassedInMemory = false; | |
3715 | ||
3716 | vBuf.allocator = __CFGetDefaultAllocator(); // We don't want to use client's allocator for temp stuff | |
3717 | vBuf.chars.unicode = NULL; // This will cause the decode function to allocate memory if necessary | |
3718 | ||
3719 | if (!__CFStringDecodeByteStream3(cStr, appendedLength, encoding, __CFStrIsUnicode(str), &vBuf, &usingPassedInMemory, 0)) { | |
3720 | CFAssert1(0, __kCFLogAssertion, "Supplied bytes could not be converted specified encoding %d", encoding); | |
3721 | return; | |
3722 | } | |
3723 | ||
3724 | // If not ASCII, appendedLength now denotes length in UniChars | |
3725 | appendedLength = vBuf.numChars; | |
3726 | appendedIsUnicode = !vBuf.isASCII; | |
3727 | cStr = vBuf.chars.ascii; | |
3728 | freeCStrWhenDone = !usingPassedInMemory && vBuf.shouldFreeChars; | |
3729 | } | |
3730 | ||
3731 | if (CF_IS_OBJC(__kCFStringTypeID, str)) { | |
3732 | if (!appendedIsUnicode && !demoteAppendedUnicode) { | |
3733 | CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "_cfAppendCString:length:", cStr, appendedLength); | |
3734 | } else { | |
3735 | CF_OBJC_FUNCDISPATCH2(__kCFStringTypeID, void, str, "appendCharacters:length:", cStr, appendedLength); | |
3736 | } | |
3737 | } else { | |
3738 | CFIndex strLength; | |
3739 | __CFAssertIsStringAndMutable(str); | |
3740 | strLength = __CFStrLength(str); | |
3741 | ||
3742 | __CFStringChangeSize(str, CFRangeMake(strLength, 0), appendedLength, appendedIsUnicode || __CFStrIsUnicode(str)); | |
3743 | ||
3744 | if (__CFStrIsUnicode(str)) { | |
3745 | UniChar *contents = (UniChar *)__CFStrContents(str); | |
3746 | if (appendedIsUnicode) { | |
3747 | memmove(contents + strLength, cStr, appendedLength * sizeof(UniChar)); | |
3748 | } else { | |
3749 | __CFStrConvertBytesToUnicode(cStr, contents + strLength, appendedLength); | |
3750 | } | |
3751 | } else { | |
3752 | if (demoteAppendedUnicode) { | |
3753 | UniChar *chars = (UniChar *)cStr; | |
3754 | CFIndex idx; | |
3755 | uint8_t *contents = (uint8_t *)__CFStrContents(str) + strLength + __CFStrSkipAnyLengthByte(str); | |
3756 | for (idx = 0; idx < appendedLength; idx++) contents[idx] = (uint8_t)chars[idx]; | |
3757 | } else { | |
3758 | uint8_t *contents = (uint8_t *)__CFStrContents(str); | |
3759 | memmove(contents + strLength + __CFStrSkipAnyLengthByte(str), cStr, appendedLength); | |
3760 | } | |
3761 | } | |
3762 | } | |
3763 | ||
3764 | if (freeCStrWhenDone) CFAllocatorDeallocate(__CFGetDefaultAllocator(), (void *)cStr); | |
3765 | } | |
3766 | ||
3767 | void CFStringAppendPascalString(CFMutableStringRef str, ConstStringPtr pStr, CFStringEncoding encoding) { | |
3768 | __CFStringAppendBytes(str, pStr + 1, (CFIndex)*pStr, encoding); | |
3769 | } | |
3770 | ||
3771 | void CFStringAppendCString(CFMutableStringRef str, const char *cStr, CFStringEncoding encoding) { | |
3772 | __CFStringAppendBytes(str, cStr, strlen(cStr), encoding); | |
3773 | } | |
3774 | ||
3775 | ||
3776 | void CFStringAppendFormat(CFMutableStringRef str, CFDictionaryRef formatOptions, CFStringRef format, ...) { | |
3777 | va_list argList; | |
3778 | ||
3779 | va_start(argList, format); | |
3780 | CFStringAppendFormatAndArguments(str, formatOptions, format, argList); | |
3781 | va_end(argList); | |
3782 | } | |
3783 | ||
3784 | ||
3785 | CFIndex CFStringFindAndReplace(CFMutableStringRef string, CFStringRef stringToFind, CFStringRef replacementString, CFRange rangeToSearch, CFOptionFlags compareOptions) { | |
3786 | CFRange foundRange; | |
3787 | Boolean backwards = compareOptions & kCFCompareBackwards; | |
3788 | UInt32 endIndex = rangeToSearch.location + rangeToSearch.length; | |
3789 | #define MAX_RANGES_ON_STACK (1000 / sizeof(CFRange)) | |
3790 | CFRange rangeBuffer[MAX_RANGES_ON_STACK]; // Used to avoid allocating memory | |
3791 | CFRange *ranges = rangeBuffer; | |
3792 | CFIndex foundCount = 0; | |
3793 | CFIndex capacity = MAX_RANGES_ON_STACK; | |
3794 | ||
3795 | __CFAssertIsStringAndMutable(string); | |
3796 | __CFAssertRangeIsInStringBounds(string, rangeToSearch.location, rangeToSearch.length); | |
3797 | ||
3798 | // Note: This code is very similar to the one in CFStringCreateArrayWithFindResults(). | |
3799 | while ((rangeToSearch.length > 0) && CFStringFindWithOptions(string, stringToFind, rangeToSearch, compareOptions, &foundRange)) { | |
3800 | // Determine the next range | |
3801 | if (backwards) { | |
3802 | rangeToSearch.length = foundRange.location - rangeToSearch.location; | |
3803 | } else { | |
3804 | rangeToSearch.location = foundRange.location + foundRange.length; | |
3805 | rangeToSearch.length = endIndex - rangeToSearch.location; | |
3806 | } | |
3807 | ||
3808 | // If necessary, grow the array | |
3809 | if (foundCount >= capacity) { | |
3810 | bool firstAlloc = (ranges == rangeBuffer) ? true : false; | |
3811 | capacity = (capacity + 4) * 2; | |
3812 | // Note that reallocate with NULL previous pointer is same as allocate | |
3813 | ranges = CFAllocatorReallocate(NULL, firstAlloc ? NULL : ranges, capacity * sizeof(CFRange), 0); | |
3814 | if (firstAlloc) memmove(ranges, rangeBuffer, MAX_RANGES_ON_STACK * sizeof(CFRange)); | |
3815 | } | |
3816 | ranges[foundCount] = foundRange; | |
3817 | foundCount++; | |
3818 | } | |
3819 | ||
3820 | if (foundCount > 0) { | |
3821 | if (backwards) { // Reorder the ranges to be incrementing (better to do this here, then to check other places) | |
3822 | int head = 0; | |
3823 | int tail = foundCount - 1; | |
3824 | while (head < tail) { | |
3825 | CFRange temp = ranges[head]; | |
3826 | ranges[head] = ranges[tail]; | |
3827 | ranges[tail] = temp; | |
3828 | head++; | |
3829 | tail--; | |
3830 | } | |
3831 | } | |
3832 | __CFStringReplaceMultiple(string, ranges, foundCount, replacementString); | |
3833 | if (ranges != rangeBuffer) CFAllocatorDeallocate(NULL, ranges); | |
3834 | } | |
3835 | ||
3836 | return foundCount; | |
3837 | } | |
3838 | ||
3839 | ||
3840 | // This function is here for NSString purposes | |
3841 | // It allows checking for mutability before mutating; this allows NSString to catch invalid mutations | |
3842 | ||
3843 | int __CFStringCheckAndReplace(CFMutableStringRef str, CFRange range, CFStringRef replacement) { | |
3844 | if (!__CFStrIsMutable(str)) return _CFStringErrNotMutable; // These three ifs are always here, for NSString usage | |
3845 | if (!replacement && __CFStringNoteErrors()) return _CFStringErrNilArg; | |
d8925383 A |
3846 | // We use unsigneds as that is what NSRanges do; we use uint64_t do make sure the sum doesn't wrap (otherwise we'd need to do 3 separate checks). This allows catching bad ranges as described in 3375535. (-1,1) |
3847 | if (((uint64_t)((unsigned)range.location)) + ((uint64_t)((unsigned)range.length)) > (uint64_t)__CFStrLength(str) && __CFStringNoteErrors()) return _CFStringErrBounds; | |
9ce05555 A |
3848 | __CFAssertIsStringAndMutable(str); |
3849 | __CFAssertRangeIsInStringBounds(str, range.location, range.length); | |
3850 | __CFStringReplace(str, range, replacement); | |
3851 | return _CFStringErrNone; | |
3852 | } | |
3853 | ||
3854 | // This function determines whether errors which would cause string exceptions should | |
3855 | // be ignored or not | |
3856 | ||
3857 | Boolean __CFStringNoteErrors(void) { | |
3858 | return _CFExecutableLinkedOnOrAfter(CFSystemVersionJaguar) ? true : false; | |
3859 | } | |
3860 | ||
3861 | ||
3862 | ||
3863 | void CFStringPad(CFMutableStringRef string, CFStringRef padString, CFIndex length, CFIndex indexIntoPad) { | |
3864 | CFIndex originalLength; | |
3865 | ||
3866 | __CFAssertIsNotNegative(length); | |
3867 | __CFAssertIsNotNegative(indexIntoPad); | |
3868 | ||
3869 | CF_OBJC_FUNCDISPATCH3(__kCFStringTypeID, void, string, "_cfPad:length:padIndex:", padString, length, indexIntoPad); | |
3870 | ||
3871 | __CFAssertIsStringAndMutable(string); | |
3872 | ||
3873 | originalLength = __CFStrLength(string); | |
3874 | if (length < originalLength) { | |
3875 | __CFStringChangeSize(string, CFRangeMake(length, originalLength - length), 0, false); | |
3876 | } else if (originalLength < length) { | |
3877 | uint8_t *contents; | |
3878 | Boolean isUnicode; | |
3879 | CFIndex charSize; | |
3880 | CFIndex padStringLength; | |
3881 | CFIndex padLength; | |
3882 | CFIndex padRemaining = length - originalLength; | |
3883 | ||
3884 | if (CF_IS_OBJC(__kCFStringTypeID, padString)) { /* ??? Hope the compiler optimizes this away if OBJC_MAPPINGS is not on */ | |
3885 | padStringLength = CFStringGetLength(padString); | |
3886 | isUnicode = true; /* !!! Bad for now */ | |
3887 | } else { | |
3888 | __CFAssertIsString(padString); | |
3889 | padStringLength = __CFStrLength(padString); | |
3890 | isUnicode = __CFStrIsUnicode(string) || __CFStrIsUnicode(padString); | |
3891 | } | |
3892 | ||
3893 | charSize = isUnicode ? sizeof(UniChar) : sizeof(uint8_t); | |
3894 | ||
3895 | __CFStringChangeSize(string, CFRangeMake(originalLength, 0), padRemaining, isUnicode); | |
3896 | ||
3897 | contents = (uint8_t*)__CFStrContents(string) + charSize * originalLength + __CFStrSkipAnyLengthByte(string); | |
3898 | padLength = padStringLength - indexIntoPad; | |
3899 | padLength = padRemaining < padLength ? padRemaining : padLength; | |
3900 | ||
3901 | while (padRemaining > 0) { | |
3902 | if (isUnicode) { | |
3903 | CFStringGetCharacters(padString, CFRangeMake(indexIntoPad, padLength), (UniChar*)contents); | |
3904 | } else { | |
3905 | CFStringGetBytes(padString, CFRangeMake(indexIntoPad, padLength), __CFStringGetEightBitStringEncoding(), 0, false, contents, padRemaining * charSize, NULL); | |
3906 | } | |
3907 | contents += padLength * charSize; | |
3908 | padRemaining -= padLength; | |
3909 | indexIntoPad = 0; | |
3910 | padLength = padRemaining < padLength ? padRemaining : padStringLength; | |
3911 | } | |
3912 | } | |
3913 | } | |
3914 | ||
3915 | void CFStringTrim(CFMutableStringRef string, CFStringRef trimString) { | |
3916 | CFRange range; | |
3917 | CFIndex newStartIndex; | |
3918 | CFIndex length; | |
3919 | ||
3920 | CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfTrim:", trimString); | |
3921 | ||
3922 | __CFAssertIsStringAndMutable(string); | |
3923 | __CFAssertIsString(trimString); | |
3924 | ||
3925 | newStartIndex = 0; | |
3926 | length = __CFStrLength(string); | |
3927 | ||
3928 | while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length - newStartIndex), kCFCompareAnchored, &range)) { | |
3929 | newStartIndex = range.location + range.length; | |
3930 | } | |
3931 | ||
3932 | if (newStartIndex < length) { | |
3933 | CFIndex charSize = __CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t); | |
3934 | uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string); | |
3935 | ||
3936 | length -= newStartIndex; | |
3937 | if (__CFStrLength(trimString) < length) { | |
3938 | while (CFStringFindWithOptions(string, trimString, CFRangeMake(newStartIndex, length), kCFCompareAnchored|kCFCompareBackwards, &range)) { | |
3939 | length = range.location - newStartIndex; | |
3940 | } | |
3941 | } | |
3942 | memmove(contents, contents + newStartIndex * charSize, length * charSize); | |
3943 | __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false); | |
3944 | } else { // Only trimString in string, trim all | |
3945 | __CFStringChangeSize(string, CFRangeMake(0, length), 0, false); | |
3946 | } | |
3947 | } | |
3948 | ||
3949 | void CFStringTrimWhitespace(CFMutableStringRef string) { | |
3950 | CFIndex newStartIndex; | |
3951 | CFIndex length; | |
3952 | CFStringInlineBuffer buffer; | |
3953 | ||
3954 | CF_OBJC_FUNCDISPATCH0(__kCFStringTypeID, void, string, "_cfTrimWS"); | |
3955 | ||
3956 | __CFAssertIsStringAndMutable(string); | |
3957 | ||
3958 | newStartIndex = 0; | |
3959 | length = __CFStrLength(string); | |
3960 | ||
3961 | CFStringInitInlineBuffer(string, &buffer, CFRangeMake(0, length)); | |
3962 | CFIndex buffer_idx = 0; | |
3963 | ||
3964 | while (buffer_idx < length && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet)) | |
3965 | buffer_idx++; | |
3966 | newStartIndex = buffer_idx; | |
3967 | ||
3968 | if (newStartIndex < length) { | |
3969 | uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string); | |
3970 | CFIndex charSize = (__CFStrIsUnicode(string) ? sizeof(UniChar) : sizeof(uint8_t)); | |
3971 | ||
3972 | buffer_idx = length - 1; | |
3973 | while (0 <= buffer_idx && CFUniCharIsMemberOf(__CFStringGetCharacterFromInlineBufferQuick(&buffer, buffer_idx), kCFUniCharWhitespaceAndNewlineCharacterSet)) | |
3974 | buffer_idx--; | |
3975 | length = buffer_idx - newStartIndex + 1; | |
3976 | ||
3977 | memmove(contents, contents + newStartIndex * charSize, length * charSize); | |
3978 | __CFStringChangeSize(string, CFRangeMake(length, __CFStrLength(string) - length), 0, false); | |
3979 | } else { // Whitespace only string | |
3980 | __CFStringChangeSize(string, CFRangeMake(0, length), 0, false); | |
3981 | } | |
3982 | } | |
3983 | ||
3984 | void CFStringLowercase(CFMutableStringRef string, CFLocaleRef locale) { | |
3985 | CFIndex currentIndex = 0; | |
3986 | CFIndex length; | |
3987 | const char *langCode; | |
3988 | Boolean isEightBit = __CFStrIsEightBit(string); | |
3989 | ||
3990 | CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfLowercase:", locale); | |
3991 | ||
3992 | __CFAssertIsStringAndMutable(string); | |
3993 | ||
3994 | length = __CFStrLength(string); | |
3995 | ||
d8925383 | 3996 | langCode = (_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL); |
9ce05555 A |
3997 | |
3998 | if (!langCode && isEightBit) { | |
3999 | uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string); | |
4000 | for (;currentIndex < length;currentIndex++) { | |
4001 | if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') { | |
4002 | contents[currentIndex] += 'a' - 'A'; | |
4003 | } else if (contents[currentIndex] > 127) { | |
4004 | break; | |
4005 | } | |
4006 | } | |
4007 | } | |
4008 | ||
4009 | if (currentIndex < length) { | |
4010 | UniChar *contents; | |
4011 | UniChar mappedCharacters[MAX_CASE_MAPPING_BUF]; | |
4012 | CFIndex mappedLength; | |
4013 | UTF32Char currentChar; | |
4014 | UInt32 flags = 0; | |
4015 | ||
4016 | if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); | |
4017 | ||
4018 | contents = (UniChar*)__CFStrContents(string); | |
4019 | ||
4020 | for (;currentIndex < length;currentIndex++) { | |
4021 | ||
4022 | if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) { | |
4023 | currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]); | |
4024 | } else { | |
4025 | currentChar = contents[currentIndex]; | |
4026 | } | |
4027 | flags = ((langCode || (currentChar == 0x03A3)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToLowercase, langCode, flags) : 0); | |
4028 | ||
4029 | mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToLowercase, flags, langCode); | |
4030 | if (mappedLength > 0) contents[currentIndex] = *mappedCharacters; | |
4031 | ||
4032 | if (currentChar > 0xFFFF) { // Non-BMP char | |
4033 | switch (mappedLength) { | |
4034 | case 0: | |
4035 | __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true); | |
4036 | contents = (UniChar*)__CFStrContents(string); | |
4037 | length -= 2; | |
4038 | break; | |
4039 | ||
4040 | case 1: | |
4041 | __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true); | |
4042 | contents = (UniChar*)__CFStrContents(string); | |
4043 | --length; | |
4044 | break; | |
4045 | ||
4046 | case 2: | |
4047 | contents[++currentIndex] = mappedCharacters[1]; | |
4048 | break; | |
4049 | ||
4050 | default: | |
4051 | --mappedLength; // Skip the current char | |
4052 | __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true); | |
4053 | contents = (UniChar*)__CFStrContents(string); | |
4054 | memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar)); | |
4055 | length += (mappedLength - 1); | |
4056 | currentIndex += mappedLength; | |
4057 | break; | |
4058 | } | |
4059 | } else if (mappedLength == 0) { | |
4060 | __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true); | |
4061 | contents = (UniChar*)__CFStrContents(string); | |
4062 | --length; | |
4063 | } else if (mappedLength > 1) { | |
4064 | --mappedLength; // Skip the current char | |
4065 | __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true); | |
4066 | contents = (UniChar*)__CFStrContents(string); | |
4067 | memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar)); | |
4068 | length += mappedLength; | |
4069 | currentIndex += mappedLength; | |
4070 | } | |
4071 | } | |
4072 | } | |
4073 | } | |
4074 | ||
4075 | void CFStringUppercase(CFMutableStringRef string, CFLocaleRef locale) { | |
4076 | CFIndex currentIndex = 0; | |
4077 | CFIndex length; | |
4078 | const char *langCode; | |
4079 | Boolean isEightBit = __CFStrIsEightBit(string); | |
4080 | ||
4081 | CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfUppercase:", locale); | |
4082 | ||
4083 | __CFAssertIsStringAndMutable(string); | |
4084 | ||
4085 | length = __CFStrLength(string); | |
4086 | ||
d8925383 | 4087 | langCode = (_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL); |
9ce05555 A |
4088 | |
4089 | if (!langCode && isEightBit) { | |
4090 | uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string); | |
4091 | for (;currentIndex < length;currentIndex++) { | |
4092 | if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') { | |
4093 | contents[currentIndex] -= 'a' - 'A'; | |
4094 | } else if (contents[currentIndex] > 127) { | |
4095 | break; | |
4096 | } | |
4097 | } | |
4098 | } | |
4099 | ||
4100 | if (currentIndex < length) { | |
4101 | UniChar *contents; | |
4102 | UniChar mappedCharacters[MAX_CASE_MAPPING_BUF]; | |
4103 | CFIndex mappedLength; | |
4104 | UTF32Char currentChar; | |
4105 | UInt32 flags = 0; | |
4106 | ||
4107 | if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); | |
4108 | ||
4109 | contents = (UniChar*)__CFStrContents(string); | |
4110 | ||
4111 | for (;currentIndex < length;currentIndex++) { | |
4112 | if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) { | |
4113 | currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]); | |
4114 | } else { | |
4115 | currentChar = contents[currentIndex]; | |
4116 | } | |
4117 | ||
4118 | flags = (langCode ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, kCFUniCharToUppercase, langCode, flags) : 0); | |
4119 | ||
4120 | mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, kCFUniCharToUppercase, flags, langCode); | |
4121 | if (mappedLength > 0) contents[currentIndex] = *mappedCharacters; | |
4122 | ||
4123 | if (currentChar > 0xFFFF) { // Non-BMP char | |
4124 | switch (mappedLength) { | |
4125 | case 0: | |
4126 | __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true); | |
4127 | contents = (UniChar*)__CFStrContents(string); | |
4128 | length -= 2; | |
4129 | break; | |
4130 | ||
4131 | case 1: | |
4132 | __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true); | |
4133 | contents = (UniChar*)__CFStrContents(string); | |
4134 | --length; | |
4135 | break; | |
4136 | ||
4137 | case 2: | |
4138 | contents[++currentIndex] = mappedCharacters[1]; | |
4139 | break; | |
4140 | ||
4141 | default: | |
4142 | --mappedLength; // Skip the current char | |
4143 | __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true); | |
4144 | contents = (UniChar*)__CFStrContents(string); | |
4145 | memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar)); | |
4146 | length += (mappedLength - 1); | |
4147 | currentIndex += mappedLength; | |
4148 | break; | |
4149 | } | |
4150 | } else if (mappedLength == 0) { | |
4151 | __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true); | |
4152 | contents = (UniChar*)__CFStrContents(string); | |
4153 | --length; | |
4154 | } else if (mappedLength > 1) { | |
4155 | --mappedLength; // Skip the current char | |
4156 | __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true); | |
4157 | contents = (UniChar*)__CFStrContents(string); | |
4158 | memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar)); | |
4159 | length += mappedLength; | |
4160 | currentIndex += mappedLength; | |
4161 | } | |
4162 | } | |
4163 | } | |
4164 | } | |
4165 | ||
4166 | ||
4167 | void CFStringCapitalize(CFMutableStringRef string, CFLocaleRef locale) { | |
4168 | CFIndex currentIndex = 0; | |
4169 | CFIndex length; | |
4170 | const char *langCode; | |
4171 | Boolean isEightBit = __CFStrIsEightBit(string); | |
4172 | Boolean isLastCased = false; | |
4173 | static const uint8_t *caseIgnorableForBMP = NULL; | |
4174 | ||
4175 | CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfCapitalize:", locale); | |
4176 | ||
4177 | __CFAssertIsStringAndMutable(string); | |
4178 | ||
4179 | length = __CFStrLength(string); | |
4180 | ||
4181 | if (NULL == caseIgnorableForBMP) caseIgnorableForBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharCaseIgnorableCharacterSet, 0); | |
4182 | ||
d8925383 | 4183 | langCode = (_CFCanUseLocale(locale) ? _CFStrGetLanguageIdentifierForLocale(locale) : NULL); |
9ce05555 A |
4184 | |
4185 | if (!langCode && isEightBit) { | |
4186 | uint8_t *contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string); | |
4187 | for (;currentIndex < length;currentIndex++) { | |
4188 | if (contents[currentIndex] > 127) { | |
4189 | break; | |
4190 | } else if (contents[currentIndex] >= 'A' && contents[currentIndex] <= 'Z') { | |
4191 | contents[currentIndex] += (isLastCased ? 'a' - 'A' : 0); | |
4192 | isLastCased = true; | |
4193 | } else if (contents[currentIndex] >= 'a' && contents[currentIndex] <= 'z') { | |
4194 | contents[currentIndex] -= (!isLastCased ? 'a' - 'A' : 0); | |
4195 | isLastCased = true; | |
4196 | } else if (!CFUniCharIsMemberOfBitmap(contents[currentIndex], caseIgnorableForBMP)) { | |
4197 | isLastCased = false; | |
4198 | } | |
4199 | } | |
4200 | } | |
4201 | ||
4202 | if (currentIndex < length) { | |
4203 | UniChar *contents; | |
4204 | UniChar mappedCharacters[MAX_CASE_MAPPING_BUF]; | |
4205 | CFIndex mappedLength; | |
4206 | UTF32Char currentChar; | |
4207 | UInt32 flags = 0; | |
4208 | ||
4209 | if (isEightBit) __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); | |
4210 | ||
4211 | contents = (UniChar*)__CFStrContents(string); | |
4212 | ||
4213 | for (;currentIndex < length;currentIndex++) { | |
4214 | if (CFUniCharIsSurrogateHighCharacter(contents[currentIndex]) && (currentIndex + 1 < length) && CFUniCharIsSurrogateLowCharacter(contents[currentIndex + 1])) { | |
4215 | currentChar = CFUniCharGetLongCharacterForSurrogatePair(contents[currentIndex], contents[currentIndex + 1]); | |
4216 | } else { | |
4217 | currentChar = contents[currentIndex]; | |
4218 | } | |
4219 | flags = ((langCode || ((currentChar == 0x03A3) && isLastCased)) ? CFUniCharGetConditionalCaseMappingFlags(currentChar, contents, currentIndex, length, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), langCode, flags) : 0); | |
4220 | ||
4221 | mappedLength = CFUniCharMapCaseTo(currentChar, mappedCharacters, MAX_CASE_MAPPING_BUF, (isLastCased ? kCFUniCharToLowercase : kCFUniCharToTitlecase), flags, langCode); | |
4222 | if (mappedLength > 0) contents[currentIndex] = *mappedCharacters; | |
4223 | ||
4224 | if (currentChar > 0xFFFF) { // Non-BMP char | |
4225 | switch (mappedLength) { | |
4226 | case 0: | |
4227 | __CFStringChangeSize(string, CFRangeMake(currentIndex, 2), 0, true); | |
4228 | contents = (UniChar*)__CFStrContents(string); | |
4229 | length -= 2; | |
4230 | break; | |
4231 | ||
4232 | case 1: | |
4233 | __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 1), 0, true); | |
4234 | contents = (UniChar*)__CFStrContents(string); | |
4235 | --length; | |
4236 | break; | |
4237 | ||
4238 | case 2: | |
4239 | contents[++currentIndex] = mappedCharacters[1]; | |
4240 | break; | |
4241 | ||
4242 | default: | |
4243 | --mappedLength; // Skip the current char | |
4244 | __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength - 1, true); | |
4245 | contents = (UniChar*)__CFStrContents(string); | |
4246 | memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar)); | |
4247 | length += (mappedLength - 1); | |
4248 | currentIndex += mappedLength; | |
4249 | break; | |
4250 | } | |
4251 | } else if (mappedLength == 0) { | |
4252 | __CFStringChangeSize(string, CFRangeMake(currentIndex, 1), 0, true); | |
4253 | contents = (UniChar*)__CFStrContents(string); | |
4254 | --length; | |
4255 | } else if (mappedLength > 1) { | |
4256 | --mappedLength; // Skip the current char | |
4257 | __CFStringChangeSize(string, CFRangeMake(currentIndex + 1, 0), mappedLength, true); | |
4258 | contents = (UniChar*)__CFStrContents(string); | |
4259 | memmove(contents + currentIndex + 1, mappedCharacters + 1, mappedLength * sizeof(UniChar)); | |
4260 | length += mappedLength; | |
4261 | currentIndex += mappedLength; | |
4262 | } | |
4263 | ||
4264 | if (!((currentChar > 0xFFFF) ? CFUniCharIsMemberOf(currentChar, kCFUniCharCaseIgnorableCharacterSet) : CFUniCharIsMemberOfBitmap(currentChar, caseIgnorableForBMP))) { // We have non-caseignorable here | |
4265 | isLastCased = ((CFUniCharIsMemberOf(currentChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(currentChar, kCFUniCharLowercaseLetterCharacterSet)) ? true : false); | |
4266 | } | |
4267 | } | |
4268 | } | |
4269 | } | |
4270 | ||
d8925383 | 4271 | |
9ce05555 A |
/* Number of UTF-32 slots in the on-stack decomposition buffer */
#define MAX_DECOMP_BUF 64

/* Hangul syllable arithmetic: first code point of each range... */
#define HANGUL_SBASE 0xAC00	/* precomposed syllables */
#define HANGUL_LBASE 0x1100	/* leading consonants */
#define HANGUL_VBASE 0x1161	/* vowels */
#define HANGUL_TBASE 0x11A7	/* trailing consonants */

/* ...and the number of entries in each */
#define HANGUL_LCOUNT 19
#define HANGUL_VCOUNT 21
#define HANGUL_TCOUNT 28
#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
#define HANGUL_SCOUNT (HANGUL_LCOUNT * HANGUL_NCOUNT)	/* 11172 */
4283 | ||
4284 | CF_INLINE uint32_t __CFGetUTF16Length(const UTF32Char *characters, uint32_t utf32Length) { | |
4285 | const UTF32Char *limit = characters + utf32Length; | |
4286 | uint32_t length = 0; | |
4287 | ||
4288 | while (characters < limit) length += (*(characters++) > 0xFFFF ? 2 : 1); | |
4289 | ||
4290 | return length; | |
4291 | } | |
4292 | ||
4293 | CF_INLINE void __CFFillInUTF16(const UTF32Char *characters, UTF16Char *dst, uint32_t utf32Length) { | |
4294 | const UTF32Char *limit = characters + utf32Length; | |
4295 | UTF32Char currentChar; | |
4296 | ||
4297 | while (characters < limit) { | |
4298 | currentChar = *(characters++); | |
4299 | if (currentChar > 0xFFFF) { | |
4300 | currentChar -= 0x10000; | |
4301 | *(dst++) = (UTF16Char)((currentChar >> 10) + 0xD800UL); | |
4302 | *(dst++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL); | |
4303 | } else { | |
4304 | *(dst++) = currentChar; | |
4305 | } | |
4306 | } | |
4307 | } | |
4308 | ||
4309 | void CFStringNormalize(CFMutableStringRef string, CFStringNormalizationForm theForm) { | |
4310 | CFIndex currentIndex = 0; | |
4311 | CFIndex length; | |
4312 | bool needToReorder = true; | |
4313 | ||
4314 | CF_OBJC_FUNCDISPATCH1(__kCFStringTypeID, void, string, "_cfNormalize:", theForm); | |
4315 | ||
4316 | __CFAssertIsStringAndMutable(string); | |
4317 | ||
4318 | length = __CFStrLength(string); | |
4319 | ||
4320 | if (__CFStrIsEightBit(string)) { | |
4321 | uint8_t *contents; | |
4322 | ||
4323 | if (theForm == kCFStringNormalizationFormC) return; // 8bit form has no decomposition | |
4324 | ||
4325 | contents = (uint8_t*)__CFStrContents(string) + __CFStrSkipAnyLengthByte(string); | |
4326 | ||
4327 | for (;currentIndex < length;currentIndex++) { | |
4328 | if (contents[currentIndex] > 127) { | |
4329 | __CFStringChangeSize(string, CFRangeMake(0, 0), 0, true); // need to do harm way | |
4330 | needToReorder = false; | |
4331 | break; | |
4332 | } | |
4333 | } | |
4334 | } | |
4335 | ||
4336 | if (currentIndex < length) { | |
4337 | UTF16Char *limit = (UTF16Char *)__CFStrContents(string) + length; | |
4338 | UTF16Char *contents = (UTF16Char *)__CFStrContents(string) + currentIndex; | |
4339 | UTF32Char buffer[MAX_DECOMP_BUF]; | |
4340 | UTF32Char *mappedCharacters = buffer; | |
4341 | CFIndex allocatedLength = MAX_DECOMP_BUF; | |
4342 | CFIndex mappedLength; | |
4343 | CFIndex currentLength; | |
4344 | UTF32Char currentChar; | |
4345 | ||
4346 | while (contents < limit) { | |
4347 | if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) { | |
4348 | currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1)); | |
4349 | currentLength = 2; | |
4350 | contents += 2; | |
4351 | } else { | |
4352 | currentChar = *(contents++); | |
4353 | currentLength = 1; | |
4354 | } | |
4355 | ||
4356 | mappedLength = 0; | |
4357 | ||
4358 | if (CFUniCharIsMemberOf(currentChar, kCFUniCharCanonicalDecomposableCharacterSet) && !CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) { | |
4359 | if ((theForm & kCFStringNormalizationFormC) == 0 || currentChar < HANGUL_SBASE || currentChar > (HANGUL_SBASE + HANGUL_SCOUNT)) { // We don't have to decompose Hangul Syllables if we're precomposing again | |
4360 | mappedLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters, MAX_DECOMP_BUF); | |
4361 | } | |
4362 | } | |
4363 | ||
4364 | if ((needToReorder || (theForm & kCFStringNormalizationFormC)) && ((contents < limit) || (mappedLength == 0))) { | |
4365 | if (mappedLength > 0) { | |
4366 | if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) { | |
4367 | currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1)); | |
4368 | } else { | |
4369 | currentChar = *contents; | |
4370 | } | |
4371 | } | |
4372 | ||
4373 | if (CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) { | |
4374 | uint32_t decompLength; | |
4375 | ||
4376 | if (mappedLength == 0) { | |
4377 | contents -= (currentChar & 0xFFFF0000 ? 2 : 1); | |
4378 | if (currentIndex > 0) { | |
4379 | if (CFUniCharIsSurrogateLowCharacter(*(contents - 1)) && (currentIndex > 1) && CFUniCharIsSurrogateHighCharacter(*(contents - 2))) { | |
4380 | *mappedCharacters = CFUniCharGetLongCharacterForSurrogatePair(*(contents - 2), *(contents - 1)); | |
4381 | currentIndex -= 2; | |
4382 | currentLength += 2; | |
4383 | } else { | |
4384 | *mappedCharacters = *(contents - 1); | |
4385 | --currentIndex; | |
4386 | ++currentLength; | |
4387 | } | |
4388 | mappedLength = 1; | |
4389 | } | |
4390 | } else { | |
4391 | currentLength += (currentChar & 0xFFFF0000 ? 2 : 1); | |
4392 | } | |
4393 | contents += (currentChar & 0xFFFF0000 ? 2 : 1); | |
4394 | ||
4395 | if (CFUniCharIsMemberOf(currentChar, kCFUniCharDecomposableCharacterSet)) { // Vietnamese accent, etc. | |
4396 | decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength); | |
4397 | mappedLength += decompLength; | |
4398 | } else { | |
4399 | mappedCharacters[mappedLength++] = currentChar; | |
4400 | } | |
4401 | ||
4402 | while (contents < limit) { | |
4403 | if (CFUniCharIsSurrogateHighCharacter(*contents) && (contents + 1 < limit) && CFUniCharIsSurrogateLowCharacter(*(contents + 1))) { | |
4404 | currentChar = CFUniCharGetLongCharacterForSurrogatePair(*contents, *(contents + 1)); | |
4405 | } else { | |
4406 | currentChar = *contents; | |
4407 | } | |
4408 | if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break; | |
4409 | if (currentChar & 0xFFFF0000) { | |
4410 | contents += 2; | |
4411 | currentLength += 2; | |
4412 | } else { | |
4413 | ++contents; | |
4414 | ++currentLength; | |
4415 | } | |
4416 | if (mappedLength == allocatedLength) { | |
4417 | allocatedLength += MAX_DECOMP_BUF; | |
4418 | if (mappedCharacters == buffer) { | |
4419 | mappedCharacters = (UTF32Char *)CFAllocatorAllocate(NULL, allocatedLength * sizeof(UTF32Char), 0); | |
4420 | memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char)); | |
4421 | } else { | |
4422 | mappedCharacters = (UTF32Char *)CFAllocatorReallocate(NULL, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0); | |
4423 | } | |
4424 | } | |
4425 | if (CFUniCharIsMemberOf(currentChar, kCFUniCharDecomposableCharacterSet)) { // Vietnamese accent, etc. | |
4426 | decompLength = CFUniCharDecomposeCharacter(currentChar, mappedCharacters + mappedLength, MAX_DECOMP_BUF - mappedLength); | |
4427 | mappedLength += decompLength; | |
4428 | } else { | |
4429 | mappedCharacters[mappedLength++] = currentChar; | |
4430 | } | |
4431 | } | |
4432 | } | |
4433 | if (needToReorder && mappedLength > 1) CFUniCharPrioritySort(mappedCharacters, mappedLength); | |
4434 | } | |
4435 | ||
4436 | if (theForm & kCFStringNormalizationFormKD) { | |
4437 | CFIndex newLength = 0; | |
4438 | ||
4439 | if (mappedLength == 0 && CFUniCharIsMemberOf(currentChar, kCFUniCharCompatibilityDecomposableCharacterSet)) { | |
4440 | mappedCharacters[mappedLength++] = currentChar; | |
4441 | } | |
4442 | while (newLength < mappedLength) { | |
4443 | newLength = CFUniCharCompatibilityDecompose(mappedCharacters, mappedLength, allocatedLength); | |
4444 | if (newLength == 0) { | |
4445 | allocatedLength += MAX_DECOMP_BUF; | |
4446 | if (mappedCharacters == buffer) { | |
4447 | mappedCharacters = (UTF32Char *)CFAllocatorAllocate(NULL, allocatedLength * sizeof(UTF32Char), 0); | |
4448 | memmove(mappedCharacters, buffer, MAX_DECOMP_BUF * sizeof(UTF32Char)); | |
4449 | } else { | |
4450 | mappedCharacters = (UTF32Char *)CFAllocatorReallocate(NULL, mappedCharacters, allocatedLength * sizeof(UTF32Char), 0); | |
4451 | } | |
4452 | } | |
4453 | } | |
4454 | mappedLength = newLength; | |
4455 | } | |
4456 | ||
4457 | if (theForm & kCFStringNormalizationFormC) { | |
4458 | if (mappedLength > 1) { | |
4459 | CFIndex consumedLength = 1; | |
4460 | UTF32Char nextChar; | |
4461 | UTF32Char *currentBase = mappedCharacters; | |
4462 | uint8_t currentClass, lastClass = 0; | |
4463 | const uint8_t *bmpClassTable = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0); | |
4464 | bool didCombine = false; | |
4465 | ||
4466 | currentChar = *mappedCharacters; | |
4467 | ||
4468 | while (consumedLength < mappedLength) { | |
4469 | nextChar = mappedCharacters[consumedLength]; | |
4470 | currentClass = (nextChar & 0xFFFF0000 ? CFUniCharGetUnicodeProperty(nextChar, kCFUniCharCombiningProperty) : CFUniCharGetCombiningPropertyForCharacter(nextChar, bmpClassTable)); | |
4471 | ||
4472 | if (theForm & kCFStringNormalizationFormKD) { | |
4473 | if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { | |
4474 | SInt8 lIndex = currentChar - HANGUL_LBASE; | |
4475 | ||
4476 | if ((0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) { | |
4477 | SInt16 vIndex = nextChar - HANGUL_VBASE; | |
4478 | ||
4479 | if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) { | |
4480 | SInt16 tIndex = 0; | |
4481 | CFIndex usedLength = mappedLength; | |
4482 | ||
4483 | mappedCharacters[consumedLength++] = 0xFFFD; | |
4484 | ||
4485 | if (consumedLength < mappedLength) { | |
4486 | tIndex = mappedCharacters[consumedLength] - HANGUL_TBASE; | |
4487 | if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) { | |
4488 | tIndex = 0; | |
4489 | } else { | |
4490 | mappedCharacters[consumedLength++] = 0xFFFD; | |
4491 | } | |
4492 | } | |
4493 | *currentBase = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE; | |
4494 | ||
4495 | while (--usedLength > 0) { | |
4496 | if (mappedCharacters[usedLength] == 0xFFFD) { | |
4497 | --mappedLength; | |
4498 | --consumedLength; | |
4499 | memmove(mappedCharacters + usedLength, mappedCharacters + usedLength + 1, (mappedLength - usedLength) * sizeof(UTF32Char)); | |
4500 | } | |
4501 | } | |
4502 | currentBase = mappedCharacters + consumedLength; | |
4503 | currentChar = *currentBase; | |
4504 | ++consumedLength; | |
4505 | ||
4506 | continue; | |
4507 | } | |
4508 | } | |
4509 | } | |
4510 | if (!CFUniCharIsMemberOf(nextChar, kCFUniCharNonBaseCharacterSet)) { | |
4511 | *currentBase = currentChar; | |
4512 | currentBase = mappedCharacters + consumedLength; | |
4513 | currentChar = nextChar; | |
4514 | ++consumedLength; | |
4515 | continue; | |
4516 | } | |
4517 | } | |
4518 | if ((lastClass == 0) || (currentClass != lastClass)) { | |
4519 | nextChar = CFUniCharPrecomposeCharacter(currentChar, nextChar); | |
4520 | if (nextChar == 0xFFFD) { | |
4521 | lastClass = currentClass; | |
4522 | } else { | |
4523 | mappedCharacters[consumedLength] = 0xFFFD; | |
4524 | didCombine = true; | |
4525 | currentChar = nextChar; | |
4526 | lastClass = 0; | |
4527 | } | |
4528 | } | |
4529 | ++consumedLength; | |
4530 | } | |
4531 | ||
4532 | *currentBase = currentChar; | |
4533 | if (didCombine) { | |
4534 | consumedLength = mappedLength; | |
4535 | while (--consumedLength > 0) { | |
4536 | if (mappedCharacters[consumedLength] == 0xFFFD) { | |
4537 | --mappedLength; | |
4538 | memmove(mappedCharacters + consumedLength, mappedCharacters + consumedLength + 1, (mappedLength - consumedLength) * sizeof(UTF32Char)); | |
4539 | } | |
4540 | } | |
4541 | } | |
4542 | } else if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo | |
4543 | SInt8 lIndex = currentChar - HANGUL_LBASE; | |
4544 | ||
4545 | if ((contents < limit) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) { | |
4546 | SInt16 vIndex = *contents - HANGUL_VBASE; | |
4547 | ||
4548 | if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) { | |
4549 | SInt16 tIndex = 0; | |
4550 | ||
4551 | ++contents; ++currentLength; | |
4552 | ||
4553 | if (contents < limit) { | |
4554 | tIndex = *contents - HANGUL_TBASE; | |
4555 | if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) { | |
4556 | tIndex = 0; | |
4557 | } else { | |
4558 | ++contents; ++currentLength; | |
4559 | } | |
4560 | } | |
4561 | *mappedCharacters = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE; | |
4562 | mappedLength = 1; | |
4563 | } | |
4564 | } | |
4565 | } | |
4566 | } | |
4567 | ||
4568 | if (mappedLength > 0) { | |
4569 | CFIndex utf16Length = __CFGetUTF16Length(mappedCharacters, mappedLength); | |
4570 | ||
4571 | if (utf16Length != currentLength) { | |
4572 | __CFStringChangeSize(string, CFRangeMake(currentIndex, currentLength), utf16Length, true); | |
4573 | currentLength = utf16Length; | |
4574 | } | |
4575 | contents = (UTF16Char *)__CFStrContents(string); | |
4576 | limit = contents + __CFStrLength(string); | |
4577 | contents += currentIndex; | |
4578 | __CFFillInUTF16(mappedCharacters, contents, mappedLength); | |
4579 | contents += utf16Length; | |
4580 | } | |
4581 | currentIndex += currentLength; | |
4582 | } | |
4583 | ||
4584 | if (mappedCharacters != buffer) CFAllocatorDeallocate(NULL, mappedCharacters); | |
4585 | } | |
4586 | } | |
4587 | ||
9ce05555 A |
4588 | |
/* Bits stored in CFFormatSpec.flags while a format specification is being parsed. */
enum {
    kCFStringFormatZeroFlag  = 0x01,	// '0' seen; if not, padding is space char
    kCFStringFormatMinusFlag = 0x02,	// '-' seen; if not, no flag implied
    kCFStringFormatPlusFlag  = 0x04,	// '+' seen; if not, no flag implied, overrides space
    kCFStringFormatSpaceFlag = 0x08	// ' ' seen; if not, no flag implied
};
4595 | ||
4596 | typedef struct { | |
4597 | int16_t size; | |
4598 | int16_t type; | |
4599 | SInt32 loc; | |
4600 | SInt32 len; | |
4601 | SInt32 widthArg; | |
4602 | SInt32 precArg; | |
4603 | uint32_t flags; | |
4604 | int8_t mainArgNum; | |
4605 | int8_t precArgNum; | |
4606 | int8_t widthArgNum; | |
4607 | int8_t unused1; | |
4608 | } CFFormatSpec; | |
4609 | ||
/* One argument fetched from the va_list, together with the type and size
   it was fetched as. */
typedef struct {
    int16_t type;	// CFFormat*Type the argument was collected as; 0 when the slot is unused
    int16_t size;	// CFFormatSize* value that selected the integer width
    union {
        int64_t int64Value;	// integer arguments, widened to 64 bits
        double doubleValue;	// floating point arguments
        void *pointerValue;	// pointer, string and object arguments
    } value;
} CFPrintValue;
4619 | ||
/* Argument size selectors. The values are ordinals, not byte counts; the number
   in each name is the width in bytes the argument is read with. */
enum {
    CFFormatDefaultSize = 0,
    CFFormatSize1,	/* == 1 */
    CFFormatSize2,	/* == 2 */
    CFFormatSize4,	/* == 3 */
    CFFormatSize8,	/* == 4 */
    CFFormatSize16	/* == 5, unused */
};
4628 | ||
/* Kinds of format pieces. Numbering starts at 32 and runs consecutively; 0 is left
   free and is what a spec or value slot holds when it has no type. */
enum {
    CFFormatLiteralType = 32,	/* literal text copied to the output */
    CFFormatLongType,		/* 33: integer conversions */
    CFFormatDoubleType,		/* 34: floating point conversions */
    CFFormatPointerType,	/* 35 */
    CFFormatObjectType,		/* 36: handled specially */	/* ??? not used anymore, can be removed? */
    CFFormatCFType,		/* 37: handled specially */
    CFFormatUnicharsType,	/* 38: handled specially */
    CFFormatCharsType,		/* 39: handled specially */
    CFFormatPascalCharsType,	/* 40: handled specially */
    CFFormatSingleUnicharType	/* 41: handled specially */
};
4641 | ||
4642 | CF_INLINE void __CFParseFormatSpec(const UniChar *uformat, const uint8_t *cformat, SInt32 *fmtIdx, SInt32 fmtLen, CFFormatSpec *spec) { | |
4643 | Boolean seenDot = false; | |
4644 | for (;;) { | |
4645 | UniChar ch; | |
4646 | if (fmtLen <= *fmtIdx) return; /* no type */ | |
4647 | if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++]; | |
4648 | reswtch:switch (ch) { | |
4649 | case '#': // ignored for now | |
4650 | break; | |
4651 | case 0x20: | |
4652 | if (!(spec->flags & kCFStringFormatPlusFlag)) spec->flags |= kCFStringFormatSpaceFlag; | |
4653 | break; | |
4654 | case '-': | |
4655 | spec->flags |= kCFStringFormatMinusFlag; | |
4656 | spec->flags &= ~kCFStringFormatZeroFlag; // remove zero flag | |
4657 | break; | |
4658 | case '+': | |
4659 | spec->flags |= kCFStringFormatPlusFlag; | |
4660 | spec->flags &= ~kCFStringFormatSpaceFlag; // remove space flag | |
4661 | break; | |
4662 | case '0': | |
4663 | if (!(spec->flags & kCFStringFormatMinusFlag)) spec->flags |= kCFStringFormatZeroFlag; | |
4664 | break; | |
4665 | case 'h': | |
4666 | spec->size = CFFormatSize2; | |
4667 | break; | |
4668 | case 'l': | |
4669 | if (*fmtIdx < fmtLen) { | |
4670 | // fetch next character, don't increment fmtIdx | |
4671 | if (cformat) ch = (UniChar)cformat[(*fmtIdx)]; else ch = uformat[(*fmtIdx)]; | |
4672 | if ('l' == ch) { // 'll' for long long, like 'q' | |
4673 | (*fmtIdx)++; | |
4674 | spec->size = CFFormatSize8; | |
4675 | break; | |
4676 | } | |
4677 | } | |
4678 | spec->size = CFFormatSize4; | |
4679 | break; | |
4680 | case 'q': | |
4681 | spec->size = CFFormatSize8; | |
4682 | break; | |
4683 | case 'c': | |
4684 | spec->type = CFFormatLongType; | |
4685 | spec->size = CFFormatSize1; | |
4686 | return; | |
4687 | case 'O': case 'o': case 'D': case 'd': case 'i': case 'U': case 'u': case 'x': case 'X': | |
4688 | spec->type = CFFormatLongType; | |
4689 | return; | |
4690 | case 'e': case 'E': case 'f': case 'g': case 'G': | |
4691 | spec->type = CFFormatDoubleType; | |
4692 | spec->size = CFFormatSize8; | |
4693 | return; | |
4694 | case 'n': case 'p': /* %n is not handled correctly currently */ | |
4695 | spec->type = CFFormatPointerType; | |
4696 | spec->size = CFFormatSize4; | |
4697 | return; | |
4698 | case 's': | |
4699 | spec->type = CFFormatCharsType; | |
4700 | spec->size = CFFormatSize4; | |
4701 | return; | |
4702 | case 'S': | |
4703 | spec->type = CFFormatUnicharsType; | |
4704 | spec->size = CFFormatSize4; | |
4705 | return; | |
4706 | case 'C': | |
4707 | spec->type = CFFormatSingleUnicharType; | |
4708 | spec->size = CFFormatSize2; | |
4709 | return; | |
4710 | case 'P': | |
4711 | spec->type = CFFormatPascalCharsType; | |
4712 | spec->size = CFFormatSize4; | |
4713 | return; | |
4714 | case '@': | |
4715 | spec->type = CFFormatCFType; | |
4716 | spec->size = CFFormatSize4; | |
4717 | return; | |
4718 | case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { | |
4719 | int64_t number = 0; | |
4720 | do { | |
4721 | number = 10 * number + (ch - '0'); | |
4722 | if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++]; | |
4723 | } while ((UInt32)(ch - '0') <= 9); | |
4724 | if ('$' == ch) { | |
4725 | if (-2 == spec->precArgNum) { | |
4726 | spec->precArgNum = number - 1; // Arg numbers start from 1 | |
4727 | } else if (-2 == spec->widthArgNum) { | |
4728 | spec->widthArgNum = number - 1; // Arg numbers start from 1 | |
4729 | } else { | |
4730 | spec->mainArgNum = number - 1; // Arg numbers start from 1 | |
4731 | } | |
4732 | break; | |
4733 | } else if (seenDot) { /* else it's either precision or width */ | |
4734 | spec->precArg = (SInt32)number; | |
4735 | } else { | |
4736 | spec->widthArg = (SInt32)number; | |
4737 | } | |
4738 | goto reswtch; | |
4739 | } | |
4740 | case '*': | |
4741 | spec->widthArgNum = -2; | |
4742 | break; | |
4743 | case '.': | |
4744 | seenDot = true; | |
4745 | if (cformat) ch = (UniChar)cformat[(*fmtIdx)++]; else ch = uformat[(*fmtIdx)++]; | |
4746 | if ('*' == ch) { | |
4747 | spec->precArgNum = -2; | |
4748 | break; | |
4749 | } | |
4750 | goto reswtch; | |
4751 | default: | |
4752 | spec->type = CFFormatLiteralType; | |
4753 | return; | |
4754 | } | |
4755 | } | |
4756 | } | |
4757 | ||
d8925383 A |
#if defined(__WIN32__)
/* Stand-in for snprintf() on Windows, built on _vsnprintf().
   Unlike _vsnprintf(), the result is always NUL-terminated when n > 0: if the output
   filled or overflowed the buffer, the last byte is overwritten with the terminator.
   The return value is passed through unchanged from _vsnprintf(), so it is negative on
   truncation rather than the would-be length. */
static int snprintf(char *b, size_t n, const char * f, ...) {
    int retval;
    va_list args;
    va_start (args, f);
    retval = _vsnprintf(b, n, f, args);
    va_end(args);
    if (n > 0 && (retval < 0 || (size_t)retval >= n)) b[n - 1] = '\0';
    return retval;
}
#endif
4768 | ||
4769 | /* ??? It ignores the formatOptions argument. | |
4770 | ??? %s depends on handling of encodings by __CFStringAppendBytes | |
4771 | */ | |
4772 | void CFStringAppendFormatAndArguments(CFMutableStringRef outputString, CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) { | |
4773 | _CFStringAppendFormatAndArgumentsAux(outputString, NULL, formatOptions, formatString, args); | |
4774 | } | |
4775 | ||
/* Formats one value into `buffer` using `formatBuffer` as the printf-style format.

   TYPE is the C type the value is passed as; WHAT is the expression producing it (it is
   parenthesized before the cast, so any expression is safe).

   The macro relies on names from the expansion site: specs, curSpec, buffer, formatBuffer,
   width and precision. width and/or precision are passed as extra arguments only when the
   spec took them from the argument list (widthArgNum / precArgNum != -1), i.e. when
   formatBuffer still contains the matching '*'.

   Output is limited to 255 bytes regardless of how large `buffer` is.
   The expansion is a bare { } block and call sites are written without a trailing
   semicolon, so it must stay a block rather than a do { } while (0).
*/
#define SNPRINTF(TYPE, WHAT) {				\
    TYPE value = (TYPE)(WHAT);				\
    if (-1 != specs[curSpec].widthArgNum) {		\
	if (-1 != specs[curSpec].precArgNum) {		\
	    snprintf_l(buffer, 255, NULL, formatBuffer, width, precision, value); \
	} else {					\
	    snprintf_l(buffer, 255, NULL, formatBuffer, width, value); \
	}						\
    } else {						\
	if (-1 != specs[curSpec].precArgNum) {		\
	    snprintf_l(buffer, 255, NULL, formatBuffer, precision, value); \
        } else {					\
	    snprintf_l(buffer, 255, NULL, formatBuffer, value);	\
	}						\
    }}
4791 | ||
4792 | void _CFStringAppendFormatAndArgumentsAux(CFMutableStringRef outputString, CFStringRef (*copyDescFunc)(void *, CFDictionaryRef), CFDictionaryRef formatOptions, CFStringRef formatString, va_list args) { | |
4793 | SInt32 numSpecs, sizeSpecs, sizeArgNum, formatIdx, curSpec, argNum; | |
4794 | CFIndex formatLen; | |
4795 | #define FORMAT_BUFFER_LEN 400 | |
4796 | const uint8_t *cformat = NULL; | |
4797 | const UniChar *uformat = NULL; | |
4798 | UniChar *formatChars = NULL; | |
4799 | UniChar localFormatBuffer[FORMAT_BUFFER_LEN]; | |
4800 | ||
4801 | #define VPRINTF_BUFFER_LEN 61 | |
4802 | CFFormatSpec localSpecsBuffer[VPRINTF_BUFFER_LEN]; | |
4803 | CFFormatSpec *specs; | |
4804 | CFPrintValue localValuesBuffer[VPRINTF_BUFFER_LEN]; | |
4805 | CFPrintValue *values; | |
4806 | CFAllocatorRef tmpAlloc = NULL; | |
4807 | ||
4808 | numSpecs = 0; | |
4809 | sizeSpecs = 0; | |
4810 | sizeArgNum = 0; | |
4811 | specs = NULL; | |
4812 | values = NULL; | |
4813 | ||
4814 | formatLen = CFStringGetLength(formatString); | |
4815 | if (!CF_IS_OBJC(__kCFStringTypeID, formatString)) { | |
4816 | __CFAssertIsString(formatString); | |
4817 | if (!__CFStrIsUnicode(formatString)) { | |
4818 | cformat = __CFStrContents(formatString); | |
4819 | if (cformat) cformat += __CFStrSkipAnyLengthByte(formatString); | |
4820 | } else { | |
4821 | uformat = __CFStrContents(formatString); | |
4822 | } | |
4823 | } | |
4824 | if (!cformat && !uformat) { | |
4825 | formatChars = (formatLen > FORMAT_BUFFER_LEN) ? CFAllocatorAllocate(tmpAlloc = __CFGetDefaultAllocator(), formatLen * sizeof(UniChar), 0) : localFormatBuffer; | |
4826 | if (formatChars != localFormatBuffer && __CFOASafe) __CFSetLastAllocationEventName(formatChars, "CFString (temp)"); | |
4827 | CFStringGetCharacters(formatString, CFRangeMake(0, formatLen), formatChars); | |
4828 | uformat = formatChars; | |
4829 | } | |
4830 | ||
4831 | /* Compute an upper bound for the number of format specifications */ | |
4832 | if (cformat) { | |
4833 | for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == cformat[formatIdx]) sizeSpecs++; | |
4834 | } else { | |
4835 | for (formatIdx = 0; formatIdx < formatLen; formatIdx++) if ('%' == uformat[formatIdx]) sizeSpecs++; | |
4836 | } | |
4837 | tmpAlloc = __CFGetDefaultAllocator(); | |
4838 | specs = ((2 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? CFAllocatorAllocate(tmpAlloc, (2 * sizeSpecs + 1) * sizeof(CFFormatSpec), 0) : localSpecsBuffer; | |
4839 | if (specs != localSpecsBuffer && __CFOASafe) __CFSetLastAllocationEventName(specs, "CFString (temp)"); | |
4840 | ||
4841 | /* Collect format specification information from the format string */ | |
4842 | for (curSpec = 0, formatIdx = 0; formatIdx < formatLen; curSpec++) { | |
4843 | SInt32 newFmtIdx; | |
4844 | specs[curSpec].loc = formatIdx; | |
4845 | specs[curSpec].len = 0; | |
4846 | specs[curSpec].size = 0; | |
4847 | specs[curSpec].type = 0; | |
4848 | specs[curSpec].flags = 0; | |
4849 | specs[curSpec].widthArg = -1; | |
4850 | specs[curSpec].precArg = -1; | |
4851 | specs[curSpec].mainArgNum = -1; | |
4852 | specs[curSpec].precArgNum = -1; | |
4853 | specs[curSpec].widthArgNum = -1; | |
4854 | if (cformat) { | |
4855 | for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != cformat[newFmtIdx]; newFmtIdx++); | |
4856 | } else { | |
4857 | for (newFmtIdx = formatIdx; newFmtIdx < formatLen && '%' != uformat[newFmtIdx]; newFmtIdx++); | |
4858 | } | |
4859 | if (newFmtIdx != formatIdx) { /* Literal chunk */ | |
4860 | specs[curSpec].type = CFFormatLiteralType; | |
4861 | specs[curSpec].len = newFmtIdx - formatIdx; | |
4862 | } else { | |
4863 | newFmtIdx++; /* Skip % */ | |
4864 | __CFParseFormatSpec(uformat, cformat, &newFmtIdx, formatLen, &(specs[curSpec])); | |
4865 | if (CFFormatLiteralType == specs[curSpec].type) { | |
4866 | specs[curSpec].loc = formatIdx + 1; | |
4867 | specs[curSpec].len = 1; | |
4868 | } else { | |
4869 | specs[curSpec].len = newFmtIdx - formatIdx; | |
4870 | } | |
4871 | } | |
4872 | formatIdx = newFmtIdx; | |
4873 | ||
d8925383 | 4874 | // fprintf(stderr, "specs[%d] = {\n size = %d,\n type = %d,\n loc = %d,\n len = %d,\n mainArgNum = %d,\n precArgNum = %d,\n widthArgNum = %d\n}\n", curSpec, specs[curSpec].size, specs[curSpec].type, specs[curSpec].loc, specs[curSpec].len, specs[curSpec].mainArgNum, specs[curSpec].precArgNum, specs[curSpec].widthArgNum); |
9ce05555 A |
4875 | |
4876 | } | |
4877 | numSpecs = curSpec; | |
4878 | // Max of three args per spec, reasoning thus: 1 width, 1 prec, 1 value | |
4879 | values = ((3 * sizeSpecs + 1) > VPRINTF_BUFFER_LEN) ? CFAllocatorAllocate(tmpAlloc, (3 * sizeSpecs + 1) * sizeof(CFPrintValue), 0) : localValuesBuffer; | |
4880 | if (values != localValuesBuffer && __CFOASafe) __CFSetLastAllocationEventName(values, "CFString (temp)"); | |
4881 | memset(values, 0, (3 * sizeSpecs + 1) * sizeof(CFPrintValue)); | |
4882 | sizeArgNum = (3 * sizeSpecs + 1); | |
4883 | ||
4884 | /* Compute values array */ | |
4885 | argNum = 0; | |
4886 | for (curSpec = 0; curSpec < numSpecs; curSpec++) { | |
4887 | SInt32 newMaxArgNum; | |
4888 | if (0 == specs[curSpec].type) continue; | |
4889 | if (CFFormatLiteralType == specs[curSpec].type) continue; | |
4890 | newMaxArgNum = sizeArgNum; | |
4891 | if (newMaxArgNum < specs[curSpec].mainArgNum) { | |
4892 | newMaxArgNum = specs[curSpec].mainArgNum; | |
4893 | } | |
4894 | if (newMaxArgNum < specs[curSpec].precArgNum) { | |
4895 | newMaxArgNum = specs[curSpec].precArgNum; | |
4896 | } | |
4897 | if (newMaxArgNum < specs[curSpec].widthArgNum) { | |
4898 | newMaxArgNum = specs[curSpec].widthArgNum; | |
4899 | } | |
4900 | if (sizeArgNum < newMaxArgNum) { | |
4901 | if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs); | |
4902 | if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values); | |
4903 | if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars); | |
4904 | return; // more args than we expected! | |
4905 | } | |
4906 | /* It is actually incorrect to reorder some specs and not all; we just do some random garbage here */ | |
4907 | if (-2 == specs[curSpec].widthArgNum) { | |
4908 | specs[curSpec].widthArgNum = argNum++; | |
4909 | } | |
4910 | if (-2 == specs[curSpec].precArgNum) { | |
4911 | specs[curSpec].precArgNum = argNum++; | |
4912 | } | |
4913 | if (-1 == specs[curSpec].mainArgNum) { | |
4914 | specs[curSpec].mainArgNum = argNum++; | |
4915 | } | |
4916 | values[specs[curSpec].mainArgNum].size = specs[curSpec].size; | |
4917 | values[specs[curSpec].mainArgNum].type = specs[curSpec].type; | |
4918 | if (-1 != specs[curSpec].widthArgNum) { | |
4919 | values[specs[curSpec].widthArgNum].size = 0; | |
4920 | values[specs[curSpec].widthArgNum].type = CFFormatLongType; | |
4921 | } | |
4922 | if (-1 != specs[curSpec].precArgNum) { | |
4923 | values[specs[curSpec].precArgNum].size = 0; | |
4924 | values[specs[curSpec].precArgNum].type = CFFormatLongType; | |
4925 | } | |
4926 | } | |
4927 | ||
4928 | /* Collect the arguments in correct type from vararg list */ | |
4929 | for (argNum = 0; argNum < sizeArgNum; argNum++) { | |
4930 | switch (values[argNum].type) { | |
4931 | case 0: | |
4932 | case CFFormatLiteralType: | |
4933 | break; | |
4934 | case CFFormatLongType: | |
4935 | case CFFormatSingleUnicharType: | |
4936 | if (CFFormatSize1 == values[argNum].size) { | |
d8925383 | 4937 | values[argNum].value.int64Value = (int64_t)(int8_t)va_arg(args, int); |
9ce05555 | 4938 | } else if (CFFormatSize2 == values[argNum].size) { |
d8925383 | 4939 | values[argNum].value.int64Value = (int64_t)(int16_t)va_arg(args, int); |
9ce05555 | 4940 | } else if (CFFormatSize4 == values[argNum].size) { |
d8925383 | 4941 | values[argNum].value.int64Value = (int64_t)va_arg(args, int32_t); |
9ce05555 | 4942 | } else if (CFFormatSize8 == values[argNum].size) { |
d8925383 | 4943 | values[argNum].value.int64Value = (int64_t)va_arg(args, int64_t); |
9ce05555 | 4944 | } else { |
d8925383 | 4945 | values[argNum].value.int64Value = (int64_t)va_arg(args, int); |
9ce05555 A |
4946 | } |
4947 | break; | |
4948 | case CFFormatDoubleType: | |
4949 | values[argNum].value.doubleValue = va_arg(args, double); | |
4950 | break; | |
4951 | case CFFormatPointerType: | |
4952 | case CFFormatObjectType: | |
4953 | case CFFormatCFType: | |
4954 | case CFFormatUnicharsType: | |
4955 | case CFFormatCharsType: | |
4956 | case CFFormatPascalCharsType: | |
4957 | values[argNum].value.pointerValue = va_arg(args, void *); | |
4958 | break; | |
4959 | } | |
4960 | } | |
4961 | va_end(args); | |
4962 | ||
4963 | /* Format the pieces together */ | |
4964 | for (curSpec = 0; curSpec < numSpecs; curSpec++) { | |
4965 | SInt32 width = 0, precision = 0; | |
4966 | UniChar *up, ch; | |
4967 | Boolean hasWidth = false, hasPrecision = false; | |
4968 | ||
4969 | // widthArgNum and widthArg are never set at the same time; same for precArg* | |
4970 | if (-1 != specs[curSpec].widthArgNum) { | |
d8925383 | 4971 | width = (SInt32)values[specs[curSpec].widthArgNum].value.int64Value; |
9ce05555 A |
4972 | hasWidth = true; |
4973 | } | |
4974 | if (-1 != specs[curSpec].precArgNum) { | |
d8925383 | 4975 | precision = (SInt32)values[specs[curSpec].precArgNum].value.int64Value; |
9ce05555 A |
4976 | hasPrecision = true; |
4977 | } | |
4978 | if (-1 != specs[curSpec].widthArg) { | |
4979 | width = specs[curSpec].widthArg; | |
4980 | hasWidth = true; | |
4981 | } | |
4982 | if (-1 != specs[curSpec].precArg) { | |
4983 | precision = specs[curSpec].precArg; | |
4984 | hasPrecision = true; | |
4985 | } | |
4986 | ||
4987 | switch (specs[curSpec].type) { | |
4988 | case CFFormatLongType: | |
4989 | case CFFormatDoubleType: | |
4990 | case CFFormatPointerType: { | |
4991 | int8_t formatBuffer[128]; | |
d8925383 | 4992 | #if defined(__GNUC__) |
9ce05555 | 4993 | int8_t buffer[256 + width + precision]; |
d8925383 A |
4994 | #else |
4995 | int8_t stackBuffer[512]; | |
4996 | int8_t *dynamicBuffer = NULL; | |
4997 | int8_t *buffer = stackBuffer; | |
4998 | if (256+width+precision > 512) { | |
4999 | dynamicBuffer = CFAllocatorAllocate(NULL, 256+width+precision, 0); | |
5000 | buffer = dynamicBuffer; | |
5001 | } | |
5002 | #endif | |
9ce05555 A |
5003 | SInt32 cidx, idx, loc; |
5004 | Boolean appended = false; | |
5005 | loc = specs[curSpec].loc; | |
5006 | // In preparation to call snprintf(), copy the format string out | |
5007 | if (cformat) { | |
5008 | for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) { | |
5009 | if ('$' == cformat[loc + cidx]) { | |
5010 | for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--); | |
5011 | } else { | |
5012 | formatBuffer[idx] = cformat[loc + cidx]; | |
5013 | } | |
5014 | } | |
5015 | } else { | |
5016 | for (idx = 0, cidx = 0; cidx < specs[curSpec].len; idx++, cidx++) { | |
5017 | if ('$' == uformat[loc + cidx]) { | |
5018 | for (idx--; '0' <= formatBuffer[idx] && formatBuffer[idx] <= '9'; idx--); | |
5019 | } else { | |
5020 | formatBuffer[idx] = (int8_t)uformat[loc + cidx]; | |
5021 | } | |
5022 | } | |
5023 | } | |
5024 | formatBuffer[idx] = '\0'; | |
5025 | // Should modify format buffer here if necessary; for example, to translate %qd to | |
5026 | // the equivalent, on architectures which do not have %q. | |
5027 | buffer[sizeof(buffer) - 1] = '\0'; | |
5028 | switch (specs[curSpec].type) { | |
5029 | case CFFormatLongType: | |
5030 | if (CFFormatSize8 == specs[curSpec].size) { | |
d8925383 | 5031 | SNPRINTF(int64_t, values[specs[curSpec].mainArgNum].value.int64Value) |
9ce05555 | 5032 | } else { |
d8925383 | 5033 | SNPRINTF(SInt32, values[specs[curSpec].mainArgNum].value.int64Value) |
9ce05555 A |
5034 | } |
5035 | break; | |
5036 | case CFFormatPointerType: | |
5037 | SNPRINTF(void *, values[specs[curSpec].mainArgNum].value.pointerValue) | |
5038 | break; | |
5039 | ||
5040 | case CFFormatDoubleType: | |
5041 | SNPRINTF(double, values[specs[curSpec].mainArgNum].value.doubleValue) | |
5042 | // See if we need to localize the decimal point | |
5043 | if (formatOptions) { // We have a localization dictionary | |
5044 | CFStringRef decimalSeparator = CFDictionaryGetValue(formatOptions, kCFNSDecimalSeparatorKey); | |
5045 | if (decimalSeparator != NULL) { // We have a decimal separator in there | |
5046 | CFIndex decimalPointLoc = 0; | |
5047 | while (buffer[decimalPointLoc] != 0 && buffer[decimalPointLoc] != '.') decimalPointLoc++; | |
5048 | if (buffer[decimalPointLoc] == '.') { // And we have a decimal point in the formatted string | |
5049 | buffer[decimalPointLoc] = 0; | |
5050 | CFStringAppendCString(outputString, buffer, __CFStringGetEightBitStringEncoding()); | |
5051 | CFStringAppend(outputString, decimalSeparator); | |
5052 | CFStringAppendCString(outputString, buffer + decimalPointLoc + 1, __CFStringGetEightBitStringEncoding()); | |
5053 | appended = true; | |
5054 | } | |
5055 | } | |
5056 | } | |
5057 | break; | |
5058 | } | |
5059 | if (!appended) CFStringAppendCString(outputString, buffer, __CFStringGetEightBitStringEncoding()); | |
5060 | } | |
d8925383 A |
5061 | #if !defined(__GNUC__) |
5062 | if (dynamicBuffer) { | |
5063 | CFAllocatorDeallocate(NULL, dynamicBuffer); | |
5064 | } | |
5065 | #endif | |
5066 | break; | |
9ce05555 A |
5067 | case CFFormatLiteralType: |
5068 | if (cformat) { | |
5069 | __CFStringAppendBytes(outputString, cformat+specs[curSpec].loc, specs[curSpec].len, __CFStringGetEightBitStringEncoding()); | |
5070 | } else { | |
5071 | CFStringAppendCharacters(outputString, uformat+specs[curSpec].loc, specs[curSpec].len); | |
5072 | } | |
5073 | break; | |
5074 | case CFFormatPascalCharsType: | |
5075 | case CFFormatCharsType: | |
5076 | if (values[specs[curSpec].mainArgNum].value.pointerValue == NULL) { | |
5077 | CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII); | |
5078 | } else { | |
5079 | int len; | |
5080 | const char *str = values[specs[curSpec].mainArgNum].value.pointerValue; | |
5081 | if (specs[curSpec].type == CFFormatPascalCharsType) { // Pascal string case | |
5082 | len = ((unsigned char *)str)[0]; | |
5083 | str++; | |
5084 | if (hasPrecision && precision < len) len = precision; | |
5085 | } else { // C-string case | |
5086 | if (!hasPrecision) { // No precision, so rely on the terminating null character | |
5087 | len = strlen(str); | |
5088 | } else { // Don't blindly call strlen() if there is a precision; the string might not have a terminating null (3131988) | |
5089 | const char *terminatingNull = memchr(str, 0, precision); // Basically strlen() on only the first precision characters of str | |
5090 | if (terminatingNull) { // There was a null in the first precision characters | |
5091 | len = terminatingNull - str; | |
5092 | } else { | |
5093 | len = precision; | |
5094 | } | |
5095 | } | |
5096 | } | |
5097 | // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for | |
5098 | // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone | |
5099 | // to ignore those flags (and, say, never pad with '0' instead of space). | |
5100 | if (specs[curSpec].flags & kCFStringFormatMinusFlag) { | |
5101 | __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding()); | |
5102 | if (hasWidth && width > len) { | |
5103 | int w = width - len; // We need this many spaces; do it ten at a time | |
5104 | do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0); | |
5105 | } | |
5106 | } else { | |
5107 | if (hasWidth && width > len) { | |
5108 | int w = width - len; // We need this many spaces; do it ten at a time | |
5109 | do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0); | |
5110 | } | |
5111 | __CFStringAppendBytes(outputString, str, len, __CFStringGetSystemEncoding()); | |
5112 | } | |
5113 | } | |
5114 | break; | |
5115 | case CFFormatSingleUnicharType: | |
d8925383 | 5116 | ch = values[specs[curSpec].mainArgNum].value.int64Value; |
9ce05555 A |
5117 | CFStringAppendCharacters(outputString, &ch, 1); |
5118 | break; | |
5119 | case CFFormatUnicharsType: | |
5120 | //??? need to handle width, precision, and padding arguments | |
5121 | up = values[specs[curSpec].mainArgNum].value.pointerValue; | |
5122 | if (NULL == up) { | |
5123 | CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII); | |
5124 | } else { | |
5125 | int len; | |
5126 | for (len = 0; 0 != up[len]; len++); | |
5127 | // Since the spec says the behavior of the ' ', '0', '#', and '+' flags is undefined for | |
5128 | // '%s', and since we have ignored them in the past, the behavior is hereby cast in stone | |
5129 | // to ignore those flags (and, say, never pad with '0' instead of space). | |
5130 | if (hasPrecision && precision < len) len = precision; | |
5131 | if (specs[curSpec].flags & kCFStringFormatMinusFlag) { | |
5132 | CFStringAppendCharacters(outputString, up, len); | |
5133 | if (hasWidth && width > len) { | |
5134 | int w = width - len; // We need this many spaces; do it ten at a time | |
5135 | do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0); | |
5136 | } | |
5137 | } else { | |
5138 | if (hasWidth && width > len) { | |
5139 | int w = width - len; // We need this many spaces; do it ten at a time | |
5140 | do {__CFStringAppendBytes(outputString, " ", (w > 10 ? 10 : w), kCFStringEncodingASCII);} while ((w -= 10) > 0); | |
5141 | } | |
5142 | CFStringAppendCharacters(outputString, up, len); | |
5143 | } | |
5144 | } | |
5145 | break; | |
5146 | case CFFormatCFType: | |
5147 | case CFFormatObjectType: | |
5148 | if (NULL != values[specs[curSpec].mainArgNum].value.pointerValue) { | |
5149 | CFStringRef str = NULL; | |
5150 | if (copyDescFunc) { | |
5151 | str = copyDescFunc(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions); | |
5152 | } else { | |
5153 | str = __CFCopyFormattingDescription(values[specs[curSpec].mainArgNum].value.pointerValue, formatOptions); | |
5154 | if (NULL == str) { | |
5155 | str = CFCopyDescription(values[specs[curSpec].mainArgNum].value.pointerValue); | |
5156 | } | |
5157 | } | |
5158 | if (str) { | |
5159 | CFStringAppend(outputString, str); | |
5160 | CFRelease(str); | |
5161 | } else { | |
5162 | CFStringAppendCString(outputString, "(null description)", kCFStringEncodingASCII); | |
5163 | } | |
5164 | } else { | |
5165 | CFStringAppendCString(outputString, "(null)", kCFStringEncodingASCII); | |
5166 | } | |
5167 | break; | |
5168 | } | |
5169 | } | |
5170 | ||
5171 | if (specs != localSpecsBuffer) CFAllocatorDeallocate(tmpAlloc, specs); | |
5172 | if (values != localValuesBuffer) CFAllocatorDeallocate(tmpAlloc, values); | |
5173 | if (formatChars && (formatChars != localFormatBuffer)) CFAllocatorDeallocate(tmpAlloc, formatChars); | |
5174 | ||
5175 | } | |
5176 | ||
5177 | #undef SNPRINTF | |
5178 | ||
5179 | void CFShowStr(CFStringRef str) { | |
5180 | CFAllocatorRef alloc; | |
5181 | ||
5182 | if (!str) { | |
d8925383 | 5183 | fprintf(stdout, "(null)\n"); |
9ce05555 A |
5184 | return; |
5185 | } | |
5186 | ||
5187 | if (CF_IS_OBJC(__kCFStringTypeID, str)) { | |
d8925383 | 5188 | fprintf(stdout, "This is an NSString, not CFString\n"); |
9ce05555 A |
5189 | return; |
5190 | } | |
5191 | ||
5192 | alloc = CFGetAllocator(str); | |
5193 | ||
d8925383 A |
5194 | fprintf(stdout, "\nLength %d\nIsEightBit %d\n", (int)__CFStrLength(str), __CFStrIsEightBit(str)); |
5195 | fprintf(stdout, "HasLengthByte %d\nHasNullByte %d\nInlineContents %d\n", | |
9ce05555 A |
5196 | __CFStrHasLengthByte(str), __CFStrHasNullByte(str), __CFStrIsInline(str)); |
5197 | ||
d8925383 | 5198 | fprintf(stdout, "Allocator "); |
9ce05555 | 5199 | if (alloc != kCFAllocatorSystemDefault) { |
d8925383 | 5200 | fprintf(stdout, "%p\n", (void *)alloc); |
9ce05555 | 5201 | } else { |
d8925383 | 5202 | fprintf(stdout, "SystemDefault\n"); |
9ce05555 | 5203 | } |
d8925383 | 5204 | fprintf(stdout, "Mutable %d\n", __CFStrIsMutable(str)); |
9ce05555 | 5205 | if (!__CFStrIsMutable(str) && __CFStrHasContentsDeallocator(str)) { |
d8925383 A |
5206 | if (__CFStrContentsDeallocator(str)) fprintf(stdout, "ContentsDeallocatorFunc %p\n", (void *)__CFStrContentsDeallocator(str)); |
5207 | else fprintf(stdout, "ContentsDeallocatorFunc None\n"); | |
9ce05555 | 5208 | } else if (__CFStrIsMutable(str) && __CFStrHasContentsAllocator(str)) { |
d8925383 | 5209 | fprintf(stdout, "ExternalContentsAllocator %p\n", (void *)__CFStrContentsAllocator((CFMutableStringRef)str)); |
9ce05555 A |
5210 | } |
5211 | ||
5212 | if (__CFStrIsMutable(str)) { | |
d8925383 | 5213 | fprintf(stdout, "CurrentCapacity %d\n%sCapacity %d\n", (int)__CFStrCapacity(str), __CFStrIsFixed(str) ? "Fixed" : "Desired", (int)__CFStrDesiredCapacity(str)); |
9ce05555 | 5214 | } |
d8925383 | 5215 | fprintf(stdout, "Contents %p\n", (void *)__CFStrContents(str)); |
9ce05555 A |
5216 | } |
5217 | ||
5218 | ||
d8925383 | 5219 |