]> git.saurik.com Git - apple/cf.git/blob - CFStringUtilities.c
CF-744.tar.gz
[apple/cf.git] / CFStringUtilities.c
1 /*
2 * Copyright (c) 2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFStringUtilities.c
25 Copyright (c) 1999-2012, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFStringEncodingConverterExt.h>
31 #include <CoreFoundation/CFUniChar.h>
32 #include <CoreFoundation/CFStringEncodingExt.h>
33 #include "CFStringEncodingDatabase.h"
34 #include "CFICUConverters.h"
35 #include <limits.h>
36 #include <stdlib.h>
37 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
38 #include <unicode/ucol.h>
39 #include <unicode/ucoleitr.h>
40 #endif
41 #include <string.h>
42
43 #if DEPLOYMENT_TARGET_WINDOWS
44 #include <tchar.h>
45 #endif
46
47
48 Boolean CFStringIsEncodingAvailable(CFStringEncoding theEncoding) {
49 switch (theEncoding) {
50 case kCFStringEncodingASCII: // Built-in encodings
51 case kCFStringEncodingMacRoman:
52 case kCFStringEncodingUTF8:
53 case kCFStringEncodingNonLossyASCII:
54 case kCFStringEncodingWindowsLatin1:
55 case kCFStringEncodingNextStepLatin:
56 case kCFStringEncodingUTF16:
57 case kCFStringEncodingUTF16BE:
58 case kCFStringEncodingUTF16LE:
59 case kCFStringEncodingUTF32:
60 case kCFStringEncodingUTF32BE:
61 case kCFStringEncodingUTF32LE:
62 return true;
63
64 default:
65 return CFStringEncodingIsValidEncoding(theEncoding);
66 }
67 }
68
69 const CFStringEncoding* CFStringGetListOfAvailableEncodings() {
70 return (const CFStringEncoding *)CFStringEncodingListOfAvailableEncodings();
71 }
72
73 CFStringRef CFStringGetNameOfEncoding(CFStringEncoding theEncoding) {
74 static CFMutableDictionaryRef mappingTable = NULL;
75 CFStringRef theName = mappingTable ? (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)(uintptr_t)theEncoding) : NULL;
76
77 if (!theName) {
78 const char *encodingName = __CFStringEncodingGetName(theEncoding);
79
80 if (encodingName) {
81 theName = CFStringCreateWithCString(kCFAllocatorSystemDefault, encodingName, kCFStringEncodingASCII);
82 }
83
84 if (theName) {
85 if (!mappingTable) mappingTable = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks);
86
87 CFDictionaryAddValue(mappingTable, (const void*)(uintptr_t)theEncoding, (const void*)theName);
88 CFRelease(theName);
89 }
90 }
91
92 return theName;
93 }
94
95 CFStringEncoding CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName) {
96 CFStringEncoding encoding = kCFStringEncodingInvalidId;
97 #define BUFFER_SIZE (100)
98 char buffer[BUFFER_SIZE];
99 const char *name = CFStringGetCStringPtr(charsetName, __CFStringGetEightBitStringEncoding());
100
101 if (NULL == name) {
102 if (false == CFStringGetCString(charsetName, buffer, BUFFER_SIZE, __CFStringGetEightBitStringEncoding())) return kCFStringEncodingInvalidId;
103
104 name = buffer;
105 }
106
107 encoding = __CFStringEncodingGetFromCanonicalName(name);
108
109 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
110 if (kCFStringEncodingInvalidId == encoding) encoding = __CFStringEncodingGetFromICUName(name);
111 #endif
112
113
114 return encoding;
115 }
116
117 CFStringRef CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding) {
118 CFStringRef name = NULL;
119 CFIndex value = encoding;
120 static CFMutableDictionaryRef mappingTable = NULL;
121 static CFSpinLock_t lock = CFSpinLockInit;
122
123 __CFSpinLock(&lock);
124 name = ((NULL == mappingTable) ? NULL : (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)value));
125
126 if (NULL == name) {
127 #define STACK_BUFFER_SIZE (100)
128 char buffer[STACK_BUFFER_SIZE];
129
130 if (__CFStringEncodingGetCanonicalName(encoding, buffer, STACK_BUFFER_SIZE)) name = CFStringCreateWithCString(NULL, buffer, kCFStringEncodingASCII);
131
132
133 if (NULL != name) {
134 CFIndex value = encoding;
135
136 if (NULL == mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, &kCFTypeDictionaryValueCallBacks);
137
138 CFDictionaryAddValue(mappingTable, (const void*)value, (const void*)name);
139 CFRelease(name);
140 }
141 }
142 __CFSpinUnlock(&lock);
143
144 return name;
145 }
146
/*
 * NSStringEncoding values used by the two conversion functions
 * CFStringConvertEncodingToNSStringEncoding() and
 * CFStringConvertNSStringEncodingToEncoding().
 */
enum {
    NSASCIIStringEncoding = 1, /* 0..127 only */
    NSNEXTSTEPStringEncoding = 2,
    NSJapaneseEUCStringEncoding = 3,
    NSUTF8StringEncoding = 4,
    NSISOLatin1StringEncoding = 5,
    NSSymbolStringEncoding = 6,
    NSNonLossyASCIIStringEncoding = 7,
    NSShiftJISStringEncoding = 8,
    NSISOLatin2StringEncoding = 9,
    NSUnicodeStringEncoding = 10,
    NSWindowsCP1251StringEncoding = 11, /* Cyrillic; same as AdobeStandardCyrillic */
    NSWindowsCP1252StringEncoding = 12, /* WinLatin1 */
    NSWindowsCP1253StringEncoding = 13, /* Greek */
    NSWindowsCP1254StringEncoding = 14, /* Turkish */
    NSWindowsCP1250StringEncoding = 15, /* WinLatin2 */
    NSISO2022JPStringEncoding = 21, /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding = 30,

    NSProprietaryStringEncoding = 65536 /* Installation-specific encoding */
};

/*
 * Flag bit (bit 31) OR-ed into a CFStringEncoding that has no dedicated
 * NSStringEncoding value, and stripped again on the way back.
 *
 * The constant is unsigned on purpose: shifting a signed 1 into the sign bit
 * is undefined behaviour in C, and a negative int mask would sign-extend to
 * 0xFFFFFFFF80000000 when combined with an unsigned long on LP64 targets.
 */
#define NSENCODING_MASK (1U << 31)
170
171 unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding) {
172 switch (theEncoding & 0xFFF) {
173 case kCFStringEncodingUnicode:
174 if (theEncoding == kCFStringEncodingUTF16) return NSUnicodeStringEncoding;
175 else if (theEncoding == kCFStringEncodingUTF8) return NSUTF8StringEncoding;
176 break;
177
178 case kCFStringEncodingWindowsLatin1: return NSWindowsCP1252StringEncoding;
179 case kCFStringEncodingMacRoman: return NSMacOSRomanStringEncoding;
180
181 case kCFStringEncodingASCII: return NSASCIIStringEncoding;
182
183 case kCFStringEncodingDOSJapanese: return NSShiftJISStringEncoding;
184 case kCFStringEncodingWindowsCyrillic: return NSWindowsCP1251StringEncoding;
185 case kCFStringEncodingWindowsGreek: return NSWindowsCP1253StringEncoding;
186 case kCFStringEncodingWindowsLatin5: return NSWindowsCP1254StringEncoding;
187 case kCFStringEncodingWindowsLatin2: return NSWindowsCP1250StringEncoding;
188 case kCFStringEncodingISOLatin1: return NSISOLatin1StringEncoding;
189
190 case kCFStringEncodingNonLossyASCII: return NSNonLossyASCIIStringEncoding;
191 case kCFStringEncodingEUC_JP: return NSJapaneseEUCStringEncoding;
192 case kCFStringEncodingMacSymbol: return NSSymbolStringEncoding;
193 case kCFStringEncodingISOLatin2: return NSISOLatin2StringEncoding;
194 case kCFStringEncodingISO_2022_JP: return NSISO2022JPStringEncoding;
195 case kCFStringEncodingNextStepLatin: return NSNEXTSTEPStringEncoding;
196 }
197
198 return NSENCODING_MASK | theEncoding;
199 }
200
201 CFStringEncoding CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding) {
202 const uint16_t encodings[] = {
203 kCFStringEncodingASCII,
204 kCFStringEncodingNextStepLatin,
205 kCFStringEncodingEUC_JP,
206 0,
207 kCFStringEncodingISOLatin1,
208 kCFStringEncodingMacSymbol,
209 kCFStringEncodingNonLossyASCII,
210 kCFStringEncodingDOSJapanese,
211 kCFStringEncodingISOLatin2,
212 kCFStringEncodingUTF16,
213 kCFStringEncodingWindowsCyrillic,
214 kCFStringEncodingWindowsLatin1,
215 kCFStringEncodingWindowsGreek,
216 kCFStringEncodingWindowsLatin5,
217 kCFStringEncodingWindowsLatin2
218 };
219
220 if (NSUTF8StringEncoding == theEncoding) return kCFStringEncodingUTF8;
221
222 if ((theEncoding > 0) && (theEncoding <= NSWindowsCP1250StringEncoding)) return encodings[theEncoding - 1];
223
224 switch (theEncoding) {
225 case NSMacOSRomanStringEncoding: return kCFStringEncodingMacRoman;
226 case NSISO2022JPStringEncoding: return kCFStringEncodingISO_2022_JP;
227
228 default:
229 return ((theEncoding & NSENCODING_MASK) ? theEncoding & ~NSENCODING_MASK : kCFStringEncodingInvalidId);
230 }
231 }
232
233 UInt32 CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding) {
234 uint16_t codepage = __CFStringEncodingGetWindowsCodePage(theEncoding);
235
236 return ((0 == codepage) ? kCFStringEncodingInvalidId : codepage);
237 }
238
239 CFStringEncoding CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding) {
240 return __CFStringEncodingGetFromWindowsCodePage(theEncoding);
241 }
242
243 CFStringEncoding CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding) {
244 CFStringEncoding macEncoding = __CFStringEncodingGetMostCompatibleMacScript(encoding);
245
246
247 return macEncoding;
248 }
249
/* Element count of the on-stack UTF-16 scratch buffers used by _CFCompareStringsWithLocale, and the multiple to which its heap scratch buffers are rounded up. */
250 #define kCFStringCompareAllocationIncrement (128)
251
252 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
253
254 // -------------------------------------------------------------------------------------------------
255 // CompareSpecials - ignore case & diacritic differences
256 //
257 // Decomposed have 2nd-4th chars of type Mn or Mc, or in range 1160-11FF (jamo)
258 // Fullwidth & halfwidth are in range FF00-FFEF
259 // Parenthesized & circled are in range 3200-32FF
260 // -------------------------------------------------------------------------------------------------
261
/* Constants used by __CompareSpecials when picking apart collation element values. */
enum {
    /* Case/tertiary values inside [Min, Max] are folded down by kUpperToLowerDelta. */
    kUpperCaseWeightMin   = 0x8F,        // 0x80 | 0x0F
    kUpperCaseWeightMax   = 0x97,        // 0x80 | 0x17
    kUpperToLowerDelta    = 0x8A,        // 0x80 | 0x0A, where 0x0A = 0x0F - 0x05

    /* Bit fields of a collation element value. */
    kMaskPrimarySecondary = 0xFFFFFF00,
    kMaskPrimaryOnly      = 0xFFFF0000,
    kMaskSecondaryOnly    = 0x0000FF00,
    kMaskCaseTertiary     = 0x000000FF   // 2 hi bits case, 6 lo bits tertiary
};
271
272 static SInt32 __CompareSpecials(const UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length) {
273 UErrorCode icuStatus = U_ZERO_ERROR;
274 SInt32 orderWidth = 0;
275 SInt32 orderCompos = 0;
276
277 UCollationElements * collElems1 = ucol_openElements(collator, (const UChar *)text1Ptr, text1Length, &icuStatus);
278 UCollationElements * collElems2 = ucol_openElements(collator, (const UChar *)text2Ptr, text2Length, &icuStatus);
279 if (U_SUCCESS(icuStatus)) {
280 int32_t startOffset1 = 0;
281 int32_t startOffset2 = 0;
282
283 while (true) {
284 int32_t elemOrder1, elemOrder2;
285 int32_t offset1, offset2;
286
287 elemOrder1 = ucol_next(collElems1, &icuStatus);
288 elemOrder2 = ucol_next(collElems2, &icuStatus);
289 if ( U_FAILURE(icuStatus) || elemOrder1 == (int32_t)UCOL_NULLORDER || elemOrder2 == (int32_t)UCOL_NULLORDER ) {
290 break;
291 }
292
293 offset1 = ucol_getOffset(collElems1);
294 offset2 = ucol_getOffset(collElems2);
295 if ( (elemOrder1 & kMaskPrimarySecondary) == (elemOrder2 & kMaskPrimarySecondary) ) {
296 if ( (elemOrder1 & kMaskPrimaryOnly) != 0 ) {
297 // keys may differ in case, width, circling, etc.
298
299 int32_t tertiary1 = (elemOrder1 & kMaskCaseTertiary);
300 int32_t tertiary2 = (elemOrder2 & kMaskCaseTertiary);
301 // fold upper to lower case
302 if (tertiary1 >= kUpperCaseWeightMin && tertiary1 <= kUpperCaseWeightMax) {
303 tertiary1 -= kUpperToLowerDelta;
304 }
305 if (tertiary2 >= kUpperCaseWeightMin && tertiary2 <= kUpperCaseWeightMax) {
306 tertiary2 -= kUpperToLowerDelta;
307 }
308 // now compare
309 if (tertiary1 != tertiary2) {
310 orderWidth = (tertiary1 < tertiary2)? -1: 1;
311 break;
312 }
313
314 } else if ( (elemOrder1 & kMaskSecondaryOnly) != 0 ) {
315 // primary weights are both zero, but secondaries are not.
316 if ( orderCompos == 0 && (options & kCFCompareNonliteral) == 0 ) {
317 // We have a code element which is a diacritic.
318 // It may have come from a composed char or a combining char.
319 // If it came from a combining char (longer element length) it sorts first.
320 // This is only an approximation to what the Mac OS 9 code did, but this is an
321 // unusual case anyway.
322 int32_t elem1Length = offset1 - startOffset1;
323 int32_t elem2Length = offset2 - startOffset2;
324 if (elem1Length != elem2Length) {
325 orderCompos = (elem1Length > elem2Length)? -1: 1;
326 }
327 }
328 }
329 }
330
331 startOffset1 = offset1;
332 startOffset2 = offset2;
333 }
334 ucol_closeElements(collElems1);
335 ucol_closeElements(collElems2);
336 }
337
338 return (orderWidth != 0)? orderWidth: orderCompos;
339 }
340
341 static SInt32 __CompareCodePoints(const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length ) {
342 const UniChar * text1P = text1Ptr;
343 const UniChar * text2P = text2Ptr;
344 UInt32 textLimit = (text1Length <= text2Length)? text1Length: text2Length;
345 UInt32 textCounter;
346 SInt32 orderResult = 0;
347
348 // Loop through either string...the first difference differentiates this.
349 for (textCounter = 0; textCounter < textLimit && *text1P == *text2P; textCounter++) {
350 text1P++;
351 text2P++;
352 }
353 if (textCounter < textLimit) {
354 // code point difference
355 orderResult = (*text1P < *text2P) ? -1 : 1;
356 } else if (text1Length != text2Length) {
357 // one string has extra stuff at end
358 orderResult = (text1Length < text2Length) ? -1 : 1;
359 }
360 return orderResult;
361 }
362
363
364 extern const CFStringRef __kCFLocaleCollatorID;
365
366 static UCollator *__CFStringCreateCollator(CFLocaleRef compareLocale) {
367 CFStringRef canonLocaleCFStr = (CFStringRef)CFLocaleGetValue(compareLocale, __kCFLocaleCollatorID);
368 char icuLocaleStr[128] = {0};
369 CFStringGetCString(canonLocaleCFStr, icuLocaleStr, sizeof(icuLocaleStr), kCFStringEncodingASCII);
370 UErrorCode icuStatus = U_ZERO_ERROR;
371 UCollator * collator = ucol_open(icuLocaleStr, &icuStatus);
372 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
373 ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &icuStatus);
374 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
375 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
376 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
377 return collator;
378 }
379
380 #define kCFMaxCachedDefaultCollators (8)
381 static UCollator *__CFDefaultCollators[kCFMaxCachedDefaultCollators];
382 static CFIndex __CFDefaultCollatorsCount = 0;
383 static const void *__CFDefaultCollatorLocale = NULL;
384 static CFSpinLock_t __CFDefaultCollatorLock = CFSpinLockInit;
385
386 static UCollator *__CFStringCopyDefaultCollator(CFLocaleRef compareLocale) {
387 CFLocaleRef currentLocale = NULL;
388 UCollator * collator = NULL;
389
390 if (compareLocale != __CFDefaultCollatorLocale) {
391 currentLocale = CFLocaleCopyCurrent();
392 if (compareLocale != currentLocale) {
393 CFRelease(currentLocale);
394 return NULL;
395 }
396 }
397
398 __CFSpinLock(&__CFDefaultCollatorLock);
399 if ((NULL != currentLocale) && (__CFDefaultCollatorLocale != currentLocale)) {
400 while (__CFDefaultCollatorsCount > 0) ucol_close(__CFDefaultCollators[--__CFDefaultCollatorsCount]);
401 __CFDefaultCollatorLocale = CFRetain(currentLocale);
402 }
403
404 if (__CFDefaultCollatorsCount > 0) collator = __CFDefaultCollators[--__CFDefaultCollatorsCount];
405 __CFSpinUnlock(&__CFDefaultCollatorLock);
406
407 if (NULL == collator) {
408 collator = __CFStringCreateCollator(compareLocale);
409 }
410
411 if (NULL != currentLocale) CFRelease(currentLocale);
412
413 return collator;
414 }
415
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
/*
 * Retires the collator held in this thread's TSD slots.
 *
 * Both TSD slots are cleared first. The collator is then put into the
 * default-collator pool if the thread's locale is the pool's locale and the
 * pool has room; otherwise it is closed. Finally the thread's reference to
 * its locale is released.
 */
static void __collatorFinalize(UCollator *collator) {
    CFLocaleRef owningLocale = _CFGetTSD(__CFTSDKeyCollatorLocale);
    Boolean pooled = false;

    _CFSetTSD(__CFTSDKeyCollatorUCollator, NULL, NULL);
    _CFSetTSD(__CFTSDKeyCollatorLocale, NULL, NULL);

    __CFSpinLock(&__CFDefaultCollatorLock);
    if ((__CFDefaultCollatorLocale == owningLocale) && (__CFDefaultCollatorsCount < kCFMaxCachedDefaultCollators)) {
        __CFDefaultCollators[__CFDefaultCollatorsCount++] = collator;
        pooled = true;
    }
    __CFSpinUnlock(&__CFDefaultCollatorLock);

    if (!pooled && (NULL != collator)) {
        ucol_close(collator);
    }
    if (owningLocale) {
        CFRelease(owningLocale);
    }
}
#endif
431
432 // -------------------------------------------------------------------------------------------------
433 // __CompareTextDefault
434 //
435 // A primary difference is denoted by values 2/-2 in orderP. Other differences are indicated with a -1/1.
436 // A negative value indicates that text1 sorts before text2.
437 // -------------------------------------------------------------------------------------------------
438 static OSStatus __CompareTextDefault(UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length, Boolean *equivalentP, SInt32 *orderP) {
439
440 // collator must have default settings restored on exit from this function
441
442 *equivalentP = true;
443 *orderP = 0;
444
445 if (options & kCFCompareNumerically) {
446 UErrorCode icuStatus = U_ZERO_ERROR;
447 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_ON, &icuStatus);
448 }
449
450 // Most string differences are Primary. Do a primary check first, then if there
451 // are no differences do a comparison with the options in the collator.
452 UCollationResult icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
453 if (icuResult != UCOL_EQUAL) {
454 *orderP = (icuResult == UCOL_LESS) ? -2 : 2;
455 }
456 if (*orderP == 0) {
457 UErrorCode icuStatus = U_ZERO_ERROR;
458 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
459 ucol_setAttribute(collator, UCOL_STRENGTH, (options & kCFCompareDiacriticInsensitive) ? UCOL_PRIMARY : UCOL_SECONDARY, &icuStatus);
460 ucol_setAttribute(collator, UCOL_CASE_LEVEL, (options & kCFCompareCaseInsensitive) ? UCOL_OFF : UCOL_ON, &icuStatus);
461 if (!U_SUCCESS(icuStatus)) {
462 icuStatus = U_ZERO_ERROR;
463 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
464 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
465 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
466 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
467 return 666;
468 }
469
470 // We don't have a primary difference. Recompare with standard collator.
471 icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
472 if (icuResult != UCOL_EQUAL) {
473 *orderP = (icuResult == UCOL_LESS) ? -1 : 1;
474 }
475 icuStatus = U_ZERO_ERROR;
476 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
477 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
478 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
479 }
480 if (*orderP == 0 && (options & kCFCompareNonliteral) == 0) {
481 *orderP = __CompareSpecials(collator, options, text1Ptr, text1Length, text2Ptr, text2Length);
482 }
483
484 *equivalentP = (*orderP == 0);
485
486 // If strings are equivalent but we care about order and have not yet checked
487 // to the level of code point order, then do some more checks for order
488 if (*orderP == 0) {
489 UErrorCode icuStatus = U_ZERO_ERROR;
490 // First try to see if ICU can find any differences above code point level
491 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
492 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &icuStatus);
493 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &icuStatus);
494 if (!U_SUCCESS(icuStatus)) {
495 icuStatus = U_ZERO_ERROR;
496 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
497 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
498 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
499 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
500 return 666;
501 }
502 icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
503 if (icuResult != UCOL_EQUAL) {
504 *orderP = (icuResult == UCOL_LESS) ? -1 : 1;
505 } else {
506 // no ICU differences above code point level, compare code points
507 *orderP = __CompareCodePoints( text1Ptr, text1Length, text2Ptr, text2Length );
508 }
509 icuStatus = U_ZERO_ERROR;
510 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
511 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
512 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
513 }
514
515 if (options & kCFCompareNumerically) {
516 UErrorCode icuStatus = U_ZERO_ERROR;
517 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
518 }
519 return 0; // noErr
520 }
521
522 #endif // DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
523
524 static inline CFIndex __extendLocationBackward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *nonBaseBMP, const uint8_t *punctBMP) {
525 while (location > 0) {
526 UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
527 UTF32Char otherChar;
528 if (CFUniCharIsSurrogateLowCharacter(ch) && CFUniCharIsSurrogateHighCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location - 1)))) {
529 ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
530 uint8_t planeNo = (ch >> 16);
531 if ((planeNo > 1) || (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)))) break;
532 location -= 2;
533 } else {
534 if ((!CFUniCharIsMemberOfBitmap(ch, nonBaseBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
535 --location;
536 }
537 }
538
539 return location;
540 }
541
542 static inline CFIndex __extendLocationForward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *alnumBMP, const uint8_t *punctBMP, const uint8_t *controlBMP, CFIndex strMax) {
543 do {
544 UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
545 UTF32Char otherChar;
546 if (CFUniCharIsSurrogateHighCharacter(ch) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location + 1)))) {
547 ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
548 location += 2;
549 uint8_t planeNo = (ch >> 16);
550 if (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, planeNo))) break;
551 } else {
552 ++location;
553 if ((!CFUniCharIsMemberOfBitmap(ch, alnumBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP) && !CFUniCharIsMemberOfBitmap(ch, controlBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
554 }
555 } while (location < strMax);
556 return location;
557 }
558
559 __private_extern__ CFComparisonResult _CFCompareStringsWithLocale(CFStringInlineBuffer *str1, CFRange str1Range, CFStringInlineBuffer *str2, CFRange str2Range, CFOptionFlags options, const void *compareLocale) {
560 const UniChar *characters1;
561 const UniChar *characters2;
562 CFComparisonResult compResult = kCFCompareEqualTo;
563 CFRange range1 = str1Range;
564 CFRange range2 = str2Range;
565 SInt32 order;
566 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
567 Boolean isEqual;
568 bool forcedOrdering = ((options & kCFCompareForcedOrdering) ? true : false);
569
570 UCollator *collator = NULL;
571 bool defaultCollator = true;
572 #endif
573 static const uint8_t *alnumBMP = NULL;
574 static const uint8_t *nonBaseBMP = NULL;
575 static const uint8_t *punctBMP = NULL;
576 static const uint8_t *controlBMP = NULL;
577
578 if (NULL == alnumBMP) {
579 alnumBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, 0);
580 nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
581 punctBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, 0);
582 controlBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, 0);
583 }
584
585 // Determine the range of characters surrodiing the current index significant for localized comparison. The range is extended backward and forward as long as they are contextual. Contextual characters include all letters and punctuations. Since most control/format characters are ignorable in localized comparison, we also include them extending forward.
586
587 range1.location = str1Range.location;
588 range2.location = str2Range.location;
589
590 // go backward
591 // The characters upto the current index are already determined to be equal by the CFString's standard character folding algorithm. Extend as long as truly contextual (all letters and punctuations).
592 if (range1.location > 0) {
593 range1.location = __extendLocationBackward(range1.location - 1, str1, nonBaseBMP, punctBMP);
594 }
595
596 if (range2.location > 0) {
597 range2.location = __extendLocationBackward(range2.location - 1, str2, nonBaseBMP, punctBMP);
598 }
599
600 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
601 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
602 // First we try to use the last one used on this thread, if the locale is the same,
603 // otherwise we try to check out a default one, or then we create one.
604 UCollator *threadCollator = _CFGetTSD(__CFTSDKeyCollatorUCollator);
605 CFLocaleRef threadLocale = _CFGetTSD(__CFTSDKeyCollatorLocale);
606 if (compareLocale == threadLocale) {
607 collator = threadCollator;
608 } else {
609 #endif
610 collator = __CFStringCopyDefaultCollator((CFLocaleRef)compareLocale);
611 defaultCollator = true;
612 if (NULL == collator) {
613 collator = __CFStringCreateCollator((CFLocaleRef)compareLocale);
614 defaultCollator = false;
615 }
616 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
617 }
618 #endif
619 #endif
620
621 characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
622 characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
623
624 if ((NULL != characters1) && (NULL != characters2)) { // do fast
625 range1.length = (str1Range.location + str1Range.length) - range1.location;
626 range2.length = (str2Range.location + str2Range.length) - range2.location;
627
628 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
629 if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) == 0 /* noErr */)) {
630 compResult = ((isEqual && !forcedOrdering) ? kCFCompareEqualTo : ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
631 } else
632 #endif
633 {
634 compResult = ((memcmp(characters1, characters2, sizeof(UniChar) * range1.length) < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
635 }
636 } else {
637 UniChar *buffer1 = NULL;
638 UniChar *buffer2 = NULL;
639 UTF16Char sBuffer1[kCFStringCompareAllocationIncrement];
640 UTF16Char sBuffer2[kCFStringCompareAllocationIncrement];
641 CFIndex buffer1Len = 0, buffer2Len = 0;
642 CFIndex str1Max = str1Range.location + str1Range.length;
643 CFIndex str2Max = str2Range.location + str2Range.length;
644 CFIndex bufferSize;
645
646 // Extend forward and compare until the result is deterministic. The result is indeterministic if the differences are weak and can be resolved by character folding. For example, comparision between "abc" and "ABC" is considered to be indeterministic.
647 do {
648 if (str1Range.location < str1Max) {
649 str1Range.location = __extendLocationForward(str1Range.location, str1, alnumBMP, punctBMP, controlBMP, str1Max);
650 range1.length = (str1Range.location - range1.location);
651 characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
652
653 if (NULL == characters1) {
654 if ((0 > buffer1Len) || (range1.length > kCFStringCompareAllocationIncrement)) {
655 if (buffer1Len < range1.length) {
656 bufferSize = range1.length + (kCFStringCompareAllocationIncrement - (range1.length % kCFStringCompareAllocationIncrement));
657 if (0 == buffer1Len) {
658 buffer1 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
659 } else if (buffer1Len < range1.length) {
660 buffer1 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer1, sizeof(UTF16Char) * bufferSize, 0);
661 }
662 buffer1Len = bufferSize;
663 }
664 } else {
665 buffer1 = sBuffer1;
666 }
667
668 CFStringGetCharactersFromInlineBuffer(str1, range1, buffer1);
669 characters1 = buffer1;
670 }
671 }
672
673 if (str2Range.location < str2Max) {
674 str2Range.location = __extendLocationForward(str2Range.location, str2, alnumBMP, punctBMP, controlBMP, str2Max);
675 range2.length = (str2Range.location - range2.location);
676 characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
677
678 if (NULL == characters2) {
679 if ((0 > buffer2Len) || (range2.length > kCFStringCompareAllocationIncrement)) {
680 if (buffer2Len < range2.length) {
681 bufferSize = range2.length + (kCFStringCompareAllocationIncrement - (range2.length % kCFStringCompareAllocationIncrement));
682 if (0 == buffer2Len) {
683 buffer2 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
684 } else if (buffer2Len < range2.length) {
685 buffer2 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer2, sizeof(UTF16Char) * bufferSize, 0);
686 }
687 buffer2Len = bufferSize;
688 }
689 } else {
690 buffer2 = sBuffer2;
691 }
692
693 CFStringGetCharactersFromInlineBuffer(str2, range2, buffer2);
694 characters2 = buffer2;
695 }
696 }
697
698 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
699 if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) == 0 /* noErr */)) {
700 if (isEqual) {
701 if (forcedOrdering && (kCFCompareEqualTo == compResult) && (0 != order)) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
702 order = 0;
703 }
704 } else
705 #endif
706 {
707 order = memcmp(characters1, characters2, sizeof(UTF16Char) * ((range1.length < range2.length) ? range1.length : range2.length));
708 if (0 == order) {
709 if (range1.length < range2.length) {
710 order = -2;
711 } else if (range2.length < range1.length) {
712 order = 2;
713 }
714 } else if (order < 0) {
715 --order;
716 } else if (order > 0) {
717 ++order;
718 }
719 }
720
721 if ((order < -1) || (order > 1)) break; // the result is deterministic
722
723 if (0 == order) {
724 range1.location = str1Range.location;
725 range2.location = str2Range.location;
726 }
727 } while ((str1Range.location < str1Max) || (str2Range.location < str2Max));
728
729 if (0 != order) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
730
731 if (buffer1Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer1);
732 if (buffer2Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer2);
733 }
734
735 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
736 if (collator == threadCollator) {
737 // do nothing, already cached
738 } else {
739 if (threadLocale) __collatorFinalize((UCollator *)_CFGetTSD(__CFTSDKeyCollatorUCollator)); // need to dealloc collators
740
741 _CFSetTSD(__CFTSDKeyCollatorUCollator, collator, (void *)__collatorFinalize);
742 _CFSetTSD(__CFTSDKeyCollatorLocale, (void *)CFRetain(compareLocale), NULL);
743 }
744 #endif
745
746 return compResult;
747 }
748