]> git.saurik.com Git - apple/cf.git/blob - CFStringUtilities.c
CF-855.11.tar.gz
[apple/cf.git] / CFStringUtilities.c
1 /*
2 * Copyright (c) 2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFStringUtilities.c
25 Copyright (c) 1999-2013, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFStringEncodingConverterExt.h>
31 #include <CoreFoundation/CFUniChar.h>
32 #include <CoreFoundation/CFStringEncodingExt.h>
33 #include "CFStringEncodingDatabase.h"
34 #include "CFICUConverters.h"
35 #include <limits.h>
36 #include <stdlib.h>
37 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
38 #include <unicode/ucol.h>
39 #include <unicode/ucoleitr.h>
40 #endif
41 #include <string.h>
42
43 #if DEPLOYMENT_TARGET_WINDOWS
44 #include <tchar.h>
45 #endif
46
47
48 Boolean CFStringIsEncodingAvailable(CFStringEncoding theEncoding) {
49 switch (theEncoding) {
50 case kCFStringEncodingASCII: // Built-in encodings
51 case kCFStringEncodingMacRoman:
52 case kCFStringEncodingUTF8:
53 case kCFStringEncodingNonLossyASCII:
54 case kCFStringEncodingWindowsLatin1:
55 case kCFStringEncodingNextStepLatin:
56 case kCFStringEncodingUTF16:
57 case kCFStringEncodingUTF16BE:
58 case kCFStringEncodingUTF16LE:
59 case kCFStringEncodingUTF32:
60 case kCFStringEncodingUTF32BE:
61 case kCFStringEncodingUTF32LE:
62 return true;
63
64 default:
65 return CFStringEncodingIsValidEncoding(theEncoding);
66 }
67 }
68
69 const CFStringEncoding* CFStringGetListOfAvailableEncodings() {
70 return (const CFStringEncoding *)CFStringEncodingListOfAvailableEncodings();
71 }
72
73 CFStringRef CFStringGetNameOfEncoding(CFStringEncoding theEncoding) {
74 static CFMutableDictionaryRef mappingTable = NULL;
75 CFStringRef theName = mappingTable ? (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)(uintptr_t)theEncoding) : NULL;
76
77 if (!theName) {
78 const char *encodingName = __CFStringEncodingGetName(theEncoding);
79
80 if (encodingName) {
81 theName = CFStringCreateWithCString(kCFAllocatorSystemDefault, encodingName, kCFStringEncodingASCII);
82 }
83
84 if (theName) {
85 if (!mappingTable) mappingTable = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks);
86
87 CFDictionaryAddValue(mappingTable, (const void*)(uintptr_t)theEncoding, (const void*)theName);
88 CFRelease(theName);
89 }
90 }
91
92 return theName;
93 }
94
95 CFStringEncoding CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName) {
96 CFStringEncoding encoding = kCFStringEncodingInvalidId;
97 #define BUFFER_SIZE (100)
98 char buffer[BUFFER_SIZE];
99 const char *name = CFStringGetCStringPtr(charsetName, __CFStringGetEightBitStringEncoding());
100
101 if (NULL == name) {
102 if (false == CFStringGetCString(charsetName, buffer, BUFFER_SIZE, __CFStringGetEightBitStringEncoding())) return kCFStringEncodingInvalidId;
103
104 name = buffer;
105 }
106
107 encoding = __CFStringEncodingGetFromCanonicalName(name);
108
109 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
110 if (kCFStringEncodingInvalidId == encoding) encoding = __CFStringEncodingGetFromICUName(name);
111 #endif
112
113
114 // handling Java name variant for MS codepages
115 if ((kCFStringEncodingInvalidId == encoding) && !strncasecmp(name, "ms950", strlen("ms950"))) { // <rdar://problem/12903398> “MS950” is not recognized
116 encoding = __CFStringEncodingGetFromCanonicalName("cp950");
117 }
118
119 return encoding;
120 }
121
122 CFStringRef CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding) {
123 CFStringRef name = NULL;
124 CFIndex value = encoding;
125 static CFMutableDictionaryRef mappingTable = NULL;
126 static CFSpinLock_t lock = CFSpinLockInit;
127
128 __CFSpinLock(&lock);
129 name = ((NULL == mappingTable) ? NULL : (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)value));
130
131 if (NULL == name) {
132 #define STACK_BUFFER_SIZE (100)
133 char buffer[STACK_BUFFER_SIZE];
134
135 if (__CFStringEncodingGetCanonicalName(encoding, buffer, STACK_BUFFER_SIZE)) name = CFStringCreateWithCString(NULL, buffer, kCFStringEncodingASCII);
136
137
138 if (NULL != name) {
139 CFIndex value = encoding;
140
141 if (NULL == mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, &kCFTypeDictionaryValueCallBacks);
142
143 CFDictionaryAddValue(mappingTable, (const void*)value, (const void*)name);
144 CFRelease(name);
145 }
146 }
147 __CFSpinUnlock(&lock);
148
149 return name;
150 }
151
/* Numeric NSString-style encoding values used by
   CFStringConvertEncodingToNSStringEncoding() and
   CFStringConvertNSStringEncodingToEncoding(). */
enum {
    NSASCIIStringEncoding = 1, /* 0..127 only */
    NSNEXTSTEPStringEncoding = 2,
    NSJapaneseEUCStringEncoding = 3,
    NSUTF8StringEncoding = 4,
    NSISOLatin1StringEncoding = 5,
    NSSymbolStringEncoding = 6,
    NSNonLossyASCIIStringEncoding = 7,
    NSShiftJISStringEncoding = 8,
    NSISOLatin2StringEncoding = 9,
    NSUnicodeStringEncoding = 10,
    NSWindowsCP1251StringEncoding = 11, /* Cyrillic; same as AdobeStandardCyrillic */
    NSWindowsCP1252StringEncoding = 12, /* WinLatin1 */
    NSWindowsCP1253StringEncoding = 13, /* Greek */
    NSWindowsCP1254StringEncoding = 14, /* Turkish */
    NSWindowsCP1250StringEncoding = 15, /* WinLatin2 */
    NSISO2022JPStringEncoding = 21, /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding = 30,

    NSProprietaryStringEncoding = 65536 /* Installation-specific encoding */
};

/* Bit 31: marks a value that is a CFStringEncoding carried through the NSString
   encoding space unchanged (see the two converters that use it).
   The constant is unsigned on purpose. (1 << 31) shifts into the sign bit of int,
   which is undefined behaviour, and the resulting negative value sign-extends to
   0xFFFFFFFF80000000 when combined with a 64-bit unsigned long, so bits 32..63
   would be mistaken for the marker. */
#define NSENCODING_MASK (1U << 31)
175
176 unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding) {
177 switch (theEncoding & 0xFFF) {
178 case kCFStringEncodingUnicode:
179 if (theEncoding == kCFStringEncodingUTF16) return NSUnicodeStringEncoding;
180 else if (theEncoding == kCFStringEncodingUTF8) return NSUTF8StringEncoding;
181 break;
182
183 case kCFStringEncodingWindowsLatin1: return NSWindowsCP1252StringEncoding;
184 case kCFStringEncodingMacRoman: return NSMacOSRomanStringEncoding;
185
186 case kCFStringEncodingASCII: return NSASCIIStringEncoding;
187
188 case kCFStringEncodingDOSJapanese: return NSShiftJISStringEncoding;
189 case kCFStringEncodingWindowsCyrillic: return NSWindowsCP1251StringEncoding;
190 case kCFStringEncodingWindowsGreek: return NSWindowsCP1253StringEncoding;
191 case kCFStringEncodingWindowsLatin5: return NSWindowsCP1254StringEncoding;
192 case kCFStringEncodingWindowsLatin2: return NSWindowsCP1250StringEncoding;
193 case kCFStringEncodingISOLatin1: return NSISOLatin1StringEncoding;
194
195 case kCFStringEncodingNonLossyASCII: return NSNonLossyASCIIStringEncoding;
196 case kCFStringEncodingEUC_JP: return NSJapaneseEUCStringEncoding;
197 case kCFStringEncodingMacSymbol: return NSSymbolStringEncoding;
198 case kCFStringEncodingISOLatin2: return NSISOLatin2StringEncoding;
199 case kCFStringEncodingISO_2022_JP: return NSISO2022JPStringEncoding;
200 case kCFStringEncodingNextStepLatin: return NSNEXTSTEPStringEncoding;
201 }
202
203 return NSENCODING_MASK | theEncoding;
204 }
205
206 CFStringEncoding CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding) {
207 const uint16_t encodings[] = {
208 kCFStringEncodingASCII,
209 kCFStringEncodingNextStepLatin,
210 kCFStringEncodingEUC_JP,
211 0,
212 kCFStringEncodingISOLatin1,
213 kCFStringEncodingMacSymbol,
214 kCFStringEncodingNonLossyASCII,
215 kCFStringEncodingDOSJapanese,
216 kCFStringEncodingISOLatin2,
217 kCFStringEncodingUTF16,
218 kCFStringEncodingWindowsCyrillic,
219 kCFStringEncodingWindowsLatin1,
220 kCFStringEncodingWindowsGreek,
221 kCFStringEncodingWindowsLatin5,
222 kCFStringEncodingWindowsLatin2
223 };
224
225 if (NSUTF8StringEncoding == theEncoding) return kCFStringEncodingUTF8;
226
227 if ((theEncoding > 0) && (theEncoding <= NSWindowsCP1250StringEncoding)) return encodings[theEncoding - 1];
228
229 switch (theEncoding) {
230 case NSMacOSRomanStringEncoding: return kCFStringEncodingMacRoman;
231 case NSISO2022JPStringEncoding: return kCFStringEncodingISO_2022_JP;
232
233 default:
234 return ((theEncoding & NSENCODING_MASK) ? theEncoding & ~NSENCODING_MASK : kCFStringEncodingInvalidId);
235 }
236 }
237
238 UInt32 CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding) {
239 uint16_t codepage = __CFStringEncodingGetWindowsCodePage(theEncoding);
240
241 return ((0 == codepage) ? kCFStringEncodingInvalidId : codepage);
242 }
243
244 CFStringEncoding CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding) {
245 return __CFStringEncodingGetFromWindowsCodePage(theEncoding);
246 }
247
248 CFStringEncoding CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding) {
249 CFStringEncoding macEncoding = __CFStringEncodingGetMostCompatibleMacScript(encoding);
250
251
252 return macEncoding;
253 }
254
// Element count of each on-stack UTF-16 scratch buffer in _CFCompareStringsWithLocale();
// heap scratch buffers for longer runs are sized in multiples of this value.
#define kCFStringCompareAllocationIncrement (128)
256
257 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
258
259 // -------------------------------------------------------------------------------------------------
260 // CompareSpecials - ignore case & diacritic differences
261 //
262 // Decomposed have 2nd-4th chars of type Mn or Mc, or in range 1160-11FF (jamo)
263 // Fullwidth & halfwidth are in range FF00-FFEF
264 // Parenthesized & circled are in range 3200-32FF
265 // -------------------------------------------------------------------------------------------------
266
267 enum {
268 kUpperCaseWeightMin = 0x80 | 0x0F,
269 kUpperCaseWeightMax = 0x80 | 0x17,
270 kUpperToLowerDelta = 0x80 | 0x0A, // 0x0A = 0x0F - 0x05
271 kMaskPrimarySecondary = 0xFFFFFF00,
272 kMaskPrimaryOnly = 0xFFFF0000,
273 kMaskSecondaryOnly = 0x0000FF00,
274 kMaskCaseTertiary = 0x000000FF // 2 hi bits case, 6 lo bits tertiary
275 };
276
277 static SInt32 __CompareSpecials(const UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length) {
278 UErrorCode icuStatus = U_ZERO_ERROR;
279 SInt32 orderWidth = 0;
280 SInt32 orderCompos = 0;
281
282 UCollationElements * collElems1 = ucol_openElements(collator, (const UChar *)text1Ptr, text1Length, &icuStatus);
283 UCollationElements * collElems2 = ucol_openElements(collator, (const UChar *)text2Ptr, text2Length, &icuStatus);
284 if (U_SUCCESS(icuStatus)) {
285 int32_t startOffset1 = 0;
286 int32_t startOffset2 = 0;
287
288 while (true) {
289 int32_t elemOrder1, elemOrder2;
290 int32_t offset1, offset2;
291
292 elemOrder1 = ucol_next(collElems1, &icuStatus);
293 elemOrder2 = ucol_next(collElems2, &icuStatus);
294 if ( U_FAILURE(icuStatus) || elemOrder1 == (int32_t)UCOL_NULLORDER || elemOrder2 == (int32_t)UCOL_NULLORDER ) {
295 break;
296 }
297
298 offset1 = ucol_getOffset(collElems1);
299 offset2 = ucol_getOffset(collElems2);
300 if ( (elemOrder1 & kMaskPrimarySecondary) == (elemOrder2 & kMaskPrimarySecondary) ) {
301 if ( (elemOrder1 & kMaskPrimaryOnly) != 0 ) {
302 // keys may differ in case, width, circling, etc.
303
304 int32_t tertiary1 = (elemOrder1 & kMaskCaseTertiary);
305 int32_t tertiary2 = (elemOrder2 & kMaskCaseTertiary);
306 // fold upper to lower case
307 if (tertiary1 >= kUpperCaseWeightMin && tertiary1 <= kUpperCaseWeightMax) {
308 tertiary1 -= kUpperToLowerDelta;
309 }
310 if (tertiary2 >= kUpperCaseWeightMin && tertiary2 <= kUpperCaseWeightMax) {
311 tertiary2 -= kUpperToLowerDelta;
312 }
313 // now compare
314 if (tertiary1 != tertiary2) {
315 orderWidth = (tertiary1 < tertiary2)? -1: 1;
316 break;
317 }
318
319 } else if ( (elemOrder1 & kMaskSecondaryOnly) != 0 ) {
320 // primary weights are both zero, but secondaries are not.
321 if ( orderCompos == 0 && (options & kCFCompareNonliteral) == 0 ) {
322 // We have a code element which is a diacritic.
323 // It may have come from a composed char or a combining char.
324 // If it came from a combining char (longer element length) it sorts first.
325 // This is only an approximation to what the Mac OS 9 code did, but this is an
326 // unusual case anyway.
327 int32_t elem1Length = offset1 - startOffset1;
328 int32_t elem2Length = offset2 - startOffset2;
329 if (elem1Length != elem2Length) {
330 orderCompos = (elem1Length > elem2Length)? -1: 1;
331 }
332 }
333 }
334 }
335
336 startOffset1 = offset1;
337 startOffset2 = offset2;
338 }
339 ucol_closeElements(collElems1);
340 ucol_closeElements(collElems2);
341 }
342
343 return (orderWidth != 0)? orderWidth: orderCompos;
344 }
345
346 static SInt32 __CompareCodePoints(const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length ) {
347 const UniChar * text1P = text1Ptr;
348 const UniChar * text2P = text2Ptr;
349 UInt32 textLimit = (text1Length <= text2Length)? text1Length: text2Length;
350 UInt32 textCounter;
351 SInt32 orderResult = 0;
352
353 // Loop through either string...the first difference differentiates this.
354 for (textCounter = 0; textCounter < textLimit && *text1P == *text2P; textCounter++) {
355 text1P++;
356 text2P++;
357 }
358 if (textCounter < textLimit) {
359 // code point difference
360 orderResult = (*text1P < *text2P) ? -1 : 1;
361 } else if (text1Length != text2Length) {
362 // one string has extra stuff at end
363 orderResult = (text1Length < text2Length) ? -1 : 1;
364 }
365 return orderResult;
366 }
367
368
369 extern const CFStringRef __kCFLocaleCollatorID;
370
371 static UCollator *__CFStringCreateCollator(CFLocaleRef compareLocale) {
372 CFStringRef canonLocaleCFStr = (CFStringRef)CFLocaleGetValue(compareLocale, __kCFLocaleCollatorID);
373 char icuLocaleStr[128] = {0};
374 CFStringGetCString(canonLocaleCFStr, icuLocaleStr, sizeof(icuLocaleStr), kCFStringEncodingASCII);
375 UErrorCode icuStatus = U_ZERO_ERROR;
376 UCollator * collator = ucol_open(icuLocaleStr, &icuStatus);
377 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
378 ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &icuStatus);
379 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
380 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
381 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
382 return collator;
383 }
384
385 #define kCFMaxCachedDefaultCollators (8)
386 static UCollator *__CFDefaultCollators[kCFMaxCachedDefaultCollators];
387 static CFIndex __CFDefaultCollatorsCount = 0;
388 static const void *__CFDefaultCollatorLocale = NULL;
389 static CFSpinLock_t __CFDefaultCollatorLock = CFSpinLockInit;
390
391 static UCollator *__CFStringCopyDefaultCollator(CFLocaleRef compareLocale) {
392 CFLocaleRef currentLocale = NULL;
393 UCollator * collator = NULL;
394
395 if (compareLocale != __CFDefaultCollatorLocale) {
396 currentLocale = CFLocaleCopyCurrent();
397 if (compareLocale != currentLocale) {
398 CFRelease(currentLocale);
399 return NULL;
400 }
401 }
402
403 __CFSpinLock(&__CFDefaultCollatorLock);
404 if ((NULL != currentLocale) && (__CFDefaultCollatorLocale != currentLocale)) {
405 while (__CFDefaultCollatorsCount > 0) ucol_close(__CFDefaultCollators[--__CFDefaultCollatorsCount]);
406 __CFDefaultCollatorLocale = CFRetain(currentLocale);
407 }
408
409 if (__CFDefaultCollatorsCount > 0) collator = __CFDefaultCollators[--__CFDefaultCollatorsCount];
410 __CFSpinUnlock(&__CFDefaultCollatorLock);
411
412 if (NULL == collator) {
413 collator = __CFStringCreateCollator(compareLocale);
414 }
415
416 if (NULL != currentLocale) CFRelease(currentLocale);
417
418 return collator;
419 }
420
421 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
422 static void __collatorFinalize(UCollator *collator) {
423 CFLocaleRef locale = _CFGetTSD(__CFTSDKeyCollatorLocale);
424 _CFSetTSD(__CFTSDKeyCollatorUCollator, NULL, NULL);
425 _CFSetTSD(__CFTSDKeyCollatorLocale, NULL, NULL);
426 __CFSpinLock(&__CFDefaultCollatorLock);
427 if ((__CFDefaultCollatorLocale == locale) && (__CFDefaultCollatorsCount < kCFMaxCachedDefaultCollators)) {
428 __CFDefaultCollators[__CFDefaultCollatorsCount++] = collator;
429 collator = NULL;
430 }
431 __CFSpinUnlock(&__CFDefaultCollatorLock);
432 if (NULL != collator) ucol_close(collator);
433 if (locale) CFRelease(locale);
434 }
435 #endif
436
437 // -------------------------------------------------------------------------------------------------
438 // __CompareTextDefault
439 //
440 // A primary difference is denoted by values 2/-2 in orderP. Other differences are indicated with a -1/1.
441 // A negative value indicates that text1 sorts before text2.
442 // -------------------------------------------------------------------------------------------------
443 static OSStatus __CompareTextDefault(UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length, Boolean *equivalentP, SInt32 *orderP) {
444
445 // collator must have default settings restored on exit from this function
446
447 *equivalentP = true;
448 *orderP = 0;
449
450 if (options & kCFCompareNumerically) {
451 UErrorCode icuStatus = U_ZERO_ERROR;
452 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_ON, &icuStatus);
453 }
454
455 // Most string differences are Primary. Do a primary check first, then if there
456 // are no differences do a comparison with the options in the collator.
457 UCollationResult icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
458 if (icuResult != UCOL_EQUAL) {
459 *orderP = (icuResult == UCOL_LESS) ? -2 : 2;
460 }
461 if (*orderP == 0) {
462 UErrorCode icuStatus = U_ZERO_ERROR;
463 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
464 ucol_setAttribute(collator, UCOL_STRENGTH, (options & kCFCompareDiacriticInsensitive) ? UCOL_PRIMARY : UCOL_SECONDARY, &icuStatus);
465 ucol_setAttribute(collator, UCOL_CASE_LEVEL, (options & kCFCompareCaseInsensitive) ? UCOL_OFF : UCOL_ON, &icuStatus);
466 if (!U_SUCCESS(icuStatus)) {
467 icuStatus = U_ZERO_ERROR;
468 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
469 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
470 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
471 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
472 return 666;
473 }
474
475 // We don't have a primary difference. Recompare with standard collator.
476 icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
477 if (icuResult != UCOL_EQUAL) {
478 *orderP = (icuResult == UCOL_LESS) ? -1 : 1;
479 }
480 icuStatus = U_ZERO_ERROR;
481 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
482 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
483 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
484 }
485 if (*orderP == 0 && (options & kCFCompareNonliteral) == 0) {
486 *orderP = __CompareSpecials(collator, options, text1Ptr, text1Length, text2Ptr, text2Length);
487 }
488
489 *equivalentP = (*orderP == 0);
490
491 // If strings are equivalent but we care about order and have not yet checked
492 // to the level of code point order, then do some more checks for order
493 if (*orderP == 0) {
494 UErrorCode icuStatus = U_ZERO_ERROR;
495 // First try to see if ICU can find any differences above code point level
496 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
497 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &icuStatus);
498 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &icuStatus);
499 if (!U_SUCCESS(icuStatus)) {
500 icuStatus = U_ZERO_ERROR;
501 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
502 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
503 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
504 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
505 return 666;
506 }
507 icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
508 if (icuResult != UCOL_EQUAL) {
509 *orderP = (icuResult == UCOL_LESS) ? -1 : 1;
510 } else {
511 // no ICU differences above code point level, compare code points
512 *orderP = __CompareCodePoints( text1Ptr, text1Length, text2Ptr, text2Length );
513 }
514 icuStatus = U_ZERO_ERROR;
515 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
516 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
517 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
518 }
519
520 if (options & kCFCompareNumerically) {
521 UErrorCode icuStatus = U_ZERO_ERROR;
522 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
523 }
524 return 0; // noErr
525 }
526
527 #endif // DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
528
529 static inline CFIndex __extendLocationBackward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *nonBaseBMP, const uint8_t *punctBMP) {
530 while (location > 0) {
531 UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
532 UTF32Char otherChar;
533 if (CFUniCharIsSurrogateLowCharacter(ch) && CFUniCharIsSurrogateHighCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location - 1)))) {
534 ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
535 uint8_t planeNo = (ch >> 16);
536 if ((planeNo > 1) || (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)))) break;
537 location -= 2;
538 } else {
539 if ((!CFUniCharIsMemberOfBitmap(ch, nonBaseBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
540 --location;
541 }
542 }
543
544 return location;
545 }
546
547 static inline CFIndex __extendLocationForward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *alnumBMP, const uint8_t *punctBMP, const uint8_t *controlBMP, CFIndex strMax) {
548 do {
549 UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
550 UTF32Char otherChar;
551 if (CFUniCharIsSurrogateHighCharacter(ch) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location + 1)))) {
552 ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
553 location += 2;
554 uint8_t planeNo = (ch >> 16);
555 if (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, planeNo))) break;
556 } else {
557 ++location;
558 if ((!CFUniCharIsMemberOfBitmap(ch, alnumBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP) && !CFUniCharIsMemberOfBitmap(ch, controlBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
559 }
560 } while (location < strMax);
561 return location;
562 }
563
564 CF_PRIVATE CFComparisonResult _CFCompareStringsWithLocale(CFStringInlineBuffer *str1, CFRange str1Range, CFStringInlineBuffer *str2, CFRange str2Range, CFOptionFlags options, const void *compareLocale) {
565 const UniChar *characters1;
566 const UniChar *characters2;
567 CFComparisonResult compResult = kCFCompareEqualTo;
568 CFRange range1 = str1Range;
569 CFRange range2 = str2Range;
570 SInt32 order;
571 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
572 Boolean isEqual;
573 bool forcedOrdering = ((options & kCFCompareForcedOrdering) ? true : false);
574
575 UCollator *collator = NULL;
576 bool defaultCollator = true;
577 #endif
578 static const uint8_t *alnumBMP = NULL;
579 static const uint8_t *nonBaseBMP = NULL;
580 static const uint8_t *punctBMP = NULL;
581 static const uint8_t *controlBMP = NULL;
582
583 if (NULL == alnumBMP) {
584 alnumBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, 0);
585 nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
586 punctBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, 0);
587 controlBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, 0);
588 }
589
590 // Determine the range of characters surrodiing the current index significant for localized comparison. The range is extended backward and forward as long as they are contextual. Contextual characters include all letters and punctuations. Since most control/format characters are ignorable in localized comparison, we also include them extending forward.
591
592 range1.location = str1Range.location;
593 range2.location = str2Range.location;
594
595 // go backward
596 // The characters upto the current index are already determined to be equal by the CFString's standard character folding algorithm. Extend as long as truly contextual (all letters and punctuations).
597 if (range1.location > 0) {
598 range1.location = __extendLocationBackward(range1.location - 1, str1, nonBaseBMP, punctBMP);
599 }
600
601 if (range2.location > 0) {
602 range2.location = __extendLocationBackward(range2.location - 1, str2, nonBaseBMP, punctBMP);
603 }
604
605 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
606 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
607 // First we try to use the last one used on this thread, if the locale is the same,
608 // otherwise we try to check out a default one, or then we create one.
609 UCollator *threadCollator = _CFGetTSD(__CFTSDKeyCollatorUCollator);
610 CFLocaleRef threadLocale = _CFGetTSD(__CFTSDKeyCollatorLocale);
611 if (compareLocale == threadLocale) {
612 collator = threadCollator;
613 } else {
614 #endif
615 collator = __CFStringCopyDefaultCollator((CFLocaleRef)compareLocale);
616 defaultCollator = true;
617 if (NULL == collator) {
618 collator = __CFStringCreateCollator((CFLocaleRef)compareLocale);
619 defaultCollator = false;
620 }
621 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
622 }
623 #endif
624 #endif
625
626 characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
627 characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
628
629 if ((NULL != characters1) && (NULL != characters2)) { // do fast
630 range1.length = (str1Range.location + str1Range.length) - range1.location;
631 range2.length = (str2Range.location + str2Range.length) - range2.location;
632
633 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
634 if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) == 0 /* noErr */)) {
635 compResult = ((isEqual && !forcedOrdering) ? kCFCompareEqualTo : ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
636 } else
637 #endif
638 {
639 compResult = ((memcmp(characters1, characters2, sizeof(UniChar) * range1.length) < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
640 }
641 } else {
642 UniChar *buffer1 = NULL;
643 UniChar *buffer2 = NULL;
644 UTF16Char sBuffer1[kCFStringCompareAllocationIncrement];
645 UTF16Char sBuffer2[kCFStringCompareAllocationIncrement];
646 CFIndex buffer1Len = 0, buffer2Len = 0;
647 CFIndex str1Max = str1Range.location + str1Range.length;
648 CFIndex str2Max = str2Range.location + str2Range.length;
649 CFIndex bufferSize;
650
651 // Extend forward and compare until the result is deterministic. The result is indeterministic if the differences are weak and can be resolved by character folding. For example, comparision between "abc" and "ABC" is considered to be indeterministic.
652 do {
653 if (str1Range.location < str1Max) {
654 str1Range.location = __extendLocationForward(str1Range.location, str1, alnumBMP, punctBMP, controlBMP, str1Max);
655 range1.length = (str1Range.location - range1.location);
656 characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
657
658 if (NULL == characters1) {
659 if ((0 > buffer1Len) || (range1.length > kCFStringCompareAllocationIncrement)) {
660 if (buffer1Len < range1.length) {
661 bufferSize = range1.length + (kCFStringCompareAllocationIncrement - (range1.length % kCFStringCompareAllocationIncrement));
662 if (0 == buffer1Len) {
663 buffer1 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
664 } else if (buffer1Len < range1.length) {
665 buffer1 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer1, sizeof(UTF16Char) * bufferSize, 0);
666 }
667 buffer1Len = bufferSize;
668 }
669 } else {
670 buffer1 = sBuffer1;
671 }
672
673 CFStringGetCharactersFromInlineBuffer(str1, range1, buffer1);
674 characters1 = buffer1;
675 }
676 }
677
678 if (str2Range.location < str2Max) {
679 str2Range.location = __extendLocationForward(str2Range.location, str2, alnumBMP, punctBMP, controlBMP, str2Max);
680 range2.length = (str2Range.location - range2.location);
681 characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
682
683 if (NULL == characters2) {
684 if ((0 > buffer2Len) || (range2.length > kCFStringCompareAllocationIncrement)) {
685 if (buffer2Len < range2.length) {
686 bufferSize = range2.length + (kCFStringCompareAllocationIncrement - (range2.length % kCFStringCompareAllocationIncrement));
687 if (0 == buffer2Len) {
688 buffer2 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
689 } else if (buffer2Len < range2.length) {
690 buffer2 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer2, sizeof(UTF16Char) * bufferSize, 0);
691 }
692 buffer2Len = bufferSize;
693 }
694 } else {
695 buffer2 = sBuffer2;
696 }
697
698 CFStringGetCharactersFromInlineBuffer(str2, range2, buffer2);
699 characters2 = buffer2;
700 }
701 }
702
703 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
704 if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) == 0 /* noErr */)) {
705 if (isEqual) {
706 if (forcedOrdering && (kCFCompareEqualTo == compResult) && (0 != order)) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
707 order = 0;
708 }
709 } else
710 #endif
711 {
712 order = memcmp(characters1, characters2, sizeof(UTF16Char) * ((range1.length < range2.length) ? range1.length : range2.length));
713 if (0 == order) {
714 if (range1.length < range2.length) {
715 order = -2;
716 } else if (range2.length < range1.length) {
717 order = 2;
718 }
719 } else if (order < 0) {
720 --order;
721 } else if (order > 0) {
722 ++order;
723 }
724 }
725
726 if ((order < -1) || (order > 1)) break; // the result is deterministic
727
728 if (0 == order) {
729 range1.location = str1Range.location;
730 range2.location = str2Range.location;
731 }
732 } while ((str1Range.location < str1Max) || (str2Range.location < str2Max));
733
734 if (0 != order) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
735
736 if (buffer1Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer1);
737 if (buffer2Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer2);
738 }
739
740 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
741 if (collator == threadCollator) {
742 // do nothing, already cached
743 } else {
744 if (threadLocale) __collatorFinalize((UCollator *)_CFGetTSD(__CFTSDKeyCollatorUCollator)); // need to dealloc collators
745
746 _CFSetTSD(__CFTSDKeyCollatorUCollator, collator, (void *)__collatorFinalize);
747 _CFSetTSD(__CFTSDKeyCollatorLocale, (void *)CFRetain(compareLocale), NULL);
748 }
749 #endif
750
751 return compResult;
752 }
753