2 * Copyright (c) 2014 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 /* CFStringUtilities.c
25 Copyright (c) 1999-2014, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFStringEncodingConverterExt.h>
31 #include <CoreFoundation/CFUniChar.h>
32 #include <CoreFoundation/CFStringEncodingExt.h>
33 #include "CFStringEncodingDatabase.h"
34 #include "CFICUConverters.h"
37 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
38 #include <unicode/ucol.h>
39 #include <unicode/ucoleitr.h>
43 #if DEPLOYMENT_TARGET_WINDOWS
48 Boolean
CFStringIsEncodingAvailable(CFStringEncoding theEncoding
) {
49 switch (theEncoding
) {
50 case kCFStringEncodingASCII
: // Built-in encodings
51 case kCFStringEncodingMacRoman
:
52 case kCFStringEncodingUTF8
:
53 case kCFStringEncodingNonLossyASCII
:
54 case kCFStringEncodingWindowsLatin1
:
55 case kCFStringEncodingNextStepLatin
:
56 case kCFStringEncodingUTF16
:
57 case kCFStringEncodingUTF16BE
:
58 case kCFStringEncodingUTF16LE
:
59 case kCFStringEncodingUTF32
:
60 case kCFStringEncodingUTF32BE
:
61 case kCFStringEncodingUTF32LE
:
65 return CFStringEncodingIsValidEncoding(theEncoding
);
69 const CFStringEncoding
* CFStringGetListOfAvailableEncodings() {
70 return (const CFStringEncoding
*)CFStringEncodingListOfAvailableEncodings();
73 CFStringRef
CFStringGetNameOfEncoding(CFStringEncoding theEncoding
) {
74 static CFMutableDictionaryRef mappingTable
= NULL
;
75 static OSSpinLock mappingTableLock
= OS_SPINLOCK_INIT
;
77 CFStringRef theName
= NULL
;
80 OSSpinLockLock(&mappingTableLock
);
81 theName
= (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)(uintptr_t)theEncoding
);
82 OSSpinLockUnlock(&mappingTableLock
);
86 const char *encodingName
= __CFStringEncodingGetName(theEncoding
);
89 theName
= CFStringCreateWithCString(kCFAllocatorSystemDefault
, encodingName
, kCFStringEncodingASCII
);
93 OSSpinLockLock(&mappingTableLock
);
95 CFStringRef result
= NULL
;
97 mappingTable
= CFDictionaryCreateMutable(kCFAllocatorSystemDefault
, 0, (const CFDictionaryKeyCallBacks
*)NULL
, &kCFTypeDictionaryValueCallBacks
);
98 } else { // Check to see if this got in the dictionary in the meantime
99 result
= (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)(uintptr_t)theEncoding
);
101 if (!result
) { // If not, add it in
102 CFDictionaryAddValue(mappingTable
, (const void*)(uintptr_t)theEncoding
, (const void*)theName
);
103 OSSpinLockUnlock(&mappingTableLock
);
105 } else { // Otherwise use the one already in there
106 OSSpinLockUnlock(&mappingTableLock
);
116 CFStringEncoding
CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName
) {
117 CFStringEncoding encoding
= kCFStringEncodingInvalidId
;
118 #define BUFFER_SIZE (100)
119 char buffer
[BUFFER_SIZE
];
120 const char *name
= CFStringGetCStringPtr(charsetName
, __CFStringGetEightBitStringEncoding());
123 if (false == CFStringGetCString(charsetName
, buffer
, BUFFER_SIZE
, __CFStringGetEightBitStringEncoding())) return kCFStringEncodingInvalidId
;
128 encoding
= __CFStringEncodingGetFromCanonicalName(name
);
130 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
131 if (kCFStringEncodingInvalidId
== encoding
) encoding
= __CFStringEncodingGetFromICUName(name
);
135 // handling Java name variant for MS codepages
136 if ((kCFStringEncodingInvalidId
== encoding
) && !strncasecmp(name
, "ms950", strlen("ms950"))) { // <rdar://problem/12903398> “MS950” is not recognized
137 encoding
= __CFStringEncodingGetFromCanonicalName("cp950");
143 CFStringRef
CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding
) {
144 CFStringRef name
= NULL
;
145 CFIndex value
= encoding
;
146 static CFMutableDictionaryRef mappingTable
= NULL
;
147 static CFLock_t lock
= CFLockInit
;
150 name
= ((NULL
== mappingTable
) ? NULL
: (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)value
));
153 #define STACK_BUFFER_SIZE (100)
154 char buffer
[STACK_BUFFER_SIZE
];
156 if (__CFStringEncodingGetCanonicalName(encoding
, buffer
, STACK_BUFFER_SIZE
)) name
= CFStringCreateWithCString(NULL
, buffer
, kCFStringEncodingASCII
);
160 CFIndex value
= encoding
;
162 if (NULL
== mappingTable
) mappingTable
= CFDictionaryCreateMutable(NULL
, 0, NULL
, &kCFTypeDictionaryValueCallBacks
);
164 CFDictionaryAddValue(mappingTable
, (const void*)value
, (const void*)name
);
174 NSASCIIStringEncoding
= 1, /* 0..127 only */
175 NSNEXTSTEPStringEncoding
= 2,
176 NSJapaneseEUCStringEncoding
= 3,
177 NSUTF8StringEncoding
= 4,
178 NSISOLatin1StringEncoding
= 5,
179 NSSymbolStringEncoding
= 6,
180 NSNonLossyASCIIStringEncoding
= 7,
181 NSShiftJISStringEncoding
= 8,
182 NSISOLatin2StringEncoding
= 9,
183 NSUnicodeStringEncoding
= 10,
184 NSWindowsCP1251StringEncoding
= 11, /* Cyrillic; same as AdobeStandardCyrillic */
185 NSWindowsCP1252StringEncoding
= 12, /* WinLatin1 */
186 NSWindowsCP1253StringEncoding
= 13, /* Greek */
187 NSWindowsCP1254StringEncoding
= 14, /* Turkish */
188 NSWindowsCP1250StringEncoding
= 15, /* WinLatin2 */
189 NSISO2022JPStringEncoding
= 21, /* ISO 2022 Japanese encoding for e-mail */
190 NSMacOSRomanStringEncoding
= 30,
192 NSProprietaryStringEncoding
= 65536 /* Installation-specific encoding */
/* Bit 31: OR-ed into a CFStringEncoding to form an NSStringEncoding value when no
 * dedicated NSStringEncoding constant exists, and tested/stripped on the way back.
 * The literal is unsigned on purpose: (1 << 31) shifts into the sign bit of int
 * (undefined behaviour in C) and, being negative, sign-extends to
 * 0xFFFFFFFF80000000 when combined with a 64-bit unsigned long. */
#define NSENCODING_MASK (1U << 31)
197 unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding
) {
198 switch (theEncoding
& 0xFFF) {
199 case kCFStringEncodingUnicode
:
200 if (theEncoding
== kCFStringEncodingUTF16
) return NSUnicodeStringEncoding
;
201 else if (theEncoding
== kCFStringEncodingUTF8
) return NSUTF8StringEncoding
;
204 case kCFStringEncodingWindowsLatin1
: return NSWindowsCP1252StringEncoding
;
205 case kCFStringEncodingMacRoman
: return NSMacOSRomanStringEncoding
;
207 case kCFStringEncodingASCII
: return NSASCIIStringEncoding
;
209 case kCFStringEncodingDOSJapanese
: return NSShiftJISStringEncoding
;
210 case kCFStringEncodingWindowsCyrillic
: return NSWindowsCP1251StringEncoding
;
211 case kCFStringEncodingWindowsGreek
: return NSWindowsCP1253StringEncoding
;
212 case kCFStringEncodingWindowsLatin5
: return NSWindowsCP1254StringEncoding
;
213 case kCFStringEncodingWindowsLatin2
: return NSWindowsCP1250StringEncoding
;
214 case kCFStringEncodingISOLatin1
: return NSISOLatin1StringEncoding
;
216 case kCFStringEncodingNonLossyASCII
: return NSNonLossyASCIIStringEncoding
;
217 case kCFStringEncodingEUC_JP
: return NSJapaneseEUCStringEncoding
;
218 case kCFStringEncodingMacSymbol
: return NSSymbolStringEncoding
;
219 case kCFStringEncodingISOLatin2
: return NSISOLatin2StringEncoding
;
220 case kCFStringEncodingISO_2022_JP
: return NSISO2022JPStringEncoding
;
221 case kCFStringEncodingNextStepLatin
: return NSNEXTSTEPStringEncoding
;
224 return NSENCODING_MASK
| theEncoding
;
227 CFStringEncoding
CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding
) {
228 const uint16_t encodings
[] = {
229 kCFStringEncodingASCII
,
230 kCFStringEncodingNextStepLatin
,
231 kCFStringEncodingEUC_JP
,
233 kCFStringEncodingISOLatin1
,
234 kCFStringEncodingMacSymbol
,
235 kCFStringEncodingNonLossyASCII
,
236 kCFStringEncodingDOSJapanese
,
237 kCFStringEncodingISOLatin2
,
238 kCFStringEncodingUTF16
,
239 kCFStringEncodingWindowsCyrillic
,
240 kCFStringEncodingWindowsLatin1
,
241 kCFStringEncodingWindowsGreek
,
242 kCFStringEncodingWindowsLatin5
,
243 kCFStringEncodingWindowsLatin2
246 if (NSUTF8StringEncoding
== theEncoding
) return kCFStringEncodingUTF8
;
248 if ((theEncoding
> 0) && (theEncoding
<= NSWindowsCP1250StringEncoding
)) return encodings
[theEncoding
- 1];
250 switch (theEncoding
) {
251 case NSMacOSRomanStringEncoding
: return kCFStringEncodingMacRoman
;
252 case NSISO2022JPStringEncoding
: return kCFStringEncodingISO_2022_JP
;
255 return ((theEncoding
& NSENCODING_MASK
) ? theEncoding
& ~NSENCODING_MASK
: kCFStringEncodingInvalidId
);
259 UInt32
CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding
) {
260 uint16_t codepage
= __CFStringEncodingGetWindowsCodePage(theEncoding
);
262 return ((0 == codepage
) ? kCFStringEncodingInvalidId
: codepage
);
265 CFStringEncoding
CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding
) {
266 return __CFStringEncodingGetFromWindowsCodePage(theEncoding
);
269 CFStringEncoding
CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding
) {
270 CFStringEncoding macEncoding
= __CFStringEncodingGetMostCompatibleMacScript(encoding
);
276 #define kCFStringCompareAllocationIncrement (128)
278 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
280 // -------------------------------------------------------------------------------------------------
281 // CompareSpecials - ignore case & diacritic differences
283 // Decomposed have 2nd-4th chars of type Mn or Mc, or in range 1160-11FF (jamo)
284 // Fullwidth & halfwidth are in range FF00-FFEF
285 // Parenthesized & circled are in range 3200-32FF
286 // -------------------------------------------------------------------------------------------------
289 kUpperCaseWeightMin
= 0x80 | 0x0F,
290 kUpperCaseWeightMax
= 0x80 | 0x17,
291 kUpperToLowerDelta
= 0x80 | 0x0A, // 0x0A = 0x0F - 0x05
292 kMaskPrimarySecondary
= 0xFFFFFF00,
293 kMaskPrimaryOnly
= 0xFFFF0000,
294 kMaskSecondaryOnly
= 0x0000FF00,
295 kMaskCaseTertiary
= 0x000000FF // 2 hi bits case, 6 lo bits tertiary
298 static SInt32
__CompareSpecials(const UCollator
*collator
, CFOptionFlags options
, const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
) {
299 UErrorCode icuStatus
= U_ZERO_ERROR
;
300 SInt32 orderWidth
= 0;
301 SInt32 orderCompos
= 0;
303 UCollationElements
* collElems1
= ucol_openElements(collator
, (const UChar
*)text1Ptr
, text1Length
, &icuStatus
);
304 UCollationElements
* collElems2
= ucol_openElements(collator
, (const UChar
*)text2Ptr
, text2Length
, &icuStatus
);
305 if (U_SUCCESS(icuStatus
)) {
306 int32_t startOffset1
= 0;
307 int32_t startOffset2
= 0;
310 int32_t elemOrder1
, elemOrder2
;
311 int32_t offset1
, offset2
;
313 elemOrder1
= ucol_next(collElems1
, &icuStatus
);
314 elemOrder2
= ucol_next(collElems2
, &icuStatus
);
315 if ( U_FAILURE(icuStatus
) || elemOrder1
== (int32_t)UCOL_NULLORDER
|| elemOrder2
== (int32_t)UCOL_NULLORDER
) {
319 offset1
= ucol_getOffset(collElems1
);
320 offset2
= ucol_getOffset(collElems2
);
321 if ( (elemOrder1
& kMaskPrimarySecondary
) == (elemOrder2
& kMaskPrimarySecondary
) ) {
322 if ( (elemOrder1
& kMaskPrimaryOnly
) != 0 ) {
323 // keys may differ in case, width, circling, etc.
325 int32_t tertiary1
= (elemOrder1
& kMaskCaseTertiary
);
326 int32_t tertiary2
= (elemOrder2
& kMaskCaseTertiary
);
327 // fold upper to lower case
328 if (tertiary1
>= kUpperCaseWeightMin
&& tertiary1
<= kUpperCaseWeightMax
) {
329 tertiary1
-= kUpperToLowerDelta
;
331 if (tertiary2
>= kUpperCaseWeightMin
&& tertiary2
<= kUpperCaseWeightMax
) {
332 tertiary2
-= kUpperToLowerDelta
;
335 if (tertiary1
!= tertiary2
) {
336 orderWidth
= (tertiary1
< tertiary2
)? -1: 1;
340 } else if ( (elemOrder1
& kMaskSecondaryOnly
) != 0 ) {
341 // primary weights are both zero, but secondaries are not.
342 if ( orderCompos
== 0 && (options
& kCFCompareNonliteral
) == 0 ) {
343 // We have a code element which is a diacritic.
344 // It may have come from a composed char or a combining char.
345 // If it came from a combining char (longer element length) it sorts first.
346 // This is only an approximation to what the Mac OS 9 code did, but this is an
347 // unusual case anyway.
348 int32_t elem1Length
= offset1
- startOffset1
;
349 int32_t elem2Length
= offset2
- startOffset2
;
350 if (elem1Length
!= elem2Length
) {
351 orderCompos
= (elem1Length
> elem2Length
)? -1: 1;
357 startOffset1
= offset1
;
358 startOffset2
= offset2
;
360 ucol_closeElements(collElems1
);
361 ucol_closeElements(collElems2
);
364 return (orderWidth
!= 0)? orderWidth
: orderCompos
;
367 static SInt32
__CompareCodePoints(const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
) {
368 const UniChar
* text1P
= text1Ptr
;
369 const UniChar
* text2P
= text2Ptr
;
370 UInt32 textLimit
= (text1Length
<= text2Length
)? text1Length
: text2Length
;
372 SInt32 orderResult
= 0;
374 // Loop through either string...the first difference differentiates this.
375 for (textCounter
= 0; textCounter
< textLimit
&& *text1P
== *text2P
; textCounter
++) {
379 if (textCounter
< textLimit
) {
380 // code point difference
381 orderResult
= (*text1P
< *text2P
) ? -1 : 1;
382 } else if (text1Length
!= text2Length
) {
383 // one string has extra stuff at end
384 orderResult
= (text1Length
< text2Length
) ? -1 : 1;
390 extern const CFStringRef __kCFLocaleCollatorID
;
392 static UCollator
*__CFStringCreateCollator(CFLocaleRef compareLocale
) {
393 CFStringRef canonLocaleCFStr
= (CFStringRef
)CFLocaleGetValue(compareLocale
, __kCFLocaleCollatorID
);
394 char icuLocaleStr
[128] = {0};
395 CFStringGetCString(canonLocaleCFStr
, icuLocaleStr
, sizeof(icuLocaleStr
), kCFStringEncodingASCII
);
396 UErrorCode icuStatus
= U_ZERO_ERROR
;
397 UCollator
* collator
= ucol_open(icuLocaleStr
, &icuStatus
);
398 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
399 ucol_setAttribute(collator
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &icuStatus
);
400 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
401 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
402 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
406 #define kCFMaxCachedDefaultCollators (8)
407 static UCollator
*__CFDefaultCollators
[kCFMaxCachedDefaultCollators
];
408 static CFIndex __CFDefaultCollatorsCount
= 0;
409 static const void *__CFDefaultCollatorLocale
= NULL
;
410 static CFLock_t __CFDefaultCollatorLock
= CFLockInit
;
412 static UCollator
*__CFStringCopyDefaultCollator(CFLocaleRef compareLocale
) {
413 CFLocaleRef currentLocale
= NULL
;
414 UCollator
* collator
= NULL
;
416 if (compareLocale
!= __CFDefaultCollatorLocale
) {
417 currentLocale
= CFLocaleCopyCurrent();
418 if (compareLocale
!= currentLocale
) {
419 CFRelease(currentLocale
);
424 __CFLock(&__CFDefaultCollatorLock
);
425 if ((NULL
!= currentLocale
) && (__CFDefaultCollatorLocale
!= currentLocale
)) {
426 while (__CFDefaultCollatorsCount
> 0) ucol_close(__CFDefaultCollators
[--__CFDefaultCollatorsCount
]);
427 __CFDefaultCollatorLocale
= CFRetain(currentLocale
);
430 if (__CFDefaultCollatorsCount
> 0) collator
= __CFDefaultCollators
[--__CFDefaultCollatorsCount
];
431 __CFUnlock(&__CFDefaultCollatorLock
);
433 if (NULL
== collator
) {
434 collator
= __CFStringCreateCollator(compareLocale
);
437 if (NULL
!= currentLocale
) CFRelease(currentLocale
);
442 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
443 static void __collatorFinalize(UCollator
*collator
) {
444 CFLocaleRef locale
= _CFGetTSD(__CFTSDKeyCollatorLocale
);
445 _CFSetTSD(__CFTSDKeyCollatorUCollator
, NULL
, NULL
);
446 _CFSetTSD(__CFTSDKeyCollatorLocale
, NULL
, NULL
);
447 __CFLock(&__CFDefaultCollatorLock
);
448 if ((__CFDefaultCollatorLocale
== locale
) && (__CFDefaultCollatorsCount
< kCFMaxCachedDefaultCollators
)) {
449 __CFDefaultCollators
[__CFDefaultCollatorsCount
++] = collator
;
452 __CFUnlock(&__CFDefaultCollatorLock
);
453 if (NULL
!= collator
) ucol_close(collator
);
454 if (locale
) CFRelease(locale
);
458 // -------------------------------------------------------------------------------------------------
459 // __CompareTextDefault
461 // A primary difference is denoted by values 2/-2 in orderP. Other differences are indicated with a -1/1.
462 // A negative value indicates that text1 sorts before text2.
463 // -------------------------------------------------------------------------------------------------
464 static OSStatus
__CompareTextDefault(UCollator
*collator
, CFOptionFlags options
, const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
, Boolean
*equivalentP
, SInt32
*orderP
) {
466 // collator must have default settings restored on exit from this function
471 if (options
& kCFCompareNumerically
) {
472 UErrorCode icuStatus
= U_ZERO_ERROR
;
473 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &icuStatus
);
476 // Most string differences are Primary. Do a primary check first, then if there
477 // are no differences do a comparison with the options in the collator.
478 UCollationResult icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
479 if (icuResult
!= UCOL_EQUAL
) {
480 *orderP
= (icuResult
== UCOL_LESS
) ? -2 : 2;
483 UErrorCode icuStatus
= U_ZERO_ERROR
;
484 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &icuStatus
);
485 ucol_setAttribute(collator
, UCOL_STRENGTH
, (options
& kCFCompareDiacriticInsensitive
) ? UCOL_PRIMARY
: UCOL_SECONDARY
, &icuStatus
);
486 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, (options
& kCFCompareCaseInsensitive
) ? UCOL_OFF
: UCOL_ON
, &icuStatus
);
487 if (!U_SUCCESS(icuStatus
)) {
488 icuStatus
= U_ZERO_ERROR
;
489 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
490 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
491 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
492 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
496 // We don't have a primary difference. Recompare with standard collator.
497 icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
498 if (icuResult
!= UCOL_EQUAL
) {
499 *orderP
= (icuResult
== UCOL_LESS
) ? -1 : 1;
501 icuStatus
= U_ZERO_ERROR
;
502 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
503 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
504 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
506 if (*orderP
== 0 && (options
& kCFCompareNonliteral
) == 0) {
507 *orderP
= __CompareSpecials(collator
, options
, text1Ptr
, text1Length
, text2Ptr
, text2Length
);
510 *equivalentP
= (*orderP
== 0);
512 // If strings are equivalent but we care about order and have not yet checked
513 // to the level of code point order, then do some more checks for order
515 UErrorCode icuStatus
= U_ZERO_ERROR
;
516 // First try to see if ICU can find any differences above code point level
517 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &icuStatus
);
518 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_TERTIARY
, &icuStatus
);
519 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_ON
, &icuStatus
);
520 if (!U_SUCCESS(icuStatus
)) {
521 icuStatus
= U_ZERO_ERROR
;
522 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
523 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
524 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
525 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
528 icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
529 if (icuResult
!= UCOL_EQUAL
) {
530 *orderP
= (icuResult
== UCOL_LESS
) ? -1 : 1;
532 // no ICU differences above code point level, compare code points
533 *orderP
= __CompareCodePoints( text1Ptr
, text1Length
, text2Ptr
, text2Length
);
535 icuStatus
= U_ZERO_ERROR
;
536 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
537 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
538 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
541 if (options
& kCFCompareNumerically
) {
542 UErrorCode icuStatus
= U_ZERO_ERROR
;
543 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
548 #endif // DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
550 static inline CFIndex
__extendLocationBackward(CFIndex location
, CFStringInlineBuffer
*str
, const uint8_t *nonBaseBMP
, const uint8_t *punctBMP
) {
551 while (location
> 0) {
552 UTF32Char ch
= CFStringGetCharacterFromInlineBuffer(str
, location
);
554 if (CFUniCharIsSurrogateLowCharacter(ch
) && CFUniCharIsSurrogateHighCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(str
, location
- 1)))) {
555 ch
= CFUniCharGetLongCharacterForSurrogatePair(ch
, otherChar
);
556 uint8_t planeNo
= (ch
>> 16);
557 if ((planeNo
> 1) || (!CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, planeNo
)))) break;
560 if ((!CFUniCharIsMemberOfBitmap(ch
, nonBaseBMP
) && !CFUniCharIsMemberOfBitmap(ch
, punctBMP
)) || ((ch
>= 0x2E80) && (ch
< 0xAC00))) break;
568 static inline CFIndex
__extendLocationForward(CFIndex location
, CFStringInlineBuffer
*str
, const uint8_t *alnumBMP
, const uint8_t *punctBMP
, const uint8_t *controlBMP
, CFIndex strMax
) {
570 UTF32Char ch
= CFStringGetCharacterFromInlineBuffer(str
, location
);
572 if (CFUniCharIsSurrogateHighCharacter(ch
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(str
, location
+ 1)))) {
573 ch
= CFUniCharGetLongCharacterForSurrogatePair(ch
, otherChar
);
575 uint8_t planeNo
= (ch
>> 16);
576 if (!CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet
, planeNo
))) break;
579 if ((!CFUniCharIsMemberOfBitmap(ch
, alnumBMP
) && !CFUniCharIsMemberOfBitmap(ch
, punctBMP
) && !CFUniCharIsMemberOfBitmap(ch
, controlBMP
)) || ((ch
>= 0x2E80) && (ch
< 0xAC00))) break;
581 } while (location
< strMax
);
585 CF_PRIVATE CFComparisonResult
_CFCompareStringsWithLocale(CFStringInlineBuffer
*str1
, CFRange str1Range
, CFStringInlineBuffer
*str2
, CFRange str2Range
, CFOptionFlags options
, const void *compareLocale
) {
586 const UniChar
*characters1
;
587 const UniChar
*characters2
;
588 CFComparisonResult compResult
= kCFCompareEqualTo
;
589 CFRange range1
= str1Range
;
590 CFRange range2
= str2Range
;
592 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
594 bool forcedOrdering
= ((options
& kCFCompareForcedOrdering
) ? true : false);
596 UCollator
*collator
= NULL
;
597 bool defaultCollator
= true;
599 static const uint8_t *alnumBMP
= NULL
;
600 static const uint8_t *nonBaseBMP
= NULL
;
601 static const uint8_t *punctBMP
= NULL
;
602 static const uint8_t *controlBMP
= NULL
;
604 if (NULL
== alnumBMP
) {
605 alnumBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet
, 0);
606 nonBaseBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
607 punctBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, 0);
608 controlBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet
, 0);
611 // Determine the range of characters surrounding the current index significant for localized comparison. The range is extended backward and forward as long as they are contextual. Contextual characters include all letters and punctuation. Since most control/format characters are ignorable in localized comparison, we also include them extending forward.
613 range1
.location
= str1Range
.location
;
614 range2
.location
= str2Range
.location
;
617 // The characters up to the current index are already determined to be equal by the CFString's standard character folding algorithm. Extend as long as truly contextual (all letters and punctuation).
618 if (range1
.location
> 0) {
619 range1
.location
= __extendLocationBackward(range1
.location
- 1, str1
, nonBaseBMP
, punctBMP
);
622 if (range2
.location
> 0) {
623 range2
.location
= __extendLocationBackward(range2
.location
- 1, str2
, nonBaseBMP
, punctBMP
);
626 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
627 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
628 // First we try to use the last one used on this thread, if the locale is the same,
629 // otherwise we try to check out a default one, or then we create one.
630 UCollator
*threadCollator
= _CFGetTSD(__CFTSDKeyCollatorUCollator
);
631 CFLocaleRef threadLocale
= _CFGetTSD(__CFTSDKeyCollatorLocale
);
632 if (compareLocale
== threadLocale
) {
633 collator
= threadCollator
;
636 collator
= __CFStringCopyDefaultCollator((CFLocaleRef
)compareLocale
);
637 defaultCollator
= true;
638 if (NULL
== collator
) {
639 collator
= __CFStringCreateCollator((CFLocaleRef
)compareLocale
);
640 defaultCollator
= false;
642 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
647 characters1
= CFStringGetCharactersPtrFromInlineBuffer(str1
, range1
);
648 characters2
= CFStringGetCharactersPtrFromInlineBuffer(str2
, range2
);
650 if ((NULL
!= characters1
) && (NULL
!= characters2
)) { // do fast
651 range1
.length
= (str1Range
.location
+ str1Range
.length
) - range1
.location
;
652 range2
.length
= (str2Range
.location
+ str2Range
.length
) - range2
.location
;
654 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
655 if ((NULL
!= collator
) && (__CompareTextDefault(collator
, options
, characters1
, range1
.length
, characters2
, range2
.length
, &isEqual
, &order
) == 0 /* noErr */)) {
656 compResult
= ((isEqual
&& !forcedOrdering
) ? kCFCompareEqualTo
: ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
660 compResult
= ((memcmp(characters1
, characters2
, sizeof(UniChar
) * range1
.length
) < 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
663 UniChar
*buffer1
= NULL
;
664 UniChar
*buffer2
= NULL
;
665 UTF16Char sBuffer1
[kCFStringCompareAllocationIncrement
];
666 UTF16Char sBuffer2
[kCFStringCompareAllocationIncrement
];
667 CFIndex buffer1Len
= 0, buffer2Len
= 0;
668 CFIndex str1Max
= str1Range
.location
+ str1Range
.length
;
669 CFIndex str2Max
= str2Range
.location
+ str2Range
.length
;
672 // Extend forward and compare until the result is deterministic. The result is indeterminate if the differences are weak and can be resolved by character folding. For example, comparison between "abc" and "ABC" is considered to be indeterminate.
674 if (str1Range
.location
< str1Max
) {
675 str1Range
.location
= __extendLocationForward(str1Range
.location
, str1
, alnumBMP
, punctBMP
, controlBMP
, str1Max
);
676 range1
.length
= (str1Range
.location
- range1
.location
);
677 characters1
= CFStringGetCharactersPtrFromInlineBuffer(str1
, range1
);
679 if (NULL
== characters1
) {
680 if ((0 > buffer1Len
) || (range1
.length
> kCFStringCompareAllocationIncrement
)) {
681 if (buffer1Len
< range1
.length
) {
682 bufferSize
= range1
.length
+ (kCFStringCompareAllocationIncrement
- (range1
.length
% kCFStringCompareAllocationIncrement
));
683 if (0 == buffer1Len
) {
684 buffer1
= (UniChar
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * bufferSize
, 0);
685 } else if (buffer1Len
< range1
.length
) {
686 buffer1
= (UniChar
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, buffer1
, sizeof(UTF16Char
) * bufferSize
, 0);
688 buffer1Len
= bufferSize
;
694 CFStringGetCharactersFromInlineBuffer(str1
, range1
, buffer1
);
695 characters1
= buffer1
;
699 if (str2Range
.location
< str2Max
) {
700 str2Range
.location
= __extendLocationForward(str2Range
.location
, str2
, alnumBMP
, punctBMP
, controlBMP
, str2Max
);
701 range2
.length
= (str2Range
.location
- range2
.location
);
702 characters2
= CFStringGetCharactersPtrFromInlineBuffer(str2
, range2
);
704 if (NULL
== characters2
) {
705 if ((0 > buffer2Len
) || (range2
.length
> kCFStringCompareAllocationIncrement
)) {
706 if (buffer2Len
< range2
.length
) {
707 bufferSize
= range2
.length
+ (kCFStringCompareAllocationIncrement
- (range2
.length
% kCFStringCompareAllocationIncrement
));
708 if (0 == buffer2Len
) {
709 buffer2
= (UniChar
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * bufferSize
, 0);
710 } else if (buffer2Len
< range2
.length
) {
711 buffer2
= (UniChar
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, buffer2
, sizeof(UTF16Char
) * bufferSize
, 0);
713 buffer2Len
= bufferSize
;
719 CFStringGetCharactersFromInlineBuffer(str2
, range2
, buffer2
);
720 characters2
= buffer2
;
724 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
725 if ((NULL
!= collator
) && (__CompareTextDefault(collator
, options
, characters1
, range1
.length
, characters2
, range2
.length
, &isEqual
, &order
) == 0 /* noErr */)) {
727 if (forcedOrdering
&& (kCFCompareEqualTo
== compResult
) && (0 != order
)) compResult
= ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
733 order
= memcmp(characters1
, characters2
, sizeof(UTF16Char
) * ((range1
.length
< range2
.length
) ? range1
.length
: range2
.length
));
735 if (range1
.length
< range2
.length
) {
737 } else if (range2
.length
< range1
.length
) {
740 } else if (order
< 0) {
742 } else if (order
> 0) {
747 if ((order
< -1) || (order
> 1)) break; // the result is deterministic
750 range1
.location
= str1Range
.location
;
751 range2
.location
= str2Range
.location
;
753 } while ((str1Range
.location
< str1Max
) || (str2Range
.location
< str2Max
));
755 if (0 != order
) compResult
= ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
757 if (buffer1Len
> 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, buffer1
);
758 if (buffer2Len
> 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, buffer2
);
761 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
762 if (collator
== threadCollator
) {
763 // do nothing, already cached
765 if (threadLocale
) __collatorFinalize((UCollator
*)_CFGetTSD(__CFTSDKeyCollatorUCollator
)); // need to dealloc collators
767 _CFSetTSD(__CFTSDKeyCollatorUCollator
, collator
, (void *)__collatorFinalize
);
768 _CFSetTSD(__CFTSDKeyCollatorLocale
, (void *)CFRetain(compareLocale
), NULL
);