2 * Copyright (c) 2013 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 /* CFStringUtilities.c
25 Copyright (c) 1999-2013, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFStringEncodingConverterExt.h>
31 #include <CoreFoundation/CFUniChar.h>
32 #include <CoreFoundation/CFStringEncodingExt.h>
33 #include "CFStringEncodingDatabase.h"
34 #include "CFICUConverters.h"
37 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
38 #include <unicode/ucol.h>
39 #include <unicode/ucoleitr.h>
43 #if DEPLOYMENT_TARGET_WINDOWS
48 Boolean
CFStringIsEncodingAvailable(CFStringEncoding theEncoding
) {
49 switch (theEncoding
) {
50 case kCFStringEncodingASCII
: // Built-in encodings
51 case kCFStringEncodingMacRoman
:
52 case kCFStringEncodingUTF8
:
53 case kCFStringEncodingNonLossyASCII
:
54 case kCFStringEncodingWindowsLatin1
:
55 case kCFStringEncodingNextStepLatin
:
56 case kCFStringEncodingUTF16
:
57 case kCFStringEncodingUTF16BE
:
58 case kCFStringEncodingUTF16LE
:
59 case kCFStringEncodingUTF32
:
60 case kCFStringEncodingUTF32BE
:
61 case kCFStringEncodingUTF32LE
:
65 return CFStringEncodingIsValidEncoding(theEncoding
);
69 const CFStringEncoding
* CFStringGetListOfAvailableEncodings() {
70 return (const CFStringEncoding
*)CFStringEncodingListOfAvailableEncodings();
73 CFStringRef
CFStringGetNameOfEncoding(CFStringEncoding theEncoding
) {
74 static CFMutableDictionaryRef mappingTable
= NULL
;
75 CFStringRef theName
= mappingTable
? (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)(uintptr_t)theEncoding
) : NULL
;
78 const char *encodingName
= __CFStringEncodingGetName(theEncoding
);
81 theName
= CFStringCreateWithCString(kCFAllocatorSystemDefault
, encodingName
, kCFStringEncodingASCII
);
85 if (!mappingTable
) mappingTable
= CFDictionaryCreateMutable(kCFAllocatorSystemDefault
, 0, (const CFDictionaryKeyCallBacks
*)NULL
, &kCFTypeDictionaryValueCallBacks
);
87 CFDictionaryAddValue(mappingTable
, (const void*)(uintptr_t)theEncoding
, (const void*)theName
);
95 CFStringEncoding
CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName
) {
96 CFStringEncoding encoding
= kCFStringEncodingInvalidId
;
97 #define BUFFER_SIZE (100)
98 char buffer
[BUFFER_SIZE
];
99 const char *name
= CFStringGetCStringPtr(charsetName
, __CFStringGetEightBitStringEncoding());
102 if (false == CFStringGetCString(charsetName
, buffer
, BUFFER_SIZE
, __CFStringGetEightBitStringEncoding())) return kCFStringEncodingInvalidId
;
107 encoding
= __CFStringEncodingGetFromCanonicalName(name
);
109 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
110 if (kCFStringEncodingInvalidId
== encoding
) encoding
= __CFStringEncodingGetFromICUName(name
);
114 // handling Java name variant for MS codepages
115 if ((kCFStringEncodingInvalidId
== encoding
) && !strncasecmp(name
, "ms950", strlen("ms950"))) { // <rdar://problem/12903398> “MS950” is not recognized
116 encoding
= __CFStringEncodingGetFromCanonicalName("cp950");
122 CFStringRef
CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding
) {
123 CFStringRef name
= NULL
;
124 CFIndex value
= encoding
;
125 static CFMutableDictionaryRef mappingTable
= NULL
;
126 static CFSpinLock_t lock
= CFSpinLockInit
;
129 name
= ((NULL
== mappingTable
) ? NULL
: (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)value
));
132 #define STACK_BUFFER_SIZE (100)
133 char buffer
[STACK_BUFFER_SIZE
];
135 if (__CFStringEncodingGetCanonicalName(encoding
, buffer
, STACK_BUFFER_SIZE
)) name
= CFStringCreateWithCString(NULL
, buffer
, kCFStringEncodingASCII
);
139 CFIndex value
= encoding
;
141 if (NULL
== mappingTable
) mappingTable
= CFDictionaryCreateMutable(NULL
, 0, NULL
, &kCFTypeDictionaryValueCallBacks
);
143 CFDictionaryAddValue(mappingTable
, (const void*)value
, (const void*)name
);
147 __CFSpinUnlock(&lock
);
/*
 * NSString encoding numbers used by the CFStringEncoding <-> NSStringEncoding
 * conversion functions in this file.
 */
enum {
    NSASCIIStringEncoding         = 1,      /* 0..127 only */
    NSNEXTSTEPStringEncoding      = 2,
    NSJapaneseEUCStringEncoding   = 3,
    NSUTF8StringEncoding          = 4,
    NSISOLatin1StringEncoding     = 5,
    NSSymbolStringEncoding        = 6,
    NSNonLossyASCIIStringEncoding = 7,
    NSShiftJISStringEncoding      = 8,
    NSISOLatin2StringEncoding     = 9,
    NSUnicodeStringEncoding       = 10,
    NSWindowsCP1251StringEncoding = 11,     /* Cyrillic; same as AdobeStandardCyrillic */
    NSWindowsCP1252StringEncoding = 12,     /* WinLatin1 */
    NSWindowsCP1253StringEncoding = 13,     /* Greek */
    NSWindowsCP1254StringEncoding = 14,     /* Turkish */
    NSWindowsCP1250StringEncoding = 15,     /* WinLatin2 */
    NSISO2022JPStringEncoding     = 21,     /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding    = 30,

    NSProprietaryStringEncoding   = 65536   /* Installation-specific encoding */
};

/* Bit ORed into a CFStringEncoding that has no dedicated NSStringEncoding number. */
#define NSENCODING_MASK (1 << 31)
176 unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding
) {
177 switch (theEncoding
& 0xFFF) {
178 case kCFStringEncodingUnicode
:
179 if (theEncoding
== kCFStringEncodingUTF16
) return NSUnicodeStringEncoding
;
180 else if (theEncoding
== kCFStringEncodingUTF8
) return NSUTF8StringEncoding
;
183 case kCFStringEncodingWindowsLatin1
: return NSWindowsCP1252StringEncoding
;
184 case kCFStringEncodingMacRoman
: return NSMacOSRomanStringEncoding
;
186 case kCFStringEncodingASCII
: return NSASCIIStringEncoding
;
188 case kCFStringEncodingDOSJapanese
: return NSShiftJISStringEncoding
;
189 case kCFStringEncodingWindowsCyrillic
: return NSWindowsCP1251StringEncoding
;
190 case kCFStringEncodingWindowsGreek
: return NSWindowsCP1253StringEncoding
;
191 case kCFStringEncodingWindowsLatin5
: return NSWindowsCP1254StringEncoding
;
192 case kCFStringEncodingWindowsLatin2
: return NSWindowsCP1250StringEncoding
;
193 case kCFStringEncodingISOLatin1
: return NSISOLatin1StringEncoding
;
195 case kCFStringEncodingNonLossyASCII
: return NSNonLossyASCIIStringEncoding
;
196 case kCFStringEncodingEUC_JP
: return NSJapaneseEUCStringEncoding
;
197 case kCFStringEncodingMacSymbol
: return NSSymbolStringEncoding
;
198 case kCFStringEncodingISOLatin2
: return NSISOLatin2StringEncoding
;
199 case kCFStringEncodingISO_2022_JP
: return NSISO2022JPStringEncoding
;
200 case kCFStringEncodingNextStepLatin
: return NSNEXTSTEPStringEncoding
;
203 return NSENCODING_MASK
| theEncoding
;
206 CFStringEncoding
CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding
) {
207 const uint16_t encodings
[] = {
208 kCFStringEncodingASCII
,
209 kCFStringEncodingNextStepLatin
,
210 kCFStringEncodingEUC_JP
,
212 kCFStringEncodingISOLatin1
,
213 kCFStringEncodingMacSymbol
,
214 kCFStringEncodingNonLossyASCII
,
215 kCFStringEncodingDOSJapanese
,
216 kCFStringEncodingISOLatin2
,
217 kCFStringEncodingUTF16
,
218 kCFStringEncodingWindowsCyrillic
,
219 kCFStringEncodingWindowsLatin1
,
220 kCFStringEncodingWindowsGreek
,
221 kCFStringEncodingWindowsLatin5
,
222 kCFStringEncodingWindowsLatin2
225 if (NSUTF8StringEncoding
== theEncoding
) return kCFStringEncodingUTF8
;
227 if ((theEncoding
> 0) && (theEncoding
<= NSWindowsCP1250StringEncoding
)) return encodings
[theEncoding
- 1];
229 switch (theEncoding
) {
230 case NSMacOSRomanStringEncoding
: return kCFStringEncodingMacRoman
;
231 case NSISO2022JPStringEncoding
: return kCFStringEncodingISO_2022_JP
;
234 return ((theEncoding
& NSENCODING_MASK
) ? theEncoding
& ~NSENCODING_MASK
: kCFStringEncodingInvalidId
);
238 UInt32
CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding
) {
239 uint16_t codepage
= __CFStringEncodingGetWindowsCodePage(theEncoding
);
241 return ((0 == codepage
) ? kCFStringEncodingInvalidId
: codepage
);
244 CFStringEncoding
CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding
) {
245 return __CFStringEncodingGetFromWindowsCodePage(theEncoding
);
248 CFStringEncoding
CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding
) {
249 CFStringEncoding macEncoding
= __CFStringEncodingGetMostCompatibleMacScript(encoding
);
255 #define kCFStringCompareAllocationIncrement (128)
257 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
259 // -------------------------------------------------------------------------------------------------
260 // CompareSpecials - ignore case & diacritic differences
262 // Decomposed have 2nd-4th chars of type Mn or Mc, or in range 1160-11FF (jamo)
263 // Fullwidth & halfwidth are in range FF00-FFEF
264 // Parenthesized & circled are in range 3200-32FF
265 // -------------------------------------------------------------------------------------------------
/* Weights and masks applied by __CompareSpecials to 32-bit collation element orders. */
enum {
    /* tertiary values in [kUpperCaseWeightMin, kUpperCaseWeightMax] are folded by subtracting kUpperToLowerDelta */
    kUpperCaseWeightMin   = 0x80 | 0x0F,
    kUpperCaseWeightMax   = 0x80 | 0x17,
    kUpperToLowerDelta    = 0x80 | 0x0A,    // 0x0A = 0x0F - 0x05

    /* masks selecting parts of an element order */
    kMaskPrimarySecondary = 0xFFFFFF00,
    kMaskPrimaryOnly      = 0xFFFF0000,
    kMaskSecondaryOnly    = 0x0000FF00,
    kMaskCaseTertiary     = 0x000000FF      // 2 hi bits case, 6 lo bits tertiary
};
277 static SInt32
__CompareSpecials(const UCollator
*collator
, CFOptionFlags options
, const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
) {
278 UErrorCode icuStatus
= U_ZERO_ERROR
;
279 SInt32 orderWidth
= 0;
280 SInt32 orderCompos
= 0;
282 UCollationElements
* collElems1
= ucol_openElements(collator
, (const UChar
*)text1Ptr
, text1Length
, &icuStatus
);
283 UCollationElements
* collElems2
= ucol_openElements(collator
, (const UChar
*)text2Ptr
, text2Length
, &icuStatus
);
284 if (U_SUCCESS(icuStatus
)) {
285 int32_t startOffset1
= 0;
286 int32_t startOffset2
= 0;
289 int32_t elemOrder1
, elemOrder2
;
290 int32_t offset1
, offset2
;
292 elemOrder1
= ucol_next(collElems1
, &icuStatus
);
293 elemOrder2
= ucol_next(collElems2
, &icuStatus
);
294 if ( U_FAILURE(icuStatus
) || elemOrder1
== (int32_t)UCOL_NULLORDER
|| elemOrder2
== (int32_t)UCOL_NULLORDER
) {
298 offset1
= ucol_getOffset(collElems1
);
299 offset2
= ucol_getOffset(collElems2
);
300 if ( (elemOrder1
& kMaskPrimarySecondary
) == (elemOrder2
& kMaskPrimarySecondary
) ) {
301 if ( (elemOrder1
& kMaskPrimaryOnly
) != 0 ) {
302 // keys may differ in case, width, circling, etc.
304 int32_t tertiary1
= (elemOrder1
& kMaskCaseTertiary
);
305 int32_t tertiary2
= (elemOrder2
& kMaskCaseTertiary
);
306 // fold upper to lower case
307 if (tertiary1
>= kUpperCaseWeightMin
&& tertiary1
<= kUpperCaseWeightMax
) {
308 tertiary1
-= kUpperToLowerDelta
;
310 if (tertiary2
>= kUpperCaseWeightMin
&& tertiary2
<= kUpperCaseWeightMax
) {
311 tertiary2
-= kUpperToLowerDelta
;
314 if (tertiary1
!= tertiary2
) {
315 orderWidth
= (tertiary1
< tertiary2
)? -1: 1;
319 } else if ( (elemOrder1
& kMaskSecondaryOnly
) != 0 ) {
320 // primary weights are both zero, but secondaries are not.
321 if ( orderCompos
== 0 && (options
& kCFCompareNonliteral
) == 0 ) {
322 // We have a code element which is a diacritic.
323 // It may have come from a composed char or a combining char.
324 // If it came from a combining char (longer element length) it sorts first.
325 // This is only an approximation to what the Mac OS 9 code did, but this is an
326 // unusual case anyway.
327 int32_t elem1Length
= offset1
- startOffset1
;
328 int32_t elem2Length
= offset2
- startOffset2
;
329 if (elem1Length
!= elem2Length
) {
330 orderCompos
= (elem1Length
> elem2Length
)? -1: 1;
336 startOffset1
= offset1
;
337 startOffset2
= offset2
;
339 ucol_closeElements(collElems1
);
340 ucol_closeElements(collElems2
);
343 return (orderWidth
!= 0)? orderWidth
: orderCompos
;
346 static SInt32
__CompareCodePoints(const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
) {
347 const UniChar
* text1P
= text1Ptr
;
348 const UniChar
* text2P
= text2Ptr
;
349 UInt32 textLimit
= (text1Length
<= text2Length
)? text1Length
: text2Length
;
351 SInt32 orderResult
= 0;
353 // Loop through either string...the first difference differentiates this.
354 for (textCounter
= 0; textCounter
< textLimit
&& *text1P
== *text2P
; textCounter
++) {
358 if (textCounter
< textLimit
) {
359 // code point difference
360 orderResult
= (*text1P
< *text2P
) ? -1 : 1;
361 } else if (text1Length
!= text2Length
) {
362 // one string has extra stuff at end
363 orderResult
= (text1Length
< text2Length
) ? -1 : 1;
369 extern const CFStringRef __kCFLocaleCollatorID
;
371 static UCollator
*__CFStringCreateCollator(CFLocaleRef compareLocale
) {
372 CFStringRef canonLocaleCFStr
= (CFStringRef
)CFLocaleGetValue(compareLocale
, __kCFLocaleCollatorID
);
373 char icuLocaleStr
[128] = {0};
374 CFStringGetCString(canonLocaleCFStr
, icuLocaleStr
, sizeof(icuLocaleStr
), kCFStringEncodingASCII
);
375 UErrorCode icuStatus
= U_ZERO_ERROR
;
376 UCollator
* collator
= ucol_open(icuLocaleStr
, &icuStatus
);
377 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
378 ucol_setAttribute(collator
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &icuStatus
);
379 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
380 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
381 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
385 #define kCFMaxCachedDefaultCollators (8)
386 static UCollator
*__CFDefaultCollators
[kCFMaxCachedDefaultCollators
];
387 static CFIndex __CFDefaultCollatorsCount
= 0;
388 static const void *__CFDefaultCollatorLocale
= NULL
;
389 static CFSpinLock_t __CFDefaultCollatorLock
= CFSpinLockInit
;
391 static UCollator
*__CFStringCopyDefaultCollator(CFLocaleRef compareLocale
) {
392 CFLocaleRef currentLocale
= NULL
;
393 UCollator
* collator
= NULL
;
395 if (compareLocale
!= __CFDefaultCollatorLocale
) {
396 currentLocale
= CFLocaleCopyCurrent();
397 if (compareLocale
!= currentLocale
) {
398 CFRelease(currentLocale
);
403 __CFSpinLock(&__CFDefaultCollatorLock
);
404 if ((NULL
!= currentLocale
) && (__CFDefaultCollatorLocale
!= currentLocale
)) {
405 while (__CFDefaultCollatorsCount
> 0) ucol_close(__CFDefaultCollators
[--__CFDefaultCollatorsCount
]);
406 __CFDefaultCollatorLocale
= CFRetain(currentLocale
);
409 if (__CFDefaultCollatorsCount
> 0) collator
= __CFDefaultCollators
[--__CFDefaultCollatorsCount
];
410 __CFSpinUnlock(&__CFDefaultCollatorLock
);
412 if (NULL
== collator
) {
413 collator
= __CFStringCreateCollator(compareLocale
);
416 if (NULL
!= currentLocale
) CFRelease(currentLocale
);
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
/*
 * Finalizer for the per-thread collator.
 *
 * Clears both thread-specific slots (collator and locale).  The collator is
 * returned to the default pool when the thread's locale is the pool's locale
 * and the pool has room; otherwise it is closed.  The locale reference read
 * from the thread-specific slot is released.
 */
static void __collatorFinalize(UCollator *collator) {
    CFLocaleRef threadLocale = _CFGetTSD(__CFTSDKeyCollatorLocale);
    Boolean returnedToPool = false;

    _CFSetTSD(__CFTSDKeyCollatorUCollator, NULL, NULL);
    _CFSetTSD(__CFTSDKeyCollatorLocale, NULL, NULL);

    __CFSpinLock(&__CFDefaultCollatorLock);
    if ((__CFDefaultCollatorLocale == threadLocale) && (__CFDefaultCollatorsCount < kCFMaxCachedDefaultCollators)) {
        __CFDefaultCollators[__CFDefaultCollatorsCount] = collator;
        __CFDefaultCollatorsCount += 1;
        returnedToPool = true;
    }
    __CFSpinUnlock(&__CFDefaultCollatorLock);

    // A collator that did not go back into the pool has no other owner.
    if (!returnedToPool && (NULL != collator)) {
        ucol_close(collator);
    }

    if (threadLocale) {
        CFRelease(threadLocale);
    }
}
#endif
437 // -------------------------------------------------------------------------------------------------
438 // __CompareTextDefault
440 // A primary difference is denoted by values 2/-2 in orderP. Other differences are indicated with a -1/1.
441 // A negative value indicates that text1 sorts before text2.
442 // -------------------------------------------------------------------------------------------------
443 static OSStatus
__CompareTextDefault(UCollator
*collator
, CFOptionFlags options
, const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
, Boolean
*equivalentP
, SInt32
*orderP
) {
445 // collator must have default settings restored on exit from this function
450 if (options
& kCFCompareNumerically
) {
451 UErrorCode icuStatus
= U_ZERO_ERROR
;
452 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &icuStatus
);
455 // Most string differences are Primary. Do a primary check first, then if there
456 // are no differences do a comparison with the options in the collator.
457 UCollationResult icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
458 if (icuResult
!= UCOL_EQUAL
) {
459 *orderP
= (icuResult
== UCOL_LESS
) ? -2 : 2;
462 UErrorCode icuStatus
= U_ZERO_ERROR
;
463 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &icuStatus
);
464 ucol_setAttribute(collator
, UCOL_STRENGTH
, (options
& kCFCompareDiacriticInsensitive
) ? UCOL_PRIMARY
: UCOL_SECONDARY
, &icuStatus
);
465 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, (options
& kCFCompareCaseInsensitive
) ? UCOL_OFF
: UCOL_ON
, &icuStatus
);
466 if (!U_SUCCESS(icuStatus
)) {
467 icuStatus
= U_ZERO_ERROR
;
468 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
469 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
470 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
471 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
475 // We don't have a primary difference. Recompare with standard collator.
476 icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
477 if (icuResult
!= UCOL_EQUAL
) {
478 *orderP
= (icuResult
== UCOL_LESS
) ? -1 : 1;
480 icuStatus
= U_ZERO_ERROR
;
481 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
482 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
483 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
485 if (*orderP
== 0 && (options
& kCFCompareNonliteral
) == 0) {
486 *orderP
= __CompareSpecials(collator
, options
, text1Ptr
, text1Length
, text2Ptr
, text2Length
);
489 *equivalentP
= (*orderP
== 0);
491 // If strings are equivalent but we care about order and have not yet checked
492 // to the level of code point order, then do some more checks for order
494 UErrorCode icuStatus
= U_ZERO_ERROR
;
495 // First try to see if ICU can find any differences above code point level
496 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &icuStatus
);
497 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_TERTIARY
, &icuStatus
);
498 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_ON
, &icuStatus
);
499 if (!U_SUCCESS(icuStatus
)) {
500 icuStatus
= U_ZERO_ERROR
;
501 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
502 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
503 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
504 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
507 icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
508 if (icuResult
!= UCOL_EQUAL
) {
509 *orderP
= (icuResult
== UCOL_LESS
) ? -1 : 1;
511 // no ICU differences above code point level, compare code points
512 *orderP
= __CompareCodePoints( text1Ptr
, text1Length
, text2Ptr
, text2Length
);
514 icuStatus
= U_ZERO_ERROR
;
515 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
516 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
517 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
520 if (options
& kCFCompareNumerically
) {
521 UErrorCode icuStatus
= U_ZERO_ERROR
;
522 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
527 #endif // DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
529 static inline CFIndex
__extendLocationBackward(CFIndex location
, CFStringInlineBuffer
*str
, const uint8_t *nonBaseBMP
, const uint8_t *punctBMP
) {
530 while (location
> 0) {
531 UTF32Char ch
= CFStringGetCharacterFromInlineBuffer(str
, location
);
533 if (CFUniCharIsSurrogateLowCharacter(ch
) && CFUniCharIsSurrogateHighCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(str
, location
- 1)))) {
534 ch
= CFUniCharGetLongCharacterForSurrogatePair(ch
, otherChar
);
535 uint8_t planeNo
= (ch
>> 16);
536 if ((planeNo
> 1) || (!CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, planeNo
)))) break;
539 if ((!CFUniCharIsMemberOfBitmap(ch
, nonBaseBMP
) && !CFUniCharIsMemberOfBitmap(ch
, punctBMP
)) || ((ch
>= 0x2E80) && (ch
< 0xAC00))) break;
547 static inline CFIndex
__extendLocationForward(CFIndex location
, CFStringInlineBuffer
*str
, const uint8_t *alnumBMP
, const uint8_t *punctBMP
, const uint8_t *controlBMP
, CFIndex strMax
) {
549 UTF32Char ch
= CFStringGetCharacterFromInlineBuffer(str
, location
);
551 if (CFUniCharIsSurrogateHighCharacter(ch
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(str
, location
+ 1)))) {
552 ch
= CFUniCharGetLongCharacterForSurrogatePair(ch
, otherChar
);
554 uint8_t planeNo
= (ch
>> 16);
555 if (!CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet
, planeNo
))) break;
558 if ((!CFUniCharIsMemberOfBitmap(ch
, alnumBMP
) && !CFUniCharIsMemberOfBitmap(ch
, punctBMP
) && !CFUniCharIsMemberOfBitmap(ch
, controlBMP
)) || ((ch
>= 0x2E80) && (ch
< 0xAC00))) break;
560 } while (location
< strMax
);
564 CF_PRIVATE CFComparisonResult
_CFCompareStringsWithLocale(CFStringInlineBuffer
*str1
, CFRange str1Range
, CFStringInlineBuffer
*str2
, CFRange str2Range
, CFOptionFlags options
, const void *compareLocale
) {
565 const UniChar
*characters1
;
566 const UniChar
*characters2
;
567 CFComparisonResult compResult
= kCFCompareEqualTo
;
568 CFRange range1
= str1Range
;
569 CFRange range2
= str2Range
;
571 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
573 bool forcedOrdering
= ((options
& kCFCompareForcedOrdering
) ? true : false);
575 UCollator
*collator
= NULL
;
576 bool defaultCollator
= true;
578 static const uint8_t *alnumBMP
= NULL
;
579 static const uint8_t *nonBaseBMP
= NULL
;
580 static const uint8_t *punctBMP
= NULL
;
581 static const uint8_t *controlBMP
= NULL
;
583 if (NULL
== alnumBMP
) {
584 alnumBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet
, 0);
585 nonBaseBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
586 punctBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, 0);
587 controlBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet
, 0);
    // Determine the range of characters surrounding the current index that is significant for localized comparison. The range is extended backward and forward as long as the characters are contextual. Contextual characters include all letters and punctuation. Since most control/format characters are ignorable in localized comparison, we also include them when extending forward.
592 range1
.location
= str1Range
.location
;
593 range2
.location
= str2Range
.location
;
    // The characters up to the current index have already been determined to be equal by CFString's standard character folding algorithm. Extend backward only as long as the characters are truly contextual (all letters and punctuation).
597 if (range1
.location
> 0) {
598 range1
.location
= __extendLocationBackward(range1
.location
- 1, str1
, nonBaseBMP
, punctBMP
);
601 if (range2
.location
> 0) {
602 range2
.location
= __extendLocationBackward(range2
.location
- 1, str2
, nonBaseBMP
, punctBMP
);
605 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
606 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
607 // First we try to use the last one used on this thread, if the locale is the same,
608 // otherwise we try to check out a default one, or then we create one.
609 UCollator
*threadCollator
= _CFGetTSD(__CFTSDKeyCollatorUCollator
);
610 CFLocaleRef threadLocale
= _CFGetTSD(__CFTSDKeyCollatorLocale
);
611 if (compareLocale
== threadLocale
) {
612 collator
= threadCollator
;
615 collator
= __CFStringCopyDefaultCollator((CFLocaleRef
)compareLocale
);
616 defaultCollator
= true;
617 if (NULL
== collator
) {
618 collator
= __CFStringCreateCollator((CFLocaleRef
)compareLocale
);
619 defaultCollator
= false;
621 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
626 characters1
= CFStringGetCharactersPtrFromInlineBuffer(str1
, range1
);
627 characters2
= CFStringGetCharactersPtrFromInlineBuffer(str2
, range2
);
629 if ((NULL
!= characters1
) && (NULL
!= characters2
)) { // do fast
630 range1
.length
= (str1Range
.location
+ str1Range
.length
) - range1
.location
;
631 range2
.length
= (str2Range
.location
+ str2Range
.length
) - range2
.location
;
633 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
634 if ((NULL
!= collator
) && (__CompareTextDefault(collator
, options
, characters1
, range1
.length
, characters2
, range2
.length
, &isEqual
, &order
) == 0 /* noErr */)) {
635 compResult
= ((isEqual
&& !forcedOrdering
) ? kCFCompareEqualTo
: ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
639 compResult
= ((memcmp(characters1
, characters2
, sizeof(UniChar
) * range1
.length
) < 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
642 UniChar
*buffer1
= NULL
;
643 UniChar
*buffer2
= NULL
;
644 UTF16Char sBuffer1
[kCFStringCompareAllocationIncrement
];
645 UTF16Char sBuffer2
[kCFStringCompareAllocationIncrement
];
646 CFIndex buffer1Len
= 0, buffer2Len
= 0;
647 CFIndex str1Max
= str1Range
.location
+ str1Range
.length
;
648 CFIndex str2Max
= str2Range
.location
+ str2Range
.length
;
	// Extend forward and compare until the result is deterministic. The result is non-deterministic if the differences are weak and can be resolved by character folding. For example, a comparison between "abc" and "ABC" is considered to be non-deterministic.
653 if (str1Range
.location
< str1Max
) {
654 str1Range
.location
= __extendLocationForward(str1Range
.location
, str1
, alnumBMP
, punctBMP
, controlBMP
, str1Max
);
655 range1
.length
= (str1Range
.location
- range1
.location
);
656 characters1
= CFStringGetCharactersPtrFromInlineBuffer(str1
, range1
);
658 if (NULL
== characters1
) {
659 if ((0 > buffer1Len
) || (range1
.length
> kCFStringCompareAllocationIncrement
)) {
660 if (buffer1Len
< range1
.length
) {
661 bufferSize
= range1
.length
+ (kCFStringCompareAllocationIncrement
- (range1
.length
% kCFStringCompareAllocationIncrement
));
662 if (0 == buffer1Len
) {
663 buffer1
= (UniChar
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * bufferSize
, 0);
664 } else if (buffer1Len
< range1
.length
) {
665 buffer1
= (UniChar
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, buffer1
, sizeof(UTF16Char
) * bufferSize
, 0);
667 buffer1Len
= bufferSize
;
673 CFStringGetCharactersFromInlineBuffer(str1
, range1
, buffer1
);
674 characters1
= buffer1
;
678 if (str2Range
.location
< str2Max
) {
679 str2Range
.location
= __extendLocationForward(str2Range
.location
, str2
, alnumBMP
, punctBMP
, controlBMP
, str2Max
);
680 range2
.length
= (str2Range
.location
- range2
.location
);
681 characters2
= CFStringGetCharactersPtrFromInlineBuffer(str2
, range2
);
683 if (NULL
== characters2
) {
684 if ((0 > buffer2Len
) || (range2
.length
> kCFStringCompareAllocationIncrement
)) {
685 if (buffer2Len
< range2
.length
) {
686 bufferSize
= range2
.length
+ (kCFStringCompareAllocationIncrement
- (range2
.length
% kCFStringCompareAllocationIncrement
));
687 if (0 == buffer2Len
) {
688 buffer2
= (UniChar
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * bufferSize
, 0);
689 } else if (buffer2Len
< range2
.length
) {
690 buffer2
= (UniChar
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, buffer2
, sizeof(UTF16Char
) * bufferSize
, 0);
692 buffer2Len
= bufferSize
;
698 CFStringGetCharactersFromInlineBuffer(str2
, range2
, buffer2
);
699 characters2
= buffer2
;
703 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
704 if ((NULL
!= collator
) && (__CompareTextDefault(collator
, options
, characters1
, range1
.length
, characters2
, range2
.length
, &isEqual
, &order
) == 0 /* noErr */)) {
706 if (forcedOrdering
&& (kCFCompareEqualTo
== compResult
) && (0 != order
)) compResult
= ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
712 order
= memcmp(characters1
, characters2
, sizeof(UTF16Char
) * ((range1
.length
< range2
.length
) ? range1
.length
: range2
.length
));
714 if (range1
.length
< range2
.length
) {
716 } else if (range2
.length
< range1
.length
) {
719 } else if (order
< 0) {
721 } else if (order
> 0) {
726 if ((order
< -1) || (order
> 1)) break; // the result is deterministic
729 range1
.location
= str1Range
.location
;
730 range2
.location
= str2Range
.location
;
732 } while ((str1Range
.location
< str1Max
) || (str2Range
.location
< str2Max
));
734 if (0 != order
) compResult
= ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
736 if (buffer1Len
> 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, buffer1
);
737 if (buffer2Len
> 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, buffer2
);
740 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
741 if (collator
== threadCollator
) {
742 // do nothing, already cached
744 if (threadLocale
) __collatorFinalize((UCollator
*)_CFGetTSD(__CFTSDKeyCollatorUCollator
)); // need to dealloc collators
746 _CFSetTSD(__CFTSDKeyCollatorUCollator
, collator
, (void *)__collatorFinalize
);
747 _CFSetTSD(__CFTSDKeyCollatorLocale
, (void *)CFRetain(compareLocale
), NULL
);