/*
 * Copyright (c) 2012 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*	CFStringUtilities.c
	Copyright (c) 1999-2012, Apple Inc. All rights reserved.
	Responsibility: Aki Inoue
*/
#include "CFInternal.h"
#include <CoreFoundation/CFStringEncodingConverterExt.h>
#include <CoreFoundation/CFUniChar.h>
#include <CoreFoundation/CFStringEncodingExt.h>
#include "CFStringEncodingDatabase.h"
#include "CFICUConverters.h"
#include <string.h>
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
#include <unicode/ucol.h>
#include <unicode/ucoleitr.h>
#endif

#if DEPLOYMENT_TARGET_WINDOWS
// NOTE(review): the body of this conditional was lost in extraction; <tchar.h> is presumed - confirm.
#include <tchar.h>
#endif
48 Boolean
CFStringIsEncodingAvailable(CFStringEncoding theEncoding
) {
49 switch (theEncoding
) {
50 case kCFStringEncodingASCII
: // Built-in encodings
51 case kCFStringEncodingMacRoman
:
52 case kCFStringEncodingUTF8
:
53 case kCFStringEncodingNonLossyASCII
:
54 case kCFStringEncodingWindowsLatin1
:
55 case kCFStringEncodingNextStepLatin
:
56 case kCFStringEncodingUTF16
:
57 case kCFStringEncodingUTF16BE
:
58 case kCFStringEncodingUTF16LE
:
59 case kCFStringEncodingUTF32
:
60 case kCFStringEncodingUTF32BE
:
61 case kCFStringEncodingUTF32LE
:
65 return CFStringEncodingIsValidEncoding(theEncoding
);
69 const CFStringEncoding
* CFStringGetListOfAvailableEncodings() {
70 return (const CFStringEncoding
*)CFStringEncodingListOfAvailableEncodings();
73 CFStringRef
CFStringGetNameOfEncoding(CFStringEncoding theEncoding
) {
74 static CFMutableDictionaryRef mappingTable
= NULL
;
75 CFStringRef theName
= mappingTable
? (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)(uintptr_t)theEncoding
) : NULL
;
78 const char *encodingName
= __CFStringEncodingGetName(theEncoding
);
81 theName
= CFStringCreateWithCString(kCFAllocatorSystemDefault
, encodingName
, kCFStringEncodingASCII
);
85 if (!mappingTable
) mappingTable
= CFDictionaryCreateMutable(kCFAllocatorSystemDefault
, 0, (const CFDictionaryKeyCallBacks
*)NULL
, &kCFTypeDictionaryValueCallBacks
);
87 CFDictionaryAddValue(mappingTable
, (const void*)(uintptr_t)theEncoding
, (const void*)theName
);
95 CFStringEncoding
CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName
) {
96 CFStringEncoding encoding
= kCFStringEncodingInvalidId
;
97 #define BUFFER_SIZE (100)
98 char buffer
[BUFFER_SIZE
];
99 const char *name
= CFStringGetCStringPtr(charsetName
, __CFStringGetEightBitStringEncoding());
102 if (false == CFStringGetCString(charsetName
, buffer
, BUFFER_SIZE
, __CFStringGetEightBitStringEncoding())) return kCFStringEncodingInvalidId
;
107 encoding
= __CFStringEncodingGetFromCanonicalName(name
);
109 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
110 if (kCFStringEncodingInvalidId
== encoding
) encoding
= __CFStringEncodingGetFromICUName(name
);
117 CFStringRef
CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding
) {
118 CFStringRef name
= NULL
;
119 CFIndex value
= encoding
;
120 static CFMutableDictionaryRef mappingTable
= NULL
;
121 static CFSpinLock_t lock
= CFSpinLockInit
;
124 name
= ((NULL
== mappingTable
) ? NULL
: (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)value
));
127 #define STACK_BUFFER_SIZE (100)
128 char buffer
[STACK_BUFFER_SIZE
];
130 if (__CFStringEncodingGetCanonicalName(encoding
, buffer
, STACK_BUFFER_SIZE
)) name
= CFStringCreateWithCString(NULL
, buffer
, kCFStringEncodingASCII
);
134 CFIndex value
= encoding
;
136 if (NULL
== mappingTable
) mappingTable
= CFDictionaryCreateMutable(NULL
, 0, NULL
, &kCFTypeDictionaryValueCallBacks
);
138 CFDictionaryAddValue(mappingTable
, (const void*)value
, (const void*)name
);
142 __CFSpinUnlock(&lock
);
/* Local copies of the NSStringEncoding values used by the two converters below. */
enum {
    NSASCIIStringEncoding = 1,          /* 0..127 only */
    NSNEXTSTEPStringEncoding = 2,
    NSJapaneseEUCStringEncoding = 3,
    NSUTF8StringEncoding = 4,
    NSISOLatin1StringEncoding = 5,
    NSSymbolStringEncoding = 6,
    NSNonLossyASCIIStringEncoding = 7,
    NSShiftJISStringEncoding = 8,
    NSISOLatin2StringEncoding = 9,
    NSUnicodeStringEncoding = 10,
    NSWindowsCP1251StringEncoding = 11, /* Cyrillic; same as AdobeStandardCyrillic */
    NSWindowsCP1252StringEncoding = 12, /* WinLatin1 */
    NSWindowsCP1253StringEncoding = 13, /* Greek */
    NSWindowsCP1254StringEncoding = 14, /* Turkish */
    NSWindowsCP1250StringEncoding = 15, /* WinLatin2 */
    NSISO2022JPStringEncoding = 21,     /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding = 30,

    NSProprietaryStringEncoding = 65536 /* Installation-specific encoding */
};
/*
 * Tag bit marking an NSStringEncoding value that wraps a raw CFStringEncoding.
 * Unsigned on purpose: `1 << 31` overflows a 32-bit int and, when widened to
 * unsigned long, would sign-extend into the upper bits.
 */
#define NSENCODING_MASK (1U << 31)
171 unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding
) {
172 switch (theEncoding
& 0xFFF) {
173 case kCFStringEncodingUnicode
:
174 if (theEncoding
== kCFStringEncodingUTF16
) return NSUnicodeStringEncoding
;
175 else if (theEncoding
== kCFStringEncodingUTF8
) return NSUTF8StringEncoding
;
178 case kCFStringEncodingWindowsLatin1
: return NSWindowsCP1252StringEncoding
;
179 case kCFStringEncodingMacRoman
: return NSMacOSRomanStringEncoding
;
181 case kCFStringEncodingASCII
: return NSASCIIStringEncoding
;
183 case kCFStringEncodingDOSJapanese
: return NSShiftJISStringEncoding
;
184 case kCFStringEncodingWindowsCyrillic
: return NSWindowsCP1251StringEncoding
;
185 case kCFStringEncodingWindowsGreek
: return NSWindowsCP1253StringEncoding
;
186 case kCFStringEncodingWindowsLatin5
: return NSWindowsCP1254StringEncoding
;
187 case kCFStringEncodingWindowsLatin2
: return NSWindowsCP1250StringEncoding
;
188 case kCFStringEncodingISOLatin1
: return NSISOLatin1StringEncoding
;
190 case kCFStringEncodingNonLossyASCII
: return NSNonLossyASCIIStringEncoding
;
191 case kCFStringEncodingEUC_JP
: return NSJapaneseEUCStringEncoding
;
192 case kCFStringEncodingMacSymbol
: return NSSymbolStringEncoding
;
193 case kCFStringEncodingISOLatin2
: return NSISOLatin2StringEncoding
;
194 case kCFStringEncodingISO_2022_JP
: return NSISO2022JPStringEncoding
;
195 case kCFStringEncodingNextStepLatin
: return NSNEXTSTEPStringEncoding
;
198 return NSENCODING_MASK
| theEncoding
;
201 CFStringEncoding
CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding
) {
202 const uint16_t encodings
[] = {
203 kCFStringEncodingASCII
,
204 kCFStringEncodingNextStepLatin
,
205 kCFStringEncodingEUC_JP
,
207 kCFStringEncodingISOLatin1
,
208 kCFStringEncodingMacSymbol
,
209 kCFStringEncodingNonLossyASCII
,
210 kCFStringEncodingDOSJapanese
,
211 kCFStringEncodingISOLatin2
,
212 kCFStringEncodingUTF16
,
213 kCFStringEncodingWindowsCyrillic
,
214 kCFStringEncodingWindowsLatin1
,
215 kCFStringEncodingWindowsGreek
,
216 kCFStringEncodingWindowsLatin5
,
217 kCFStringEncodingWindowsLatin2
220 if (NSUTF8StringEncoding
== theEncoding
) return kCFStringEncodingUTF8
;
222 if ((theEncoding
> 0) && (theEncoding
<= NSWindowsCP1250StringEncoding
)) return encodings
[theEncoding
- 1];
224 switch (theEncoding
) {
225 case NSMacOSRomanStringEncoding
: return kCFStringEncodingMacRoman
;
226 case NSISO2022JPStringEncoding
: return kCFStringEncodingISO_2022_JP
;
229 return ((theEncoding
& NSENCODING_MASK
) ? theEncoding
& ~NSENCODING_MASK
: kCFStringEncodingInvalidId
);
233 UInt32
CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding
) {
234 uint16_t codepage
= __CFStringEncodingGetWindowsCodePage(theEncoding
);
236 return ((0 == codepage
) ? kCFStringEncodingInvalidId
: codepage
);
239 CFStringEncoding
CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding
) {
240 return __CFStringEncodingGetFromWindowsCodePage(theEncoding
);
243 CFStringEncoding
CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding
) {
244 CFStringEncoding macEncoding
= __CFStringEncodingGetMostCompatibleMacScript(encoding
);
/* Size, in UTF-16 units, of the stack scratch buffers and of each heap growth step used by the localized compare. */
#define kCFStringCompareAllocationIncrement (128)
252 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
// -------------------------------------------------------------------------------------------------
// CompareSpecials - ignore case & diacritic differences
//
// Decomposed have 2nd-4th chars of type Mn or Mc, or in range 1160-11FF (jamo)
// Fullwidth & halfwidth are in range FF00-FFEF
// Parenthesized & circled are in range 3200-32FF
// -------------------------------------------------------------------------------------------------

// Masks and weights applied to the 32-bit collation element orders returned by ucol_next().
enum {
    kUpperCaseWeightMin = 0x80 | 0x0F,
    kUpperCaseWeightMax = 0x80 | 0x17,
    kUpperToLowerDelta = 0x80 | 0x0A,   // 0x0A = 0x0F - 0x05
    kMaskPrimarySecondary = 0xFFFFFF00,
    kMaskPrimaryOnly = 0xFFFF0000,
    kMaskSecondaryOnly = 0x0000FF00,
    kMaskCaseTertiary = 0x000000FF      // 2 hi bits case, 6 lo bits tertiary
};
272 static SInt32
__CompareSpecials(const UCollator
*collator
, CFOptionFlags options
, const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
) {
273 UErrorCode icuStatus
= U_ZERO_ERROR
;
274 SInt32 orderWidth
= 0;
275 SInt32 orderCompos
= 0;
277 UCollationElements
* collElems1
= ucol_openElements(collator
, (const UChar
*)text1Ptr
, text1Length
, &icuStatus
);
278 UCollationElements
* collElems2
= ucol_openElements(collator
, (const UChar
*)text2Ptr
, text2Length
, &icuStatus
);
279 if (U_SUCCESS(icuStatus
)) {
280 int32_t startOffset1
= 0;
281 int32_t startOffset2
= 0;
284 int32_t elemOrder1
, elemOrder2
;
285 int32_t offset1
, offset2
;
287 elemOrder1
= ucol_next(collElems1
, &icuStatus
);
288 elemOrder2
= ucol_next(collElems2
, &icuStatus
);
289 if ( U_FAILURE(icuStatus
) || elemOrder1
== (int32_t)UCOL_NULLORDER
|| elemOrder2
== (int32_t)UCOL_NULLORDER
) {
293 offset1
= ucol_getOffset(collElems1
);
294 offset2
= ucol_getOffset(collElems2
);
295 if ( (elemOrder1
& kMaskPrimarySecondary
) == (elemOrder2
& kMaskPrimarySecondary
) ) {
296 if ( (elemOrder1
& kMaskPrimaryOnly
) != 0 ) {
297 // keys may differ in case, width, circling, etc.
299 int32_t tertiary1
= (elemOrder1
& kMaskCaseTertiary
);
300 int32_t tertiary2
= (elemOrder2
& kMaskCaseTertiary
);
301 // fold upper to lower case
302 if (tertiary1
>= kUpperCaseWeightMin
&& tertiary1
<= kUpperCaseWeightMax
) {
303 tertiary1
-= kUpperToLowerDelta
;
305 if (tertiary2
>= kUpperCaseWeightMin
&& tertiary2
<= kUpperCaseWeightMax
) {
306 tertiary2
-= kUpperToLowerDelta
;
309 if (tertiary1
!= tertiary2
) {
310 orderWidth
= (tertiary1
< tertiary2
)? -1: 1;
314 } else if ( (elemOrder1
& kMaskSecondaryOnly
) != 0 ) {
315 // primary weights are both zero, but secondaries are not.
316 if ( orderCompos
== 0 && (options
& kCFCompareNonliteral
) == 0 ) {
317 // We have a code element which is a diacritic.
318 // It may have come from a composed char or a combining char.
319 // If it came from a combining char (longer element length) it sorts first.
320 // This is only an approximation to what the Mac OS 9 code did, but this is an
321 // unusual case anyway.
322 int32_t elem1Length
= offset1
- startOffset1
;
323 int32_t elem2Length
= offset2
- startOffset2
;
324 if (elem1Length
!= elem2Length
) {
325 orderCompos
= (elem1Length
> elem2Length
)? -1: 1;
331 startOffset1
= offset1
;
332 startOffset2
= offset2
;
334 ucol_closeElements(collElems1
);
335 ucol_closeElements(collElems2
);
338 return (orderWidth
!= 0)? orderWidth
: orderCompos
;
341 static SInt32
__CompareCodePoints(const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
) {
342 const UniChar
* text1P
= text1Ptr
;
343 const UniChar
* text2P
= text2Ptr
;
344 UInt32 textLimit
= (text1Length
<= text2Length
)? text1Length
: text2Length
;
346 SInt32 orderResult
= 0;
348 // Loop through either string...the first difference differentiates this.
349 for (textCounter
= 0; textCounter
< textLimit
&& *text1P
== *text2P
; textCounter
++) {
353 if (textCounter
< textLimit
) {
354 // code point difference
355 orderResult
= (*text1P
< *text2P
) ? -1 : 1;
356 } else if (text1Length
!= text2Length
) {
357 // one string has extra stuff at end
358 orderResult
= (text1Length
< text2Length
) ? -1 : 1;
364 extern const CFStringRef __kCFLocaleCollatorID
;
366 static UCollator
*__CFStringCreateCollator(CFLocaleRef compareLocale
) {
367 CFStringRef canonLocaleCFStr
= (CFStringRef
)CFLocaleGetValue(compareLocale
, __kCFLocaleCollatorID
);
368 char icuLocaleStr
[128] = {0};
369 CFStringGetCString(canonLocaleCFStr
, icuLocaleStr
, sizeof(icuLocaleStr
), kCFStringEncodingASCII
);
370 UErrorCode icuStatus
= U_ZERO_ERROR
;
371 UCollator
* collator
= ucol_open(icuLocaleStr
, &icuStatus
);
372 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
373 ucol_setAttribute(collator
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &icuStatus
);
374 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
375 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
376 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
380 #define kCFMaxCachedDefaultCollators (8)
381 static UCollator
*__CFDefaultCollators
[kCFMaxCachedDefaultCollators
];
382 static CFIndex __CFDefaultCollatorsCount
= 0;
383 static const void *__CFDefaultCollatorLocale
= NULL
;
384 static CFSpinLock_t __CFDefaultCollatorLock
= CFSpinLockInit
;
386 static UCollator
*__CFStringCopyDefaultCollator(CFLocaleRef compareLocale
) {
387 CFLocaleRef currentLocale
= NULL
;
388 UCollator
* collator
= NULL
;
390 if (compareLocale
!= __CFDefaultCollatorLocale
) {
391 currentLocale
= CFLocaleCopyCurrent();
392 if (compareLocale
!= currentLocale
) {
393 CFRelease(currentLocale
);
398 __CFSpinLock(&__CFDefaultCollatorLock
);
399 if ((NULL
!= currentLocale
) && (__CFDefaultCollatorLocale
!= currentLocale
)) {
400 while (__CFDefaultCollatorsCount
> 0) ucol_close(__CFDefaultCollators
[--__CFDefaultCollatorsCount
]);
401 __CFDefaultCollatorLocale
= CFRetain(currentLocale
);
404 if (__CFDefaultCollatorsCount
> 0) collator
= __CFDefaultCollators
[--__CFDefaultCollatorsCount
];
405 __CFSpinUnlock(&__CFDefaultCollatorLock
);
407 if (NULL
== collator
) {
408 collator
= __CFStringCreateCollator(compareLocale
);
411 if (NULL
!= currentLocale
) CFRelease(currentLocale
);
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
/*
 * Retires the collator stored in thread-specific data. It is registered as the
 * TSD finalizer for the collator slot and also called directly when a thread
 * switches locale.
 *
 * Clears both TSD slots, then either parks the collator on the idle stack
 * (when the thread's locale is still the default and there is room) or closes
 * it. The thread's reference to the locale is released.
 */
static void __collatorFinalize(UCollator *collator) {
    CFLocaleRef locale = _CFGetTSD(__CFTSDKeyCollatorLocale);
    _CFSetTSD(__CFTSDKeyCollatorUCollator, NULL, NULL);
    _CFSetTSD(__CFTSDKeyCollatorLocale, NULL, NULL);
    __CFSpinLock(&__CFDefaultCollatorLock);
    if ((__CFDefaultCollatorLocale == locale) && (__CFDefaultCollatorsCount < kCFMaxCachedDefaultCollators)) {
        __CFDefaultCollators[__CFDefaultCollatorsCount++] = collator;
        collator = NULL; // ownership moved to the idle stack; do not close below
    }
    __CFSpinUnlock(&__CFDefaultCollatorLock);
    if (NULL != collator) ucol_close(collator);
    if (locale) CFRelease(locale);
}
#endif
432 // -------------------------------------------------------------------------------------------------
433 // __CompareTextDefault
435 // A primary difference is denoted by values 2/-2 in orderP. Other differences are indicated with a -1/1.
436 // A negative value indicates that text1 sorts before text2.
437 // -------------------------------------------------------------------------------------------------
438 static OSStatus
__CompareTextDefault(UCollator
*collator
, CFOptionFlags options
, const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
, Boolean
*equivalentP
, SInt32
*orderP
) {
440 // collator must have default settings restored on exit from this function
445 if (options
& kCFCompareNumerically
) {
446 UErrorCode icuStatus
= U_ZERO_ERROR
;
447 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &icuStatus
);
450 // Most string differences are Primary. Do a primary check first, then if there
451 // are no differences do a comparison with the options in the collator.
452 UCollationResult icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
453 if (icuResult
!= UCOL_EQUAL
) {
454 *orderP
= (icuResult
== UCOL_LESS
) ? -2 : 2;
457 UErrorCode icuStatus
= U_ZERO_ERROR
;
458 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &icuStatus
);
459 ucol_setAttribute(collator
, UCOL_STRENGTH
, (options
& kCFCompareDiacriticInsensitive
) ? UCOL_PRIMARY
: UCOL_SECONDARY
, &icuStatus
);
460 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, (options
& kCFCompareCaseInsensitive
) ? UCOL_OFF
: UCOL_ON
, &icuStatus
);
461 if (!U_SUCCESS(icuStatus
)) {
462 icuStatus
= U_ZERO_ERROR
;
463 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
464 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
465 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
466 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
470 // We don't have a primary difference. Recompare with standard collator.
471 icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
472 if (icuResult
!= UCOL_EQUAL
) {
473 *orderP
= (icuResult
== UCOL_LESS
) ? -1 : 1;
475 icuStatus
= U_ZERO_ERROR
;
476 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
477 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
478 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
480 if (*orderP
== 0 && (options
& kCFCompareNonliteral
) == 0) {
481 *orderP
= __CompareSpecials(collator
, options
, text1Ptr
, text1Length
, text2Ptr
, text2Length
);
484 *equivalentP
= (*orderP
== 0);
486 // If strings are equivalent but we care about order and have not yet checked
487 // to the level of code point order, then do some more checks for order
489 UErrorCode icuStatus
= U_ZERO_ERROR
;
490 // First try to see if ICU can find any differences above code point level
491 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &icuStatus
);
492 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_TERTIARY
, &icuStatus
);
493 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_ON
, &icuStatus
);
494 if (!U_SUCCESS(icuStatus
)) {
495 icuStatus
= U_ZERO_ERROR
;
496 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
497 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
498 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
499 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
502 icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
503 if (icuResult
!= UCOL_EQUAL
) {
504 *orderP
= (icuResult
== UCOL_LESS
) ? -1 : 1;
506 // no ICU differences above code point level, compare code points
507 *orderP
= __CompareCodePoints( text1Ptr
, text1Length
, text2Ptr
, text2Length
);
509 icuStatus
= U_ZERO_ERROR
;
510 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
511 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
512 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
515 if (options
& kCFCompareNumerically
) {
516 UErrorCode icuStatus
= U_ZERO_ERROR
;
517 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
522 #endif // DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
524 static inline CFIndex
__extendLocationBackward(CFIndex location
, CFStringInlineBuffer
*str
, const uint8_t *nonBaseBMP
, const uint8_t *punctBMP
) {
525 while (location
> 0) {
526 UTF32Char ch
= CFStringGetCharacterFromInlineBuffer(str
, location
);
528 if (CFUniCharIsSurrogateLowCharacter(ch
) && CFUniCharIsSurrogateHighCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(str
, location
- 1)))) {
529 ch
= CFUniCharGetLongCharacterForSurrogatePair(ch
, otherChar
);
530 uint8_t planeNo
= (ch
>> 16);
531 if ((planeNo
> 1) || (!CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, planeNo
)))) break;
534 if ((!CFUniCharIsMemberOfBitmap(ch
, nonBaseBMP
) && !CFUniCharIsMemberOfBitmap(ch
, punctBMP
)) || ((ch
>= 0x2E80) && (ch
< 0xAC00))) break;
542 static inline CFIndex
__extendLocationForward(CFIndex location
, CFStringInlineBuffer
*str
, const uint8_t *alnumBMP
, const uint8_t *punctBMP
, const uint8_t *controlBMP
, CFIndex strMax
) {
544 UTF32Char ch
= CFStringGetCharacterFromInlineBuffer(str
, location
);
546 if (CFUniCharIsSurrogateHighCharacter(ch
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(str
, location
+ 1)))) {
547 ch
= CFUniCharGetLongCharacterForSurrogatePair(ch
, otherChar
);
549 uint8_t planeNo
= (ch
>> 16);
550 if (!CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet
, planeNo
))) break;
553 if ((!CFUniCharIsMemberOfBitmap(ch
, alnumBMP
) && !CFUniCharIsMemberOfBitmap(ch
, punctBMP
) && !CFUniCharIsMemberOfBitmap(ch
, controlBMP
)) || ((ch
>= 0x2E80) && (ch
< 0xAC00))) break;
555 } while (location
< strMax
);
559 __private_extern__ CFComparisonResult
_CFCompareStringsWithLocale(CFStringInlineBuffer
*str1
, CFRange str1Range
, CFStringInlineBuffer
*str2
, CFRange str2Range
, CFOptionFlags options
, const void *compareLocale
) {
560 const UniChar
*characters1
;
561 const UniChar
*characters2
;
562 CFComparisonResult compResult
= kCFCompareEqualTo
;
563 CFRange range1
= str1Range
;
564 CFRange range2
= str2Range
;
566 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
568 bool forcedOrdering
= ((options
& kCFCompareForcedOrdering
) ? true : false);
570 UCollator
*collator
= NULL
;
571 bool defaultCollator
= true;
573 static const uint8_t *alnumBMP
= NULL
;
574 static const uint8_t *nonBaseBMP
= NULL
;
575 static const uint8_t *punctBMP
= NULL
;
576 static const uint8_t *controlBMP
= NULL
;
578 if (NULL
== alnumBMP
) {
579 alnumBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet
, 0);
580 nonBaseBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
581 punctBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, 0);
582 controlBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet
, 0);
585 // Determine the range of characters surrodiing the current index significant for localized comparison. The range is extended backward and forward as long as they are contextual. Contextual characters include all letters and punctuations. Since most control/format characters are ignorable in localized comparison, we also include them extending forward.
587 range1
.location
= str1Range
.location
;
588 range2
.location
= str2Range
.location
;
591 // The characters upto the current index are already determined to be equal by the CFString's standard character folding algorithm. Extend as long as truly contextual (all letters and punctuations).
592 if (range1
.location
> 0) {
593 range1
.location
= __extendLocationBackward(range1
.location
- 1, str1
, nonBaseBMP
, punctBMP
);
596 if (range2
.location
> 0) {
597 range2
.location
= __extendLocationBackward(range2
.location
- 1, str2
, nonBaseBMP
, punctBMP
);
600 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
601 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
602 // First we try to use the last one used on this thread, if the locale is the same,
603 // otherwise we try to check out a default one, or then we create one.
604 UCollator
*threadCollator
= _CFGetTSD(__CFTSDKeyCollatorUCollator
);
605 CFLocaleRef threadLocale
= _CFGetTSD(__CFTSDKeyCollatorLocale
);
606 if (compareLocale
== threadLocale
) {
607 collator
= threadCollator
;
610 collator
= __CFStringCopyDefaultCollator((CFLocaleRef
)compareLocale
);
611 defaultCollator
= true;
612 if (NULL
== collator
) {
613 collator
= __CFStringCreateCollator((CFLocaleRef
)compareLocale
);
614 defaultCollator
= false;
616 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
621 characters1
= CFStringGetCharactersPtrFromInlineBuffer(str1
, range1
);
622 characters2
= CFStringGetCharactersPtrFromInlineBuffer(str2
, range2
);
624 if ((NULL
!= characters1
) && (NULL
!= characters2
)) { // do fast
625 range1
.length
= (str1Range
.location
+ str1Range
.length
) - range1
.location
;
626 range2
.length
= (str2Range
.location
+ str2Range
.length
) - range2
.location
;
628 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
629 if ((NULL
!= collator
) && (__CompareTextDefault(collator
, options
, characters1
, range1
.length
, characters2
, range2
.length
, &isEqual
, &order
) == 0 /* noErr */)) {
630 compResult
= ((isEqual
&& !forcedOrdering
) ? kCFCompareEqualTo
: ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
634 compResult
= ((memcmp(characters1
, characters2
, sizeof(UniChar
) * range1
.length
) < 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
637 UniChar
*buffer1
= NULL
;
638 UniChar
*buffer2
= NULL
;
639 UTF16Char sBuffer1
[kCFStringCompareAllocationIncrement
];
640 UTF16Char sBuffer2
[kCFStringCompareAllocationIncrement
];
641 CFIndex buffer1Len
= 0, buffer2Len
= 0;
642 CFIndex str1Max
= str1Range
.location
+ str1Range
.length
;
643 CFIndex str2Max
= str2Range
.location
+ str2Range
.length
;
646 // Extend forward and compare until the result is deterministic. The result is indeterministic if the differences are weak and can be resolved by character folding. For example, comparision between "abc" and "ABC" is considered to be indeterministic.
648 if (str1Range
.location
< str1Max
) {
649 str1Range
.location
= __extendLocationForward(str1Range
.location
, str1
, alnumBMP
, punctBMP
, controlBMP
, str1Max
);
650 range1
.length
= (str1Range
.location
- range1
.location
);
651 characters1
= CFStringGetCharactersPtrFromInlineBuffer(str1
, range1
);
653 if (NULL
== characters1
) {
654 if ((0 > buffer1Len
) || (range1
.length
> kCFStringCompareAllocationIncrement
)) {
655 if (buffer1Len
< range1
.length
) {
656 bufferSize
= range1
.length
+ (kCFStringCompareAllocationIncrement
- (range1
.length
% kCFStringCompareAllocationIncrement
));
657 if (0 == buffer1Len
) {
658 buffer1
= (UniChar
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * bufferSize
, 0);
659 } else if (buffer1Len
< range1
.length
) {
660 buffer1
= (UniChar
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, buffer1
, sizeof(UTF16Char
) * bufferSize
, 0);
662 buffer1Len
= bufferSize
;
668 CFStringGetCharactersFromInlineBuffer(str1
, range1
, buffer1
);
669 characters1
= buffer1
;
673 if (str2Range
.location
< str2Max
) {
674 str2Range
.location
= __extendLocationForward(str2Range
.location
, str2
, alnumBMP
, punctBMP
, controlBMP
, str2Max
);
675 range2
.length
= (str2Range
.location
- range2
.location
);
676 characters2
= CFStringGetCharactersPtrFromInlineBuffer(str2
, range2
);
678 if (NULL
== characters2
) {
679 if ((0 > buffer2Len
) || (range2
.length
> kCFStringCompareAllocationIncrement
)) {
680 if (buffer2Len
< range2
.length
) {
681 bufferSize
= range2
.length
+ (kCFStringCompareAllocationIncrement
- (range2
.length
% kCFStringCompareAllocationIncrement
));
682 if (0 == buffer2Len
) {
683 buffer2
= (UniChar
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * bufferSize
, 0);
684 } else if (buffer2Len
< range2
.length
) {
685 buffer2
= (UniChar
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, buffer2
, sizeof(UTF16Char
) * bufferSize
, 0);
687 buffer2Len
= bufferSize
;
693 CFStringGetCharactersFromInlineBuffer(str2
, range2
, buffer2
);
694 characters2
= buffer2
;
698 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS || DEPLOYMENT_TARGET_LINUX
699 if ((NULL
!= collator
) && (__CompareTextDefault(collator
, options
, characters1
, range1
.length
, characters2
, range2
.length
, &isEqual
, &order
) == 0 /* noErr */)) {
701 if (forcedOrdering
&& (kCFCompareEqualTo
== compResult
) && (0 != order
)) compResult
= ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
707 order
= memcmp(characters1
, characters2
, sizeof(UTF16Char
) * ((range1
.length
< range2
.length
) ? range1
.length
: range2
.length
));
709 if (range1
.length
< range2
.length
) {
711 } else if (range2
.length
< range1
.length
) {
714 } else if (order
< 0) {
716 } else if (order
> 0) {
721 if ((order
< -1) || (order
> 1)) break; // the result is deterministic
724 range1
.location
= str1Range
.location
;
725 range2
.location
= str2Range
.location
;
727 } while ((str1Range
.location
< str1Max
) || (str2Range
.location
< str2Max
));
729 if (0 != order
) compResult
= ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
731 if (buffer1Len
> 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, buffer1
);
732 if (buffer2Len
> 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, buffer2
);
735 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
736 if (collator
== threadCollator
) {
737 // do nothing, already cached
739 if (threadLocale
) __collatorFinalize((UCollator
*)_CFGetTSD(__CFTSDKeyCollatorUCollator
)); // need to dealloc collators
741 _CFSetTSD(__CFTSDKeyCollatorUCollator
, collator
, (void *)__collatorFinalize
);
742 _CFSetTSD(__CFTSDKeyCollatorLocale
, (void *)CFRetain(compareLocale
), NULL
);