2 * Copyright (c) 2011 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 /* CFStringUtilities.c
25 Copyright (c) 1999-2011, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFStringEncodingConverterExt.h>
31 #include <CoreFoundation/CFUniChar.h>
32 #include <CoreFoundation/CFStringEncodingExt.h>
33 #include "CFStringEncodingDatabase.h"
34 #include "CFICUConverters.h"
35 #include <CoreFoundation/CFPreferences.h>
38 #include <unicode/ucol.h>
39 #include <unicode/ucoleitr.h>
42 #if DEPLOYMENT_TARGET_WINDOWS
// Answers whether theEncoding is available.
// The switch lists the encodings the original author labelled "Built-in".
// NOTE(review): the statement those cases fall into is not visible in this
// view -- presumably they are reported as available; confirm against the
// full file. The only visible return defers to
// CFStringEncodingIsValidEncoding().
47 Boolean
CFStringIsEncodingAvailable(CFStringEncoding theEncoding
) {
48 switch (theEncoding
) {
49 case kCFStringEncodingASCII
: // Built-in encodings
50 case kCFStringEncodingMacRoman
:
51 case kCFStringEncodingUTF8
:
52 case kCFStringEncodingNonLossyASCII
:
53 case kCFStringEncodingWindowsLatin1
:
54 case kCFStringEncodingNextStepLatin
:
55 case kCFStringEncodingUTF16
:
56 case kCFStringEncodingUTF16BE
:
57 case kCFStringEncodingUTF16LE
:
58 case kCFStringEncodingUTF32
:
59 case kCFStringEncodingUTF32BE
:
60 case kCFStringEncodingUTF32LE
:
// Visible fallback: ask the encoding converter layer.
64 return CFStringEncodingIsValidEncoding(theEncoding
);
// Returns the pointer produced by CFStringEncodingListOfAvailableEncodings(),
// cast to a const CFStringEncoding pointer. How the list is terminated and
// who owns it is decided by that helper and is not visible here.
68 const CFStringEncoding
* CFStringGetListOfAvailableEncodings() {
69 return (const CFStringEncoding
*)CFStringEncodingListOfAvailableEncodings();
// Looks up a name string for theEncoding, caching results in a function-static
// dictionary keyed by the encoding value itself (cast to a pointer).
// NOTE(review): the conditionals guarding the steps below and the final return
// are not visible in this view. No lock is visible around the static table
// here, whereas CFStringConvertEncodingToIANACharSetName guards its table with
// a spin lock -- confirm whether concurrent callers are expected.
72 CFStringRef
CFStringGetNameOfEncoding(CFStringEncoding theEncoding
) {
73 static CFMutableDictionaryRef mappingTable
= NULL
;
// Cache probe; NULL when the table has not been created yet.
74 CFStringRef theName
= mappingTable
? (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)(uintptr_t)theEncoding
) : NULL
;
77 const char *encodingName
= __CFStringEncodingGetName(theEncoding
);
// The C name is wrapped as an ASCII CFString.
80 theName
= CFStringCreateWithCString(kCFAllocatorSystemDefault
, encodingName
, kCFStringEncodingASCII
);
// Table is created lazily. Keys use NULL callbacks (raw integer values);
// values use the CFType callbacks, so the dictionary retains the name string.
84 if (!mappingTable
) mappingTable
= CFDictionaryCreateMutable(kCFAllocatorSystemDefault
, 0, (const CFDictionaryKeyCallBacks
*)NULL
, &kCFTypeDictionaryValueCallBacks
);
86 CFDictionaryAddValue(mappingTable
, (const void*)(uintptr_t)theEncoding
, (const void*)theName
);
// Maps a charset name to a CFStringEncoding.
// The name is first matched with __CFStringEncodingGetFromCanonicalName();
// if that yields kCFStringEncodingInvalidId, __CFStringEncodingGetFromICUName()
// is tried.
// NOTE(review): the lines choosing between the direct C-string pointer and the
// stack buffer, and the final return, are not visible in this view.
94 CFStringEncoding
CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName
) {
95 CFStringEncoding encoding
= kCFStringEncodingInvalidId
;
96 #define BUFFER_SIZE (100)
97 char buffer
[BUFFER_SIZE
];
// Try to borrow the string's internal 8-bit storage without copying.
98 const char *name
= CFStringGetCStringPtr(charsetName
, __CFStringGetEightBitStringEncoding());
// Copy into the stack buffer; if the copy fails the name is rejected outright.
101 if (false == CFStringGetCString(charsetName
, buffer
, BUFFER_SIZE
, __CFStringGetEightBitStringEncoding())) return kCFStringEncodingInvalidId
;
106 encoding
= __CFStringEncodingGetFromCanonicalName(name
);
// Second chance: treat the name as an ICU converter name.
108 if (kCFStringEncodingInvalidId
== encoding
) encoding
= __CFStringEncodingGetFromICUName(name
);
// Maps a CFStringEncoding to its canonical charset name, caching the created
// strings in a function-static dictionary keyed by the encoding value.
// A function-static spin lock is declared for the table.
// NOTE(review): the matching __CFSpinLock() call, the branch structure and the
// final return are not visible in this view; only the unlock is shown.
114 CFStringRef
CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding
) {
115 CFStringRef name
= NULL
;
116 CFIndex value
= encoding
;
117 static CFMutableDictionaryRef mappingTable
= NULL
;
118 static CFSpinLock_t lock
= CFSpinLockInit
;
// Cache probe; NULL when the table has not been created yet.
121 name
= ((NULL
== mappingTable
) ? NULL
: (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)value
));
124 #define STACK_BUFFER_SIZE (100)
125 char buffer
[STACK_BUFFER_SIZE
];
// On a successful canonical-name lookup, wrap the C name as an ASCII CFString.
127 if (__CFStringEncodingGetCanonicalName(encoding
, buffer
, STACK_BUFFER_SIZE
)) name
= CFStringCreateWithCString(NULL
, buffer
, kCFStringEncodingASCII
);
// NOTE(review): second declaration of `value`; presumably inside a nested
// scope whose braces are not visible here.
131 CFIndex value
= encoding
;
// Lazily create the table: raw integer keys, CFType-retained values.
133 if (NULL
== mappingTable
) mappingTable
= CFDictionaryCreateMutable(NULL
, 0, NULL
, &kCFTypeDictionaryValueCallBacks
);
135 CFDictionaryAddValue(mappingTable
, (const void*)value
, (const void*)name
);
139 __CFSpinUnlock(&lock
);
// Numeric values for the NS*StringEncoding names used by
// CFStringConvertEncodingToNSStringEncoding() and
// CFStringConvertNSStringEncodingToEncoding() in this file.
// NOTE(review): the enclosing enum declaration line is not visible in this view.
145 NSASCIIStringEncoding
= 1, /* 0..127 only */
146 NSNEXTSTEPStringEncoding
= 2,
147 NSJapaneseEUCStringEncoding
= 3,
148 NSUTF8StringEncoding
= 4,
149 NSISOLatin1StringEncoding
= 5,
150 NSSymbolStringEncoding
= 6,
151 NSNonLossyASCIIStringEncoding
= 7,
152 NSShiftJISStringEncoding
= 8,
153 NSISOLatin2StringEncoding
= 9,
154 NSUnicodeStringEncoding
= 10,
155 NSWindowsCP1251StringEncoding
= 11, /* Cyrillic; same as AdobeStandardCyrillic */
156 NSWindowsCP1252StringEncoding
= 12, /* WinLatin1 */
157 NSWindowsCP1253StringEncoding
= 13, /* Greek */
158 NSWindowsCP1254StringEncoding
= 14, /* Turkish */
159 NSWindowsCP1250StringEncoding
= 15, /* WinLatin2 */
160 NSISO2022JPStringEncoding
= 21, /* ISO 2022 Japanese encoding for e-mail */
161 NSMacOSRomanStringEncoding
= 30,
163 NSProprietaryStringEncoding
= 65536 /* Installation-specific encoding */
// Marker bit for encodings that have no NS constant above: the CF -> NS
// converter ORs it into the raw CFStringEncoding, and the NS -> CF converter
// tests for it and strips it again.
// NOTE(review): 1 << 31 shifts into the sign bit of a signed int. (1U << 31)
// would avoid relying on that, but the mask is also applied to an
// unsigned long, so the widening behaviour would change -- confirm first.
166 #define NSENCODING_MASK (1 << 31)
// Converts a CFStringEncoding to the matching NS*StringEncoding value.
// The switch dispatches on the low 12 bits of theEncoding. An encoding that is
// not resolved by a visible case is returned as the raw CF value with
// NSENCODING_MASK ORed in.
// NOTE(review): some lines of the switch (including how the Unicode case ends
// when neither comparison matches) are not visible in this view.
168 unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding
) {
169 switch (theEncoding
& 0xFFF) {
// The Unicode case is refined by comparing the full, unmasked value.
170 case kCFStringEncodingUnicode
:
171 if (theEncoding
== kCFStringEncodingUTF16
) return NSUnicodeStringEncoding
;
172 else if (theEncoding
== kCFStringEncodingUTF8
) return NSUTF8StringEncoding
;
175 case kCFStringEncodingWindowsLatin1
: return NSWindowsCP1252StringEncoding
;
176 case kCFStringEncodingMacRoman
: return NSMacOSRomanStringEncoding
;
178 case kCFStringEncodingASCII
: return NSASCIIStringEncoding
;
180 case kCFStringEncodingDOSJapanese
: return NSShiftJISStringEncoding
;
181 case kCFStringEncodingWindowsCyrillic
: return NSWindowsCP1251StringEncoding
;
182 case kCFStringEncodingWindowsGreek
: return NSWindowsCP1253StringEncoding
;
183 case kCFStringEncodingWindowsLatin5
: return NSWindowsCP1254StringEncoding
;
184 case kCFStringEncodingWindowsLatin2
: return NSWindowsCP1250StringEncoding
;
185 case kCFStringEncodingISOLatin1
: return NSISOLatin1StringEncoding
;
187 case kCFStringEncodingNonLossyASCII
: return NSNonLossyASCIIStringEncoding
;
188 case kCFStringEncodingEUC_JP
: return NSJapaneseEUCStringEncoding
;
189 case kCFStringEncodingMacSymbol
: return NSSymbolStringEncoding
;
190 case kCFStringEncodingISOLatin2
: return NSISOLatin2StringEncoding
;
191 case kCFStringEncodingISO_2022_JP
: return NSISO2022JPStringEncoding
;
192 case kCFStringEncodingNextStepLatin
: return NSNEXTSTEPStringEncoding
;
// No dedicated NS constant: tag the CF value so the reverse converter can
// recover it.
195 return NSENCODING_MASK
| theEncoding
;
// Converts an NS*StringEncoding value back to a CFStringEncoding.
// Order of checks, as visible: NSUTF8StringEncoding is special-cased; values
// 1..NSWindowsCP1250StringEncoding (15) index the table below at
// [theEncoding - 1]; NSMacOSRoman and NSISO2022JP are handled by a switch;
// finally a value carrying NSENCODING_MASK has the mask stripped, and anything
// else yields kCFStringEncodingInvalidId.
// NOTE(review): at least one table entry and the closing of the initializer
// are not visible in this view, so the table cannot be checked against the
// NS values from here.
198 CFStringEncoding
CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding
) {
199 const uint16_t encodings
[] = {
200 kCFStringEncodingASCII
,
201 kCFStringEncodingNextStepLatin
,
202 kCFStringEncodingEUC_JP
,
204 kCFStringEncodingISOLatin1
,
205 kCFStringEncodingMacSymbol
,
206 kCFStringEncodingNonLossyASCII
,
207 kCFStringEncodingDOSJapanese
,
208 kCFStringEncodingISOLatin2
,
209 kCFStringEncodingUTF16
,
210 kCFStringEncodingWindowsCyrillic
,
211 kCFStringEncodingWindowsLatin1
,
212 kCFStringEncodingWindowsGreek
,
213 kCFStringEncodingWindowsLatin5
,
214 kCFStringEncodingWindowsLatin2
// UTF-8 is answered before the table lookup.
217 if (NSUTF8StringEncoding
== theEncoding
) return kCFStringEncodingUTF8
;
219 if ((theEncoding
> 0) && (theEncoding
<= NSWindowsCP1250StringEncoding
)) return encodings
[theEncoding
- 1];
221 switch (theEncoding
) {
222 case NSMacOSRomanStringEncoding
: return kCFStringEncodingMacRoman
;
223 case NSISO2022JPStringEncoding
: return kCFStringEncodingISO_2022_JP
;
// Masked values are CF encodings tagged by the CF -> NS converter.
226 return ((theEncoding
& NSENCODING_MASK
) ? theEncoding
& ~NSENCODING_MASK
: kCFStringEncodingInvalidId
);
// Returns the Windows code page for theEncoding as reported by
// __CFStringEncodingGetWindowsCodePage(). A reported code page of 0 is
// translated to kCFStringEncodingInvalidId.
230 UInt32
CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding
) {
231 uint16_t codepage
= __CFStringEncodingGetWindowsCodePage(theEncoding
);
233 return ((0 == codepage
) ? kCFStringEncodingInvalidId
: codepage
);
// Returns whatever __CFStringEncodingGetFromWindowsCodePage() reports for the
// given Windows code page. Despite its name, the parameter `theEncoding`
// carries the code page number, not a CFStringEncoding.
236 CFStringEncoding
CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding
) {
237 return __CFStringEncodingGetFromWindowsCodePage(theEncoding
);
// Asks __CFStringEncodingGetMostCompatibleMacScript() for the Mac encoding
// closest to `encoding`.
// NOTE(review): the return statement, and any handling of a failed lookup,
// is not visible in this view.
240 CFStringEncoding
CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding
) {
241 CFStringEncoding macEncoding
= __CFStringEncodingGetMostCompatibleMacScript(encoding
);
// Element count of the on-stack UTF16Char buffers in
// _CFCompareStringsWithLocale, and the rounding step it uses when sizing the
// heap buffers for longer ranges.
247 #define kCFStringCompareAllocationIncrement (128)
250 // -------------------------------------------------------------------------------------------------
251 // CompareSpecials - ignore case & diacritic differences
253 // Decomposed have 2nd-4th chars of type Mn or Mc, or in range 1160-11FF (jamo)
254 // Fullwidth & halfwidth are in range FF00-FFEF
255 // Parenthesized & circled are in range 3200-32FF
256 // -------------------------------------------------------------------------------------------------
// Constants applied by __CompareSpecials to 32-bit collation element values:
// the kMask* values select the upper 16 bits (primary), bits 8-15 (secondary)
// and the low 8 bits (case + tertiary). A tertiary byte inside
// [kUpperCaseWeightMin, kUpperCaseWeightMax] is treated as upper case and
// folded by subtracting kUpperToLowerDelta.
// NOTE(review): the enclosing enum declaration line is not visible in this view.
259 kUpperCaseWeightMin
= 0x80 | 0x0F,
260 kUpperCaseWeightMax
= 0x80 | 0x17,
261 kUpperToLowerDelta
= 0x80 | 0x0A, // 0x0A = 0x0F - 0x05
262 kMaskPrimarySecondary
= 0xFFFFFF00,
263 kMaskPrimaryOnly
= 0xFFFF0000,
264 kMaskSecondaryOnly
= 0x0000FF00,
265 kMaskCaseTertiary
= 0x000000FF // 2 hi bits case, 6 lo bits tertiary
// Tie-breaker for two texts: walks both through ICU collation element
// iterators in lock step and records two kinds of difference:
//   orderWidth  - first difference in the (case-folded) tertiary byte of
//                 elements whose primary weight is non-zero;
//   orderCompos - first difference in element source length for elements with
//                 zero primary but non-zero secondary weight (diacritics),
//                 only when kCFCompareNonliteral is not set.
// Returns orderWidth when it is non-zero, otherwise orderCompos; negative
// means text1 sorts first.
// NOTE(review): the loop construct, several closing braces and the branch
// taken when primary/secondary weights differ are not visible in this view.
268 static SInt32
__CompareSpecials(const UCollator
*collator
, CFOptionFlags options
, const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
) {
269 UErrorCode icuStatus
= U_ZERO_ERROR
;
270 SInt32 orderWidth
= 0;
271 SInt32 orderCompos
= 0;
// Both iterators share icuStatus, so a failure opening either one is caught
// by the single U_SUCCESS check below.
273 UCollationElements
* collElems1
= ucol_openElements(collator
, (const UChar
*)text1Ptr
, text1Length
, &icuStatus
);
274 UCollationElements
* collElems2
= ucol_openElements(collator
, (const UChar
*)text2Ptr
, text2Length
, &icuStatus
);
275 if (U_SUCCESS(icuStatus
)) {
// Source offsets at which the current element of each text started.
276 int32_t startOffset1
= 0;
277 int32_t startOffset2
= 0;
280 int32_t elemOrder1
, elemOrder2
;
281 int32_t offset1
, offset2
;
283 elemOrder1
= ucol_next(collElems1
, &icuStatus
);
284 elemOrder2
= ucol_next(collElems2
, &icuStatus
);
// Stop on an ICU error or as soon as either text runs out of elements.
285 if ( U_FAILURE(icuStatus
) || elemOrder1
== (int32_t)UCOL_NULLORDER
|| elemOrder2
== (int32_t)UCOL_NULLORDER
) {
289 offset1
= ucol_getOffset(collElems1
);
290 offset2
= ucol_getOffset(collElems2
);
291 if ( (elemOrder1
& kMaskPrimarySecondary
) == (elemOrder2
& kMaskPrimarySecondary
) ) {
292 if ( (elemOrder1
& kMaskPrimaryOnly
) != 0 ) {
293 // keys may differ in case, width, circling, etc.
295 int32_t tertiary1
= (elemOrder1
& kMaskCaseTertiary
);
296 int32_t tertiary2
= (elemOrder2
& kMaskCaseTertiary
);
297 // fold upper to lower case
298 if (tertiary1
>= kUpperCaseWeightMin
&& tertiary1
<= kUpperCaseWeightMax
) {
299 tertiary1
-= kUpperToLowerDelta
;
301 if (tertiary2
>= kUpperCaseWeightMin
&& tertiary2
<= kUpperCaseWeightMax
) {
302 tertiary2
-= kUpperToLowerDelta
;
305 if (tertiary1
!= tertiary2
) {
306 orderWidth
= (tertiary1
< tertiary2
)? -1: 1;
310 } else if ( (elemOrder1
& kMaskSecondaryOnly
) != 0 ) {
311 // primary weights are both zero, but secondaries are not.
312 if ( orderCompos
== 0 && (options
& kCFCompareNonliteral
) == 0 ) {
313 // We have a code element which is a diacritic.
314 // It may have come from a composed char or a combining char.
315 // If it came from a combining char (longer element length) it sorts first.
316 // This is only an approximation to what the Mac OS 9 code did, but this is an
317 // unusual case anyway.
318 int32_t elem1Length
= offset1
- startOffset1
;
319 int32_t elem2Length
= offset2
- startOffset2
;
320 if (elem1Length
!= elem2Length
) {
321 orderCompos
= (elem1Length
> elem2Length
)? -1: 1;
// Remember where the next element of each text begins.
327 startOffset1
= offset1
;
328 startOffset2
= offset2
;
330 ucol_closeElements(collElems1
);
331 ucol_closeElements(collElems2
);
// A width/case difference takes precedence over a composition difference.
334 return (orderWidth
!= 0)? orderWidth
: orderCompos
;
// Binary comparison of two UniChar buffers.
// Scans up to the shorter length for the first position where the elements
// differ; that element decides the order. If the common prefix is identical,
// the shorter text sorts first. orderResult starts at 0 and is set to -1 or 1
// by the branches below.
// NOTE(review): the comparison is per UniChar element, not per decoded code
// point. The declaration of textCounter, the loop body and the return
// statement are not visible in this view.
337 static SInt32
__CompareCodePoints(const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
) {
338 const UniChar
* text1P
= text1Ptr
;
339 const UniChar
* text2P
= text2Ptr
;
// Number of elements both texts have in common positions.
340 UInt32 textLimit
= (text1Length
<= text2Length
)? text1Length
: text2Length
;
342 SInt32 orderResult
= 0;
344 // Loop through either string...the first difference differentiates this.
345 for (textCounter
= 0; textCounter
< textLimit
&& *text1P
== *text2P
; textCounter
++) {
349 if (textCounter
< textLimit
) {
350 // code point difference
351 orderResult
= (*text1P
< *text2P
) ? -1 : 1;
352 } else if (text1Length
!= text2Length
) {
353 // one string has extra stuff at end
354 orderResult
= (text1Length
< text2Length
) ? -1 : 1;
// Locale key, defined elsewhere, used to read the collator identifier string.
360 extern const CFStringRef __kCFLocaleCollatorID
;
// Opens a new ICU collator for compareLocale and puts it into the baseline
// configuration used in this file: normalization off, non-ignorable alternate
// handling, primary strength, case level off, numeric collation off.
// NOTE(review): in the visible lines neither the result of CFStringGetCString()
// nor icuStatus after ucol_open() is checked before the collator is
// configured; the return statement is not visible in this view.
362 static UCollator
*__CFStringCreateCollator(CFLocaleRef compareLocale
) {
363 CFStringRef canonLocaleCFStr
= (CFStringRef
)CFLocaleGetValue(compareLocale
, __kCFLocaleCollatorID
);
// Zero-initialised, so the buffer stays a valid empty C string if the
// conversion below writes nothing.
364 char icuLocaleStr
[128] = {0};
365 CFStringGetCString(canonLocaleCFStr
, icuLocaleStr
, sizeof(icuLocaleStr
), kCFStringEncodingASCII
);
366 UErrorCode icuStatus
= U_ZERO_ERROR
;
367 UCollator
* collator
= ucol_open(icuLocaleStr
, &icuStatus
);
368 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
369 ucol_setAttribute(collator
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &icuStatus
);
370 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
371 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
372 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
// Process-wide stack of idle collators for one locale
// (__CFDefaultCollatorLocale). __CFStringCopyDefaultCollator pops entries and
// __collatorFinalize pushes them back; both touch the stack under
// __CFDefaultCollatorLock.
376 #define kCFMaxCachedDefaultCollators (8)
377 static UCollator
*__CFDefaultCollators
[kCFMaxCachedDefaultCollators
];
// Number of valid entries at the front of __CFDefaultCollators.
378 static CFIndex __CFDefaultCollatorsCount
= 0;
// Locale the cached collators were built for; compared by pointer identity.
379 static const void *__CFDefaultCollatorLocale
= NULL
;
380 static CFSpinLock_t __CFDefaultCollatorLock
= CFSpinLockInit
;
// Hands out a collator for compareLocale, reusing an idle one from the
// __CFDefaultCollators stack when available and creating one otherwise.
// Locales are compared by pointer identity throughout.
// NOTE(review): the statement following the CFRelease() in the mismatch branch
// and the final return are not visible in this view -- presumably the function
// bails out when compareLocale is not the current locale; confirm.
382 static UCollator
*__CFStringCopyDefaultCollator(CFLocaleRef compareLocale
) {
383 CFLocaleRef currentLocale
= NULL
;
384 UCollator
* collator
= NULL
;
// Only consult the current locale when the cached locale does not already
// match the request.
386 if (compareLocale
!= __CFDefaultCollatorLocale
) {
387 currentLocale
= CFLocaleCopyCurrent();
388 if (compareLocale
!= currentLocale
) {
389 CFRelease(currentLocale
);
394 __CFSpinLock(&__CFDefaultCollatorLock
);
// The current locale differs from the cached one: close every cached collator
// and retarget the cache.
// NOTE(review): the previously stored __CFDefaultCollatorLocale is
// overwritten here without a visible release -- confirm ownership.
395 if ((NULL
!= currentLocale
) && (__CFDefaultCollatorLocale
!= currentLocale
)) {
396 while (__CFDefaultCollatorsCount
> 0) ucol_close(__CFDefaultCollators
[--__CFDefaultCollatorsCount
]);
397 __CFDefaultCollatorLocale
= CFRetain(currentLocale
);
// Pop an idle collator if one is available.
400 if (__CFDefaultCollatorsCount
> 0) collator
= __CFDefaultCollators
[--__CFDefaultCollatorsCount
];
401 __CFSpinUnlock(&__CFDefaultCollatorLock
);
// Nothing cached: build a fresh collator outside the lock.
403 if (NULL
== collator
) {
404 collator
= __CFStringCreateCollator(compareLocale
);
407 if (NULL
!= currentLocale
) CFRelease(currentLocale
);
412 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
// Retires the collator held in this thread's thread-specific data.
// Registered as the finalizer for __CFTSDKeyCollatorUCollator by
// _CFCompareStringsWithLocale, which also calls it directly.
// Clears both TSD slots, then offers the collator back to the shared
// __CFDefaultCollators stack if its locale is still the cached default locale
// and the stack has room.
// NOTE(review): the line after the push is not visible in this view --
// presumably it clears `collator` so the ucol_close() below does not close a
// collator that was just cached; confirm.
413 static void __collatorFinalize(UCollator
*collator
) {
414 CFLocaleRef locale
= _CFGetTSD(__CFTSDKeyCollatorLocale
);
415 _CFSetTSD(__CFTSDKeyCollatorUCollator
, NULL
, NULL
);
416 _CFSetTSD(__CFTSDKeyCollatorLocale
, NULL
, NULL
);
417 __CFSpinLock(&__CFDefaultCollatorLock
);
418 if ((__CFDefaultCollatorLocale
== locale
) && (__CFDefaultCollatorsCount
< kCFMaxCachedDefaultCollators
)) {
419 __CFDefaultCollators
[__CFDefaultCollatorsCount
++] = collator
;
422 __CFSpinUnlock(&__CFDefaultCollatorLock
);
423 if (NULL
!= collator
) ucol_close(collator
);
// Drop the locale reference read from the TSD slot above.
424 if (locale
) CFRelease(locale
);
428 // -------------------------------------------------------------------------------------------------
429 // __CompareTextDefault
431 // A primary difference is denoted by values 2/-2 in orderP. Other differences are indicated with a -1/1.
432 // A negative value indicates that text1 sorts before text2.
433 // -------------------------------------------------------------------------------------------------
// Parameters, as used by the visible code:
//   collator    - ICU collator; every pass below that changes its attributes
//                 sets them back to normalization off / primary strength /
//                 case level off (and numeric collation off) afterwards.
//   options     - CFString compare flags; the visible code reads
//                 kCFCompareNumerically, kCFCompareDiacriticInsensitive,
//                 kCFCompareCaseInsensitive and kCFCompareNonliteral.
//   equivalentP - receives (*orderP == 0) as it stands after the
//                 option-sensitive pass and __CompareSpecials.
//   orderP      - receives the ordering described in the banner above.
// NOTE(review): return statements, else branches and several closing braces
// are not visible in this view, so the exact nesting of the passes below
// cannot be confirmed from here.
434 static OSStatus
__CompareTextDefault(UCollator
*collator
, CFOptionFlags options
, const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
, Boolean
*equivalentP
, SInt32
*orderP
) {
436 // collator must have default settings restored on exit from this function
// Numeric collation stays on for all passes and is switched off at the end.
441 if (options
& kCFCompareNumerically
) {
442 UErrorCode icuStatus
= U_ZERO_ERROR
;
443 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &icuStatus
);
446 // Most string differences are Primary. Do a primary check first, then if there
447 // are no differences do a comparison with the options in the collator.
448 UCollationResult icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
449 if (icuResult
!= UCOL_EQUAL
) {
450 *orderP
= (icuResult
== UCOL_LESS
) ? -2 : 2;
// Pass 2: reconfigure the collator according to the caller's diacritic and
// case options.
453 UErrorCode icuStatus
= U_ZERO_ERROR
;
454 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &icuStatus
);
455 ucol_setAttribute(collator
, UCOL_STRENGTH
, (options
& kCFCompareDiacriticInsensitive
) ? UCOL_PRIMARY
: UCOL_SECONDARY
, &icuStatus
);
456 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, (options
& kCFCompareCaseInsensitive
) ? UCOL_OFF
: UCOL_ON
, &icuStatus
);
// Reconfiguration failed: put the collator back into its baseline state.
457 if (!U_SUCCESS(icuStatus
)) {
458 icuStatus
= U_ZERO_ERROR
;
459 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
460 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
461 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
462 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
466 // We don't have a primary difference. Recompare with standard collator.
467 icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
468 if (icuResult
!= UCOL_EQUAL
) {
469 *orderP
= (icuResult
== UCOL_LESS
) ? -1 : 1;
// Restore the baseline attributes changed for pass 2.
471 icuStatus
= U_ZERO_ERROR
;
472 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
473 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
474 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
// Still tied and the comparison is literal: let __CompareSpecials look for
// width/case and composition differences.
476 if (*orderP
== 0 && (options
& kCFCompareNonliteral
) == 0) {
477 *orderP
= __CompareSpecials(collator
, options
, text1Ptr
, text1Length
, text2Ptr
, text2Length
);
480 *equivalentP
= (*orderP
== 0);
482 // If strings are equivalent but we care about order and have not yet checked
483 // to the level of code point order, then do some more checks for order
485 UErrorCode icuStatus
= U_ZERO_ERROR
;
486 // First try to see if ICU can find any differences above code point level
487 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &icuStatus
);
488 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_TERTIARY
, &icuStatus
);
489 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_ON
, &icuStatus
);
// Reconfiguration failed: put the collator back into its baseline state.
490 if (!U_SUCCESS(icuStatus
)) {
491 icuStatus
= U_ZERO_ERROR
;
492 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
493 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
494 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
495 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
498 icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
499 if (icuResult
!= UCOL_EQUAL
) {
500 *orderP
= (icuResult
== UCOL_LESS
) ? -1 : 1;
502 // no ICU differences above code point level, compare code points
503 *orderP
= __CompareCodePoints( text1Ptr
, text1Length
, text2Ptr
, text2Length
);
// Restore the baseline attributes changed for the tertiary pass.
505 icuStatus
= U_ZERO_ERROR
;
506 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
507 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
508 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
// Undo the numeric-collation switch made at the top of the function.
511 if (options
& kCFCompareNumerically
) {
512 UErrorCode icuStatus
= U_ZERO_ERROR
;
513 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
// Walks `location` backward through `str` while the character there is a
// non-base or punctuation character, stopping at the first character that is
// not.
//   nonBaseBMP / punctBMP - plane-0 bitmaps for the non-base and punctuation
//                           character sets, used for BMP characters.
// A low surrogate preceded by a high surrogate is combined into one character
// and tested against the bitmaps of its own plane; planes above 1 always stop
// the walk. BMP characters in 0x2E80..0xABFF also stop the walk.
// NOTE(review): the declaration of otherChar, the lines that decrement
// `location` and the return statement are not visible in this view.
518 static inline CFIndex
__extendLocationBackward(CFIndex location
, CFStringInlineBuffer
*str
, const uint8_t *nonBaseBMP
, const uint8_t *punctBMP
) {
519 while (location
> 0) {
520 UTF32Char ch
= CFStringGetCharacterFromInlineBuffer(str
, location
);
// Trailing half of a surrogate pair: decode the full character.
522 if (CFUniCharIsSurrogateLowCharacter(ch
) && CFUniCharIsSurrogateHighCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(str
, location
- 1)))) {
523 ch
= CFUniCharGetLongCharacterForSurrogatePair(ch
, otherChar
);
524 uint8_t planeNo
= (ch
>> 16);
525 if ((planeNo
> 1) || (!CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, planeNo
)))) break;
// BMP character: stop unless it is non-base or punctuation, and always stop
// inside 0x2E80..0xABFF.
528 if ((!CFUniCharIsMemberOfBitmap(ch
, nonBaseBMP
) && !CFUniCharIsMemberOfBitmap(ch
, punctBMP
)) || ((ch
>= 0x2E80) && (ch
< 0xAC00))) break;
// Walks `location` forward through `str`, up to strMax, while the character
// there is alphanumeric, punctuation or a control/format character, stopping
// at the first character that is none of these.
//   alnumBMP / punctBMP / controlBMP - plane-0 bitmaps used for BMP characters.
// A high surrogate followed by a low surrogate is combined into one character
// and tested against the bitmaps of its own plane. BMP characters in
// 0x2E80..0xABFF also stop the walk.
// NOTE(review): the opening `do`, the declaration of otherChar, the lines that
// advance `location` and the return statement are not visible in this view.
536 static inline CFIndex
__extendLocationForward(CFIndex location
, CFStringInlineBuffer
*str
, const uint8_t *alnumBMP
, const uint8_t *punctBMP
, const uint8_t *controlBMP
, CFIndex strMax
) {
538 UTF32Char ch
= CFStringGetCharacterFromInlineBuffer(str
, location
);
// Leading half of a surrogate pair: decode the full character.
540 if (CFUniCharIsSurrogateHighCharacter(ch
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(str
, location
+ 1)))) {
541 ch
= CFUniCharGetLongCharacterForSurrogatePair(ch
, otherChar
);
543 uint8_t planeNo
= (ch
>> 16);
544 if (!CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet
, planeNo
))) break;
// BMP character: stop unless it is alphanumeric, punctuation or control, and
// always stop inside 0x2E80..0xABFF.
547 if ((!CFUniCharIsMemberOfBitmap(ch
, alnumBMP
) && !CFUniCharIsMemberOfBitmap(ch
, punctBMP
) && !CFUniCharIsMemberOfBitmap(ch
, controlBMP
)) || ((ch
>= 0x2E80) && (ch
< 0xAC00))) break;
549 } while (location
< strMax
);
553 __private_extern__ CFComparisonResult
_CFCompareStringsWithLocale(CFStringInlineBuffer
*str1
, CFRange str1Range
, CFStringInlineBuffer
*str2
, CFRange str2Range
, CFOptionFlags options
, const void *compareLocale
) {
554 const UniChar
*characters1
;
555 const UniChar
*characters2
;
556 CFComparisonResult compResult
= kCFCompareEqualTo
;
557 CFRange range1
= str1Range
;
558 CFRange range2
= str2Range
;
561 bool forcedOrdering
= ((options
& kCFCompareForcedOrdering
) ? true : false);
563 UCollator
*collator
= NULL
;
564 bool defaultCollator
= true;
565 static const uint8_t *alnumBMP
= NULL
;
566 static const uint8_t *nonBaseBMP
= NULL
;
567 static const uint8_t *punctBMP
= NULL
;
568 static const uint8_t *controlBMP
= NULL
;
570 if (NULL
== alnumBMP
) {
571 alnumBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet
, 0);
572 nonBaseBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
573 punctBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, 0);
574 controlBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet
, 0);
577 // Determine the range of characters surrounding the current index that is significant for localized comparison. The range is extended backward and forward as long as the characters are contextual. Contextual characters include all letters and punctuation. Since most control/format characters are ignorable in localized comparison, we also include them when extending forward.
579 range1
.location
= str1Range
.location
;
580 range2
.location
= str2Range
.location
;
583 // The characters up to the current index are already determined to be equal by CFString's standard character folding algorithm. Extend as long as truly contextual (all letters and punctuation).
584 if (range1
.location
> 0) {
585 range1
.location
= __extendLocationBackward(range1
.location
- 1, str1
, nonBaseBMP
, punctBMP
);
588 if (range2
.location
> 0) {
589 range2
.location
= __extendLocationBackward(range2
.location
- 1, str2
, nonBaseBMP
, punctBMP
);
592 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
593 // First we try to use the last one used on this thread, if the locale is the same,
594 // otherwise we try to check out a default one, or then we create one.
595 UCollator
*threadCollator
= _CFGetTSD(__CFTSDKeyCollatorUCollator
);
596 CFLocaleRef threadLocale
= _CFGetTSD(__CFTSDKeyCollatorLocale
);
597 if (compareLocale
== threadLocale
) {
598 collator
= threadCollator
;
601 collator
= __CFStringCopyDefaultCollator((CFLocaleRef
)compareLocale
);
602 defaultCollator
= true;
603 if (NULL
== collator
) {
604 collator
= __CFStringCreateCollator((CFLocaleRef
)compareLocale
);
605 defaultCollator
= false;
607 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
611 characters1
= CFStringGetCharactersPtrFromInlineBuffer(str1
, range1
);
612 characters2
= CFStringGetCharactersPtrFromInlineBuffer(str2
, range2
);
614 if ((NULL
!= characters1
) && (NULL
!= characters2
)) { // do fast
615 range1
.length
= (str1Range
.location
+ str1Range
.length
) - range1
.location
;
616 range2
.length
= (str2Range
.location
+ str2Range
.length
) - range2
.location
;
618 if ((NULL
!= collator
) && (__CompareTextDefault(collator
, options
, characters1
, range1
.length
, characters2
, range2
.length
, &isEqual
, &order
) == 0 /* noErr */)) {
619 compResult
= ((isEqual
&& !forcedOrdering
) ? kCFCompareEqualTo
: ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
621 compResult
= ((memcmp(characters1
, characters2
, sizeof(UniChar
) * range1
.length
) < 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
624 UniChar
*buffer1
= NULL
;
625 UniChar
*buffer2
= NULL
;
626 UTF16Char sBuffer1
[kCFStringCompareAllocationIncrement
];
627 UTF16Char sBuffer2
[kCFStringCompareAllocationIncrement
];
628 CFIndex buffer1Len
= 0, buffer2Len
= 0;
629 CFIndex str1Max
= str1Range
.location
+ str1Range
.length
;
630 CFIndex str2Max
= str2Range
.location
+ str2Range
.length
;
633 // Extend forward and compare until the result is deterministic. The result is indeterminate if the differences are weak and can be resolved by character folding. For example, the comparison between "abc" and "ABC" is considered indeterminate.
635 if (str1Range
.location
< str1Max
) {
636 str1Range
.location
= __extendLocationForward(str1Range
.location
, str1
, alnumBMP
, punctBMP
, controlBMP
, str1Max
);
637 range1
.length
= (str1Range
.location
- range1
.location
);
638 characters1
= CFStringGetCharactersPtrFromInlineBuffer(str1
, range1
);
640 if (NULL
== characters1
) {
641 if ((0 > buffer1Len
) || (range1
.length
> kCFStringCompareAllocationIncrement
)) {
642 if (buffer1Len
< range1
.length
) {
643 bufferSize
= range1
.length
+ (kCFStringCompareAllocationIncrement
- (range1
.length
% kCFStringCompareAllocationIncrement
));
644 if (0 == buffer1Len
) {
645 buffer1
= (UniChar
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * bufferSize
, 0);
646 } else if (buffer1Len
< range1
.length
) {
647 buffer1
= (UniChar
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, buffer1
, sizeof(UTF16Char
) * bufferSize
, 0);
649 buffer1Len
= bufferSize
;
655 CFStringGetCharactersFromInlineBuffer(str1
, range1
, buffer1
);
656 characters1
= buffer1
;
660 if (str2Range
.location
< str2Max
) {
661 str2Range
.location
= __extendLocationForward(str2Range
.location
, str2
, alnumBMP
, punctBMP
, controlBMP
, str2Max
);
662 range2
.length
= (str2Range
.location
- range2
.location
);
663 characters2
= CFStringGetCharactersPtrFromInlineBuffer(str2
, range2
);
665 if (NULL
== characters2
) {
666 if ((0 > buffer2Len
) || (range2
.length
> kCFStringCompareAllocationIncrement
)) {
667 if (buffer2Len
< range2
.length
) {
668 bufferSize
= range2
.length
+ (kCFStringCompareAllocationIncrement
- (range2
.length
% kCFStringCompareAllocationIncrement
));
669 if (0 == buffer2Len
) {
670 buffer2
= (UniChar
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * bufferSize
, 0);
671 } else if (buffer2Len
< range2
.length
) {
672 buffer2
= (UniChar
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, buffer2
, sizeof(UTF16Char
) * bufferSize
, 0);
674 buffer2Len
= bufferSize
;
680 CFStringGetCharactersFromInlineBuffer(str2
, range2
, buffer2
);
681 characters2
= buffer2
;
685 if ((NULL
!= collator
) && (__CompareTextDefault(collator
, options
, characters1
, range1
.length
, characters2
, range2
.length
, &isEqual
, &order
) == 0 /* noErr */)) {
687 if (forcedOrdering
&& (kCFCompareEqualTo
== compResult
) && (0 != order
)) compResult
= ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
691 order
= memcmp(characters1
, characters2
, sizeof(UTF16Char
) * ((range1
.length
< range2
.length
) ? range1
.length
: range2
.length
));
693 if (range1
.length
< range2
.length
) {
695 } else if (range2
.length
< range1
.length
) {
698 } else if (order
< 0) {
700 } else if (order
> 0) {
705 if ((order
< -1) || (order
> 1)) break; // the result is deterministic
708 range1
.location
= str1Range
.location
;
709 range2
.location
= str2Range
.location
;
711 } while ((str1Range
.location
< str1Max
) || (str2Range
.location
< str2Max
));
713 if (0 != order
) compResult
= ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
715 if (buffer1Len
> 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, buffer1
);
716 if (buffer2Len
> 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, buffer2
);
719 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
720 if (collator
== threadCollator
) {
721 // do nothing, already cached
723 if (threadLocale
) __collatorFinalize((UCollator
*)_CFGetTSD(__CFTSDKeyCollatorUCollator
)); // need to dealloc collators
725 _CFSetTSD(__CFTSDKeyCollatorUCollator
, collator
, (void *)__collatorFinalize
);
726 _CFSetTSD(__CFTSDKeyCollatorLocale
, (void *)CFRetain(compareLocale
), NULL
);