2 * Copyright (c) 2009 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
23 /* CFStringUtilities.c
24 Copyright (c) 1999-2009, Apple Inc. All rights reserved.
25 Responsibility: Aki Inoue
28 #include "CFInternal.h"
29 #include <CoreFoundation/CFStringEncodingConverterExt.h>
30 #include <CoreFoundation/CFUniChar.h>
31 #include <CoreFoundation/CFStringEncodingExt.h>
32 #include "CFStringEncodingDatabase.h"
33 #include "CFICUConverters.h"
34 #include <CoreFoundation/CFPreferences.h>
37 #include <unicode/ucol.h>
38 #include <unicode/ucoleitr.h>
41 #if DEPLOYMENT_TARGET_WINDOWS
46 Boolean
CFStringIsEncodingAvailable(CFStringEncoding theEncoding
) {
47 switch (theEncoding
) {
48 case kCFStringEncodingASCII
: // Built-in encodings
49 case kCFStringEncodingMacRoman
:
50 case kCFStringEncodingUTF8
:
51 case kCFStringEncodingNonLossyASCII
:
52 case kCFStringEncodingWindowsLatin1
:
53 case kCFStringEncodingNextStepLatin
:
54 case kCFStringEncodingUTF16
:
55 case kCFStringEncodingUTF16BE
:
56 case kCFStringEncodingUTF16LE
:
57 case kCFStringEncodingUTF32
:
58 case kCFStringEncodingUTF32BE
:
59 case kCFStringEncodingUTF32LE
:
63 return CFStringEncodingIsValidEncoding(theEncoding
);
67 const CFStringEncoding
* CFStringGetListOfAvailableEncodings() {
68 return (const CFStringEncoding
*)CFStringEncodingListOfAvailableEncodings();
71 CFStringRef
CFStringGetNameOfEncoding(CFStringEncoding theEncoding
) {
72 static CFMutableDictionaryRef mappingTable
= NULL
;
73 CFStringRef theName
= mappingTable
? (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)(uintptr_t)theEncoding
) : NULL
;
76 const char *encodingName
= __CFStringEncodingGetName(theEncoding
);
79 theName
= CFStringCreateWithCString(kCFAllocatorSystemDefault
, encodingName
, kCFStringEncodingASCII
);
83 if (!mappingTable
) mappingTable
= CFDictionaryCreateMutable(kCFAllocatorSystemDefault
, 0, (const CFDictionaryKeyCallBacks
*)NULL
, &kCFTypeDictionaryValueCallBacks
);
85 CFDictionaryAddValue(mappingTable
, (const void*)(uintptr_t)theEncoding
, (const void*)theName
);
93 CFStringEncoding
CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName
) {
94 CFStringEncoding encoding
= kCFStringEncodingInvalidId
;
95 #define BUFFER_SIZE (100)
96 char buffer
[BUFFER_SIZE
];
97 const char *name
= CFStringGetCStringPtr(charsetName
, __CFStringGetEightBitStringEncoding());
100 if (false == CFStringGetCString(charsetName
, buffer
, BUFFER_SIZE
, __CFStringGetEightBitStringEncoding())) return kCFStringEncodingInvalidId
;
105 encoding
= __CFStringEncodingGetFromCanonicalName(name
);
107 if (kCFStringEncodingInvalidId
== encoding
) encoding
= __CFStringEncodingGetFromICUName(name
);
113 CFStringRef
CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding
) {
114 CFStringRef name
= NULL
;
115 CFIndex value
= encoding
;
116 static CFMutableDictionaryRef mappingTable
= NULL
;
117 static CFSpinLock_t lock
= CFSpinLockInit
;
120 name
= ((NULL
== mappingTable
) ? NULL
: (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)value
));
123 #define STACK_BUFFER_SIZE (100)
124 char buffer
[STACK_BUFFER_SIZE
];
126 if (__CFStringEncodingGetCanonicalName(encoding
, buffer
, STACK_BUFFER_SIZE
)) name
= CFStringCreateWithCString(NULL
, buffer
, kCFStringEncodingASCII
);
130 CFIndex value
= encoding
;
132 if (NULL
== mappingTable
) mappingTable
= CFDictionaryCreateMutable(NULL
, 0, NULL
, &kCFTypeDictionaryValueCallBacks
);
134 CFDictionaryAddValue(mappingTable
, (const void*)value
, (const void*)name
);
138 __CFSpinUnlock(&lock
);
/*
 * NSString encoding numbers used by the two NSStringEncoding conversion
 * functions in this file.
 */
enum {
    NSASCIIStringEncoding = 1,              /* 0..127 only */
    NSNEXTSTEPStringEncoding = 2,
    NSJapaneseEUCStringEncoding = 3,
    NSUTF8StringEncoding = 4,
    NSISOLatin1StringEncoding = 5,
    NSSymbolStringEncoding = 6,
    NSNonLossyASCIIStringEncoding = 7,
    NSShiftJISStringEncoding = 8,
    NSISOLatin2StringEncoding = 9,
    NSUnicodeStringEncoding = 10,
    NSWindowsCP1251StringEncoding = 11,     /* Cyrillic; same as AdobeStandardCyrillic */
    NSWindowsCP1252StringEncoding = 12,     /* WinLatin1 */
    NSWindowsCP1253StringEncoding = 13,     /* Greek */
    NSWindowsCP1254StringEncoding = 14,     /* Turkish */
    NSWindowsCP1250StringEncoding = 15,     /* WinLatin2 */
    NSISO2022JPStringEncoding = 21,         /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding = 30,

    NSProprietaryStringEncoding = 65536     /* Installation-specific encoding */
};

/*
 * Bit 31 marks an NSString encoding number that carries a raw CFStringEncoding
 * in its low bits.  The constant is unsigned so the shift never reaches the
 * sign bit of a signed int and the mask is not sign-extended when combined
 * with an unsigned long.
 */
#define NSENCODING_MASK (1U << 31)
167 unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding
) {
168 switch (theEncoding
& 0xFFF) {
169 case kCFStringEncodingUnicode
:
170 if (theEncoding
== kCFStringEncodingUTF16
) return NSUnicodeStringEncoding
;
171 else if (theEncoding
== kCFStringEncodingUTF8
) return NSUTF8StringEncoding
;
174 case kCFStringEncodingWindowsLatin1
: return NSWindowsCP1252StringEncoding
;
175 case kCFStringEncodingMacRoman
: return NSMacOSRomanStringEncoding
;
177 case kCFStringEncodingASCII
: return NSASCIIStringEncoding
;
179 case kCFStringEncodingDOSJapanese
: return NSShiftJISStringEncoding
;
180 case kCFStringEncodingWindowsCyrillic
: return NSWindowsCP1251StringEncoding
;
181 case kCFStringEncodingWindowsGreek
: return NSWindowsCP1253StringEncoding
;
182 case kCFStringEncodingWindowsLatin5
: return NSWindowsCP1254StringEncoding
;
183 case kCFStringEncodingWindowsLatin2
: return NSWindowsCP1250StringEncoding
;
184 case kCFStringEncodingISOLatin1
: return NSISOLatin1StringEncoding
;
186 case kCFStringEncodingNonLossyASCII
: return NSNonLossyASCIIStringEncoding
;
187 case kCFStringEncodingEUC_JP
: return NSJapaneseEUCStringEncoding
;
188 case kCFStringEncodingMacSymbol
: return NSSymbolStringEncoding
;
189 case kCFStringEncodingISOLatin2
: return NSISOLatin2StringEncoding
;
190 case kCFStringEncodingISO_2022_JP
: return NSISO2022JPStringEncoding
;
191 case kCFStringEncodingNextStepLatin
: return NSNEXTSTEPStringEncoding
;
194 return NSENCODING_MASK
| theEncoding
;
197 CFStringEncoding
CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding
) {
198 const uint16_t encodings
[] = {
199 kCFStringEncodingASCII
,
200 kCFStringEncodingNextStepLatin
,
201 kCFStringEncodingEUC_JP
,
203 kCFStringEncodingISOLatin1
,
204 kCFStringEncodingMacSymbol
,
205 kCFStringEncodingNonLossyASCII
,
206 kCFStringEncodingDOSJapanese
,
207 kCFStringEncodingISOLatin2
,
208 kCFStringEncodingUTF16
,
209 kCFStringEncodingWindowsCyrillic
,
210 kCFStringEncodingWindowsLatin1
,
211 kCFStringEncodingWindowsGreek
,
212 kCFStringEncodingWindowsLatin5
,
213 kCFStringEncodingWindowsLatin2
216 if (NSUTF8StringEncoding
== theEncoding
) return kCFStringEncodingUTF8
;
218 if ((theEncoding
> 0) && (theEncoding
<= NSWindowsCP1250StringEncoding
)) return encodings
[theEncoding
- 1];
220 switch (theEncoding
) {
221 case NSMacOSRomanStringEncoding
: return kCFStringEncodingMacRoman
;
222 case NSISO2022JPStringEncoding
: return kCFStringEncodingISO_2022_JP
;
225 return ((theEncoding
& NSENCODING_MASK
) ? theEncoding
& ~NSENCODING_MASK
: kCFStringEncodingInvalidId
);
229 UInt32
CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding
) {
230 uint16_t codepage
= __CFStringEncodingGetWindowsCodePage(theEncoding
);
232 return ((0 == codepage
) ? kCFStringEncodingInvalidId
: codepage
);
235 CFStringEncoding
CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding
) {
236 return __CFStringEncodingGetFromWindowsCodePage(theEncoding
);
239 CFStringEncoding
CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding
) {
240 CFStringEncoding macEncoding
= __CFStringEncodingGetMostCompatibleMacScript(encoding
);
// Size, in UTF-16 units, of the stack buffers used by the localized compare and the
// granularity to which its heap buffers are rounded up.
#define kCFStringCompareAllocationIncrement (128)

// -------------------------------------------------------------------------------------------------
//	CompareSpecials - ignore case & diacritic differences
//
//	Decomposed have 2nd-4th chars of type Mn or Mc, or in range 1160-11FF (jamo)
//	Fullwidth & halfwidth are in range FF00-FFEF
//	Parenthesized & circled are in range 3200-32FF
// -------------------------------------------------------------------------------------------------

// Masks and weights applied by __CompareSpecials to the 32-bit collation elements that
// ucol_next() returns.
enum {
    kUpperCaseWeightMin   = 0x80 | 0x0F,
    kUpperCaseWeightMax   = 0x80 | 0x17,
    kUpperToLowerDelta    = 0x80 | 0x0A,    // 0x0A = 0x0F - 0x05
    kMaskPrimarySecondary = 0xFFFFFF00,
    kMaskPrimaryOnly      = 0xFFFF0000,
    kMaskSecondaryOnly    = 0x0000FF00,
    kMaskCaseTertiary     = 0x000000FF      // 2 hi bits case, 6 lo bits tertiary
};
267 static SInt32
__CompareSpecials(const UCollator
*collator
, CFOptionFlags options
, const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
) {
268 UErrorCode icuStatus
= U_ZERO_ERROR
;
269 SInt32 orderWidth
= 0;
270 SInt32 orderCompos
= 0;
272 UCollationElements
* collElems1
= ucol_openElements(collator
, (const UChar
*)text1Ptr
, text1Length
, &icuStatus
);
273 UCollationElements
* collElems2
= ucol_openElements(collator
, (const UChar
*)text2Ptr
, text2Length
, &icuStatus
);
274 if (U_SUCCESS(icuStatus
)) {
275 int32_t startOffset1
= 0;
276 int32_t startOffset2
= 0;
279 int32_t elemOrder1
, elemOrder2
;
280 int32_t offset1
, offset2
;
282 elemOrder1
= ucol_next(collElems1
, &icuStatus
);
283 elemOrder2
= ucol_next(collElems2
, &icuStatus
);
284 if ( U_FAILURE(icuStatus
) || elemOrder1
== (int32_t)UCOL_NULLORDER
|| elemOrder2
== (int32_t)UCOL_NULLORDER
) {
288 offset1
= ucol_getOffset(collElems1
);
289 offset2
= ucol_getOffset(collElems2
);
290 if ( (elemOrder1
& kMaskPrimarySecondary
) == (elemOrder2
& kMaskPrimarySecondary
) ) {
291 if ( (elemOrder1
& kMaskPrimaryOnly
) != 0 ) {
292 // keys may differ in case, width, circling, etc.
294 int32_t tertiary1
= (elemOrder1
& kMaskCaseTertiary
);
295 int32_t tertiary2
= (elemOrder2
& kMaskCaseTertiary
);
296 // fold upper to lower case
297 if (tertiary1
>= kUpperCaseWeightMin
&& tertiary1
<= kUpperCaseWeightMax
) {
298 tertiary1
-= kUpperToLowerDelta
;
300 if (tertiary2
>= kUpperCaseWeightMin
&& tertiary2
<= kUpperCaseWeightMax
) {
301 tertiary2
-= kUpperToLowerDelta
;
304 if (tertiary1
!= tertiary2
) {
305 orderWidth
= (tertiary1
< tertiary2
)? -1: 1;
309 } else if ( (elemOrder1
& kMaskSecondaryOnly
) != 0 ) {
310 // primary weights are both zero, but secondaries are not.
311 if ( orderCompos
== 0 && (options
& kCFCompareNonliteral
) == 0 ) {
312 // We have a code element which is a diacritic.
313 // It may have come from a composed char or a combining char.
314 // If it came from a combining char (longer element length) it sorts first.
315 // This is only an approximation to what the Mac OS 9 code did, but this is an
316 // unusual case anyway.
317 int32_t elem1Length
= offset1
- startOffset1
;
318 int32_t elem2Length
= offset2
- startOffset2
;
319 if (elem1Length
!= elem2Length
) {
320 orderCompos
= (elem1Length
> elem2Length
)? -1: 1;
326 startOffset1
= offset1
;
327 startOffset2
= offset2
;
329 ucol_closeElements(collElems1
);
330 ucol_closeElements(collElems2
);
333 return (orderWidth
!= 0)? orderWidth
: orderCompos
;
336 static SInt32
__CompareCodePoints(const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
) {
337 const UniChar
* text1P
= text1Ptr
;
338 const UniChar
* text2P
= text2Ptr
;
339 UInt32 textLimit
= (text1Length
<= text2Length
)? text1Length
: text2Length
;
341 SInt32 orderResult
= 0;
343 // Loop through either string...the first difference differentiates this.
344 for (textCounter
= 0; textCounter
< textLimit
&& *text1P
== *text2P
; textCounter
++) {
348 if (textCounter
< textLimit
) {
349 // code point difference
350 orderResult
= (*text1P
< *text2P
) ? -1 : 1;
351 } else if (text1Length
!= text2Length
) {
352 // one string has extra stuff at end
353 orderResult
= (text1Length
< text2Length
) ? -1 : 1;
359 extern const CFStringRef __kCFLocaleCollatorID
;
361 static UCollator
*__CFStringCreateCollator(CFLocaleRef compareLocale
) {
362 CFStringRef canonLocaleCFStr
= (CFStringRef
)CFLocaleGetValue(compareLocale
, __kCFLocaleCollatorID
);
363 char icuLocaleStr
[128] = {0};
364 CFStringGetCString(canonLocaleCFStr
, icuLocaleStr
, sizeof(icuLocaleStr
), kCFStringEncodingASCII
);
365 UErrorCode icuStatus
= U_ZERO_ERROR
;
366 UCollator
* collator
= ucol_open(icuLocaleStr
, &icuStatus
);
367 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
368 ucol_setAttribute(collator
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &icuStatus
);
369 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
370 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
371 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
375 #define kCFMaxCachedDefaultCollators (8)
376 static UCollator
*__CFDefaultCollators
[kCFMaxCachedDefaultCollators
];
377 static CFIndex __CFDefaultCollatorsCount
= 0;
378 static const void *__CFDefaultCollatorLocale
= NULL
;
379 static CFSpinLock_t __CFDefaultCollatorLock
= CFSpinLockInit
;
381 static UCollator
*__CFStringCopyDefaultCollator(CFLocaleRef compareLocale
) {
382 CFLocaleRef currentLocale
= NULL
;
383 UCollator
* collator
= NULL
;
385 if (compareLocale
!= __CFDefaultCollatorLocale
) {
386 currentLocale
= CFLocaleCopyCurrent();
387 CFRelease(currentLocale
);
388 if (compareLocale
!= currentLocale
) return NULL
;
391 __CFSpinLock(&__CFDefaultCollatorLock
);
392 if ((NULL
!= currentLocale
) && (__CFDefaultCollatorLocale
!= currentLocale
)) {
393 while (__CFDefaultCollatorsCount
> 0) ucol_close(__CFDefaultCollators
[--__CFDefaultCollatorsCount
]);
394 __CFDefaultCollatorLocale
= currentLocale
;
397 if (__CFDefaultCollatorsCount
> 0) collator
= __CFDefaultCollators
[--__CFDefaultCollatorsCount
];
398 __CFSpinUnlock(&__CFDefaultCollatorLock
);
400 if (NULL
== collator
) {
401 collator
= __CFStringCreateCollator(compareLocale
);
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
// Retires the collator cached in thread-specific data.
//
// Clears both thread-specific slots, then either parks the collator in the default pool
// (when the thread's locale is the pooled locale and the pool has room) or closes it.
// The locale reference stored alongside the collator is released in both cases.
static void __collatorFinalize(UCollator *collator) {
    CFLocaleRef locale = pthread_getspecific(__CFTSDKeyCollatorLocale);
    pthread_setspecific(__CFTSDKeyCollatorUCollator, NULL);
    pthread_setspecific(__CFTSDKeyCollatorLocale, NULL);
    __CFSpinLock(&__CFDefaultCollatorLock);
    if ((__CFDefaultCollatorLocale == locale) && (__CFDefaultCollatorsCount < kCFMaxCachedDefaultCollators)) {
        __CFDefaultCollators[__CFDefaultCollatorsCount++] = collator;
        // Ownership moved to the pool; it must not be closed below.
        collator = NULL;
    }
    __CFSpinUnlock(&__CFDefaultCollatorLock);
    if (NULL != collator) ucol_close(collator);
    if (locale) CFRelease(locale);
}
#endif
423 // -------------------------------------------------------------------------------------------------
424 // __CompareTextDefault
426 // A primary difference is denoted by values 2/-2 in orderP. Other differences are indicated with a -1/1.
427 // A negative value indicates that text1 sorts before text2.
428 // -------------------------------------------------------------------------------------------------
429 static OSStatus
__CompareTextDefault(UCollator
*collator
, CFOptionFlags options
, const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
, Boolean
*equivalentP
, SInt32
*orderP
) {
431 // collator must have default settings restored on exit from this function
436 if (options
& kCFCompareNumerically
) {
437 UErrorCode icuStatus
= U_ZERO_ERROR
;
438 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &icuStatus
);
441 // Most string differences are Primary. Do a primary check first, then if there
442 // are no differences do a comparison with the options in the collator.
443 UCollationResult icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
444 if (icuResult
!= UCOL_EQUAL
) {
445 *orderP
= (icuResult
== UCOL_LESS
) ? -2 : 2;
448 UErrorCode icuStatus
= U_ZERO_ERROR
;
449 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &icuStatus
);
450 ucol_setAttribute(collator
, UCOL_STRENGTH
, (options
& kCFCompareDiacriticInsensitive
) ? UCOL_PRIMARY
: UCOL_SECONDARY
, &icuStatus
);
451 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, (options
& kCFCompareCaseInsensitive
) ? UCOL_OFF
: UCOL_ON
, &icuStatus
);
452 if (!U_SUCCESS(icuStatus
)) {
453 icuStatus
= U_ZERO_ERROR
;
454 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
455 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
456 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
457 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
461 // We don't have a primary difference. Recompare with standard collator.
462 icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
463 if (icuResult
!= UCOL_EQUAL
) {
464 *orderP
= (icuResult
== UCOL_LESS
) ? -1 : 1;
466 icuStatus
= U_ZERO_ERROR
;
467 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
468 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
469 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
471 if (*orderP
== 0 && (options
& kCFCompareNonliteral
) == 0) {
472 *orderP
= __CompareSpecials(collator
, options
, text1Ptr
, text1Length
, text2Ptr
, text2Length
);
475 *equivalentP
= (*orderP
== 0);
477 // If strings are equivalent but we care about order and have not yet checked
478 // to the level of code point order, then do some more checks for order
480 UErrorCode icuStatus
= U_ZERO_ERROR
;
481 // First try to see if ICU can find any differences above code point level
482 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &icuStatus
);
483 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_TERTIARY
, &icuStatus
);
484 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_ON
, &icuStatus
);
485 if (!U_SUCCESS(icuStatus
)) {
486 icuStatus
= U_ZERO_ERROR
;
487 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
488 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
489 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
490 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
493 icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
494 if (icuResult
!= UCOL_EQUAL
) {
495 *orderP
= (icuResult
== UCOL_LESS
) ? -1 : 1;
497 // no ICU differences above code point level, compare code points
498 *orderP
= __CompareCodePoints( text1Ptr
, text1Length
, text2Ptr
, text2Length
);
500 icuStatus
= U_ZERO_ERROR
;
501 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
502 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
503 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
506 if (options
& kCFCompareNumerically
) {
507 UErrorCode icuStatus
= U_ZERO_ERROR
;
508 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
513 static inline CFIndex
__extendLocationBackward(CFIndex location
, CFStringInlineBuffer
*str
, const uint8_t *nonBaseBMP
, const uint8_t *punctBMP
) {
514 while (location
> 0) {
515 UTF32Char ch
= CFStringGetCharacterFromInlineBuffer(str
, location
);
517 if (CFUniCharIsSurrogateLowCharacter(ch
) && CFUniCharIsSurrogateHighCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(str
, location
- 1)))) {
518 ch
= CFUniCharGetLongCharacterForSurrogatePair(ch
, otherChar
);
519 uint8_t planeNo
= (ch
>> 16);
520 if ((planeNo
> 1) || (!CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, planeNo
)))) break;
523 if ((!CFUniCharIsMemberOfBitmap(ch
, nonBaseBMP
) && !CFUniCharIsMemberOfBitmap(ch
, punctBMP
)) || ((ch
>= 0x2E80) && (ch
< 0xAC00))) break;
531 static inline CFIndex
__extendLocationForward(CFIndex location
, CFStringInlineBuffer
*str
, const uint8_t *alnumBMP
, const uint8_t *punctBMP
, const uint8_t *controlBMP
, CFIndex strMax
) {
533 UTF32Char ch
= CFStringGetCharacterFromInlineBuffer(str
, location
);
535 if (CFUniCharIsSurrogateHighCharacter(ch
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(str
, location
+ 1)))) {
536 ch
= CFUniCharGetLongCharacterForSurrogatePair(ch
, otherChar
);
538 uint8_t planeNo
= (ch
>> 16);
539 if (!CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet
, planeNo
))) break;
542 if ((!CFUniCharIsMemberOfBitmap(ch
, alnumBMP
) && !CFUniCharIsMemberOfBitmap(ch
, punctBMP
) && !CFUniCharIsMemberOfBitmap(ch
, controlBMP
)) || ((ch
>= 0x2E80) && (ch
< 0xAC00))) break;
544 } while (location
< strMax
);
548 __private_extern__ CFComparisonResult
_CFCompareStringsWithLocale(CFStringInlineBuffer
*str1
, CFRange str1Range
, CFStringInlineBuffer
*str2
, CFRange str2Range
, CFOptionFlags options
, const void *compareLocale
) {
549 const UniChar
*characters1
;
550 const UniChar
*characters2
;
551 CFComparisonResult compResult
= kCFCompareEqualTo
;
552 CFRange range1
= str1Range
;
553 CFRange range2
= str2Range
;
556 bool forcedOrdering
= ((options
& kCFCompareForcedOrdering
) ? true : false);
558 UCollator
*collator
= NULL
;
559 bool defaultCollator
= true;
560 static const uint8_t *alnumBMP
= NULL
;
561 static const uint8_t *nonBaseBMP
= NULL
;
562 static const uint8_t *punctBMP
= NULL
;
563 static const uint8_t *controlBMP
= NULL
;
565 if (NULL
== alnumBMP
) {
566 alnumBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet
, 0);
567 nonBaseBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
568 punctBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, 0);
569 controlBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet
, 0);
572 // Determine the range of characters surrodiing the current index significant for localized comparison. The range is extended backward and forward as long as they are contextual. Contextual characters include all letters and punctuations. Since most control/format characters are ignorable in localized comparison, we also include them extending forward.
574 range1
.location
= str1Range
.location
;
575 range2
.location
= str2Range
.location
;
578 // The characters upto the current index are already determined to be equal by the CFString's standard character folding algorithm. Extend as long as truly contextual (all letters and punctuations).
579 if (range1
.location
> 0) {
580 range1
.location
= __extendLocationBackward(range1
.location
- 1, str1
, nonBaseBMP
, punctBMP
);
583 if (range2
.location
> 0) {
584 range2
.location
= __extendLocationBackward(range2
.location
- 1, str2
, nonBaseBMP
, punctBMP
);
587 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
588 // First we try to use the last one used on this thread, if the locale is the same,
589 // otherwise we try to check out a default one, or then we create one.
590 UCollator
*threadCollator
= pthread_getspecific(__CFTSDKeyCollatorUCollator
);
591 CFLocaleRef threadLocale
= pthread_getspecific(__CFTSDKeyCollatorLocale
);
592 if (compareLocale
== threadLocale
) {
593 collator
= threadCollator
;
596 collator
= __CFStringCopyDefaultCollator((CFLocaleRef
)compareLocale
);
597 defaultCollator
= true;
598 if (NULL
== collator
) {
599 collator
= __CFStringCreateCollator((CFLocaleRef
)compareLocale
);
600 defaultCollator
= false;
602 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
606 characters1
= CFStringGetCharactersPtrFromInlineBuffer(str1
, range1
);
607 characters2
= CFStringGetCharactersPtrFromInlineBuffer(str2
, range2
);
609 if ((NULL
!= characters1
) && (NULL
!= characters2
)) { // do fast
610 range1
.length
= (str1Range
.location
+ str1Range
.length
) - range1
.location
;
611 range2
.length
= (str2Range
.location
+ str2Range
.length
) - range2
.location
;
613 if ((NULL
!= collator
) && (__CompareTextDefault(collator
, options
, characters1
, range1
.length
, characters2
, range2
.length
, &isEqual
, &order
) == 0 /* noErr */)) {
614 compResult
= ((isEqual
&& !forcedOrdering
) ? kCFCompareEqualTo
: ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
616 compResult
= ((memcmp(characters1
, characters2
, sizeof(UniChar
) * range1
.length
) < 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
619 UniChar
*buffer1
= NULL
;
620 UniChar
*buffer2
= NULL
;
621 UTF16Char sBuffer1
[kCFStringCompareAllocationIncrement
];
622 UTF16Char sBuffer2
[kCFStringCompareAllocationIncrement
];
623 CFIndex buffer1Len
= 0, buffer2Len
= 0;
624 CFIndex str1Max
= str1Range
.location
+ str1Range
.length
;
625 CFIndex str2Max
= str2Range
.location
+ str2Range
.length
;
628 // Extend forward and compare until the result is deterministic. The result is indeterministic if the differences are weak and can be resolved by character folding. For example, comparision between "abc" and "ABC" is considered to be indeterministic.
630 if (str1Range
.location
< str1Max
) {
631 str1Range
.location
= __extendLocationForward(str1Range
.location
, str1
, alnumBMP
, punctBMP
, controlBMP
, str1Max
);
632 range1
.length
= (str1Range
.location
- range1
.location
);
633 characters1
= CFStringGetCharactersPtrFromInlineBuffer(str1
, range1
);
635 if (NULL
== characters1
) {
636 if ((0 > buffer1Len
) || (range1
.length
> kCFStringCompareAllocationIncrement
)) {
637 if (buffer1Len
< range1
.length
) {
638 bufferSize
= range1
.length
+ (kCFStringCompareAllocationIncrement
- (range1
.length
% kCFStringCompareAllocationIncrement
));
639 if (0 == buffer1Len
) {
640 buffer1
= (UniChar
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * bufferSize
, 0);
641 } else if (buffer1Len
< range1
.length
) {
642 buffer1
= (UniChar
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, buffer1
, sizeof(UTF16Char
) * bufferSize
, 0);
644 buffer1Len
= bufferSize
;
650 CFStringGetCharactersFromInlineBuffer(str1
, range1
, buffer1
);
651 characters1
= buffer1
;
655 if (str2Range
.location
< str2Max
) {
656 str2Range
.location
= __extendLocationForward(str2Range
.location
, str2
, alnumBMP
, punctBMP
, controlBMP
, str2Max
);
657 range2
.length
= (str2Range
.location
- range2
.location
);
658 characters2
= CFStringGetCharactersPtrFromInlineBuffer(str2
, range2
);
660 if (NULL
== characters2
) {
661 if ((0 > buffer2Len
) || (range2
.length
> kCFStringCompareAllocationIncrement
)) {
662 if (buffer2Len
< range2
.length
) {
663 bufferSize
= range2
.length
+ (kCFStringCompareAllocationIncrement
- (range2
.length
% kCFStringCompareAllocationIncrement
));
664 if (0 == buffer2Len
) {
665 buffer2
= (UniChar
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * bufferSize
, 0);
666 } else if (buffer2Len
< range2
.length
) {
667 buffer2
= (UniChar
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, buffer2
, sizeof(UTF16Char
) * bufferSize
, 0);
669 buffer2Len
= bufferSize
;
675 CFStringGetCharactersFromInlineBuffer(str2
, range2
, buffer2
);
676 characters2
= buffer2
;
680 if ((NULL
!= collator
) && (__CompareTextDefault(collator
, options
, characters1
, range1
.length
, characters2
, range2
.length
, &isEqual
, &order
) == 0 /* noErr */)) {
682 if (forcedOrdering
&& (kCFCompareEqualTo
== compResult
) && (0 != order
)) compResult
= ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
686 order
= memcmp(characters1
, characters2
, sizeof(UTF16Char
) * ((range1
.length
< range2
.length
) ? range1
.length
: range2
.length
));
688 if (range1
.length
< range2
.length
) {
690 } else if (range2
.length
< range1
.length
) {
693 } else if (order
< 0) {
695 } else if (order
> 0) {
700 if ((order
< -1) || (order
> 1)) break; // the result is deterministic
703 range1
.location
= str1Range
.location
;
704 range2
.location
= str2Range
.location
;
706 } while ((str1Range
.location
< str1Max
) || (str2Range
.location
< str2Max
));
708 if (0 != order
) compResult
= ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
710 if (buffer1Len
> 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, buffer1
);
711 if (buffer2Len
> 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, buffer2
);
714 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
715 if (collator
== threadCollator
) {
716 // do nothing, already cached
718 if (threadLocale
) __collatorFinalize((UCollator
*)pthread_getspecific(__CFTSDKeyCollatorUCollator
)); // need to dealloc collators
720 pthread_key_init_np(__CFTSDKeyCollatorUCollator
, (void *)__collatorFinalize
);
721 pthread_setspecific(__CFTSDKeyCollatorUCollator
, collator
);
722 pthread_setspecific(__CFTSDKeyCollatorLocale
, CFRetain(compareLocale
));