2 * Copyright (c) 2010 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 /* CFStringUtilities.c
25 Copyright (c) 1999-2009, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFStringEncodingConverterExt.h>
31 #include <CoreFoundation/CFUniChar.h>
32 #include <CoreFoundation/CFStringEncodingExt.h>
33 #include "CFStringEncodingDatabase.h"
34 #include "CFICUConverters.h"
35 #include <CoreFoundation/CFPreferences.h>
38 #include <unicode/ucol.h>
39 #include <unicode/ucoleitr.h>
42 #if DEPLOYMENT_TARGET_WINDOWS
47 Boolean
CFStringIsEncodingAvailable(CFStringEncoding theEncoding
) {
48 switch (theEncoding
) {
49 case kCFStringEncodingASCII
: // Built-in encodings
50 case kCFStringEncodingMacRoman
:
51 case kCFStringEncodingUTF8
:
52 case kCFStringEncodingNonLossyASCII
:
53 case kCFStringEncodingWindowsLatin1
:
54 case kCFStringEncodingNextStepLatin
:
55 case kCFStringEncodingUTF16
:
56 case kCFStringEncodingUTF16BE
:
57 case kCFStringEncodingUTF16LE
:
58 case kCFStringEncodingUTF32
:
59 case kCFStringEncodingUTF32BE
:
60 case kCFStringEncodingUTF32LE
:
64 return CFStringEncodingIsValidEncoding(theEncoding
);
68 const CFStringEncoding
* CFStringGetListOfAvailableEncodings() {
69 return (const CFStringEncoding
*)CFStringEncodingListOfAvailableEncodings();
72 CFStringRef
CFStringGetNameOfEncoding(CFStringEncoding theEncoding
) {
73 static CFMutableDictionaryRef mappingTable
= NULL
;
74 CFStringRef theName
= mappingTable
? (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)(uintptr_t)theEncoding
) : NULL
;
77 const char *encodingName
= __CFStringEncodingGetName(theEncoding
);
80 theName
= CFStringCreateWithCString(kCFAllocatorSystemDefault
, encodingName
, kCFStringEncodingASCII
);
84 if (!mappingTable
) mappingTable
= CFDictionaryCreateMutable(kCFAllocatorSystemDefault
, 0, (const CFDictionaryKeyCallBacks
*)NULL
, &kCFTypeDictionaryValueCallBacks
);
86 CFDictionaryAddValue(mappingTable
, (const void*)(uintptr_t)theEncoding
, (const void*)theName
);
94 CFStringEncoding
CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName
) {
95 CFStringEncoding encoding
= kCFStringEncodingInvalidId
;
96 #define BUFFER_SIZE (100)
97 char buffer
[BUFFER_SIZE
];
98 const char *name
= CFStringGetCStringPtr(charsetName
, __CFStringGetEightBitStringEncoding());
101 if (false == CFStringGetCString(charsetName
, buffer
, BUFFER_SIZE
, __CFStringGetEightBitStringEncoding())) return kCFStringEncodingInvalidId
;
106 encoding
= __CFStringEncodingGetFromCanonicalName(name
);
108 if (kCFStringEncodingInvalidId
== encoding
) encoding
= __CFStringEncodingGetFromICUName(name
);
114 CFStringRef
CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding
) {
115 CFStringRef name
= NULL
;
116 CFIndex value
= encoding
;
117 static CFMutableDictionaryRef mappingTable
= NULL
;
118 static CFSpinLock_t lock
= CFSpinLockInit
;
121 name
= ((NULL
== mappingTable
) ? NULL
: (CFStringRef
)CFDictionaryGetValue(mappingTable
, (const void*)value
));
124 #define STACK_BUFFER_SIZE (100)
125 char buffer
[STACK_BUFFER_SIZE
];
127 if (__CFStringEncodingGetCanonicalName(encoding
, buffer
, STACK_BUFFER_SIZE
)) name
= CFStringCreateWithCString(NULL
, buffer
, kCFStringEncodingASCII
);
131 CFIndex value
= encoding
;
133 if (NULL
== mappingTable
) mappingTable
= CFDictionaryCreateMutable(NULL
, 0, NULL
, &kCFTypeDictionaryValueCallBacks
);
135 CFDictionaryAddValue(mappingTable
, (const void*)value
, (const void*)name
);
139 __CFSpinUnlock(&lock
);
/*
 * NSString encoding numbers, used by the two conversion functions below to
 * translate to and from CFStringEncoding values.
 */
enum {
    NSASCIIStringEncoding = 1,          /* 0..127 only */
    NSNEXTSTEPStringEncoding = 2,
    NSJapaneseEUCStringEncoding = 3,
    NSUTF8StringEncoding = 4,
    NSISOLatin1StringEncoding = 5,
    NSSymbolStringEncoding = 6,
    NSNonLossyASCIIStringEncoding = 7,
    NSShiftJISStringEncoding = 8,
    NSISOLatin2StringEncoding = 9,
    NSUnicodeStringEncoding = 10,
    NSWindowsCP1251StringEncoding = 11, /* Cyrillic; same as AdobeStandardCyrillic */
    NSWindowsCP1252StringEncoding = 12, /* WinLatin1 */
    NSWindowsCP1253StringEncoding = 13, /* Greek */
    NSWindowsCP1254StringEncoding = 14, /* Turkish */
    NSWindowsCP1250StringEncoding = 15, /* WinLatin2 */
    NSISO2022JPStringEncoding = 21,     /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding = 30,

    NSProprietaryStringEncoding = 65536 /* Installation-specific encoding */
};
166 #define NSENCODING_MASK (1 << 31)
168 unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding
) {
169 switch (theEncoding
& 0xFFF) {
170 case kCFStringEncodingUnicode
:
171 if (theEncoding
== kCFStringEncodingUTF16
) return NSUnicodeStringEncoding
;
172 else if (theEncoding
== kCFStringEncodingUTF8
) return NSUTF8StringEncoding
;
175 case kCFStringEncodingWindowsLatin1
: return NSWindowsCP1252StringEncoding
;
176 case kCFStringEncodingMacRoman
: return NSMacOSRomanStringEncoding
;
178 case kCFStringEncodingASCII
: return NSASCIIStringEncoding
;
180 case kCFStringEncodingDOSJapanese
: return NSShiftJISStringEncoding
;
181 case kCFStringEncodingWindowsCyrillic
: return NSWindowsCP1251StringEncoding
;
182 case kCFStringEncodingWindowsGreek
: return NSWindowsCP1253StringEncoding
;
183 case kCFStringEncodingWindowsLatin5
: return NSWindowsCP1254StringEncoding
;
184 case kCFStringEncodingWindowsLatin2
: return NSWindowsCP1250StringEncoding
;
185 case kCFStringEncodingISOLatin1
: return NSISOLatin1StringEncoding
;
187 case kCFStringEncodingNonLossyASCII
: return NSNonLossyASCIIStringEncoding
;
188 case kCFStringEncodingEUC_JP
: return NSJapaneseEUCStringEncoding
;
189 case kCFStringEncodingMacSymbol
: return NSSymbolStringEncoding
;
190 case kCFStringEncodingISOLatin2
: return NSISOLatin2StringEncoding
;
191 case kCFStringEncodingISO_2022_JP
: return NSISO2022JPStringEncoding
;
192 case kCFStringEncodingNextStepLatin
: return NSNEXTSTEPStringEncoding
;
195 return NSENCODING_MASK
| theEncoding
;
198 CFStringEncoding
CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding
) {
199 const uint16_t encodings
[] = {
200 kCFStringEncodingASCII
,
201 kCFStringEncodingNextStepLatin
,
202 kCFStringEncodingEUC_JP
,
204 kCFStringEncodingISOLatin1
,
205 kCFStringEncodingMacSymbol
,
206 kCFStringEncodingNonLossyASCII
,
207 kCFStringEncodingDOSJapanese
,
208 kCFStringEncodingISOLatin2
,
209 kCFStringEncodingUTF16
,
210 kCFStringEncodingWindowsCyrillic
,
211 kCFStringEncodingWindowsLatin1
,
212 kCFStringEncodingWindowsGreek
,
213 kCFStringEncodingWindowsLatin5
,
214 kCFStringEncodingWindowsLatin2
217 if (NSUTF8StringEncoding
== theEncoding
) return kCFStringEncodingUTF8
;
219 if ((theEncoding
> 0) && (theEncoding
<= NSWindowsCP1250StringEncoding
)) return encodings
[theEncoding
- 1];
221 switch (theEncoding
) {
222 case NSMacOSRomanStringEncoding
: return kCFStringEncodingMacRoman
;
223 case NSISO2022JPStringEncoding
: return kCFStringEncodingISO_2022_JP
;
226 return ((theEncoding
& NSENCODING_MASK
) ? theEncoding
& ~NSENCODING_MASK
: kCFStringEncodingInvalidId
);
230 UInt32
CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding
) {
231 uint16_t codepage
= __CFStringEncodingGetWindowsCodePage(theEncoding
);
233 return ((0 == codepage
) ? kCFStringEncodingInvalidId
: codepage
);
236 CFStringEncoding
CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding
) {
237 return __CFStringEncodingGetFromWindowsCodePage(theEncoding
);
240 CFStringEncoding
CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding
) {
241 CFStringEncoding macEncoding
= __CFStringEncodingGetMostCompatibleMacScript(encoding
);
247 #define kCFStringCompareAllocationIncrement (128)
// -------------------------------------------------------------------------------------------------
// CompareSpecials - ignore case & diacritic differences
//
// Decomposed have 2nd-4th chars of type Mn or Mc, or in range 1160-11FF (jamo)
// Fullwidth & halfwidth are in range FF00-FFEF
// Parenthesized & circled are in range 3200-32FF
// -------------------------------------------------------------------------------------------------

// Weights and masks applied by __CompareSpecials to 32-bit collation element
// values returned by ucol_next().
enum {
    kUpperCaseWeightMin = 0x80 | 0x0F,
    kUpperCaseWeightMax = 0x80 | 0x17,
    kUpperToLowerDelta = 0x80 | 0x0A,   // 0x0A = 0x0F - 0x05
    kMaskPrimarySecondary = 0xFFFFFF00,
    kMaskPrimaryOnly = 0xFFFF0000,
    kMaskSecondaryOnly = 0x0000FF00,
    kMaskCaseTertiary = 0x000000FF      // 2 hi bits case, 6 lo bits tertiary
};
268 static SInt32
__CompareSpecials(const UCollator
*collator
, CFOptionFlags options
, const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
) {
269 UErrorCode icuStatus
= U_ZERO_ERROR
;
270 SInt32 orderWidth
= 0;
271 SInt32 orderCompos
= 0;
273 UCollationElements
* collElems1
= ucol_openElements(collator
, (const UChar
*)text1Ptr
, text1Length
, &icuStatus
);
274 UCollationElements
* collElems2
= ucol_openElements(collator
, (const UChar
*)text2Ptr
, text2Length
, &icuStatus
);
275 if (U_SUCCESS(icuStatus
)) {
276 int32_t startOffset1
= 0;
277 int32_t startOffset2
= 0;
280 int32_t elemOrder1
, elemOrder2
;
281 int32_t offset1
, offset2
;
283 elemOrder1
= ucol_next(collElems1
, &icuStatus
);
284 elemOrder2
= ucol_next(collElems2
, &icuStatus
);
285 if ( U_FAILURE(icuStatus
) || elemOrder1
== (int32_t)UCOL_NULLORDER
|| elemOrder2
== (int32_t)UCOL_NULLORDER
) {
289 offset1
= ucol_getOffset(collElems1
);
290 offset2
= ucol_getOffset(collElems2
);
291 if ( (elemOrder1
& kMaskPrimarySecondary
) == (elemOrder2
& kMaskPrimarySecondary
) ) {
292 if ( (elemOrder1
& kMaskPrimaryOnly
) != 0 ) {
293 // keys may differ in case, width, circling, etc.
295 int32_t tertiary1
= (elemOrder1
& kMaskCaseTertiary
);
296 int32_t tertiary2
= (elemOrder2
& kMaskCaseTertiary
);
297 // fold upper to lower case
298 if (tertiary1
>= kUpperCaseWeightMin
&& tertiary1
<= kUpperCaseWeightMax
) {
299 tertiary1
-= kUpperToLowerDelta
;
301 if (tertiary2
>= kUpperCaseWeightMin
&& tertiary2
<= kUpperCaseWeightMax
) {
302 tertiary2
-= kUpperToLowerDelta
;
305 if (tertiary1
!= tertiary2
) {
306 orderWidth
= (tertiary1
< tertiary2
)? -1: 1;
310 } else if ( (elemOrder1
& kMaskSecondaryOnly
) != 0 ) {
311 // primary weights are both zero, but secondaries are not.
312 if ( orderCompos
== 0 && (options
& kCFCompareNonliteral
) == 0 ) {
313 // We have a code element which is a diacritic.
314 // It may have come from a composed char or a combining char.
315 // If it came from a combining char (longer element length) it sorts first.
316 // This is only an approximation to what the Mac OS 9 code did, but this is an
317 // unusual case anyway.
318 int32_t elem1Length
= offset1
- startOffset1
;
319 int32_t elem2Length
= offset2
- startOffset2
;
320 if (elem1Length
!= elem2Length
) {
321 orderCompos
= (elem1Length
> elem2Length
)? -1: 1;
327 startOffset1
= offset1
;
328 startOffset2
= offset2
;
330 ucol_closeElements(collElems1
);
331 ucol_closeElements(collElems2
);
334 return (orderWidth
!= 0)? orderWidth
: orderCompos
;
337 static SInt32
__CompareCodePoints(const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
) {
338 const UniChar
* text1P
= text1Ptr
;
339 const UniChar
* text2P
= text2Ptr
;
340 UInt32 textLimit
= (text1Length
<= text2Length
)? text1Length
: text2Length
;
342 SInt32 orderResult
= 0;
344 // Loop through either string...the first difference differentiates this.
345 for (textCounter
= 0; textCounter
< textLimit
&& *text1P
== *text2P
; textCounter
++) {
349 if (textCounter
< textLimit
) {
350 // code point difference
351 orderResult
= (*text1P
< *text2P
) ? -1 : 1;
352 } else if (text1Length
!= text2Length
) {
353 // one string has extra stuff at end
354 orderResult
= (text1Length
< text2Length
) ? -1 : 1;
360 extern const CFStringRef __kCFLocaleCollatorID
;
362 static UCollator
*__CFStringCreateCollator(CFLocaleRef compareLocale
) {
363 CFStringRef canonLocaleCFStr
= (CFStringRef
)CFLocaleGetValue(compareLocale
, __kCFLocaleCollatorID
);
364 char icuLocaleStr
[128] = {0};
365 CFStringGetCString(canonLocaleCFStr
, icuLocaleStr
, sizeof(icuLocaleStr
), kCFStringEncodingASCII
);
366 UErrorCode icuStatus
= U_ZERO_ERROR
;
367 UCollator
* collator
= ucol_open(icuLocaleStr
, &icuStatus
);
368 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
369 ucol_setAttribute(collator
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &icuStatus
);
370 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
371 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
372 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
376 #define kCFMaxCachedDefaultCollators (8)
377 static UCollator
*__CFDefaultCollators
[kCFMaxCachedDefaultCollators
];
378 static CFIndex __CFDefaultCollatorsCount
= 0;
379 static const void *__CFDefaultCollatorLocale
= NULL
;
380 static CFSpinLock_t __CFDefaultCollatorLock
= CFSpinLockInit
;
382 static UCollator
*__CFStringCopyDefaultCollator(CFLocaleRef compareLocale
) {
383 CFLocaleRef currentLocale
= NULL
;
384 UCollator
* collator
= NULL
;
386 if (compareLocale
!= __CFDefaultCollatorLocale
) {
387 currentLocale
= CFLocaleCopyCurrent();
388 CFRelease(currentLocale
);
389 if (compareLocale
!= currentLocale
) return NULL
;
392 __CFSpinLock(&__CFDefaultCollatorLock
);
393 if ((NULL
!= currentLocale
) && (__CFDefaultCollatorLocale
!= currentLocale
)) {
394 while (__CFDefaultCollatorsCount
> 0) ucol_close(__CFDefaultCollators
[--__CFDefaultCollatorsCount
]);
395 __CFDefaultCollatorLocale
= currentLocale
;
398 if (__CFDefaultCollatorsCount
> 0) collator
= __CFDefaultCollators
[--__CFDefaultCollatorsCount
];
399 __CFSpinUnlock(&__CFDefaultCollatorLock
);
401 if (NULL
== collator
) {
402 collator
= __CFStringCreateCollator(compareLocale
);
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
/*
 * Retires the collator cached on the calling thread.
 *
 * Both thread-specific slots (collator and locale) are cleared. If the
 * thread's locale is the one the default pool was built for and the pool has
 * room, the collator is returned to the pool; otherwise it is closed. The
 * reference held on the thread's locale is released.
 */
static void __collatorFinalize(UCollator *collator) {
    CFLocaleRef locale = pthread_getspecific(__CFTSDKeyCollatorLocale);
    pthread_setspecific(__CFTSDKeyCollatorUCollator, NULL);
    pthread_setspecific(__CFTSDKeyCollatorLocale, NULL);
    __CFSpinLock(&__CFDefaultCollatorLock);
    if ((__CFDefaultCollatorLocale == locale) && (__CFDefaultCollatorsCount < kCFMaxCachedDefaultCollators)) {
        __CFDefaultCollators[__CFDefaultCollatorsCount++] = collator;
        collator = NULL; // now owned by the pool; must not be closed below
    }
    __CFSpinUnlock(&__CFDefaultCollatorLock);
    if (NULL != collator) ucol_close(collator);
    if (locale) CFRelease(locale);
}
#endif
424 // -------------------------------------------------------------------------------------------------
425 // __CompareTextDefault
427 // A primary difference is denoted by values 2/-2 in orderP. Other differences are indicated with a -1/1.
428 // A negative value indicates that text1 sorts before text2.
429 // -------------------------------------------------------------------------------------------------
430 static OSStatus
__CompareTextDefault(UCollator
*collator
, CFOptionFlags options
, const UniChar
*text1Ptr
, UniCharCount text1Length
, const UniChar
*text2Ptr
, UniCharCount text2Length
, Boolean
*equivalentP
, SInt32
*orderP
) {
432 // collator must have default settings restored on exit from this function
437 if (options
& kCFCompareNumerically
) {
438 UErrorCode icuStatus
= U_ZERO_ERROR
;
439 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &icuStatus
);
442 // Most string differences are Primary. Do a primary check first, then if there
443 // are no differences do a comparison with the options in the collator.
444 UCollationResult icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
445 if (icuResult
!= UCOL_EQUAL
) {
446 *orderP
= (icuResult
== UCOL_LESS
) ? -2 : 2;
449 UErrorCode icuStatus
= U_ZERO_ERROR
;
450 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &icuStatus
);
451 ucol_setAttribute(collator
, UCOL_STRENGTH
, (options
& kCFCompareDiacriticInsensitive
) ? UCOL_PRIMARY
: UCOL_SECONDARY
, &icuStatus
);
452 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, (options
& kCFCompareCaseInsensitive
) ? UCOL_OFF
: UCOL_ON
, &icuStatus
);
453 if (!U_SUCCESS(icuStatus
)) {
454 icuStatus
= U_ZERO_ERROR
;
455 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
456 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
457 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
458 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
462 // We don't have a primary difference. Recompare with standard collator.
463 icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
464 if (icuResult
!= UCOL_EQUAL
) {
465 *orderP
= (icuResult
== UCOL_LESS
) ? -1 : 1;
467 icuStatus
= U_ZERO_ERROR
;
468 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
469 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
470 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
472 if (*orderP
== 0 && (options
& kCFCompareNonliteral
) == 0) {
473 *orderP
= __CompareSpecials(collator
, options
, text1Ptr
, text1Length
, text2Ptr
, text2Length
);
476 *equivalentP
= (*orderP
== 0);
478 // If strings are equivalent but we care about order and have not yet checked
479 // to the level of code point order, then do some more checks for order
481 UErrorCode icuStatus
= U_ZERO_ERROR
;
482 // First try to see if ICU can find any differences above code point level
483 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &icuStatus
);
484 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_TERTIARY
, &icuStatus
);
485 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_ON
, &icuStatus
);
486 if (!U_SUCCESS(icuStatus
)) {
487 icuStatus
= U_ZERO_ERROR
;
488 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
489 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
490 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
491 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
494 icuResult
= ucol_strcoll(collator
, (const UChar
*)text1Ptr
, text1Length
, (const UChar
*)text2Ptr
, text2Length
);
495 if (icuResult
!= UCOL_EQUAL
) {
496 *orderP
= (icuResult
== UCOL_LESS
) ? -1 : 1;
498 // no ICU differences above code point level, compare code points
499 *orderP
= __CompareCodePoints( text1Ptr
, text1Length
, text2Ptr
, text2Length
);
501 icuStatus
= U_ZERO_ERROR
;
502 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &icuStatus
);
503 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &icuStatus
);
504 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_OFF
, &icuStatus
);
507 if (options
& kCFCompareNumerically
) {
508 UErrorCode icuStatus
= U_ZERO_ERROR
;
509 ucol_setAttribute(collator
, UCOL_NUMERIC_COLLATION
, UCOL_OFF
, &icuStatus
);
514 static inline CFIndex
__extendLocationBackward(CFIndex location
, CFStringInlineBuffer
*str
, const uint8_t *nonBaseBMP
, const uint8_t *punctBMP
) {
515 while (location
> 0) {
516 UTF32Char ch
= CFStringGetCharacterFromInlineBuffer(str
, location
);
518 if (CFUniCharIsSurrogateLowCharacter(ch
) && CFUniCharIsSurrogateHighCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(str
, location
- 1)))) {
519 ch
= CFUniCharGetLongCharacterForSurrogatePair(ch
, otherChar
);
520 uint8_t planeNo
= (ch
>> 16);
521 if ((planeNo
> 1) || (!CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, planeNo
)))) break;
524 if ((!CFUniCharIsMemberOfBitmap(ch
, nonBaseBMP
) && !CFUniCharIsMemberOfBitmap(ch
, punctBMP
)) || ((ch
>= 0x2E80) && (ch
< 0xAC00))) break;
532 static inline CFIndex
__extendLocationForward(CFIndex location
, CFStringInlineBuffer
*str
, const uint8_t *alnumBMP
, const uint8_t *punctBMP
, const uint8_t *controlBMP
, CFIndex strMax
) {
534 UTF32Char ch
= CFStringGetCharacterFromInlineBuffer(str
, location
);
536 if (CFUniCharIsSurrogateHighCharacter(ch
) && CFUniCharIsSurrogateLowCharacter((otherChar
= CFStringGetCharacterFromInlineBuffer(str
, location
+ 1)))) {
537 ch
= CFUniCharGetLongCharacterForSurrogatePair(ch
, otherChar
);
539 uint8_t planeNo
= (ch
>> 16);
540 if (!CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, planeNo
)) && !CFUniCharIsMemberOfBitmap(ch
, CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet
, planeNo
))) break;
543 if ((!CFUniCharIsMemberOfBitmap(ch
, alnumBMP
) && !CFUniCharIsMemberOfBitmap(ch
, punctBMP
) && !CFUniCharIsMemberOfBitmap(ch
, controlBMP
)) || ((ch
>= 0x2E80) && (ch
< 0xAC00))) break;
545 } while (location
< strMax
);
549 __private_extern__ CFComparisonResult
_CFCompareStringsWithLocale(CFStringInlineBuffer
*str1
, CFRange str1Range
, CFStringInlineBuffer
*str2
, CFRange str2Range
, CFOptionFlags options
, const void *compareLocale
) {
550 const UniChar
*characters1
;
551 const UniChar
*characters2
;
552 CFComparisonResult compResult
= kCFCompareEqualTo
;
553 CFRange range1
= str1Range
;
554 CFRange range2
= str2Range
;
557 bool forcedOrdering
= ((options
& kCFCompareForcedOrdering
) ? true : false);
559 UCollator
*collator
= NULL
;
560 bool defaultCollator
= true;
561 static const uint8_t *alnumBMP
= NULL
;
562 static const uint8_t *nonBaseBMP
= NULL
;
563 static const uint8_t *punctBMP
= NULL
;
564 static const uint8_t *controlBMP
= NULL
;
566 if (NULL
== alnumBMP
) {
567 alnumBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet
, 0);
568 nonBaseBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet
, 0);
569 punctBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet
, 0);
570 controlBMP
= CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet
, 0);
573 // Determine the range of characters surrounding the current index that is significant for localized comparison. The range is extended backward and forward as long as the characters are contextual. Contextual characters include all letters and punctuation. Since most control/format characters are ignorable in localized comparison, we also include them when extending forward.
575 range1
.location
= str1Range
.location
;
576 range2
.location
= str2Range
.location
;
579 // The characters up to the current index have already been determined to be equal by CFString's standard character folding algorithm. Extend as long as the characters are truly contextual (all letters and punctuation).
580 if (range1
.location
> 0) {
581 range1
.location
= __extendLocationBackward(range1
.location
- 1, str1
, nonBaseBMP
, punctBMP
);
584 if (range2
.location
> 0) {
585 range2
.location
= __extendLocationBackward(range2
.location
- 1, str2
, nonBaseBMP
, punctBMP
);
588 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
589 // First we try to use the last one used on this thread, if the locale is the same,
590 // otherwise we try to check out a default one, or then we create one.
591 UCollator
*threadCollator
= pthread_getspecific(__CFTSDKeyCollatorUCollator
);
592 CFLocaleRef threadLocale
= pthread_getspecific(__CFTSDKeyCollatorLocale
);
593 if (compareLocale
== threadLocale
) {
594 collator
= threadCollator
;
597 collator
= __CFStringCopyDefaultCollator((CFLocaleRef
)compareLocale
);
598 defaultCollator
= true;
599 if (NULL
== collator
) {
600 collator
= __CFStringCreateCollator((CFLocaleRef
)compareLocale
);
601 defaultCollator
= false;
603 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
607 characters1
= CFStringGetCharactersPtrFromInlineBuffer(str1
, range1
);
608 characters2
= CFStringGetCharactersPtrFromInlineBuffer(str2
, range2
);
610 if ((NULL
!= characters1
) && (NULL
!= characters2
)) { // do fast
611 range1
.length
= (str1Range
.location
+ str1Range
.length
) - range1
.location
;
612 range2
.length
= (str2Range
.location
+ str2Range
.length
) - range2
.location
;
614 if ((NULL
!= collator
) && (__CompareTextDefault(collator
, options
, characters1
, range1
.length
, characters2
, range2
.length
, &isEqual
, &order
) == 0 /* noErr */)) {
615 compResult
= ((isEqual
&& !forcedOrdering
) ? kCFCompareEqualTo
: ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
));
617 compResult
= ((memcmp(characters1
, characters2
, sizeof(UniChar
) * range1
.length
) < 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
620 UniChar
*buffer1
= NULL
;
621 UniChar
*buffer2
= NULL
;
622 UTF16Char sBuffer1
[kCFStringCompareAllocationIncrement
];
623 UTF16Char sBuffer2
[kCFStringCompareAllocationIncrement
];
624 CFIndex buffer1Len
= 0, buffer2Len
= 0;
625 CFIndex str1Max
= str1Range
.location
+ str1Range
.length
;
626 CFIndex str2Max
= str2Range
.location
+ str2Range
.length
;
629 // Extend forward and compare until the result is deterministic. The result is non-deterministic if the differences are weak and can be resolved by character folding. For example, a comparison between "abc" and "ABC" is considered non-deterministic.
631 if (str1Range
.location
< str1Max
) {
632 str1Range
.location
= __extendLocationForward(str1Range
.location
, str1
, alnumBMP
, punctBMP
, controlBMP
, str1Max
);
633 range1
.length
= (str1Range
.location
- range1
.location
);
634 characters1
= CFStringGetCharactersPtrFromInlineBuffer(str1
, range1
);
636 if (NULL
== characters1
) {
637 if ((0 > buffer1Len
) || (range1
.length
> kCFStringCompareAllocationIncrement
)) {
638 if (buffer1Len
< range1
.length
) {
639 bufferSize
= range1
.length
+ (kCFStringCompareAllocationIncrement
- (range1
.length
% kCFStringCompareAllocationIncrement
));
640 if (0 == buffer1Len
) {
641 buffer1
= (UniChar
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * bufferSize
, 0);
642 } else if (buffer1Len
< range1
.length
) {
643 buffer1
= (UniChar
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, buffer1
, sizeof(UTF16Char
) * bufferSize
, 0);
645 buffer1Len
= bufferSize
;
651 CFStringGetCharactersFromInlineBuffer(str1
, range1
, buffer1
);
652 characters1
= buffer1
;
656 if (str2Range
.location
< str2Max
) {
657 str2Range
.location
= __extendLocationForward(str2Range
.location
, str2
, alnumBMP
, punctBMP
, controlBMP
, str2Max
);
658 range2
.length
= (str2Range
.location
- range2
.location
);
659 characters2
= CFStringGetCharactersPtrFromInlineBuffer(str2
, range2
);
661 if (NULL
== characters2
) {
662 if ((0 > buffer2Len
) || (range2
.length
> kCFStringCompareAllocationIncrement
)) {
663 if (buffer2Len
< range2
.length
) {
664 bufferSize
= range2
.length
+ (kCFStringCompareAllocationIncrement
- (range2
.length
% kCFStringCompareAllocationIncrement
));
665 if (0 == buffer2Len
) {
666 buffer2
= (UniChar
*)CFAllocatorAllocate(kCFAllocatorSystemDefault
, sizeof(UTF16Char
) * bufferSize
, 0);
667 } else if (buffer2Len
< range2
.length
) {
668 buffer2
= (UniChar
*)CFAllocatorReallocate(kCFAllocatorSystemDefault
, buffer2
, sizeof(UTF16Char
) * bufferSize
, 0);
670 buffer2Len
= bufferSize
;
676 CFStringGetCharactersFromInlineBuffer(str2
, range2
, buffer2
);
677 characters2
= buffer2
;
681 if ((NULL
!= collator
) && (__CompareTextDefault(collator
, options
, characters1
, range1
.length
, characters2
, range2
.length
, &isEqual
, &order
) == 0 /* noErr */)) {
683 if (forcedOrdering
&& (kCFCompareEqualTo
== compResult
) && (0 != order
)) compResult
= ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
687 order
= memcmp(characters1
, characters2
, sizeof(UTF16Char
) * ((range1
.length
< range2
.length
) ? range1
.length
: range2
.length
));
689 if (range1
.length
< range2
.length
) {
691 } else if (range2
.length
< range1
.length
) {
694 } else if (order
< 0) {
696 } else if (order
> 0) {
701 if ((order
< -1) || (order
> 1)) break; // the result is deterministic
704 range1
.location
= str1Range
.location
;
705 range2
.location
= str2Range
.location
;
707 } while ((str1Range
.location
< str1Max
) || (str2Range
.location
< str2Max
));
709 if (0 != order
) compResult
= ((order
< 0) ? kCFCompareLessThan
: kCFCompareGreaterThan
);
711 if (buffer1Len
> 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, buffer1
);
712 if (buffer2Len
> 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault
, buffer2
);
715 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
716 if (collator
== threadCollator
) {
717 // do nothing, already cached
719 if (threadLocale
) __collatorFinalize((UCollator
*)pthread_getspecific(__CFTSDKeyCollatorUCollator
)); // need to dealloc collators
721 pthread_key_init_np(__CFTSDKeyCollatorUCollator
, (void *)__collatorFinalize
);
722 pthread_setspecific(__CFTSDKeyCollatorUCollator
, collator
);
723 pthread_setspecific(__CFTSDKeyCollatorLocale
, CFRetain(compareLocale
));