]> git.saurik.com Git - apple/cf.git/blob - CFStringUtilities.c
CF-550.13.tar.gz
[apple/cf.git] / CFStringUtilities.c
1 /*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFStringUtilities.c
25 Copyright (c) 1999-2009, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFStringEncodingConverterExt.h>
31 #include <CoreFoundation/CFUniChar.h>
32 #include <CoreFoundation/CFStringEncodingExt.h>
33 #include "CFStringEncodingDatabase.h"
34 #include "CFICUConverters.h"
35 #include <CoreFoundation/CFPreferences.h>
36 #include <limits.h>
37 #include <stdlib.h>
38 #include <unicode/ucol.h>
39 #include <unicode/ucoleitr.h>
40 #include <string.h>
41
42 #if DEPLOYMENT_TARGET_WINDOWS
43 #include <tchar.h>
44 #endif
45
46
47 Boolean CFStringIsEncodingAvailable(CFStringEncoding theEncoding) {
48 switch (theEncoding) {
49 case kCFStringEncodingASCII: // Built-in encodings
50 case kCFStringEncodingMacRoman:
51 case kCFStringEncodingUTF8:
52 case kCFStringEncodingNonLossyASCII:
53 case kCFStringEncodingWindowsLatin1:
54 case kCFStringEncodingNextStepLatin:
55 case kCFStringEncodingUTF16:
56 case kCFStringEncodingUTF16BE:
57 case kCFStringEncodingUTF16LE:
58 case kCFStringEncodingUTF32:
59 case kCFStringEncodingUTF32BE:
60 case kCFStringEncodingUTF32LE:
61 return true;
62
63 default:
64 return CFStringEncodingIsValidEncoding(theEncoding);
65 }
66 }
67
68 const CFStringEncoding* CFStringGetListOfAvailableEncodings() {
69 return (const CFStringEncoding *)CFStringEncodingListOfAvailableEncodings();
70 }
71
72 CFStringRef CFStringGetNameOfEncoding(CFStringEncoding theEncoding) {
73 static CFMutableDictionaryRef mappingTable = NULL;
74 CFStringRef theName = mappingTable ? (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)(uintptr_t)theEncoding) : NULL;
75
76 if (!theName) {
77 const char *encodingName = __CFStringEncodingGetName(theEncoding);
78
79 if (encodingName) {
80 theName = CFStringCreateWithCString(kCFAllocatorSystemDefault, encodingName, kCFStringEncodingASCII);
81 }
82
83 if (theName) {
84 if (!mappingTable) mappingTable = CFDictionaryCreateMutable(kCFAllocatorSystemDefault, 0, (const CFDictionaryKeyCallBacks *)NULL, &kCFTypeDictionaryValueCallBacks);
85
86 CFDictionaryAddValue(mappingTable, (const void*)(uintptr_t)theEncoding, (const void*)theName);
87 CFRelease(theName);
88 }
89 }
90
91 return theName;
92 }
93
94 CFStringEncoding CFStringConvertIANACharSetNameToEncoding(CFStringRef charsetName) {
95 CFStringEncoding encoding = kCFStringEncodingInvalidId;
96 #define BUFFER_SIZE (100)
97 char buffer[BUFFER_SIZE];
98 const char *name = CFStringGetCStringPtr(charsetName, __CFStringGetEightBitStringEncoding());
99
100 if (NULL == name) {
101 if (false == CFStringGetCString(charsetName, buffer, BUFFER_SIZE, __CFStringGetEightBitStringEncoding())) return kCFStringEncodingInvalidId;
102
103 name = buffer;
104 }
105
106 encoding = __CFStringEncodingGetFromCanonicalName(name);
107
108 if (kCFStringEncodingInvalidId == encoding) encoding = __CFStringEncodingGetFromICUName(name);
109
110
111 return encoding;
112 }
113
114 CFStringRef CFStringConvertEncodingToIANACharSetName(CFStringEncoding encoding) {
115 CFStringRef name = NULL;
116 CFIndex value = encoding;
117 static CFMutableDictionaryRef mappingTable = NULL;
118 static CFSpinLock_t lock = CFSpinLockInit;
119
120 __CFSpinLock(&lock);
121 name = ((NULL == mappingTable) ? NULL : (CFStringRef)CFDictionaryGetValue(mappingTable, (const void*)value));
122
123 if (NULL == name) {
124 #define STACK_BUFFER_SIZE (100)
125 char buffer[STACK_BUFFER_SIZE];
126
127 if (__CFStringEncodingGetCanonicalName(encoding, buffer, STACK_BUFFER_SIZE)) name = CFStringCreateWithCString(NULL, buffer, kCFStringEncodingASCII);
128
129
130 if (NULL != name) {
131 CFIndex value = encoding;
132
133 if (NULL == mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, &kCFTypeDictionaryValueCallBacks);
134
135 CFDictionaryAddValue(mappingTable, (const void*)value, (const void*)name);
136 CFRelease(name);
137 }
138 }
139 __CFSpinUnlock(&lock);
140
141 return name;
142 }
143
/*
 * NSStringEncoding values used by the CFStringEncoding <-> NSStringEncoding
 * conversion functions below.
 */
enum {
    NSASCIIStringEncoding         = 1,      /* 0..127 only */
    NSNEXTSTEPStringEncoding      = 2,
    NSJapaneseEUCStringEncoding   = 3,
    NSUTF8StringEncoding          = 4,
    NSISOLatin1StringEncoding     = 5,
    NSSymbolStringEncoding        = 6,
    NSNonLossyASCIIStringEncoding = 7,
    NSShiftJISStringEncoding      = 8,
    NSISOLatin2StringEncoding     = 9,
    NSUnicodeStringEncoding       = 10,
    NSWindowsCP1251StringEncoding = 11,     /* Cyrillic; same as AdobeStandardCyrillic */
    NSWindowsCP1252StringEncoding = 12,     /* WinLatin1 */
    NSWindowsCP1253StringEncoding = 13,     /* Greek */
    NSWindowsCP1254StringEncoding = 14,     /* Turkish */
    NSWindowsCP1250StringEncoding = 15,     /* WinLatin2 */
    NSISO2022JPStringEncoding     = 21,     /* ISO 2022 Japanese encoding for e-mail */
    NSMacOSRomanStringEncoding    = 30,

    NSProprietaryStringEncoding   = 65536   /* Installation-specific encoding */
};

/*
 * Bit OR-ed into a CFStringEncoding that has no dedicated NSStringEncoding
 * value, and stripped again on the way back.
 * NOTE(review): the literal 1 is a signed int, so this expression has type int.
 */
#define NSENCODING_MASK (1 << 31)
167
168 unsigned long CFStringConvertEncodingToNSStringEncoding(CFStringEncoding theEncoding) {
169 switch (theEncoding & 0xFFF) {
170 case kCFStringEncodingUnicode:
171 if (theEncoding == kCFStringEncodingUTF16) return NSUnicodeStringEncoding;
172 else if (theEncoding == kCFStringEncodingUTF8) return NSUTF8StringEncoding;
173 break;
174
175 case kCFStringEncodingWindowsLatin1: return NSWindowsCP1252StringEncoding;
176 case kCFStringEncodingMacRoman: return NSMacOSRomanStringEncoding;
177
178 case kCFStringEncodingASCII: return NSASCIIStringEncoding;
179
180 case kCFStringEncodingDOSJapanese: return NSShiftJISStringEncoding;
181 case kCFStringEncodingWindowsCyrillic: return NSWindowsCP1251StringEncoding;
182 case kCFStringEncodingWindowsGreek: return NSWindowsCP1253StringEncoding;
183 case kCFStringEncodingWindowsLatin5: return NSWindowsCP1254StringEncoding;
184 case kCFStringEncodingWindowsLatin2: return NSWindowsCP1250StringEncoding;
185 case kCFStringEncodingISOLatin1: return NSISOLatin1StringEncoding;
186
187 case kCFStringEncodingNonLossyASCII: return NSNonLossyASCIIStringEncoding;
188 case kCFStringEncodingEUC_JP: return NSJapaneseEUCStringEncoding;
189 case kCFStringEncodingMacSymbol: return NSSymbolStringEncoding;
190 case kCFStringEncodingISOLatin2: return NSISOLatin2StringEncoding;
191 case kCFStringEncodingISO_2022_JP: return NSISO2022JPStringEncoding;
192 case kCFStringEncodingNextStepLatin: return NSNEXTSTEPStringEncoding;
193 }
194
195 return NSENCODING_MASK | theEncoding;
196 }
197
198 CFStringEncoding CFStringConvertNSStringEncodingToEncoding(unsigned long theEncoding) {
199 const uint16_t encodings[] = {
200 kCFStringEncodingASCII,
201 kCFStringEncodingNextStepLatin,
202 kCFStringEncodingEUC_JP,
203 0,
204 kCFStringEncodingISOLatin1,
205 kCFStringEncodingMacSymbol,
206 kCFStringEncodingNonLossyASCII,
207 kCFStringEncodingDOSJapanese,
208 kCFStringEncodingISOLatin2,
209 kCFStringEncodingUTF16,
210 kCFStringEncodingWindowsCyrillic,
211 kCFStringEncodingWindowsLatin1,
212 kCFStringEncodingWindowsGreek,
213 kCFStringEncodingWindowsLatin5,
214 kCFStringEncodingWindowsLatin2
215 };
216
217 if (NSUTF8StringEncoding == theEncoding) return kCFStringEncodingUTF8;
218
219 if ((theEncoding > 0) && (theEncoding <= NSWindowsCP1250StringEncoding)) return encodings[theEncoding - 1];
220
221 switch (theEncoding) {
222 case NSMacOSRomanStringEncoding: return kCFStringEncodingMacRoman;
223 case NSISO2022JPStringEncoding: return kCFStringEncodingISO_2022_JP;
224
225 default:
226 return ((theEncoding & NSENCODING_MASK) ? theEncoding & ~NSENCODING_MASK : kCFStringEncodingInvalidId);
227 }
228 }
229
230 UInt32 CFStringConvertEncodingToWindowsCodepage(CFStringEncoding theEncoding) {
231 uint16_t codepage = __CFStringEncodingGetWindowsCodePage(theEncoding);
232
233 return ((0 == codepage) ? kCFStringEncodingInvalidId : codepage);
234 }
235
236 CFStringEncoding CFStringConvertWindowsCodepageToEncoding(UInt32 theEncoding) {
237 return __CFStringEncodingGetFromWindowsCodePage(theEncoding);
238 }
239
240 CFStringEncoding CFStringGetMostCompatibleMacStringEncoding(CFStringEncoding encoding) {
241 CFStringEncoding macEncoding = __CFStringEncodingGetMostCompatibleMacScript(encoding);
242
243
244 return macEncoding;
245 }
246
/* Size (in UTF-16 units) of the stack buffers used by the localized compare, and the granularity of its heap buffers. */
#define kCFStringCompareAllocationIncrement (128)


// -------------------------------------------------------------------------------------------------
// CompareSpecials - ignore case & diacritic differences
//
// Decomposed have 2nd-4th chars of type Mn or Mc, or in range 1160-11FF (jamo)
// Fullwidth & halfwidth are in range FF00-FFEF
// Parenthesized & circled are in range 3200-32FF
// -------------------------------------------------------------------------------------------------

/* Weights and masks applied by __CompareSpecials() to ICU collation element orders. */
enum {
    // Tertiary weights inside [Min, Max] are treated as upper case ...
    kUpperCaseWeightMin   = 0x8F,       // 0x80 | 0x0F
    kUpperCaseWeightMax   = 0x97,       // 0x80 | 0x17
    // ... and folded to lower case by subtracting this delta.
    kUpperToLowerDelta    = 0x8A,       // 0x80 | 0x0A, where 0x0A = 0x0F - 0x05

    // Bit fields of a collation element order.
    kMaskPrimarySecondary = 0xFFFFFF00,
    kMaskPrimaryOnly      = 0xFFFF0000,
    kMaskSecondaryOnly    = 0x0000FF00,
    kMaskCaseTertiary     = 0x000000FF  // 2 hi bits case, 6 lo bits tertiary
};
267
268 static SInt32 __CompareSpecials(const UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length) {
269 UErrorCode icuStatus = U_ZERO_ERROR;
270 SInt32 orderWidth = 0;
271 SInt32 orderCompos = 0;
272
273 UCollationElements * collElems1 = ucol_openElements(collator, (const UChar *)text1Ptr, text1Length, &icuStatus);
274 UCollationElements * collElems2 = ucol_openElements(collator, (const UChar *)text2Ptr, text2Length, &icuStatus);
275 if (U_SUCCESS(icuStatus)) {
276 int32_t startOffset1 = 0;
277 int32_t startOffset2 = 0;
278
279 while (true) {
280 int32_t elemOrder1, elemOrder2;
281 int32_t offset1, offset2;
282
283 elemOrder1 = ucol_next(collElems1, &icuStatus);
284 elemOrder2 = ucol_next(collElems2, &icuStatus);
285 if ( U_FAILURE(icuStatus) || elemOrder1 == (int32_t)UCOL_NULLORDER || elemOrder2 == (int32_t)UCOL_NULLORDER ) {
286 break;
287 }
288
289 offset1 = ucol_getOffset(collElems1);
290 offset2 = ucol_getOffset(collElems2);
291 if ( (elemOrder1 & kMaskPrimarySecondary) == (elemOrder2 & kMaskPrimarySecondary) ) {
292 if ( (elemOrder1 & kMaskPrimaryOnly) != 0 ) {
293 // keys may differ in case, width, circling, etc.
294
295 int32_t tertiary1 = (elemOrder1 & kMaskCaseTertiary);
296 int32_t tertiary2 = (elemOrder2 & kMaskCaseTertiary);
297 // fold upper to lower case
298 if (tertiary1 >= kUpperCaseWeightMin && tertiary1 <= kUpperCaseWeightMax) {
299 tertiary1 -= kUpperToLowerDelta;
300 }
301 if (tertiary2 >= kUpperCaseWeightMin && tertiary2 <= kUpperCaseWeightMax) {
302 tertiary2 -= kUpperToLowerDelta;
303 }
304 // now compare
305 if (tertiary1 != tertiary2) {
306 orderWidth = (tertiary1 < tertiary2)? -1: 1;
307 break;
308 }
309
310 } else if ( (elemOrder1 & kMaskSecondaryOnly) != 0 ) {
311 // primary weights are both zero, but secondaries are not.
312 if ( orderCompos == 0 && (options & kCFCompareNonliteral) == 0 ) {
313 // We have a code element which is a diacritic.
314 // It may have come from a composed char or a combining char.
315 // If it came from a combining char (longer element length) it sorts first.
316 // This is only an approximation to what the Mac OS 9 code did, but this is an
317 // unusual case anyway.
318 int32_t elem1Length = offset1 - startOffset1;
319 int32_t elem2Length = offset2 - startOffset2;
320 if (elem1Length != elem2Length) {
321 orderCompos = (elem1Length > elem2Length)? -1: 1;
322 }
323 }
324 }
325 }
326
327 startOffset1 = offset1;
328 startOffset2 = offset2;
329 }
330 ucol_closeElements(collElems1);
331 ucol_closeElements(collElems2);
332 }
333
334 return (orderWidth != 0)? orderWidth: orderCompos;
335 }
336
337 static SInt32 __CompareCodePoints(const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length ) {
338 const UniChar * text1P = text1Ptr;
339 const UniChar * text2P = text2Ptr;
340 UInt32 textLimit = (text1Length <= text2Length)? text1Length: text2Length;
341 UInt32 textCounter;
342 SInt32 orderResult = 0;
343
344 // Loop through either string...the first difference differentiates this.
345 for (textCounter = 0; textCounter < textLimit && *text1P == *text2P; textCounter++) {
346 text1P++;
347 text2P++;
348 }
349 if (textCounter < textLimit) {
350 // code point difference
351 orderResult = (*text1P < *text2P) ? -1 : 1;
352 } else if (text1Length != text2Length) {
353 // one string has extra stuff at end
354 orderResult = (text1Length < text2Length) ? -1 : 1;
355 }
356 return orderResult;
357 }
358
359
360 extern const CFStringRef __kCFLocaleCollatorID;
361
362 static UCollator *__CFStringCreateCollator(CFLocaleRef compareLocale) {
363 CFStringRef canonLocaleCFStr = (CFStringRef)CFLocaleGetValue(compareLocale, __kCFLocaleCollatorID);
364 char icuLocaleStr[128] = {0};
365 CFStringGetCString(canonLocaleCFStr, icuLocaleStr, sizeof(icuLocaleStr), kCFStringEncodingASCII);
366 UErrorCode icuStatus = U_ZERO_ERROR;
367 UCollator * collator = ucol_open(icuLocaleStr, &icuStatus);
368 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
369 ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &icuStatus);
370 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
371 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
372 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
373 return collator;
374 }
375
376 #define kCFMaxCachedDefaultCollators (8)
377 static UCollator *__CFDefaultCollators[kCFMaxCachedDefaultCollators];
378 static CFIndex __CFDefaultCollatorsCount = 0;
379 static const void *__CFDefaultCollatorLocale = NULL;
380 static CFSpinLock_t __CFDefaultCollatorLock = CFSpinLockInit;
381
382 static UCollator *__CFStringCopyDefaultCollator(CFLocaleRef compareLocale) {
383 CFLocaleRef currentLocale = NULL;
384 UCollator * collator = NULL;
385
386 if (compareLocale != __CFDefaultCollatorLocale) {
387 currentLocale = CFLocaleCopyCurrent();
388 CFRelease(currentLocale);
389 if (compareLocale != currentLocale) return NULL;
390 }
391
392 __CFSpinLock(&__CFDefaultCollatorLock);
393 if ((NULL != currentLocale) && (__CFDefaultCollatorLocale != currentLocale)) {
394 while (__CFDefaultCollatorsCount > 0) ucol_close(__CFDefaultCollators[--__CFDefaultCollatorsCount]);
395 __CFDefaultCollatorLocale = currentLocale;
396 }
397
398 if (__CFDefaultCollatorsCount > 0) collator = __CFDefaultCollators[--__CFDefaultCollatorsCount];
399 __CFSpinUnlock(&__CFDefaultCollatorLock);
400
401 if (NULL == collator) {
402 collator = __CFStringCreateCollator(compareLocale);
403 }
404
405 return collator;
406 }
407
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
/*
 * Retires the collator that was cached for the calling thread.
 *
 * Both thread-specific slots (collator and locale) are cleared. The collator
 * goes back into the default pool when it belongs to the pool's locale and
 * the pool has room; otherwise it is closed. The locale reference that was
 * stored in the thread slot is released.
 */
static void __collatorFinalize(UCollator *collator) {
    CFLocaleRef cachedLocale = pthread_getspecific(__CFTSDKeyCollatorLocale);
    bool returnedToPool = false;

    pthread_setspecific(__CFTSDKeyCollatorUCollator, NULL);
    pthread_setspecific(__CFTSDKeyCollatorLocale, NULL);

    __CFSpinLock(&__CFDefaultCollatorLock);
    if ((__CFDefaultCollatorLocale == cachedLocale) && (__CFDefaultCollatorsCount < kCFMaxCachedDefaultCollators)) {
        __CFDefaultCollators[__CFDefaultCollatorsCount] = collator;
        __CFDefaultCollatorsCount++;
        returnedToPool = true;
    }
    __CFSpinUnlock(&__CFDefaultCollatorLock);

    if (!returnedToPool && (NULL != collator)) {
        ucol_close(collator);
    }
    if (cachedLocale) {
        CFRelease(cachedLocale);
    }
}
#endif
423
424 // -------------------------------------------------------------------------------------------------
425 // __CompareTextDefault
426 //
427 // A primary difference is denoted by values 2/-2 in orderP. Other differences are indicated with a -1/1.
428 // A negative value indicates that text1 sorts before text2.
429 // -------------------------------------------------------------------------------------------------
430 static OSStatus __CompareTextDefault(UCollator *collator, CFOptionFlags options, const UniChar *text1Ptr, UniCharCount text1Length, const UniChar *text2Ptr, UniCharCount text2Length, Boolean *equivalentP, SInt32 *orderP) {
431
432 // collator must have default settings restored on exit from this function
433
434 *equivalentP = true;
435 *orderP = 0;
436
437 if (options & kCFCompareNumerically) {
438 UErrorCode icuStatus = U_ZERO_ERROR;
439 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_ON, &icuStatus);
440 }
441
442 // Most string differences are Primary. Do a primary check first, then if there
443 // are no differences do a comparison with the options in the collator.
444 UCollationResult icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
445 if (icuResult != UCOL_EQUAL) {
446 *orderP = (icuResult == UCOL_LESS) ? -2 : 2;
447 }
448 if (*orderP == 0) {
449 UErrorCode icuStatus = U_ZERO_ERROR;
450 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
451 ucol_setAttribute(collator, UCOL_STRENGTH, (options & kCFCompareDiacriticInsensitive) ? UCOL_PRIMARY : UCOL_SECONDARY, &icuStatus);
452 ucol_setAttribute(collator, UCOL_CASE_LEVEL, (options & kCFCompareCaseInsensitive) ? UCOL_OFF : UCOL_ON, &icuStatus);
453 if (!U_SUCCESS(icuStatus)) {
454 icuStatus = U_ZERO_ERROR;
455 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
456 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
457 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
458 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
459 return 666;
460 }
461
462 // We don't have a primary difference. Recompare with standard collator.
463 icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
464 if (icuResult != UCOL_EQUAL) {
465 *orderP = (icuResult == UCOL_LESS) ? -1 : 1;
466 }
467 icuStatus = U_ZERO_ERROR;
468 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
469 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
470 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
471 }
472 if (*orderP == 0 && (options & kCFCompareNonliteral) == 0) {
473 *orderP = __CompareSpecials(collator, options, text1Ptr, text1Length, text2Ptr, text2Length);
474 }
475
476 *equivalentP = (*orderP == 0);
477
478 // If strings are equivalent but we care about order and have not yet checked
479 // to the level of code point order, then do some more checks for order
480 if (*orderP == 0) {
481 UErrorCode icuStatus = U_ZERO_ERROR;
482 // First try to see if ICU can find any differences above code point level
483 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
484 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &icuStatus);
485 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &icuStatus);
486 if (!U_SUCCESS(icuStatus)) {
487 icuStatus = U_ZERO_ERROR;
488 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
489 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
490 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
491 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
492 return 666;
493 }
494 icuResult = ucol_strcoll(collator, (const UChar *)text1Ptr, text1Length, (const UChar *)text2Ptr, text2Length);
495 if (icuResult != UCOL_EQUAL) {
496 *orderP = (icuResult == UCOL_LESS) ? -1 : 1;
497 } else {
498 // no ICU differences above code point level, compare code points
499 *orderP = __CompareCodePoints( text1Ptr, text1Length, text2Ptr, text2Length );
500 }
501 icuStatus = U_ZERO_ERROR;
502 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &icuStatus);
503 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &icuStatus);
504 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_OFF, &icuStatus);
505 }
506
507 if (options & kCFCompareNumerically) {
508 UErrorCode icuStatus = U_ZERO_ERROR;
509 ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, UCOL_OFF, &icuStatus);
510 }
511 return 0; // noErr
512 }
513
514 static inline CFIndex __extendLocationBackward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *nonBaseBMP, const uint8_t *punctBMP) {
515 while (location > 0) {
516 UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
517 UTF32Char otherChar;
518 if (CFUniCharIsSurrogateLowCharacter(ch) && CFUniCharIsSurrogateHighCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location - 1)))) {
519 ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
520 uint8_t planeNo = (ch >> 16);
521 if ((planeNo > 1) || (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)))) break;
522 location -= 2;
523 } else {
524 if ((!CFUniCharIsMemberOfBitmap(ch, nonBaseBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
525 --location;
526 }
527 }
528
529 return location;
530 }
531
532 static inline CFIndex __extendLocationForward(CFIndex location, CFStringInlineBuffer *str, const uint8_t *alnumBMP, const uint8_t *punctBMP, const uint8_t *controlBMP, CFIndex strMax) {
533 do {
534 UTF32Char ch = CFStringGetCharacterFromInlineBuffer(str, location);
535 UTF32Char otherChar;
536 if (CFUniCharIsSurrogateHighCharacter(ch) && CFUniCharIsSurrogateLowCharacter((otherChar = CFStringGetCharacterFromInlineBuffer(str, location + 1)))) {
537 ch = CFUniCharGetLongCharacterForSurrogatePair(ch, otherChar);
538 location += 2;
539 uint8_t planeNo = (ch >> 16);
540 if (!CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, planeNo)) && !CFUniCharIsMemberOfBitmap(ch, CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, planeNo))) break;
541 } else {
542 ++location;
543 if ((!CFUniCharIsMemberOfBitmap(ch, alnumBMP) && !CFUniCharIsMemberOfBitmap(ch, punctBMP) && !CFUniCharIsMemberOfBitmap(ch, controlBMP)) || ((ch >= 0x2E80) && (ch < 0xAC00))) break;
544 }
545 } while (location < strMax);
546 return location;
547 }
548
549 __private_extern__ CFComparisonResult _CFCompareStringsWithLocale(CFStringInlineBuffer *str1, CFRange str1Range, CFStringInlineBuffer *str2, CFRange str2Range, CFOptionFlags options, const void *compareLocale) {
550 const UniChar *characters1;
551 const UniChar *characters2;
552 CFComparisonResult compResult = kCFCompareEqualTo;
553 CFRange range1 = str1Range;
554 CFRange range2 = str2Range;
555 SInt32 order;
556 Boolean isEqual;
557 bool forcedOrdering = ((options & kCFCompareForcedOrdering) ? true : false);
558
559 UCollator *collator = NULL;
560 bool defaultCollator = true;
561 static const uint8_t *alnumBMP = NULL;
562 static const uint8_t *nonBaseBMP = NULL;
563 static const uint8_t *punctBMP = NULL;
564 static const uint8_t *controlBMP = NULL;
565
566 if (NULL == alnumBMP) {
567 alnumBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharAlphaNumericCharacterSet, 0);
568 nonBaseBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
569 punctBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharPunctuationCharacterSet, 0);
570 controlBMP = CFUniCharGetBitmapPtrForPlane(kCFUniCharControlAndFormatterCharacterSet, 0);
571 }
572
573 // Determine the range of characters surrodiing the current index significant for localized comparison. The range is extended backward and forward as long as they are contextual. Contextual characters include all letters and punctuations. Since most control/format characters are ignorable in localized comparison, we also include them extending forward.
574
575 range1.location = str1Range.location;
576 range2.location = str2Range.location;
577
578 // go backward
579 // The characters upto the current index are already determined to be equal by the CFString's standard character folding algorithm. Extend as long as truly contextual (all letters and punctuations).
580 if (range1.location > 0) {
581 range1.location = __extendLocationBackward(range1.location - 1, str1, nonBaseBMP, punctBMP);
582 }
583
584 if (range2.location > 0) {
585 range2.location = __extendLocationBackward(range2.location - 1, str2, nonBaseBMP, punctBMP);
586 }
587
588 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
589 // First we try to use the last one used on this thread, if the locale is the same,
590 // otherwise we try to check out a default one, or then we create one.
591 UCollator *threadCollator = pthread_getspecific(__CFTSDKeyCollatorUCollator);
592 CFLocaleRef threadLocale = pthread_getspecific(__CFTSDKeyCollatorLocale);
593 if (compareLocale == threadLocale) {
594 collator = threadCollator;
595 } else {
596 #endif
597 collator = __CFStringCopyDefaultCollator((CFLocaleRef)compareLocale);
598 defaultCollator = true;
599 if (NULL == collator) {
600 collator = __CFStringCreateCollator((CFLocaleRef)compareLocale);
601 defaultCollator = false;
602 }
603 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
604 }
605 #endif
606
607 characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
608 characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
609
610 if ((NULL != characters1) && (NULL != characters2)) { // do fast
611 range1.length = (str1Range.location + str1Range.length) - range1.location;
612 range2.length = (str2Range.location + str2Range.length) - range2.location;
613
614 if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) == 0 /* noErr */)) {
615 compResult = ((isEqual && !forcedOrdering) ? kCFCompareEqualTo : ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan));
616 } else {
617 compResult = ((memcmp(characters1, characters2, sizeof(UniChar) * range1.length) < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
618 }
619 } else {
620 UniChar *buffer1 = NULL;
621 UniChar *buffer2 = NULL;
622 UTF16Char sBuffer1[kCFStringCompareAllocationIncrement];
623 UTF16Char sBuffer2[kCFStringCompareAllocationIncrement];
624 CFIndex buffer1Len = 0, buffer2Len = 0;
625 CFIndex str1Max = str1Range.location + str1Range.length;
626 CFIndex str2Max = str2Range.location + str2Range.length;
627 CFIndex bufferSize;
628
629 // Extend forward and compare until the result is deterministic. The result is indeterministic if the differences are weak and can be resolved by character folding. For example, comparision between "abc" and "ABC" is considered to be indeterministic.
630 do {
631 if (str1Range.location < str1Max) {
632 str1Range.location = __extendLocationForward(str1Range.location, str1, alnumBMP, punctBMP, controlBMP, str1Max);
633 range1.length = (str1Range.location - range1.location);
634 characters1 = CFStringGetCharactersPtrFromInlineBuffer(str1, range1);
635
636 if (NULL == characters1) {
637 if ((0 > buffer1Len) || (range1.length > kCFStringCompareAllocationIncrement)) {
638 if (buffer1Len < range1.length) {
639 bufferSize = range1.length + (kCFStringCompareAllocationIncrement - (range1.length % kCFStringCompareAllocationIncrement));
640 if (0 == buffer1Len) {
641 buffer1 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
642 } else if (buffer1Len < range1.length) {
643 buffer1 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer1, sizeof(UTF16Char) * bufferSize, 0);
644 }
645 buffer1Len = bufferSize;
646 }
647 } else {
648 buffer1 = sBuffer1;
649 }
650
651 CFStringGetCharactersFromInlineBuffer(str1, range1, buffer1);
652 characters1 = buffer1;
653 }
654 }
655
656 if (str2Range.location < str2Max) {
657 str2Range.location = __extendLocationForward(str2Range.location, str2, alnumBMP, punctBMP, controlBMP, str2Max);
658 range2.length = (str2Range.location - range2.location);
659 characters2 = CFStringGetCharactersPtrFromInlineBuffer(str2, range2);
660
661 if (NULL == characters2) {
662 if ((0 > buffer2Len) || (range2.length > kCFStringCompareAllocationIncrement)) {
663 if (buffer2Len < range2.length) {
664 bufferSize = range2.length + (kCFStringCompareAllocationIncrement - (range2.length % kCFStringCompareAllocationIncrement));
665 if (0 == buffer2Len) {
666 buffer2 = (UniChar *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(UTF16Char) * bufferSize, 0);
667 } else if (buffer2Len < range2.length) {
668 buffer2 = (UniChar *)CFAllocatorReallocate(kCFAllocatorSystemDefault, buffer2, sizeof(UTF16Char) * bufferSize, 0);
669 }
670 buffer2Len = bufferSize;
671 }
672 } else {
673 buffer2 = sBuffer2;
674 }
675
676 CFStringGetCharactersFromInlineBuffer(str2, range2, buffer2);
677 characters2 = buffer2;
678 }
679 }
680
681 if ((NULL != collator) && (__CompareTextDefault(collator, options, characters1, range1.length, characters2, range2.length, &isEqual, &order) == 0 /* noErr */)) {
682 if (isEqual) {
683 if (forcedOrdering && (kCFCompareEqualTo == compResult) && (0 != order)) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
684 order = 0;
685 }
686 } else {
687 order = memcmp(characters1, characters2, sizeof(UTF16Char) * ((range1.length < range2.length) ? range1.length : range2.length));
688 if (0 == order) {
689 if (range1.length < range2.length) {
690 order = -2;
691 } else if (range2.length < range1.length) {
692 order = 2;
693 }
694 } else if (order < 0) {
695 --order;
696 } else if (order > 0) {
697 ++order;
698 }
699 }
700
701 if ((order < -1) || (order > 1)) break; // the result is deterministic
702
703 if (0 == order) {
704 range1.location = str1Range.location;
705 range2.location = str2Range.location;
706 }
707 } while ((str1Range.location < str1Max) || (str2Range.location < str2Max));
708
709 if (0 != order) compResult = ((order < 0) ? kCFCompareLessThan : kCFCompareGreaterThan);
710
711 if (buffer1Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer1);
712 if (buffer2Len > 0) CFAllocatorDeallocate(kCFAllocatorSystemDefault, buffer2);
713 }
714
715 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
716 if (collator == threadCollator) {
717 // do nothing, already cached
718 } else {
719 if (threadLocale) __collatorFinalize((UCollator *)pthread_getspecific(__CFTSDKeyCollatorUCollator)); // need to dealloc collators
720
721 pthread_key_init_np(__CFTSDKeyCollatorUCollator, (void *)__collatorFinalize);
722 pthread_setspecific(__CFTSDKeyCollatorUCollator, collator);
723 pthread_setspecific(__CFTSDKeyCollatorLocale, CFRetain(compareLocale));
724 }
725 #endif
726
727 return compResult;
728 }
729